Merge 4.19.71 into android-4.19-q

Changes in 4.19.71
	Revert "Input: elantech - enable SMBus on new (2018+) systems"
	Linux 4.19.71

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I4a7427ca04e312145dd45493d371db71b80d70af
diff --git a/Documentation/ABI/testing/procfs-concurrent_time b/Documentation/ABI/testing/procfs-concurrent_time
new file mode 100644
index 0000000..55b4142
--- /dev/null
+++ b/Documentation/ABI/testing/procfs-concurrent_time
@@ -0,0 +1,16 @@
+What:		/proc/uid_concurrent_active_time
+Date:		December 2018
+Contact:	Connor O'Brien <connoro@google.com>
+Description:
+	The /proc/uid_concurrent_active_time file displays aggregated cputime
+	numbers for each uid, broken down by the total number of cores that were
+	active while the uid's task was running.
+
+What:		/proc/uid_concurrent_policy_time
+Date:		December 2018
+Contact:	Connor O'Brien <connoro@google.com>
+Description:
+	The /proc/uid_concurrent_policy_time file displays aggregated cputime
+	numbers for each uid, broken down based on the cpufreq policy
+	of the core used by the uid's task and the number of cores associated
+	with that policy that were active while the uid's task was running.
diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram
index c1513c7..14b2bf2 100644
--- a/Documentation/ABI/testing/sysfs-block-zram
+++ b/Documentation/ABI/testing/sysfs-block-zram
@@ -98,3 +98,42 @@
 		The backing_dev file is read-write and set up backing
 		device for zram to write incompressible pages.
 		For using, user should enable CONFIG_ZRAM_WRITEBACK.
+
+What:		/sys/block/zram<id>/idle
+Date:		November 2018
+Contact:	Minchan Kim <minchan@kernel.org>
+Description:
+		The idle file is write-only and marks zram slots as idle.
+		If the system has debugfs mounted, the user can see which
+		slots are idle via /sys/kernel/debug/zram/zram<id>/block_state
+
+What:		/sys/block/zram<id>/writeback
+Date:		November 2018
+Contact:	Minchan Kim <minchan@kernel.org>
+Description:
+		The writeback file is write-only and triggers idle and/or
+		huge page writeback to the backing device.
+
+What:		/sys/block/zram<id>/bd_stat
+Date:		November 2018
+Contact:	Minchan Kim <minchan@kernel.org>
+Description:
+		The bd_stat file is read-only and represents backing device's
+		statistics (bd_count, bd_reads, bd_writes) in a format
+		similar to block layer statistics file format.
+
+What:		/sys/block/zram<id>/writeback_limit_enable
+Date:		November 2018
+Contact:	Minchan Kim <minchan@kernel.org>
+Description:
+		The writeback_limit_enable file is read-write and enables or
+		disables the writeback_limit feature. "1" means enable the
+		feature. "0" (no limit) is the initial state.
+
+What:		/sys/block/zram<id>/writeback_limit
+Date:		November 2018
+Contact:	Minchan Kim <minchan@kernel.org>
+Description:
+		The writeback_limit file is read-write and specifies the maximum
+		amount of writeback ZRAM can do. The limit can be changed
+		at run time.
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 94a24ae..91822ce 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -86,12 +86,28 @@
 		The unit size is one block, now only support configuring in range
 		of [1, 512].
 
+What:		/sys/fs/f2fs/<disk>/umount_discard_timeout
+Date:		January 2019
+Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:
+		Set timeout to issue discard commands during umount.
+		Default: 5 secs
+
 What:		/sys/fs/f2fs/<disk>/max_victim_search
 Date:		January 2014
 Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
 Description:
 		 Controls the number of trials to find a victim segment.
 
+What:		/sys/fs/f2fs/<disk>/migration_granularity
+Date:		October 2018
+Contact:	"Chao Yu" <yuchao0@huawei.com>
+Description:
+		 Controls the migration granularity of garbage collection on a
+		 large section. It lets GC move partial segment(s) of one
+		 section in one GC cycle, dispersing the heavy GC overhead
+		 across multiple lightweight cycles.
+
 What:		/sys/fs/f2fs/<disk>/dir_level
 Date:		March 2014
 Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
@@ -121,7 +137,22 @@
 Date:		January 2016
 Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
 Description:
-		 Controls the idle timing.
+		 Controls the idle timing for all paths other than the
+		 discard and gc paths.
+
+What:		/sys/fs/f2fs/<disk>/discard_idle_interval
+Date:		September 2018
+Contact:	"Chao Yu" <yuchao0@huawei.com>
+Contact:	"Sahitya Tummala" <stummala@codeaurora.org>
+Description:
+		 Controls the idle timing for discard path.
+
+What:		/sys/fs/f2fs/<disk>/gc_idle_interval
+Date:		September 2018
+Contact:	"Chao Yu" <yuchao0@huawei.com>
+Contact:	"Sahitya Tummala" <stummala@codeaurora.org>
+Description:
+		 Controls the idle timing for gc path.
 
 What:		/sys/fs/f2fs/<disk>/iostat_enable
 Date:		August 2017
diff --git a/Documentation/ABI/testing/sysfs-kernel-wakeup_reasons b/Documentation/ABI/testing/sysfs-kernel-wakeup_reasons
new file mode 100644
index 0000000..acb19b9
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-wakeup_reasons
@@ -0,0 +1,16 @@
+What:		/sys/kernel/wakeup_reasons/last_resume_reason
+Date:		February 2014
+Contact:	Ruchi Kandoi <kandoiruchi@google.com>
+Description:
+		The /sys/kernel/wakeup_reasons/last_resume_reason is
+		used to report wakeup reasons after system exited suspend.
+
+What:		/sys/kernel/wakeup_reasons/last_suspend_time
+Date:		March 2015
+Contact:	jinqian <jinqian@google.com>
+Description:
+		The /sys/kernel/wakeup_reasons/last_suspend_time is
+		used to report the time spent in the last suspend cycle. It
+		contains two numbers (in seconds) separated by a space. The
+		first number is the time spent in the suspend and resume
+		processes. The second number is the time spent in the sleep
+		state.
\ No newline at end of file
diff --git a/Documentation/accounting/psi.txt b/Documentation/accounting/psi.txt
new file mode 100644
index 0000000..4fb40fe
--- /dev/null
+++ b/Documentation/accounting/psi.txt
@@ -0,0 +1,180 @@
+================================
+PSI - Pressure Stall Information
+================================
+
+:Date: April, 2018
+:Author: Johannes Weiner <hannes@cmpxchg.org>
+
+When CPU, memory or IO devices are contended, workloads experience
+latency spikes, throughput losses, and run the risk of OOM kills.
+
+Without an accurate measure of such contention, users are forced to
+either play it safe and under-utilize their hardware resources, or
+roll the dice and frequently suffer the disruptions resulting from
+excessive overcommit.
+
+The psi feature identifies and quantifies the disruptions caused by
+such resource crunches and the time impact they have on complex workloads
+or even entire systems.
+
+Having an accurate measure of productivity losses caused by resource
+scarcity aids users in sizing workloads to hardware--or provisioning
+hardware according to workload demand.
+
+As psi aggregates this information in realtime, systems can be managed
+dynamically using techniques such as load shedding, migrating jobs to
+other systems or data centers, or strategically pausing or killing low
+priority or restartable batch jobs.
+
+This allows maximizing hardware utilization without sacrificing
+workload health or risking major disruptions such as OOM kills.
+
+Pressure interface
+==================
+
+Pressure information for each resource is exported through the
+respective file in /proc/pressure/ -- cpu, memory, and io.
+
+The format for CPU is as such:
+
+some avg10=0.00 avg60=0.00 avg300=0.00 total=0
+
+and for memory and IO:
+
+some avg10=0.00 avg60=0.00 avg300=0.00 total=0
+full avg10=0.00 avg60=0.00 avg300=0.00 total=0
+
+The "some" line indicates the share of time in which at least some
+tasks are stalled on a given resource.
+
+The "full" line indicates the share of time in which all non-idle
+tasks are stalled on a given resource simultaneously. In this state
+actual CPU cycles are going to waste, and a workload that spends
+extended time in this state is considered to be thrashing. This has
+severe impact on performance, and it's useful to distinguish this
+situation from a state where some tasks are stalled but the CPU is
+still doing productive work. As such, time spent in this subset of the
+stall state is tracked separately and exported in the "full" averages.
+
+The ratios are tracked as recent trends over ten, sixty, and three
+hundred second windows, which gives insight into short term events as
+well as medium and long term trends. The total absolute stall time is
+tracked and exported as well, to allow detection of latency spikes
+which wouldn't necessarily make a dent in the time averages, or to
+average trends over custom time frames.
+
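+As an illustration only (not part of the kernel sources), a minimal C
+sketch that reads the "some" line of /proc/pressure/memory and prints
+the 10-second average and the total stall time could look like this:
+
+#include <stdio.h>
+
+int main(void)
+{
+	FILE *f = fopen("/proc/pressure/memory", "r");
+	float avg10, avg60, avg300;
+	unsigned long long total;
+
+	if (!f) {
+		perror("/proc/pressure/memory");
+		return 1;
+	}
+	if (fscanf(f, "some avg10=%f avg60=%f avg300=%f total=%llu",
+		   &avg10, &avg60, &avg300, &total) != 4) {
+		fclose(f);
+		return 1;
+	}
+	printf("memory some: avg10=%.2f total=%llu\n", avg10, total);
+	fclose(f);
+	return 0;
+}
+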
+Monitoring for pressure thresholds
+==================================
+
+Users can register triggers and use poll() to be woken up when resource
+pressure exceeds certain thresholds.
+
+A trigger describes the maximum cumulative stall time over a specific
+time window, e.g. 100ms of total stall time within any 500ms window to
+generate a wakeup event.
+
+To register a trigger, the user has to open the psi interface file under
+/proc/pressure/ representing the resource to be monitored and write the
+desired threshold and time window. The open file descriptor should be
+used to wait for trigger events using select(), poll() or epoll().
+The following format is used:
+
+<some|full> <stall amount in us> <time window in us>
+
+For example, writing "some 150000 1000000" into /proc/pressure/memory
+would add a 150ms threshold for partial memory stalls measured within a
+1sec time window. Writing "full 50000 1000000" into /proc/pressure/io
+would add a 50ms threshold for full io stalls measured within a 1sec time
+window.
+
+Triggers can be set on more than one psi metric and more than one trigger
+for the same psi metric can be specified. However for each trigger a separate
+file descriptor is required to be able to poll it separately from others,
+therefore for each trigger a separate open() syscall should be made even
+when opening the same psi interface file.
+
+Monitors activate only when the system enters the stall state for the
+monitored psi metric and deactivate upon exit from the stall state. While
+the system is in the stall state, psi signal growth is monitored at a rate
+of 10 times per tracking window.
+
+The kernel accepts window sizes ranging from 500ms to 10s, therefore the
+minimum monitoring update interval is 50ms and the maximum is 1s. The
+minimum limit is set to prevent overly frequent polling. The maximum limit
+is chosen as a high enough number after which monitors are most likely not
+needed and psi averages can be used instead.
+
+When activated, the psi monitor stays active for at least the duration of
+one tracking window to avoid repeated activations/deactivations when the
+system is bouncing in and out of the stall state.
+
+Notifications to the userspace are rate-limited to one per tracking window.
+
+The trigger will de-register when the file descriptor used to define the
+trigger is closed.
+
+Userspace monitor usage example
+===============================
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <poll.h>
+#include <string.h>
+#include <unistd.h>
+
+/*
+ * Monitor memory partial stall with 1s tracking window size
+ * and 150ms threshold.
+ */
+int main() {
+	const char trig[] = "some 150000 1000000";
+	struct pollfd fds;
+	int n;
+
+	fds.fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);
+	if (fds.fd < 0) {
+		printf("/proc/pressure/memory open error: %s\n",
+			strerror(errno));
+		return 1;
+	}
+	fds.events = POLLPRI;
+
+	if (write(fds.fd, trig, strlen(trig) + 1) < 0) {
+		printf("/proc/pressure/memory write error: %s\n",
+			strerror(errno));
+		return 1;
+	}
+
+	printf("waiting for events...\n");
+	while (1) {
+		n = poll(&fds, 1, -1);
+		if (n < 0) {
+			printf("poll error: %s\n", strerror(errno));
+			return 1;
+		}
+		if (fds.revents & POLLERR) {
+			printf("got POLLERR, event source is gone\n");
+			return 0;
+		}
+		if (fds.revents & POLLPRI) {
+			printf("event triggered!\n");
+		} else {
+			printf("unknown event received: 0x%x\n", fds.revents);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+Cgroup2 interface
+=================
+
+In a system with a CONFIG_CGROUPS=y kernel and the cgroup2 filesystem
+mounted, pressure stall information is also tracked for tasks grouped
+into cgroups. Each subdirectory in the cgroupfs mountpoint contains
+cpu.pressure, memory.pressure, and io.pressure files; the format is
+the same as the /proc/pressure/ files.
+
+Per-cgroup psi monitors can be specified and used the same way as
+system-wide ones.
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 184193b..52d093b 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -966,6 +966,12 @@
 	$PERIOD duration.  "max" for $MAX indicates no limit.  If only
 	one number is written, $MAX is updated.
 
+  cpu.pressure
+	A read-only nested-key file which exists on non-root cgroups.
+
+	Shows pressure stall information for CPU. See
+	Documentation/accounting/psi.txt for details.
+
 
 Memory
 ------
@@ -1271,6 +1277,12 @@
 	higher than the limit for an extended period of time.  This
 	reduces the impact on the workload and memory management.
 
+  memory.pressure
+	A read-only nested-key file which exists on non-root cgroups.
+
+	Shows pressure stall information for memory. See
+	Documentation/accounting/psi.txt for details.
+
 
 Usage Guidelines
 ~~~~~~~~~~~~~~~~
@@ -1408,6 +1420,12 @@
 
 	  8:16 rbps=2097152 wbps=max riops=max wiops=max
 
+  io.pressure
+	A read-only nested-key file which exists on non-root cgroups.
+
+	Shows pressure stall information for IO. See
+	Documentation/accounting/psi.txt for details.
+
 
 Writeback
 ~~~~~~~~~
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index e8ddf0e..cd39534 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3544,6 +3544,10 @@
 			before loading.
 			See Documentation/blockdev/ramdisk.txt.
 
+	psi=		[KNL] Enable or disable pressure stall information
+			tracking.
+			Format: <bool>
+
 	psmouse.proto=	[HW,MOUSE] Highest PS2 mouse protocol extension to
 			probe for; one of (bare|imps|exps|lifebook|any).
 	psmouse.rate=	[HW,MOUSE] Set desired mouse report rate, in reports
diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt
index 875b2b5..6e5c2bb 100644
--- a/Documentation/blockdev/zram.txt
+++ b/Documentation/blockdev/zram.txt
@@ -156,19 +156,23 @@
 A brief description of exported device attributes. For more details please
 read Documentation/ABI/testing/sysfs-block-zram.
 
-Name            access            description
-----            ------            -----------
-disksize          RW    show and set the device's disk size
-initstate         RO    shows the initialization state of the device
-reset             WO    trigger device reset
-mem_used_max      WO    reset the `mem_used_max' counter (see later)
-mem_limit         WO    specifies the maximum amount of memory ZRAM can use
-                        to store the compressed data
-max_comp_streams  RW    the number of possible concurrent compress operations
-comp_algorithm    RW    show and change the compression algorithm
-compact           WO    trigger memory compaction
-debug_stat        RO    this file is used for zram debugging purposes
-backing_dev	  RW	set up backend storage for zram to write out
+Name            	access            description
+----            	------            -----------
+disksize          	RW	show and set the device's disk size
+initstate         	RO	shows the initialization state of the device
+reset             	WO	trigger device reset
+mem_used_max      	WO	reset the `mem_used_max' counter (see later)
+mem_limit         	WO	specifies the maximum amount of memory ZRAM can use
+				to store the compressed data
+writeback_limit   	RW	specifies the maximum amount of write IO zram can
+				write out to the backing device, in 4KB units
+writeback_limit_enable  RW	show and set writeback_limit feature
+max_comp_streams  	RW	the number of possible concurrent compress operations
+comp_algorithm    	RW	show and change the compression algorithm
+compact           	WO	trigger memory compaction
+debug_stat        	RO	this file is used for zram debugging purposes
+backing_dev	  	RW	set up backend storage for zram to write out
+idle		  	WO	mark allocated slot as idle
 
 
 User space is advised to use the following files to read the device statistics.
@@ -220,6 +224,17 @@
  pages_compacted  the number of pages freed during compaction
  huge_pages	  the number of incompressible pages
 
+File /sys/block/zram<id>/bd_stat
+
+The stat file represents device's backing device statistics. It consists of
+a single line of text and contains the following stats separated by whitespace:
+ bd_count	size of data written in backing device.
+		Unit: 4K bytes
+ bd_reads	the number of reads from backing device
+		Unit: 4K bytes
+ bd_writes	the number of writes to backing device
+		Unit: 4K bytes
+
 9) Deactivate:
 	swapoff /dev/zram0
 	umount /dev/zram1
@@ -237,11 +252,79 @@
 
 = writeback
 
-With incompressible pages, there is no memory saving with zram.
-Instead, with CONFIG_ZRAM_WRITEBACK, zram can write incompressible page
+With CONFIG_ZRAM_WRITEBACK, zram can write idle/incompressible page
 to backing storage rather than keeping it in memory.
-User should set up backing device via /sys/block/zramX/backing_dev
-before disksize setting.
+To use the feature, the admin should set up the backing device via
+
+	"echo /dev/sda5 > /sys/block/zramX/backing_dev"
+
+before setting the disksize. Only a partition is supported at the moment.
+If the admin wants to use incompressible page writeback, they can do so via
+
+	"echo huge > /sys/block/zramX/writeback"
+
+To use idle page writeback, the user first needs to declare zram pages
+as idle.
+
+	"echo all > /sys/block/zramX/idle"
+
+From now on, all pages on zram are marked as idle pages. The idle mark
+is removed when someone requests access to the block.
+IOW, unless there is an access request, those pages remain idle pages.
+
+The admin can request writeback of those idle pages at the right timing via
+
+	"echo idle > /sys/block/zramX/writeback"
+
+With the command, zram writes back idle pages from memory to the backing
+storage.
+
+If there is a lot of write IO to a flash device, it can potentially cause
+flash wearout, so the admin needs to design a write limitation to
+guarantee storage health for the entire product life.
+
+To address the concern, zram supports the "writeback_limit" feature.
+The default value of "writeback_limit_enable" is 0, so it doesn't limit
+any writeback. IOW, if the admin wants to apply a writeback budget, they
+should enable writeback_limit_enable via
+
+	$ echo 1 > /sys/block/zramX/writeback_limit_enable
+
+Once writeback_limit_enable is set, zram doesn't allow any writeback
+until the admin sets the budget via /sys/block/zramX/writeback_limit.
+
+(If the admin doesn't enable writeback_limit_enable, the writeback_limit
+value assigned via /sys/block/zramX/writeback_limit is meaningless.)
+
+If the admin wants to limit writeback to 400MB per day, it could be done
+like below.
+
+	$ MB_SHIFT=20
+	$ PAGE_SHIFT=12
+	$ echo $((400<<MB_SHIFT>>PAGE_SHIFT)) > \
+		/sys/block/zram0/writeback_limit
+	$ echo 1 > /sys/block/zram0/writeback_limit_enable
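+
+(For reference, 400 << 20 >> 12 = 102400, i.e. the 400MB daily budget above
+is expressed as 102400 4KB units.)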
+
+If the admin wants to allow further writeback again once the budget is
+exhausted, it could be done like below
+
+	$ echo $((400<<MB_SHIFT>>PAGE_SHIFT)) > \
+		/sys/block/zram0/writeback_limit
+
+If the admin wants to see the remaining writeback budget since it was set,
+
+	$ cat /sys/block/zramX/writeback_limit
+
+If the admin wants to disable the writeback limit, it can be done via
+
+	$ echo 0 > /sys/block/zramX/writeback_limit_enable
+
+The writeback_limit count will reset whenever you reset zram (e.g.,
+system reboot, echo 1 > /sys/block/zramX/reset), so it is the user's job
+to keep track of how much writeback happened before the reset if extra
+writeback budget should be allocated in the next setting.
+
+If the admin wants to measure the writeback count in a certain period,
+it can be read from the 3rd column of /sys/block/zram0/bd_stat.
 
 = memory tracking
 
@@ -251,16 +334,17 @@
 If you enable the feature, you could see block state via
 /sys/kernel/debug/zram/zram0/block_state". The output is as follows,
 
-	  300    75.033841 .wh
-	  301    63.806904 s..
-	  302    63.806919 ..h
+	  300    75.033841 .wh.
+	  301    63.806904 s...
+	  302    63.806919 ..hi
 
 First column is zram's block index.
 Second column is access time since the system was booted
 Third column is state of the block.
 (s: same page
 w: written page to backing store
-h: huge page)
+h: huge page
+i: idle page)
 
 First line of above example says 300th block is accessed at 75.033841sec
 and the block's state is huge so it is written back to the backing
diff --git a/Documentation/device-mapper/dm-bow.txt b/Documentation/device-mapper/dm-bow.txt
new file mode 100644
index 0000000..e3fc4d2
--- /dev/null
+++ b/Documentation/device-mapper/dm-bow.txt
@@ -0,0 +1,99 @@
+dm_bow (backup on write)
+========================
+
+dm_bow is a device mapper driver that uses the free space on a device to back up
+data that is overwritten. The changes can then be committed by a simple state
+change, or rolled back by removing the dm_bow device and running a command line
+utility over the underlying device.
+
+dm_bow has three states, set by writing ‘1’ or ‘2’ to /sys/block/dm-?/bow/state.
+It is only possible to go from state 0 (initial state) to state 1, and then from
+state 1 to state 2.
+
+State 0: dm_bow collects all trims to the device and assumes that these mark
+free space on the overlying file system that can be safely used. Typically the
+mount code would create the dm_bow device, mount the file system, call the
+FITRIM ioctl on the file system then switch to state 1. These trims are not
+propagated to the underlying device.
+
+State 1: All writes to the device cause the underlying data to be backed up to
+the free (trimmed) area as needed in such a way as they can be restored.
+However, the writes, with one exception, then happen exactly as they would
+without dm_bow, so the device is always in a good final state. The exception is
+that sector 0 is used to keep a log of the latest changes, both to indicate that
+we are in this state and to allow rollback. See below for all details. If there
+isn't enough free space, writes are failed with -ENOSPC.
+
+State 2: The transition to state 2 triggers replacing the special sector 0 with
+the normal sector 0, and the freeing of all state information. dm_bow then
+becomes a pass-through driver, allowing the device to continue to be used with
+minimal performance impact.
+
+Usage
+=====
+dm-bow takes one command line parameter, the name of the underlying device.
+
+dm-bow will typically be used in the following way. dm-bow will be loaded with a
+suitable underlying device and the resultant device will be mounted. A file
+system trim will be issued via the FITRIM ioctl, then the device will be
+switched to state 1. The file system will now be used as normal. At some point,
+the changes can either be committed by switching to state 2, or rolled back by
+unmounting the file system, removing the dm-bow device and running the command
+line utility. Note that rebooting the device will be equivalent to unmounting
+and removing, but the command line utility must still be run.
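+
+As an illustration only, the FITRIM step could be issued from the mount
+code with a userspace sketch like the one below (the trim_fs() helper name
+is hypothetical, not part of dm-bow):
+
+#include <fcntl.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>	/* FITRIM, struct fstrim_range */
+
+/* Trim all free space of the file system mounted at 'mnt'. */
+static int trim_fs(const char *mnt)
+{
+	struct fstrim_range range = { .start = 0, .len = UINT64_MAX };
+	int fd = open(mnt, O_RDONLY);
+
+	if (fd < 0)
+		return -1;
+	if (ioctl(fd, FITRIM, &range) < 0) {
+		close(fd);
+		return -1;
+	}
+	return close(fd);
+}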
+
+Details of operation in state 1
+===============================
+
+dm_bow maintains a type for all sectors. A sector can be any of:
+
+SECTOR0
+SECTOR0_CURRENT
+UNCHANGED
+FREE
+CHANGED
+BACKUP
+
+SECTOR0 is the first sector on the device, and is used to hold the log of
+changes. This is the one exception.
+
+SECTOR0_CURRENT is a sector picked from the FREE sectors, and is where reads and
+writes from the true sector zero are redirected to. Note that like any backup
+sector, if the sector is written to directly, it must be moved again.
+
+UNCHANGED means that the sector has not been changed since we entered state 1.
+Thus if it is written to or trimmed, the contents must first be backed up.
+
+FREE means that the sector was trimmed in state 0 and has not yet been written
+to or used for backup. On being written to, a FREE sector is changed to CHANGED.
+
+CHANGED means that the sector has been modified, and can be further modified
+without further backup.
+
+BACKUP means that this is a free sector being used as a backup. On being written
+to, the contents must first be backed up again.
+
+All backup operations are logged to the first sector. The log sector has the
+format:
+--------------------------------------------------------
+| Magic | Count | Sequence | Log entry | Log entry | …
+--------------------------------------------------------
+
+Magic is a magic number. Count is the number of log entries. Sequence is 0
+initially. A log entry is
+
+-----------------------------------
+| Source | Dest | Size | Checksum |
+-----------------------------------
+
+When SECTOR0 is full, the log sector is backed up and another empty log sector
+created with sequence number one higher. The first entry in any log entry with
+sequence > 0 therefore must be the log of the backing up of the previous log
+sector. Note that sequence is not strictly needed, but is a useful sanity check
+and potentially limits the time spent trying to restore a corrupted snapshot.
+
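+As a purely illustrative sketch, the layout described above could be
+expressed as the following C structures. The field widths are not
+specified in this document, so the types chosen here (64-bit sector
+addresses and sizes, 32-bit magic, count, sequence and checksum) are
+assumptions, not the driver's actual on-disk format:
+
+struct bow_log_entry {
+	__le64 source;		/* sector the data was copied from */
+	__le64 dest;		/* free sector the backup was written to */
+	__le64 size;		/* size of the backed up region */
+	__le32 checksum;	/* integrity check of the entry */
+} __packed;
+
+struct bow_log_sector {
+	__le32 magic;		/* identifies a dm-bow log sector */
+	__le32 count;		/* number of log entries that follow */
+	__le32 sequence;	/* 0 for the first log sector, then +1 */
+	struct bow_log_entry entries[];
+} __packed;
+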
+On entering state 1, dm_bow has a list of free sectors. All other sectors are
+unchanged. Sector0_current is selected from the free sectors and the contents of
+sector 0 are copied there. The sector 0 is backed up, which triggers the first
+log entry to be written.
+
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index e5edd29..f7b5e4f 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -126,6 +126,8 @@
                        does not aware of cold files such as media files.
 inline_xattr           Enable the inline xattrs feature.
 noinline_xattr         Disable the inline xattrs feature.
+inline_xattr_size=%u   Support configuring inline xattr size; it depends on
+		       the flexible inline xattr feature.
 inline_data            Enable the inline data feature: New created small(<~3.4k)
                        files can be written into inode block.
 inline_dentry          Enable the inline dir feature: data in new created
@@ -172,9 +174,10 @@
                        FAULT_DIR_DEPTH		0x000000100
                        FAULT_EVICT_INODE	0x000000200
                        FAULT_TRUNCATE		0x000000400
-                       FAULT_IO			0x000000800
+                       FAULT_READ_IO		0x000000800
                        FAULT_CHECKPOINT		0x000001000
                        FAULT_DISCARD		0x000002000
+                       FAULT_WRITE_IO		0x000004000
 mode=%s                Control block allocation mode which supports "adaptive"
                        and "lfs". In "lfs" mode, there should be no random
                        writes towards main area.
@@ -211,6 +214,11 @@
                        non-atomic files likewise "nobarrier" mount option.
 test_dummy_encryption  Enable dummy encryption, which provides a fake fscrypt
                        context. The fake fscrypt context is used by xfstests.
+checkpoint=%s          Set to "disable" to turn off checkpointing. Set to "enable"
+                       to reenable checkpointing. Is enabled by default. While
+                       disabled, any unmounting or unexpected shutdowns will cause
+                       the filesystem contents to appear as they did when the
+                       filesystem was mounted with that option.
 
 ================================================================================
 DEBUGFS ENTRIES
diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index cfbc18f..3a7b605 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -132,47 +132,28 @@
 Per-file keys
 -------------
 
-Master keys are not used to encrypt file contents or names directly.
-Instead, a unique key is derived for each encrypted file, including
-each regular file, directory, and symbolic link.  This has several
-advantages:
+Since each master key can protect many files, it is necessary to
+"tweak" the encryption of each file so that the same plaintext in two
+files doesn't map to the same ciphertext, or vice versa.  In most
+cases, fscrypt does this by deriving per-file keys.  When a new
+encrypted inode (regular file, directory, or symlink) is created,
+fscrypt randomly generates a 16-byte nonce and stores it in the
+inode's encryption xattr.  Then, it uses a KDF (Key Derivation
+Function) to derive the file's key from the master key and nonce.
 
-- In cryptosystems, the same key material should never be used for
-  different purposes.  Using the master key as both an XTS key for
-  contents encryption and as a CTS-CBC key for filenames encryption
-  would violate this rule.
-- Per-file keys simplify the choice of IVs (Initialization Vectors)
-  for contents encryption.  Without per-file keys, to ensure IV
-  uniqueness both the inode and logical block number would need to be
-  encoded in the IVs.  This would make it impossible to renumber
-  inodes, which e.g. ``resize2fs`` can do when resizing an ext4
-  filesystem.  With per-file keys, it is sufficient to encode just the
-  logical block number in the IVs.
-- Per-file keys strengthen the encryption of filenames, where IVs are
-  reused out of necessity.  With a unique key per directory, IV reuse
-  is limited to within a single directory.
-- Per-file keys allow individual files to be securely erased simply by
-  securely erasing their keys.  (Not yet implemented.)
+The Adiantum encryption mode (see `Encryption modes and usage`_) is
+special, since it accepts longer IVs and is suitable for both contents
+and filenames encryption.  For it, a "direct key" option is offered
+where the file's nonce is included in the IVs and the master key is
+used for encryption directly.  This improves performance; however,
+users must not use the same master key for any other encryption mode.
 
-A KDF (Key Derivation Function) is used to derive per-file keys from
-the master key.  This is done instead of wrapping a randomly-generated
-key for each file because it reduces the size of the encryption xattr,
-which for some filesystems makes the xattr more likely to fit in-line
-in the filesystem's inode table.  With a KDF, only a 16-byte nonce is
-required --- long enough to make key reuse extremely unlikely.  A
-wrapped key, on the other hand, would need to be up to 64 bytes ---
-the length of an AES-256-XTS key.  Furthermore, currently there is no
-requirement to support unlocking a file with multiple alternative
-master keys or to support rotating master keys.  Instead, the master
-keys may be wrapped in userspace, e.g. as done by the `fscrypt
-<https://github.com/google/fscrypt>`_ tool.
+Below, the KDF and design considerations are described in more detail.
 
-The current KDF encrypts the master key using the 16-byte nonce as an
-AES-128-ECB key.  The output is used as the derived key.  If the
-output is longer than needed, then it is truncated to the needed
-length.  Truncation is the norm for directories and symlinks, since
-those use the CTS-CBC encryption mode which requires a key half as
-long as that required by the XTS encryption mode.
+The current KDF works by encrypting the master key with AES-128-ECB,
+using the file's nonce as the AES key.  The output is used as the
+derived key.  If the output is longer than needed, then it is
+truncated to the needed length.
 
 Note: this KDF meets the primary security requirement, which is to
 produce unique derived keys that preserve the entropy of the master
@@ -181,6 +162,20 @@
 reversible, so it is generally considered to be a mistake!  It may be
 replaced with HKDF or another more standard KDF in the future.
 
+Key derivation was chosen over key wrapping because wrapped keys would
+require larger xattrs which would be less likely to fit in-line in the
+filesystem's inode table, and there didn't appear to be any
+significant advantages to key wrapping.  In particular, currently
+there is no requirement to support unlocking a file with multiple
+alternative master keys or to support rotating master keys.  Instead,
+the master keys may be wrapped in userspace, e.g. as is done by the
+`fscrypt <https://github.com/google/fscrypt>`_ tool.
+
+Including the inode number in the IVs was considered.  However, it was
+rejected as it would have prevented ext4 filesystems from being
+resized, and by itself still wouldn't have been sufficient to prevent
+the same key from being directly reused for both XTS and CTS-CBC.
+
 Encryption modes and usage
 ==========================
 
@@ -191,54 +186,80 @@
 
 - AES-256-XTS for contents and AES-256-CTS-CBC for filenames
 - AES-128-CBC for contents and AES-128-CTS-CBC for filenames
+- Adiantum for both contents and filenames
 
-It is strongly recommended to use AES-256-XTS for contents encryption.
+If unsure, you should use the (AES-256-XTS, AES-256-CTS-CBC) pair.
+
 AES-128-CBC was added only for low-powered embedded devices with
 crypto accelerators such as CAAM or CESA that do not support XTS.
 
+Adiantum is a (primarily) stream cipher-based mode that is fast even
+on CPUs without dedicated crypto instructions.  It's also a true
+wide-block mode, unlike XTS.  It can also eliminate the need to derive
+per-file keys.  However, it depends on the security of two primitives,
+XChaCha12 and AES-256, rather than just one.  See the paper
+"Adiantum: length-preserving encryption for entry-level processors"
+(https://eprint.iacr.org/2018/720.pdf) for more details.  To use
+Adiantum, CONFIG_CRYPTO_ADIANTUM must be enabled.  Also, fast
+implementations of ChaCha and NHPoly1305 should be enabled, e.g.
+CONFIG_CRYPTO_CHACHA20_NEON and CONFIG_CRYPTO_NHPOLY1305_NEON for ARM.
+
 New encryption modes can be added relatively easily, without changes
 to individual filesystems.  However, authenticated encryption (AE)
 modes are not currently supported because of the difficulty of dealing
 with ciphertext expansion.
 
+Contents encryption
+-------------------
+
 For file contents, each filesystem block is encrypted independently.
 Currently, only the case where the filesystem block size is equal to
-the system's page size (usually 4096 bytes) is supported.  With the
-XTS mode of operation (recommended), the logical block number within
-the file is used as the IV.  With the CBC mode of operation (not
-recommended), ESSIV is used; specifically, the IV for CBC is the
-logical block number encrypted with AES-256, where the AES-256 key is
-the SHA-256 hash of the inode's data encryption key.
+the system's page size (usually 4096 bytes) is supported.
 
-For filenames, the full filename is encrypted at once.  Because of the
-requirements to retain support for efficient directory lookups and
-filenames of up to 255 bytes, a constant initialization vector (IV) is
-used.  However, each encrypted directory uses a unique key, which
-limits IV reuse to within a single directory.  Note that IV reuse in
-the context of CTS-CBC encryption means that when the original
-filenames share a common prefix at least as long as the cipher block
-size (16 bytes for AES), the corresponding encrypted filenames will
-also share a common prefix.  This is undesirable; it may be fixed in
-the future by switching to an encryption mode that is a strong
-pseudorandom permutation on arbitrary-length messages, e.g. the HEH
-(Hash-Encrypt-Hash) mode.
+Each block's IV is set to the logical block number within the file as
+a little endian number, except that:
 
-Since filenames are encrypted with the CTS-CBC mode of operation, the
-plaintext and ciphertext filenames need not be multiples of the AES
-block size, i.e. 16 bytes.  However, the minimum size that can be
-encrypted is 16 bytes, so shorter filenames are NUL-padded to 16 bytes
-before being encrypted.  In addition, to reduce leakage of filename
-lengths via their ciphertexts, all filenames are NUL-padded to the
-next 4, 8, 16, or 32-byte boundary (configurable).  32 is recommended
-since this provides the best confidentiality, at the cost of making
-directory entries consume slightly more space.  Note that since NUL
-(``\0``) is not otherwise a valid character in filenames, the padding
-will never produce duplicate plaintexts.
+- With CBC mode encryption, ESSIV is also used.  Specifically, each IV
+  is encrypted with AES-256 where the AES-256 key is the SHA-256 hash
+  of the file's data encryption key.
+
+- In the "direct key" configuration (FS_POLICY_FLAG_DIRECT_KEY set in
+  the fscrypt_policy), the file's nonce is also appended to the IV.
+  Currently this is only allowed with the Adiantum encryption mode.
+
+Filenames encryption
+--------------------
+
+For filenames, each full filename is encrypted at once.  Because of
+the requirements to retain support for efficient directory lookups and
+filenames of up to 255 bytes, the same IV is used for every filename
+in a directory.
+
+However, each encrypted directory still uses a unique key; or
+alternatively (for the "direct key" configuration) has the file's
+nonce included in the IVs.  Thus, IV reuse is limited to within a
+single directory.
+
+With CTS-CBC, the IV reuse means that when the plaintext filenames
+share a common prefix at least as long as the cipher block size (16
+bytes for AES), the corresponding encrypted filenames will also share
+a common prefix.  This is undesirable.  Adiantum does not have this
+weakness, as it is a wide-block encryption mode.
+
+All supported filenames encryption modes accept any plaintext length
+>= 16 bytes; cipher block alignment is not required.  However,
+filenames shorter than 16 bytes are NUL-padded to 16 bytes before
+being encrypted.  In addition, to reduce leakage of filename lengths
+via their ciphertexts, all filenames are NUL-padded to the next 4, 8,
+16, or 32-byte boundary (configurable).  32 is recommended since this
+provides the best confidentiality, at the cost of making directory
+entries consume slightly more space.  Note that since NUL (``\0``) is
+not otherwise a valid character in filenames, the padding will never
+produce duplicate plaintexts.
 
 Symbolic link targets are considered a type of filename and are
-encrypted in the same way as filenames in directory entries.  Each
-symlink also uses a unique key; hence, the hardcoded IV is not a
-problem for symlinks.
+encrypted in the same way as filenames in directory entries, except
+that IV reuse is not a problem as each symlink has its own inode.
 
 User API
 ========
@@ -272,9 +293,13 @@
   and FS_ENCRYPTION_MODE_AES_256_CTS (4) for
   ``filenames_encryption_mode``.
 
-- ``flags`` must be set to a value from ``<linux/fs.h>`` which
+- ``flags`` must contain a value from ``<linux/fs.h>`` which
   identifies the amount of NUL-padding to use when encrypting
   filenames.  If unsure, use FS_POLICY_FLAGS_PAD_32 (0x3).
+  In addition, if the chosen encryption modes are both
+  FS_ENCRYPTION_MODE_ADIANTUM, this can contain
+  FS_POLICY_FLAG_DIRECT_KEY to specify that the master key should be
+  used directly, without key derivation.
 
 - ``master_key_descriptor`` specifies how to find the master key in
   the keyring; see `Adding keys`_.  It is up to userspace to choose a
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
index eef7d9d..f205898 100644
--- a/Documentation/filesystems/overlayfs.txt
+++ b/Documentation/filesystems/overlayfs.txt
@@ -102,6 +102,29 @@
 such as metadata and extended attributes are reported for the upper
 directory only.  These attributes of the lower directory are hidden.
 
+credentials
+-----------
+
+By default, all access to the upper, lower and work directories is
+performed with the mounter's recorded MAC and DAC credentials.  The
+incoming accesses are checked against the caller's credentials.
+
+In the case where the caller's MAC or DAC credentials do not overlap
+with the mounter's, a use case available in older versions of the
+driver, the override_creds mount flag can be turned off.  This helps
+when the use pattern has a caller with legitimate credentials where
+the mounter does not.  Several unintended side effects will occur
+though.  A caller without certain key capabilities or with lower
+privilege will not always be able to delete files or directories,
+create nodes, or search some restricted directories.  The ability to
+search and read a directory entry is spotty as a result of the cache
+mechanism not retesting the credentials, because of the assumption
+that a privileged caller can fill the cache and a lower-privileged
+caller can then read the directory cache.  The uneven security model,
+where the cache, upperdir and workdir are opened with privilege but
+accessed without it, creating a form of privilege escalation, should
+only be used with a strict understanding of the side effects and of
+the security policies.
+
 whiteouts and opaque directories
 --------------------------------
 
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 06ac6dd..a9ce10b0 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -398,6 +398,8 @@
  [stack]                  = the stack of the main process
  [vdso]                   = the "virtual dynamic shared object",
                             the kernel system call handler
+ [anon:<name>]            = an anonymous mapping that has been
+                            named by userspace
 
  or if empty, the mapping is anonymous.
 
@@ -426,6 +428,7 @@
 MMUPageSize:           4 kB
 Locked:                0 kB
 VmFlags: rd ex mr mw me dw
+Name:           name from userspace
 
 the first of these lines shows the same information as is displayed for the
 mapping in /proc/PID/maps.  The remaining lines show the size of the mapping
@@ -500,6 +503,9 @@
 might change in future as well. So each consumer of these flags has to
 follow each specific kernel version for the exact semantic.
 
+The "Name" field will only be present on a mapping that has been named by
+userspace, and will show the name passed in by userspace.
+
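+As an illustration only, a mapping is typically named via the PR_SET_VMA
+prctl provided by Android kernels.  The fallback #defines below mirror the
+Android uapi definitions and the "myheap" name is just an example:
+
+#include <sys/mman.h>
+#include <sys/prctl.h>
+
+#ifndef PR_SET_VMA
+#define PR_SET_VMA		0x53564d41
+#define PR_SET_VMA_ANON_NAME	0
+#endif
+
+int main(void)
+{
+	size_t len = 4 * 4096;
+	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
+		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+	if (p == MAP_FAILED)
+		return 1;
+
+	/* /proc/self/maps now shows this region as [anon:myheap]. */
+	prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
+	      (unsigned long)p, len, (unsigned long)"myheap");
+
+	return 0;
+}
+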
 This file is only present if the CONFIG_MMU kernel configuration option is
 enabled.
 
diff --git a/Documentation/power/energy-model.txt b/Documentation/power/energy-model.txt
new file mode 100644
index 0000000..5a23c6f
--- /dev/null
+++ b/Documentation/power/energy-model.txt
@@ -0,0 +1,169 @@
+                           ====================
+                           Energy Model of CPUs
+                           ====================
+
+1. Overview
+-----------
+
+The Energy Model (EM) framework serves as an interface between drivers knowing
+the power consumed by CPUs at various performance levels, and the kernel
+subsystems willing to use that information to make energy-aware decisions.
+
+The source of the information about the power consumed by CPUs can vary greatly
+from one platform to another. These power costs can be estimated using
+devicetree data in some cases. In others, the firmware will know better.
+Alternatively, userspace might be best positioned. And so on. To avoid each
+and every client subsystem having to re-implement support for each and every
+possible source of information on its own, the EM framework intervenes as an
+abstraction layer which standardizes the format of power cost tables in the
+kernel, hence avoiding redundant work.
+
+The figure below depicts an example of drivers (Arm-specific here, but the
+approach is applicable to any architecture) providing power costs to the EM
+framework, and interested clients reading the data from it.
+
+       +---------------+  +-----------------+  +---------------+
+       | Thermal (IPA) |  | Scheduler (EAS) |  |     Other     |
+       +---------------+  +-----------------+  +---------------+
+               |                   | em_pd_energy()    |
+               |                   | em_cpu_get()      |
+               +---------+         |         +---------+
+                         |         |         |
+                         v         v         v
+                        +---------------------+
+                        |    Energy Model     |
+                        |     Framework       |
+                        +---------------------+
+                           ^       ^       ^
+                           |       |       | em_register_perf_domain()
+                +----------+       |       +---------+
+                |                  |                 |
+        +---------------+  +---------------+  +--------------+
+        |  cpufreq-dt   |  |   arm_scmi    |  |    Other     |
+        +---------------+  +---------------+  +--------------+
+                ^                  ^                 ^
+                |                  |                 |
+        +--------------+   +---------------+  +--------------+
+        | Device Tree  |   |   Firmware    |  |      ?       |
+        +--------------+   +---------------+  +--------------+
+
+The EM framework manages power cost tables per 'performance domain' in the
+system. A performance domain is a group of CPUs whose performance is scaled
+together. Performance domains generally have a 1-to-1 mapping with CPUFreq
+policies. All CPUs in a performance domain are required to have the same
+micro-architecture. CPUs in different performance domains can have different
+micro-architectures.
+
+
+2. Core APIs
+------------
+
+  2.1 Config options
+
+CONFIG_ENERGY_MODEL must be enabled to use the EM framework.
+
+
+  2.2 Registration of performance domains
+
+Drivers are expected to register performance domains into the EM framework by
+calling the following API:
+
+  int em_register_perf_domain(cpumask_t *span, unsigned int nr_states,
+			      struct em_data_callback *cb);
+
+Drivers must specify the CPUs of the performance domains using the cpumask
+argument, and provide a callback function returning <frequency, power> tuples
+for each capacity state. The callback function provided by the driver is free
+to fetch data from any relevant location (DT, firmware, ...), and by any means
+deemed necessary. See Section 3. for an example of a driver implementing this
+callback, and kernel/power/energy_model.c for further documentation on this
+API.
+
+
+  2.3 Accessing performance domains
+
+Subsystems interested in the energy model of a CPU can retrieve it using the
+em_cpu_get() API. The energy model tables are allocated once upon creation of
+the performance domains, and kept in memory untouched.
+
+The energy consumed by a performance domain can be estimated using the
+em_pd_energy() API. The estimation is performed assuming that the schedutil
+CPUfreq governor is in use.
+
+More details about the above APIs can be found in include/linux/energy_model.h.
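+
+As an illustration only, a client could combine the two APIs as in the
+sketch below. foo_estimate_domain_energy() is a hypothetical helper, and
+the max_util/sum_util arguments follow the em_pd_energy() prototype in
+include/linux/energy_model.h:
+
+#include <linux/energy_model.h>
+
+static unsigned long foo_estimate_domain_energy(int cpu,
+						unsigned long max_util,
+						unsigned long sum_util)
+{
+	struct em_perf_domain *pd = em_cpu_get(cpu);
+
+	if (!pd)
+		return 0;	/* no EM registered for this CPU */
+
+	return em_pd_energy(pd, max_util, sum_util);
+}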
+
+
+3. Example driver
+-----------------
+
+This section provides a simple example of a CPUFreq driver registering a
+performance domain in the Energy Model framework using the (fake) 'foo'
+protocol. The driver implements an est_power() function to be provided to the
+EM framework.
+
+ -> drivers/cpufreq/foo_cpufreq.c
+
+01	static int est_power(unsigned long *mW, unsigned long *KHz, int cpu)
+02	{
+03		long freq, power;
+04
+05		/* Use the 'foo' protocol to ceil the frequency */
+06		freq = foo_get_freq_ceil(cpu, *KHz);
+07		if (freq < 0)
+08			return freq;
+09
+10		/* Estimate the power cost for the CPU at the relevant freq. */
+11		power = foo_estimate_power(cpu, freq);
+12		if (power < 0)
+13			return power;
+14
+15		/* Return the values to the EM framework */
+16		*mW = power;
+17		*KHz = freq;
+18
+19		return 0;
+20	}
+21
+22	static int foo_cpufreq_init(struct cpufreq_policy *policy)
+23	{
+24		struct em_data_callback em_cb = EM_DATA_CB(est_power);
+25		int nr_opp, ret;
+26
+27		/* Do the actual CPUFreq init work ... */
+28		ret = do_foo_cpufreq_init(policy);
+29		if (ret)
+30			return ret;
+31
+32		/* Find the number of OPPs for this policy */
+33		nr_opp = foo_get_nr_opp(policy);
+34
+35		/* And register the new performance domain */
+36		em_register_perf_domain(policy->cpus, nr_opp, &em_cb);
+37
+38		return 0;
+39	}
+
+
+4. Support for legacy Energy Models (DEPRECATED)
+------------------------------------------------
+
+The Android kernel version 4.14 and before used a different type of EM for EAS,
+referred to as the 'legacy' EM. The legacy EM relies on the out-of-tree
+'sched-energy-costs' devicetree bindings to provide the kernel with power costs.
+The usage of such bindings in Android has now been DEPRECATED in favour of the
+mainline equivalents.
+
+The currently supported alternatives to populate the EM include:
+ - using a firmware-based solution such as Arm SCMI (supported in
+   drivers/cpufreq/scmi-cpufreq.c);
+ - using the 'dynamic-power-coefficient' devicetree binding together with
+   PM_OPP. See the of_dev_pm_opp_get_cpu_power() helper in PM_OPP, and the
+   reference implementation in drivers/cpufreq/cpufreq-dt.c.
+
+In order to ease the transition to the new EM format, Android 4.19 also provides
+a compatibility driver able to load a legacy EM from DT into the EM framework.
+*** Please note that THIS FEATURE WILL NOT BE AVAILABLE in future Android
+kernels, and as such it must be considered only as a temporary workaround. ***
+
+If you know what you're doing and still want to use this driver, you need to set
+CONFIG_LEGACY_ENERGY_MODEL_DT=y in your kernel configuration to enable it.
diff --git a/Documentation/scheduler/sched-energy.txt b/Documentation/scheduler/sched-energy.txt
new file mode 100644
index 0000000..197d81f
--- /dev/null
+++ b/Documentation/scheduler/sched-energy.txt
@@ -0,0 +1,425 @@
+			   =======================
+			   Energy Aware Scheduling
+			   =======================
+
+1. Introduction
+---------------
+
+Energy Aware Scheduling (or EAS) gives the scheduler the ability to predict
+the impact of its decisions on the energy consumed by CPUs. EAS relies on an
+Energy Model (EM) of the CPUs to select an energy efficient CPU for each task,
+with a minimal impact on throughput. This document aims at providing an
+introduction on how EAS works and what the main design decisions behind it
+are, and details what is needed to get it to run.
+
+Before going any further, please note that at the time of writing:
+
+   /!\ EAS does not support platforms with symmetric CPU topologies /!\
+
+EAS operates only on heterogeneous CPU topologies (such as Arm big.LITTLE)
+because this is where the potential for saving energy through scheduling is
+the highest.
+
+The actual EM used by EAS is _not_ maintained by the scheduler, but by a
+dedicated framework. For details about this framework and what it provides,
+please refer to its documentation (see Documentation/power/energy-model.txt).
+
+
+2. Background and Terminology
+-----------------------------
+
+To make it clear from the start:
+ - energy = [joule] (resource like a battery on powered devices)
+ - power = energy/time = [joule/second] = [watt]
+
+The goal of EAS is to minimize energy, while still getting the job done. That
+is, we want to maximize:
+
+	performance [inst/s]
+	--------------------
+	    power [W]
+
+which is equivalent to minimizing:
+
+	energy [J]
+	-----------
+	instruction
+
+while still getting 'good' performance. It is essentially an alternative
+optimization objective to the current performance-only objective for the
+scheduler. This alternative considers two objectives: energy-efficiency and
+performance.
+
+The idea behind introducing an EM is to allow the scheduler to evaluate the
+implications of its decisions rather than blindly applying energy-saving
+techniques that may have positive effects only on some platforms. At the same
+time, the EM must be as simple as possible to minimize the scheduler latency
+impact.
+
+In short, EAS changes the way CFS tasks are assigned to CPUs. When it is time
+for the scheduler to decide where a task should run (during wake-up), the EM
+is used to break the tie between several good CPU candidates and pick the one
+that is predicted to yield the best energy consumption without harming the
+system's throughput. The predictions made by EAS rely on specific elements of
+knowledge about the platform's topology, which include the 'capacity' of CPUs,
+and their respective energy costs.
+
+
+3. Topology information
+-----------------------
+
+EAS (as well as the rest of the scheduler) uses the notion of 'capacity' to
+differentiate CPUs with different computing throughput. The 'capacity' of a CPU
+represents the amount of work it can absorb when running at its highest
+frequency compared to the most capable CPU of the system. Capacity values are
+normalized in a 1024 range, and are comparable with the utilization signals of
+tasks and CPUs computed by the Per-Entity Load Tracking (PELT) mechanism. Thanks
+to capacity and utilization values, EAS is able to estimate how big/busy a
+task/CPU is, and to take this into consideration when evaluating performance vs
+energy trade-offs. The capacity of CPUs is provided via arch-specific code
+through the arch_scale_cpu_capacity() callback.
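+
+For example, a CPU able to sustain only half the instruction throughput of
+the most capable CPU at their respective maximum frequencies would have a
+capacity of 1024 / 2 = 512.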
+
+The rest of platform knowledge used by EAS is directly read from the Energy
+Model (EM) framework. The EM of a platform is composed of a power cost table
+per 'performance domain' in the system (see Documentation/power/energy-model.txt
+for further details about performance domains).
+
+The scheduler manages references to the EM objects in the topology code when the
+scheduling domains are built, or re-built. For each root domain (rd), the
+scheduler maintains a singly linked list of all performance domains intersecting
+the current rd->span. Each node in the list contains a pointer to a struct
+em_perf_domain as provided by the EM framework.
+
+The lists are attached to the root domains in order to cope with exclusive
+cpuset configurations. Since the boundaries of exclusive cpusets do not
+necessarily match those of performance domains, the lists of different root
+domains can contain duplicate elements.
+
+Example 1.
+    Let us consider a platform with 12 CPUs, split in 3 performance domains
+    (pd0, pd4 and pd8), organized as follows:
+
+	          CPUs:   0 1 2 3 4 5 6 7 8 9 10 11
+	          PDs:   |--pd0--|--pd4--|---pd8---|
+	          RDs:   |----rd1----|-----rd2-----|
+
+    Now, consider that userspace decided to split the system with two
+    exclusive cpusets, hence creating two independent root domains, each
+    containing 6 CPUs. The two root domains are denoted rd1 and rd2 in the
+    above figure. Since pd4 intersects with both rd1 and rd2, it will be
+    present in the linked list '->pd' attached to each of them:
+       * rd1->pd: pd0 -> pd4
+       * rd2->pd: pd4 -> pd8
+
+    Please note that the scheduler will create two duplicate list nodes for
+    pd4 (one for each list). However, both just hold a pointer to the same
+    shared data structure of the EM framework.
+
+Since the access to these lists can happen concurrently with hotplug and other
+things, they are protected by RCU, like the rest of topology structures
+manipulated by the scheduler.
+
+EAS also maintains a static key (sched_energy_present) which is enabled when at
+least one root domain meets all conditions for EAS to start. Those conditions
+are summarized in Section 6.
+
+
+4. Energy-Aware task placement
+------------------------------
+
+EAS overrides the CFS task wake-up balancing code. It uses the EM of the
+platform and the PELT signals to choose an energy-efficient target CPU during
+wake-up balance. When EAS is enabled, select_task_rq_fair() calls
+find_energy_efficient_cpu() to do the placement decision. This function looks
+for the CPU with the highest spare capacity (CPU capacity - CPU utilization) in
+each performance domain since it is the one which will allow us to keep the
+frequency the lowest. Then, the function checks if placing the task there could
+save energy compared to leaving it on prev_cpu, i.e. the CPU where the task ran
+in its previous activation.
+
+find_energy_efficient_cpu() uses compute_energy() to estimate what will be the
+energy consumed by the system if the waking task was migrated. compute_energy()
+looks at the current utilization landscape of the CPUs and adjusts it to
+'simulate' the task migration. The EM framework provides the em_pd_energy() API
+which computes the expected energy consumption of each performance domain for
+the given utilization landscape.
+
+An example of energy-optimized task placement decision is detailed below.
+
+Example 2.
+    Let us consider a (fake) platform with 2 independent performance domains
+    composed of two CPUs each. CPU0 and CPU1 are little CPUs; CPU2 and CPU3
+    are big.
+
+    The scheduler must decide where to place a task P whose util_avg = 200
+    and prev_cpu = 0.
+
+    The current utilization landscape of the CPUs is depicted on the graph
+    below. CPUs 0-3 have a util_avg of 400, 100, 600 and 500 respectively.
+    Each performance domain has three Operating Performance Points (OPPs).
+    The CPU capacity and power cost associated with each OPP is listed in
+    the Energy Model table. The util_avg of P is shown on the figures
+    below as 'PP'.
+
+    CPU util.
+      1024                 - - - - - - -              Energy Model
+                                               +-----------+-------------+
+                                               |  Little   |     Big     |
+       768                 =============       +-----+-----+------+------+
+                                               | Cap | Pwr | Cap  | Pwr  |
+                                               +-----+-----+------+------+
+       512  ===========    - ##- - - - -       | 170 | 50  | 512  | 400  |
+                             ##     ##         | 341 | 150 | 768  | 800  |
+       341  -PP - - - -      ##     ##         | 512 | 300 | 1024 | 1700 |
+             PP              ##     ##         +-----+-----+------+------+
+       170  -## - - - -      ##     ##
+             ##     ##       ##     ##
+           ------------    -------------
+            CPU0   CPU1     CPU2   CPU3
+
+      Current OPP: =====       Other OPP: - - -     util_avg (100 each): ##
+
+
+    find_energy_efficient_cpu() will first look for the CPUs with the
+    maximum spare capacity in the two performance domains. In this example,
+    CPU1 and CPU3. Then it will estimate the energy of the system if P was
+    placed on either of them, and check if that would save some energy
+    compared to leaving P on CPU0. EAS assumes that OPPs follow utilization
+    (which is coherent with the behaviour of the schedutil CPUFreq
+    governor, see Section 6. for more details on this topic).
+
+    Case 1. P is migrated to CPU1
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+      1024                 - - - - - - -
+
+                                            Energy calculation:
+       768                 =============     * CPU0: 200 / 341 * 150 = 88
+                                             * CPU1: 300 / 341 * 150 = 131
+                                             * CPU2: 600 / 768 * 800 = 625
+       512  - - - - - -    - ##- - - - -     * CPU3: 500 / 768 * 800 = 520
+                             ##     ##          => total_energy = 1364
+       341  ===========      ##     ##
+                    PP       ##     ##
+       170  -## - - PP-      ##     ##
+             ##     ##       ##     ##
+           ------------    -------------
+            CPU0   CPU1     CPU2   CPU3
+
+
+    Case 2. P is migrated to CPU3
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+      1024                 - - - - - - -
+
+                                            Energy calculation:
+       768                 =============     * CPU0: 200 / 341 * 150 = 88
+                                             * CPU1: 100 / 341 * 150 = 43
+                                    PP       * CPU2: 600 / 768 * 800 = 625
+       512  - - - - - -    - ##- - -PP -     * CPU3: 700 / 768 * 800 = 729
+                             ##     ##          => total_energy = 1485
+       341  ===========      ##     ##
+                             ##     ##
+       170  -## - - - -      ##     ##
+             ##     ##       ##     ##
+           ------------    -------------
+            CPU0   CPU1     CPU2   CPU3
+
+
+    Case 3. P stays on prev_cpu / CPU 0
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+      1024                 - - - - - - -
+
+                                            Energy calculation:
+       768                 =============     * CPU0: 400 / 512 * 300 = 234
+                                             * CPU1: 100 / 512 * 300 = 58
+                                             * CPU2: 600 / 768 * 800 = 625
+       512  ===========    - ##- - - - -     * CPU3: 500 / 768 * 800 = 520
+                             ##     ##          => total_energy = 1437
+       341  -PP - - - -      ##     ##
+             PP              ##     ##
+       170  -## - - - -      ##     ##
+             ##     ##       ##     ##
+           ------------    -------------
+            CPU0   CPU1     CPU2   CPU3
+
+
+    From these calculations, Case 1 has the lowest total energy, so CPU 1
+    is the best candidate from an energy-efficiency standpoint.
+
+Big CPUs are generally more power hungry than the little ones and are thus used
+mainly when a task doesn't fit the littles. However, little CPUs aren't always
+necessarily more energy-efficient than big CPUs. For some systems, the high OPPs
+of the little CPUs can be less energy-efficient than the lowest OPPs of the
+bigs, for example. So, if the little CPUs happen to have enough utilization at
+a specific point in time, a small task waking up at that moment could be better
+off executing on the big side in order to save energy, even though it would fit
+on the little side.
+
+And even in the case where all OPPs of the big CPUs are less energy-efficient
+than those of the little, using the big CPUs for a small task might still, under
+specific conditions, save energy. Indeed, placing a task on a little CPU can
+result in raising the OPP of the entire performance domain, and that will
+increase the cost of the tasks already running there. If the waking task is
+placed on a big CPU, its own execution cost might be higher than if it was
+running on a little, but it won't impact the other tasks of the little CPUs
+which will keep running at a lower OPP. So, when considering the total energy
+consumed by CPUs, the extra cost of running that one task on a big core can be
+smaller than the cost of raising the OPP on the little CPUs for all the other
+tasks.
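+
+For instance, reusing the Energy Model table of Example 2 (illustrative
+numbers only): assume CPU0 already has util_avg = 330, all other CPUs are
+idle, and a small task with util_avg = 50 wakes up.
+
+    * Placing it on CPU0 pushes the little domain to the 512-capacity OPP:
+          CPU0: 380 / 512 * 300 = 222          => total_energy = 222
+    * Placing it on CPU2 keeps the little domain at the 341-capacity OPP:
+          CPU0: 330 / 341 * 150 = 145
+          CPU2:  50 / 512 * 400 = 39           => total_energy = 184
+
+Even though the task fits on the little side, the big CPU is the more
+energy-efficient choice in this particular situation.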
+
+The examples above would be nearly impossible to get right in a generic way, and
+for all platforms, without knowing the cost of running at different OPPs on all
+CPUs of the system. Thanks to its EM-based design, EAS should cope with them
+correctly without too much trouble. However, in order to ensure a minimal
+impact on throughput for high-utilization scenarios, EAS also implements another
+mechanism called 'over-utilization'.
+
+
+5. Over-utilization
+-------------------
+
+From a general standpoint, the use-cases where EAS can help the most are those
+involving a light/medium CPU utilization. Whenever long CPU-bound tasks are
+being run, they will require all of the available CPU capacity, and there isn't
+much that can be done by the scheduler to save energy without severely harming
+throughput. In order to avoid hurting performance with EAS, CPUs are flagged as
+'over-utilized' as soon as they are used at more than 80% of their compute
+capacity. As long as no CPUs are over-utilized in a root domain, load balancing
+is disabled and EAS overrides the wake-up balancing code. EAS is likely to load
+the most energy efficient CPUs of the system more than the others if that can be
+done without harming throughput. So, the load-balancer is disabled to prevent
+it from breaking the energy-efficient task placement found by EAS. It is safe to
+do so when the system isn't overutilized since being below the 80% tipping point
+implies that:
+
+    a. there is some idle time on all CPUs, so the utilization signals used by
+       EAS are likely to accurately represent the 'size' of the various tasks
+       in the system;
+    b. all tasks should already be provided with enough CPU capacity,
+       regardless of their nice values;
+    c. since there is spare capacity all tasks must be blocking/sleeping
+       regularly and balancing at wake-up is sufficient.
+
+As soon as one CPU goes above the 80% tipping point, at least one of the three
+assumptions above becomes incorrect. In this scenario, the 'overutilized' flag
+is raised for the entire root domain, EAS is disabled, and the load-balancer is
+re-enabled. By doing so, the scheduler falls back onto load-based algorithms for
+wake-up and load balance under CPU-bound conditions. This better respects the
+nice values of tasks.
+
+Since the notion of overutilization largely relies on detecting whether or not
+there is some idle time in the system, the CPU capacity 'stolen' by higher
+(than CFS) scheduling classes (as well as IRQ) must be taken into account. As
+such, the detection of overutilization accounts for the capacity used not only
+by CFS tasks, but also by the other scheduling classes and IRQ.
+
+
+6. Dependencies and requirements for EAS
+----------------------------------------
+
+Energy Aware Scheduling depends on the CPUs of the system having specific
+hardware properties and on other features of the kernel being enabled. This
+section lists these dependencies and provides hints as to how they can be met.
+
+
+  6.1 - Asymmetric CPU topology
+
+As mentioned in the introduction, EAS is only supported on platforms with
+asymmetric CPU topologies for now. This requirement is checked at run-time by
+looking for the presence of the SD_ASYM_CPUCAPACITY flag when the scheduling
+domains are built.
+
+The flag is set/cleared automatically by the scheduler topology code whenever
+there are CPUs with different capacities in a root domain. The capacities of
+CPUs are provided by arch-specific code through the arch_scale_cpu_capacity()
+callback. As an example, arm and arm64 share an implementation of this callback
+which uses a combination of CPUFreq data and device-tree bindings to compute the
+capacity of CPUs (see drivers/base/arch_topology.c for more details).
+
+So, in order to use EAS on your platform, your architecture must implement the
+arch_scale_cpu_capacity() callback, and some of the CPUs must have a lower
+capacity than others.
+
+Please note that EAS is not fundamentally incompatible with SMP, but no
+significant savings on SMP platforms have been observed yet. This restriction
+could be amended in the future if proven otherwise.
+
+
+  6.2 - Energy Model presence
+
+EAS uses the EM of a platform to estimate the impact of scheduling decisions on
+energy. So, your platform must provide power cost tables to the EM framework in
+order for EAS to start. To do so, please refer to the documentation of the
+independent EM framework in Documentation/power/energy-model.txt.
+
+Please also note that the scheduling domains need to be re-built after the
+EM has been registered in order to start EAS.
+
+
+  6.3 - Energy Model complexity
+
+The task wake-up path is very latency-sensitive. When the EM of a platform is
+too complex (too many CPUs, too many performance domains, too many performance
+states, ...), the cost of using it in the wake-up path can become prohibitive.
+The energy-aware wake-up algorithm has a complexity of:
+
+	C = Nd * (Nc + Ns)
+
+with: Nd the number of performance domains; Nc the number of CPUs; and Ns the
+total number of OPPs (e.g. for two perf. domains with 4 OPPs each, Ns = 8).
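+
+For example, on a hypothetical system with 8 CPUs split into two performance
+domains of 4 OPPs each (Nd = 2, Nc = 8, Ns = 8), the complexity would be:
+
+	C = 2 * (8 + 8) = 32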
+
+A complexity check is performed at the root domain level, when scheduling
+domains are built. EAS will not start on a root domain if its C happens to be
+higher than the completely arbitrary EM_MAX_COMPLEXITY threshold (2048 at the
+time of writing).
+
+If you really want to use EAS but the complexity of your platform's Energy
+Model is too high to be used with a single root domain, you're left with only
+two possible options:
+
+    1. split your system into separate, smaller, root domains using exclusive
+       cpusets and enable EAS locally on each of them. This option has the
+       benefit of working out of the box but the drawback of preventing load
+       balance between root domains, which can result in an unbalanced system
+       overall;
+    2. submit patches to reduce the complexity of the EAS wake-up algorithm,
+       hence enabling it to cope with larger EMs in reasonable time.
+
+
+  6.4 - Schedutil governor
+
+EAS tries to predict which OPP the CPUs will be running at in the near future
+in order to estimate their energy consumption. To do so, it is assumed that the
+OPPs of the CPUs follow their utilization.
+
+Although it is very difficult to provide hard guarantees regarding the accuracy
+of this assumption in practice (because the hardware might not do what it is
+told to do, for example), schedutil, as opposed to other CPUFreq governors, at
+least _requests_ frequencies calculated using the utilization signals.
+Consequently, the only sane governor to use together with EAS is schedutil,
+because it is the only one providing some degree of consistency between
+frequency requests and energy predictions.
+
+Using EAS with any other governor than schedutil is not supported.
+
+
+  6.5 Scale-invariant utilization signals
+
+In order to make accurate predictions across CPUs and for all performance
+states, EAS needs frequency-invariant and CPU-invariant PELT signals. These can
+be obtained using the architecture-defined arch_scale{cpu,freq}_capacity()
+callbacks.
+
+Using EAS on a platform that doesn't implement these two callbacks is not
+supported.
+
+
+  6.6 Multithreading (SMT)
+
+EAS in its current form is SMT unaware and is not able to leverage
+multithreaded hardware to save energy. EAS considers threads as independent
+CPUs, which can actually be counter-productive for both performance and energy.
+
+EAS on SMT is not supported.
diff --git a/Documentation/scheduler/sched-tune.txt b/Documentation/scheduler/sched-tune.txt
new file mode 100644
index 0000000..1a10371
--- /dev/null
+++ b/Documentation/scheduler/sched-tune.txt
@@ -0,0 +1,388 @@
+             Central, scheduler-driven, power-performance control
+                               (EXPERIMENTAL)
+
+Abstract
+========
+
+The topic of a single, simple power-performance tunable that is wholly
+scheduler-centric and has well-defined and predictable properties has come up
+on several occasions in the past [1,2]. With techniques such as scheduler-
+driven DVFS [3], we now have a good framework for implementing such a tunable.
+This document describes the overall ideas behind its design and implementation.
+
+
+Table of Contents
+=================
+
+1. Motivation
+2. Introduction
+3. Signal Boosting Strategy
+4. OPP selection using boosted CPU utilization
+5. Per task group boosting
+6. Per-task wakeup-placement-strategy Selection
+7. Question and Answers
+   - What about "auto" mode?
+   - What about boosting on a congested system?
+   - How are CPUs boosted when we have tasks with multiple boost values?
+8. References
+
+
+1. Motivation
+=============
+
+Schedutil [3] is a utilization-driven cpufreq governor which allows the
+scheduler to select the optimal DVFS operating point (OPP) for running a task
+allocated to a CPU.
+
+However, sometimes it may be desired to intentionally boost the performance of
+a workload even if that could imply a reasonable increase in energy
+consumption. For example, in order to reduce the response time of a task, we
+may want to run the task at a higher OPP than the one that is actually required
+by its CPU bandwidth demand.
+
+This last requirement is especially important if we consider that one of the
+main goals of the utilization-driven governor component is to replace all
+currently available CPUFreq policies. Since schedutil is event-based, as
+opposed to the sampling-driven governors we currently have, it is already
+more responsive at selecting the optimal OPP to run tasks allocated to a CPU.
+However, just tracking the actual task utilization may not be enough from a
+performance standpoint.  For example, it is not possible to get behaviors
+similar to those provided by the "performance" and "interactive" CPUFreq
+governors.
+
+This document describes an implementation of a tunable, stacked on top of the
+utilization-driven governor which extends its functionality to support task
+performance boosting.
+
+By "performance boosting" we mean the reduction of the time required to
+complete a task activation, i.e. the time elapsed from a task wakeup to its
+next deactivation (e.g. because it goes back to sleep or it terminates).  For
+example, if we consider a simple periodic task which executes the same workload
+for 5[s] every 20[s] while running at a certain OPP, a boosted execution of
+that task must complete each of its activations in less than 5[s].
+
+The rest of this document introduces in more detail the proposed solution
+which has been named SchedTune.
+
+
+2. Introduction
+===============
+
+SchedTune exposes a simple user-space interface through a new CGroup
+controller 'stune' which provides two power-performance tunables
+per group:
+
+  /<stune cgroup mount point>/schedtune.prefer_idle
+  /<stune cgroup mount point>/schedtune.boost
+
+The CGroup implementation permits arbitrary user-space defined task
+classification to tune the scheduler for different goals depending on the
+specific nature of the task, e.g. background vs interactive vs low-priority.
+
+More details are given in section 5.
+
+2.1 Boosting
+============
+
+The boost value is expressed as an integer in the range [0..100].
+
+A value of 0 (default) configures the CFS scheduler for maximum energy
+efficiency. This means that schedutil runs the tasks at the minimum OPP
+required to satisfy their workload demand.
+
+A value of 100 configures the scheduler for maximum performance, which translates
+to the selection of the maximum OPP on that CPU.
+
+Values between 0 and 100 can be used to suit other scenarios, for example to
+improve interactive response or to react to other system events (battery
+level, etc.).
+
+The overall design of the SchedTune module is built on top of "Per-Entity Load
+Tracking" (PELT) signals and schedutil by introducing a bias on the OPP
+selection.
+
+Each time a task is allocated on a CPU, cpufreq is given the opportunity to tune
+the operating frequency of that CPU to better match the workload demand. The
+selection of the actual OPP being activated is influenced by the boost value
+for the task CGroup.
+
+This simple biasing approach leverages existing frameworks, which means minimal
+modifications to the scheduler, and yet it makes it possible to achieve a range
+of different behaviours, all from a single simple tunable knob.
+
+In EAS schedulers, we use boosted task and CPU utilization for energy
+calculation and energy-aware task placement.
+
+2.2 prefer_idle
+===============
+
+This is a flag which indicates to the scheduler whether the tasks in this
+group should be optimised for energy efficiency or for performance.
+
+A value of 0 (default) signals to the CFS scheduler that tasks in this group
+can be placed according to the energy-aware wakeup strategy.
+
+A value of 1 signals to the CFS scheduler that tasks in this group should be
+placed to minimise wakeup latency.
+
+Android platforms typically use this flag for application tasks which the
+user is currently interacting with.
+
+
+3. Signal Boosting Strategy
+===========================
+
+The whole PELT machinery works based on the value of a few load tracking signals
+which basically track the CPU bandwidth requirements for tasks and the capacity
+of CPUs. The basic idea behind the SchedTune knob is to artificially inflate
+some of these load tracking signals to make a task or RQ appear more demanding
+than it actually is.
+
+Which signals have to be inflated depends on the specific "consumer".  However,
+independently of the specific (signal, consumer) pair, it is important to
+define a simple and possibly consistent strategy for the concept of boosting a
+signal.
+
+A boosting strategy defines how the "abstract" user-space defined
+sched_cfs_boost value is translated into an internal "margin" value to be added
+to a signal to get its inflated value:
+
+  margin         := boosting_strategy(sched_cfs_boost, signal)
+  boosted_signal := signal + margin
+
+The boosting strategy currently implemented in SchedTune is called 'Signal
+Proportional Compensation' (SPC). With SPC, the sched_cfs_boost value is used to
+compute a margin which is proportional to the complement of the original signal.
+When a signal has a maximum possible value, its complement is defined as
+the delta between its actual value and that maximum.
+
+Since the tunable implementation uses signals which have SCHED_CAPACITY_SCALE as
+the maximum possible value, the margin becomes:
+
+	margin := sched_cfs_boost * (SCHED_CAPACITY_SCALE - signal)
+
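+For example (illustrative numbers only), a signal of 200 boosted by 50% with
+SCHED_CAPACITY_SCALE = 1024 gives:
+
+	margin         := 0.5 * (1024 - 200) = 412
+	boosted_signal := 200 + 412          = 612
+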
+Using this boosting strategy:
+- a 100% sched_cfs_boost means that the signal is scaled to the maximum value
+- each value in the range of sched_cfs_boost effectively inflates the signal in
+  question by a quantity which is proportional to its headroom, i.e. its
+  distance from the maximum value.
+
+For example, by applying the SPC boosting strategy to the selection of the OPP
+to run a task it is possible to achieve these behaviors:
+
+-   0% boosting: run the task at the minimum OPP required by its workload
+- 100% boosting: run the task at the maximum OPP available for the CPU
+-  50% boosting: run at the half-way OPP between minimum and maximum
+
+This means that, at 50% boosting, a task will be scheduled to run at half of
+the maximum theoretically achievable performance on the specific target
+platform.
+
+A graphical representation of an SPC boosted signal is shown in the
+following figure where:
+ a) "-" represents the original signal
+ b) "b" represents a  50% boosted signal
+ c) "p" represents a 100% boosted signal
+
+
+   ^
+   |  SCHED_CAPACITY_SCALE
+   +-----------------------------------------------------------------+
+   |pppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppp
+   |
+   |                                             boosted_signal
+   |                                          bbbbbbbbbbbbbbbbbbbbbbbb
+   |
+   |                                            original signal
+   |                  bbbbbbbbbbbbbbbbbbbbbbbb+----------------------+
+   |                                          |
+   |bbbbbbbbbbbbbbbbbb                        |
+   |                                          |
+   |                                          |
+   |                                          |
+   |                  +-----------------------+
+   |                  |
+   |                  |
+   |                  |
+   |------------------+
+   |
+   |
+   +----------------------------------------------------------------------->
+
+The plot above shows a ramped load signal (titled 'original signal') and its
+boosted equivalent. For each step of the original signal, the boosted signal
+corresponding to a 50% boost is midway between the original signal and the upper
+bound. Boosting by 100% generates a boosted signal which is always saturated to
+the upper bound.
+
+
+4. OPP selection using boosted CPU utilization
+==============================================
+
+It is worth calling out that the implementation does not introduce any new load
+signals. Instead, it provides an API to tune existing signals. This tuning is
+done on demand and only in scheduler code paths where it is sensible to do so.
+The new API calls are defined to return either the default signal or a boosted
+one, depending on the value of sched_cfs_boost. This is a clean and non-invasive
+modification of the existing code paths.
+
+The signal representing a CPU's utilization is boosted according to the
+previously described SPC boosting strategy. To schedutil, this allows a CPU
+(i.e. CFS run-queue) to appear more used than it actually is.
+
+Thus, with sched_cfs_boost enabled, we have the following main functions to
+get the current utilization of a CPU:
+
+  cpu_util()
+  boosted_cpu_util()
+
+The new boosted_cpu_util() is similar to cpu_util() but returns a boosted
+utilization signal which is a function of the sched_cfs_boost value.
+
+This function is used in the CFS scheduler code paths where schedutil needs to
+decide the OPP to run a CPU at. For example, this allows selecting the highest
+OPP for a CPU which has the boost value set to 100%.
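+
+The following stand-alone C sketch (illustrative only, not the actual kernel
+implementation) shows how a boosted utilization value can be derived from the
+SPC formula described in Section 3:
+
+	#include <stdio.h>
+
+	#define SCHED_CAPACITY_SCALE	1024	/* maximum utilization value */
+
+	/* Inflate 'util' towards the maximum by a margin proportional to the
+	 * boost percentage and to the signal's headroom. */
+	static unsigned long spc_boosted_util(unsigned long util,
+					      unsigned int boost_pct)
+	{
+		unsigned long margin;
+
+		margin = (SCHED_CAPACITY_SCALE - util) * boost_pct / 100;
+		return util + margin;
+	}
+
+	int main(void)
+	{
+		/* A CPU at util 200 in a 50% boost group appears as 612. */
+		printf("%lu\n", spc_boosted_util(200, 50));
+		return 0;
+	}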
+
+
+5. Per task group boosting
+==========================
+
+On battery-powered devices there are usually many long-running background
+services which need energy-efficient scheduling. On the other hand, some
+applications are more performance sensitive and require an interactive
+response and/or maximum performance, regardless of the energy cost.
+
+To better service such scenarios, the SchedTune implementation has an extension
+that provides a more fine-grained boosting interface.
+
+A new CGroup controller, namely "schedtune", can be enabled which allows task
+groups with different boost values to be defined and configured.
+Tasks that require special performance can be put into separate CGroups.
+The value of the boost associated with the tasks in this group can be specified
+using a single knob exposed by the CGroup controller:
+
+   schedtune.boost
+
+This knob allows the definition of a boost value that is to be used for
+SPC boosting of all tasks attached to this group.
+
+The current schedtune controller implementation is really simple and has these
+main characteristics:
+
+  1) It is only possible to create 1 level depth hierarchies
+
+     The root control group defines the system-wide boost value to be applied
+     by default to all tasks. Its direct subgroups are named "boost groups" and
+     they define the boost value for a specific set of tasks.
+     Further nested subgroups are not allowed since they do not have a sensible
+     meaning from a user-space standpoint.
+
+  2) It is possible to define only a limited number of "boost groups"
+
+     This number is defined at compile time and by default configured to 16.
+     This is a design decision motivated by two main reasons:
+     a) In a real system we do not expect utilization scenarios with more than
+        a few boost groups. For example, a reasonable collection of groups could
+        be just "background", "interactive" and "performance".
+     b) It simplifies the implementation considerably, especially for the code
+        which has to compute the per-CPU boosting once there are multiple
+        RUNNABLE tasks with different boost values.
+
+Such a simple design should allow servicing the main utilization scenarios
+identified so far. It provides a simple interface which can be used to manage
+the power-performance of all tasks or only selected tasks.
+Moreover, this interface can be easily integrated by user-space run-times (e.g.
+Android, ChromeOS) to implement a QoS solution for task boosting based on task
+classification, which has been a long-standing requirement.
+
+Setup and usage
+---------------
+
+0. Use a kernel with CONFIG_SCHED_TUNE support enabled
+
+1. Check that the "schedtune" CGroup controller is available:
+
+   root@linaro-nano:~# cat /proc/cgroups
+   #subsys_name	hierarchy	num_cgroups	enabled
+   cpuset  	0		1		1
+   cpu     	0		1		1
+   schedtune	0		1		1
+
+2. Mount a tmpfs to create the CGroups mount point (Optional)
+
+   root@linaro-nano:~# sudo mount -t tmpfs cgroups /sys/fs/cgroup
+
+3. Mount the "schedtune" controller
+
+   root@linaro-nano:~# mkdir /sys/fs/cgroup/stune
+   root@linaro-nano:~# sudo mount -t cgroup -o schedtune stune /sys/fs/cgroup/stune
+
+4. Create task groups and configure their specific boost value (Optional)
+
+   For example, here we create a "performance" boost group configured to boost
+   all its tasks to 100%:
+
+   root@linaro-nano:~# mkdir /sys/fs/cgroup/stune/performance
+   root@linaro-nano:~# echo 100 > /sys/fs/cgroup/stune/performance/schedtune.boost
+
+5. Move tasks into the boost group
+
+   For example, the following moves the task with PID $TASKPID (and all its
+   threads) into the "performance" boost group:
+
+   root@linaro-nano:~# echo $TASKPID > /sys/fs/cgroup/stune/performance/cgroup.procs
+
+This simple configuration allows only the threads of the $TASKPID task to run,
+when needed, at the highest OPP on the most capable CPU of the system.
+
+
+6. Per-task wakeup-placement-strategy Selection
+===============================================
+
+Many devices have a number of CFS tasks in use which require an absolute
+minimum wakeup latency, and many tasks for which wakeup latency is not
+important.
+
+For touch-driven environments, removing additional wakeup latency can be
+critical.
+
+When you use the SchedTune CGroup controller, you have access to a second
+parameter which allows a group to be marked such that energy-aware task
+placement is bypassed for tasks belonging to that group.
+
+prefer_idle=0 (default - use energy-aware task placement if available)
+prefer_idle=1 (never use energy-aware task placement for these tasks)
+
+Since the regular wakeup task placement algorithm in CFS is biased for
+performance, this has the effect of restoring minimum wakeup latency
+for the desired tasks whilst still allowing energy-aware wakeup placement
+to save energy for other tasks.
+
+
+7. Question and Answers
+=======================
+
+What about "auto" mode?
+-----------------------
+
+The 'auto' mode as described in [5] can be implemented by interfacing SchedTune
+with some suitable user-space element. This element could use the exposed
+system-wide or cgroup-based interface.
+
+How are multiple groups of tasks with different boost values managed?
+---------------------------------------------------------------------
+
+The current SchedTune implementation keeps track of the boosted RUNNABLE tasks
+on a CPU. The CPU utilization seen by schedutil (and used to select an
+appropriate OPP) is boosted with a value which is the maximum of the boost
+values of the currently RUNNABLE tasks in its RQ.
+
+This allows cpufreq to boost a CPU only while there are boosted tasks ready
+to run, and to switch back to the energy-efficient mode as soon as the last
+boosted task is dequeued.
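+
+A minimal sketch of this max-aggregation (hypothetical helper, not the actual
+schedtune code) could look like:
+
+	/* The boost applied to a CPU is the maximum boost value among the
+	 * boost groups that currently have RUNNABLE tasks on that CPU. */
+	int cpu_boost_max(const int boost[], const int nr_runnable[],
+			  int ngroups)
+	{
+		int idx, max_boost = 0;
+
+		for (idx = 0; idx < ngroups; idx++)
+			if (nr_runnable[idx] > 0 && boost[idx] > max_boost)
+				max_boost = boost[idx];
+
+		return max_boost;
+	}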
+
+
+8. References
+=============
+[1] http://lwn.net/Articles/552889
+[2] http://lkml.org/lkml/2012/5/18/91
+[3] https://lkml.org/lkml/2016/3/29/1041
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 37a6795..0e4a812 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -693,7 +693,8 @@
 perf_event_paranoid:
 
 Controls use of the performance events system by unprivileged
-users (without CAP_SYS_ADMIN).  The default value is 2.
+users (without CAP_SYS_ADMIN).  The default value is 3 if
+CONFIG_SECURITY_PERF_EVENTS_RESTRICT is set, or 2 otherwise.
 
  -1: Allow use of (almost) all events by all users
      Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK
@@ -701,6 +702,7 @@
      Disallow raw tracepoint access by users without CAP_SYS_ADMIN
 >=1: Disallow CPU event access by users without CAP_SYS_ADMIN
 >=2: Disallow kernel profiling by users without CAP_SYS_ADMIN
+>=3: Disallow all event access by users without CAP_SYS_ADMIN
 
 ==============================================================
 
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 7d73882..a48baf2 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -31,6 +31,7 @@
 - dirty_writeback_centisecs
 - drop_caches
 - extfrag_threshold
+- extra_free_kbytes
 - hugetlb_shm_group
 - laptop_mode
 - legacy_va_layout
@@ -274,6 +275,21 @@
 
 ==============================================================
 
+extra_free_kbytes
+
+This parameter tells the VM to keep extra free memory between the threshold
+where background reclaim (kswapd) kicks in, and the threshold where direct
+reclaim (by allocating processes) kicks in.
+
+This is useful for workloads that require low-latency memory allocations
+and have a bounded burstiness in memory allocations. For example, a
+realtime application that receives and transmits network traffic
+(causing in-kernel memory allocations) with a maximum total message burst
+size of 200MB may need 200MB of extra free memory to avoid direct
+reclaim-related latencies.
+
+==============================================================
+
 hugetlb_shm_group
 
 hugetlb_shm_group contains group id that is allowed to create SysV
diff --git a/Makefile b/Makefile
index f6c9d57..621eee1 100644
--- a/Makefile
+++ b/Makefile
@@ -483,7 +483,11 @@
 
 ifeq ($(cc-name),clang)
 ifneq ($(CROSS_COMPILE),)
-CLANG_FLAGS	+= --target=$(notdir $(CROSS_COMPILE:%-=%))
+CLANG_TRIPLE	?= $(CROSS_COMPILE)
+CLANG_FLAGS	+= --target=$(notdir $(CLANG_TRIPLE:%-=%))
+ifeq ($(shell $(srctree)/scripts/clang-android.sh $(CC) $(CLANG_FLAGS)), y)
+$(error "Clang with Android --target detected. Did you specify CLANG_TRIPLE?")
+endif
 GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)elfedit))
 CLANG_FLAGS	+= --prefix=$(GCC_TOOLCHAIN_DIR)
 GCC_TOOLCHAIN	:= $(realpath $(GCC_TOOLCHAIN_DIR)/..)
@@ -606,6 +610,16 @@
   CC_FLAGS_FTRACE := -pg
 endif
 
+# Make toolchain changes before including arch/$(SRCARCH)/Makefile to ensure
+# ar/cc/ld-* macros return correct values.
+ifdef CONFIG_LTO_CLANG
+# use llvm-ar for building symbol tables from IR files, and llvm-nm instead
+# of objdump for processing symbol versions and exports
+LLVM_AR		:= llvm-ar
+LLVM_NM		:= llvm-nm
+export LLVM_AR LLVM_NM
+endif
+
 # The arch Makefile can set ARCH_{CPP,A,C}FLAGS to override the default
 # values of the respective KBUILD_* variables
 ARCH_CPPFLAGS :=
@@ -672,6 +686,12 @@
 # Tell gcc to never replace conditional load with a non-conditional one
 KBUILD_CFLAGS	+= $(call cc-option,--param=allow-store-data-races=0)
 
+# check for 'asm goto'
+ifeq ($(call shell-cached,$(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLAGS)), y)
+	KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
+	KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO
+endif
+
 include scripts/Makefile.kcov
 include scripts/Makefile.gcc-plugins
 
@@ -787,6 +807,48 @@
 LDFLAGS_vmlinux += --gc-sections
 endif
 
+ifdef CONFIG_LTO_CLANG
+lto-clang-flags	:= -flto -fvisibility=hidden
+
+# allow disabling only clang LTO where needed
+DISABLE_LTO_CLANG := -fno-lto -fvisibility=default
+export DISABLE_LTO_CLANG
+endif
+
+ifdef CONFIG_LTO
+LTO_CFLAGS	:= $(lto-clang-flags)
+KBUILD_CFLAGS	+= $(LTO_CFLAGS)
+
+DISABLE_LTO	:= $(DISABLE_LTO_CLANG)
+export LTO_CFLAGS DISABLE_LTO
+endif
+
+ifdef CONFIG_CFI_CLANG
+cfi-clang-flags	+= -fsanitize=cfi $(call cc-option, -fsplit-lto-unit)
+DISABLE_CFI_CLANG := -fno-sanitize=cfi
+ifdef CONFIG_MODULES
+cfi-clang-flags	+= -fsanitize-cfi-cross-dso
+DISABLE_CFI_CLANG += -fno-sanitize-cfi-cross-dso
+endif
+ifdef CONFIG_CFI_PERMISSIVE
+cfi-clang-flags	+= -fsanitize-recover=cfi -fno-sanitize-trap=cfi
+endif
+
+# also disable CFI when LTO is disabled
+DISABLE_LTO_CLANG += $(DISABLE_CFI_CLANG)
+# allow disabling only clang CFI where needed
+export DISABLE_CFI_CLANG
+endif
+
+ifdef CONFIG_CFI
+CFI_CFLAGS	:= $(cfi-clang-flags)
+KBUILD_CFLAGS	+= $(CFI_CFLAGS)
+
+DISABLE_CFI	:= $(DISABLE_CFI_CLANG)
+DISABLE_LTO	+= $(DISABLE_CFI)
+export CFI_CFLAGS DISABLE_CFI
+endif
+
 # arch Makefile may override CC so keep this after arch Makefile is included
 NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
 
@@ -1575,7 +1637,8 @@
 		-o -name modules.builtin -o -name '.tmp_*.o.*' \
 		-o -name '*.c.[012]*.*' \
 		-o -name '*.ll' \
-		-o -name '*.gcno' \) -type f -print | xargs rm -f
+		-o -name '*.gcno' \
+		-o -name '*.*.symversions' \) -type f -print | xargs rm -f
 
 # Generate tags for editors
 # ---------------------------------------------------------------------------
diff --git a/arch/Kconfig b/arch/Kconfig
index a336548..2d3925c 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -475,6 +475,73 @@
 	  about 20% of all kernel functions, which increases the kernel code
 	  size by about 2%.
 
+config LTO
+	def_bool n
+
+config ARCH_SUPPORTS_LTO_CLANG
+	bool
+	help
+	  An architecture should select this option if it supports:
+	  - compiling with clang,
+	  - compiling inline assembly with clang's integrated assembler,
+	  - and linking with LLD.
+
+choice
+	prompt "Link-Time Optimization (LTO) (EXPERIMENTAL)"
+	default LTO_NONE
+	help
+	  This option turns on Link-Time Optimization (LTO).
+
+config LTO_NONE
+	bool "None"
+
+config LTO_CLANG
+	bool "Use clang Link Time Optimization (LTO) (EXPERIMENTAL)"
+	depends on ARCH_SUPPORTS_LTO_CLANG
+	depends on !FTRACE_MCOUNT_RECORD || HAVE_C_RECORDMCOUNT
+	depends on !KASAN
+	depends on CC_IS_CLANG && LD_IS_LLD
+	select LTO
+	help
+	  This option enables clang's Link Time Optimization (LTO), which allows
+	  the compiler to optimize the kernel globally at link time. If you
+	  enable this option, the compiler generates LLVM IR instead of object
+	  files, and the actual compilation from IR occurs at the LTO link step,
+	  which may take several minutes.
+
+	  If you select this option, you must compile the kernel with clang and
+	  LLD.
+
+endchoice
+
+config CFI
+	bool
+
+config CFI_PERMISSIVE
+	bool "Use CFI in permissive mode"
+	depends on CFI
+	help
+	  When selected, Control Flow Integrity (CFI) violations result in a
+	  warning instead of a kernel panic. This option is useful for finding
+	  CFI violations in drivers during development.
+
+config CFI_CLANG
+	bool "Use clang Control Flow Integrity (CFI) (EXPERIMENTAL)"
+	depends on LTO_CLANG
+	depends on KALLSYMS
+	select CFI
+	help
+	  This option enables clang Control Flow Integrity (CFI), which adds
+	  runtime checking for indirect function calls.
+
+config CFI_CLANG_SHADOW
+	bool "Use CFI shadow to speed up cross-module checks"
+	default y
+	depends on CFI_CLANG
+	help
+	  If you select this option, the kernel builds a fast look-up table of
+	  CFI check functions in loaded modules to reduce overhead.
+
 config HAVE_ARCH_WITHIN_STACK_FRAMES
 	bool
 	help
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
index ac6b90e..75663ed 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
@@ -42,6 +42,7 @@
 			cci-control-port = <&cci_control1>;
 			cpu-idle-states = <&CLUSTER_SLEEP_BIG>;
 			capacity-dmips-mhz = <1024>;
+			dynamic-power-coefficient = <990>;
 		};
 
 		cpu1: cpu@1 {
@@ -51,6 +52,7 @@
 			cci-control-port = <&cci_control1>;
 			cpu-idle-states = <&CLUSTER_SLEEP_BIG>;
 			capacity-dmips-mhz = <1024>;
+			dynamic-power-coefficient = <990>;
 		};
 
 		cpu2: cpu@2 {
@@ -60,6 +62,7 @@
 			cci-control-port = <&cci_control2>;
 			cpu-idle-states = <&CLUSTER_SLEEP_LITTLE>;
 			capacity-dmips-mhz = <516>;
+			dynamic-power-coefficient = <133>;
 		};
 
 		cpu3: cpu@3 {
@@ -69,6 +72,7 @@
 			cci-control-port = <&cci_control2>;
 			cpu-idle-states = <&CLUSTER_SLEEP_LITTLE>;
 			capacity-dmips-mhz = <516>;
+			dynamic-power-coefficient = <133>;
 		};
 
 		cpu4: cpu@4 {
@@ -78,6 +82,7 @@
 			cci-control-port = <&cci_control2>;
 			cpu-idle-states = <&CLUSTER_SLEEP_LITTLE>;
 			capacity-dmips-mhz = <516>;
+			dynamic-power-coefficient = <133>;
 		};
 
 		idle-states {
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index fc33444..2721877 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -2,6 +2,12 @@
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_CGROUPS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_FAIR_GROUP_SCHED=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CPUSETS=y
+CONFIG_PROC_PID_CPUSET=y
+CONFIG_SCHED_AUTOGROUP=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EMBEDDED=y
 CONFIG_PERF_EVENTS=y
@@ -116,6 +122,7 @@
 CONFIG_PCI_ENDPOINT_CONFIGFS=y
 CONFIG_PCI_EPF_TEST=m
 CONFIG_SMP=y
+CONFIG_SCHED_MC=y
 CONFIG_NR_CPUS=16
 CONFIG_SECCOMP=y
 CONFIG_ARM_APPENDED_DTB=y
@@ -124,10 +131,10 @@
 CONFIG_EFI=y
 CONFIG_CPU_FREQ=y
 CONFIG_CPU_FREQ_STAT=y
-CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
-CONFIG_CPU_FREQ_GOV_POWERSAVE=m
-CONFIG_CPU_FREQ_GOV_USERSPACE=m
-CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m
+CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
 CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
 CONFIG_CPUFREQ_DT=y
 CONFIG_ARM_IMX6Q_CPUFREQ=y
@@ -137,6 +144,7 @@
 CONFIG_ARM_ZYNQ_CPUIDLE=y
 CONFIG_ARM_EXYNOS_CPUIDLE=y
 CONFIG_KERNEL_MODE_NEON=y
+CONFIG_ENERGY_MODEL=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index b8e69fe..5c278a8 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -69,6 +69,15 @@
 	help
 	  Use optimized AES assembler routines for ARM platforms.
 
+	  On ARM processors without the Crypto Extensions, this is the
+	  fastest AES implementation for single blocks.  For multiple
+	  blocks, the NEON bit-sliced implementation is usually faster.
+
+	  This implementation may be vulnerable to cache timing attacks,
+	  since it uses lookup tables.  However, as countermeasures it
+	  disables IRQs and preloads the tables; it is hoped this makes
+	  such attacks very difficult.
+
 config CRYPTO_AES_ARM_BS
 	tristate "Bit sliced AES using NEON instructions"
 	depends on KERNEL_MODE_NEON
@@ -116,9 +125,14 @@
 	select CRYPTO_HASH
 
 config CRYPTO_CHACHA20_NEON
-	tristate "NEON accelerated ChaCha20 symmetric cipher"
+	tristate "NEON accelerated ChaCha stream cipher algorithms"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_CHACHA20
 
+config CRYPTO_NHPOLY1305_NEON
+	tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_NHPOLY1305
+
 endif
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index bd5bcee..b65d6bf 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -9,7 +9,8 @@
 obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
 obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
 obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
-obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
+obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
+obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
 
 ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
 ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
@@ -52,7 +53,8 @@
 ghash-arm-ce-y	:= ghash-ce-core.o ghash-ce-glue.o
 crct10dif-arm-ce-y	:= crct10dif-ce-core.o crct10dif-ce-glue.o
 crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
-chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
+chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
+nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
 
 ifdef REGENERATE_ARM_CRYPTO
 quiet_cmd_perl = PERL    $@
diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S
index 184d6c2..f2d67c0 100644
--- a/arch/arm/crypto/aes-cipher-core.S
+++ b/arch/arm/crypto/aes-cipher-core.S
@@ -10,6 +10,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 #include <asm/cache.h>
 
 	.text
@@ -41,7 +42,7 @@
 	.endif
 	.endm
 
-	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op
+	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
 	__select	\out0, \in0, 0
 	__select	t0, \in1, 1
 	__load		\out0, \out0, 0, \sz, \op
@@ -73,6 +74,14 @@
 	__load		t0, t0, 3, \sz, \op
 	__load		\t4, \t4, 3, \sz, \op
 
+	.ifnb		\oldcpsr
+	/*
+	 * This is the final round and we're done with all data-dependent table
+	 * lookups, so we can safely re-enable interrupts.
+	 */
+	restore_irqs	\oldcpsr
+	.endif
+
 	eor		\out1, \out1, t1, ror #24
 	eor		\out0, \out0, t2, ror #16
 	ldm		rk!, {t1, t2}
@@ -83,14 +92,14 @@
 	eor		\out1, \out1, t2
 	.endm
 
-	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
 	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
-	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
+	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
 	.endm
 
-	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
 	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
-	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
+	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
 	.endm
 
 	.macro		__rev, out, in
@@ -118,13 +127,14 @@
 	.macro		do_crypt, round, ttab, ltab, bsz
 	push		{r3-r11, lr}
 
+	// Load keys first, to reduce latency in case they're not cached yet.
+	ldm		rk!, {r8-r11}
+
 	ldr		r4, [in]
 	ldr		r5, [in, #4]
 	ldr		r6, [in, #8]
 	ldr		r7, [in, #12]
 
-	ldm		rk!, {r8-r11}
-
 #ifdef CONFIG_CPU_BIG_ENDIAN
 	__rev		r4, r4
 	__rev		r5, r5
@@ -138,6 +148,25 @@
 	eor		r7, r7, r11
 
 	__adrl		ttab, \ttab
+	/*
+	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
+	 * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
+	 * intended to make cache-timing attacks more difficult.  They may not
+	 * be fully prevented, however; see the paper
+	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
+	 * ("Cache-timing attacks on AES") for a discussion of the many
+	 * difficulties involved in writing truly constant-time AES software.
+	 */
+	 save_and_disable_irqs	t0
+	.set		i, 0
+	.rept		1024 / 128
+	ldr		r8, [ttab, #i + 0]
+	ldr		r9, [ttab, #i + 32]
+	ldr		r10, [ttab, #i + 64]
+	ldr		r11, [ttab, #i + 96]
+	.set		i, i + 128
+	.endr
+	push		{t0}		// oldcpsr
 
 	tst		rounds, #2
 	bne		1f
@@ -151,8 +180,21 @@
 	\round		r4, r5, r6, r7, r8, r9, r10, r11
 	b		0b
 
-2:	__adrl		ttab, \ltab
-	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b
+2:	.ifb		\ltab
+	add		ttab, ttab, #1
+	.else
+	__adrl		ttab, \ltab
+	// Prefetch inverse S-box for final round; see explanation above
+	.set		i, 0
+	.rept		256 / 64
+	ldr		t0, [ttab, #i + 0]
+	ldr		t1, [ttab, #i + 32]
+	.set		i, i + 64
+	.endr
+	.endif
+
+	pop		{rounds}	// oldcpsr
+	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds
 
 #ifdef CONFIG_CPU_BIG_ENDIAN
 	__rev		r4, r4
@@ -175,7 +217,7 @@
 	.endm
 
 ENTRY(__aes_arm_encrypt)
-	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
+	do_crypt	fround, crypto_ft_tab,, 2
 ENDPROC(__aes_arm_encrypt)
 
 	.align		5
diff --git a/arch/arm/crypto/chacha-neon-core.S b/arch/arm/crypto/chacha-neon-core.S
new file mode 100644
index 0000000..eb22926
--- /dev/null
+++ b/arch/arm/crypto/chacha-neon-core.S
@@ -0,0 +1,560 @@
+/*
+ * ChaCha/XChaCha NEON helper functions
+ *
+ * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on:
+ * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSE3 functions
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+ /*
+  * NEON doesn't have a rotate instruction.  The alternatives are, more or less:
+  *
+  * (a)  vshl.u32 + vsri.u32		(needs temporary register)
+  * (b)  vshl.u32 + vshr.u32 + vorr	(needs temporary register)
+  * (c)  vrev32.16			(16-bit rotations only)
+  * (d)  vtbl.8 + vtbl.8		(multiple of 8 bits rotations only,
+  *					 needs index vector)
+  *
+  * ChaCha has 16, 12, 8, and 7-bit rotations.  For the 12 and 7-bit rotations,
+  * the only choices are (a) and (b).  We use (a) since it takes two-thirds the
+  * cycles of (b) on both Cortex-A7 and Cortex-A53.
+  *
+  * For the 16-bit rotation, we use vrev32.16 since it's consistently fastest
+  * and doesn't need a temporary register.
+  *
+  * For the 8-bit rotation, we use vtbl.8 + vtbl.8.  On Cortex-A7, this sequence
+  * is twice as fast as (a), even when doing (a) on multiple registers
+  * simultaneously to eliminate the stall between vshl and vsri.  Also, it
+  * parallelizes better when temporary registers are scarce.
+  *
+  * A disadvantage is that on Cortex-A53, the vtbl sequence is the same speed as
+  * (a), so the need to load the rotation table actually makes the vtbl method
+  * slightly slower overall on that CPU (~1.3% slower ChaCha20).  Still, it
+  * seems to be a good compromise to get a more significant speed boost on some
+  * CPUs, e.g. ~4.8% faster ChaCha20 on Cortex-A7.
+  */
+
+#include <linux/linkage.h>
+
+	.text
+	.fpu		neon
+	.align		5
+
+/*
+ * chacha_permute - permute one block
+ *
+ * Permute one 64-byte block where the state matrix is stored in the four NEON
+ * registers q0-q3.  It performs matrix operations on four words in parallel,
+ * but requires shuffling to rearrange the words after each round.
+ *
+ * The round count is given in r3.
+ *
+ * Clobbers: r3, ip, q4-q5
+ */
+chacha_permute:
+
+	adr		ip, .Lrol8_table
+	vld1.8		{d10}, [ip, :64]
+
+.Ldoubleround:
+	// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+	vadd.i32	q0, q0, q1
+	veor		q3, q3, q0
+	vrev32.16	q3, q3
+
+	// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+	vadd.i32	q2, q2, q3
+	veor		q4, q1, q2
+	vshl.u32	q1, q4, #12
+	vsri.u32	q1, q4, #20
+
+	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+	vadd.i32	q0, q0, q1
+	veor		q3, q3, q0
+	vtbl.8		d6, {d6}, d10
+	vtbl.8		d7, {d7}, d10
+
+	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+	vadd.i32	q2, q2, q3
+	veor		q4, q1, q2
+	vshl.u32	q1, q4, #7
+	vsri.u32	q1, q4, #25
+
+	// x1 = shuffle32(x1, MASK(0, 3, 2, 1))
+	vext.8		q1, q1, q1, #4
+	// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+	vext.8		q2, q2, q2, #8
+	// x3 = shuffle32(x3, MASK(2, 1, 0, 3))
+	vext.8		q3, q3, q3, #12
+
+	// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+	vadd.i32	q0, q0, q1
+	veor		q3, q3, q0
+	vrev32.16	q3, q3
+
+	// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+	vadd.i32	q2, q2, q3
+	veor		q4, q1, q2
+	vshl.u32	q1, q4, #12
+	vsri.u32	q1, q4, #20
+
+	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+	vadd.i32	q0, q0, q1
+	veor		q3, q3, q0
+	vtbl.8		d6, {d6}, d10
+	vtbl.8		d7, {d7}, d10
+
+	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+	vadd.i32	q2, q2, q3
+	veor		q4, q1, q2
+	vshl.u32	q1, q4, #7
+	vsri.u32	q1, q4, #25
+
+	// x1 = shuffle32(x1, MASK(2, 1, 0, 3))
+	vext.8		q1, q1, q1, #12
+	// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+	vext.8		q2, q2, q2, #8
+	// x3 = shuffle32(x3, MASK(0, 3, 2, 1))
+	vext.8		q3, q3, q3, #4
+
+	subs		r3, r3, #2
+	bne		.Ldoubleround
+
+	bx		lr
+ENDPROC(chacha_permute)
+
+ENTRY(chacha_block_xor_neon)
+	// r0: Input state matrix, s
+	// r1: 1 data block output, o
+	// r2: 1 data block input, i
+	// r3: nrounds
+	push		{lr}
+
+	// x0..3 = s0..3
+	add		ip, r0, #0x20
+	vld1.32		{q0-q1}, [r0]
+	vld1.32		{q2-q3}, [ip]
+
+	vmov		q8, q0
+	vmov		q9, q1
+	vmov		q10, q2
+	vmov		q11, q3
+
+	bl		chacha_permute
+
+	add		ip, r2, #0x20
+	vld1.8		{q4-q5}, [r2]
+	vld1.8		{q6-q7}, [ip]
+
+	// o0 = i0 ^ (x0 + s0)
+	vadd.i32	q0, q0, q8
+	veor		q0, q0, q4
+
+	// o1 = i1 ^ (x1 + s1)
+	vadd.i32	q1, q1, q9
+	veor		q1, q1, q5
+
+	// o2 = i2 ^ (x2 + s2)
+	vadd.i32	q2, q2, q10
+	veor		q2, q2, q6
+
+	// o3 = i3 ^ (x3 + s3)
+	vadd.i32	q3, q3, q11
+	veor		q3, q3, q7
+
+	add		ip, r1, #0x20
+	vst1.8		{q0-q1}, [r1]
+	vst1.8		{q2-q3}, [ip]
+
+	pop		{pc}
+ENDPROC(chacha_block_xor_neon)
+
+ENTRY(hchacha_block_neon)
+	// r0: Input state matrix, s
+	// r1: output (8 32-bit words)
+	// r2: nrounds
+	push		{lr}
+
+	vld1.32		{q0-q1}, [r0]!
+	vld1.32		{q2-q3}, [r0]
+
+	mov		r3, r2
+	bl		chacha_permute
+
+	vst1.32		{q0}, [r1]!
+	vst1.32		{q3}, [r1]
+
+	pop		{pc}
+ENDPROC(hchacha_block_neon)
+
+	.align		4
+.Lctrinc:	.word	0, 1, 2, 3
+.Lrol8_table:	.byte	3, 0, 1, 2, 7, 4, 5, 6
+
+	.align		5
+ENTRY(chacha_4block_xor_neon)
+	push		{r4-r5}
+	mov		r4, sp			// preserve the stack pointer
+	sub		ip, sp, #0x20		// allocate a 32 byte buffer
+	bic		ip, ip, #0x1f		// aligned to 32 bytes
+	mov		sp, ip
+
+	// r0: Input state matrix, s
+	// r1: 4 data blocks output, o
+	// r2: 4 data blocks input, i
+	// r3: nrounds
+
+	//
+	// This function encrypts four consecutive ChaCha blocks by loading
+	// the state matrix in NEON registers four times. The algorithm performs
+	// each operation on the corresponding word of each state matrix, hence
+	// requires no word shuffling. The words are re-interleaved before the
+	// final addition of the original state and the XORing step.
+	//
+
+	// x0..15[0-3] = s0..15[0-3]
+	add		ip, r0, #0x20
+	vld1.32		{q0-q1}, [r0]
+	vld1.32		{q2-q3}, [ip]
+
+	adr		r5, .Lctrinc
+	vdup.32		q15, d7[1]
+	vdup.32		q14, d7[0]
+	vld1.32		{q4}, [r5, :128]
+	vdup.32		q13, d6[1]
+	vdup.32		q12, d6[0]
+	vdup.32		q11, d5[1]
+	vdup.32		q10, d5[0]
+	vadd.u32	q12, q12, q4		// x12 += counter values 0-3
+	vdup.32		q9, d4[1]
+	vdup.32		q8, d4[0]
+	vdup.32		q7, d3[1]
+	vdup.32		q6, d3[0]
+	vdup.32		q5, d2[1]
+	vdup.32		q4, d2[0]
+	vdup.32		q3, d1[1]
+	vdup.32		q2, d1[0]
+	vdup.32		q1, d0[1]
+	vdup.32		q0, d0[0]
+
+	adr		ip, .Lrol8_table
+	b		1f
+
+.Ldoubleround4:
+	vld1.32		{q8-q9}, [sp, :256]
+1:
+	// x0 += x4, x12 = rotl32(x12 ^ x0, 16)
+	// x1 += x5, x13 = rotl32(x13 ^ x1, 16)
+	// x2 += x6, x14 = rotl32(x14 ^ x2, 16)
+	// x3 += x7, x15 = rotl32(x15 ^ x3, 16)
+	vadd.i32	q0, q0, q4
+	vadd.i32	q1, q1, q5
+	vadd.i32	q2, q2, q6
+	vadd.i32	q3, q3, q7
+
+	veor		q12, q12, q0
+	veor		q13, q13, q1
+	veor		q14, q14, q2
+	veor		q15, q15, q3
+
+	vrev32.16	q12, q12
+	vrev32.16	q13, q13
+	vrev32.16	q14, q14
+	vrev32.16	q15, q15
+
+	// x8 += x12, x4 = rotl32(x4 ^ x8, 12)
+	// x9 += x13, x5 = rotl32(x5 ^ x9, 12)
+	// x10 += x14, x6 = rotl32(x6 ^ x10, 12)
+	// x11 += x15, x7 = rotl32(x7 ^ x11, 12)
+	vadd.i32	q8, q8, q12
+	vadd.i32	q9, q9, q13
+	vadd.i32	q10, q10, q14
+	vadd.i32	q11, q11, q15
+
+	vst1.32		{q8-q9}, [sp, :256]
+
+	veor		q8, q4, q8
+	veor		q9, q5, q9
+	vshl.u32	q4, q8, #12
+	vshl.u32	q5, q9, #12
+	vsri.u32	q4, q8, #20
+	vsri.u32	q5, q9, #20
+
+	veor		q8, q6, q10
+	veor		q9, q7, q11
+	vshl.u32	q6, q8, #12
+	vshl.u32	q7, q9, #12
+	vsri.u32	q6, q8, #20
+	vsri.u32	q7, q9, #20
+
+	// x0 += x4, x12 = rotl32(x12 ^ x0, 8)
+	// x1 += x5, x13 = rotl32(x13 ^ x1, 8)
+	// x2 += x6, x14 = rotl32(x14 ^ x2, 8)
+	// x3 += x7, x15 = rotl32(x15 ^ x3, 8)
+	vld1.8		{d16}, [ip, :64]
+	vadd.i32	q0, q0, q4
+	vadd.i32	q1, q1, q5
+	vadd.i32	q2, q2, q6
+	vadd.i32	q3, q3, q7
+
+	veor		q12, q12, q0
+	veor		q13, q13, q1
+	veor		q14, q14, q2
+	veor		q15, q15, q3
+
+	vtbl.8		d24, {d24}, d16
+	vtbl.8		d25, {d25}, d16
+	vtbl.8		d26, {d26}, d16
+	vtbl.8		d27, {d27}, d16
+	vtbl.8		d28, {d28}, d16
+	vtbl.8		d29, {d29}, d16
+	vtbl.8		d30, {d30}, d16
+	vtbl.8		d31, {d31}, d16
+
+	vld1.32		{q8-q9}, [sp, :256]
+
+	// x8 += x12, x4 = rotl32(x4 ^ x8, 7)
+	// x9 += x13, x5 = rotl32(x5 ^ x9, 7)
+	// x10 += x14, x6 = rotl32(x6 ^ x10, 7)
+	// x11 += x15, x7 = rotl32(x7 ^ x11, 7)
+	vadd.i32	q8, q8, q12
+	vadd.i32	q9, q9, q13
+	vadd.i32	q10, q10, q14
+	vadd.i32	q11, q11, q15
+
+	vst1.32		{q8-q9}, [sp, :256]
+
+	veor		q8, q4, q8
+	veor		q9, q5, q9
+	vshl.u32	q4, q8, #7
+	vshl.u32	q5, q9, #7
+	vsri.u32	q4, q8, #25
+	vsri.u32	q5, q9, #25
+
+	veor		q8, q6, q10
+	veor		q9, q7, q11
+	vshl.u32	q6, q8, #7
+	vshl.u32	q7, q9, #7
+	vsri.u32	q6, q8, #25
+	vsri.u32	q7, q9, #25
+
+	vld1.32		{q8-q9}, [sp, :256]
+
+	// x0 += x5, x15 = rotl32(x15 ^ x0, 16)
+	// x1 += x6, x12 = rotl32(x12 ^ x1, 16)
+	// x2 += x7, x13 = rotl32(x13 ^ x2, 16)
+	// x3 += x4, x14 = rotl32(x14 ^ x3, 16)
+	vadd.i32	q0, q0, q5
+	vadd.i32	q1, q1, q6
+	vadd.i32	q2, q2, q7
+	vadd.i32	q3, q3, q4
+
+	veor		q15, q15, q0
+	veor		q12, q12, q1
+	veor		q13, q13, q2
+	veor		q14, q14, q3
+
+	vrev32.16	q15, q15
+	vrev32.16	q12, q12
+	vrev32.16	q13, q13
+	vrev32.16	q14, q14
+
+	// x10 += x15, x5 = rotl32(x5 ^ x10, 12)
+	// x11 += x12, x6 = rotl32(x6 ^ x11, 12)
+	// x8 += x13, x7 = rotl32(x7 ^ x8, 12)
+	// x9 += x14, x4 = rotl32(x4 ^ x9, 12)
+	vadd.i32	q10, q10, q15
+	vadd.i32	q11, q11, q12
+	vadd.i32	q8, q8, q13
+	vadd.i32	q9, q9, q14
+
+	vst1.32		{q8-q9}, [sp, :256]
+
+	veor		q8, q7, q8
+	veor		q9, q4, q9
+	vshl.u32	q7, q8, #12
+	vshl.u32	q4, q9, #12
+	vsri.u32	q7, q8, #20
+	vsri.u32	q4, q9, #20
+
+	veor		q8, q5, q10
+	veor		q9, q6, q11
+	vshl.u32	q5, q8, #12
+	vshl.u32	q6, q9, #12
+	vsri.u32	q5, q8, #20
+	vsri.u32	q6, q9, #20
+
+	// x0 += x5, x15 = rotl32(x15 ^ x0, 8)
+	// x1 += x6, x12 = rotl32(x12 ^ x1, 8)
+	// x2 += x7, x13 = rotl32(x13 ^ x2, 8)
+	// x3 += x4, x14 = rotl32(x14 ^ x3, 8)
+	vld1.8		{d16}, [ip, :64]
+	vadd.i32	q0, q0, q5
+	vadd.i32	q1, q1, q6
+	vadd.i32	q2, q2, q7
+	vadd.i32	q3, q3, q4
+
+	veor		q15, q15, q0
+	veor		q12, q12, q1
+	veor		q13, q13, q2
+	veor		q14, q14, q3
+
+	vtbl.8		d30, {d30}, d16
+	vtbl.8		d31, {d31}, d16
+	vtbl.8		d24, {d24}, d16
+	vtbl.8		d25, {d25}, d16
+	vtbl.8		d26, {d26}, d16
+	vtbl.8		d27, {d27}, d16
+	vtbl.8		d28, {d28}, d16
+	vtbl.8		d29, {d29}, d16
+
+	vld1.32		{q8-q9}, [sp, :256]
+
+	// x10 += x15, x5 = rotl32(x5 ^ x10, 7)
+	// x11 += x12, x6 = rotl32(x6 ^ x11, 7)
+	// x8 += x13, x7 = rotl32(x7 ^ x8, 7)
+	// x9 += x14, x4 = rotl32(x4 ^ x9, 7)
+	vadd.i32	q10, q10, q15
+	vadd.i32	q11, q11, q12
+	vadd.i32	q8, q8, q13
+	vadd.i32	q9, q9, q14
+
+	vst1.32		{q8-q9}, [sp, :256]
+
+	veor		q8, q7, q8
+	veor		q9, q4, q9
+	vshl.u32	q7, q8, #7
+	vshl.u32	q4, q9, #7
+	vsri.u32	q7, q8, #25
+	vsri.u32	q4, q9, #25
+
+	veor		q8, q5, q10
+	veor		q9, q6, q11
+	vshl.u32	q5, q8, #7
+	vshl.u32	q6, q9, #7
+	vsri.u32	q5, q8, #25
+	vsri.u32	q6, q9, #25
+
+	subs		r3, r3, #2
+	bne		.Ldoubleround4
+
+	// x0..7[0-3] are in q0-q7, x10..15[0-3] are in q10-q15.
+	// x8..9[0-3] are on the stack.
+
+	// Re-interleave the words in the first two rows of each block (x0..7).
+	// Also add the counter values 0-3 to x12[0-3].
+	  vld1.32	{q8}, [r5, :128]	// load counter values 0-3
+	vzip.32		q0, q1			// => (0 1 0 1) (0 1 0 1)
+	vzip.32		q2, q3			// => (2 3 2 3) (2 3 2 3)
+	vzip.32		q4, q5			// => (4 5 4 5) (4 5 4 5)
+	vzip.32		q6, q7			// => (6 7 6 7) (6 7 6 7)
+	  vadd.u32	q12, q8			// x12 += counter values 0-3
+	vswp		d1, d4
+	vswp		d3, d6
+	  vld1.32	{q8-q9}, [r0]!		// load s0..7
+	vswp		d9, d12
+	vswp		d11, d14
+
+	// Swap q1 and q4 so that we'll free up consecutive registers (q0-q1)
+	// after XORing the first 32 bytes.
+	vswp		q1, q4
+
+	// First two rows of each block are (q0 q1) (q2 q6) (q4 q5) (q3 q7)
+
+	// x0..3[0-3] += s0..3[0-3]	(add orig state to 1st row of each block)
+	vadd.u32	q0, q0, q8
+	vadd.u32	q2, q2, q8
+	vadd.u32	q4, q4, q8
+	vadd.u32	q3, q3, q8
+
+	// x4..7[0-3] += s4..7[0-3]	(add orig state to 2nd row of each block)
+	vadd.u32	q1, q1, q9
+	vadd.u32	q6, q6, q9
+	vadd.u32	q5, q5, q9
+	vadd.u32	q7, q7, q9
+
+	// XOR first 32 bytes using keystream from first two rows of first block
+	vld1.8		{q8-q9}, [r2]!
+	veor		q8, q8, q0
+	veor		q9, q9, q1
+	vst1.8		{q8-q9}, [r1]!
+
+	// Re-interleave the words in the last two rows of each block (x8..15).
+	vld1.32		{q8-q9}, [sp, :256]
+	vzip.32		q12, q13	// => (12 13 12 13) (12 13 12 13)
+	vzip.32		q14, q15	// => (14 15 14 15) (14 15 14 15)
+	vzip.32		q8, q9		// => (8 9 8 9) (8 9 8 9)
+	vzip.32		q10, q11	// => (10 11 10 11) (10 11 10 11)
+	  vld1.32	{q0-q1}, [r0]	// load s8..15
+	vswp		d25, d28
+	vswp		d27, d30
+	vswp		d17, d20
+	vswp		d19, d22
+
+	// Last two rows of each block are (q8 q12) (q10 q14) (q9 q13) (q11 q15)
+
+	// x8..11[0-3] += s8..11[0-3]	(add orig state to 3rd row of each block)
+	vadd.u32	q8,  q8,  q0
+	vadd.u32	q10, q10, q0
+	vadd.u32	q9,  q9,  q0
+	vadd.u32	q11, q11, q0
+
+	// x12..15[0-3] += s12..15[0-3] (add orig state to 4th row of each block)
+	vadd.u32	q12, q12, q1
+	vadd.u32	q14, q14, q1
+	vadd.u32	q13, q13, q1
+	vadd.u32	q15, q15, q1
+
+	// XOR the rest of the data with the keystream
+
+	vld1.8		{q0-q1}, [r2]!
+	veor		q0, q0, q8
+	veor		q1, q1, q12
+	vst1.8		{q0-q1}, [r1]!
+
+	vld1.8		{q0-q1}, [r2]!
+	veor		q0, q0, q2
+	veor		q1, q1, q6
+	vst1.8		{q0-q1}, [r1]!
+
+	vld1.8		{q0-q1}, [r2]!
+	veor		q0, q0, q10
+	veor		q1, q1, q14
+	vst1.8		{q0-q1}, [r1]!
+
+	vld1.8		{q0-q1}, [r2]!
+	veor		q0, q0, q4
+	veor		q1, q1, q5
+	vst1.8		{q0-q1}, [r1]!
+
+	vld1.8		{q0-q1}, [r2]!
+	veor		q0, q0, q9
+	veor		q1, q1, q13
+	vst1.8		{q0-q1}, [r1]!
+
+	vld1.8		{q0-q1}, [r2]!
+	veor		q0, q0, q3
+	veor		q1, q1, q7
+	vst1.8		{q0-q1}, [r1]!
+
+	vld1.8		{q0-q1}, [r2]
+	  mov		sp, r4		// restore original stack pointer
+	veor		q0, q0, q11
+	veor		q1, q1, q15
+	vst1.8		{q0-q1}, [r1]
+
+	pop		{r4-r5}
+	bx		lr
+ENDPROC(chacha_4block_xor_neon)
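For reference, the add, XOR and rotate pattern annotated in the comments above (rotations by 16, 12, 8 and 7, applied first to the columns and then to the diagonals of the state) is the standard ChaCha quarter-round. Below is a minimal scalar C sketch of one double round, shown purely for comparison with the vectorised code and not part of this patch; rol32() is the kernel's rotate-left helper from <linux/bitops.h>.

#include <linux/bitops.h>	/* rol32() */
#include <linux/types.h>

static inline void chacha_quarterround(u32 x[16], int a, int b, int c, int d)
{
	x[a] += x[b];  x[d] = rol32(x[d] ^ x[a], 16);
	x[c] += x[d];  x[b] = rol32(x[b] ^ x[c], 12);
	x[a] += x[b];  x[d] = rol32(x[d] ^ x[a], 8);
	x[c] += x[d];  x[b] = rol32(x[b] ^ x[c], 7);
}

/* One double round: four column rounds followed by four diagonal rounds. */
static inline void chacha_doubleround(u32 state[16])
{
	chacha_quarterround(state, 0, 4,  8, 12);
	chacha_quarterround(state, 1, 5,  9, 13);
	chacha_quarterround(state, 2, 6, 10, 14);
	chacha_quarterround(state, 3, 7, 11, 15);

	chacha_quarterround(state, 0, 5, 10, 15);
	chacha_quarterround(state, 1, 6, 11, 12);
	chacha_quarterround(state, 2, 7,  8, 13);
	chacha_quarterround(state, 3, 4,  9, 14);
}

The .Ldoubleround4 loop above performs exactly this sequence with one ChaCha block held in each 32-bit lane of the q registers, which is why each rol32() maps onto a vrev32.16 or a vshl.u32/vsri.u32 pair.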
diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c
new file mode 100644
index 0000000..9d6fda8
--- /dev/null
+++ b/arch/arm/crypto/chacha-neon-glue.c
@@ -0,0 +1,201 @@
+/*
+ * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
+ *
+ * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on:
+ * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/chacha.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+				      int nrounds);
+asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+				       int nrounds);
+asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
+
+static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
+			  unsigned int bytes, int nrounds)
+{
+	u8 buf[CHACHA_BLOCK_SIZE];
+
+	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
+		chacha_4block_xor_neon(state, dst, src, nrounds);
+		bytes -= CHACHA_BLOCK_SIZE * 4;
+		src += CHACHA_BLOCK_SIZE * 4;
+		dst += CHACHA_BLOCK_SIZE * 4;
+		state[12] += 4;
+	}
+	while (bytes >= CHACHA_BLOCK_SIZE) {
+		chacha_block_xor_neon(state, dst, src, nrounds);
+		bytes -= CHACHA_BLOCK_SIZE;
+		src += CHACHA_BLOCK_SIZE;
+		dst += CHACHA_BLOCK_SIZE;
+		state[12]++;
+	}
+	if (bytes) {
+		memcpy(buf, src, bytes);
+		chacha_block_xor_neon(state, buf, buf, nrounds);
+		memcpy(dst, buf, bytes);
+	}
+}
+
+static int chacha_neon_stream_xor(struct skcipher_request *req,
+				  struct chacha_ctx *ctx, u8 *iv)
+{
+	struct skcipher_walk walk;
+	u32 state[16];
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	crypto_chacha_init(state, ctx, iv);
+
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
+
+		if (nbytes < walk.total)
+			nbytes = round_down(nbytes, walk.stride);
+
+		kernel_neon_begin();
+		chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
+			      nbytes, ctx->nrounds);
+		kernel_neon_end();
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+	}
+
+	return err;
+}
+
+static int chacha_neon(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
+		return crypto_chacha_crypt(req);
+
+	return chacha_neon_stream_xor(req, ctx, req->iv);
+}
+
+static int xchacha_neon(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct chacha_ctx subctx;
+	u32 state[16];
+	u8 real_iv[16];
+
+	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
+		return crypto_xchacha_crypt(req);
+
+	crypto_chacha_init(state, ctx, req->iv);
+
+	kernel_neon_begin();
+	hchacha_block_neon(state, subctx.key, ctx->nrounds);
+	kernel_neon_end();
+	subctx.nrounds = ctx->nrounds;
+
+	memcpy(&real_iv[0], req->iv + 24, 8);
+	memcpy(&real_iv[8], req->iv + 16, 8);
+	return chacha_neon_stream_xor(req, &subctx, real_iv);
+}
+
+static struct skcipher_alg algs[] = {
+	{
+		.base.cra_name		= "chacha20",
+		.base.cra_driver_name	= "chacha20-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= CHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.walksize		= 4 * CHACHA_BLOCK_SIZE,
+		.setkey			= crypto_chacha20_setkey,
+		.encrypt		= chacha_neon,
+		.decrypt		= chacha_neon,
+	}, {
+		.base.cra_name		= "xchacha20",
+		.base.cra_driver_name	= "xchacha20-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.walksize		= 4 * CHACHA_BLOCK_SIZE,
+		.setkey			= crypto_chacha20_setkey,
+		.encrypt		= xchacha_neon,
+		.decrypt		= xchacha_neon,
+	}, {
+		.base.cra_name		= "xchacha12",
+		.base.cra_driver_name	= "xchacha12-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.walksize		= 4 * CHACHA_BLOCK_SIZE,
+		.setkey			= crypto_chacha12_setkey,
+		.encrypt		= xchacha_neon,
+		.decrypt		= xchacha_neon,
+	}
+};
+
+static int __init chacha_simd_mod_init(void)
+{
+	if (!(elf_hwcap & HWCAP_NEON))
+		return -ENODEV;
+
+	return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit chacha_simd_mod_fini(void)
+{
+	crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+module_init(chacha_simd_mod_init);
+module_exit(chacha_simd_mod_fini);
+
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-neon");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-neon");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-neon");
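The glue file above registers chacha20, xchacha20 and xchacha12 as skcipher algorithms. The sketch below shows how a caller might drive the xchacha12 instance through the standard kernel skcipher API; the buffer, key and nonce are placeholders, error handling is abbreviated, and the function is illustrative only, not part of the patch.

#include <crypto/chacha.h>
#include <crypto/skcipher.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

static int xchacha12_encrypt_example(u8 *buf, unsigned int len,
				     const u8 key[CHACHA_KEY_SIZE],
				     u8 iv[XCHACHA_IV_SIZE])
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	tfm = crypto_alloc_skcipher("xchacha12", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, CHACHA_KEY_SIZE);
	if (err)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	/* buf must not live on the stack; it is mapped via a scatterlist. */
	sg_init_one(&sg, buf, len);
	skcipher_request_set_callback(req, 0, crypto_req_done, &wait);
	skcipher_request_set_crypt(req, &sg, &sg, len, iv);
	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}

The crypto core picks the highest-priority implementation, so on NEON-capable hardware this resolves to the xchacha12-neon driver registered above.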
diff --git a/arch/arm/crypto/chacha20-neon-core.S b/arch/arm/crypto/chacha20-neon-core.S
deleted file mode 100644
index 451a849..0000000
--- a/arch/arm/crypto/chacha20-neon-core.S
+++ /dev/null
@@ -1,521 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
- *
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on:
- * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSE3 functions
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/linkage.h>
-
-	.text
-	.fpu		neon
-	.align		5
-
-ENTRY(chacha20_block_xor_neon)
-	// r0: Input state matrix, s
-	// r1: 1 data block output, o
-	// r2: 1 data block input, i
-
-	//
-	// This function encrypts one ChaCha20 block by loading the state matrix
-	// in four NEON registers. It performs matrix operation on four words in
-	// parallel, but requireds shuffling to rearrange the words after each
-	// round.
-	//
-
-	// x0..3 = s0..3
-	add		ip, r0, #0x20
-	vld1.32		{q0-q1}, [r0]
-	vld1.32		{q2-q3}, [ip]
-
-	vmov		q8, q0
-	vmov		q9, q1
-	vmov		q10, q2
-	vmov		q11, q3
-
-	mov		r3, #10
-
-.Ldoubleround:
-	// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
-	vadd.i32	q0, q0, q1
-	veor		q3, q3, q0
-	vrev32.16	q3, q3
-
-	// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
-	vadd.i32	q2, q2, q3
-	veor		q4, q1, q2
-	vshl.u32	q1, q4, #12
-	vsri.u32	q1, q4, #20
-
-	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
-	vadd.i32	q0, q0, q1
-	veor		q4, q3, q0
-	vshl.u32	q3, q4, #8
-	vsri.u32	q3, q4, #24
-
-	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
-	vadd.i32	q2, q2, q3
-	veor		q4, q1, q2
-	vshl.u32	q1, q4, #7
-	vsri.u32	q1, q4, #25
-
-	// x1 = shuffle32(x1, MASK(0, 3, 2, 1))
-	vext.8		q1, q1, q1, #4
-	// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
-	vext.8		q2, q2, q2, #8
-	// x3 = shuffle32(x3, MASK(2, 1, 0, 3))
-	vext.8		q3, q3, q3, #12
-
-	// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
-	vadd.i32	q0, q0, q1
-	veor		q3, q3, q0
-	vrev32.16	q3, q3
-
-	// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
-	vadd.i32	q2, q2, q3
-	veor		q4, q1, q2
-	vshl.u32	q1, q4, #12
-	vsri.u32	q1, q4, #20
-
-	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
-	vadd.i32	q0, q0, q1
-	veor		q4, q3, q0
-	vshl.u32	q3, q4, #8
-	vsri.u32	q3, q4, #24
-
-	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
-	vadd.i32	q2, q2, q3
-	veor		q4, q1, q2
-	vshl.u32	q1, q4, #7
-	vsri.u32	q1, q4, #25
-
-	// x1 = shuffle32(x1, MASK(2, 1, 0, 3))
-	vext.8		q1, q1, q1, #12
-	// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
-	vext.8		q2, q2, q2, #8
-	// x3 = shuffle32(x3, MASK(0, 3, 2, 1))
-	vext.8		q3, q3, q3, #4
-
-	subs		r3, r3, #1
-	bne		.Ldoubleround
-
-	add		ip, r2, #0x20
-	vld1.8		{q4-q5}, [r2]
-	vld1.8		{q6-q7}, [ip]
-
-	// o0 = i0 ^ (x0 + s0)
-	vadd.i32	q0, q0, q8
-	veor		q0, q0, q4
-
-	// o1 = i1 ^ (x1 + s1)
-	vadd.i32	q1, q1, q9
-	veor		q1, q1, q5
-
-	// o2 = i2 ^ (x2 + s2)
-	vadd.i32	q2, q2, q10
-	veor		q2, q2, q6
-
-	// o3 = i3 ^ (x3 + s3)
-	vadd.i32	q3, q3, q11
-	veor		q3, q3, q7
-
-	add		ip, r1, #0x20
-	vst1.8		{q0-q1}, [r1]
-	vst1.8		{q2-q3}, [ip]
-
-	bx		lr
-ENDPROC(chacha20_block_xor_neon)
-
-	.align		5
-ENTRY(chacha20_4block_xor_neon)
-	push		{r4-r6, lr}
-	mov		ip, sp			// preserve the stack pointer
-	sub		r3, sp, #0x20		// allocate a 32 byte buffer
-	bic		r3, r3, #0x1f		// aligned to 32 bytes
-	mov		sp, r3
-
-	// r0: Input state matrix, s
-	// r1: 4 data blocks output, o
-	// r2: 4 data blocks input, i
-
-	//
-	// This function encrypts four consecutive ChaCha20 blocks by loading
-	// the state matrix in NEON registers four times. The algorithm performs
-	// each operation on the corresponding word of each state matrix, hence
-	// requires no word shuffling. For final XORing step we transpose the
-	// matrix by interleaving 32- and then 64-bit words, which allows us to
-	// do XOR in NEON registers.
-	//
-
-	// x0..15[0-3] = s0..3[0..3]
-	add		r3, r0, #0x20
-	vld1.32		{q0-q1}, [r0]
-	vld1.32		{q2-q3}, [r3]
-
-	adr		r3, CTRINC
-	vdup.32		q15, d7[1]
-	vdup.32		q14, d7[0]
-	vld1.32		{q11}, [r3, :128]
-	vdup.32		q13, d6[1]
-	vdup.32		q12, d6[0]
-	vadd.i32	q12, q12, q11		// x12 += counter values 0-3
-	vdup.32		q11, d5[1]
-	vdup.32		q10, d5[0]
-	vdup.32		q9, d4[1]
-	vdup.32		q8, d4[0]
-	vdup.32		q7, d3[1]
-	vdup.32		q6, d3[0]
-	vdup.32		q5, d2[1]
-	vdup.32		q4, d2[0]
-	vdup.32		q3, d1[1]
-	vdup.32		q2, d1[0]
-	vdup.32		q1, d0[1]
-	vdup.32		q0, d0[0]
-
-	mov		r3, #10
-
-.Ldoubleround4:
-	// x0 += x4, x12 = rotl32(x12 ^ x0, 16)
-	// x1 += x5, x13 = rotl32(x13 ^ x1, 16)
-	// x2 += x6, x14 = rotl32(x14 ^ x2, 16)
-	// x3 += x7, x15 = rotl32(x15 ^ x3, 16)
-	vadd.i32	q0, q0, q4
-	vadd.i32	q1, q1, q5
-	vadd.i32	q2, q2, q6
-	vadd.i32	q3, q3, q7
-
-	veor		q12, q12, q0
-	veor		q13, q13, q1
-	veor		q14, q14, q2
-	veor		q15, q15, q3
-
-	vrev32.16	q12, q12
-	vrev32.16	q13, q13
-	vrev32.16	q14, q14
-	vrev32.16	q15, q15
-
-	// x8 += x12, x4 = rotl32(x4 ^ x8, 12)
-	// x9 += x13, x5 = rotl32(x5 ^ x9, 12)
-	// x10 += x14, x6 = rotl32(x6 ^ x10, 12)
-	// x11 += x15, x7 = rotl32(x7 ^ x11, 12)
-	vadd.i32	q8, q8, q12
-	vadd.i32	q9, q9, q13
-	vadd.i32	q10, q10, q14
-	vadd.i32	q11, q11, q15
-
-	vst1.32		{q8-q9}, [sp, :256]
-
-	veor		q8, q4, q8
-	veor		q9, q5, q9
-	vshl.u32	q4, q8, #12
-	vshl.u32	q5, q9, #12
-	vsri.u32	q4, q8, #20
-	vsri.u32	q5, q9, #20
-
-	veor		q8, q6, q10
-	veor		q9, q7, q11
-	vshl.u32	q6, q8, #12
-	vshl.u32	q7, q9, #12
-	vsri.u32	q6, q8, #20
-	vsri.u32	q7, q9, #20
-
-	// x0 += x4, x12 = rotl32(x12 ^ x0, 8)
-	// x1 += x5, x13 = rotl32(x13 ^ x1, 8)
-	// x2 += x6, x14 = rotl32(x14 ^ x2, 8)
-	// x3 += x7, x15 = rotl32(x15 ^ x3, 8)
-	vadd.i32	q0, q0, q4
-	vadd.i32	q1, q1, q5
-	vadd.i32	q2, q2, q6
-	vadd.i32	q3, q3, q7
-
-	veor		q8, q12, q0
-	veor		q9, q13, q1
-	vshl.u32	q12, q8, #8
-	vshl.u32	q13, q9, #8
-	vsri.u32	q12, q8, #24
-	vsri.u32	q13, q9, #24
-
-	veor		q8, q14, q2
-	veor		q9, q15, q3
-	vshl.u32	q14, q8, #8
-	vshl.u32	q15, q9, #8
-	vsri.u32	q14, q8, #24
-	vsri.u32	q15, q9, #24
-
-	vld1.32		{q8-q9}, [sp, :256]
-
-	// x8 += x12, x4 = rotl32(x4 ^ x8, 7)
-	// x9 += x13, x5 = rotl32(x5 ^ x9, 7)
-	// x10 += x14, x6 = rotl32(x6 ^ x10, 7)
-	// x11 += x15, x7 = rotl32(x7 ^ x11, 7)
-	vadd.i32	q8, q8, q12
-	vadd.i32	q9, q9, q13
-	vadd.i32	q10, q10, q14
-	vadd.i32	q11, q11, q15
-
-	vst1.32		{q8-q9}, [sp, :256]
-
-	veor		q8, q4, q8
-	veor		q9, q5, q9
-	vshl.u32	q4, q8, #7
-	vshl.u32	q5, q9, #7
-	vsri.u32	q4, q8, #25
-	vsri.u32	q5, q9, #25
-
-	veor		q8, q6, q10
-	veor		q9, q7, q11
-	vshl.u32	q6, q8, #7
-	vshl.u32	q7, q9, #7
-	vsri.u32	q6, q8, #25
-	vsri.u32	q7, q9, #25
-
-	vld1.32		{q8-q9}, [sp, :256]
-
-	// x0 += x5, x15 = rotl32(x15 ^ x0, 16)
-	// x1 += x6, x12 = rotl32(x12 ^ x1, 16)
-	// x2 += x7, x13 = rotl32(x13 ^ x2, 16)
-	// x3 += x4, x14 = rotl32(x14 ^ x3, 16)
-	vadd.i32	q0, q0, q5
-	vadd.i32	q1, q1, q6
-	vadd.i32	q2, q2, q7
-	vadd.i32	q3, q3, q4
-
-	veor		q15, q15, q0
-	veor		q12, q12, q1
-	veor		q13, q13, q2
-	veor		q14, q14, q3
-
-	vrev32.16	q15, q15
-	vrev32.16	q12, q12
-	vrev32.16	q13, q13
-	vrev32.16	q14, q14
-
-	// x10 += x15, x5 = rotl32(x5 ^ x10, 12)
-	// x11 += x12, x6 = rotl32(x6 ^ x11, 12)
-	// x8 += x13, x7 = rotl32(x7 ^ x8, 12)
-	// x9 += x14, x4 = rotl32(x4 ^ x9, 12)
-	vadd.i32	q10, q10, q15
-	vadd.i32	q11, q11, q12
-	vadd.i32	q8, q8, q13
-	vadd.i32	q9, q9, q14
-
-	vst1.32		{q8-q9}, [sp, :256]
-
-	veor		q8, q7, q8
-	veor		q9, q4, q9
-	vshl.u32	q7, q8, #12
-	vshl.u32	q4, q9, #12
-	vsri.u32	q7, q8, #20
-	vsri.u32	q4, q9, #20
-
-	veor		q8, q5, q10
-	veor		q9, q6, q11
-	vshl.u32	q5, q8, #12
-	vshl.u32	q6, q9, #12
-	vsri.u32	q5, q8, #20
-	vsri.u32	q6, q9, #20
-
-	// x0 += x5, x15 = rotl32(x15 ^ x0, 8)
-	// x1 += x6, x12 = rotl32(x12 ^ x1, 8)
-	// x2 += x7, x13 = rotl32(x13 ^ x2, 8)
-	// x3 += x4, x14 = rotl32(x14 ^ x3, 8)
-	vadd.i32	q0, q0, q5
-	vadd.i32	q1, q1, q6
-	vadd.i32	q2, q2, q7
-	vadd.i32	q3, q3, q4
-
-	veor		q8, q15, q0
-	veor		q9, q12, q1
-	vshl.u32	q15, q8, #8
-	vshl.u32	q12, q9, #8
-	vsri.u32	q15, q8, #24
-	vsri.u32	q12, q9, #24
-
-	veor		q8, q13, q2
-	veor		q9, q14, q3
-	vshl.u32	q13, q8, #8
-	vshl.u32	q14, q9, #8
-	vsri.u32	q13, q8, #24
-	vsri.u32	q14, q9, #24
-
-	vld1.32		{q8-q9}, [sp, :256]
-
-	// x10 += x15, x5 = rotl32(x5 ^ x10, 7)
-	// x11 += x12, x6 = rotl32(x6 ^ x11, 7)
-	// x8 += x13, x7 = rotl32(x7 ^ x8, 7)
-	// x9 += x14, x4 = rotl32(x4 ^ x9, 7)
-	vadd.i32	q10, q10, q15
-	vadd.i32	q11, q11, q12
-	vadd.i32	q8, q8, q13
-	vadd.i32	q9, q9, q14
-
-	vst1.32		{q8-q9}, [sp, :256]
-
-	veor		q8, q7, q8
-	veor		q9, q4, q9
-	vshl.u32	q7, q8, #7
-	vshl.u32	q4, q9, #7
-	vsri.u32	q7, q8, #25
-	vsri.u32	q4, q9, #25
-
-	veor		q8, q5, q10
-	veor		q9, q6, q11
-	vshl.u32	q5, q8, #7
-	vshl.u32	q6, q9, #7
-	vsri.u32	q5, q8, #25
-	vsri.u32	q6, q9, #25
-
-	subs		r3, r3, #1
-	beq		0f
-
-	vld1.32		{q8-q9}, [sp, :256]
-	b		.Ldoubleround4
-
-	// x0[0-3] += s0[0]
-	// x1[0-3] += s0[1]
-	// x2[0-3] += s0[2]
-	// x3[0-3] += s0[3]
-0:	ldmia		r0!, {r3-r6}
-	vdup.32		q8, r3
-	vdup.32		q9, r4
-	vadd.i32	q0, q0, q8
-	vadd.i32	q1, q1, q9
-	vdup.32		q8, r5
-	vdup.32		q9, r6
-	vadd.i32	q2, q2, q8
-	vadd.i32	q3, q3, q9
-
-	// x4[0-3] += s1[0]
-	// x5[0-3] += s1[1]
-	// x6[0-3] += s1[2]
-	// x7[0-3] += s1[3]
-	ldmia		r0!, {r3-r6}
-	vdup.32		q8, r3
-	vdup.32		q9, r4
-	vadd.i32	q4, q4, q8
-	vadd.i32	q5, q5, q9
-	vdup.32		q8, r5
-	vdup.32		q9, r6
-	vadd.i32	q6, q6, q8
-	vadd.i32	q7, q7, q9
-
-	// interleave 32-bit words in state n, n+1
-	vzip.32		q0, q1
-	vzip.32		q2, q3
-	vzip.32		q4, q5
-	vzip.32		q6, q7
-
-	// interleave 64-bit words in state n, n+2
-	vswp		d1, d4
-	vswp		d3, d6
-	vswp		d9, d12
-	vswp		d11, d14
-
-	// xor with corresponding input, write to output
-	vld1.8		{q8-q9}, [r2]!
-	veor		q8, q8, q0
-	veor		q9, q9, q4
-	vst1.8		{q8-q9}, [r1]!
-
-	vld1.32		{q8-q9}, [sp, :256]
-
-	// x8[0-3] += s2[0]
-	// x9[0-3] += s2[1]
-	// x10[0-3] += s2[2]
-	// x11[0-3] += s2[3]
-	ldmia		r0!, {r3-r6}
-	vdup.32		q0, r3
-	vdup.32		q4, r4
-	vadd.i32	q8, q8, q0
-	vadd.i32	q9, q9, q4
-	vdup.32		q0, r5
-	vdup.32		q4, r6
-	vadd.i32	q10, q10, q0
-	vadd.i32	q11, q11, q4
-
-	// x12[0-3] += s3[0]
-	// x13[0-3] += s3[1]
-	// x14[0-3] += s3[2]
-	// x15[0-3] += s3[3]
-	ldmia		r0!, {r3-r6}
-	vdup.32		q0, r3
-	vdup.32		q4, r4
-	adr		r3, CTRINC
-	vadd.i32	q12, q12, q0
-	vld1.32		{q0}, [r3, :128]
-	vadd.i32	q13, q13, q4
-	vadd.i32	q12, q12, q0		// x12 += counter values 0-3
-
-	vdup.32		q0, r5
-	vdup.32		q4, r6
-	vadd.i32	q14, q14, q0
-	vadd.i32	q15, q15, q4
-
-	// interleave 32-bit words in state n, n+1
-	vzip.32		q8, q9
-	vzip.32		q10, q11
-	vzip.32		q12, q13
-	vzip.32		q14, q15
-
-	// interleave 64-bit words in state n, n+2
-	vswp		d17, d20
-	vswp		d19, d22
-	vswp		d25, d28
-	vswp		d27, d30
-
-	vmov		q4, q1
-
-	vld1.8		{q0-q1}, [r2]!
-	veor		q0, q0, q8
-	veor		q1, q1, q12
-	vst1.8		{q0-q1}, [r1]!
-
-	vld1.8		{q0-q1}, [r2]!
-	veor		q0, q0, q2
-	veor		q1, q1, q6
-	vst1.8		{q0-q1}, [r1]!
-
-	vld1.8		{q0-q1}, [r2]!
-	veor		q0, q0, q10
-	veor		q1, q1, q14
-	vst1.8		{q0-q1}, [r1]!
-
-	vld1.8		{q0-q1}, [r2]!
-	veor		q0, q0, q4
-	veor		q1, q1, q5
-	vst1.8		{q0-q1}, [r1]!
-
-	vld1.8		{q0-q1}, [r2]!
-	veor		q0, q0, q9
-	veor		q1, q1, q13
-	vst1.8		{q0-q1}, [r1]!
-
-	vld1.8		{q0-q1}, [r2]!
-	veor		q0, q0, q3
-	veor		q1, q1, q7
-	vst1.8		{q0-q1}, [r1]!
-
-	vld1.8		{q0-q1}, [r2]
-	veor		q0, q0, q11
-	veor		q1, q1, q15
-	vst1.8		{q0-q1}, [r1]
-
-	mov		sp, ip
-	pop		{r4-r6, pc}
-ENDPROC(chacha20_4block_xor_neon)
-
-	.align		4
-CTRINC:	.word		0, 1, 2, 3
diff --git a/arch/arm/crypto/chacha20-neon-glue.c b/arch/arm/crypto/chacha20-neon-glue.c
deleted file mode 100644
index 59a7be0..0000000
--- a/arch/arm/crypto/chacha20-neon-glue.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
- *
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on:
- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/chacha20.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
-asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
-
-static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
-			    unsigned int bytes)
-{
-	u8 buf[CHACHA20_BLOCK_SIZE];
-
-	while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
-		chacha20_4block_xor_neon(state, dst, src);
-		bytes -= CHACHA20_BLOCK_SIZE * 4;
-		src += CHACHA20_BLOCK_SIZE * 4;
-		dst += CHACHA20_BLOCK_SIZE * 4;
-		state[12] += 4;
-	}
-	while (bytes >= CHACHA20_BLOCK_SIZE) {
-		chacha20_block_xor_neon(state, dst, src);
-		bytes -= CHACHA20_BLOCK_SIZE;
-		src += CHACHA20_BLOCK_SIZE;
-		dst += CHACHA20_BLOCK_SIZE;
-		state[12]++;
-	}
-	if (bytes) {
-		memcpy(buf, src, bytes);
-		chacha20_block_xor_neon(state, buf, buf);
-		memcpy(dst, buf, bytes);
-	}
-}
-
-static int chacha20_neon(struct skcipher_request *req)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct skcipher_walk walk;
-	u32 state[16];
-	int err;
-
-	if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd())
-		return crypto_chacha20_crypt(req);
-
-	err = skcipher_walk_virt(&walk, req, true);
-
-	crypto_chacha20_init(state, ctx, walk.iv);
-
-	kernel_neon_begin();
-	while (walk.nbytes > 0) {
-		unsigned int nbytes = walk.nbytes;
-
-		if (nbytes < walk.total)
-			nbytes = round_down(nbytes, walk.stride);
-
-		chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
-				nbytes);
-		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
-	}
-	kernel_neon_end();
-
-	return err;
-}
-
-static struct skcipher_alg alg = {
-	.base.cra_name		= "chacha20",
-	.base.cra_driver_name	= "chacha20-neon",
-	.base.cra_priority	= 300,
-	.base.cra_blocksize	= 1,
-	.base.cra_ctxsize	= sizeof(struct chacha20_ctx),
-	.base.cra_module	= THIS_MODULE,
-
-	.min_keysize		= CHACHA20_KEY_SIZE,
-	.max_keysize		= CHACHA20_KEY_SIZE,
-	.ivsize			= CHACHA20_IV_SIZE,
-	.chunksize		= CHACHA20_BLOCK_SIZE,
-	.walksize		= 4 * CHACHA20_BLOCK_SIZE,
-	.setkey			= crypto_chacha20_setkey,
-	.encrypt		= chacha20_neon,
-	.decrypt		= chacha20_neon,
-};
-
-static int __init chacha20_simd_mod_init(void)
-{
-	if (!(elf_hwcap & HWCAP_NEON))
-		return -ENODEV;
-
-	return crypto_register_skcipher(&alg);
-}
-
-static void __exit chacha20_simd_mod_fini(void)
-{
-	crypto_unregister_skcipher(&alg);
-}
-
-module_init(chacha20_simd_mod_init);
-module_exit(chacha20_simd_mod_fini);
-
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("chacha20");
diff --git a/arch/arm/crypto/nh-neon-core.S b/arch/arm/crypto/nh-neon-core.S
new file mode 100644
index 0000000..434d80a
--- /dev/null
+++ b/arch/arm/crypto/nh-neon-core.S
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * NH - ε-almost-universal hash function, NEON accelerated version
+ *
+ * Copyright 2018 Google LLC
+ *
+ * Author: Eric Biggers <ebiggers@google.com>
+ */
+
+#include <linux/linkage.h>
+
+	.text
+	.fpu		neon
+
+	KEY		.req	r0
+	MESSAGE		.req	r1
+	MESSAGE_LEN	.req	r2
+	HASH		.req	r3
+
+	PASS0_SUMS	.req	q0
+	PASS0_SUM_A	.req	d0
+	PASS0_SUM_B	.req	d1
+	PASS1_SUMS	.req	q1
+	PASS1_SUM_A	.req	d2
+	PASS1_SUM_B	.req	d3
+	PASS2_SUMS	.req	q2
+	PASS2_SUM_A	.req	d4
+	PASS2_SUM_B	.req	d5
+	PASS3_SUMS	.req	q3
+	PASS3_SUM_A	.req	d6
+	PASS3_SUM_B	.req	d7
+	K0		.req	q4
+	K1		.req	q5
+	K2		.req	q6
+	K3		.req	q7
+	T0		.req	q8
+	T0_L		.req	d16
+	T0_H		.req	d17
+	T1		.req	q9
+	T1_L		.req	d18
+	T1_H		.req	d19
+	T2		.req	q10
+	T2_L		.req	d20
+	T2_H		.req	d21
+	T3		.req	q11
+	T3_L		.req	d22
+	T3_H		.req	d23
+
+.macro _nh_stride	k0, k1, k2, k3
+
+	// Load next message stride
+	vld1.8		{T3}, [MESSAGE]!
+
+	// Load next key stride
+	vld1.32		{\k3}, [KEY]!
+
+	// Add message words to key words
+	vadd.u32	T0, T3, \k0
+	vadd.u32	T1, T3, \k1
+	vadd.u32	T2, T3, \k2
+	vadd.u32	T3, T3, \k3
+
+	// Multiply 32x32 => 64 and accumulate
+	vmlal.u32	PASS0_SUMS, T0_L, T0_H
+	vmlal.u32	PASS1_SUMS, T1_L, T1_H
+	vmlal.u32	PASS2_SUMS, T2_L, T2_H
+	vmlal.u32	PASS3_SUMS, T3_L, T3_H
+.endm
+
+/*
+ * void nh_neon(const u32 *key, const u8 *message, size_t message_len,
+ *		u8 hash[NH_HASH_BYTES])
+ *
+ * It's guaranteed that message_len % 16 == 0.
+ */
+ENTRY(nh_neon)
+
+	vld1.32		{K0,K1}, [KEY]!
+	  vmov.u64	PASS0_SUMS, #0
+	  vmov.u64	PASS1_SUMS, #0
+	vld1.32		{K2}, [KEY]!
+	  vmov.u64	PASS2_SUMS, #0
+	  vmov.u64	PASS3_SUMS, #0
+
+	subs		MESSAGE_LEN, MESSAGE_LEN, #64
+	blt		.Lloop4_done
+.Lloop4:
+	_nh_stride	K0, K1, K2, K3
+	_nh_stride	K1, K2, K3, K0
+	_nh_stride	K2, K3, K0, K1
+	_nh_stride	K3, K0, K1, K2
+	subs		MESSAGE_LEN, MESSAGE_LEN, #64
+	bge		.Lloop4
+
+.Lloop4_done:
+	ands		MESSAGE_LEN, MESSAGE_LEN, #63
+	beq		.Ldone
+	_nh_stride	K0, K1, K2, K3
+
+	subs		MESSAGE_LEN, MESSAGE_LEN, #16
+	beq		.Ldone
+	_nh_stride	K1, K2, K3, K0
+
+	subs		MESSAGE_LEN, MESSAGE_LEN, #16
+	beq		.Ldone
+	_nh_stride	K2, K3, K0, K1
+
+.Ldone:
+	// Sum the accumulators for each pass, then store the sums to 'hash'
+	vadd.u64	T0_L, PASS0_SUM_A, PASS0_SUM_B
+	vadd.u64	T0_H, PASS1_SUM_A, PASS1_SUM_B
+	vadd.u64	T1_L, PASS2_SUM_A, PASS2_SUM_B
+	vadd.u64	T1_H, PASS3_SUM_A, PASS3_SUM_B
+	vst1.8		{T0-T1}, [HASH]
+	bx		lr
+ENDPROC(nh_neon)
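The _nh_stride macro above adds each 16-byte message stride to four staggered key strides and accumulates 32x32 to 64-bit products into one pair of 64-bit lanes per pass; the final vadd.u64 instructions fold each pair into one hash word. The following scalar model of that computation is derived from the assembly and shown for illustration only; it is not the kernel's generic NH implementation.

#include <asm/unaligned.h>
#include <linux/types.h>

static void nh_scalar_model(const u32 *key, const u8 *message,
			    size_t message_len, u64 hash[4])
{
	u64 sums[4][2] = { { 0 } };
	size_t stride;
	int pass;

	for (stride = 0; message_len >= 16;
	     stride++, message += 16, message_len -= 16) {
		u32 m0 = get_unaligned_le32(message + 0);
		u32 m1 = get_unaligned_le32(message + 4);
		u32 m2 = get_unaligned_le32(message + 8);
		u32 m3 = get_unaligned_le32(message + 12);

		/* Pass p uses the key words starting 4*p beyond the stride. */
		for (pass = 0; pass < 4; pass++) {
			const u32 *k = &key[4 * (stride + pass)];

			sums[pass][0] += (u64)(u32)(m0 + k[0]) *
					 (u32)(m2 + k[2]);
			sums[pass][1] += (u64)(u32)(m1 + k[1]) *
					 (u32)(m3 + k[3]);
		}
	}

	/* Mirror of the final vadd.u64: fold the two lanes of each pass. */
	for (pass = 0; pass < 4; pass++)
		hash[pass] = sums[pass][0] + sums[pass][1];
}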
diff --git a/arch/arm/crypto/nhpoly1305-neon-glue.c b/arch/arm/crypto/nhpoly1305-neon-glue.c
new file mode 100644
index 0000000..49aae87
--- /dev/null
+++ b/arch/arm/crypto/nhpoly1305-neon-glue.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum
+ * (NEON accelerated version)
+ *
+ * Copyright 2018 Google LLC
+ */
+
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <crypto/internal/hash.h>
+#include <crypto/nhpoly1305.h>
+#include <linux/module.h>
+
+asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len,
+			u8 hash[NH_HASH_BYTES]);
+
+/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */
+static void _nh_neon(const u32 *key, const u8 *message, size_t message_len,
+		     __le64 hash[NH_NUM_PASSES])
+{
+	nh_neon(key, message, message_len, (u8 *)hash);
+}
+
+static int nhpoly1305_neon_update(struct shash_desc *desc,
+				  const u8 *src, unsigned int srclen)
+{
+	if (srclen < 64 || !may_use_simd())
+		return crypto_nhpoly1305_update(desc, src, srclen);
+
+	do {
+		unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE);
+
+		kernel_neon_begin();
+		crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon);
+		kernel_neon_end();
+		src += n;
+		srclen -= n;
+	} while (srclen);
+	return 0;
+}
+
+static struct shash_alg nhpoly1305_alg = {
+	.base.cra_name		= "nhpoly1305",
+	.base.cra_driver_name	= "nhpoly1305-neon",
+	.base.cra_priority	= 200,
+	.base.cra_ctxsize	= sizeof(struct nhpoly1305_key),
+	.base.cra_module	= THIS_MODULE,
+	.digestsize		= POLY1305_DIGEST_SIZE,
+	.init			= crypto_nhpoly1305_init,
+	.update			= nhpoly1305_neon_update,
+	.final			= crypto_nhpoly1305_final,
+	.setkey			= crypto_nhpoly1305_setkey,
+	.descsize		= sizeof(struct nhpoly1305_state),
+};
+
+static int __init nhpoly1305_mod_init(void)
+{
+	if (!(elf_hwcap & HWCAP_NEON))
+		return -ENODEV;
+
+	return crypto_register_shash(&nhpoly1305_alg);
+}
+
+static void __exit nhpoly1305_mod_exit(void)
+{
+	crypto_unregister_shash(&nhpoly1305_alg);
+}
+
+module_init(nhpoly1305_mod_init);
+module_exit(nhpoly1305_mod_exit);
+
+MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function (NEON-accelerated)");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+MODULE_ALIAS_CRYPTO("nhpoly1305");
+MODULE_ALIAS_CRYPTO("nhpoly1305-neon");
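The module above registers an nhpoly1305 shash, which Adiantum uses as its fast hashing layer. A rough sketch of exercising it through the generic shash API follows; the key and data buffers are placeholders and the function is illustrative only, not part of the patch.

#include <crypto/hash.h>
#include <crypto/nhpoly1305.h>
#include <linux/err.h>

static int nhpoly1305_digest_example(const u8 key[NHPOLY1305_KEY_SIZE],
				     const u8 *data, unsigned int len,
				     u8 digest[POLY1305_DIGEST_SIZE])
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("nhpoly1305", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_shash_setkey(tfm, key, NHPOLY1305_KEY_SIZE);
	if (!err) {
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		desc->flags = 0;	/* shash_desc still has flags in 4.19 */
		err = crypto_shash_digest(desc, data, len, digest);
	}

	crypto_free_shash(tfm);
	return err;
}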
diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
index 5d88d2f..201dc20 100644
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h
@@ -30,9 +30,15 @@
 /* Replace task scheduler's default frequency-invariant accounting */
 #define arch_scale_freq_capacity topology_get_freq_scale
 
+/* Replace task scheduler's default max-frequency-invariant accounting */
+#define arch_scale_max_freq_capacity topology_get_max_freq_scale
+
 /* Replace task scheduler's default cpu-invariant accounting */
 #define arch_scale_cpu_capacity topology_get_cpu_scale
 
+/* Enable topology flag updates */
+#define arch_update_cpu_topology topology_update_cpu_topology
+
 #else
 
 static inline void init_cpu_topology(void) { }
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e3ebece..07b50bd 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -57,6 +57,7 @@
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_USE_QUEUED_SPINLOCKS
 	select ARCH_SUPPORTS_MEMORY_FAILURE
+	select ARCH_SUPPORTS_LTO_CLANG
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000 || CC_IS_CLANG
 	select ARCH_SUPPORTS_NUMA_BALANCING
@@ -108,7 +109,7 @@
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_MMAP_RND_BITS
 	select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
-	select HAVE_ARCH_PREL32_RELOCATIONS
+	select HAVE_ARCH_PREL32_RELOCATIONS if !LTO_CLANG
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_STACKLEAK
 	select HAVE_ARCH_THREAD_STRUCT_WHITELIST
@@ -1258,6 +1259,23 @@
 	  entering them here. As a minimum, you should specify the
 	  root device (e.g. root=/dev/nfs).
 
+choice
+	prompt "Kernel command line type" if CMDLINE != ""
+	default CMDLINE_FROM_BOOTLOADER
+
+config CMDLINE_FROM_BOOTLOADER
+	bool "Use bootloader kernel arguments if available"
+	help
+	  Uses the command-line options passed by the boot loader. If
+	  the boot loader doesn't provide any, the default kernel command
+	  string provided in CMDLINE will be used.
+
+config CMDLINE_EXTEND
+	bool "Extend bootloader kernel arguments"
+	help
+	  The command-line arguments provided by the boot loader will be
+	  appended to the default kernel command string.
+
 config CMDLINE_FORCE
 	bool "Always use the default kernel command string"
 	help
@@ -1265,6 +1283,7 @@
 	  loader passes other arguments to the kernel.
 	  This is useful if you cannot or don't want to change the
 	  command-line options your boot loader passes to the kernel.
+endchoice
 
 config EFI_STUB
 	bool
diff --git a/arch/arm64/boot/dts/arm/juno-r2.dts b/arch/arm64/boot/dts/arm/juno-r2.dts
index ab77adb..66f0ec7 100644
--- a/arch/arm64/boot/dts/arm/juno-r2.dts
+++ b/arch/arm64/boot/dts/arm/juno-r2.dts
@@ -99,6 +99,7 @@
 			clocks = <&scpi_dvfs 0>;
 			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
 			capacity-dmips-mhz = <1024>;
+			dynamic-power-coefficient = <450>;
 		};
 
 		A72_1: cpu@1 {
@@ -116,6 +117,7 @@
 			clocks = <&scpi_dvfs 0>;
 			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
 			capacity-dmips-mhz = <1024>;
+			dynamic-power-coefficient = <450>;
 		};
 
 		A53_0: cpu@100 {
@@ -133,6 +135,7 @@
 			clocks = <&scpi_dvfs 1>;
 			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
 			capacity-dmips-mhz = <485>;
+			dynamic-power-coefficient = <140>;
 		};
 
 		A53_1: cpu@101 {
@@ -150,6 +153,7 @@
 			clocks = <&scpi_dvfs 1>;
 			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
 			capacity-dmips-mhz = <485>;
+			dynamic-power-coefficient = <140>;
 		};
 
 		A53_2: cpu@102 {
@@ -167,6 +171,7 @@
 			clocks = <&scpi_dvfs 1>;
 			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
 			capacity-dmips-mhz = <485>;
+			dynamic-power-coefficient = <140>;
 		};
 
 		A53_3: cpu@103 {
@@ -184,6 +189,7 @@
 			clocks = <&scpi_dvfs 1>;
 			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
 			capacity-dmips-mhz = <485>;
+			dynamic-power-coefficient = <140>;
 		};
 
 		A72_L2: l2-cache0 {
diff --git a/arch/arm64/boot/dts/arm/juno.dts b/arch/arm64/boot/dts/arm/juno.dts
index 1fb5c5a..e3069e2 100644
--- a/arch/arm64/boot/dts/arm/juno.dts
+++ b/arch/arm64/boot/dts/arm/juno.dts
@@ -98,6 +98,7 @@
 			clocks = <&scpi_dvfs 0>;
 			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
 			capacity-dmips-mhz = <1024>;
+			dynamic-power-coefficient = <530>;
 		};
 
 		A57_1: cpu@1 {
@@ -115,6 +116,7 @@
 			clocks = <&scpi_dvfs 0>;
 			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
 			capacity-dmips-mhz = <1024>;
+			dynamic-power-coefficient = <530>;
 		};
 
 		A53_0: cpu@100 {
@@ -132,6 +134,7 @@
 			clocks = <&scpi_dvfs 1>;
 			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
 			capacity-dmips-mhz = <578>;
+			dynamic-power-coefficient = <140>;
 		};
 
 		A53_1: cpu@101 {
@@ -149,6 +152,7 @@
 			clocks = <&scpi_dvfs 1>;
 			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
 			capacity-dmips-mhz = <578>;
+			dynamic-power-coefficient = <140>;
 		};
 
 		A53_2: cpu@102 {
@@ -166,6 +170,7 @@
 			clocks = <&scpi_dvfs 1>;
 			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
 			capacity-dmips-mhz = <578>;
+			dynamic-power-coefficient = <140>;
 		};
 
 		A53_3: cpu@103 {
@@ -183,6 +188,7 @@
 			clocks = <&scpi_dvfs 1>;
 			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
 			capacity-dmips-mhz = <578>;
+			dynamic-power-coefficient = <140>;
 		};
 
 		A57_L2: l2-cache0 {
diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig
new file mode 100644
index 0000000..943f52c
--- /dev/null
+++ b/arch/arm64/configs/cuttlefish_defconfig
@@ -0,0 +1,454 @@
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_PSI=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_MEMCG=y
+CONFIG_MEMCG_SWAP=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CPUSETS=y
+# CONFIG_PROC_PID_CPUSET is not set
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_BPF=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_SCHED_TUNE=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
+# CONFIG_RD_XZ is not set
+# CONFIG_RD_LZO is not set
+# CONFIG_RD_LZ4 is not set
+CONFIG_SGETMASK_SYSCALL=y
+# CONFIG_SYSFS_SYSCALL is not set
+# CONFIG_FHANDLE is not set
+CONFIG_KALLSYMS_ALL=y
+CONFIG_BPF_SYSCALL=y
+# CONFIG_RSEQ is not set
+CONFIG_EMBEDDED=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_COMPAT_BRK is not set
+# CONFIG_SLAB_MERGE_DEFAULT is not set
+CONFIG_PROFILING=y
+CONFIG_PCI=y
+CONFIG_PCI_HOST_GENERIC=y
+CONFIG_HZ_100=y
+CONFIG_SECCOMP=y
+CONFIG_PARAVIRT=y
+CONFIG_ARMV8_DEPRECATED=y
+CONFIG_SWP_EMULATION=y
+CONFIG_CP15_BARRIER_EMULATION=y
+CONFIG_SETEND_EMULATION=y
+CONFIG_ARM64_SW_TTBR0_PAN=y
+CONFIG_RANDOMIZE_BASE=y
+# CONFIG_EFI is not set
+CONFIG_COMPAT=y
+CONFIG_PM_WAKELOCKS=y
+CONFIG_PM_WAKELOCKS_LIMIT=0
+# CONFIG_PM_WAKELOCKS_GC is not set
+CONFIG_PM_DEBUG=y
+CONFIG_ENERGY_MODEL=y
+CONFIG_CPU_IDLE=y
+CONFIG_ARM_CPUIDLE=y
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_TIMES=y
+CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
+CONFIG_CPUFREQ_DT=y
+CONFIG_ARM_BIG_LITTLE_CPUFREQ=y
+CONFIG_ARM_DT_BL_CPUFREQ=y
+CONFIG_ARM_SCPI_CPUFREQ=y
+CONFIG_ARM_SCMI_CPUFREQ=y
+CONFIG_ARM_SCMI_PROTOCOL=y
+# CONFIG_ARM_SCMI_POWER_DOMAIN is not set
+CONFIG_ARM_SCPI_PROTOCOL=y
+# CONFIG_ARM_SCPI_POWER_DOMAIN is not set
+CONFIG_KPROBES=y
+CONFIG_LTO_CLANG=y
+CONFIG_CFI_CLANG=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_SPARSEMEM_VMEMMAP is not set
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_ZSMALLOC=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_XFRM_INTERFACE=y
+CONFIG_XFRM_STATISTICS=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_NET_IPGRE_DEMUX=y
+CONFIG_NET_IPVTI=y
+CONFIG_INET_ESP=y
+# CONFIG_INET_XFRM_MODE_BEET is not set
+CONFIG_INET_UDP_DIAG=y
+CONFIG_INET_DIAG_DESTROY=y
+CONFIG_TCP_CONG_ADVANCED=y
+# CONFIG_TCP_CONG_BIC is not set
+# CONFIG_TCP_CONG_WESTWOOD is not set
+# CONFIG_TCP_CONG_HTCP is not set
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_INET6_ESP=y
+CONFIG_INET6_IPCOMP=y
+CONFIG_IPV6_MIP6=y
+CONFIG_IPV6_VTI=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_AMANDA=y
+CONFIG_NF_CONNTRACK_FTP=y
+CONFIG_NF_CONNTRACK_H323=y
+CONFIG_NF_CONNTRACK_IRC=y
+CONFIG_NF_CONNTRACK_NETBIOS_NS=y
+CONFIG_NF_CONNTRACK_PPTP=y
+CONFIG_NF_CONNTRACK_SANE=y
+CONFIG_NF_CONNTRACK_TFTP=y
+CONFIG_NF_CT_NETLINK=y
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=y
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
+CONFIG_NETFILTER_XT_TARGET_CT=y
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NETFILTER_XT_TARGET_NFLOG=y
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y
+CONFIG_NETFILTER_XT_TARGET_TPROXY=y
+CONFIG_NETFILTER_XT_TARGET_TRACE=y
+CONFIG_NETFILTER_XT_TARGET_SECMARK=y
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
+CONFIG_NETFILTER_XT_MATCH_BPF=y
+CONFIG_NETFILTER_XT_MATCH_COMMENT=y
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=y
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_HELPER=y
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=y
+CONFIG_NETFILTER_XT_MATCH_LENGTH=y
+CONFIG_NETFILTER_XT_MATCH_LIMIT=y
+CONFIG_NETFILTER_XT_MATCH_MAC=y
+CONFIG_NETFILTER_XT_MATCH_MARK=y
+CONFIG_NETFILTER_XT_MATCH_OWNER=y
+CONFIG_NETFILTER_XT_MATCH_POLICY=y
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA2=y
+CONFIG_NETFILTER_XT_MATCH_SOCKET=y
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
+CONFIG_NETFILTER_XT_MATCH_STRING=y
+CONFIG_NETFILTER_XT_MATCH_TIME=y
+CONFIG_NETFILTER_XT_MATCH_U32=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_MATCH_ECN=y
+CONFIG_IP_NF_MATCH_TTL=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+CONFIG_IP_NF_NAT=y
+CONFIG_IP_NF_TARGET_MASQUERADE=y
+CONFIG_IP_NF_TARGET_NETMAP=y
+CONFIG_IP_NF_TARGET_REDIRECT=y
+CONFIG_IP_NF_MANGLE=y
+CONFIG_IP_NF_RAW=y
+CONFIG_IP_NF_SECURITY=y
+CONFIG_IP_NF_ARPTABLES=y
+CONFIG_IP_NF_ARPFILTER=y
+CONFIG_IP_NF_ARP_MANGLE=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP6_NF_MATCH_RPFILTER=y
+CONFIG_IP6_NF_FILTER=y
+CONFIG_IP6_NF_TARGET_REJECT=y
+CONFIG_IP6_NF_MANGLE=y
+CONFIG_IP6_NF_RAW=y
+CONFIG_L2TP=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_HTB=y
+CONFIG_NET_SCH_NETEM=y
+CONFIG_NET_SCH_INGRESS=y
+CONFIG_NET_CLS_U32=y
+CONFIG_NET_CLS_BPF=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_U32=y
+CONFIG_NET_CLS_ACT=y
+CONFIG_VSOCKETS=y
+CONFIG_VIRTIO_VSOCKETS=y
+CONFIG_CFG80211=y
+# CONFIG_CFG80211_DEFAULT_PS is not set
+# CONFIG_CFG80211_CRDA_SUPPORT is not set
+CONFIG_MAC80211=y
+# CONFIG_MAC80211_RC_MINSTREL is not set
+CONFIG_RFKILL=y
+# CONFIG_UEVENT_HELPER is not set
+# CONFIG_ALLOW_DEV_COREDUMP is not set
+CONFIG_DEBUG_DEVRES=y
+CONFIG_OF_UNITTEST=y
+CONFIG_ZRAM=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=8192
+CONFIG_VIRTIO_BLK=y
+CONFIG_UID_SYS_STATS=y
+CONFIG_SCSI=y
+# CONFIG_SCSI_MQ_DEFAULT is not set
+# CONFIG_SCSI_PROC_FS is not set
+CONFIG_BLK_DEV_SD=y
+CONFIG_SCSI_VIRTIO=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=y
+CONFIG_DM_UEVENT=y
+CONFIG_DM_VERITY=y
+CONFIG_DM_VERITY_AVB=y
+CONFIG_DM_VERITY_FEC=y
+CONFIG_DM_BOW=y
+CONFIG_NETDEVICES=y
+CONFIG_NETCONSOLE=y
+CONFIG_NETCONSOLE_DYNAMIC=y
+CONFIG_TUN=y
+CONFIG_VIRTIO_NET=y
+# CONFIG_ETHERNET is not set
+CONFIG_PHYLIB=y
+CONFIG_PPP=y
+CONFIG_PPP_BSDCOMP=y
+CONFIG_PPP_DEFLATE=y
+CONFIG_PPP_MPPE=y
+CONFIG_PPTP=y
+CONFIG_PPPOL2TP=y
+CONFIG_USB_RTL8152=y
+CONFIG_USB_USBNET=y
+# CONFIG_USB_NET_AX8817X is not set
+# CONFIG_USB_NET_AX88179_178A is not set
+# CONFIG_USB_NET_CDCETHER is not set
+# CONFIG_USB_NET_CDC_NCM is not set
+# CONFIG_USB_NET_NET1080 is not set
+# CONFIG_USB_NET_CDC_SUBSET is not set
+# CONFIG_USB_NET_ZAURUS is not set
+# CONFIG_WLAN_VENDOR_ADMTEK is not set
+# CONFIG_WLAN_VENDOR_ATH is not set
+# CONFIG_WLAN_VENDOR_ATMEL is not set
+# CONFIG_WLAN_VENDOR_BROADCOM is not set
+# CONFIG_WLAN_VENDOR_CISCO is not set
+# CONFIG_WLAN_VENDOR_INTEL is not set
+# CONFIG_WLAN_VENDOR_INTERSIL is not set
+# CONFIG_WLAN_VENDOR_MARVELL is not set
+# CONFIG_WLAN_VENDOR_MEDIATEK is not set
+# CONFIG_WLAN_VENDOR_RALINK is not set
+# CONFIG_WLAN_VENDOR_REALTEK is not set
+# CONFIG_WLAN_VENDOR_RSI is not set
+# CONFIG_WLAN_VENDOR_ST is not set
+# CONFIG_WLAN_VENDOR_TI is not set
+# CONFIG_WLAN_VENDOR_ZYDAS is not set
+# CONFIG_WLAN_VENDOR_QUANTENNA is not set
+CONFIG_VIRT_WIFI=y
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_INPUT_JOYSTICK=y
+CONFIG_JOYSTICK_XPAD=y
+CONFIG_JOYSTICK_XPAD_FF=y
+CONFIG_JOYSTICK_XPAD_LEDS=y
+CONFIG_INPUT_TABLET=y
+CONFIG_TABLET_USB_ACECAD=y
+CONFIG_TABLET_USB_AIPTEK=y
+CONFIG_TABLET_USB_GTCO=y
+CONFIG_TABLET_USB_HANWANG=y
+CONFIG_TABLET_USB_KBTAB=y
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_UINPUT=y
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVMEM is not set
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_EXAR is not set
+CONFIG_SERIAL_8250_NR_UARTS=48
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_HW_RANDOM=y
+CONFIG_HW_RANDOM_VIRTIO=y
+# CONFIG_HW_RANDOM_CAVIUM is not set
+# CONFIG_DEVPORT is not set
+# CONFIG_I2C_COMPAT is not set
+# CONFIG_I2C_HELPER_AUTO is not set
+# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+CONFIG_CPU_THERMAL=y
+CONFIG_MEDIA_SUPPORT=y
+# CONFIG_VGA_ARB is not set
+CONFIG_DRM=y
+# CONFIG_DRM_FBDEV_EMULATION is not set
+CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_SND_HRTIMER=y
+# CONFIG_SND_SUPPORT_OLD_API is not set
+# CONFIG_SND_VERBOSE_PROCFS is not set
+# CONFIG_SND_DRIVERS is not set
+CONFIG_SND_INTEL8X0=y
+# CONFIG_SND_USB is not set
+CONFIG_HIDRAW=y
+CONFIG_UHID=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_ACRUX=y
+CONFIG_HID_ACRUX_FF=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_PRODIKEYS=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_DRAGONRISE=y
+CONFIG_DRAGONRISE_FF=y
+CONFIG_HID_EMS_FF=y
+CONFIG_HID_ELECOM=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_HOLTEK=y
+CONFIG_HID_KEYTOUCH=y
+CONFIG_HID_KYE=y
+CONFIG_HID_UCLOGIC=y
+CONFIG_HID_WALTOP=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_LCPOWER=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_LOGITECH_DJ=y
+CONFIG_LOGITECH_FF=y
+CONFIG_LOGIRUMBLEPAD2_FF=y
+CONFIG_LOGIG940_FF=y
+CONFIG_HID_MAGICMOUSE=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_MULTITOUCH=y
+CONFIG_HID_NTRIG=y
+CONFIG_HID_ORTEK=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_PANTHERLORD_FF=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_PICOLCD=y
+CONFIG_HID_PRIMAX=y
+CONFIG_HID_ROCCAT=y
+CONFIG_HID_SAITEK=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SONY=y
+CONFIG_HID_SPEEDLINK=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_GREENASIA=y
+CONFIG_GREENASIA_FF=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_SMARTJOYPLUS_FF=y
+CONFIG_HID_TIVO=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_THRUSTMASTER=y
+CONFIG_HID_WACOM=y
+CONFIG_HID_WIIMOTE=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_HID_ZYDACRON=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_GADGET=y
+CONFIG_USB_CONFIGFS=y
+CONFIG_USB_CONFIGFS_UEVENT=y
+CONFIG_USB_CONFIGFS_F_FS=y
+CONFIG_USB_CONFIGFS_F_ACC=y
+CONFIG_USB_CONFIGFS_F_AUDIO_SRC=y
+CONFIG_USB_CONFIGFS_F_MIDI=y
+CONFIG_MMC=y
+# CONFIG_PWRSEQ_EMMC is not set
+# CONFIG_PWRSEQ_SIMPLE is not set
+# CONFIG_MMC_BLOCK is not set
+CONFIG_RTC_CLASS=y
+# CONFIG_RTC_SYSTOHC is not set
+CONFIG_RTC_DRV_PL030=y
+CONFIG_RTC_DRV_PL031=y
+CONFIG_VIRTIO_PCI=y
+# CONFIG_VIRTIO_PCI_LEGACY is not set
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_INPUT=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
+CONFIG_STAGING=y
+CONFIG_ASHMEM=y
+CONFIG_ANDROID_VSOC=y
+CONFIG_ION=y
+CONFIG_ION_SYSTEM_HEAP=y
+CONFIG_COMMON_CLK_SCPI=y
+# CONFIG_COMMON_CLK_XGENE is not set
+CONFIG_MAILBOX=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_ANDROID=y
+CONFIG_ANDROID_BINDER_IPC=y
+CONFIG_LEGACY_ENERGY_MODEL_DT=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_EXT4_ENCRYPTION=y
+CONFIG_F2FS_FS=y
+CONFIG_F2FS_FS_SECURITY=y
+CONFIG_F2FS_FS_ENCRYPTION=y
+# CONFIG_DNOTIFY is not set
+CONFIG_QUOTA=y
+CONFIG_QFMT_V2=y
+CONFIG_FUSE_FS=y
+CONFIG_OVERLAY_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_SDCARD_FS=y
+CONFIG_PSTORE=y
+CONFIG_PSTORE_CONSOLE=y
+CONFIG_PSTORE_RAM=y
+CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_LSM_MMAP_MIN_ADDR=65536
+CONFIG_HARDENED_USERCOPY=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_CRYPTO_ADIANTUM=y
+CONFIG_CRYPTO_SHA512=y
+CONFIG_CRYPTO_LZ4=y
+CONFIG_CRYPTO_ZSTD=y
+CONFIG_CRYPTO_ANSI_CPRNG=y
+CONFIG_CRYPTO_DEV_VIRTIO=y
+CONFIG_XZ_DEC=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+# CONFIG_DETECT_HUNG_TASK is not set
+CONFIG_PANIC_TIMEOUT=5
+CONFIG_SCHEDSTATS=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_ENABLE_DEFAULT_TRACERS=y
+# CONFIG_RUNTIME_TESTING_MENU is not set
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index db8d364..2fc45d5 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -19,9 +19,13 @@
 CONFIG_CGROUP_PIDS=y
 CONFIG_CGROUP_HUGETLB=y
 CONFIG_CPUSETS=y
+CONFIG_CGROUPS=y
+CONFIG_FAIR_GROUP_SCHED=y
+CONFIG_CGROUP_SCHED=y
 CONFIG_CGROUP_DEVICE=y
 CONFIG_CGROUP_CPUACCT=y
 CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_FREEZER=y
 CONFIG_USER_NS=y
 CONFIG_SCHED_AUTOGROUP=y
 CONFIG_BLK_DEV_INITRD=y
@@ -101,13 +105,16 @@
 CONFIG_COMPAT=y
 CONFIG_HIBERNATION=y
 CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y
+CONFIG_ENERGY_MODEL=y
+CONFIG_SCHED_TUNE=y
 CONFIG_ARM_CPUIDLE=y
 CONFIG_CPU_FREQ=y
 CONFIG_CPU_FREQ_STAT=y
-CONFIG_CPU_FREQ_GOV_POWERSAVE=m
+CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
 CONFIG_CPU_FREQ_GOV_USERSPACE=y
 CONFIG_CPU_FREQ_GOV_ONDEMAND=y
-CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
 CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
 CONFIG_CPUFREQ_DT=y
 CONFIG_ACPI_CPPC_CPUFREQ=m
diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha20-neon-glue.c
index 727579c..96e0cfb 100644
--- a/arch/arm64/crypto/chacha20-neon-glue.c
+++ b/arch/arm64/crypto/chacha20-neon-glue.c
@@ -19,7 +19,7 @@
  */
 
 #include <crypto/algapi.h>
-#include <crypto/chacha20.h>
+#include <crypto/chacha.h>
 #include <crypto/internal/skcipher.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -34,15 +34,15 @@
 static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
 			    unsigned int bytes)
 {
-	u8 buf[CHACHA20_BLOCK_SIZE];
+	u8 buf[CHACHA_BLOCK_SIZE];
 
-	while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
+	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
 		kernel_neon_begin();
 		chacha20_4block_xor_neon(state, dst, src);
 		kernel_neon_end();
-		bytes -= CHACHA20_BLOCK_SIZE * 4;
-		src += CHACHA20_BLOCK_SIZE * 4;
-		dst += CHACHA20_BLOCK_SIZE * 4;
+		bytes -= CHACHA_BLOCK_SIZE * 4;
+		src += CHACHA_BLOCK_SIZE * 4;
+		dst += CHACHA_BLOCK_SIZE * 4;
 		state[12] += 4;
 	}
 
@@ -50,11 +50,11 @@
 		return;
 
 	kernel_neon_begin();
-	while (bytes >= CHACHA20_BLOCK_SIZE) {
+	while (bytes >= CHACHA_BLOCK_SIZE) {
 		chacha20_block_xor_neon(state, dst, src);
-		bytes -= CHACHA20_BLOCK_SIZE;
-		src += CHACHA20_BLOCK_SIZE;
-		dst += CHACHA20_BLOCK_SIZE;
+		bytes -= CHACHA_BLOCK_SIZE;
+		src += CHACHA_BLOCK_SIZE;
+		dst += CHACHA_BLOCK_SIZE;
 		state[12]++;
 	}
 	if (bytes) {
@@ -68,17 +68,17 @@
 static int chacha20_neon(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct skcipher_walk walk;
 	u32 state[16];
 	int err;
 
-	if (!may_use_simd() || req->cryptlen <= CHACHA20_BLOCK_SIZE)
-		return crypto_chacha20_crypt(req);
+	if (!may_use_simd() || req->cryptlen <= CHACHA_BLOCK_SIZE)
+		return crypto_chacha_crypt(req);
 
 	err = skcipher_walk_virt(&walk, req, false);
 
-	crypto_chacha20_init(state, ctx, walk.iv);
+	crypto_chacha_init(state, ctx, walk.iv);
 
 	while (walk.nbytes > 0) {
 		unsigned int nbytes = walk.nbytes;
@@ -99,14 +99,14 @@
 	.base.cra_driver_name	= "chacha20-neon",
 	.base.cra_priority	= 300,
 	.base.cra_blocksize	= 1,
-	.base.cra_ctxsize	= sizeof(struct chacha20_ctx),
+	.base.cra_ctxsize	= sizeof(struct chacha_ctx),
 	.base.cra_module	= THIS_MODULE,
 
-	.min_keysize		= CHACHA20_KEY_SIZE,
-	.max_keysize		= CHACHA20_KEY_SIZE,
-	.ivsize			= CHACHA20_IV_SIZE,
-	.chunksize		= CHACHA20_BLOCK_SIZE,
-	.walksize		= 4 * CHACHA20_BLOCK_SIZE,
+	.min_keysize		= CHACHA_KEY_SIZE,
+	.max_keysize		= CHACHA_KEY_SIZE,
+	.ivsize			= CHACHA_IV_SIZE,
+	.chunksize		= CHACHA_BLOCK_SIZE,
+	.walksize		= 4 * CHACHA_BLOCK_SIZE,
 	.setkey			= crypto_chacha20_setkey,
 	.encrypt		= chacha20_neon,
 	.decrypt		= chacha20_neon,
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 1ed227b..bee3027 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -60,6 +60,22 @@
 				      struct ghash_key const *k,
 				      const char *head);
 
+#ifdef CONFIG_CFI_CLANG
+static inline void __cfi_pmull_ghash_update_p64(int blocks, u64 dg[],
+		const char *src, struct ghash_key const *k, const char *head)
+{
+	pmull_ghash_update_p64(blocks, dg, src, k, head);
+}
+#define pmull_ghash_update_p64 __cfi_pmull_ghash_update_p64
+
+static inline void __cfi_pmull_ghash_update_p8(int blocks, u64 dg[],
+		const char *src, struct ghash_key const *k, const char *head)
+{
+	pmull_ghash_update_p8(blocks, dg, src, k, head);
+}
+#define pmull_ghash_update_p8 __cfi_pmull_ghash_update_p8
+#endif
+
 static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
 				  struct ghash_key const *k,
 				  const char *head);
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index d8c521c..c320030 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -29,6 +29,14 @@
 
 asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
 				  int blocks);
+#ifdef CONFIG_CFI_CLANG
+static inline void __cfi_sha1_ce_transform(struct sha1_state *sst,
+					   u8 const *src, int blocks)
+{
+	sha1_ce_transform((struct sha1_ce_state *)sst, src, blocks);
+}
+#define sha1_ce_transform __cfi_sha1_ce_transform
+#endif
 
 const u32 sha1_ce_offsetof_count = offsetof(struct sha1_ce_state, sst.count);
 const u32 sha1_ce_offsetof_finalize = offsetof(struct sha1_ce_state, finalize);
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index c47d1a2..ddc56fa 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -29,6 +29,14 @@
 
 asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
 				  int blocks);
+#ifdef CONFIG_CFI_CLANG
+static inline void __cfi_sha2_ce_transform(struct sha256_state *sst,
+					   u8 const *src, int blocks)
+{
+	sha2_ce_transform((struct sha256_ce_state *)sst, src, blocks);
+}
+#define sha2_ce_transform __cfi_sha2_ce_transform
+#endif
 
 const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state,
 					      sst.count);
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 384c343..f150c0f 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -28,7 +28,7 @@
 	({								\
 		u64 reg;						\
 		asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##nvh),\
-					 "mrs_s %0, " __stringify(r##vh),\
+					 __mrs_s("%0", r##vh),		\
 					 ARM64_HAS_VIRT_HOST_EXTN)	\
 			     : "=r" (reg));				\
 		reg;							\
@@ -38,7 +38,7 @@
 	do {								\
 		u64 __val = (u64)(v);					\
 		asm volatile(ALTERNATIVE("msr " __stringify(r##nvh) ", %x0",\
-					 "msr_s " __stringify(r##vh) ", %x0",\
+					 __msr_s(r##vh, "%x0"),		\
 					 ARM64_HAS_VIRT_HOST_EXTN)	\
 					 : : "rZ" (__val));		\
 	} while (0)
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
index 8262325..8be3b0f 100644
--- a/arch/arm64/include/asm/lse.h
+++ b/arch/arm64/include/asm/lse.h
@@ -20,7 +20,12 @@
 
 #else	/* __ASSEMBLER__ */
 
+#ifdef CONFIG_LTO_CLANG
+#define __LSE_PREAMBLE	".arch armv8-a+lse\n"
+#else
 __asm__(".arch_extension	lse");
+#define __LSE_PREAMBLE
+#endif
 
 /* Move the ll/sc atomics out-of-line */
 #define __LL_SC_INLINE		notrace
@@ -33,7 +38,7 @@
 
 /* In-line patching at runtime */
 #define ARM64_LSE_ATOMIC_INSN(llsc, lse)				\
-	ALTERNATIVE(llsc, lse, ARM64_HAS_LSE_ATOMICS)
+	ALTERNATIVE(llsc, __LSE_PREAMBLE lse, ARM64_HAS_LSE_ATOMICS)
 
 #endif	/* __ASSEMBLER__ */
 #else	/* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 39ec0b8..1b7aa97 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -141,7 +141,7 @@
  * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
  * avoiding the possibility of conflicting TLB entries being allocated.
  */
-static inline void cpu_replace_ttbr1(pgd_t *pgdp)
+static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp)
 {
 	typedef void (ttbr_replace_func)(phys_addr_t);
 	extern ttbr_replace_func idmap_cpu_replace_ttbr1;
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index c147093..d1f69b4 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -684,20 +684,39 @@
 #include <linux/build_bug.h>
 #include <linux/types.h>
 
-asm(
-"	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n"
-"	.equ	.L__reg_num_x\\num, \\num\n"
-"	.endr\n"
+#define __DEFINE_MRS_MSR_S_REGNUM				\
+"	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \
+"	.equ	.L__reg_num_x\\num, \\num\n"			\
+"	.endr\n"						\
 "	.equ	.L__reg_num_xzr, 31\n"
-"\n"
-"	.macro	mrs_s, rt, sreg\n"
-	__emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt))
+
+#define DEFINE_MRS_S						\
+	__DEFINE_MRS_MSR_S_REGNUM				\
+"	.macro	mrs_s, rt, sreg\n"				\
+	__emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt))	\
 "	.endm\n"
-"\n"
-"	.macro	msr_s, sreg, rt\n"
-	__emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt))
+
+#define DEFINE_MSR_S						\
+	__DEFINE_MRS_MSR_S_REGNUM				\
+"	.macro	msr_s, sreg, rt\n"				\
+	__emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt))	\
 "	.endm\n"
-);
+
+#define UNDEFINE_MRS_S						\
+"	.purgem	mrs_s\n"
+
+#define UNDEFINE_MSR_S						\
+"	.purgem	msr_s\n"
+
+#define __mrs_s(v, r)						\
+	DEFINE_MRS_S						\
+"	mrs_s " v ", " __stringify(r) "\n"			\
+	UNDEFINE_MRS_S
+
+#define __msr_s(r, v)						\
+	DEFINE_MSR_S						\
+"	msr_s " __stringify(r) ", " v "\n"			\
+	UNDEFINE_MSR_S
 
 /*
  * Unlike read_cpuid, calls to read_sysreg are never expected to be
@@ -725,13 +744,13 @@
  */
 #define read_sysreg_s(r) ({						\
 	u64 __val;							\
-	asm volatile("mrs_s %0, " __stringify(r) : "=r" (__val));	\
+	asm volatile(__mrs_s("%0", r) : "=r" (__val));			\
 	__val;								\
 })
 
 #define write_sysreg_s(v, r) do {					\
 	u64 __val = (u64)(v);						\
-	asm volatile("msr_s " __stringify(r) ", %x0" : : "rZ" (__val));	\
+	asm volatile(__msr_s(r, "%x0") : : "rZ" (__val));		\
 } while (0)
 
 /*
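
The kvm_hyp.h hunk above depends on this sysreg.h change: instead of defining the mrs_s/msr_s assembler macros once in a file-scope asm() block (whose ordering relative to other inline asm is not guaranteed once LTO merges translation units), each use now defines the macro, emits the one instruction, and purges the macro again inside a single asm statement. Roughly, the string one read_sysreg_s() use hands to the assembler now looks like this (an abbreviated sketch, not a literal expansion):

/*
 *	.irp	num,0,1, ... ,30		// define .L__reg_num_x<N>
 *	.equ	.L__reg_num_x\num, \num
 *	.endr
 *	.equ	.L__reg_num_xzr, 31
 *	.macro	mrs_s, rt, sreg			// (re)define the macro locally
 *	<__emit_inst(0xd5200000|(\sreg)|(.L__reg_num_\rt))>
 *	.endm
 *	mrs_s	%0, <register encoding>		// the actual access
 *	.purgem	mrs_s				// drop it again
 *
 * write_sysreg_s() does the same with msr_s/UNDEFINE_MSR_S, so the macros
 * never need to exist outside the single asm() statement that uses them.
 */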
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index 49a0fee..8e0a96d 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -42,9 +42,15 @@
 /* Replace task scheduler's default frequency-invariant accounting */
 #define arch_scale_freq_capacity topology_get_freq_scale
 
+/* Replace task scheduler's default max-frequency-invariant accounting */
+#define arch_scale_max_freq_capacity topology_get_max_freq_scale
+
 /* Replace task scheduler's default cpu-invariant accounting */
 #define arch_scale_cpu_capacity topology_get_cpu_scale
 
+/* Enable topology flag updates */
+#define arch_update_cpu_topology topology_update_cpu_topology
+
 #include <asm-generic/topology.h>
 
 #endif /* _ASM_ARM_TOPOLOGY_H */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 94babc3..0fe730b 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -928,7 +928,7 @@
 	return !has_cpuid_feature(entry, scope);
 }
 
-static void
+static void __nocfi
 kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
 {
 	typedef void (kpti_remap_fn)(int, int, phys_addr_t);
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index b215c71..ef3f9d9 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -15,6 +15,7 @@
 ccflags-y := -shared -fno-common -fno-builtin
 ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \
 		$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+ccflags-y += $(DISABLE_LTO)
 
 # Disable gcov profiling for VDSO code
 GCOV_PROFILE := n
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index feef06f..22aeb8d 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -4,7 +4,12 @@
 #
 
 ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING \
-		$(DISABLE_STACKLEAK_PLUGIN)
+		$(DISABLE_STACKLEAK_PLUGIN) \
+		$(DISABLE_CFI)
+
+ifeq ($(cc-name),clang)
+ccflags-y += -fno-jump-tables
+endif
 
 KVM=../../../../virt/kvm
 
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 5df2d61..33ce9a5 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -11,7 +11,12 @@
 # patching of the bl instruction in the caller with an atomic instruction
 # when supported by the CPU. Result and argument registers are handled
 # correctly, based on the function prototype.
+ifeq ($(CONFIG_LD_IS_LLD), y)
+# https://bugs.llvm.org/show_bug.cgi?id=35841
+obj-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o
+else
 lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o
+endif
 CFLAGS_atomic_ll_sc.o	:= -ffixed-x1 -ffixed-x2        		\
 		   -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6		\
 		   -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9		\
diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
index 882944c..5d8e8b6 100644
--- a/arch/powerpc/platforms/cell/cpufreq_spudemand.c
+++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
@@ -49,7 +49,7 @@
 	cpu = info->policy->cpu;
 	busy_spus = atomic_read(&cbe_spu_info[cpu_to_node(cpu)].busy_spus);
 
-	CALC_LOAD(info->busy_spus, EXP, busy_spus * FIXED_1);
+	info->busy_spus = calc_load(info->busy_spus, EXP, busy_spus * FIXED_1);
 	pr_debug("cpu %d: busy_spus=%d, info->busy_spus=%ld\n",
 			cpu, busy_spus, info->busy_spus);
 
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index c9ef3c5..9fcccb44 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -987,9 +987,9 @@
 	unsigned long active_tasks; /* fixed-point */
 
 	active_tasks = count_active_contexts() * FIXED_1;
-	CALC_LOAD(spu_avenrun[0], EXP_1, active_tasks);
-	CALC_LOAD(spu_avenrun[1], EXP_5, active_tasks);
-	CALC_LOAD(spu_avenrun[2], EXP_15, active_tasks);
+	spu_avenrun[0] = calc_load(spu_avenrun[0], EXP_1, active_tasks);
+	spu_avenrun[1] = calc_load(spu_avenrun[1], EXP_5, active_tasks);
+	spu_avenrun[2] = calc_load(spu_avenrun[2], EXP_15, active_tasks);
 }
 
 static void spusched_wake(struct timer_list *unused)
@@ -1071,9 +1071,6 @@
 	}
 }
 
-#define LOAD_INT(x) ((x) >> FSHIFT)
-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
-
 static int show_spu_loadavg(struct seq_file *s, void *private)
 {
 	int a, b, c;
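
The cpufreq_spudemand and spufs hunks above (and the matching conversions in appldata_os.c and blk-iolatency.c below) replace the statement-style CALC_LOAD() macro with the calc_load() helper and drop the local LOAD_INT/LOAD_FRAC copies in favour of shared definitions. For reference, the fixed-point exponential moving average involved looks like the following sketch, assuming the definitions in <linux/sched/loadavg.h> (FSHIFT is 11, so FIXED_1 is 2048):

#define FSHIFT		11			/* bits of fraction */
#define FIXED_1		(1 << FSHIFT)		/* 1.0 in fixed point */
#define LOAD_INT(x)	((x) >> FSHIFT)
#define LOAD_FRAC(x)	LOAD_INT(((x) & (FIXED_1 - 1)) * 100)

/* a1 = a0 * e + a * (1 - e), in FSHIFT fixed point */
static inline unsigned long
calc_load(unsigned long load, unsigned long exp, unsigned long active)
{
	unsigned long newload;

	newload = load * exp + active * (FIXED_1 - exp);
	if (active >= load)
		newload += FIXED_1 - 1;	/* round up while the average rises */

	return newload / FIXED_1;
}

/* e.g. show_spu_loadavg() prints "N.NN" as LOAD_INT(a), LOAD_FRAC(a) */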
diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c
index 433a994..54f3756 100644
--- a/arch/s390/appldata/appldata_os.c
+++ b/arch/s390/appldata/appldata_os.c
@@ -25,10 +25,6 @@
 
 #include "appldata.h"
 
-
-#define LOAD_INT(x) ((x) >> FSHIFT)
-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
-
 /*
  * OS data
  *
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e76d16a..f85253f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -78,6 +78,7 @@
 	select ARCH_SUPPORTS_ACPI
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_NUMA_BALANCING	if X86_64
+	select ARCH_SUPPORTS_LTO_CLANG		if X86_64
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_USE_QUEUED_SPINLOCKS
@@ -124,7 +125,7 @@
 	select HAVE_ARCH_MMAP_RND_BITS		if MMU
 	select HAVE_ARCH_MMAP_RND_COMPAT_BITS	if MMU && COMPAT
 	select HAVE_ARCH_COMPAT_MMAP_BASES	if MMU && COMPAT
-	select HAVE_ARCH_PREL32_RELOCATIONS
+	select HAVE_ARCH_PREL32_RELOCATIONS	if !LTO_CLANG
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_THREAD_STRUCT_WHITELIST
 	select HAVE_ARCH_TRACEHOOK
@@ -185,7 +186,7 @@
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RELIABLE_STACKTRACE		if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
 	select HAVE_STACKPROTECTOR		if CC_HAS_SANE_STACKPROTECTOR
-	select HAVE_STACK_VALIDATION		if X86_64
+	select HAVE_STACK_VALIDATION		if X86_64 && !LTO_CLANG
 	select HAVE_RSEQ
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_UNSTABLE_SCHED_CLOCK
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index ce0d042..45bb65d0 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -214,6 +214,11 @@
 KBUILD_LDFLAGS += $(call ld-option, -z max-page-size=0x200000)
 endif
 
+ifdef CONFIG_LTO_CLANG
+KBUILD_LDFLAGS	+= -plugin-opt=-code-model=kernel \
+		   -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8)
+endif
+
 # Speed up the build
 KBUILD_CFLAGS += -pipe
 # Workaround for a gcc prelease that unfortunately was shipped in a suse release
@@ -305,10 +310,6 @@
 
 archprepare: checkbin
 checkbin:
-ifndef CONFIG_CC_HAS_ASM_GOTO
-	@echo Compiler lacks asm-goto support.
-	@exit 1
-endif
 ifdef CONFIG_RETPOLINE
 ifeq ($(RETPOLINE_CFLAGS),)
 	@echo "You are building kernel with non-retpoline compiler." >&2
diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig
new file mode 100644
index 0000000..cc01102
--- /dev/null
+++ b/arch/x86/configs/x86_64_cuttlefish_defconfig
@@ -0,0 +1,483 @@
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_USELIB is not set
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_PSI=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUPS=y
+CONFIG_MEMCG=y
+CONFIG_MEMCG_SWAP=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CPUSETS=y
+# CONFIG_PROC_PID_CPUSET is not set
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_BPF=y
+CONFIG_NAMESPACES=y
+CONFIG_SCHED_TUNE=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_RD_LZ4 is not set
+# CONFIG_FHANDLE is not set
+# CONFIG_PCSPKR_PLATFORM is not set
+CONFIG_KALLSYMS_ALL=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_SMP=y
+CONFIG_HYPERVISOR_GUEST=y
+CONFIG_PARAVIRT=y
+CONFIG_PARAVIRT_SPINLOCKS=y
+CONFIG_MCORE2=y
+CONFIG_PROCESSOR_SELECT=y
+# CONFIG_CPU_SUP_CENTAUR is not set
+CONFIG_NR_CPUS=8
+# CONFIG_MICROCODE is not set
+CONFIG_X86_MSR=y
+CONFIG_X86_CPUID=y
+# CONFIG_MTRR is not set
+CONFIG_HZ_100=y
+CONFIG_KEXEC=y
+CONFIG_CRASH_DUMP=y
+CONFIG_PHYSICAL_START=0x200000
+CONFIG_PHYSICAL_ALIGN=0x1000000
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 reboot=p"
+CONFIG_PM_WAKELOCKS=y
+CONFIG_PM_WAKELOCKS_LIMIT=0
+# CONFIG_PM_WAKELOCKS_GC is not set
+CONFIG_PM_DEBUG=y
+CONFIG_ACPI_PROCFS_POWER=y
+# CONFIG_ACPI_FAN is not set
+# CONFIG_ACPI_THERMAL is not set
+# CONFIG_X86_PM_TIMER is not set
+CONFIG_CPU_FREQ_TIMES=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+CONFIG_X86_ACPI_CPUFREQ=y
+CONFIG_PCI_MSI=y
+CONFIG_IA32_EMULATION=y
+# CONFIG_FIRMWARE_MEMMAP is not set
+CONFIG_OPROFILE=y
+CONFIG_KPROBES=y
+CONFIG_LTO_CLANG=y
+CONFIG_CFI_CLANG=y
+CONFIG_REFCOUNT_FULL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_BINFMT_MISC=y
+CONFIG_KSM=y
+CONFIG_DEFAULT_MMAP_MIN_ADDR=65536
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_ZSMALLOC=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_XFRM_INTERFACE=y
+CONFIG_XFRM_STATISTICS=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_NET_IPGRE_DEMUX=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_NET_IPVTI=y
+CONFIG_INET_ESP=y
+# CONFIG_INET_XFRM_MODE_BEET is not set
+CONFIG_INET_UDP_DIAG=y
+CONFIG_INET_DIAG_DESTROY=y
+CONFIG_TCP_CONG_ADVANCED=y
+# CONFIG_TCP_CONG_BIC is not set
+# CONFIG_TCP_CONG_WESTWOOD is not set
+# CONFIG_TCP_CONG_HTCP is not set
+CONFIG_TCP_MD5SIG=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_INET6_AH=y
+CONFIG_INET6_ESP=y
+CONFIG_INET6_IPCOMP=y
+CONFIG_IPV6_MIP6=y
+CONFIG_IPV6_VTI=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NETLABEL=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_AMANDA=y
+CONFIG_NF_CONNTRACK_FTP=y
+CONFIG_NF_CONNTRACK_H323=y
+CONFIG_NF_CONNTRACK_IRC=y
+CONFIG_NF_CONNTRACK_NETBIOS_NS=y
+CONFIG_NF_CONNTRACK_PPTP=y
+CONFIG_NF_CONNTRACK_SANE=y
+CONFIG_NF_CONNTRACK_TFTP=y
+CONFIG_NF_CT_NETLINK=y
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=y
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
+CONFIG_NETFILTER_XT_TARGET_CT=y
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NETFILTER_XT_TARGET_NFLOG=y
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y
+CONFIG_NETFILTER_XT_TARGET_TPROXY=y
+CONFIG_NETFILTER_XT_TARGET_TRACE=y
+CONFIG_NETFILTER_XT_TARGET_SECMARK=y
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
+CONFIG_NETFILTER_XT_MATCH_BPF=y
+CONFIG_NETFILTER_XT_MATCH_COMMENT=y
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=y
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_HELPER=y
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=y
+# CONFIG_NETFILTER_XT_MATCH_L2TP is not set
+CONFIG_NETFILTER_XT_MATCH_LENGTH=y
+CONFIG_NETFILTER_XT_MATCH_LIMIT=y
+CONFIG_NETFILTER_XT_MATCH_MAC=y
+CONFIG_NETFILTER_XT_MATCH_MARK=y
+CONFIG_NETFILTER_XT_MATCH_OWNER=y
+CONFIG_NETFILTER_XT_MATCH_POLICY=y
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA2=y
+CONFIG_NETFILTER_XT_MATCH_SOCKET=y
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
+CONFIG_NETFILTER_XT_MATCH_STRING=y
+CONFIG_NETFILTER_XT_MATCH_TIME=y
+CONFIG_NETFILTER_XT_MATCH_U32=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_MATCH_AH=y
+CONFIG_IP_NF_MATCH_ECN=y
+CONFIG_IP_NF_MATCH_TTL=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+CONFIG_IP_NF_NAT=y
+CONFIG_IP_NF_TARGET_MASQUERADE=y
+CONFIG_IP_NF_TARGET_NETMAP=y
+CONFIG_IP_NF_TARGET_REDIRECT=y
+CONFIG_IP_NF_MANGLE=y
+CONFIG_IP_NF_RAW=y
+CONFIG_IP_NF_SECURITY=y
+CONFIG_IP_NF_ARPTABLES=y
+CONFIG_IP_NF_ARPFILTER=y
+CONFIG_IP_NF_ARP_MANGLE=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP6_NF_MATCH_IPV6HEADER=y
+CONFIG_IP6_NF_MATCH_RPFILTER=y
+CONFIG_IP6_NF_FILTER=y
+CONFIG_IP6_NF_TARGET_REJECT=y
+CONFIG_IP6_NF_MANGLE=y
+CONFIG_IP6_NF_RAW=y
+CONFIG_L2TP=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_HTB=y
+CONFIG_NET_SCH_NETEM=y
+CONFIG_NET_SCH_INGRESS=y
+CONFIG_NET_CLS_U32=y
+CONFIG_NET_CLS_BPF=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_U32=y
+CONFIG_NET_CLS_ACT=y
+CONFIG_VSOCKETS=y
+CONFIG_VIRTIO_VSOCKETS=y
+CONFIG_CFG80211=y
+CONFIG_MAC80211=y
+CONFIG_RFKILL=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEBUG_DEVRES=y
+CONFIG_OF=y
+CONFIG_OF_UNITTEST=y
+# CONFIG_PNP_DEBUG_MESSAGES is not set
+CONFIG_ZRAM=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=8192
+CONFIG_VIRTIO_BLK=y
+CONFIG_UID_SYS_STATS=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_SCSI_VIRTIO=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=y
+CONFIG_DM_MIRROR=y
+CONFIG_DM_ZERO=y
+CONFIG_DM_UEVENT=y
+CONFIG_DM_VERITY=y
+CONFIG_DM_VERITY_AVB=y
+CONFIG_DM_VERITY_FEC=y
+CONFIG_DM_BOW=y
+CONFIG_NETDEVICES=y
+CONFIG_NETCONSOLE=y
+CONFIG_NETCONSOLE_DYNAMIC=y
+CONFIG_TUN=y
+CONFIG_VIRTIO_NET=y
+# CONFIG_ETHERNET is not set
+CONFIG_PPP=y
+CONFIG_PPP_BSDCOMP=y
+CONFIG_PPP_DEFLATE=y
+CONFIG_PPP_MPPE=y
+CONFIG_PPTP=y
+CONFIG_PPPOL2TP=y
+CONFIG_USB_RTL8152=y
+CONFIG_USB_USBNET=y
+# CONFIG_USB_NET_AX8817X is not set
+# CONFIG_USB_NET_AX88179_178A is not set
+# CONFIG_USB_NET_CDCETHER is not set
+# CONFIG_USB_NET_CDC_NCM is not set
+# CONFIG_USB_NET_NET1080 is not set
+# CONFIG_USB_NET_CDC_SUBSET is not set
+# CONFIG_USB_NET_ZAURUS is not set
+# CONFIG_WLAN_VENDOR_ADMTEK is not set
+# CONFIG_WLAN_VENDOR_ATH is not set
+# CONFIG_WLAN_VENDOR_ATMEL is not set
+# CONFIG_WLAN_VENDOR_BROADCOM is not set
+# CONFIG_WLAN_VENDOR_CISCO is not set
+# CONFIG_WLAN_VENDOR_INTEL is not set
+# CONFIG_WLAN_VENDOR_INTERSIL is not set
+# CONFIG_WLAN_VENDOR_MARVELL is not set
+# CONFIG_WLAN_VENDOR_MEDIATEK is not set
+# CONFIG_WLAN_VENDOR_RALINK is not set
+# CONFIG_WLAN_VENDOR_REALTEK is not set
+# CONFIG_WLAN_VENDOR_RSI is not set
+# CONFIG_WLAN_VENDOR_ST is not set
+# CONFIG_WLAN_VENDOR_TI is not set
+# CONFIG_WLAN_VENDOR_ZYDAS is not set
+# CONFIG_WLAN_VENDOR_QUANTENNA is not set
+CONFIG_MAC80211_HWSIM=y
+CONFIG_VIRT_WIFI=y
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_INPUT_JOYSTICK=y
+CONFIG_JOYSTICK_XPAD=y
+CONFIG_JOYSTICK_XPAD_FF=y
+CONFIG_JOYSTICK_XPAD_LEDS=y
+CONFIG_INPUT_TABLET=y
+CONFIG_TABLET_USB_ACECAD=y
+CONFIG_TABLET_USB_AIPTEK=y
+CONFIG_TABLET_USB_GTCO=y
+CONFIG_TABLET_USB_HANWANG=y
+CONFIG_TABLET_USB_KBTAB=y
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_UINPUT=y
+# CONFIG_SERIO_I8042 is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVMEM is not set
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_EXAR is not set
+CONFIG_SERIAL_8250_NR_UARTS=48
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_HW_RANDOM=y
+# CONFIG_HW_RANDOM_INTEL is not set
+# CONFIG_HW_RANDOM_AMD is not set
+# CONFIG_HW_RANDOM_VIA is not set
+CONFIG_HW_RANDOM_VIRTIO=y
+CONFIG_HPET=y
+# CONFIG_HPET_MMAP_DEFAULT is not set
+# CONFIG_DEVPORT is not set
+# CONFIG_ACPI_I2C_OPREGION is not set
+# CONFIG_I2C_COMPAT is not set
+# CONFIG_I2C_HELPER_AUTO is not set
+CONFIG_PTP_1588_CLOCK=y
+# CONFIG_HWMON is not set
+# CONFIG_X86_PKG_TEMP_THERMAL is not set
+CONFIG_WATCHDOG=y
+CONFIG_SOFT_WATCHDOG=y
+CONFIG_MEDIA_SUPPORT=y
+# CONFIG_VGA_ARB is not set
+CONFIG_DRM=y
+# CONFIG_DRM_FBDEV_EMULATION is not set
+CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_SND_HRTIMER=y
+# CONFIG_SND_SUPPORT_OLD_API is not set
+# CONFIG_SND_VERBOSE_PROCFS is not set
+# CONFIG_SND_DRIVERS is not set
+CONFIG_SND_INTEL8X0=y
+# CONFIG_SND_USB is not set
+CONFIG_HIDRAW=y
+CONFIG_UHID=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_ACRUX=y
+CONFIG_HID_ACRUX_FF=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_PRODIKEYS=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_DRAGONRISE=y
+CONFIG_DRAGONRISE_FF=y
+CONFIG_HID_EMS_FF=y
+CONFIG_HID_ELECOM=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_HOLTEK=y
+CONFIG_HID_KEYTOUCH=y
+CONFIG_HID_KYE=y
+CONFIG_HID_UCLOGIC=y
+CONFIG_HID_WALTOP=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_LCPOWER=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_LOGITECH_DJ=y
+CONFIG_LOGITECH_FF=y
+CONFIG_LOGIRUMBLEPAD2_FF=y
+CONFIG_LOGIG940_FF=y
+CONFIG_HID_MAGICMOUSE=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_MULTITOUCH=y
+CONFIG_HID_NTRIG=y
+CONFIG_HID_ORTEK=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_PANTHERLORD_FF=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_PICOLCD=y
+CONFIG_HID_PRIMAX=y
+CONFIG_HID_ROCCAT=y
+CONFIG_HID_SAITEK=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SONY=y
+CONFIG_HID_SPEEDLINK=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_GREENASIA=y
+CONFIG_GREENASIA_FF=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_SMARTJOYPLUS_FF=y
+CONFIG_HID_TIVO=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_THRUSTMASTER=y
+CONFIG_HID_WACOM=y
+CONFIG_HID_WIIMOTE=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_HID_ZYDACRON=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_GADGET=y
+CONFIG_USB_DUMMY_HCD=y
+CONFIG_USB_CONFIGFS=y
+CONFIG_USB_CONFIGFS_UEVENT=y
+CONFIG_USB_CONFIGFS_F_FS=y
+CONFIG_USB_CONFIGFS_F_ACC=y
+CONFIG_USB_CONFIGFS_F_AUDIO_SRC=y
+CONFIG_USB_CONFIGFS_F_MIDI=y
+CONFIG_MMC=y
+# CONFIG_PWRSEQ_EMMC is not set
+# CONFIG_PWRSEQ_SIMPLE is not set
+# CONFIG_MMC_BLOCK is not set
+CONFIG_RTC_CLASS=y
+CONFIG_SW_SYNC=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_INPUT=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
+CONFIG_STAGING=y
+CONFIG_ASHMEM=y
+CONFIG_ANDROID_VSOC=y
+CONFIG_ION=y
+CONFIG_ION_SYSTEM_HEAP=y
+# CONFIG_X86_PLATFORM_DEVICES is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_ANDROID=y
+CONFIG_ANDROID_BINDER_IPC=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_EXT4_ENCRYPTION=y
+CONFIG_F2FS_FS=y
+CONFIG_F2FS_FS_SECURITY=y
+CONFIG_F2FS_FS_ENCRYPTION=y
+CONFIG_QUOTA=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+# CONFIG_PRINT_QUOTA_WARNING is not set
+CONFIG_QFMT_V2=y
+CONFIG_AUTOFS4_FS=y
+CONFIG_FUSE_FS=y
+CONFIG_OVERLAY_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_SDCARD_FS=y
+CONFIG_PSTORE=y
+CONFIG_PSTORE_CONSOLE=y
+CONFIG_PSTORE_RAM=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_PATH=y
+CONFIG_HARDENED_USERCOPY=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
+# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_ADIANTUM=y
+CONFIG_CRYPTO_SHA512=y
+CONFIG_CRYPTO_LZ4=y
+CONFIG_CRYPTO_ZSTD=y
+CONFIG_CRYPTO_DEV_VIRTIO=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_PANIC_TIMEOUT=5
+CONFIG_SCHEDSTATS=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_ENABLE_DEFAULT_TRACERS=y
+CONFIG_IO_DELAY_NONE=y
+CONFIG_DEBUG_BOOT_PARAMS=y
+CONFIG_OPTIMIZE_INLINING=y
+CONFIG_UNWINDER_FRAME_POINTER=y
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
index dce7c5d..bd249f0 100644
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha20_glue.c
@@ -10,7 +10,7 @@
  */
 
 #include <crypto/algapi.h>
-#include <crypto/chacha20.h>
+#include <crypto/chacha.h>
 #include <crypto/internal/skcipher.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -29,31 +29,31 @@
 static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
 			    unsigned int bytes)
 {
-	u8 buf[CHACHA20_BLOCK_SIZE];
+	u8 buf[CHACHA_BLOCK_SIZE];
 
 #ifdef CONFIG_AS_AVX2
 	if (chacha20_use_avx2) {
-		while (bytes >= CHACHA20_BLOCK_SIZE * 8) {
+		while (bytes >= CHACHA_BLOCK_SIZE * 8) {
 			chacha20_8block_xor_avx2(state, dst, src);
-			bytes -= CHACHA20_BLOCK_SIZE * 8;
-			src += CHACHA20_BLOCK_SIZE * 8;
-			dst += CHACHA20_BLOCK_SIZE * 8;
+			bytes -= CHACHA_BLOCK_SIZE * 8;
+			src += CHACHA_BLOCK_SIZE * 8;
+			dst += CHACHA_BLOCK_SIZE * 8;
 			state[12] += 8;
 		}
 	}
 #endif
-	while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
+	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
 		chacha20_4block_xor_ssse3(state, dst, src);
-		bytes -= CHACHA20_BLOCK_SIZE * 4;
-		src += CHACHA20_BLOCK_SIZE * 4;
-		dst += CHACHA20_BLOCK_SIZE * 4;
+		bytes -= CHACHA_BLOCK_SIZE * 4;
+		src += CHACHA_BLOCK_SIZE * 4;
+		dst += CHACHA_BLOCK_SIZE * 4;
 		state[12] += 4;
 	}
-	while (bytes >= CHACHA20_BLOCK_SIZE) {
+	while (bytes >= CHACHA_BLOCK_SIZE) {
 		chacha20_block_xor_ssse3(state, dst, src);
-		bytes -= CHACHA20_BLOCK_SIZE;
-		src += CHACHA20_BLOCK_SIZE;
-		dst += CHACHA20_BLOCK_SIZE;
+		bytes -= CHACHA_BLOCK_SIZE;
+		src += CHACHA_BLOCK_SIZE;
+		dst += CHACHA_BLOCK_SIZE;
 		state[12]++;
 	}
 	if (bytes) {
@@ -66,7 +66,7 @@
 static int chacha20_simd(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
 	u32 *state, state_buf[16 + 2] __aligned(8);
 	struct skcipher_walk walk;
 	int err;
@@ -74,20 +74,20 @@
 	BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16);
 	state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN);
 
-	if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd())
-		return crypto_chacha20_crypt(req);
+	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
+		return crypto_chacha_crypt(req);
 
 	err = skcipher_walk_virt(&walk, req, true);
 
-	crypto_chacha20_init(state, ctx, walk.iv);
+	crypto_chacha_init(state, ctx, walk.iv);
 
 	kernel_fpu_begin();
 
-	while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
+	while (walk.nbytes >= CHACHA_BLOCK_SIZE) {
 		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
-				rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
+				rounddown(walk.nbytes, CHACHA_BLOCK_SIZE));
 		err = skcipher_walk_done(&walk,
-					 walk.nbytes % CHACHA20_BLOCK_SIZE);
+					 walk.nbytes % CHACHA_BLOCK_SIZE);
 	}
 
 	if (walk.nbytes) {
@@ -106,13 +106,13 @@
 	.base.cra_driver_name	= "chacha20-simd",
 	.base.cra_priority	= 300,
 	.base.cra_blocksize	= 1,
-	.base.cra_ctxsize	= sizeof(struct chacha20_ctx),
+	.base.cra_ctxsize	= sizeof(struct chacha_ctx),
 	.base.cra_module	= THIS_MODULE,
 
-	.min_keysize		= CHACHA20_KEY_SIZE,
-	.max_keysize		= CHACHA20_KEY_SIZE,
-	.ivsize			= CHACHA20_IV_SIZE,
-	.chunksize		= CHACHA20_BLOCK_SIZE,
+	.min_keysize		= CHACHA_KEY_SIZE,
+	.max_keysize		= CHACHA_KEY_SIZE,
+	.ivsize			= CHACHA_IV_SIZE,
+	.chunksize		= CHACHA_BLOCK_SIZE,
 	.setkey			= crypto_chacha20_setkey,
 	.encrypt		= chacha20_simd,
 	.decrypt		= chacha20_simd,
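
The rename from CHACHA20_* to CHACHA_* (and from <crypto/chacha20.h> to <crypto/chacha.h>) lets ChaCha20, XChaCha20 and XChaCha12 share one header and context, which the Adiantum code further down relies on. The constants and context referenced in this hunk keep their values; for reference, a sketch of the definitions assumed from <crypto/chacha.h>:

/* assumed values, mirroring <crypto/chacha.h> after the rename */
#define CHACHA_IV_SIZE		16
#define CHACHA_KEY_SIZE		32
#define CHACHA_BLOCK_SIZE	64
#define XCHACHA_IV_SIZE		32	/* 192-bit nonce + 64-bit stream position */

struct chacha_ctx {
	u32 key[8];
	int nrounds;	/* 20 for (X)ChaCha20, 12 for XChaCha12 */
};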
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index f012b7e..88cc015 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -83,35 +83,37 @@
 	if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
 		if (unlikely(!sctx->wset)) {
 			if (!sctx->uset) {
-				memcpy(sctx->u, dctx->r, sizeof(sctx->u));
-				poly1305_simd_mult(sctx->u, dctx->r);
+				memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
+				poly1305_simd_mult(sctx->u, dctx->r.r);
 				sctx->uset = true;
 			}
 			memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u));
-			poly1305_simd_mult(sctx->u + 5, dctx->r);
+			poly1305_simd_mult(sctx->u + 5, dctx->r.r);
 			memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
-			poly1305_simd_mult(sctx->u + 10, dctx->r);
+			poly1305_simd_mult(sctx->u + 10, dctx->r.r);
 			sctx->wset = true;
 		}
 		blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
-		poly1305_4block_avx2(dctx->h, src, dctx->r, blocks, sctx->u);
+		poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks,
+				     sctx->u);
 		src += POLY1305_BLOCK_SIZE * 4 * blocks;
 		srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
 	}
 #endif
 	if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
 		if (unlikely(!sctx->uset)) {
-			memcpy(sctx->u, dctx->r, sizeof(sctx->u));
-			poly1305_simd_mult(sctx->u, dctx->r);
+			memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
+			poly1305_simd_mult(sctx->u, dctx->r.r);
 			sctx->uset = true;
 		}
 		blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
-		poly1305_2block_sse2(dctx->h, src, dctx->r, blocks, sctx->u);
+		poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks,
+				     sctx->u);
 		src += POLY1305_BLOCK_SIZE * 2 * blocks;
 		srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
 	}
 	if (srclen >= POLY1305_BLOCK_SIZE) {
-		poly1305_block_sse2(dctx->h, src, dctx->r, 1);
+		poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1);
 		srclen -= POLY1305_BLOCK_SIZE;
 	}
 	return srclen;
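
The extra .r/.h level on dctx->r and dctx->h comes from a refactor of the Poly1305 descriptor context that puts the key and accumulator into their own types, so that NHPoly1305 and Adiantum can reuse the poly1305_core_* helpers. A sketch of the layout this hunk assumes, mirroring <crypto/poly1305.h>:

struct poly1305_key {
	u32 r[5];	/* key, base 2^26 */
};

struct poly1305_state {
	u32 h[5];	/* accumulator, base 2^26 */
};

struct poly1305_desc_ctx {
	/* ... buffer, nonce and flag fields unchanged ... */
	u32 s[4];			/* finalizer key */
	struct poly1305_state h;	/* was: u32 h[5] */
	struct poly1305_key r;		/* was: u32 r[5] */
};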
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index aa3336a..7d17b3a 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -10,13 +10,11 @@
 #ifdef CONFIG_IA32_EMULATION
 /* On X86_64, we use struct pt_regs * to pass parameters to syscalls */
 #define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *);
-
-/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */
-extern asmlinkage long sys_ni_syscall(const struct pt_regs *);
-
+#define __sys_ni_syscall __ia32_sys_ni_syscall
 #else /* CONFIG_IA32_EMULATION */
 #define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
+#define __sys_ni_syscall sys_ni_syscall
 #endif /* CONFIG_IA32_EMULATION */
 
 #include <asm/syscalls_32.h>
@@ -29,6 +27,6 @@
 	 * Smells like a compiler bug -- it doesn't work
 	 * when the & below is removed.
 	 */
-	[0 ... __NR_syscall_compat_max] = &sys_ni_syscall,
+	[0 ... __NR_syscall_compat_max] = &__sys_ni_syscall,
 #include <asm/syscalls_32.h>
 };
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index d5252bc..eb82ad9 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -4,11 +4,17 @@
 #include <linux/linkage.h>
 #include <linux/sys.h>
 #include <linux/cache.h>
+#include <linux/syscalls.h>
 #include <asm/asm-offsets.h>
 #include <asm/syscall.h>
 
-/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */
-extern asmlinkage long sys_ni_syscall(const struct pt_regs *);
+extern asmlinkage long sys_ni_syscall(void);
+
+SYSCALL_DEFINE0(ni_syscall)
+{
+	return sys_ni_syscall();
+}
+
 #define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *);
 #include <asm/syscalls_64.h>
 #undef __SYSCALL_64
@@ -20,6 +26,6 @@
 	 * Smells like a compiler bug -- it doesn't work
 	 * when the & below is removed.
 	 */
-	[0 ... __NR_syscall_max] = &sys_ni_syscall,
+	[0 ... __NR_syscall_max] = &__x64_sys_ni_syscall,
 #include <asm/syscalls_64.h>
 };
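
With the x86 syscall wrappers, every entry of sys_call_table is invoked through a long (*)(const struct pt_regs *) pointer; the removed comment admitted that declaring sys_ni_syscall() with that prototype was "a lie", and under Clang CFI that lie becomes a type-check failure on the indirect call. Defining ni_syscall via SYSCALL_DEFINE0 yields __x64_sys_ni_syscall / __ia32_sys_ni_syscall with a genuine pt_regs-taking prototype. A minimal sketch of why the types must match (the sys_call_ptr_t name follows arch/x86/include/asm/syscall.h and is treated as an assumption here):

struct pt_regs;

typedef long (*sys_call_ptr_t)(const struct pt_regs *);
extern const sys_call_ptr_t sys_call_table[];

static long dispatch(unsigned long nr, struct pt_regs *regs)
{
	/*
	 * Under CONFIG_CFI_CLANG this indirect call only succeeds if the
	 * target was compiled with exactly this function type, so even the
	 * "not implemented" filler must really take const struct pt_regs *.
	 */
	return sys_call_table[nr](regs);
}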
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 3cf7b533b..13c795b 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -124,7 +124,7 @@
 110	i386	iopl			sys_iopl			__ia32_sys_iopl
 111	i386	vhangup			sys_vhangup			__ia32_sys_vhangup
 112	i386	idle
-113	i386	vm86old			sys_vm86old			sys_ni_syscall
+113	i386	vm86old			sys_vm86old			__ia32_sys_ni_syscall
 114	i386	wait4			sys_wait4			__ia32_compat_sys_wait4
 115	i386	swapoff			sys_swapoff			__ia32_sys_swapoff
 116	i386	sysinfo			sys_sysinfo			__ia32_compat_sys_sysinfo
@@ -177,7 +177,7 @@
 163	i386	mremap			sys_mremap			__ia32_sys_mremap
 164	i386	setresuid		sys_setresuid16			__ia32_sys_setresuid16
 165	i386	getresuid		sys_getresuid16			__ia32_sys_getresuid16
-166	i386	vm86			sys_vm86			sys_ni_syscall
+166	i386	vm86			sys_vm86			__ia32_sys_ni_syscall
 167	i386	query_module
 168	i386	poll			sys_poll			__ia32_sys_poll
 169	i386	nfsservctl
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 5bfe224..c66b333 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -3,7 +3,6 @@
 # Building vDSO images for x86.
 #
 
-KBUILD_CFLAGS += $(DISABLE_LTO)
 KASAN_SANITIZE			:= n
 UBSAN_SANITIZE			:= n
 OBJECT_FILES_NON_STANDARD	:= y
@@ -68,7 +67,7 @@
 CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
        $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
        -fno-omit-frame-pointer -foptimize-sibling-calls \
-       -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
+       -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO $(DISABLE_LTO)
 
 ifdef CONFIG_RETPOLINE
 ifneq ($(RETPOLINE_VDSO_CFLAGS),)
@@ -139,6 +138,8 @@
 KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32))
 KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32))
 KBUILD_CFLAGS_32 := $(filter-out $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out $(LTO_CFLAGS),$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out $(CFI_CFLAGS),$(KBUILD_CFLAGS_32))
 KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic
 KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
 KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 513ba49..d8b7398 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -118,7 +118,7 @@
 	return err;
 }
 
-asmlinkage long sys32_sigreturn(void)
+asmlinkage long sys32_sigreturn(const struct pt_regs *__unused)
 {
 	struct pt_regs *regs = current_pt_regs();
 	struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
@@ -144,7 +144,7 @@
 	return 0;
 }
 
-asmlinkage long sys32_rt_sigreturn(void)
+asmlinkage long sys32_rt_sigreturn(const struct pt_regs *__unused)
 {
 	struct pt_regs *regs = current_pt_regs();
 	struct rt_sigframe_ia32 __user *frame;
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 68889ac..5694825 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -146,9 +146,6 @@
  * Workaround for the sake of BPF compilation which utilizes kernel
  * headers, but clang does not support ASM GOTO and fails the build.
  */
-#ifndef __BPF_TRACING__
-#warning "Compiler lacks ASM_GOTO support. Add -D __BPF_TRACING__ to your compiler arguments"
-#endif
 
 #define static_cpu_has(bit)            boot_cpu_has(bit)
 
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 6390bd8..5e12b23 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -159,7 +159,7 @@
 extern pte_t *pkmap_page_table;
 
 void __native_set_fixmap(enum fixed_addresses idx, pte_t pte);
-void native_set_fixmap(enum fixed_addresses idx,
+void native_set_fixmap(unsigned /* enum fixed_addresses */ idx,
 		       phys_addr_t phys, pgprot_t flags);
 
 #ifndef CONFIG_PARAVIRT
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index e046a40..5509c1a 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -48,12 +48,13 @@
  * To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias
  * named __ia32_sys_*()
  */
-#define SYSCALL_DEFINE0(sname)					\
-	SYSCALL_METADATA(_##sname, 0);				\
-	asmlinkage long __x64_sys_##sname(void);		\
-	ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO);	\
-	SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname);	\
-	asmlinkage long __x64_sys_##sname(void)
+
+#define SYSCALL_DEFINE0(sname)						\
+	SYSCALL_METADATA(_##sname, 0);					\
+	asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\
+	ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO);		\
+	SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname);		\
+	asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused)
 
 #define COND_SYSCALL(name)						\
 	cond_syscall(__x64_sys_##name);					\
@@ -183,9 +184,9 @@
 #ifndef SYSCALL_DEFINE0
 #define SYSCALL_DEFINE0(sname)					\
 	SYSCALL_METADATA(_##sname, 0);				\
-	asmlinkage long __x64_sys_##sname(void);		\
+	asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\
 	ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO);	\
-	asmlinkage long __x64_sys_##sname(void)
+	asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused)
 #endif
 
 #ifndef COND_SYSCALL
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index d805202..917840e 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -45,7 +45,7 @@
 #define VMWARE_PORT_CMD_VCPU_RESERVED	31
 
 #define VMWARE_PORT(cmd, eax, ebx, ecx, edx)				\
-	__asm__("inl (%%dx)" :						\
+	__asm__("inl (%%dx), %%eax" :					\
 			"=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :	\
 			"0"(VMWARE_HYPERVISOR_MAGIC),			\
 			"1"(VMWARE_PORT_CMD_##cmd),			\
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 59274e2..619101a 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -660,7 +660,7 @@
 	fixmaps_set++;
 }
 
-void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys,
+void native_set_fixmap(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys,
 		       pgprot_t flags)
 {
 	/* Sanitize 'prot' against any unsupported bits: */
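
Together with the fixmap.h hunk above, this widens native_set_fixmap() from enum fixed_addresses to unsigned so that its address has exactly the type of the paravirt set_fixmap slot it is stored in; with CFI enabled, an indirect call through a pointer of a different function type would trap. A sketch of the mismatch being avoided, using stand-in types and assuming the callback slot is declared with plain unsigned (which is where the commented-out enum style comes from):

/* stand-ins for the real kernel types, for illustration only */
typedef unsigned long phys_addr_t;
typedef struct { unsigned long pgprot; } pgprot_t;

enum fixed_addresses { FIX_HOLE, FIX_EXAMPLE };

struct mmu_ops {
	/* callback slot declared with plain unsigned, like pv_mmu_ops.set_fixmap */
	void (*set_fixmap)(unsigned idx, phys_addr_t phys, pgprot_t flags);
};

/* matching parameter types mean the CFI check on ops->set_fixmap(...) passes */
static void example_set_fixmap(unsigned /* enum fixed_addresses */ idx,
			       phys_addr_t phys, pgprot_t flags)
{
}

static struct mmu_ops ops = { .set_fixmap = example_set_fixmap };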
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index f4f7c73..9b6a150 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -141,7 +141,7 @@
 #define BLKIOLATENCY_MAX_WIN_SIZE NSEC_PER_SEC
 /*
  * These are the constants used to fake the fixed-point moving average
- * calculation just like load average.  The call to CALC_LOAD folds
+ * calculation just like load average.  The call to calc_load() folds
  * (FIXED_1 (2048) - exp_factor) * new_sample into lat_avg.  The sampling
  * window size is bucketed to try to approximately calculate average
  * latency such that 1/exp (decay rate) is [1 min, 2.5 min) when windows
@@ -505,7 +505,7 @@
 	lat_info = &parent->child_lat;
 
 	/*
-	 * CALC_LOAD takes in a number stored in fixed point representation.
+	 * calc_load() takes in a number stored in fixed point representation.
 	 * Because we are using this for IO time in ns, the values stored
 	 * are significantly larger than the FIXED_1 denominator (2048).
 	 * Therefore, rounding errors in the calculation are negligible and
@@ -514,7 +514,9 @@
 	exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1,
 			div64_u64(iolat->cur_win_nsec,
 				  BLKIOLATENCY_EXP_BUCKET_SIZE));
-	CALC_LOAD(iolat->lat_avg, iolatency_exp_factors[exp_idx], stat.mean);
+	iolat->lat_avg = calc_load(iolat->lat_avg,
+				   iolatency_exp_factors[exp_idx],
+				   stat.mean);
 
 	/* Everything is ok and we don't need to adjust the scale. */
 	if (stat.mean <= iolat->min_lat_nsec &&
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 2eb8744..9ad5211 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -225,6 +225,7 @@
 
 	unsigned int weight;
 	unsigned int leaf_weight;
+	u64 group_idle;
 };
 
 /* This is per cgroup per device grouping structure */
@@ -310,6 +311,7 @@
 	struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
 	struct cfq_queue *async_idle_cfqq;
 
+	u64 group_idle;
 };
 
 struct cfq_io_cq {
@@ -805,6 +807,17 @@
 
 #endif	/* CONFIG_CFQ_GROUP_IOSCHED */
 
+static inline u64 get_group_idle(struct cfq_data *cfqd)
+{
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+	struct cfq_queue *cfqq = cfqd->active_queue;
+
+	if (cfqq && cfqq->cfqg)
+		return cfqq->cfqg->group_idle;
+#endif
+	return cfqd->cfq_group_idle;
+}
+
 #define cfq_log(cfqd, fmt, args...)	\
 	blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
 
@@ -825,7 +838,7 @@
 	if (!sample_valid(ttime->ttime_samples))
 		return false;
 	if (group_idle)
-		slice = cfqd->cfq_group_idle;
+		slice = get_group_idle(cfqd);
 	else
 		slice = cfqd->cfq_slice_idle;
 	return ttime->ttime_mean > slice;
@@ -1592,6 +1605,7 @@
 
 	cgd->weight = weight;
 	cgd->leaf_weight = weight;
+	cgd->group_idle = cfq_group_idle;
 }
 
 static void cfq_cpd_free(struct blkcg_policy_data *cpd)
@@ -1636,6 +1650,7 @@
 
 	cfqg->weight = cgd->weight;
 	cfqg->leaf_weight = cgd->leaf_weight;
+	cfqg->group_idle = cgd->group_idle;
 }
 
 static void cfq_pd_offline(struct blkg_policy_data *pd)
@@ -1757,6 +1772,19 @@
 	return 0;
 }
 
+static int cfq_print_group_idle(struct seq_file *sf, void *v)
+{
+	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+	struct cfq_group_data *cgd = blkcg_to_cfqgd(blkcg);
+	u64 val = 0;
+
+	if (cgd)
+		val = cgd->group_idle;
+
+	seq_printf(sf, "%llu\n", div_u64(val, NSEC_PER_USEC));
+	return 0;
+}
+
 static ssize_t __cfqg_set_weight_device(struct kernfs_open_file *of,
 					char *buf, size_t nbytes, loff_t off,
 					bool on_dfl, bool is_leaf_weight)
@@ -1878,6 +1906,37 @@
 	return __cfq_set_weight(css, val, false, false, true);
 }
 
+static int cfq_set_group_idle(struct cgroup_subsys_state *css,
+			       struct cftype *cft, u64 val)
+{
+	struct blkcg *blkcg = css_to_blkcg(css);
+	struct cfq_group_data *cfqgd;
+	struct blkcg_gq *blkg;
+	int ret = 0;
+
+	spin_lock_irq(&blkcg->lock);
+	cfqgd = blkcg_to_cfqgd(blkcg);
+	if (!cfqgd) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	cfqgd->group_idle = val * NSEC_PER_USEC;
+
+	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
+		struct cfq_group *cfqg = blkg_to_cfqg(blkg);
+
+		if (!cfqg)
+			continue;
+
+		cfqg->group_idle = cfqgd->group_idle;
+	}
+
+out:
+	spin_unlock_irq(&blkcg->lock);
+	return ret;
+}
+
 static int cfqg_print_stat(struct seq_file *sf, void *v)
 {
 	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
@@ -2023,6 +2082,11 @@
 		.seq_show = cfq_print_leaf_weight,
 		.write_u64 = cfq_set_leaf_weight,
 	},
+	{
+		.name = "group_idle",
+		.seq_show = cfq_print_group_idle,
+		.write_u64 = cfq_set_group_idle,
+	},
 
 	/* statistics, covers only the tasks in the cfqg */
 	{
@@ -2917,7 +2981,7 @@
 	 * with sync vs async workloads.
 	 */
 	if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag &&
-		!cfqd->cfq_group_idle)
+		!get_group_idle(cfqd))
 		return;
 
 	WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list));
@@ -2928,9 +2992,8 @@
 	 */
 	if (!cfq_should_idle(cfqd, cfqq)) {
 		/* no queue idling. Check for group idling */
-		if (cfqd->cfq_group_idle)
-			group_idle = cfqd->cfq_group_idle;
-		else
+		group_idle = get_group_idle(cfqd);
+		if (!group_idle)
 			return;
 	}
 
@@ -2971,7 +3034,7 @@
 	cfq_mark_cfqq_wait_request(cfqq);
 
 	if (group_idle)
-		sl = cfqd->cfq_group_idle;
+		sl = group_idle;
 	else
 		sl = cfqd->cfq_slice_idle;
 
@@ -3320,7 +3383,7 @@
 	 * this group, wait for requests to complete.
 	 */
 check_group_idle:
-	if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1 &&
+	if (get_group_idle(cfqd) && cfqq->cfqg->nr_cfqq == 1 &&
 	    cfqq->cfqg->dispatched &&
 	    !cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true)) {
 		cfqq = NULL;
@@ -3884,7 +3947,7 @@
 			cfqd->cfq_slice_idle);
 	}
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
-	__cfq_update_io_thinktime(&cfqq->cfqg->ttime, cfqd->cfq_group_idle);
+	__cfq_update_io_thinktime(&cfqq->cfqg->ttime, get_group_idle(cfqd));
 #endif
 }
 
@@ -4273,7 +4336,7 @@
 		if (cfq_should_wait_busy(cfqd, cfqq)) {
 			u64 extend_sl = cfqd->cfq_slice_idle;
 			if (!cfqd->cfq_slice_idle)
-				extend_sl = cfqd->cfq_group_idle;
+				extend_sl = get_group_idle(cfqd);
 			cfqq->slice_end = now + extend_sl;
 			cfq_mark_cfqq_wait_busy(cfqq);
 			cfq_log_cfqq(cfqd, cfqq, "will busy wait");
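
Net effect of the cfq-iosched changes: group_idle becomes a per-cgroup tunable, exposed as a new "group_idle" cftype in microseconds and stored internally in nanoseconds, with get_group_idle() falling back to the global cfq_group_idle when the active queue has no group override. A minimal sketch of that override-with-fallback shape, with hypothetical types rather than the cfq ones:

/* Sketch mirroring get_group_idle() above (hypothetical types). */
struct group  { unsigned long long idle_ns; };		/* per-cgroup value */
struct device { struct group *active_group;
		unsigned long long default_idle_ns; };	/* global tunable   */

static unsigned long long effective_group_idle(const struct device *d)
{
	if (d->active_group)
		return d->active_group->idle_ns;
	return d->default_idle_ns;
}

On the legacy hierarchy the file is expected to surface as blkio.group_idle (an assumption based on how the cfq cftypes are usually prefixed), taking a value in microseconds.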
diff --git a/build.config.cuttlefish.aarch64 b/build.config.cuttlefish.aarch64
new file mode 100644
index 0000000..da78020
--- /dev/null
+++ b/build.config.cuttlefish.aarch64
@@ -0,0 +1,18 @@
+ARCH=arm64
+BRANCH=android-4.19
+CC=clang
+CLANG_TRIPLE=aarch64-linux-gnu-
+CROSS_COMPILE=aarch64-linux-androidkernel-
+DEFCONFIG=cuttlefish_defconfig
+EXTRA_CMDS=''
+KERNEL_DIR=common
+POST_DEFCONFIG_CMDS="check_defconfig"
+CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r353983c/bin
+LD=ld.lld
+LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9/bin
+FILES="
+arch/arm64/boot/Image.gz
+vmlinux
+System.map
+"
+STOP_SHIP_TRACEPRINTK=1
diff --git a/build.config.cuttlefish.x86_64 b/build.config.cuttlefish.x86_64
new file mode 100644
index 0000000..da47330
--- /dev/null
+++ b/build.config.cuttlefish.x86_64
@@ -0,0 +1,18 @@
+ARCH=x86_64
+BRANCH=android-4.19
+CC=clang
+CLANG_TRIPLE=x86_64-linux-gnu-
+CROSS_COMPILE=x86_64-linux-androidkernel-
+DEFCONFIG=x86_64_cuttlefish_defconfig
+EXTRA_CMDS=''
+KERNEL_DIR=common
+POST_DEFCONFIG_CMDS="check_defconfig"
+CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r353983c/bin
+LD=ld.lld
+LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin
+FILES="
+arch/x86/boot/bzImage
+vmlinux
+System.map
+"
+STOP_SHIP_TRACEPRINTK=1
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 0fb9586..fdaa0dc 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -495,6 +495,34 @@
 	  Support for key wrapping (NIST SP800-38F / RFC3394) without
 	  padding.
 
+config CRYPTO_NHPOLY1305
+	tristate
+	select CRYPTO_HASH
+	select CRYPTO_POLY1305
+
+config CRYPTO_ADIANTUM
+	tristate "Adiantum support"
+	select CRYPTO_CHACHA20
+	select CRYPTO_POLY1305
+	select CRYPTO_NHPOLY1305
+	help
+	  Adiantum is a tweakable, length-preserving encryption mode
+	  designed for fast and secure disk encryption, especially on
+	  CPUs without dedicated crypto instructions.  It encrypts
+	  each sector using the XChaCha12 stream cipher, two passes of
+	  an ε-almost-∆-universal hash function, and an invocation of
+	  the AES-256 block cipher on a single 16-byte block.  On CPUs
+	  without AES instructions, Adiantum is much faster than
+	  AES-XTS.
+
+	  Adiantum's security is provably reducible to that of its
+	  underlying stream and block ciphers, subject to a security
+	  bound.  Unlike XTS, Adiantum is a true wide-block encryption
+	  mode, so it actually provides an even stronger notion of
+	  security than XTS, subject to the security bound.
+
+	  If unsure, say N.
+
 comment "Hash modes"
 
 config CRYPTO_CMAC
@@ -1438,18 +1466,26 @@
 	  Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
 
 config CRYPTO_CHACHA20
-	tristate "ChaCha20 cipher algorithm"
+	tristate "ChaCha stream cipher algorithms"
 	select CRYPTO_BLKCIPHER
 	help
-	  ChaCha20 cipher algorithm, RFC7539.
+	  The ChaCha20, XChaCha20, and XChaCha12 stream cipher algorithms.
 
 	  ChaCha20 is a 256-bit high-speed stream cipher designed by Daniel J.
 	  Bernstein and further specified in RFC7539 for use in IETF protocols.
-	  This is the portable C implementation of ChaCha20.
-
-	  See also:
+	  This is the portable C implementation of ChaCha20.  See also:
 	  <http://cr.yp.to/chacha/chacha-20080128.pdf>
 
+	  XChaCha20 is the application of the XSalsa20 construction to ChaCha20
+	  rather than to Salsa20.  XChaCha20 extends ChaCha20's nonce length
+	  from 64 bits (or 96 bits using the RFC7539 convention) to 192 bits,
+	  while provably retaining ChaCha20's security.  See also:
+	  <https://cr.yp.to/snuffle/xsalsa-20081128.pdf>
+
+	  XChaCha12 is XChaCha20 reduced to 12 rounds, with correspondingly
+	  reduced security margin but increased performance.  It can be needed
+	  in some performance-sensitive scenarios.
+
 config CRYPTO_CHACHA20_X86_64
 	tristate "ChaCha20 cipher algorithm (x86_64/SSSE3/AVX2)"
 	depends on X86 && 64BIT
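
CONFIG_CRYPTO_ADIANTUM only builds the template; users such as fscrypt or dm-crypt instantiate it by name through the skcipher API. A minimal sketch follows; the instance name "adiantum(xchacha12,aes)" is the conventional XChaCha12-plus-AES-256 instantiation and is an assumption here, since the template registration itself is not visible in the truncated adiantum.c hunk below:

#include <linux/err.h>
#include <crypto/skcipher.h>

static int adiantum_smoke_test(void)
{
	/* name assumed: Adiantum template over XChaCha12 + AES-256 */
	struct crypto_skcipher *tfm =
		crypto_alloc_skcipher("adiantum(xchacha12,aes)", 0, 0);

	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/*
	 * The key is the 32-byte XChaCha key; the block cipher and hash
	 * subkeys are derived in setkey(), and the IV is the 32-byte tweak
	 * (TWEAK_SIZE in adiantum.c below).
	 */
	crypto_free_skcipher(tfm);
	return 0;
}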
diff --git a/crypto/Makefile b/crypto/Makefile
index f6a234d..a56514c 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -83,6 +83,8 @@
 obj-$(CONFIG_CRYPTO_XTS) += xts.o
 obj-$(CONFIG_CRYPTO_CTR) += ctr.o
 obj-$(CONFIG_CRYPTO_KEYWRAP) += keywrap.o
+obj-$(CONFIG_CRYPTO_ADIANTUM) += adiantum.o
+obj-$(CONFIG_CRYPTO_NHPOLY1305) += nhpoly1305.o
 obj-$(CONFIG_CRYPTO_GCM) += gcm.o
 obj-$(CONFIG_CRYPTO_CCM) += ccm.o
 obj-$(CONFIG_CRYPTO_CHACHA20POLY1305) += chacha20poly1305.o
@@ -116,7 +118,7 @@
 obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o
 obj-$(CONFIG_CRYPTO_SEED) += seed.o
 obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o
-obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o
+obj-$(CONFIG_CRYPTO_CHACHA20) += chacha_generic.o
 obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o
 obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o
 obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
diff --git a/crypto/adiantum.c b/crypto/adiantum.c
new file mode 100644
index 0000000..5564e73
--- /dev/null
+++ b/crypto/adiantum.c
@@ -0,0 +1,668 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Adiantum length-preserving encryption mode
+ *
+ * Copyright 2018 Google LLC
+ */
+
+/*
+ * Adiantum is a tweakable, length-preserving encryption mode designed for fast
+ * and secure disk encryption, especially on CPUs without dedicated crypto
+ * instructions.  Adiantum encrypts each sector using the XChaCha12 stream
+ * cipher, two passes of an ε-almost-∆-universal (ε-∆U) hash function based on
+ * NH and Poly1305, and an invocation of the AES-256 block cipher on a single
+ * 16-byte block.  See the paper for details:
+ *
+ *	Adiantum: length-preserving encryption for entry-level processors
+ *      (https://eprint.iacr.org/2018/720.pdf)
+ *
+ * For flexibility, this implementation also allows other ciphers:
+ *
+ *	- Stream cipher: XChaCha12 or XChaCha20
+ *	- Block cipher: any with a 128-bit block size and 256-bit key
+ *
+ * This implementation doesn't currently allow other ε-∆U hash functions, i.e.
+ * HPolyC is not supported.  This is because Adiantum is ~20% faster than HPolyC
+ * but still provably as secure, and also the ε-∆U hash function of HBSH is
+ * formally defined to take two inputs (tweak, message) which makes it difficult
+ * to wrap with the crypto_shash API.  Rather, some details need to be handled
+ * here.  Nevertheless, if needed in the future, support for other ε-∆U hash
+ * functions could be added here.
+ */
+
+#include <crypto/b128ops.h>
+#include <crypto/chacha.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/nhpoly1305.h>
+#include <crypto/scatterwalk.h>
+#include <linux/module.h>
+
+#include "internal.h"
+
+/*
+ * Size of right-hand part of input data, in bytes; also the size of the block
+ * cipher's block size and the hash function's output.
+ */
+#define BLOCKCIPHER_BLOCK_SIZE		16
+
+/* Size of the block cipher key (K_E) in bytes */
+#define BLOCKCIPHER_KEY_SIZE		32
+
+/* Size of the hash key (K_H) in bytes */
+#define HASH_KEY_SIZE		(POLY1305_BLOCK_SIZE + NHPOLY1305_KEY_SIZE)
+
+/*
+ * The specification allows variable-length tweaks, but Linux's crypto API
+ * currently only allows algorithms to support a single length.  The "natural"
+ * tweak length for Adiantum is 16, since that fits into one Poly1305 block for
+ * the best performance.  But longer tweaks are useful for fscrypt, to avoid
+ * needing to derive per-file keys.  So instead we use two blocks, or 32 bytes.
+ */
+#define TWEAK_SIZE		32
+
+struct adiantum_instance_ctx {
+	struct crypto_skcipher_spawn streamcipher_spawn;
+	struct crypto_spawn blockcipher_spawn;
+	struct crypto_shash_spawn hash_spawn;
+};
+
+struct adiantum_tfm_ctx {
+	struct crypto_skcipher *streamcipher;
+	struct crypto_cipher *blockcipher;
+	struct crypto_shash *hash;
+	struct poly1305_key header_hash_key;
+};
+
+struct adiantum_request_ctx {
+
+	/*
+	 * Buffer for right-hand part of data, i.e.
+	 *
+	 *    P_L => P_M => C_M => C_R when encrypting, or
+	 *    C_R => C_M => P_M => P_L when decrypting.
+	 *
+	 * Also used to build the IV for the stream cipher.
+	 */
+	union {
+		u8 bytes[XCHACHA_IV_SIZE];
+		__le32 words[XCHACHA_IV_SIZE / sizeof(__le32)];
+		le128 bignum;	/* interpret as element of Z/(2^{128}Z) */
+	} rbuf;
+
+	bool enc; /* true if encrypting, false if decrypting */
+
+	/*
+	 * The result of the Poly1305 ε-∆U hash function applied to
+	 * (bulk length, tweak)
+	 */
+	le128 header_hash;
+
+	/* Sub-requests, must be last */
+	union {
+		struct shash_desc hash_desc;
+		struct skcipher_request streamcipher_req;
+	} u;
+};
+
+/*
+ * Given the XChaCha stream key K_S, derive the block cipher key K_E and the
+ * hash key K_H as follows:
+ *
+ *     K_E || K_H || ... = XChaCha(key=K_S, nonce=1||0^191)
+ *
+ * Note that this denotes using bits from the XChaCha keystream, which here we
+ * get indirectly by encrypting a buffer containing all 0's.
+ */
+static int adiantum_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			   unsigned int keylen)
+{
+	struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+	struct {
+		u8 iv[XCHACHA_IV_SIZE];
+		u8 derived_keys[BLOCKCIPHER_KEY_SIZE + HASH_KEY_SIZE];
+		struct scatterlist sg;
+		struct crypto_wait wait;
+		struct skcipher_request req; /* must be last */
+	} *data;
+	u8 *keyp;
+	int err;
+
+	/* Set the stream cipher key (K_S) */
+	crypto_skcipher_clear_flags(tctx->streamcipher, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(tctx->streamcipher,
+				  crypto_skcipher_get_flags(tfm) &
+				  CRYPTO_TFM_REQ_MASK);
+	err = crypto_skcipher_setkey(tctx->streamcipher, key, keylen);
+	crypto_skcipher_set_flags(tfm,
+				crypto_skcipher_get_flags(tctx->streamcipher) &
+				CRYPTO_TFM_RES_MASK);
+	if (err)
+		return err;
+
+	/* Derive the subkeys */
+	data = kzalloc(sizeof(*data) +
+		       crypto_skcipher_reqsize(tctx->streamcipher), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+	data->iv[0] = 1;
+	sg_init_one(&data->sg, data->derived_keys, sizeof(data->derived_keys));
+	crypto_init_wait(&data->wait);
+	skcipher_request_set_tfm(&data->req, tctx->streamcipher);
+	skcipher_request_set_callback(&data->req, CRYPTO_TFM_REQ_MAY_SLEEP |
+						  CRYPTO_TFM_REQ_MAY_BACKLOG,
+				      crypto_req_done, &data->wait);
+	skcipher_request_set_crypt(&data->req, &data->sg, &data->sg,
+				   sizeof(data->derived_keys), data->iv);
+	err = crypto_wait_req(crypto_skcipher_encrypt(&data->req), &data->wait);
+	if (err)
+		goto out;
+	keyp = data->derived_keys;
+
+	/* Set the block cipher key (K_E) */
+	crypto_cipher_clear_flags(tctx->blockcipher, CRYPTO_TFM_REQ_MASK);
+	crypto_cipher_set_flags(tctx->blockcipher,
+				crypto_skcipher_get_flags(tfm) &
+				CRYPTO_TFM_REQ_MASK);
+	err = crypto_cipher_setkey(tctx->blockcipher, keyp,
+				   BLOCKCIPHER_KEY_SIZE);
+	crypto_skcipher_set_flags(tfm,
+				  crypto_cipher_get_flags(tctx->blockcipher) &
+				  CRYPTO_TFM_RES_MASK);
+	if (err)
+		goto out;
+	keyp += BLOCKCIPHER_KEY_SIZE;
+
+	/* Set the hash key (K_H) */
+	poly1305_core_setkey(&tctx->header_hash_key, keyp);
+	keyp += POLY1305_BLOCK_SIZE;
+
+	crypto_shash_clear_flags(tctx->hash, CRYPTO_TFM_REQ_MASK);
+	crypto_shash_set_flags(tctx->hash, crypto_skcipher_get_flags(tfm) &
+					   CRYPTO_TFM_REQ_MASK);
+	err = crypto_shash_setkey(tctx->hash, keyp, NHPOLY1305_KEY_SIZE);
+	crypto_skcipher_set_flags(tfm, crypto_shash_get_flags(tctx->hash) &
+				       CRYPTO_TFM_RES_MASK);
+	keyp += NHPOLY1305_KEY_SIZE;
+	WARN_ON(keyp != &data->derived_keys[ARRAY_SIZE(data->derived_keys)]);
+out:
+	kzfree(data);
+	return err;
+}
+
+/* Addition in Z/(2^{128}Z) */
+static inline void le128_add(le128 *r, const le128 *v1, const le128 *v2)
+{
+	u64 x = le64_to_cpu(v1->b);
+	u64 y = le64_to_cpu(v2->b);
+
+	r->b = cpu_to_le64(x + y);
+	r->a = cpu_to_le64(le64_to_cpu(v1->a) + le64_to_cpu(v2->a) +
+			   (x + y < x));
+}
+
+/* Subtraction in Z/(2^{128}Z) */
+static inline void le128_sub(le128 *r, const le128 *v1, const le128 *v2)
+{
+	u64 x = le64_to_cpu(v1->b);
+	u64 y = le64_to_cpu(v2->b);
+
+	r->b = cpu_to_le64(x - y);
+	r->a = cpu_to_le64(le64_to_cpu(v1->a) - le64_to_cpu(v2->a) -
+			   (x - y > x));
+}
+
+/*
+ * Apply the Poly1305 ε-∆U hash function to (bulk length, tweak) and save the
+ * result to rctx->header_hash.  This is the calculation
+ *
+ *	H_T ← Poly1305_{K_T}(bin_{128}(|L|) || T)
+ *
+ * from the procedure in section 6.4 of the Adiantum paper.  The resulting value
+ * is reused in both the first and second hash steps.  Specifically, it's added
+ * to the result of an independently keyed ε-∆U hash function (for equal length
+ * inputs only) taken over the left-hand part (the "bulk") of the message, to
+ * give the overall Adiantum hash of the (tweak, left-hand part) pair.
+ */
+static void adiantum_hash_header(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+	struct adiantum_request_ctx *rctx = skcipher_request_ctx(req);
+	const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE;
+	struct {
+		__le64 message_bits;
+		__le64 padding;
+	} header = {
+		.message_bits = cpu_to_le64((u64)bulk_len * 8)
+	};
+	struct poly1305_state state;
+
+	poly1305_core_init(&state);
+
+	BUILD_BUG_ON(sizeof(header) % POLY1305_BLOCK_SIZE != 0);
+	poly1305_core_blocks(&state, &tctx->header_hash_key,
+			     &header, sizeof(header) / POLY1305_BLOCK_SIZE);
+
+	BUILD_BUG_ON(TWEAK_SIZE % POLY1305_BLOCK_SIZE != 0);
+	poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv,
+			     TWEAK_SIZE / POLY1305_BLOCK_SIZE);
+
+	poly1305_core_emit(&state, &rctx->header_hash);
+}
+
+/* Hash the left-hand part (the "bulk") of the message using NHPoly1305 */
+static int adiantum_hash_message(struct skcipher_request *req,
+				 struct scatterlist *sgl, le128 *digest)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+	struct adiantum_request_ctx *rctx = skcipher_request_ctx(req);
+	const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE;
+	struct shash_desc *hash_desc = &rctx->u.hash_desc;
+	struct sg_mapping_iter miter;
+	unsigned int i, n;
+	int err;
+
+	hash_desc->tfm = tctx->hash;
+	hash_desc->flags = 0;
+
+	err = crypto_shash_init(hash_desc);
+	if (err)
+		return err;
+
+	sg_miter_start(&miter, sgl, sg_nents(sgl),
+		       SG_MITER_FROM_SG | SG_MITER_ATOMIC);
+	for (i = 0; i < bulk_len; i += n) {
+		sg_miter_next(&miter);
+		n = min_t(unsigned int, miter.length, bulk_len - i);
+		err = crypto_shash_update(hash_desc, miter.addr, n);
+		if (err)
+			break;
+	}
+	sg_miter_stop(&miter);
+	if (err)
+		return err;
+
+	return crypto_shash_final(hash_desc, (u8 *)digest);
+}
+
+/* Continue Adiantum encryption/decryption after the stream cipher step */
+static int adiantum_finish(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+	struct adiantum_request_ctx *rctx = skcipher_request_ctx(req);
+	const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE;
+	le128 digest;
+	int err;
+
+	/* If decrypting, decrypt C_M with the block cipher to get P_M */
+	if (!rctx->enc)
+		crypto_cipher_decrypt_one(tctx->blockcipher, rctx->rbuf.bytes,
+					  rctx->rbuf.bytes);
+
+	/*
+	 * Second hash step
+	 *	enc: C_R = C_M - H_{K_H}(T, C_L)
+	 *	dec: P_R = P_M - H_{K_H}(T, P_L)
+	 */
+	err = adiantum_hash_message(req, req->dst, &digest);
+	if (err)
+		return err;
+	le128_add(&digest, &digest, &rctx->header_hash);
+	le128_sub(&rctx->rbuf.bignum, &rctx->rbuf.bignum, &digest);
+	scatterwalk_map_and_copy(&rctx->rbuf.bignum, req->dst,
+				 bulk_len, BLOCKCIPHER_BLOCK_SIZE, 1);
+	return 0;
+}
+
+static void adiantum_streamcipher_done(struct crypto_async_request *areq,
+				       int err)
+{
+	struct skcipher_request *req = areq->data;
+
+	if (!err)
+		err = adiantum_finish(req);
+
+	skcipher_request_complete(req, err);
+}
+
+static int adiantum_crypt(struct skcipher_request *req, bool enc)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+	struct adiantum_request_ctx *rctx = skcipher_request_ctx(req);
+	const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE;
+	unsigned int stream_len;
+	le128 digest;
+	int err;
+
+	if (req->cryptlen < BLOCKCIPHER_BLOCK_SIZE)
+		return -EINVAL;
+
+	rctx->enc = enc;
+
+	/*
+	 * First hash step
+	 *	enc: P_M = P_R + H_{K_H}(T, P_L)
+	 *	dec: C_M = C_R + H_{K_H}(T, C_L)
+	 */
+	adiantum_hash_header(req);
+	err = adiantum_hash_message(req, req->src, &digest);
+	if (err)
+		return err;
+	le128_add(&digest, &digest, &rctx->header_hash);
+	scatterwalk_map_and_copy(&rctx->rbuf.bignum, req->src,
+				 bulk_len, BLOCKCIPHER_BLOCK_SIZE, 0);
+	le128_add(&rctx->rbuf.bignum, &rctx->rbuf.bignum, &digest);
+
+	/* If encrypting, encrypt P_M with the block cipher to get C_M */
+	if (enc)
+		crypto_cipher_encrypt_one(tctx->blockcipher, rctx->rbuf.bytes,
+					  rctx->rbuf.bytes);
+
+	/* Initialize the rest of the XChaCha IV (first part is C_M) */
+	BUILD_BUG_ON(BLOCKCIPHER_BLOCK_SIZE != 16);
+	BUILD_BUG_ON(XCHACHA_IV_SIZE != 32);	/* nonce || stream position */
+	rctx->rbuf.words[4] = cpu_to_le32(1);
+	rctx->rbuf.words[5] = 0;
+	rctx->rbuf.words[6] = 0;
+	rctx->rbuf.words[7] = 0;
+
+	/*
+	 * XChaCha needs to be done on all the data except the last 16 bytes;
+	 * for disk encryption that usually means 4080 or 496 bytes.  But ChaCha
+	 * implementations tend to be most efficient when passed a whole number
+	 * of 64-byte ChaCha blocks, or sometimes even a multiple of 256 bytes.
+	 * And here it doesn't matter whether the last 16 bytes are written to,
+	 * as the second hash step will overwrite them.  Thus, round the XChaCha
+	 * length up to the next 64-byte boundary if possible.
+	 */
+	stream_len = bulk_len;
+	if (round_up(stream_len, CHACHA_BLOCK_SIZE) <= req->cryptlen)
+		stream_len = round_up(stream_len, CHACHA_BLOCK_SIZE);
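+	/*
+	 * For instance: a 4096-byte request gives bulk_len = 4080 and
+	 * round_up(4080, 64) = 4096 <= req->cryptlen, so the stream cipher
+	 * runs over all 4096 bytes; a 512-byte request similarly uses 512
+	 * instead of 496.  A 48-byte request keeps stream_len = 32, since
+	 * rounding up to 64 would run past the end of the buffers.
+	 */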
+
+	skcipher_request_set_tfm(&rctx->u.streamcipher_req, tctx->streamcipher);
+	skcipher_request_set_crypt(&rctx->u.streamcipher_req, req->src,
+				   req->dst, stream_len, &rctx->rbuf);
+	skcipher_request_set_callback(&rctx->u.streamcipher_req,
+				      req->base.flags,
+				      adiantum_streamcipher_done, req);
+	return crypto_skcipher_encrypt(&rctx->u.streamcipher_req) ?:
+		adiantum_finish(req);
+}
+
+static int adiantum_encrypt(struct skcipher_request *req)
+{
+	return adiantum_crypt(req, true);
+}
+
+static int adiantum_decrypt(struct skcipher_request *req)
+{
+	return adiantum_crypt(req, false);
+}
+
+static int adiantum_init_tfm(struct crypto_skcipher *tfm)
+{
+	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+	struct adiantum_instance_ctx *ictx = skcipher_instance_ctx(inst);
+	struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+	struct crypto_skcipher *streamcipher;
+	struct crypto_cipher *blockcipher;
+	struct crypto_shash *hash;
+	unsigned int subreq_size;
+	int err;
+
+	streamcipher = crypto_spawn_skcipher(&ictx->streamcipher_spawn);
+	if (IS_ERR(streamcipher))
+		return PTR_ERR(streamcipher);
+
+	blockcipher = crypto_spawn_cipher(&ictx->blockcipher_spawn);
+	if (IS_ERR(blockcipher)) {
+		err = PTR_ERR(blockcipher);
+		goto err_free_streamcipher;
+	}
+
+	hash = crypto_spawn_shash(&ictx->hash_spawn);
+	if (IS_ERR(hash)) {
+		err = PTR_ERR(hash);
+		goto err_free_blockcipher;
+	}
+
+	tctx->streamcipher = streamcipher;
+	tctx->blockcipher = blockcipher;
+	tctx->hash = hash;
+
+	BUILD_BUG_ON(offsetofend(struct adiantum_request_ctx, u) !=
+		     sizeof(struct adiantum_request_ctx));
+	subreq_size = max(FIELD_SIZEOF(struct adiantum_request_ctx,
+				       u.hash_desc) +
+			  crypto_shash_descsize(hash),
+			  FIELD_SIZEOF(struct adiantum_request_ctx,
+				       u.streamcipher_req) +
+			  crypto_skcipher_reqsize(streamcipher));
+
+	crypto_skcipher_set_reqsize(tfm,
+				    offsetof(struct adiantum_request_ctx, u) +
+				    subreq_size);
+	return 0;
+
+err_free_blockcipher:
+	crypto_free_cipher(blockcipher);
+err_free_streamcipher:
+	crypto_free_skcipher(streamcipher);
+	return err;
+}
+
+static void adiantum_exit_tfm(struct crypto_skcipher *tfm)
+{
+	struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_skcipher(tctx->streamcipher);
+	crypto_free_cipher(tctx->blockcipher);
+	crypto_free_shash(tctx->hash);
+}
+
+static void adiantum_free_instance(struct skcipher_instance *inst)
+{
+	struct adiantum_instance_ctx *ictx = skcipher_instance_ctx(inst);
+
+	crypto_drop_skcipher(&ictx->streamcipher_spawn);
+	crypto_drop_spawn(&ictx->blockcipher_spawn);
+	crypto_drop_shash(&ictx->hash_spawn);
+	kfree(inst);
+}
+
+/*
+ * Check for a supported set of inner algorithms.
+ * See the comment at the beginning of this file.
+ */
+static bool adiantum_supported_algorithms(struct skcipher_alg *streamcipher_alg,
+					  struct crypto_alg *blockcipher_alg,
+					  struct shash_alg *hash_alg)
+{
+	if (strcmp(streamcipher_alg->base.cra_name, "xchacha12") != 0 &&
+	    strcmp(streamcipher_alg->base.cra_name, "xchacha20") != 0)
+		return false;
+
+	if (blockcipher_alg->cra_cipher.cia_min_keysize > BLOCKCIPHER_KEY_SIZE ||
+	    blockcipher_alg->cra_cipher.cia_max_keysize < BLOCKCIPHER_KEY_SIZE)
+		return false;
+	if (blockcipher_alg->cra_blocksize != BLOCKCIPHER_BLOCK_SIZE)
+		return false;
+
+	if (strcmp(hash_alg->base.cra_name, "nhpoly1305") != 0)
+		return false;
+
+	return true;
+}
+
+static int adiantum_create(struct crypto_template *tmpl, struct rtattr **tb)
+{
+	struct crypto_attr_type *algt;
+	const char *streamcipher_name;
+	const char *blockcipher_name;
+	const char *nhpoly1305_name;
+	struct skcipher_instance *inst;
+	struct adiantum_instance_ctx *ictx;
+	struct skcipher_alg *streamcipher_alg;
+	struct crypto_alg *blockcipher_alg;
+	struct crypto_alg *_hash_alg;
+	struct shash_alg *hash_alg;
+	int err;
+
+	algt = crypto_get_attr_type(tb);
+	if (IS_ERR(algt))
+		return PTR_ERR(algt);
+
+	if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask)
+		return -EINVAL;
+
+	streamcipher_name = crypto_attr_alg_name(tb[1]);
+	if (IS_ERR(streamcipher_name))
+		return PTR_ERR(streamcipher_name);
+
+	blockcipher_name = crypto_attr_alg_name(tb[2]);
+	if (IS_ERR(blockcipher_name))
+		return PTR_ERR(blockcipher_name);
+
+	nhpoly1305_name = crypto_attr_alg_name(tb[3]);
+	if (nhpoly1305_name == ERR_PTR(-ENOENT))
+		nhpoly1305_name = "nhpoly1305";
+	if (IS_ERR(nhpoly1305_name))
+		return PTR_ERR(nhpoly1305_name);
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+	ictx = skcipher_instance_ctx(inst);
+
+	/* Stream cipher, e.g. "xchacha12" */
+	crypto_set_skcipher_spawn(&ictx->streamcipher_spawn,
+				  skcipher_crypto_instance(inst));
+	err = crypto_grab_skcipher(&ictx->streamcipher_spawn, streamcipher_name,
+				   0, crypto_requires_sync(algt->type,
+							   algt->mask));
+	if (err)
+		goto out_free_inst;
+	streamcipher_alg = crypto_spawn_skcipher_alg(&ictx->streamcipher_spawn);
+
+	/* Block cipher, e.g. "aes" */
+	crypto_set_spawn(&ictx->blockcipher_spawn,
+			 skcipher_crypto_instance(inst));
+	err = crypto_grab_spawn(&ictx->blockcipher_spawn, blockcipher_name,
+				CRYPTO_ALG_TYPE_CIPHER, CRYPTO_ALG_TYPE_MASK);
+	if (err)
+		goto out_drop_streamcipher;
+	blockcipher_alg = ictx->blockcipher_spawn.alg;
+
+	/* NHPoly1305 ε-∆U hash function */
+	_hash_alg = crypto_alg_mod_lookup(nhpoly1305_name,
+					  CRYPTO_ALG_TYPE_SHASH,
+					  CRYPTO_ALG_TYPE_MASK);
+	if (IS_ERR(_hash_alg)) {
+		err = PTR_ERR(_hash_alg);
+		goto out_drop_blockcipher;
+	}
+	hash_alg = __crypto_shash_alg(_hash_alg);
+	err = crypto_init_shash_spawn(&ictx->hash_spawn, hash_alg,
+				      skcipher_crypto_instance(inst));
+	if (err)
+		goto out_put_hash;
+
+	/* Check the set of algorithms */
+	if (!adiantum_supported_algorithms(streamcipher_alg, blockcipher_alg,
+					   hash_alg)) {
+		pr_warn("Unsupported Adiantum instantiation: (%s,%s,%s)\n",
+			streamcipher_alg->base.cra_name,
+			blockcipher_alg->cra_name, hash_alg->base.cra_name);
+		err = -EINVAL;
+		goto out_drop_hash;
+	}
+
+	/* Instance fields */
+
+	err = -ENAMETOOLONG;
+	if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
+		     "adiantum(%s,%s)", streamcipher_alg->base.cra_name,
+		     blockcipher_alg->cra_name) >= CRYPTO_MAX_ALG_NAME)
+		goto out_drop_hash;
+	if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+		     "adiantum(%s,%s,%s)",
+		     streamcipher_alg->base.cra_driver_name,
+		     blockcipher_alg->cra_driver_name,
+		     hash_alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
+		goto out_drop_hash;
+
+	inst->alg.base.cra_flags = streamcipher_alg->base.cra_flags &
+				   CRYPTO_ALG_ASYNC;
+	inst->alg.base.cra_blocksize = BLOCKCIPHER_BLOCK_SIZE;
+	inst->alg.base.cra_ctxsize = sizeof(struct adiantum_tfm_ctx);
+	inst->alg.base.cra_alignmask = streamcipher_alg->base.cra_alignmask |
+				       hash_alg->base.cra_alignmask;
+	/*
+	 * The block cipher is only invoked once per message, so for long
+	 * messages (e.g. sectors for disk encryption) its performance doesn't
+	 * matter as much as that of the stream cipher and hash function.  Thus,
+	 * weigh the block cipher's ->cra_priority less.
+	 */
+	inst->alg.base.cra_priority = (4 * streamcipher_alg->base.cra_priority +
+				       2 * hash_alg->base.cra_priority +
+				       blockcipher_alg->cra_priority) / 7;
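+	/*
+	 * For example, if all three underlying algorithms have priority 100
+	 * (as the generic xchacha12, nhpoly1305 and aes implementations do),
+	 * this works out to (400 + 200 + 100) / 7 = 100.
+	 */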
+
+	inst->alg.setkey = adiantum_setkey;
+	inst->alg.encrypt = adiantum_encrypt;
+	inst->alg.decrypt = adiantum_decrypt;
+	inst->alg.init = adiantum_init_tfm;
+	inst->alg.exit = adiantum_exit_tfm;
+	inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(streamcipher_alg);
+	inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(streamcipher_alg);
+	inst->alg.ivsize = TWEAK_SIZE;
+
+	inst->free = adiantum_free_instance;
+
+	err = skcipher_register_instance(tmpl, inst);
+	if (err)
+		goto out_drop_hash;
+
+	crypto_mod_put(_hash_alg);
+	return 0;
+
+out_drop_hash:
+	crypto_drop_shash(&ictx->hash_spawn);
+out_put_hash:
+	crypto_mod_put(_hash_alg);
+out_drop_blockcipher:
+	crypto_drop_spawn(&ictx->blockcipher_spawn);
+out_drop_streamcipher:
+	crypto_drop_skcipher(&ictx->streamcipher_spawn);
+out_free_inst:
+	kfree(inst);
+	return err;
+}
+
+/* adiantum(streamcipher_name, blockcipher_name [, nhpoly1305_name]) */
+static struct crypto_template adiantum_tmpl = {
+	.name = "adiantum",
+	.create = adiantum_create,
+	.module = THIS_MODULE,
+};
+
+static int __init adiantum_module_init(void)
+{
+	return crypto_register_template(&adiantum_tmpl);
+}
+
+static void __exit adiantum_module_exit(void)
+{
+	crypto_unregister_template(&adiantum_tmpl);
+}
+
+module_init(adiantum_module_init);
+module_exit(adiantum_module_exit);
+
+MODULE_DESCRIPTION("Adiantum length-preserving encryption mode");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+MODULE_ALIAS_CRYPTO("adiantum");
diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c
index ca554d5..13df33a 100644
--- a/crypto/aes_generic.c
+++ b/crypto/aes_generic.c
@@ -63,7 +63,8 @@
 
 static const u32 rco_tab[10] = { 1, 2, 4, 8, 16, 32, 64, 128, 27, 54 };
 
-__visible const u32 crypto_ft_tab[4][256] = {
+/* cacheline-aligned to facilitate prefetching into cache */
+__visible const u32 crypto_ft_tab[4][256] __cacheline_aligned = {
 	{
 		0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6,
 		0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591,
@@ -327,7 +328,7 @@
 	}
 };
 
-__visible const u32 crypto_fl_tab[4][256] = {
+__visible const u32 crypto_fl_tab[4][256] __cacheline_aligned = {
 	{
 		0x00000063, 0x0000007c, 0x00000077, 0x0000007b,
 		0x000000f2, 0x0000006b, 0x0000006f, 0x000000c5,
@@ -591,7 +592,7 @@
 	}
 };
 
-__visible const u32 crypto_it_tab[4][256] = {
+__visible const u32 crypto_it_tab[4][256] __cacheline_aligned = {
 	{
 		0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a,
 		0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b,
@@ -855,7 +856,7 @@
 	}
 };
 
-__visible const u32 crypto_il_tab[4][256] = {
+__visible const u32 crypto_il_tab[4][256] __cacheline_aligned = {
 	{
 		0x00000052, 0x00000009, 0x0000006a, 0x000000d5,
 		0x00000030, 0x00000036, 0x000000a5, 0x00000038,
diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c
deleted file mode 100644
index e451c3c..0000000
--- a/crypto/chacha20_generic.c
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <asm/unaligned.h>
-#include <crypto/algapi.h>
-#include <crypto/chacha20.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/module.h>
-
-static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src,
-			     unsigned int bytes)
-{
-	u32 stream[CHACHA20_BLOCK_WORDS];
-
-	if (dst != src)
-		memcpy(dst, src, bytes);
-
-	while (bytes >= CHACHA20_BLOCK_SIZE) {
-		chacha20_block(state, stream);
-		crypto_xor(dst, (const u8 *)stream, CHACHA20_BLOCK_SIZE);
-		bytes -= CHACHA20_BLOCK_SIZE;
-		dst += CHACHA20_BLOCK_SIZE;
-	}
-	if (bytes) {
-		chacha20_block(state, stream);
-		crypto_xor(dst, (const u8 *)stream, bytes);
-	}
-}
-
-void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv)
-{
-	state[0]  = 0x61707865; /* "expa" */
-	state[1]  = 0x3320646e; /* "nd 3" */
-	state[2]  = 0x79622d32; /* "2-by" */
-	state[3]  = 0x6b206574; /* "te k" */
-	state[4]  = ctx->key[0];
-	state[5]  = ctx->key[1];
-	state[6]  = ctx->key[2];
-	state[7]  = ctx->key[3];
-	state[8]  = ctx->key[4];
-	state[9]  = ctx->key[5];
-	state[10] = ctx->key[6];
-	state[11] = ctx->key[7];
-	state[12] = get_unaligned_le32(iv +  0);
-	state[13] = get_unaligned_le32(iv +  4);
-	state[14] = get_unaligned_le32(iv +  8);
-	state[15] = get_unaligned_le32(iv + 12);
-}
-EXPORT_SYMBOL_GPL(crypto_chacha20_init);
-
-int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
-			   unsigned int keysize)
-{
-	struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int i;
-
-	if (keysize != CHACHA20_KEY_SIZE)
-		return -EINVAL;
-
-	for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
-		ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_chacha20_setkey);
-
-int crypto_chacha20_crypt(struct skcipher_request *req)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct skcipher_walk walk;
-	u32 state[16];
-	int err;
-
-	err = skcipher_walk_virt(&walk, req, true);
-
-	crypto_chacha20_init(state, ctx, walk.iv);
-
-	while (walk.nbytes > 0) {
-		unsigned int nbytes = walk.nbytes;
-
-		if (nbytes < walk.total)
-			nbytes = round_down(nbytes, walk.stride);
-
-		chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
-				 nbytes);
-		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
-	}
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(crypto_chacha20_crypt);
-
-static struct skcipher_alg alg = {
-	.base.cra_name		= "chacha20",
-	.base.cra_driver_name	= "chacha20-generic",
-	.base.cra_priority	= 100,
-	.base.cra_blocksize	= 1,
-	.base.cra_ctxsize	= sizeof(struct chacha20_ctx),
-	.base.cra_module	= THIS_MODULE,
-
-	.min_keysize		= CHACHA20_KEY_SIZE,
-	.max_keysize		= CHACHA20_KEY_SIZE,
-	.ivsize			= CHACHA20_IV_SIZE,
-	.chunksize		= CHACHA20_BLOCK_SIZE,
-	.setkey			= crypto_chacha20_setkey,
-	.encrypt		= crypto_chacha20_crypt,
-	.decrypt		= crypto_chacha20_crypt,
-};
-
-static int __init chacha20_generic_mod_init(void)
-{
-	return crypto_register_skcipher(&alg);
-}
-
-static void __exit chacha20_generic_mod_fini(void)
-{
-	crypto_unregister_skcipher(&alg);
-}
-
-module_init(chacha20_generic_mod_init);
-module_exit(chacha20_generic_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
-MODULE_DESCRIPTION("chacha20 cipher algorithm");
-MODULE_ALIAS_CRYPTO("chacha20");
-MODULE_ALIAS_CRYPTO("chacha20-generic");
diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c
index af8afe5..dce194c 100644
--- a/crypto/chacha20poly1305.c
+++ b/crypto/chacha20poly1305.c
@@ -13,7 +13,7 @@
 #include <crypto/internal/hash.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/scatterwalk.h>
-#include <crypto/chacha20.h>
+#include <crypto/chacha.h>
 #include <crypto/poly1305.h>
 #include <linux/err.h>
 #include <linux/init.h>
@@ -51,7 +51,7 @@
 };
 
 struct chacha_req {
-	u8 iv[CHACHA20_IV_SIZE];
+	u8 iv[CHACHA_IV_SIZE];
 	struct scatterlist src[1];
 	struct skcipher_request req; /* must be last member */
 };
@@ -97,7 +97,7 @@
 	memcpy(iv, &leicb, sizeof(leicb));
 	memcpy(iv + sizeof(leicb), ctx->salt, ctx->saltlen);
 	memcpy(iv + sizeof(leicb) + ctx->saltlen, req->iv,
-	       CHACHA20_IV_SIZE - sizeof(leicb) - ctx->saltlen);
+	       CHACHA_IV_SIZE - sizeof(leicb) - ctx->saltlen);
 }
 
 static int poly_verify_tag(struct aead_request *req)
@@ -502,7 +502,7 @@
 	struct chachapoly_ctx *ctx = crypto_aead_ctx(aead);
 	int err;
 
-	if (keylen != ctx->saltlen + CHACHA20_KEY_SIZE)
+	if (keylen != ctx->saltlen + CHACHA_KEY_SIZE)
 		return -EINVAL;
 
 	keylen -= ctx->saltlen;
@@ -647,7 +647,7 @@
 
 	err = -EINVAL;
 	/* Need 16-byte IV size, including Initial Block Counter value */
-	if (crypto_skcipher_alg_ivsize(chacha) != CHACHA20_IV_SIZE)
+	if (crypto_skcipher_alg_ivsize(chacha) != CHACHA_IV_SIZE)
 		goto out_drop_chacha;
 	/* Not a stream cipher? */
 	if (chacha->base.cra_blocksize != 1)
diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c
new file mode 100644
index 0000000..35b5831
--- /dev/null
+++ b/crypto/chacha_generic.c
@@ -0,0 +1,217 @@
+/*
+ * ChaCha and XChaCha stream ciphers, including ChaCha20 (RFC7539)
+ *
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2018 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/chacha.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/module.h>
+
+static void chacha_docrypt(u32 *state, u8 *dst, const u8 *src,
+			   unsigned int bytes, int nrounds)
+{
+	/* aligned to potentially speed up crypto_xor() */
+	u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long));
+
+	if (dst != src)
+		memcpy(dst, src, bytes);
+
+	while (bytes >= CHACHA_BLOCK_SIZE) {
+		chacha_block(state, stream, nrounds);
+		crypto_xor(dst, stream, CHACHA_BLOCK_SIZE);
+		bytes -= CHACHA_BLOCK_SIZE;
+		dst += CHACHA_BLOCK_SIZE;
+	}
+	if (bytes) {
+		chacha_block(state, stream, nrounds);
+		crypto_xor(dst, stream, bytes);
+	}
+}
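+
+/*
+ * Sketch of the flow above: a 100-byte request copies the data, XORs one
+ * full 64-byte keystream block in place, then generates a second block of
+ * which only the first 36 bytes are used for the tail.
+ */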
+
+static int chacha_stream_xor(struct skcipher_request *req,
+			     struct chacha_ctx *ctx, u8 *iv)
+{
+	struct skcipher_walk walk;
+	u32 state[16];
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	crypto_chacha_init(state, ctx, iv);
+
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
+
+		if (nbytes < walk.total)
+			nbytes = round_down(nbytes, walk.stride);
+
+		chacha_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
+			       nbytes, ctx->nrounds);
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+	}
+
+	return err;
+}
+
+void crypto_chacha_init(u32 *state, struct chacha_ctx *ctx, u8 *iv)
+{
+	state[0]  = 0x61707865; /* "expa" */
+	state[1]  = 0x3320646e; /* "nd 3" */
+	state[2]  = 0x79622d32; /* "2-by" */
+	state[3]  = 0x6b206574; /* "te k" */
+	state[4]  = ctx->key[0];
+	state[5]  = ctx->key[1];
+	state[6]  = ctx->key[2];
+	state[7]  = ctx->key[3];
+	state[8]  = ctx->key[4];
+	state[9]  = ctx->key[5];
+	state[10] = ctx->key[6];
+	state[11] = ctx->key[7];
+	state[12] = get_unaligned_le32(iv +  0);
+	state[13] = get_unaligned_le32(iv +  4);
+	state[14] = get_unaligned_le32(iv +  8);
+	state[15] = get_unaligned_le32(iv + 12);
+}
+EXPORT_SYMBOL_GPL(crypto_chacha_init);
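+
+/*
+ * Note on the state laid out above: words 0-3 are the "expand 32-byte k"
+ * constants, words 4-11 the 256-bit key, and words 12-15 come from the
+ * 16-byte IV, covering the block counter and nonce (RFC 7539 uses a 32-bit
+ * counter and a 96-bit nonce).
+ */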
+
+static int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			 unsigned int keysize, int nrounds)
+{
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int i;
+
+	if (keysize != CHACHA_KEY_SIZE)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
+		ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
+
+	ctx->nrounds = nrounds;
+	return 0;
+}
+
+int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			   unsigned int keysize)
+{
+	return chacha_setkey(tfm, key, keysize, 20);
+}
+EXPORT_SYMBOL_GPL(crypto_chacha20_setkey);
+
+int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			   unsigned int keysize)
+{
+	return chacha_setkey(tfm, key, keysize, 12);
+}
+EXPORT_SYMBOL_GPL(crypto_chacha12_setkey);
+
+int crypto_chacha_crypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	return chacha_stream_xor(req, ctx, req->iv);
+}
+EXPORT_SYMBOL_GPL(crypto_chacha_crypt);
+
+int crypto_xchacha_crypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct chacha_ctx subctx;
+	u32 state[16];
+	u8 real_iv[16];
+
+	/* Compute the subkey given the original key and first 128 nonce bits */
+	crypto_chacha_init(state, ctx, req->iv);
+	hchacha_block(state, subctx.key, ctx->nrounds);
+	subctx.nrounds = ctx->nrounds;
+
+	/* Build the real IV */
+	memcpy(&real_iv[0], req->iv + 24, 8); /* stream position */
+	memcpy(&real_iv[8], req->iv + 16, 8); /* remaining 64 nonce bits */
+
+	/* Generate the stream and XOR it with the data */
+	return chacha_stream_xor(req, &subctx, real_iv);
+}
+EXPORT_SYMBOL_GPL(crypto_xchacha_crypt);
+
+static struct skcipher_alg algs[] = {
+	{
+		.base.cra_name		= "chacha20",
+		.base.cra_driver_name	= "chacha20-generic",
+		.base.cra_priority	= 100,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= CHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= crypto_chacha20_setkey,
+		.encrypt		= crypto_chacha_crypt,
+		.decrypt		= crypto_chacha_crypt,
+	}, {
+		.base.cra_name		= "xchacha20",
+		.base.cra_driver_name	= "xchacha20-generic",
+		.base.cra_priority	= 100,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= crypto_chacha20_setkey,
+		.encrypt		= crypto_xchacha_crypt,
+		.decrypt		= crypto_xchacha_crypt,
+	}, {
+		.base.cra_name		= "xchacha12",
+		.base.cra_driver_name	= "xchacha12-generic",
+		.base.cra_priority	= 100,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= crypto_chacha12_setkey,
+		.encrypt		= crypto_xchacha_crypt,
+		.decrypt		= crypto_xchacha_crypt,
+	}
+};
+
+static int __init chacha_generic_mod_init(void)
+{
+	return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit chacha_generic_mod_fini(void)
+{
+	crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+module_init(chacha_generic_mod_init);
+module_exit(chacha_generic_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (generic)");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-generic");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-generic");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-generic");
diff --git a/crypto/nhpoly1305.c b/crypto/nhpoly1305.c
new file mode 100644
index 0000000..ec831a5
--- /dev/null
+++ b/crypto/nhpoly1305.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum
+ *
+ * Copyright 2018 Google LLC
+ */
+
+/*
+ * "NHPoly1305" is the main component of Adiantum hashing.
+ * Specifically, it is the calculation
+ *
+ *	H_L ← Poly1305_{K_L}(NH_{K_N}(pad_{128}(L)))
+ *
+ * from the procedure in section 6.4 of the Adiantum paper [1].  It is an
+ * ε-almost-∆-universal (ε-∆U) hash function for equal-length inputs over
+ * Z/(2^{128}Z), where the "∆" operation is addition.  It hashes 1024-byte
+ * chunks of the input with the NH hash function [2], reducing the input length
+ * by 32x.  The resulting NH digests are evaluated as a polynomial in
+ * GF(2^{130}-5), like in the Poly1305 MAC [3].  Note that the polynomial
+ * evaluation by itself would suffice to achieve the ε-∆U property; NH is used
+ * for performance since it's over twice as fast as Poly1305.
+ *
+ * This is *not* a cryptographic hash function; do not use it as such!
+ *
+ * [1] Adiantum: length-preserving encryption for entry-level processors
+ *     (https://eprint.iacr.org/2018/720.pdf)
+ * [2] UMAC: Fast and Secure Message Authentication
+ *     (https://fastcrypto.org/umac/umac_proc.pdf)
+ * [3] The Poly1305-AES message-authentication code
+ *     (https://cr.yp.to/mac/poly1305-20050329.pdf)
+ */
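+
+/*
+ * As a rough size sketch: each NH invocation maps up to 1024 message bytes
+ * down to 32 bytes (four 64-bit passes), so e.g. a 4080-byte Adiantum bulk
+ * yields four NH hashes totalling 128 bytes, which Poly1305 then absorbs as
+ * eight 16-byte blocks.
+ */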
+
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/nhpoly1305.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+static void nh_generic(const u32 *key, const u8 *message, size_t message_len,
+		       __le64 hash[NH_NUM_PASSES])
+{
+	u64 sums[4] = { 0, 0, 0, 0 };
+
+	BUILD_BUG_ON(NH_PAIR_STRIDE != 2);
+	BUILD_BUG_ON(NH_NUM_PASSES != 4);
+
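+	/*
+	 * Each 16-byte unit with 32-bit words m0..m3 contributes, for pass j,
+	 *   (m0 + key[4*j]) * (m2 + key[4*j + 2]) +
+	 *   (m1 + key[4*j + 1]) * (m3 + key[4*j + 3])
+	 * to sums[j], with the additions mod 2^32 and the accumulation
+	 * mod 2^64; the key pointer advances by one 16-byte unit per
+	 * message unit.
+	 */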
+	while (message_len) {
+		u32 m0 = get_unaligned_le32(message + 0);
+		u32 m1 = get_unaligned_le32(message + 4);
+		u32 m2 = get_unaligned_le32(message + 8);
+		u32 m3 = get_unaligned_le32(message + 12);
+
+		sums[0] += (u64)(u32)(m0 + key[ 0]) * (u32)(m2 + key[ 2]);
+		sums[1] += (u64)(u32)(m0 + key[ 4]) * (u32)(m2 + key[ 6]);
+		sums[2] += (u64)(u32)(m0 + key[ 8]) * (u32)(m2 + key[10]);
+		sums[3] += (u64)(u32)(m0 + key[12]) * (u32)(m2 + key[14]);
+		sums[0] += (u64)(u32)(m1 + key[ 1]) * (u32)(m3 + key[ 3]);
+		sums[1] += (u64)(u32)(m1 + key[ 5]) * (u32)(m3 + key[ 7]);
+		sums[2] += (u64)(u32)(m1 + key[ 9]) * (u32)(m3 + key[11]);
+		sums[3] += (u64)(u32)(m1 + key[13]) * (u32)(m3 + key[15]);
+		key += NH_MESSAGE_UNIT / sizeof(key[0]);
+		message += NH_MESSAGE_UNIT;
+		message_len -= NH_MESSAGE_UNIT;
+	}
+
+	hash[0] = cpu_to_le64(sums[0]);
+	hash[1] = cpu_to_le64(sums[1]);
+	hash[2] = cpu_to_le64(sums[2]);
+	hash[3] = cpu_to_le64(sums[3]);
+}
+
+/* Pass the next NH hash value through Poly1305 */
+static void process_nh_hash_value(struct nhpoly1305_state *state,
+				  const struct nhpoly1305_key *key)
+{
+	BUILD_BUG_ON(NH_HASH_BYTES % POLY1305_BLOCK_SIZE != 0);
+
+	poly1305_core_blocks(&state->poly_state, &key->poly_key, state->nh_hash,
+			     NH_HASH_BYTES / POLY1305_BLOCK_SIZE);
+}
+
+/*
+ * Feed the next portion of the source data, as a whole number of 16-byte
+ * "NH message units", through NH and Poly1305.  Each NH hash is taken over
+ * 1024 bytes, except possibly the final one which is taken over a multiple of
+ * 16 bytes up to 1024.  Also, in the case where data is passed in misaligned
+ * chunks, we combine partial hashes; the end result is the same either way.
+ */
+static void nhpoly1305_units(struct nhpoly1305_state *state,
+			     const struct nhpoly1305_key *key,
+			     const u8 *src, unsigned int srclen, nh_t nh_fn)
+{
+	do {
+		unsigned int bytes;
+
+		if (state->nh_remaining == 0) {
+			/* Starting a new NH message */
+			bytes = min_t(unsigned int, srclen, NH_MESSAGE_BYTES);
+			nh_fn(key->nh_key, src, bytes, state->nh_hash);
+			state->nh_remaining = NH_MESSAGE_BYTES - bytes;
+		} else {
+			/* Continuing a previous NH message */
+			__le64 tmp_hash[NH_NUM_PASSES];
+			unsigned int pos;
+			int i;
+
+			pos = NH_MESSAGE_BYTES - state->nh_remaining;
+			bytes = min(srclen, state->nh_remaining);
+			nh_fn(&key->nh_key[pos / 4], src, bytes, tmp_hash);
+			for (i = 0; i < NH_NUM_PASSES; i++)
+				le64_add_cpu(&state->nh_hash[i],
+					     le64_to_cpu(tmp_hash[i]));
+			state->nh_remaining -= bytes;
+		}
+		if (state->nh_remaining == 0)
+			process_nh_hash_value(state, key);
+		src += bytes;
+		srclen -= bytes;
+	} while (srclen);
+}
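+
+/*
+ * Example of the split-message case above: if a full 1024-byte NH message
+ * arrives as 608 bytes followed by 416 bytes, the first call hashes 608
+ * bytes with the start of the key and the second hashes the remaining 416
+ * bytes with the key offset by 608 / 4 words; since NH is a plain sum over
+ * 16-byte units, adding the two partial hashes matches a single 1024-byte
+ * pass.
+ */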
+
+int crypto_nhpoly1305_setkey(struct crypto_shash *tfm,
+			     const u8 *key, unsigned int keylen)
+{
+	struct nhpoly1305_key *ctx = crypto_shash_ctx(tfm);
+	int i;
+
+	if (keylen != NHPOLY1305_KEY_SIZE)
+		return -EINVAL;
+
+	poly1305_core_setkey(&ctx->poly_key, key);
+	key += POLY1305_BLOCK_SIZE;
+
+	for (i = 0; i < NH_KEY_WORDS; i++)
+		ctx->nh_key[i] = get_unaligned_le32(key + i * sizeof(u32));
+
+	return 0;
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_setkey);
+
+int crypto_nhpoly1305_init(struct shash_desc *desc)
+{
+	struct nhpoly1305_state *state = shash_desc_ctx(desc);
+
+	poly1305_core_init(&state->poly_state);
+	state->buflen = 0;
+	state->nh_remaining = 0;
+	return 0;
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_init);
+
+int crypto_nhpoly1305_update_helper(struct shash_desc *desc,
+				    const u8 *src, unsigned int srclen,
+				    nh_t nh_fn)
+{
+	struct nhpoly1305_state *state = shash_desc_ctx(desc);
+	const struct nhpoly1305_key *key = crypto_shash_ctx(desc->tfm);
+	unsigned int bytes;
+
+	if (state->buflen) {
+		bytes = min(srclen, (int)NH_MESSAGE_UNIT - state->buflen);
+		memcpy(&state->buffer[state->buflen], src, bytes);
+		state->buflen += bytes;
+		if (state->buflen < NH_MESSAGE_UNIT)
+			return 0;
+		nhpoly1305_units(state, key, state->buffer, NH_MESSAGE_UNIT,
+				 nh_fn);
+		state->buflen = 0;
+		src += bytes;
+		srclen -= bytes;
+	}
+
+	if (srclen >= NH_MESSAGE_UNIT) {
+		bytes = round_down(srclen, NH_MESSAGE_UNIT);
+		nhpoly1305_units(state, key, src, bytes, nh_fn);
+		src += bytes;
+		srclen -= bytes;
+	}
+
+	if (srclen) {
+		memcpy(state->buffer, src, srclen);
+		state->buflen = srclen;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_update_helper);
+
+int crypto_nhpoly1305_update(struct shash_desc *desc,
+			     const u8 *src, unsigned int srclen)
+{
+	return crypto_nhpoly1305_update_helper(desc, src, srclen, nh_generic);
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_update);
+
+int crypto_nhpoly1305_final_helper(struct shash_desc *desc, u8 *dst, nh_t nh_fn)
+{
+	struct nhpoly1305_state *state = shash_desc_ctx(desc);
+	const struct nhpoly1305_key *key = crypto_shash_ctx(desc->tfm);
+
+	if (state->buflen) {
+		memset(&state->buffer[state->buflen], 0,
+		       NH_MESSAGE_UNIT - state->buflen);
+		nhpoly1305_units(state, key, state->buffer, NH_MESSAGE_UNIT,
+				 nh_fn);
+	}
+
+	if (state->nh_remaining)
+		process_nh_hash_value(state, key);
+
+	poly1305_core_emit(&state->poly_state, dst);
+	return 0;
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_final_helper);
+
+int crypto_nhpoly1305_final(struct shash_desc *desc, u8 *dst)
+{
+	return crypto_nhpoly1305_final_helper(desc, dst, nh_generic);
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_final);
+
+static struct shash_alg nhpoly1305_alg = {
+	.base.cra_name		= "nhpoly1305",
+	.base.cra_driver_name	= "nhpoly1305-generic",
+	.base.cra_priority	= 100,
+	.base.cra_ctxsize	= sizeof(struct nhpoly1305_key),
+	.base.cra_module	= THIS_MODULE,
+	.digestsize		= POLY1305_DIGEST_SIZE,
+	.init			= crypto_nhpoly1305_init,
+	.update			= crypto_nhpoly1305_update,
+	.final			= crypto_nhpoly1305_final,
+	.setkey			= crypto_nhpoly1305_setkey,
+	.descsize		= sizeof(struct nhpoly1305_state),
+};
+
+static int __init nhpoly1305_mod_init(void)
+{
+	return crypto_register_shash(&nhpoly1305_alg);
+}
+
+static void __exit nhpoly1305_mod_exit(void)
+{
+	crypto_unregister_shash(&nhpoly1305_alg);
+}
+
+module_init(nhpoly1305_mod_init);
+module_exit(nhpoly1305_mod_exit);
+
+MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+MODULE_ALIAS_CRYPTO("nhpoly1305");
+MODULE_ALIAS_CRYPTO("nhpoly1305-generic");
diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
index 47d3a6b8..2a06874 100644
--- a/crypto/poly1305_generic.c
+++ b/crypto/poly1305_generic.c
@@ -38,7 +38,7 @@
 {
 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
 
-	memset(dctx->h, 0, sizeof(dctx->h));
+	poly1305_core_init(&dctx->h);
 	dctx->buflen = 0;
 	dctx->rset = false;
 	dctx->sset = false;
@@ -47,23 +47,16 @@
 }
 EXPORT_SYMBOL_GPL(crypto_poly1305_init);
 
-static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key)
+void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key)
 {
 	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
-	dctx->r[0] = (get_unaligned_le32(key +  0) >> 0) & 0x3ffffff;
-	dctx->r[1] = (get_unaligned_le32(key +  3) >> 2) & 0x3ffff03;
-	dctx->r[2] = (get_unaligned_le32(key +  6) >> 4) & 0x3ffc0ff;
-	dctx->r[3] = (get_unaligned_le32(key +  9) >> 6) & 0x3f03fff;
-	dctx->r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff;
+	key->r[0] = (get_unaligned_le32(raw_key +  0) >> 0) & 0x3ffffff;
+	key->r[1] = (get_unaligned_le32(raw_key +  3) >> 2) & 0x3ffff03;
+	key->r[2] = (get_unaligned_le32(raw_key +  6) >> 4) & 0x3ffc0ff;
+	key->r[3] = (get_unaligned_le32(raw_key +  9) >> 6) & 0x3f03fff;
+	key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff;
 }
-
-static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key)
-{
-	dctx->s[0] = get_unaligned_le32(key +  0);
-	dctx->s[1] = get_unaligned_le32(key +  4);
-	dctx->s[2] = get_unaligned_le32(key +  8);
-	dctx->s[3] = get_unaligned_le32(key + 12);
-}
+EXPORT_SYMBOL_GPL(poly1305_core_setkey);
 
 /*
  * Poly1305 requires a unique key for each tag, which implies that we can't set
@@ -75,13 +68,16 @@
 {
 	if (!dctx->sset) {
 		if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
-			poly1305_setrkey(dctx, src);
+			poly1305_core_setkey(&dctx->r, src);
 			src += POLY1305_BLOCK_SIZE;
 			srclen -= POLY1305_BLOCK_SIZE;
 			dctx->rset = true;
 		}
 		if (srclen >= POLY1305_BLOCK_SIZE) {
-			poly1305_setskey(dctx, src);
+			dctx->s[0] = get_unaligned_le32(src +  0);
+			dctx->s[1] = get_unaligned_le32(src +  4);
+			dctx->s[2] = get_unaligned_le32(src +  8);
+			dctx->s[3] = get_unaligned_le32(src + 12);
 			src += POLY1305_BLOCK_SIZE;
 			srclen -= POLY1305_BLOCK_SIZE;
 			dctx->sset = true;
@@ -91,41 +87,37 @@
 }
 EXPORT_SYMBOL_GPL(crypto_poly1305_setdesckey);
 
-static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
-				    const u8 *src, unsigned int srclen,
-				    u32 hibit)
+static void poly1305_blocks_internal(struct poly1305_state *state,
+				     const struct poly1305_key *key,
+				     const void *src, unsigned int nblocks,
+				     u32 hibit)
 {
 	u32 r0, r1, r2, r3, r4;
 	u32 s1, s2, s3, s4;
 	u32 h0, h1, h2, h3, h4;
 	u64 d0, d1, d2, d3, d4;
-	unsigned int datalen;
 
-	if (unlikely(!dctx->sset)) {
-		datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
-		src += srclen - datalen;
-		srclen = datalen;
-	}
+	if (!nblocks)
+		return;
 
-	r0 = dctx->r[0];
-	r1 = dctx->r[1];
-	r2 = dctx->r[2];
-	r3 = dctx->r[3];
-	r4 = dctx->r[4];
+	r0 = key->r[0];
+	r1 = key->r[1];
+	r2 = key->r[2];
+	r3 = key->r[3];
+	r4 = key->r[4];
 
 	s1 = r1 * 5;
 	s2 = r2 * 5;
 	s3 = r3 * 5;
 	s4 = r4 * 5;
 
-	h0 = dctx->h[0];
-	h1 = dctx->h[1];
-	h2 = dctx->h[2];
-	h3 = dctx->h[3];
-	h4 = dctx->h[4];
+	h0 = state->h[0];
+	h1 = state->h[1];
+	h2 = state->h[2];
+	h3 = state->h[3];
+	h4 = state->h[4];
 
-	while (likely(srclen >= POLY1305_BLOCK_SIZE)) {
-
+	do {
 		/* h += m[i] */
 		h0 += (get_unaligned_le32(src +  0) >> 0) & 0x3ffffff;
 		h1 += (get_unaligned_le32(src +  3) >> 2) & 0x3ffffff;
@@ -154,16 +146,36 @@
 		h1 += h0 >> 26;       h0 = h0 & 0x3ffffff;
 
 		src += POLY1305_BLOCK_SIZE;
-		srclen -= POLY1305_BLOCK_SIZE;
+	} while (--nblocks);
+
+	state->h[0] = h0;
+	state->h[1] = h1;
+	state->h[2] = h2;
+	state->h[3] = h3;
+	state->h[4] = h4;
+}
+
+void poly1305_core_blocks(struct poly1305_state *state,
+			  const struct poly1305_key *key,
+			  const void *src, unsigned int nblocks)
+{
+	poly1305_blocks_internal(state, key, src, nblocks, 1 << 24);
+}
+EXPORT_SYMBOL_GPL(poly1305_core_blocks);
+
+static void poly1305_blocks(struct poly1305_desc_ctx *dctx,
+			    const u8 *src, unsigned int srclen, u32 hibit)
+{
+	unsigned int datalen;
+
+	if (unlikely(!dctx->sset)) {
+		datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
+		src += srclen - datalen;
+		srclen = datalen;
 	}
 
-	dctx->h[0] = h0;
-	dctx->h[1] = h1;
-	dctx->h[2] = h2;
-	dctx->h[3] = h3;
-	dctx->h[4] = h4;
-
-	return srclen;
+	poly1305_blocks_internal(&dctx->h, &dctx->r,
+				 src, srclen / POLY1305_BLOCK_SIZE, hibit);
 }
 
 int crypto_poly1305_update(struct shash_desc *desc,
@@ -187,9 +199,9 @@
 	}
 
 	if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
-		bytes = poly1305_blocks(dctx, src, srclen, 1 << 24);
-		src += srclen - bytes;
-		srclen = bytes;
+		poly1305_blocks(dctx, src, srclen, 1 << 24);
+		src += srclen - (srclen % POLY1305_BLOCK_SIZE);
+		srclen %= POLY1305_BLOCK_SIZE;
 	}
 
 	if (unlikely(srclen)) {
@@ -201,30 +213,18 @@
 }
 EXPORT_SYMBOL_GPL(crypto_poly1305_update);
 
-int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
+void poly1305_core_emit(const struct poly1305_state *state, void *dst)
 {
-	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
 	u32 h0, h1, h2, h3, h4;
 	u32 g0, g1, g2, g3, g4;
 	u32 mask;
-	u64 f = 0;
-
-	if (unlikely(!dctx->sset))
-		return -ENOKEY;
-
-	if (unlikely(dctx->buflen)) {
-		dctx->buf[dctx->buflen++] = 1;
-		memset(dctx->buf + dctx->buflen, 0,
-		       POLY1305_BLOCK_SIZE - dctx->buflen);
-		poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0);
-	}
 
 	/* fully carry h */
-	h0 = dctx->h[0];
-	h1 = dctx->h[1];
-	h2 = dctx->h[2];
-	h3 = dctx->h[3];
-	h4 = dctx->h[4];
+	h0 = state->h[0];
+	h1 = state->h[1];
+	h2 = state->h[2];
+	h3 = state->h[3];
+	h4 = state->h[4];
 
 	h2 += (h1 >> 26);     h1 = h1 & 0x3ffffff;
 	h3 += (h2 >> 26);     h2 = h2 & 0x3ffffff;
@@ -254,16 +254,40 @@
 	h4 = (h4 & mask) | g4;
 
 	/* h = h % (2^128) */
-	h0 = (h0 >>  0) | (h1 << 26);
-	h1 = (h1 >>  6) | (h2 << 20);
-	h2 = (h2 >> 12) | (h3 << 14);
-	h3 = (h3 >> 18) | (h4 <<  8);
+	put_unaligned_le32((h0 >>  0) | (h1 << 26), dst +  0);
+	put_unaligned_le32((h1 >>  6) | (h2 << 20), dst +  4);
+	put_unaligned_le32((h2 >> 12) | (h3 << 14), dst +  8);
+	put_unaligned_le32((h3 >> 18) | (h4 <<  8), dst + 12);
+}
+EXPORT_SYMBOL_GPL(poly1305_core_emit);
+
+int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+	__le32 digest[4];
+	u64 f = 0;
+
+	if (unlikely(!dctx->sset))
+		return -ENOKEY;
+
+	if (unlikely(dctx->buflen)) {
+		dctx->buf[dctx->buflen++] = 1;
+		memset(dctx->buf + dctx->buflen, 0,
+		       POLY1305_BLOCK_SIZE - dctx->buflen);
+		poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0);
+	}
+
+	poly1305_core_emit(&dctx->h, digest);
 
 	/* mac = (h + s) % (2^128) */
-	f = (f >> 32) + h0 + dctx->s[0]; put_unaligned_le32(f, dst +  0);
-	f = (f >> 32) + h1 + dctx->s[1]; put_unaligned_le32(f, dst +  4);
-	f = (f >> 32) + h2 + dctx->s[2]; put_unaligned_le32(f, dst +  8);
-	f = (f >> 32) + h3 + dctx->s[3]; put_unaligned_le32(f, dst + 12);
+	f = (f >> 32) + le32_to_cpu(digest[0]) + dctx->s[0];
+	put_unaligned_le32(f, dst + 0);
+	f = (f >> 32) + le32_to_cpu(digest[1]) + dctx->s[1];
+	put_unaligned_le32(f, dst + 4);
+	f = (f >> 32) + le32_to_cpu(digest[2]) + dctx->s[2];
+	put_unaligned_le32(f, dst + 8);
+	f = (f >> 32) + le32_to_cpu(digest[3]) + dctx->s[3];
+	put_unaligned_le32(f, dst + 12);
 
 	return 0;
 }
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index d332988..740ef57 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -2290,6 +2290,17 @@
 				   num_mb);
 		break;
 
+	case 219:
+		test_cipher_speed("adiantum(xchacha12,aes)", ENCRYPT, sec, NULL,
+				  0, speed_template_32);
+		test_cipher_speed("adiantum(xchacha12,aes)", DECRYPT, sec, NULL,
+				  0, speed_template_32);
+		test_cipher_speed("adiantum(xchacha20,aes)", ENCRYPT, sec, NULL,
+				  0, speed_template_32);
+		test_cipher_speed("adiantum(xchacha20,aes)", DECRYPT, sec, NULL,
+				  0, speed_template_32);
+		break;
+
 	case 300:
 		if (alg) {
 			test_hash_speed(alg, sec, generic_hash_speed_template);
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 3664c26..828e3b0 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -2410,6 +2410,18 @@
 /* Please keep this list sorted by algorithm name. */
 static const struct alg_test_desc alg_test_descs[] = {
 	{
+		.alg = "adiantum(xchacha12,aes)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = __VECS(adiantum_xchacha12_aes_tv_template)
+		},
+	}, {
+		.alg = "adiantum(xchacha20,aes)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = __VECS(adiantum_xchacha20_aes_tv_template)
+		},
+	}, {
 		.alg = "aegis128",
 		.test = alg_test_aead,
 		.suite = {
@@ -3293,6 +3305,12 @@
 			}
 		}
 	}, {
+		.alg = "nhpoly1305",
+		.test = alg_test_hash,
+		.suite = {
+			.hash = __VECS(nhpoly1305_tv_template)
+		}
+	}, {
 		.alg = "ofb(aes)",
 		.test = alg_test_skcipher,
 		.fips_allowed = 1,
@@ -3546,6 +3564,18 @@
 			.hash = __VECS(aes_xcbc128_tv_template)
 		}
 	}, {
+		.alg = "xchacha12",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = __VECS(xchacha12_tv_template)
+		},
+	}, {
+		.alg = "xchacha20",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = __VECS(xchacha20_tv_template)
+		},
+	}, {
 		.alg = "xts(aes)",
 		.test = alg_test_skcipher,
 		.fips_allowed = 1,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 74e1454..39652cd 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -29,7 +29,7 @@
 #define MAX_DIGEST_SIZE		64
 #define MAX_TAP			8
 
-#define MAX_KEYLEN		160
+#define MAX_KEYLEN		1088
 #define MAX_IVLEN		32
 
 struct hash_testvec {
@@ -37,10 +37,10 @@
 	const char *key;
 	const char *plaintext;
 	const char *digest;
-	unsigned char tap[MAX_TAP];
+	unsigned short tap[MAX_TAP];
+	unsigned short np;
 	unsigned short psize;
-	unsigned char np;
-	unsigned char ksize;
+	unsigned short ksize;
 };
 
 /*
@@ -5637,6 +5637,1238 @@
 	}
 };
 
+/* NHPoly1305 test vectors from https://github.com/google/adiantum */
+static const struct hash_testvec nhpoly1305_tv_template[] = {
+	{
+		.key	= "\xd2\x5d\x4c\xdd\x8d\x2b\x7f\x7a"
+			  "\xd9\xbe\x71\xec\xd1\x83\x52\xe3"
+			  "\xe1\xad\xd7\x5c\x0a\x75\x9d\xec"
+			  "\x1d\x13\x7e\x5d\x71\x07\xc9\xe4"
+			  "\x57\x2d\x44\x68\xcf\xd8\xd6\xc5"
+			  "\x39\x69\x7d\x32\x75\x51\x4f\x7e"
+			  "\xb2\x4c\xc6\x90\x51\x6e\xd9\xd6"
+			  "\xa5\x8b\x2d\xf1\x94\xf9\xf7\x5e"
+			  "\x2c\x84\x7b\x41\x0f\x88\x50\x89"
+			  "\x30\xd9\xa1\x38\x46\x6c\xc0\x4f"
+			  "\xe8\xdf\xdc\x66\xab\x24\x43\x41"
+			  "\x91\x55\x29\x65\x86\x28\x5e\x45"
+			  "\xd5\x2d\xb7\x80\x08\x9a\xc3\xd4"
+			  "\x9a\x77\x0a\xd4\xef\x3e\xe6\x3f"
+			  "\x6f\x2f\x9b\x3a\x7d\x12\x1e\x80"
+			  "\x6c\x44\xa2\x25\xe1\xf6\x60\xe9"
+			  "\x0d\xaf\xc5\x3c\xa5\x79\xae\x64"
+			  "\xbc\xa0\x39\xa3\x4d\x10\xe5\x4d"
+			  "\xd5\xe7\x89\x7a\x13\xee\x06\x78"
+			  "\xdc\xa4\xdc\x14\x27\xe6\x49\x38"
+			  "\xd0\xe0\x45\x25\x36\xc5\xf4\x79"
+			  "\x2e\x9a\x98\x04\xe4\x2b\x46\x52"
+			  "\x7c\x33\xca\xe2\x56\x51\x50\xe2"
+			  "\xa5\x9a\xae\x18\x6a\x13\xf8\xd2"
+			  "\x21\x31\x66\x02\xe2\xda\x8d\x7e"
+			  "\x41\x19\xb2\x61\xee\x48\x8f\xf1"
+			  "\x65\x24\x2e\x1e\x68\xce\x05\xd9"
+			  "\x2a\xcf\xa5\x3a\x57\xdd\x35\x91"
+			  "\x93\x01\xca\x95\xfc\x2b\x36\x04"
+			  "\xe6\x96\x97\x28\xf6\x31\xfe\xa3"
+			  "\x9d\xf6\x6a\x1e\x80\x8d\xdc\xec"
+			  "\xaf\x66\x11\x13\x02\x88\xd5\x27"
+			  "\x33\xb4\x1a\xcd\xa3\xf6\xde\x31"
+			  "\x8e\xc0\x0e\x6c\xd8\x5a\x97\x5e"
+			  "\xdd\xfd\x60\x69\x38\x46\x3f\x90"
+			  "\x5e\x97\xd3\x32\x76\xc7\x82\x49"
+			  "\xfe\xba\x06\x5f\x2f\xa2\xfd\xff"
+			  "\x80\x05\x40\xe4\x33\x03\xfb\x10"
+			  "\xc0\xde\x65\x8c\xc9\x8d\x3a\x9d"
+			  "\xb5\x7b\x36\x4b\xb5\x0c\xcf\x00"
+			  "\x9c\x87\xe4\x49\xad\x90\xda\x4a"
+			  "\xdd\xbd\xff\xe2\x32\x57\xd6\x78"
+			  "\x36\x39\x6c\xd3\x5b\x9b\x88\x59"
+			  "\x2d\xf0\x46\xe4\x13\x0e\x2b\x35"
+			  "\x0d\x0f\x73\x8a\x4f\x26\x84\x75"
+			  "\x88\x3c\xc5\x58\x66\x18\x1a\xb4"
+			  "\x64\x51\x34\x27\x1b\xa4\x11\xc9"
+			  "\x6d\x91\x8a\xfa\x32\x60\x9d\xd7"
+			  "\x87\xe5\xaa\x43\x72\xf8\xda\xd1"
+			  "\x48\x44\x13\x61\xdc\x8c\x76\x17"
+			  "\x0c\x85\x4e\xf3\xdd\xa2\x42\xd2"
+			  "\x74\xc1\x30\x1b\xeb\x35\x31\x29"
+			  "\x5b\xd7\x4c\x94\x46\x35\xa1\x23"
+			  "\x50\xf2\xa2\x8e\x7e\x4f\x23\x4f"
+			  "\x51\xff\xe2\xc9\xa3\x7d\x56\x8b"
+			  "\x41\xf2\xd0\xc5\x57\x7e\x59\xac"
+			  "\xbb\x65\xf3\xfe\xf7\x17\xef\x63"
+			  "\x7c\x6f\x23\xdd\x22\x8e\xed\x84"
+			  "\x0e\x3b\x09\xb3\xf3\xf4\x8f\xcd"
+			  "\x37\xa8\xe1\xa7\x30\xdb\xb1\xa2"
+			  "\x9c\xa2\xdf\x34\x17\x3e\x68\x44"
+			  "\xd0\xde\x03\x50\xd1\x48\x6b\x20"
+			  "\xe2\x63\x45\xa5\xea\x87\xc2\x42"
+			  "\x95\x03\x49\x05\xed\xe0\x90\x29"
+			  "\x1a\xb8\xcf\x9b\x43\xcf\x29\x7a"
+			  "\x63\x17\x41\x9f\xe0\xc9\x10\xfd"
+			  "\x2c\x56\x8c\x08\x55\xb4\xa9\x27"
+			  "\x0f\x23\xb1\x05\x6a\x12\x46\xc7"
+			  "\xe1\xfe\x28\x93\x93\xd7\x2f\xdc"
+			  "\x98\x30\xdb\x75\x8a\xbe\x97\x7a"
+			  "\x02\xfb\x8c\xba\xbe\x25\x09\xbe"
+			  "\xce\xcb\xa2\xef\x79\x4d\x0e\x9d"
+			  "\x1b\x9d\xb6\x39\x34\x38\xfa\x07"
+			  "\xec\xe8\xfc\x32\x85\x1d\xf7\x85"
+			  "\x63\xc3\x3c\xc0\x02\x75\xd7\x3f"
+			  "\xb2\x68\x60\x66\x65\x81\xc6\xb1"
+			  "\x42\x65\x4b\x4b\x28\xd7\xc7\xaa"
+			  "\x9b\xd2\xdc\x1b\x01\xe0\x26\x39"
+			  "\x01\xc1\x52\x14\xd1\x3f\xb7\xe6"
+			  "\x61\x41\xc7\x93\xd2\xa2\x67\xc6"
+			  "\xf7\x11\xb5\xf5\xea\xdd\x19\xfb"
+			  "\x4d\x21\x12\xd6\x7d\xf1\x10\xb0"
+			  "\x89\x07\xc7\x5a\x52\x73\x70\x2f"
+			  "\x32\xef\x65\x2b\x12\xb2\xf0\xf5"
+			  "\x20\xe0\x90\x59\x7e\x64\xf1\x4c"
+			  "\x41\xb3\xa5\x91\x08\xe6\x5e\x5f"
+			  "\x05\x56\x76\xb4\xb0\xcd\x70\x53"
+			  "\x10\x48\x9c\xff\xc2\x69\x55\x24"
+			  "\x87\xef\x84\xea\xfb\xa7\xbf\xa0"
+			  "\x91\x04\xad\x4f\x8b\x57\x54\x4b"
+			  "\xb6\xe9\xd1\xac\x37\x2f\x1d\x2e"
+			  "\xab\xa5\xa4\xe8\xff\xfb\xd9\x39"
+			  "\x2f\xb7\xac\xd1\xfe\x0b\x9a\x80"
+			  "\x0f\xb6\xf4\x36\x39\x90\x51\xe3"
+			  "\x0a\x2f\xb6\x45\x76\x89\xcd\x61"
+			  "\xfe\x48\x5f\x75\x1d\x13\x00\x62"
+			  "\x80\x24\x47\xe7\xbc\x37\xd7\xe3"
+			  "\x15\xe8\x68\x22\xaf\x80\x6f\x4b"
+			  "\xa8\x9f\x01\x10\x48\x14\xc3\x02"
+			  "\x52\xd2\xc7\x75\x9b\x52\x6d\x30"
+			  "\xac\x13\x85\xc8\xf7\xa3\x58\x4b"
+			  "\x49\xf7\x1c\x45\x55\x8c\x39\x9a"
+			  "\x99\x6d\x97\x27\x27\xe6\xab\xdd"
+			  "\x2c\x42\x1b\x35\xdd\x9d\x73\xbb"
+			  "\x6c\xf3\x64\xf1\xfb\xb9\xf7\xe6"
+			  "\x4a\x3c\xc0\x92\xc0\x2e\xb7\x1a"
+			  "\xbe\xab\xb3\x5a\xe5\xea\xb1\x48"
+			  "\x58\x13\x53\x90\xfd\xc3\x8e\x54"
+			  "\xf9\x18\x16\x73\xe8\xcb\x6d\x39"
+			  "\x0e\xd7\xe0\xfe\xb6\x9f\x43\x97"
+			  "\xe8\xd0\x85\x56\x83\x3e\x98\x68"
+			  "\x7f\xbd\x95\xa8\x9a\x61\x21\x8f"
+			  "\x06\x98\x34\xa6\xc8\xd6\x1d\xf3"
+			  "\x3d\x43\xa4\x9a\x8c\xe5\xd3\x5a"
+			  "\x32\xa2\x04\x22\xa4\x19\x1a\x46"
+			  "\x42\x7e\x4d\xe5\xe0\xe6\x0e\xca"
+			  "\xd5\x58\x9d\x2c\xaf\xda\x33\x5c"
+			  "\xb0\x79\x9e\xc9\xfc\xca\xf0\x2f"
+			  "\xa8\xb2\x77\xeb\x7a\xa2\xdd\x37"
+			  "\x35\x83\x07\xd6\x02\x1a\xb6\x6c"
+			  "\x24\xe2\x59\x08\x0e\xfd\x3e\x46"
+			  "\xec\x40\x93\xf4\x00\x26\x4f\x2a"
+			  "\xff\x47\x2f\xeb\x02\x92\x26\x5b"
+			  "\x53\x17\xc2\x8d\x2a\xc7\xa3\x1b"
+			  "\xcd\xbc\xa7\xe8\xd1\x76\xe3\x80"
+			  "\x21\xca\x5d\x3b\xe4\x9c\x8f\xa9"
+			  "\x5b\x7f\x29\x7f\x7c\xd8\xed\x6d"
+			  "\x8c\xb2\x86\x85\xe7\x77\xf2\x85"
+			  "\xab\x38\xa9\x9d\xc1\x4e\xc5\x64"
+			  "\x33\x73\x8b\x59\x03\xad\x05\xdf"
+			  "\x25\x98\x31\xde\xef\x13\xf1\x9b"
+			  "\x3c\x91\x9d\x7b\xb1\xfa\xe6\xbf"
+			  "\x5b\xed\xa5\x55\xe6\xea\x6c\x74"
+			  "\xf4\xb9\xe4\x45\x64\x72\x81\xc2"
+			  "\x4c\x28\xd4\xcd\xac\xe2\xde\xf9"
+			  "\xeb\x5c\xeb\x61\x60\x5a\xe5\x28",
+		.ksize	= 1088,
+		.plaintext	= "",
+		.psize	= 0,
+		.digest	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+	}, {
+		.key	= "\x29\x21\x43\xcb\xcb\x13\x07\xde"
+			  "\xbf\x48\xdf\x8a\x7f\xa2\x84\xde"
+			  "\x72\x23\x9d\xf5\xf0\x07\xf2\x4c"
+			  "\x20\x3a\x93\xb9\xcd\x5d\xfe\xcb"
+			  "\x99\x2c\x2b\x58\xc6\x50\x5f\x94"
+			  "\x56\xc3\x7c\x0d\x02\x3f\xb8\x5e"
+			  "\x7b\xc0\x6c\x51\x34\x76\xc0\x0e"
+			  "\xc6\x22\xc8\x9e\x92\xa0\x21\xc9"
+			  "\x85\x5c\x7c\xf8\xe2\x64\x47\xc9"
+			  "\xe4\xa2\x57\x93\xf8\xa2\x69\xcd"
+			  "\x62\x98\x99\xf4\xd7\x7b\x14\xb1"
+			  "\xd8\x05\xff\x04\x15\xc9\xe1\x6e"
+			  "\x9b\xe6\x50\x6b\x0b\x3f\x22\x1f"
+			  "\x08\xde\x0c\x5b\x08\x7e\xc6\x2f"
+			  "\x6c\xed\xd6\xb2\x15\xa4\xb3\xf9"
+			  "\xa7\x46\x38\x2a\xea\x69\xa5\xde"
+			  "\x02\xc3\x96\x89\x4d\x55\x3b\xed"
+			  "\x3d\x3a\x85\x77\xbf\x97\x45\x5c"
+			  "\x9e\x02\x69\xe2\x1b\x68\xbe\x96"
+			  "\xfb\x64\x6f\x0f\xf6\x06\x40\x67"
+			  "\xfa\x04\xe3\x55\xfa\xbe\xa4\x60"
+			  "\xef\x21\x66\x97\xe6\x9d\x5c\x1f"
+			  "\x62\x37\xaa\x31\xde\xe4\x9c\x28"
+			  "\x95\xe0\x22\x86\xf4\x4d\xf3\x07"
+			  "\xfd\x5f\x3a\x54\x2c\x51\x80\x71"
+			  "\xba\x78\x69\x5b\x65\xab\x1f\x81"
+			  "\xed\x3b\xff\x34\xa3\xfb\xbc\x73"
+			  "\x66\x7d\x13\x7f\xdf\x6e\xe2\xe2"
+			  "\xeb\x4f\x6c\xda\x7d\x33\x57\xd0"
+			  "\xd3\x7c\x95\x4f\x33\x58\x21\xc7"
+			  "\xc0\xe5\x6f\x42\x26\xc6\x1f\x5e"
+			  "\x85\x1b\x98\x9a\xa2\x1e\x55\x77"
+			  "\x23\xdf\x81\x5e\x79\x55\x05\xfc"
+			  "\xfb\xda\xee\xba\x5a\xba\xf7\x77"
+			  "\x7f\x0e\xd3\xe1\x37\xfe\x8d\x2b"
+			  "\xd5\x3f\xfb\xd0\xc0\x3c\x0b\x3f"
+			  "\xcf\x3c\x14\xcf\xfb\x46\x72\x4c"
+			  "\x1f\x39\xe2\xda\x03\x71\x6d\x23"
+			  "\xef\x93\xcd\x39\xd9\x37\x80\x4d"
+			  "\x65\x61\xd1\x2c\x03\xa9\x47\x72"
+			  "\x4d\x1e\x0e\x16\x33\x0f\x21\x17"
+			  "\xec\x92\xea\x6f\x37\x22\xa4\xd8"
+			  "\x03\x33\x9e\xd8\x03\x69\x9a\xe8"
+			  "\xb2\x57\xaf\x78\x99\x05\x12\xab"
+			  "\x48\x90\x80\xf0\x12\x9b\x20\x64"
+			  "\x7a\x1d\x47\x5f\xba\x3c\xf9\xc3"
+			  "\x0a\x0d\x8d\xa1\xf9\x1b\x82\x13"
+			  "\x3e\x0d\xec\x0a\x83\xc0\x65\xe1"
+			  "\xe9\x95\xff\x97\xd6\xf2\xe4\xd5"
+			  "\x86\xc0\x1f\x29\x27\x63\xd7\xde"
+			  "\xb7\x0a\x07\x99\x04\x2d\xa3\x89"
+			  "\xa2\x43\xcf\xf3\xe1\x43\xac\x4a"
+			  "\x06\x97\xd0\x05\x4f\x87\xfa\xf9"
+			  "\x9b\xbf\x52\x70\xbd\xbc\x6c\xf3"
+			  "\x03\x13\x60\x41\x28\x09\xec\xcc"
+			  "\xb1\x1a\xec\xd6\xfb\x6f\x2a\x89"
+			  "\x5d\x0b\x53\x9c\x59\xc1\x84\x21"
+			  "\x33\x51\x47\x19\x31\x9c\xd4\x0a"
+			  "\x4d\x04\xec\x50\x90\x61\xbd\xbc"
+			  "\x7e\xc8\xd9\x6c\x98\x1d\x45\x41"
+			  "\x17\x5e\x97\x1c\xc5\xa8\xe8\xea"
+			  "\x46\x58\x53\xf7\x17\xd5\xad\x11"
+			  "\xc8\x54\xf5\x7a\x33\x90\xf5\x19"
+			  "\xba\x36\xb4\xfc\x52\xa5\x72\x3d"
+			  "\x14\xbb\x55\xa7\xe9\xe3\x12\xf7"
+			  "\x1c\x30\xa2\x82\x03\xbf\x53\x91"
+			  "\x2e\x60\x41\x9f\x5b\x69\x39\xf6"
+			  "\x4d\xc8\xf8\x46\x7a\x7f\xa4\x98"
+			  "\x36\xff\x06\xcb\xca\xe7\x33\xf2"
+			  "\xc0\x4a\xf4\x3c\x14\x44\x5f\x6b"
+			  "\x75\xef\x02\x36\x75\x08\x14\xfd"
+			  "\x10\x8e\xa5\x58\xd0\x30\x46\x49"
+			  "\xaf\x3a\xf8\x40\x3d\x35\xdb\x84"
+			  "\x11\x2e\x97\x6a\xb7\x87\x7f\xad"
+			  "\xf1\xfa\xa5\x63\x60\xd8\x5e\xbf"
+			  "\x41\x78\x49\xcf\x77\xbb\x56\xbb"
+			  "\x7d\x01\x67\x05\x22\xc8\x8f\x41"
+			  "\xba\x81\xd2\xca\x2c\x38\xac\x76"
+			  "\x06\xc1\x1a\xc2\xce\xac\x90\x67"
+			  "\x57\x3e\x20\x12\x5b\xd9\x97\x58"
+			  "\x65\x05\xb7\x04\x61\x7e\xd8\x3a"
+			  "\xbf\x55\x3b\x13\xe9\x34\x5a\x37"
+			  "\x36\xcb\x94\x45\xc5\x32\xb3\xa0"
+			  "\x0c\x3e\x49\xc5\xd3\xed\xa7\xf0"
+			  "\x1c\x69\xcc\xea\xcc\x83\xc9\x16"
+			  "\x95\x72\x4b\xf4\x89\xd5\xb9\x10"
+			  "\xf6\x2d\x60\x15\xea\x3c\x06\x66"
+			  "\x9f\x82\xad\x17\xce\xd2\xa4\x48"
+			  "\x7c\x65\xd9\xf8\x02\x4d\x9b\x4c"
+			  "\x89\x06\x3a\x34\x85\x48\x89\x86"
+			  "\xf9\x24\xa9\x54\x72\xdb\x44\x95"
+			  "\xc7\x44\x1c\x19\x11\x4c\x04\xdc"
+			  "\x13\xb9\x67\xc8\xc3\x3a\x6a\x50"
+			  "\xfa\xd1\xfb\xe1\x88\xb6\xf1\xa3"
+			  "\xc5\x3b\xdc\x38\x45\x16\x26\x02"
+			  "\x3b\xb8\x8f\x8b\x58\x7d\x23\x04"
+			  "\x50\x6b\x81\x9f\xae\x66\xac\x6f"
+			  "\xcf\x2a\x9d\xf1\xfd\x1d\x57\x07"
+			  "\xbe\x58\xeb\x77\x0c\xe3\xc2\x19"
+			  "\x14\x74\x1b\x51\x1c\x4f\x41\xf3"
+			  "\x32\x89\xb3\xe7\xde\x62\xf6\x5f"
+			  "\xc7\x6a\x4a\x2a\x5b\x0f\x5f\x87"
+			  "\x9c\x08\xb9\x02\x88\xc8\x29\xb7"
+			  "\x94\x52\xfa\x52\xfe\xaa\x50\x10"
+			  "\xba\x48\x75\x5e\x11\x1b\xe6\x39"
+			  "\xd7\x82\x2c\x87\xf1\x1e\xa4\x38"
+			  "\x72\x3e\x51\xe7\xd8\x3e\x5b\x7b"
+			  "\x31\x16\x89\xba\xd6\xad\x18\x5e"
+			  "\xba\xf8\x12\xb3\xf4\x6c\x47\x30"
+			  "\xc0\x38\x58\xb3\x10\x8d\x58\x5d"
+			  "\xb4\xfb\x19\x7e\x41\xc3\x66\xb8"
+			  "\xd6\x72\x84\xe1\x1a\xc2\x71\x4c"
+			  "\x0d\x4a\x21\x7a\xab\xa2\xc0\x36"
+			  "\x15\xc5\xe9\x46\xd7\x29\x17\x76"
+			  "\x5e\x47\x36\x7f\x72\x05\xa7\xcc"
+			  "\x36\x63\xf9\x47\x7d\xe6\x07\x3c"
+			  "\x8b\x79\x1d\x96\x61\x8d\x90\x65"
+			  "\x7c\xf5\xeb\x4e\x6e\x09\x59\x6d"
+			  "\x62\x50\x1b\x0f\xe0\xdc\x78\xf2"
+			  "\x5b\x83\x1a\xa1\x11\x75\xfd\x18"
+			  "\xd7\xe2\x8d\x65\x14\x21\xce\xbe"
+			  "\xb5\x87\xe3\x0a\xda\x24\x0a\x64"
+			  "\xa9\x9f\x03\x8d\x46\x5d\x24\x1a"
+			  "\x8a\x0c\x42\x01\xca\xb1\x5f\x7c"
+			  "\xa5\xac\x32\x4a\xb8\x07\x91\x18"
+			  "\x6f\xb0\x71\x3c\xc9\xb1\xa8\xf8"
+			  "\x5f\x69\xa5\xa1\xca\x9e\x7a\xaa"
+			  "\xac\xe9\xc7\x47\x41\x75\x25\xc3"
+			  "\x73\xe2\x0b\xdd\x6d\x52\x71\xbe"
+			  "\xc5\xdc\xb4\xe7\x01\x26\x53\x77"
+			  "\x86\x90\x85\x68\x6b\x7b\x03\x53"
+			  "\xda\x52\x52\x51\x68\xc8\xf3\xec"
+			  "\x6c\xd5\x03\x7a\xa3\x0e\xb4\x02"
+			  "\x5f\x1a\xab\xee\xca\x67\x29\x7b"
+			  "\xbd\x96\x59\xb3\x8b\x32\x7a\x92"
+			  "\x9f\xd8\x25\x2b\xdf\xc0\x4c\xda",
+		.ksize	= 1088,
+		.plaintext	= "\xbc\xda\x81\xa8\x78\x79\x1c\xbf"
+			  "\x77\x53\xba\x4c\x30\x5b\xb8\x33",
+		.psize	= 16,
+		.digest	= "\x04\xbf\x7f\x6a\xce\x72\xea\x6a"
+			  "\x79\xdb\xb0\xc9\x60\xf6\x12\xcc",
+		.np	= 6,
+		.tap	= { 4, 4, 1, 1, 1, 5 },
+	}, {
+		.key	= "\x65\x4d\xe3\xf8\xd2\x4c\xac\x28"
+			  "\x68\xf5\xb3\x81\x71\x4b\xa1\xfa"
+			  "\x04\x0e\xd3\x81\x36\xbe\x0c\x81"
+			  "\x5e\xaf\xbc\x3a\xa4\xc0\x8e\x8b"
+			  "\x55\x63\xd3\x52\x97\x88\xd6\x19"
+			  "\xbc\x96\xdf\x49\xff\x04\x63\xf5"
+			  "\x0c\x11\x13\xaa\x9e\x1f\x5a\xf7"
+			  "\xdd\xbd\x37\x80\xc3\xd0\xbe\xa7"
+			  "\x05\xc8\x3c\x98\x1e\x05\x3c\x84"
+			  "\x39\x61\xc4\xed\xed\x71\x1b\xc4"
+			  "\x74\x45\x2c\xa1\x56\x70\x97\xfd"
+			  "\x44\x18\x07\x7d\xca\x60\x1f\x73"
+			  "\x3b\x6d\x21\xcb\x61\x87\x70\x25"
+			  "\x46\x21\xf1\x1f\x21\x91\x31\x2d"
+			  "\x5d\xcc\xb7\xd1\x84\x3e\x3d\xdb"
+			  "\x03\x53\x2a\x82\xa6\x9a\x95\xbc"
+			  "\x1a\x1e\x0a\x5e\x07\x43\xab\x43"
+			  "\xaf\x92\x82\x06\x91\x04\x09\xf4"
+			  "\x17\x0a\x9a\x2c\x54\xdb\xb8\xf4"
+			  "\xd0\xf0\x10\x66\x24\x8d\xcd\xda"
+			  "\xfe\x0e\x45\x9d\x6f\xc4\x4e\xf4"
+			  "\x96\xaf\x13\xdc\xa9\xd4\x8c\xc4"
+			  "\xc8\x57\x39\x3c\xc2\xd3\x0a\x76"
+			  "\x4a\x1f\x75\x83\x44\xc7\xd1\x39"
+			  "\xd8\xb5\x41\xba\x73\x87\xfa\x96"
+			  "\xc7\x18\x53\xfb\x9b\xda\xa0\x97"
+			  "\x1d\xee\x60\x85\x9e\x14\xc3\xce"
+			  "\xc4\x05\x29\x3b\x95\x30\xa3\xd1"
+			  "\x9f\x82\x6a\x04\xf5\xa7\x75\x57"
+			  "\x82\x04\xfe\x71\x51\x71\xb1\x49"
+			  "\x50\xf8\xe0\x96\xf1\xfa\xa8\x88"
+			  "\x3f\xa0\x86\x20\xd4\x60\x79\x59"
+			  "\x17\x2d\xd1\x09\xf4\xec\x05\x57"
+			  "\xcf\x62\x7e\x0e\x7e\x60\x78\xe6"
+			  "\x08\x60\x29\xd8\xd5\x08\x1a\x24"
+			  "\xc4\x6c\x24\xe7\x92\x08\x3d\x8a"
+			  "\x98\x7a\xcf\x99\x0a\x65\x0e\xdc"
+			  "\x8c\x8a\xbe\x92\x82\x91\xcc\x62"
+			  "\x30\xb6\xf4\x3f\xc6\x8a\x7f\x12"
+			  "\x4a\x8a\x49\xfa\x3f\x5c\xd4\x5a"
+			  "\xa6\x82\xa3\xe6\xaa\x34\x76\xb2"
+			  "\xab\x0a\x30\xef\x6c\x77\x58\x3f"
+			  "\x05\x6b\xcc\x5c\xae\xdc\xd7\xb9"
+			  "\x51\x7e\x8d\x32\x5b\x24\x25\xbe"
+			  "\x2b\x24\x01\xcf\x80\xda\x16\xd8"
+			  "\x90\x72\x2c\xad\x34\x8d\x0c\x74"
+			  "\x02\xcb\xfd\xcf\x6e\xef\x97\xb5"
+			  "\x4c\xf2\x68\xca\xde\x43\x9e\x8a"
+			  "\xc5\x5f\x31\x7f\x14\x71\x38\xec"
+			  "\xbd\x98\xe5\x71\xc4\xb5\xdb\xef"
+			  "\x59\xd2\xca\xc0\xc1\x86\x75\x01"
+			  "\xd4\x15\x0d\x6f\xa4\xf7\x7b\x37"
+			  "\x47\xda\x18\x93\x63\xda\xbe\x9e"
+			  "\x07\xfb\xb2\x83\xd5\xc4\x34\x55"
+			  "\xee\x73\xa1\x42\x96\xf9\x66\x41"
+			  "\xa4\xcc\xd2\x93\x6e\xe1\x0a\xbb"
+			  "\xd2\xdd\x18\x23\xe6\x6b\x98\x0b"
+			  "\x8a\x83\x59\x2c\xc3\xa6\x59\x5b"
+			  "\x01\x22\x59\xf7\xdc\xb0\x87\x7e"
+			  "\xdb\x7d\xf4\x71\x41\xab\xbd\xee"
+			  "\x79\xbe\x3c\x01\x76\x0b\x2d\x0a"
+			  "\x42\xc9\x77\x8c\xbb\x54\x95\x60"
+			  "\x43\x2e\xe0\x17\x52\xbd\x90\xc9"
+			  "\xc2\x2c\xdd\x90\x24\x22\x76\x40"
+			  "\x5c\xb9\x41\xc9\xa1\xd5\xbd\xe3"
+			  "\x44\xe0\xa4\xab\xcc\xb8\xe2\x32"
+			  "\x02\x15\x04\x1f\x8c\xec\x5d\x14"
+			  "\xac\x18\xaa\xef\x6e\x33\x19\x6e"
+			  "\xde\xfe\x19\xdb\xeb\x61\xca\x18"
+			  "\xad\xd8\x3d\xbf\x09\x11\xc7\xa5"
+			  "\x86\x0b\x0f\xe5\x3e\xde\xe8\xd9"
+			  "\x0a\x69\x9e\x4c\x20\xff\xf9\xc5"
+			  "\xfa\xf8\xf3\x7f\xa5\x01\x4b\x5e"
+			  "\x0f\xf0\x3b\x68\xf0\x46\x8c\x2a"
+			  "\x7a\xc1\x8f\xa0\xfe\x6a\x5b\x44"
+			  "\x70\x5c\xcc\x92\x2c\x6f\x0f\xbd"
+			  "\x25\x3e\xb7\x8e\x73\x58\xda\xc9"
+			  "\xa5\xaa\x9e\xf3\x9b\xfd\x37\x3e"
+			  "\xe2\x88\xa4\x7b\xc8\x5c\xa8\x93"
+			  "\x0e\xe7\x9a\x9c\x2e\x95\x18\x9f"
+			  "\xc8\x45\x0c\x88\x9e\x53\x4f\x3a"
+			  "\x76\xc1\x35\xfa\x17\xd8\xac\xa0"
+			  "\x0c\x2d\x47\x2e\x4f\x69\x9b\xf7"
+			  "\xd0\xb6\x96\x0c\x19\xb3\x08\x01"
+			  "\x65\x7a\x1f\xc7\x31\x86\xdb\xc8"
+			  "\xc1\x99\x8f\xf8\x08\x4a\x9d\x23"
+			  "\x22\xa8\xcf\x27\x01\x01\x88\x93"
+			  "\x9c\x86\x45\xbd\xe0\x51\xca\x52"
+			  "\x84\xba\xfe\x03\xf7\xda\xc5\xce"
+			  "\x3e\x77\x75\x86\xaf\x84\xc8\x05"
+			  "\x44\x01\x0f\x02\xf3\x58\xb0\x06"
+			  "\x5a\xd7\x12\x30\x8d\xdf\x1f\x1f"
+			  "\x0a\xe6\xd2\xea\xf6\x3a\x7a\x99"
+			  "\x63\xe8\xd2\xc1\x4a\x45\x8b\x40"
+			  "\x4d\x0a\xa9\x76\x92\xb3\xda\x87"
+			  "\x36\x33\xf0\x78\xc3\x2f\x5f\x02"
+			  "\x1a\x6a\x2c\x32\xcd\x76\xbf\xbd"
+			  "\x5a\x26\x20\x28\x8c\x8c\xbc\x52"
+			  "\x3d\x0a\xc9\xcb\xab\xa4\x21\xb0"
+			  "\x54\x40\x81\x44\xc7\xd6\x1c\x11"
+			  "\x44\xc6\x02\x92\x14\x5a\xbf\x1a"
+			  "\x09\x8a\x18\xad\xcd\x64\x3d\x53"
+			  "\x4a\xb6\xa5\x1b\x57\x0e\xef\xe0"
+			  "\x8c\x44\x5f\x7d\xbd\x6c\xfd\x60"
+			  "\xae\x02\x24\xb6\x99\xdd\x8c\xaf"
+			  "\x59\x39\x75\x3c\xd1\x54\x7b\x86"
+			  "\xcc\x99\xd9\x28\x0c\xb0\x94\x62"
+			  "\xf9\x51\xd1\x19\x96\x2d\x66\xf5"
+			  "\x55\xcf\x9e\x59\xe2\x6b\x2c\x08"
+			  "\xc0\x54\x48\x24\x45\xc3\x8c\x73"
+			  "\xea\x27\x6e\x66\x7d\x1d\x0e\x6e"
+			  "\x13\xe8\x56\x65\x3a\xb0\x81\x5c"
+			  "\xf0\xe8\xd8\x00\x6b\xcd\x8f\xad"
+			  "\xdd\x53\xf3\xa4\x6c\x43\xd6\x31"
+			  "\xaf\xd2\x76\x1e\x91\x12\xdb\x3c"
+			  "\x8c\xc2\x81\xf0\x49\xdb\xe2\x6b"
+			  "\x76\x62\x0a\x04\xe4\xaa\x8a\x7c"
+			  "\x08\x0b\x5d\xd0\xee\x1d\xfb\xc4"
+			  "\x02\x75\x42\xd6\xba\xa7\x22\xa8"
+			  "\x47\x29\xb7\x85\x6d\x93\x3a\xdb"
+			  "\x00\x53\x0b\xa2\xeb\xf8\xfe\x01"
+			  "\x6f\x8a\x31\xd6\x17\x05\x6f\x67"
+			  "\x88\x95\x32\xfe\x4f\xa6\x4b\xf8"
+			  "\x03\xe4\xcd\x9a\x18\xe8\x4e\x2d"
+			  "\xf7\x97\x9a\x0c\x7d\x9f\x7e\x44"
+			  "\x69\x51\xe0\x32\x6b\x62\x86\x8f"
+			  "\xa6\x8e\x0b\x21\x96\xe5\xaf\x77"
+			  "\xc0\x83\xdf\xa5\x0e\xd0\xa1\x04"
+			  "\xaf\xc1\x10\xcb\x5a\x40\xe4\xe3"
+			  "\x38\x7e\x07\xe8\x4d\xfa\xed\xc5"
+			  "\xf0\x37\xdf\xbb\x8a\xcf\x3d\xdc"
+			  "\x61\xd2\xc6\x2b\xff\x07\xc9\x2f"
+			  "\x0c\x2d\x5c\x07\xa8\x35\x6a\xfc"
+			  "\xae\x09\x03\x45\x74\x51\x4d\xc4"
+			  "\xb8\x23\x87\x4a\x99\x27\x20\x87"
+			  "\x62\x44\x0a\x4a\xce\x78\x47\x22",
+		.ksize	= 1088,
+		.plaintext	= "\x8e\xb0\x4c\xde\x9c\x4a\x04\x5a"
+			  "\xf6\xa9\x7f\x45\x25\xa5\x7b\x3a"
+			  "\xbc\x4d\x73\x39\x81\xb5\xbd\x3d"
+			  "\x21\x6f\xd7\x37\x50\x3c\x7b\x28"
+			  "\xd1\x03\x3a\x17\xed\x7b\x7c\x2a"
+			  "\x16\xbc\xdf\x19\x89\x52\x71\x31"
+			  "\xb6\xc0\xfd\xb5\xd3\xba\x96\x99"
+			  "\xb6\x34\x0b\xd0\x99\x93\xfc\x1a"
+			  "\x01\x3c\x85\xc6\x9b\x78\x5c\x8b"
+			  "\xfe\xae\xd2\xbf\xb2\x6f\xf9\xed"
+			  "\xc8\x25\x17\xfe\x10\x3b\x7d\xda"
+			  "\xf4\x8d\x35\x4b\x7c\x7b\x82\xe7"
+			  "\xc2\xb3\xee\x60\x4a\x03\x86\xc9"
+			  "\x4e\xb5\xc4\xbe\xd2\xbd\x66\xf1"
+			  "\x13\xf1\x09\xab\x5d\xca\x63\x1f"
+			  "\xfc\xfb\x57\x2a\xfc\xca\x66\xd8"
+			  "\x77\x84\x38\x23\x1d\xac\xd3\xb3"
+			  "\x7a\xad\x4c\x70\xfa\x9c\xc9\x61"
+			  "\xa6\x1b\xba\x33\x4b\x4e\x33\xec"
+			  "\xa0\xa1\x64\x39\x40\x05\x1c\xc2"
+			  "\x3f\x49\x9d\xae\xf2\xc5\xf2\xc5"
+			  "\xfe\xe8\xf4\xc2\xf9\x96\x2d\x28"
+			  "\x92\x30\x44\xbc\xd2\x7f\xe1\x6e"
+			  "\x62\x02\x8f\x3d\x1c\x80\xda\x0e"
+			  "\x6a\x90\x7e\x75\xff\xec\x3e\xc4"
+			  "\xcd\x16\x34\x3b\x05\x6d\x4d\x20"
+			  "\x1c\x7b\xf5\x57\x4f\xfa\x3d\xac"
+			  "\xd0\x13\x55\xe8\xb3\xe1\x1b\x78"
+			  "\x30\xe6\x9f\x84\xd4\x69\xd1\x08"
+			  "\x12\x77\xa7\x4a\xbd\xc0\xf2\xd2"
+			  "\x78\xdd\xa3\x81\x12\xcb\x6c\x14"
+			  "\x90\x61\xe2\x84\xc6\x2b\x16\xcc"
+			  "\x40\x99\x50\x88\x01\x09\x64\x4f"
+			  "\x0a\x80\xbe\x61\xae\x46\xc9\x0a"
+			  "\x5d\xe0\xfb\x72\x7a\x1a\xdd\x61"
+			  "\x63\x20\x05\xa0\x4a\xf0\x60\x69"
+			  "\x7f\x92\xbc\xbf\x4e\x39\x4d\xdd"
+			  "\x74\xd1\xb7\xc0\x5a\x34\xb7\xae"
+			  "\x76\x65\x2e\xbc\x36\xb9\x04\x95"
+			  "\x42\xe9\x6f\xca\x78\xb3\x72\x07"
+			  "\xa3\xba\x02\x94\x67\x4c\xb1\xd7"
+			  "\xe9\x30\x0d\xf0\x3b\xb8\x10\x6d"
+			  "\xea\x2b\x21\xbf\x74\x59\x82\x97"
+			  "\x85\xaa\xf1\xd7\x54\x39\xeb\x05"
+			  "\xbd\xf3\x40\xa0\x97\xe6\x74\xfe"
+			  "\xb4\x82\x5b\xb1\x36\xcb\xe8\x0d"
+			  "\xce\x14\xd9\xdf\xf1\x94\x22\xcd"
+			  "\xd6\x00\xba\x04\x4c\x05\x0c\xc0"
+			  "\xd1\x5a\xeb\x52\xd5\xa8\x8e\xc8"
+			  "\x97\xa1\xaa\xc1\xea\xc1\xbe\x7c"
+			  "\x36\xb3\x36\xa0\xc6\x76\x66\xc5"
+			  "\xe2\xaf\xd6\x5c\xe2\xdb\x2c\xb3"
+			  "\x6c\xb9\x99\x7f\xff\x9f\x03\x24"
+			  "\xe1\x51\x44\x66\xd8\x0c\x5d\x7f"
+			  "\x5c\x85\x22\x2a\xcf\x6d\x79\x28"
+			  "\xab\x98\x01\x72\xfe\x80\x87\x5f"
+			  "\x46\xba\xef\x81\x24\xee\xbf\xb0"
+			  "\x24\x74\xa3\x65\x97\x12\xc4\xaf"
+			  "\x8b\xa0\x39\xda\x8a\x7e\x74\x6e"
+			  "\x1b\x42\xb4\x44\x37\xfc\x59\xfd"
+			  "\x86\xed\xfb\x8c\x66\x33\xda\x63"
+			  "\x75\xeb\xe1\xa4\x85\x4f\x50\x8f"
+			  "\x83\x66\x0d\xd3\x37\xfa\xe6\x9c"
+			  "\x4f\x30\x87\x35\x18\xe3\x0b\xb7"
+			  "\x6e\x64\x54\xcd\x70\xb3\xde\x54"
+			  "\xb7\x1d\xe6\x4c\x4d\x55\x12\x12"
+			  "\xaf\x5f\x7f\x5e\xee\x9d\xe8\x8e"
+			  "\x32\x9d\x4e\x75\xeb\xc6\xdd\xaa"
+			  "\x48\x82\xa4\x3f\x3c\xd7\xd3\xa8"
+			  "\x63\x9e\x64\xfe\xe3\x97\x00\x62"
+			  "\xe5\x40\x5d\xc3\xad\x72\xe1\x28"
+			  "\x18\x50\xb7\x75\xef\xcd\x23\xbf"
+			  "\x3f\xc0\x51\x36\xf8\x41\xc3\x08"
+			  "\xcb\xf1\x8d\x38\x34\xbd\x48\x45"
+			  "\x75\xed\xbc\x65\x7b\xb5\x0c\x9b"
+			  "\xd7\x67\x7d\x27\xb4\xc4\x80\xd7"
+			  "\xa9\xb9\xc7\x4a\x97\xaa\xda\xc8"
+			  "\x3c\x74\xcf\x36\x8f\xe4\x41\xe3"
+			  "\xd4\xd3\x26\xa7\xf3\x23\x9d\x8f"
+			  "\x6c\x20\x05\x32\x3e\xe0\xc3\xc8"
+			  "\x56\x3f\xa7\x09\xb7\xfb\xc7\xf7"
+			  "\xbe\x2a\xdd\x0f\x06\x7b\x0d\xdd"
+			  "\xb0\xb4\x86\x17\xfd\xb9\x04\xe5"
+			  "\xc0\x64\x5d\xad\x2a\x36\x38\xdb"
+			  "\x24\xaf\x5b\xff\xca\xf9\x41\xe8"
+			  "\xf9\x2f\x1e\x5e\xf9\xf5\xd5\xf2"
+			  "\xb2\x88\xca\xc9\xa1\x31\xe2\xe8"
+			  "\x10\x95\x65\xbf\xf1\x11\x61\x7a"
+			  "\x30\x1a\x54\x90\xea\xd2\x30\xf6"
+			  "\xa5\xad\x60\xf9\x4d\x84\x21\x1b"
+			  "\xe4\x42\x22\xc8\x12\x4b\xb0\x58"
+			  "\x3e\x9c\x2d\x32\x95\x0a\x8e\xb0"
+			  "\x0a\x7e\x77\x2f\xe8\x97\x31\x6a"
+			  "\xf5\x59\xb4\x26\xe6\x37\x12\xc9"
+			  "\xcb\xa0\x58\x33\x6f\xd5\x55\x55"
+			  "\x3c\xa1\x33\xb1\x0b\x7e\x2e\xb4"
+			  "\x43\x2a\x84\x39\xf0\x9c\xf4\x69"
+			  "\x4f\x1e\x79\xa6\x15\x1b\x87\xbb"
+			  "\xdb\x9b\xe0\xf1\x0b\xba\xe3\x6e"
+			  "\xcc\x2f\x49\x19\x22\x29\xfc\x71"
+			  "\xbb\x77\x38\x18\x61\xaf\x85\x76"
+			  "\xeb\xd1\x09\xcc\x86\x04\x20\x9a"
+			  "\x66\x53\x2f\x44\x8b\xc6\xa3\xd2"
+			  "\x5f\xc7\x79\x82\x66\xa8\x6e\x75"
+			  "\x7d\x94\xd1\x86\x75\x0f\xa5\x4f"
+			  "\x3c\x7a\x33\xce\xd1\x6e\x9d\x7b"
+			  "\x1f\x91\x37\xb8\x37\x80\xfb\xe0"
+			  "\x52\x26\xd0\x9a\xd4\x48\x02\x41"
+			  "\x05\xe3\x5a\x94\xf1\x65\x61\x19"
+			  "\xb8\x88\x4e\x2b\xea\xba\x8b\x58"
+			  "\x8b\x42\x01\x00\xa8\xfe\x00\x5c"
+			  "\xfe\x1c\xee\x31\x15\x69\xfa\xb3"
+			  "\x9b\x5f\x22\x8e\x0d\x2c\xe3\xa5"
+			  "\x21\xb9\x99\x8a\x8e\x94\x5a\xef"
+			  "\x13\x3e\x99\x96\x79\x6e\xd5\x42"
+			  "\x36\x03\xa9\xe2\xca\x65\x4e\x8a"
+			  "\x8a\x30\xd2\x7d\x74\xe7\xf0\xaa"
+			  "\x23\x26\xdd\xcb\x82\x39\xfc\x9d"
+			  "\x51\x76\x21\x80\xa2\xbe\x93\x03"
+			  "\x47\xb0\xc1\xb6\xdc\x63\xfd\x9f"
+			  "\xca\x9d\xa5\xca\x27\x85\xe2\xd8"
+			  "\x15\x5b\x7e\x14\x7a\xc4\x89\xcc"
+			  "\x74\x14\x4b\x46\xd2\xce\xac\x39"
+			  "\x6b\x6a\x5a\xa4\x0e\xe3\x7b\x15"
+			  "\x94\x4b\x0f\x74\xcb\x0c\x7f\xa9"
+			  "\xbe\x09\x39\xa3\xdd\x56\x5c\xc7"
+			  "\x99\x56\x65\x39\xf4\x0b\x7d\x87"
+			  "\xec\xaa\xe3\x4d\x22\x65\x39\x4e",
+		.psize	= 1024,
+		.digest	= "\x64\x3a\xbc\xc3\x3f\x74\x40\x51"
+			  "\x6e\x56\x01\x1a\x51\xec\x36\xde",
+		.np	= 8,
+		.tap	= { 64, 203, 267, 28, 263, 62, 54, 83 },
+	}, {
+		.key	= "\x1b\x82\x2e\x1b\x17\x23\xb9\x6d"
+			  "\xdc\x9c\xda\x99\x07\xe3\x5f\xd8"
+			  "\xd2\xf8\x43\x80\x8d\x86\x7d\x80"
+			  "\x1a\xd0\xcc\x13\xb9\x11\x05\x3f"
+			  "\x7e\xcf\x7e\x80\x0e\xd8\x25\x48"
+			  "\x8b\xaa\x63\x83\x92\xd0\x72\xf5"
+			  "\x4f\x67\x7e\x50\x18\x25\xa4\xd1"
+			  "\xe0\x7e\x1e\xba\xd8\xa7\x6e\xdb"
+			  "\x1a\xcc\x0d\xfe\x9f\x6d\x22\x35"
+			  "\xe1\xe6\xe0\xa8\x7b\x9c\xb1\x66"
+			  "\xa3\xf8\xff\x4d\x90\x84\x28\xbc"
+			  "\xdc\x19\xc7\x91\x49\xfc\xf6\x33"
+			  "\xc9\x6e\x65\x7f\x28\x6f\x68\x2e"
+			  "\xdf\x1a\x75\xe9\xc2\x0c\x96\xb9"
+			  "\x31\x22\xc4\x07\xc6\x0a\x2f\xfd"
+			  "\x36\x06\x5f\x5c\xc5\xb1\x3a\xf4"
+			  "\x5e\x48\xa4\x45\x2b\x88\xa7\xee"
+			  "\xa9\x8b\x52\xcc\x99\xd9\x2f\xb8"
+			  "\xa4\x58\x0a\x13\xeb\x71\x5a\xfa"
+			  "\xe5\x5e\xbe\xf2\x64\xad\x75\xbc"
+			  "\x0b\x5b\x34\x13\x3b\x23\x13\x9a"
+			  "\x69\x30\x1e\x9a\xb8\x03\xb8\x8b"
+			  "\x3e\x46\x18\x6d\x38\xd9\xb3\xd8"
+			  "\xbf\xf1\xd0\x28\xe6\x51\x57\x80"
+			  "\x5e\x99\xfb\xd0\xce\x1e\x83\xf7"
+			  "\xe9\x07\x5a\x63\xa9\xef\xce\xa5"
+			  "\xfb\x3f\x37\x17\xfc\x0b\x37\x0e"
+			  "\xbb\x4b\x21\x62\xb7\x83\x0e\xa9"
+			  "\x9e\xb0\xc4\xad\x47\xbe\x35\xe7"
+			  "\x51\xb2\xf2\xac\x2b\x65\x7b\x48"
+			  "\xe3\x3f\x5f\xb6\x09\x04\x0c\x58"
+			  "\xce\x99\xa9\x15\x2f\x4e\xc1\xf2"
+			  "\x24\x48\xc0\xd8\x6c\xd3\x76\x17"
+			  "\x83\x5d\xe6\xe3\xfd\x01\x8e\xf7"
+			  "\x42\xa5\x04\x29\x30\xdf\xf9\x00"
+			  "\x4a\xdc\x71\x22\x1a\x33\x15\xb6"
+			  "\xd7\x72\xfb\x9a\xb8\xeb\x2b\x38"
+			  "\xea\xa8\x61\xa8\x90\x11\x9d\x73"
+			  "\x2e\x6c\xce\x81\x54\x5a\x9f\xcd"
+			  "\xcf\xd5\xbd\x26\x5d\x66\xdb\xfb"
+			  "\xdc\x1e\x7c\x10\xfe\x58\x82\x10"
+			  "\x16\x24\x01\xce\x67\x55\x51\xd1"
+			  "\xdd\x6b\x44\xa3\x20\x8e\xa9\xa6"
+			  "\x06\xa8\x29\x77\x6e\x00\x38\x5b"
+			  "\xde\x4d\x58\xd8\x1f\x34\xdf\xf9"
+			  "\x2c\xac\x3e\xad\xfb\x92\x0d\x72"
+			  "\x39\xa4\xac\x44\x10\xc0\x43\xc4"
+			  "\xa4\x77\x3b\xfc\xc4\x0d\x37\xd3"
+			  "\x05\x84\xda\x53\x71\xf8\x80\xd3"
+			  "\x34\x44\xdb\x09\xb4\x2b\x8e\xe3"
+			  "\x00\x75\x50\x9e\x43\x22\x00\x0b"
+			  "\x7c\x70\xab\xd4\x41\xf1\x93\xcd"
+			  "\x25\x2d\x84\x74\xb5\xf2\x92\xcd"
+			  "\x0a\x28\xea\x9a\x49\x02\x96\xcb"
+			  "\x85\x9e\x2f\x33\x03\x86\x1d\xdc"
+			  "\x1d\x31\xd5\xfc\x9d\xaa\xc5\xe9"
+			  "\x9a\xc4\x57\xf5\x35\xed\xf4\x4b"
+			  "\x3d\x34\xc2\x29\x13\x86\x36\x42"
+			  "\x5d\xbf\x90\x86\x13\x77\xe5\xc3"
+			  "\x62\xb4\xfe\x0b\x70\x39\x35\x65"
+			  "\x02\xea\xf6\xce\x57\x0c\xbb\x74"
+			  "\x29\xe3\xfd\x60\x90\xfd\x10\x38"
+			  "\xd5\x4e\x86\xbd\x37\x70\xf0\x97"
+			  "\xa6\xab\x3b\x83\x64\x52\xca\x66"
+			  "\x2f\xf9\xa4\xca\x3a\x55\x6b\xb0"
+			  "\xe8\x3a\x34\xdb\x9e\x48\x50\x2f"
+			  "\x3b\xef\xfd\x08\x2d\x5f\xc1\x37"
+			  "\x5d\xbe\x73\xe4\xd8\xe9\xac\xca"
+			  "\x8a\xaa\x48\x7c\x5c\xf4\xa6\x96"
+			  "\x5f\xfa\x70\xa6\xb7\x8b\x50\xcb"
+			  "\xa6\xf5\xa9\xbd\x7b\x75\x4c\x22"
+			  "\x0b\x19\x40\x2e\xc9\x39\x39\x32"
+			  "\x83\x03\xa8\xa4\x98\xe6\x8e\x16"
+			  "\xb9\xde\x08\xc5\xfc\xbf\xad\x39"
+			  "\xa8\xc7\x93\x6c\x6f\x23\xaf\xc1"
+			  "\xab\xe1\xdf\xbb\x39\xae\x93\x29"
+			  "\x0e\x7d\x80\x8d\x3e\x65\xf3\xfd"
+			  "\x96\x06\x65\x90\xa1\x28\x64\x4b"
+			  "\x69\xf9\xa8\x84\x27\x50\xfc\x87"
+			  "\xf7\xbf\x55\x8e\x56\x13\x58\x7b"
+			  "\x85\xb4\x6a\x72\x0f\x40\xf1\x4f"
+			  "\x83\x81\x1f\x76\xde\x15\x64\x7a"
+			  "\x7a\x80\xe4\xc7\x5e\x63\x01\x91"
+			  "\xd7\x6b\xea\x0b\x9b\xa2\x99\x3b"
+			  "\x6c\x88\xd8\xfd\x59\x3c\x8d\x22"
+			  "\x86\x56\xbe\xab\xa1\x37\x08\x01"
+			  "\x50\x85\x69\x29\xee\x9f\xdf\x21"
+			  "\x3e\x20\x20\xf5\xb0\xbb\x6b\xd0"
+			  "\x9c\x41\x38\xec\x54\x6f\x2d\xbd"
+			  "\x0f\xe1\xbd\xf1\x2b\x6e\x60\x56"
+			  "\x29\xe5\x7a\x70\x1c\xe2\xfc\x97"
+			  "\x82\x68\x67\xd9\x3d\x1f\xfb\xd8"
+			  "\x07\x9f\xbf\x96\x74\xba\x6a\x0e"
+			  "\x10\x48\x20\xd8\x13\x1e\xb5\x44"
+			  "\xf2\xcc\xb1\x8b\xfb\xbb\xec\xd7"
+			  "\x37\x70\x1f\x7c\x55\xd2\x4b\xb9"
+			  "\xfd\x70\x5e\xa3\x91\x73\x63\x52"
+			  "\x13\x47\x5a\x06\xfb\x01\x67\xa5"
+			  "\xc0\xd0\x49\x19\x56\x66\x9a\x77"
+			  "\x64\xaf\x8c\x25\x91\x52\x87\x0e"
+			  "\x18\xf3\x5f\x97\xfd\x71\x13\xf8"
+			  "\x05\xa5\x39\xcc\x65\xd3\xcc\x63"
+			  "\x5b\xdb\x5f\x7e\x5f\x6e\xad\xc4"
+			  "\xf4\xa0\xc5\xc2\x2b\x4d\x97\x38"
+			  "\x4f\xbc\xfa\x33\x17\xb4\x47\xb9"
+			  "\x43\x24\x15\x8d\xd2\xed\x80\x68"
+			  "\x84\xdb\x04\x80\xca\x5e\x6a\x35"
+			  "\x2c\x2c\xe7\xc5\x03\x5f\x54\xb0"
+			  "\x5e\x4f\x1d\x40\x54\x3d\x78\x9a"
+			  "\xac\xda\x80\x27\x4d\x15\x4c\x1a"
+			  "\x6e\x80\xc9\xc4\x3b\x84\x0e\xd9"
+			  "\x2e\x93\x01\x8c\xc3\xc8\x91\x4b"
+			  "\xb3\xaa\x07\x04\x68\x5b\x93\xa5"
+			  "\xe7\xc4\x9d\xe7\x07\xee\xf5\x3b"
+			  "\x40\x89\xcc\x60\x34\x9d\xb4\x06"
+			  "\x1b\xef\x92\xe6\xc1\x2a\x7d\x0f"
+			  "\x81\xaa\x56\xe3\xd7\xed\xa7\xd4"
+			  "\xa7\x3a\x49\xc4\xad\x81\x5c\x83"
+			  "\x55\x8e\x91\x54\xb7\x7d\x65\xa5"
+			  "\x06\x16\xd5\x9a\x16\xc1\xb0\xa2"
+			  "\x06\xd8\x98\x47\x73\x7e\x73\xa0"
+			  "\xb8\x23\xb1\x52\xbf\x68\x74\x5d"
+			  "\x0b\xcb\xfa\x8c\x46\xe3\x24\xe6"
+			  "\xab\xd4\x69\x8d\x8c\xf2\x8a\x59"
+			  "\xbe\x48\x46\x50\x8c\x9a\xe8\xe3"
+			  "\x31\x55\x0a\x06\xed\x4f\xf8\xb7"
+			  "\x4f\xe3\x85\x17\x30\xbd\xd5\x20"
+			  "\xe7\x5b\xb2\x32\xcf\x6b\x16\x44"
+			  "\xd2\xf5\x7e\xd7\xd1\x2f\xee\x64"
+			  "\x3e\x9d\x10\xef\x27\x35\x43\x64"
+			  "\x67\xfb\x7a\x7b\xe0\x62\x31\x9a"
+			  "\x4d\xdf\xa5\xab\xc0\x20\xbb\x01"
+			  "\xe9\x7b\x54\xf1\xde\xb2\x79\x50"
+			  "\x6c\x4b\x91\xdb\x7f\xbb\x50\xc1"
+			  "\x55\x44\x38\x9a\xe0\x9f\xe8\x29"
+			  "\x6f\x15\xf8\x4e\xa6\xec\xa0\x60",
+		.ksize	= 1088,
+		.plaintext	= "\x15\x68\x9e\x2f\xad\x15\x52\xdf"
+			  "\xf0\x42\x62\x24\x2a\x2d\xea\xbf"
+			  "\xc7\xf3\xb4\x1a\xf5\xed\xb2\x08"
+			  "\x15\x60\x1c\x00\x77\xbf\x0b\x0e"
+			  "\xb7\x2c\xcf\x32\x3a\xc7\x01\x77"
+			  "\xef\xa6\x75\xd0\x29\xc7\x68\x20"
+			  "\xb2\x92\x25\xbf\x12\x34\xe9\xa4"
+			  "\xfd\x32\x7b\x3f\x7c\xbd\xa5\x02"
+			  "\x38\x41\xde\xc9\xc1\x09\xd9\xfc"
+			  "\x6e\x78\x22\x83\x18\xf7\x50\x8d"
+			  "\x8f\x9c\x2d\x02\xa5\x30\xac\xff"
+			  "\xea\x63\x2e\x80\x37\x83\xb0\x58"
+			  "\xda\x2f\xef\x21\x55\xba\x7b\xb1"
+			  "\xb6\xed\xf5\xd2\x4d\xaa\x8c\xa9"
+			  "\xdd\xdb\x0f\xb4\xce\xc1\x9a\xb1"
+			  "\xc1\xdc\xbd\xab\x86\xc2\xdf\x0b"
+			  "\xe1\x2c\xf9\xbe\xf6\xd8\xda\x62"
+			  "\x72\xdd\x98\x09\x52\xc0\xc4\xb6"
+			  "\x7b\x17\x5c\xf5\xd8\x4b\x88\xd6"
+			  "\x6b\xbf\x84\x4a\x3f\xf5\x4d\xd2"
+			  "\x94\xe2\x9c\xff\xc7\x3c\xd9\xc8"
+			  "\x37\x38\xbc\x8c\xf3\xe7\xb7\xd0"
+			  "\x1d\x78\xc4\x39\x07\xc8\x5e\x79"
+			  "\xb6\x5a\x90\x5b\x6e\x97\xc9\xd4"
+			  "\x82\x9c\xf3\x83\x7a\xe7\x97\xfc"
+			  "\x1d\xbb\xef\xdb\xce\xe0\x82\xad"
+			  "\xca\x07\x6c\x54\x62\x6f\x81\xe6"
+			  "\x7a\x5a\x96\x6e\x80\x3a\xa2\x37"
+			  "\x6f\xc6\xa4\x29\xc3\x9e\x19\x94"
+			  "\x9f\xb0\x3e\x38\xfb\x3c\x2b\x7d"
+			  "\xaa\xb8\x74\xda\x54\x23\x51\x12"
+			  "\x4b\x96\x36\x8f\x91\x4f\x19\x37"
+			  "\x83\xc9\xdd\xc7\x1a\x32\x2d\xab"
+			  "\xc7\x89\xe2\x07\x47\x6c\xe8\xa6"
+			  "\x70\x6b\x8e\x0c\xda\x5c\x6a\x59"
+			  "\x27\x33\x0e\xe1\xe1\x20\xe8\xc8"
+			  "\xae\xdc\xd0\xe3\x6d\xa8\xa6\x06"
+			  "\x41\xb4\xd4\xd4\xcf\x91\x3e\x06"
+			  "\xb0\x9a\xf7\xf1\xaa\xa6\x23\x92"
+			  "\x10\x86\xf0\x94\xd1\x7c\x2e\x07"
+			  "\x30\xfb\xc5\xd8\xf3\x12\xa9\xe8"
+			  "\x22\x1c\x97\x1a\xad\x96\xb0\xa1"
+			  "\x72\x6a\x6b\xb4\xfd\xf7\xe8\xfa"
+			  "\xe2\x74\xd8\x65\x8d\x35\x17\x4b"
+			  "\x00\x23\x5c\x8c\x70\xad\x71\xa2"
+			  "\xca\xc5\x6c\x59\xbf\xb4\xc0\x6d"
+			  "\x86\x98\x3e\x19\x5a\x90\x92\xb1"
+			  "\x66\x57\x6a\x91\x68\x7c\xbc\xf3"
+			  "\xf1\xdb\x94\xf8\x48\xf1\x36\xd8"
+			  "\x78\xac\x1c\xa9\xcc\xd6\x27\xba"
+			  "\x91\x54\x22\xf5\xe6\x05\x3f\xcc"
+			  "\xc2\x8f\x2c\x3b\x2b\xc3\x2b\x2b"
+			  "\x3b\xb8\xb6\x29\xb7\x2f\x94\xb6"
+			  "\x7b\xfc\x94\x3e\xd0\x7a\x41\x59"
+			  "\x7b\x1f\x9a\x09\xa6\xed\x4a\x82"
+			  "\x9d\x34\x1c\xbd\x4e\x1c\x3a\x66"
+			  "\x80\x74\x0e\x9a\x4f\x55\x54\x47"
+			  "\x16\xba\x2a\x0a\x03\x35\x99\xa3"
+			  "\x5c\x63\x8d\xa2\x72\x8b\x17\x15"
+			  "\x68\x39\x73\xeb\xec\xf2\xe8\xf5"
+			  "\x95\x32\x27\xd6\xc4\xfe\xb0\x51"
+			  "\xd5\x0c\x50\xc5\xcd\x6d\x16\xb3"
+			  "\xa3\x1e\x95\x69\xad\x78\x95\x06"
+			  "\xb9\x46\xf2\x6d\x24\x5a\x99\x76"
+			  "\x73\x6a\x91\xa6\xac\x12\xe1\x28"
+			  "\x79\xbc\x08\x4e\x97\x00\x98\x63"
+			  "\x07\x1c\x4e\xd1\x68\xf3\xb3\x81"
+			  "\xa8\xa6\x5f\xf1\x01\xc9\xc1\xaf"
+			  "\x3a\x96\xf9\x9d\xb5\x5a\x5f\x8f"
+			  "\x7e\xc1\x7e\x77\x0a\x40\xc8\x8e"
+			  "\xfc\x0e\xed\xe1\x0d\xb0\xe5\x5e"
+			  "\x5e\x6f\xf5\x7f\xab\x33\x7d\xcd"
+			  "\xf0\x09\x4b\xb2\x11\x37\xdc\x65"
+			  "\x97\x32\x62\x71\x3a\x29\x54\xb9"
+			  "\xc7\xa4\xbf\x75\x0f\xf9\x40\xa9"
+			  "\x8d\xd7\x8b\xa7\xe0\x9a\xbe\x15"
+			  "\xc6\xda\xd8\x00\x14\x69\x1a\xaf"
+			  "\x5f\x79\xc3\xf5\xbb\x6c\x2a\x9d"
+			  "\xdd\x3c\x5f\x97\x21\xe1\x3a\x03"
+			  "\x84\x6a\xe9\x76\x11\x1f\xd3\xd5"
+			  "\xf0\x54\x20\x4d\xc2\x91\xc3\xa4"
+			  "\x36\x25\xbe\x1b\x2a\x06\xb7\xf3"
+			  "\xd1\xd0\x55\x29\x81\x4c\x83\xa3"
+			  "\xa6\x84\x1e\x5c\xd1\xd0\x6c\x90"
+			  "\xa4\x11\xf0\xd7\x63\x6a\x48\x05"
+			  "\xbc\x48\x18\x53\xcd\xb0\x8d\xdb"
+			  "\xdc\xfe\x55\x11\x5c\x51\xb3\xab"
+			  "\xab\x63\x3e\x31\x5a\x8b\x93\x63"
+			  "\x34\xa9\xba\x2b\x69\x1a\xc0\xe3"
+			  "\xcb\x41\xbc\xd7\xf5\x7f\x82\x3e"
+			  "\x01\xa3\x3c\x72\xf4\xfe\xdf\xbe"
+			  "\xb1\x67\x17\x2b\x37\x60\x0d\xca"
+			  "\x6f\xc3\x94\x2c\xd2\x92\x6d\x9d"
+			  "\x75\x18\x77\xaa\x29\x38\x96\xed"
+			  "\x0e\x20\x70\x92\xd5\xd0\xb4\x00"
+			  "\xc0\x31\xf2\xc9\x43\x0e\x75\x1d"
+			  "\x4b\x64\xf2\x1f\xf2\x29\x6c\x7b"
+			  "\x7f\xec\x59\x7d\x8c\x0d\xd4\xd3"
+			  "\xac\x53\x4c\xa3\xde\x42\x92\x95"
+			  "\x6d\xa3\x4f\xd0\xe6\x3d\xe7\xec"
+			  "\x7a\x4d\x68\xf1\xfe\x67\x66\x09"
+			  "\x83\x22\xb1\x98\x43\x8c\xab\xb8"
+			  "\x45\xe6\x6d\xdf\x5e\x50\x71\xce"
+			  "\xf5\x4e\x40\x93\x2b\xfa\x86\x0e"
+			  "\xe8\x30\xbd\x82\xcc\x1c\x9c\x5f"
+			  "\xad\xfd\x08\x31\xbe\x52\xe7\xe6"
+			  "\xf2\x06\x01\x62\x25\x15\x99\x74"
+			  "\x33\x51\x52\x57\x3f\x57\x87\x61"
+			  "\xb9\x7f\x29\x3d\xcd\x92\x5e\xa6"
+			  "\x5c\x3b\xf1\xed\x5f\xeb\x82\xed"
+			  "\x56\x7b\x61\xe7\xfd\x02\x47\x0e"
+			  "\x2a\x15\xa4\xce\x43\x86\x9b\xe1"
+			  "\x2b\x4c\x2a\xd9\x42\x97\xf7\x9a"
+			  "\xe5\x47\x46\x48\xd3\x55\x6f\x4d"
+			  "\xd9\xeb\x4b\xdd\x7b\x21\x2f\xb3"
+			  "\xa8\x36\x28\xdf\xca\xf1\xf6\xd9"
+			  "\x10\xf6\x1c\xfd\x2e\x0c\x27\xe0"
+			  "\x01\xb3\xff\x6d\x47\x08\x4d\xd4"
+			  "\x00\x25\xee\x55\x4a\xe9\xe8\x5b"
+			  "\xd8\xf7\x56\x12\xd4\x50\xb2\xe5"
+			  "\x51\x6f\x34\x63\x69\xd2\x4e\x96"
+			  "\x4e\xbc\x79\xbf\x18\xae\xc6\x13"
+			  "\x80\x92\x77\xb0\xb4\x0f\x29\x94"
+			  "\x6f\x4c\xbb\x53\x11\x36\xc3\x9f"
+			  "\x42\x8e\x96\x8a\x91\xc8\xe9\xfc"
+			  "\xfe\xbf\x7c\x2d\x6f\xf9\xb8\x44"
+			  "\x89\x1b\x09\x53\x0a\x2a\x92\xc3"
+			  "\x54\x7a\x3a\xf9\xe2\xe4\x75\x87"
+			  "\xa0\x5e\x4b\x03\x7a\x0d\x8a\xf4"
+			  "\x55\x59\x94\x2b\x63\x96\x0e\xf5",
+		.psize	= 1040,
+		.digest	= "\xb5\xb9\x08\xb3\x24\x3e\x03\xf0"
+			  "\xd6\x0b\x57\xbc\x0a\x6d\x89\x59",
+	}, {
+		.key	= "\xf6\x34\x42\x71\x35\x52\x8b\x58"
+			  "\x02\x3a\x8e\x4a\x8d\x41\x13\xe9"
+			  "\x7f\xba\xb9\x55\x9d\x73\x4d\xf8"
+			  "\x3f\x5d\x73\x15\xff\xd3\x9e\x7f"
+			  "\x20\x2a\x6a\xa8\xd1\xf0\x8f\x12"
+			  "\x6b\x02\xd8\x6c\xde\xba\x80\x22"
+			  "\x19\x37\xc8\xd0\x4e\x89\x17\x7c"
+			  "\x7c\xdd\x88\xfd\x41\xc0\x04\xb7"
+			  "\x1d\xac\x19\xe3\x20\xc7\x16\xcf"
+			  "\x58\xee\x1d\x7a\x61\x69\xa9\x12"
+			  "\x4b\xef\x4f\xb6\x38\xdd\x78\xf8"
+			  "\x28\xee\x70\x08\xc7\x7c\xcc\xc8"
+			  "\x1e\x41\xf5\x80\x86\x70\xd0\xf0"
+			  "\xa3\x87\x6b\x0a\x00\xd2\x41\x28"
+			  "\x74\x26\xf1\x24\xf3\xd0\x28\x77"
+			  "\xd7\xcd\xf6\x2d\x61\xf4\xa2\x13"
+			  "\x77\xb4\x6f\xa0\xf4\xfb\xd6\xb5"
+			  "\x38\x9d\x5a\x0c\x51\xaf\xad\x63"
+			  "\x27\x67\x8c\x01\xea\x42\x1a\x66"
+			  "\xda\x16\x7c\x3c\x30\x0c\x66\x53"
+			  "\x1c\x88\xa4\x5c\xb2\xe3\x78\x0a"
+			  "\x13\x05\x6d\xe2\xaf\xb3\xe4\x75"
+			  "\x00\x99\x58\xee\x76\x09\x64\xaa"
+			  "\xbb\x2e\xb1\x81\xec\xd8\x0e\xd3"
+			  "\x0c\x33\x5d\xb7\x98\xef\x36\xb6"
+			  "\xd2\x65\x69\x41\x70\x12\xdc\x25"
+			  "\x41\x03\x99\x81\x41\x19\x62\x13"
+			  "\xd1\x0a\x29\xc5\x8c\xe0\x4c\xf3"
+			  "\xd6\xef\x4c\xf4\x1d\x83\x2e\x6d"
+			  "\x8e\x14\x87\xed\x80\xe0\xaa\xd3"
+			  "\x08\x04\x73\x1a\x84\x40\xf5\x64"
+			  "\xbd\x61\x32\x65\x40\x42\xfb\xb0"
+			  "\x40\xf6\x40\x8d\xc7\x7f\x14\xd0"
+			  "\x83\x99\xaa\x36\x7e\x60\xc6\xbf"
+			  "\x13\x8a\xf9\x21\xe4\x7e\x68\x87"
+			  "\xf3\x33\x86\xb4\xe0\x23\x7e\x0a"
+			  "\x21\xb1\xf5\xad\x67\x3c\x9c\x9d"
+			  "\x09\xab\xaf\x5f\xba\xe0\xd0\x82"
+			  "\x48\x22\x70\xb5\x6d\x53\xd6\x0e"
+			  "\xde\x64\x92\x41\xb0\xd3\xfb\xda"
+			  "\x21\xfe\xab\xea\x20\xc4\x03\x58"
+			  "\x18\x2e\x7d\x2f\x03\xa9\x47\x66"
+			  "\xdf\x7b\xa4\x6b\x34\x6b\x55\x9c"
+			  "\x4f\xd7\x9c\x47\xfb\xa9\x42\xec"
+			  "\x5a\x12\xfd\xfe\x76\xa0\x92\x9d"
+			  "\xfe\x1e\x16\xdd\x24\x2a\xe4\x27"
+			  "\xd5\xa9\xf2\x05\x4f\x83\xa2\xaf"
+			  "\xfe\xee\x83\x7a\xad\xde\xdf\x9a"
+			  "\x80\xd5\x81\x14\x93\x16\x7e\x46"
+			  "\x47\xc2\x14\xef\x49\x6e\xb9\xdb"
+			  "\x40\xe8\x06\x6f\x9c\x2a\xfd\x62"
+			  "\x06\x46\xfd\x15\x1d\x36\x61\x6f"
+			  "\x77\x77\x5e\x64\xce\x78\x1b\x85"
+			  "\xbf\x50\x9a\xfd\x67\xa6\x1a\x65"
+			  "\xad\x5b\x33\x30\xf1\x71\xaa\xd9"
+			  "\x23\x0d\x92\x24\x5f\xae\x57\xb0"
+			  "\x24\x37\x0a\x94\x12\xfb\xb5\xb1"
+			  "\xd3\xb8\x1d\x12\x29\xb0\x80\x24"
+			  "\x2d\x47\x9f\x96\x1f\x95\xf1\xb1"
+			  "\xda\x35\xf6\x29\xe0\xe1\x23\x96"
+			  "\xc7\xe8\x22\x9b\x7c\xac\xf9\x41"
+			  "\x39\x01\xe5\x73\x15\x5e\x99\xec"
+			  "\xb4\xc1\xf4\xe7\xa7\x97\x6a\xd5"
+			  "\x90\x9a\xa0\x1d\xf3\x5a\x8b\x5f"
+			  "\xdf\x01\x52\xa4\x93\x31\x97\xb0"
+			  "\x93\x24\xb5\xbc\xb2\x14\x24\x98"
+			  "\x4a\x8f\x19\x85\xc3\x2d\x0f\x74"
+			  "\x9d\x16\x13\x80\x5e\x59\x62\x62"
+			  "\x25\xe0\xd1\x2f\x64\xef\xba\xac"
+			  "\xcd\x09\x07\x15\x8a\xcf\x73\xb5"
+			  "\x8b\xc9\xd8\x24\xb0\x53\xd5\x6f"
+			  "\xe1\x2b\x77\xb1\xc5\xe4\xa7\x0e"
+			  "\x18\x45\xab\x36\x03\x59\xa8\xbd"
+			  "\x43\xf0\xd8\x2c\x1a\x69\x96\xbb"
+			  "\x13\xdf\x6c\x33\x77\xdf\x25\x34"
+			  "\x5b\xa5\x5b\x8c\xf9\x51\x05\xd4"
+			  "\x8b\x8b\x44\x87\x49\xfc\xa0\x8f"
+			  "\x45\x15\x5b\x40\x42\xc4\x09\x92"
+			  "\x98\x0c\x4d\xf4\x26\x37\x1b\x13"
+			  "\x76\x01\x93\x8d\x4f\xe6\xed\x18"
+			  "\xd0\x79\x7b\x3f\x44\x50\xcb\xee"
+			  "\xf7\x4a\xc9\x9e\xe0\x96\x74\xa7"
+			  "\xe6\x93\xb2\x53\xca\x55\xa8\xdc"
+			  "\x1e\x68\x07\x87\xb7\x2e\xc1\x08"
+			  "\xb2\xa4\x5b\xaf\xc6\xdb\x5c\x66"
+			  "\x41\x1c\x51\xd9\xb0\x07\x00\x0d"
+			  "\xf0\x4c\xdc\x93\xde\xa9\x1e\x8e"
+			  "\xd3\x22\x62\xd8\x8b\x88\x2c\xea"
+			  "\x5e\xf1\x6e\x14\x40\xc7\xbe\xaa"
+			  "\x42\x28\xd0\x26\x30\x78\x01\x9b"
+			  "\x83\x07\xbc\x94\xc7\x57\xa2\x9f"
+			  "\x03\x07\xff\x16\xff\x3c\x6e\x48"
+			  "\x0a\xd0\xdd\x4c\xf6\x64\x9a\xf1"
+			  "\xcd\x30\x12\x82\x2c\x38\xd3\x26"
+			  "\x83\xdb\xab\x3e\xc6\xf8\xe6\xfa"
+			  "\x77\x0a\x78\x82\x75\xf8\x63\x51"
+			  "\x59\xd0\x8d\x24\x9f\x25\xe6\xa3"
+			  "\x4c\xbc\x34\xfc\xe3\x10\xc7\x62"
+			  "\xd4\x23\xc8\x3d\xa7\xc6\xa6\x0a"
+			  "\x4f\x7e\x29\x9d\x6d\xbe\xb5\xf1"
+			  "\xdf\xa4\x53\xfa\xc0\x23\x0f\x37"
+			  "\x84\x68\xd0\xb5\xc8\xc6\xae\xf8"
+			  "\xb7\x8d\xb3\x16\xfe\x8f\x87\xad"
+			  "\xd0\xc1\x08\xee\x12\x1c\x9b\x1d"
+			  "\x90\xf8\xd1\x63\xa4\x92\x3c\xf0"
+			  "\xc7\x34\xd8\xf1\x14\xed\xa3\xbc"
+			  "\x17\x7e\xd4\x62\x42\x54\x57\x2c"
+			  "\x3e\x7a\x35\x35\x17\x0f\x0b\x7f"
+			  "\x81\xa1\x3f\xd0\xcd\xc8\x3b\x96"
+			  "\xe9\xe0\x4a\x04\xe1\xb6\x3c\xa1"
+			  "\xd6\xca\xc4\xbd\xb6\xb5\x95\x34"
+			  "\x12\x9d\xc5\x96\xf2\xdf\xba\x54"
+			  "\x76\xd1\xb2\x6b\x3b\x39\xe0\xb9"
+			  "\x18\x62\xfb\xf7\xfc\x12\xf1\x5f"
+			  "\x7e\xc7\xe3\x59\x4c\xa6\xc2\x3d"
+			  "\x40\x15\xf9\xa3\x95\x64\x4c\x74"
+			  "\x8b\x73\x77\x33\x07\xa7\x04\x1d"
+			  "\x33\x5a\x7e\x8f\xbd\x86\x01\x4f"
+			  "\x3e\xb9\x27\x6f\xe2\x41\xf7\x09"
+			  "\x67\xfd\x29\x28\xc5\xe4\xf6\x18"
+			  "\x4c\x1b\x49\xb2\x9c\x5b\xf6\x81"
+			  "\x4f\xbb\x5c\xcc\x0b\xdf\x84\x23"
+			  "\x58\xd6\x28\x34\x93\x3a\x25\x97"
+			  "\xdf\xb2\xc3\x9e\x97\x38\x0b\x7d"
+			  "\x10\xb3\x54\x35\x23\x8c\x64\xee"
+			  "\xf0\xd8\x66\xff\x8b\x22\xd2\x5b"
+			  "\x05\x16\x3c\x89\xf7\xb1\x75\xaf"
+			  "\xc0\xae\x6a\x4f\x3f\xaf\x9a\xf4"
+			  "\xf4\x9a\x24\xd9\x80\x82\xc0\x12"
+			  "\xde\x96\xd1\xbe\x15\x0b\x8d\x6a"
+			  "\xd7\x12\xe4\x85\x9f\x83\xc9\xc3"
+			  "\xff\x0b\xb5\xaf\x3b\xd8\x6d\x67"
+			  "\x81\x45\xe6\xac\xec\xc1\x7b\x16"
+			  "\x18\x0a\xce\x4b\xc0\x2e\x76\xbc"
+			  "\x1b\xfa\xb4\x34\xb8\xfc\x3e\xc8"
+			  "\x5d\x90\x71\x6d\x7a\x79\xef\x06",
+		.ksize	= 1088,
+		.plaintext	= "\xaa\x5d\x54\xcb\xea\x1e\x46\x0f"
+			  "\x45\x87\x70\x51\x8a\x66\x7a\x33"
+			  "\xb4\x18\xff\xa9\x82\xf9\x45\x4b"
+			  "\x93\xae\x2e\x7f\xab\x98\xfe\xbf"
+			  "\x01\xee\xe5\xa0\x37\x8f\x57\xa6"
+			  "\xb0\x76\x0d\xa4\xd6\x28\x2b\x5d"
+			  "\xe1\x03\xd6\x1c\x6f\x34\x0d\xe7"
+			  "\x61\x2d\x2e\xe5\xae\x5d\x47\xc7"
+			  "\x80\x4b\x18\x8f\xa8\x99\xbc\x28"
+			  "\xed\x1d\x9d\x86\x7d\xd7\x41\xd1"
+			  "\xe0\x2b\xe1\x8c\x93\x2a\xa7\x80"
+			  "\xe1\x07\xa0\xa9\x9f\x8c\x8d\x1a"
+			  "\x55\xfc\x6b\x24\x7a\xbd\x3e\x51"
+			  "\x68\x4b\x26\x59\xc8\xa7\x16\xd9"
+			  "\xb9\x61\x13\xde\x8b\x63\x1c\xf6"
+			  "\x60\x01\xfb\x08\xb3\x5b\x0a\xbf"
+			  "\x34\x73\xda\x87\x87\x3d\x6f\x97"
+			  "\x4a\x0c\xa3\x58\x20\xa2\xc0\x81"
+			  "\x5b\x8c\xef\xa9\xc2\x01\x1e\x64"
+			  "\x83\x8c\xbc\x03\xb6\xd0\x29\x9f"
+			  "\x54\xe2\xce\x8b\xc2\x07\x85\x78"
+			  "\x25\x38\x96\x4c\xb4\xbe\x17\x4a"
+			  "\x65\xa6\xfa\x52\x9d\x66\x9d\x65"
+			  "\x4a\xd1\x01\x01\xf0\xcb\x13\xcc"
+			  "\xa5\x82\xf3\xf2\x66\xcd\x3f\x9d"
+			  "\xd1\xaa\xe4\x67\xea\xf2\xad\x88"
+			  "\x56\x76\xa7\x9b\x59\x3c\xb1\x5d"
+			  "\x78\xfd\x69\x79\x74\x78\x43\x26"
+			  "\x7b\xde\x3f\xf1\xf5\x4e\x14\xd9"
+			  "\x15\xf5\x75\xb5\x2e\x19\xf3\x0c"
+			  "\x48\x72\xd6\x71\x6d\x03\x6e\xaa"
+			  "\xa7\x08\xf9\xaa\x70\xa3\x0f\x4d"
+			  "\x12\x8a\xdd\xe3\x39\x73\x7e\xa7"
+			  "\xea\x1f\x6d\x06\x26\x2a\xf2\xc5"
+			  "\x52\xb4\xbf\xfd\x52\x0c\x06\x60"
+			  "\x90\xd1\xb2\x7b\x56\xae\xac\x58"
+			  "\x5a\x6b\x50\x2a\xf5\xe0\x30\x3c"
+			  "\x2a\x98\x0f\x1b\x5b\x0a\x84\x6c"
+			  "\x31\xae\x92\xe2\xd4\xbb\x7f\x59"
+			  "\x26\x10\xb9\x89\x37\x68\x26\xbf"
+			  "\x41\xc8\x49\xc4\x70\x35\x7d\xff"
+			  "\x2d\x7f\xf6\x8a\x93\x68\x8c\x78"
+			  "\x0d\x53\xce\x7d\xff\x7d\xfb\xae"
+			  "\x13\x1b\x75\xc4\x78\xd7\x71\xd8"
+			  "\xea\xd3\xf4\x9d\x95\x64\x8e\xb4"
+			  "\xde\xb8\xe4\xa6\x68\xc8\xae\x73"
+			  "\x58\xaf\xa8\xb0\x5a\x20\xde\x87"
+			  "\x43\xb9\x0f\xe3\xad\x41\x4b\xd5"
+			  "\xb7\xad\x16\x00\xa6\xff\xf6\x74"
+			  "\xbf\x8c\x9f\xb3\x58\x1b\xb6\x55"
+			  "\xa9\x90\x56\x28\xf0\xb5\x13\x4e"
+			  "\x9e\xf7\x25\x86\xe0\x07\x7b\x98"
+			  "\xd8\x60\x5d\x38\x95\x3c\xe4\x22"
+			  "\x16\x2f\xb2\xa2\xaf\xe8\x90\x17"
+			  "\xec\x11\x83\x1a\xf4\xa9\x26\xda"
+			  "\x39\x72\xf5\x94\x61\x05\x51\xec"
+			  "\xa8\x30\x8b\x2c\x13\xd0\x72\xac"
+			  "\xb9\xd2\xa0\x4c\x4b\x78\xe8\x6e"
+			  "\x04\x85\xe9\x04\x49\x82\x91\xff"
+			  "\x89\xe5\xab\x4c\xaa\x37\x03\x12"
+			  "\xca\x8b\x74\x10\xfd\x9e\xd9\x7b"
+			  "\xcb\xdb\x82\x6e\xce\x2e\x33\x39"
+			  "\xce\xd2\x84\x6e\x34\x71\x51\x6e"
+			  "\x0d\xd6\x01\x87\xc7\xfa\x0a\xd3"
+			  "\xad\x36\xf3\x4c\x9f\x96\x5e\x62"
+			  "\x62\x54\xc3\x03\x78\xd6\xab\xdd"
+			  "\x89\x73\x55\x25\x30\xf8\xa7\xe6"
+			  "\x4f\x11\x0c\x7c\x0a\xa1\x2b\x7b"
+			  "\x3d\x0d\xde\x81\xd4\x9d\x0b\xae"
+			  "\xdf\x00\xf9\x4c\xb6\x90\x8e\x16"
+			  "\xcb\x11\xc8\xd1\x2e\x73\x13\x75"
+			  "\x75\x3e\xaa\xf5\xee\x02\xb3\x18"
+			  "\xa6\x2d\xf5\x3b\x51\xd1\x1f\x47"
+			  "\x6b\x2c\xdb\xc4\x10\xe0\xc8\xba"
+			  "\x9d\xac\xb1\x9d\x75\xd5\x41\x0e"
+			  "\x7e\xbe\x18\x5b\xa4\x1f\xf8\x22"
+			  "\x4c\xc1\x68\xda\x6d\x51\x34\x6c"
+			  "\x19\x59\xec\xb5\xb1\xec\xa7\x03"
+			  "\xca\x54\x99\x63\x05\x6c\xb1\xac"
+			  "\x9c\x31\xd6\xdb\xba\x7b\x14\x12"
+			  "\x7a\xc3\x2f\xbf\x8d\xdc\x37\x46"
+			  "\xdb\xd2\xbc\xd4\x2f\xab\x30\xd5"
+			  "\xed\x34\x99\x8e\x83\x3e\xbe\x4c"
+			  "\x86\x79\x58\xe0\x33\x8d\x9a\xb8"
+			  "\xa9\xa6\x90\x46\xa2\x02\xb8\xdd"
+			  "\xf5\xf9\x1a\x5c\x8c\x01\xaa\x6e"
+			  "\xb4\x22\x12\xf5\x0c\x1b\x9b\x7a"
+			  "\xc3\x80\xf3\x06\x00\x5f\x30\xd5"
+			  "\x06\xdb\x7d\x82\xc2\xd4\x0b\x4c"
+			  "\x5f\xe9\xc5\xf5\xdf\x97\x12\xbf"
+			  "\x56\xaf\x9b\x69\xcd\xee\x30\xb4"
+			  "\xa8\x71\xff\x3e\x7d\x73\x7a\xb4"
+			  "\x0d\xa5\x46\x7a\xf3\xf4\x15\x87"
+			  "\x5d\x93\x2b\x8c\x37\x64\xb5\xdd"
+			  "\x48\xd1\xe5\x8c\xae\xd4\xf1\x76"
+			  "\xda\xf4\xba\x9e\x25\x0e\xad\xa3"
+			  "\x0d\x08\x7c\xa8\x82\x16\x8d\x90"
+			  "\x56\x40\x16\x84\xe7\x22\x53\x3a"
+			  "\x58\xbc\xb9\x8f\x33\xc8\xc2\x84"
+			  "\x22\xe6\x0d\xe7\xb3\xdc\x5d\xdf"
+			  "\xd7\x2a\x36\xe4\x16\x06\x07\xd2"
+			  "\x97\x60\xb2\xf5\x5e\x14\xc9\xfd"
+			  "\x8b\x05\xd1\xce\xee\x9a\x65\x99"
+			  "\xb7\xae\x19\xb7\xc8\xbc\xd5\xa2"
+			  "\x7b\x95\xe1\xcc\xba\x0d\xdc\x8a"
+			  "\x1d\x59\x52\x50\xaa\x16\x02\x82"
+			  "\xdf\x61\x33\x2e\x44\xce\x49\xc7"
+			  "\xe5\xc6\x2e\x76\xcf\x80\x52\xf0"
+			  "\x3d\x17\x34\x47\x3f\xd3\x80\x48"
+			  "\xa2\xba\xd5\xc7\x7b\x02\x28\xdb"
+			  "\xac\x44\xc7\x6e\x05\x5c\xc2\x79"
+			  "\xb3\x7d\x6a\x47\x77\x66\xf1\x38"
+			  "\xf0\xf5\x4f\x27\x1a\x31\xca\x6c"
+			  "\x72\x95\x92\x8e\x3f\xb0\xec\x1d"
+			  "\xc7\x2a\xff\x73\xee\xdf\x55\x80"
+			  "\x93\xd2\xbd\x34\xd3\x9f\x00\x51"
+			  "\xfb\x2e\x41\xba\x6c\x5a\x7c\x17"
+			  "\x7f\xe6\x70\xac\x8d\x39\x3f\x77"
+			  "\xe2\x23\xac\x8f\x72\x4e\xe4\x53"
+			  "\xcc\xf1\x1b\xf1\x35\xfe\x52\xa4"
+			  "\xd6\xb8\x40\x6b\xc1\xfd\xa0\xa1"
+			  "\xf5\x46\x65\xc2\x50\xbb\x43\xe2"
+			  "\xd1\x43\x28\x34\x74\xf5\x87\xa0"
+			  "\xf2\x5e\x27\x3b\x59\x2b\x3e\x49"
+			  "\xdf\x46\xee\xaf\x71\xd7\x32\x36"
+			  "\xc7\x14\x0b\x58\x6e\x3e\x2d\x41"
+			  "\xfa\x75\x66\x3a\x54\xe0\xb2\xb9"
+			  "\xaf\xdd\x04\x80\x15\x19\x3f\x6f"
+			  "\xce\x12\xb4\xd8\xe8\x89\x3c\x05"
+			  "\x30\xeb\xf3\x3d\xcd\x27\xec\xdc"
+			  "\x56\x70\x12\xcf\x78\x2b\x77\xbf"
+			  "\x22\xf0\x1b\x17\x9c\xcc\xd6\x1b"
+			  "\x2d\x3d\xa0\x3b\xd8\xc9\x70\xa4"
+			  "\x7a\x3e\x07\xb9\x06\xc3\xfa\xb0"
+			  "\x33\xee\xc1\xd8\xf6\xe0\xf0\xb2"
+			  "\x61\x12\x69\xb0\x5f\x28\x99\xda"
+			  "\xc3\x61\x48\xfa\x07\x16\x03\xc4"
+			  "\xa8\xe1\x3c\xe8\x0e\x64\x15\x30"
+			  "\xc1\x9d\x84\x2f\x73\x98\x0e\x3a"
+			  "\xf2\x86\x21\xa4\x9e\x1d\xb5\x86"
+			  "\x16\xdb\x2b\x9a\x06\x64\x8e\x79"
+			  "\x8d\x76\x3e\xc3\xc2\x64\x44\xe3"
+			  "\xda\xbc\x1a\x52\xd7\x61\x03\x65"
+			  "\x54\x32\x77\x01\xed\x9d\x8a\x43"
+			  "\x25\x24\xe3\xc1\xbe\xb8\x2f\xcb"
+			  "\x89\x14\x64\xab\xf6\xa0\x6e\x02"
+			  "\x57\xe4\x7d\xa9\x4e\x9a\x03\x36"
+			  "\xad\xf1\xb1\xfc\x0b\xe6\x79\x51"
+			  "\x9f\x81\x77\xc4\x14\x78\x9d\xbf"
+			  "\xb6\xd6\xa3\x8c\xba\x0b\x26\xe7"
+			  "\xc8\xb9\x5c\xcc\xe1\x5f\xd5\xc6"
+			  "\xc4\xca\xc2\xa3\x45\xba\x94\x13"
+			  "\xb2\x8f\xc3\x54\x01\x09\xe7\x8b"
+			  "\xda\x2a\x0a\x11\x02\x43\xcb\x57"
+			  "\xc9\xcc\xb5\x5c\xab\xc4\xec\x54"
+			  "\x00\x06\x34\xe1\x6e\x03\x89\x7c"
+			  "\xc6\xfb\x6a\xc7\x60\x43\xd6\xc5"
+			  "\xb5\x68\x72\x89\x8f\x42\xc3\x74"
+			  "\xbd\x25\xaa\x9f\x67\xb5\xdf\x26"
+			  "\x20\xe8\xb7\x01\x3c\xe4\x77\xce"
+			  "\xc4\x65\xa7\x23\x79\xea\x33\xc7"
+			  "\x82\x14\x5c\x82\xf2\x4e\x3d\xf6"
+			  "\xc6\x4a\x0e\x29\xbb\xec\x44\xcd"
+			  "\x2f\xd1\x4f\x21\x71\xa9\xce\x0f"
+			  "\x5c\xf2\x72\x5c\x08\x2e\x21\xd2"
+			  "\xc3\x29\x13\xd8\xac\xc3\xda\x13"
+			  "\x1a\x9d\xa7\x71\x1d\x27\x1d\x27"
+			  "\x1d\xea\xab\x44\x79\xad\xe5\xeb"
+			  "\xef\x1f\x22\x0a\x44\x4f\xcb\x87"
+			  "\xa7\x58\x71\x0e\x66\xf8\x60\xbf"
+			  "\x60\x74\x4a\xb4\xec\x2e\xfe\xd3"
+			  "\xf5\xb8\xfe\x46\x08\x50\x99\x6c"
+			  "\x66\xa5\xa8\x34\x44\xb5\xe5\xf0"
+			  "\xdd\x2c\x67\x4e\x35\x96\x8e\x67"
+			  "\x48\x3f\x5f\x37\x44\x60\x51\x2e"
+			  "\x14\x91\x5e\x57\xc3\x0e\x79\x77"
+			  "\x2f\x03\xf4\xe2\x1c\x72\xbf\x85"
+			  "\x5d\xd3\x17\xdf\x6c\xc5\x70\x24"
+			  "\x42\xdf\x51\x4e\x2a\xb2\xd2\x5b"
+			  "\x9e\x69\x83\x41\x11\xfe\x73\x22"
+			  "\xde\x8a\x9e\xd8\x8a\xfb\x20\x38"
+			  "\xd8\x47\x6f\xd5\xed\x8f\x41\xfd"
+			  "\x13\x7a\x18\x03\x7d\x0f\xcd\x7d"
+			  "\xa6\x7d\x31\x9e\xf1\x8f\x30\xa3"
+			  "\x8b\x4c\x24\xb7\xf5\x48\xd7\xd9"
+			  "\x12\xe7\x84\x97\x5c\x31\x6d\xfb"
+			  "\xdf\xf3\xd3\xd1\xd5\x0c\x30\x06"
+			  "\x01\x6a\xbc\x6c\x78\x7b\xa6\x50"
+			  "\xfa\x0f\x3c\x42\x2d\xa5\xa3\x3b"
+			  "\xcf\x62\x50\xff\x71\x6d\xe7\xda"
+			  "\x27\xab\xc6\x67\x16\x65\x68\x64"
+			  "\xc7\xd5\x5f\x81\xa9\xf6\x65\xb3"
+			  "\x5e\x43\x91\x16\xcd\x3d\x55\x37"
+			  "\x55\xb3\xf0\x28\xc5\x54\x19\xc0"
+			  "\xe0\xd6\x2a\x61\xd4\xc8\x72\x51"
+			  "\xe9\xa1\x7b\x48\x21\xad\x44\x09"
+			  "\xe4\x01\x61\x3c\x8a\x5b\xf9\xa1"
+			  "\x6e\x1b\xdf\xc0\x04\xa8\x8b\xf2"
+			  "\x21\xbe\x34\x7b\xfc\xa1\xcd\xc9"
+			  "\xa9\x96\xf4\xa4\x4c\xf7\x4e\x8f"
+			  "\x84\xcc\xd3\xa8\x92\x77\x8f\x36"
+			  "\xe2\x2e\x8c\x33\xe8\x84\xa6\x0c"
+			  "\x6c\x8a\xda\x14\x32\xc2\x96\xff"
+			  "\xc6\x4a\xc2\x9b\x30\x7f\xd1\x29"
+			  "\xc0\xd5\x78\x41\x00\x80\x80\x03"
+			  "\x2a\xb1\xde\x26\x03\x48\x49\xee"
+			  "\x57\x14\x76\x51\x3c\x36\x5d\x0a"
+			  "\x5c\x9f\xe8\xd8\x53\xdb\x4f\xd4"
+			  "\x38\xbf\x66\xc9\x75\x12\x18\x75"
+			  "\x34\x2d\x93\x22\x96\x51\x24\x6e"
+			  "\x4e\xd9\x30\xea\x67\xff\x92\x1c"
+			  "\x16\x26\xe9\xb5\x33\xab\x8c\x22"
+			  "\x47\xdb\xa0\x2c\x08\xf0\x12\x69"
+			  "\x7e\x93\x52\xda\xa5\xe5\xca\xc1"
+			  "\x0f\x55\x2a\xbd\x09\x30\x88\x1b"
+			  "\x9c\xc6\x9f\xe6\xdb\xa6\x92\xeb"
+			  "\xf4\xbd\x5c\xc4\xdb\xc6\x71\x09"
+			  "\xab\x5e\x48\x0c\xed\x6f\xda\x8e"
+			  "\x8d\x0c\x98\x71\x7d\x10\xd0\x9c"
+			  "\x20\x9b\x79\x53\x26\x5d\xb9\x85"
+			  "\x8a\x31\xb8\xc5\x1c\x97\xde\x88"
+			  "\x61\x55\x7f\x7c\x21\x06\xea\xc4"
+			  "\x5f\xaf\xf2\xf0\xd5\x5e\x7d\xb4"
+			  "\x6e\xcf\xe9\xae\x1b\x0e\x11\x80"
+			  "\xc1\x9a\x74\x7e\x52\x6f\xa0\xb7"
+			  "\x24\xcd\x8d\x0a\x11\x40\x63\x72"
+			  "\xfa\xe2\xc5\xb3\x94\xef\x29\xa2"
+			  "\x1a\x23\x43\x04\x37\x55\x0d\xe9"
+			  "\x83\xb2\x29\x51\x49\x64\xa0\xbd"
+			  "\xde\x73\xfd\xa5\x7c\x95\x70\x62"
+			  "\x58\xdc\xe2\xd0\xbf\x98\xf5\x8a"
+			  "\x6a\xfd\xce\xa8\x0e\x42\x2a\xeb"
+			  "\xd2\xff\x83\x27\x53\x5c\xa0\x6e"
+			  "\x93\xef\xe2\xb9\x5d\x35\xd6\x98"
+			  "\xf6\x71\x19\x7a\x54\xa1\xa7\xe8"
+			  "\x09\xfe\xf6\x9e\xc7\xbd\x3e\x29"
+			  "\xbd\x6b\x17\xf4\xe7\x3e\x10\x5c"
+			  "\xc1\xd2\x59\x4f\x4b\x12\x1a\x5b"
+			  "\x50\x80\x59\xb9\xec\x13\x66\xa8"
+			  "\xd2\x31\x7b\x6a\x61\x22\xdd\x7d"
+			  "\x61\xee\x87\x16\x46\x9f\xf9\xc7"
+			  "\x41\xee\x74\xf8\xd0\x96\x2c\x76"
+			  "\x2a\xac\x7d\x6e\x9f\x0e\x7f\x95"
+			  "\xfe\x50\x16\xb2\x23\xca\x62\xd5"
+			  "\x68\xcf\x07\x3f\x3f\x97\x85\x2a"
+			  "\x0c\x25\x45\xba\xdb\x32\xcb\x83"
+			  "\x8c\x4f\xe0\x6d\x9a\x99\xf9\xc9"
+			  "\xda\xd4\x19\x31\xc1\x7c\x6d\xd9"
+			  "\x9c\x56\xd3\xec\xc1\x81\x4c\xed"
+			  "\x28\x9d\x87\xeb\x19\xd7\x1a\x4f"
+			  "\x04\x6a\xcb\x1f\xcf\x1f\xa2\x16"
+			  "\xfc\x2a\x0d\xa1\x14\x2d\xfa\xc5"
+			  "\x5a\xd2\xc5\xf9\x19\x7c\x20\x1f"
+			  "\x2d\x10\xc0\x66\x7c\xd9\x2d\xe5"
+			  "\x88\x70\x59\xa7\x85\xd5\x2e\x7c"
+			  "\x5c\xe3\xb7\x12\xd6\x97\x3f\x29",
+		.psize	= 2048,
+		.digest	= "\x37\x90\x92\xc2\xeb\x01\x87\xd9"
+			  "\x95\xc7\x91\xc3\x17\x8b\x38\x52",
+	}
+};
+
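Editorial aside, not part of the patch: the .np/.tap pairs in the hash vectors above tell the crypto self-tests to additionally feed the message as a chunked scatterlist, where the tap[] entries are the chunk sizes and must sum to .psize (4+4+1+1+1+5 = 16 for the 16-byte message, 64+203+267+28+263+62+54+83 = 1024 for the 1024-byte one). A minimal user-space sketch of that invariant, using only the sizes quoted above and a simplified stand-in for struct hash_testvec in crypto/testmgr.h:

/*
 * Standalone sketch (not kernel code): check that the scatter-gather
 * split described by .np/.tap covers the whole .psize of a hash vector.
 * The sizes are copied from the vectors above.
 */
#include <stdio.h>

struct tap_check {
	unsigned int psize;
	unsigned int np;
	unsigned int tap[8];
};

int main(void)
{
	static const struct tap_check vecs[] = {
		{ .psize = 16,   .np = 6, .tap = { 4, 4, 1, 1, 1, 5 } },
		{ .psize = 1024, .np = 8,
		  .tap = { 64, 203, 267, 28, 263, 62, 54, 83 } },
	};
	unsigned int i, j, sum;

	for (i = 0; i < sizeof(vecs) / sizeof(vecs[0]); i++) {
		for (sum = 0, j = 0; j < vecs[i].np; j++)
			sum += vecs[i].tap[j];
		printf("vector %u: tap sum %u, psize %u -> %s\n",
		       i, sum, vecs[i].psize,
		       sum == vecs[i].psize ? "ok" : "MISMATCH");
	}
	return 0;
}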
+
 /*
  * DES test vectors.
  */
@@ -30818,6 +32050,1622 @@
 	},
 };
 
+static const struct cipher_testvec xchacha20_tv_template[] = {
+	{ /* from libsodium test/default/xchacha20.c */
+		.key	= "\x79\xc9\x97\x98\xac\x67\x30\x0b"
+			  "\xbb\x27\x04\xc9\x5c\x34\x1e\x32"
+			  "\x45\xf3\xdc\xb2\x17\x61\xb9\x8e"
+			  "\x52\xff\x45\xb2\x4f\x30\x4f\xc4",
+		.klen	= 32,
+		.iv	= "\xb3\x3f\xfd\x30\x96\x47\x9b\xcf"
+			  "\xbc\x9a\xee\x49\x41\x76\x88\xa0"
+			  "\xa2\x55\x4f\x8d\x95\x38\x94\x19"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00",
+		.ctext	= "\xc6\xe9\x75\x81\x60\x08\x3a\xc6"
+			  "\x04\xef\x90\xe7\x12\xce\x6e\x75"
+			  "\xd7\x79\x75\x90\x74\x4e\x0c\xf0"
+			  "\x60\xf0\x13\x73\x9c",
+		.len	= 29,
+	}, { /* from libsodium test/default/xchacha20.c */
+		.key	= "\x9d\x23\xbd\x41\x49\xcb\x97\x9c"
+			  "\xcf\x3c\x5c\x94\xdd\x21\x7e\x98"
+			  "\x08\xcb\x0e\x50\xcd\x0f\x67\x81"
+			  "\x22\x35\xea\xaf\x60\x1d\x62\x32",
+		.klen	= 32,
+		.iv	= "\xc0\x47\x54\x82\x66\xb7\xc3\x70"
+			  "\xd3\x35\x66\xa2\x42\x5c\xbf\x30"
+			  "\xd8\x2d\x1e\xaf\x52\x94\x10\x9e"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00",
+		.ctext	= "\xa2\x12\x09\x09\x65\x94\xde\x8c"
+			  "\x56\x67\xb1\xd1\x3a\xd9\x3f\x74"
+			  "\x41\x06\xd0\x54\xdf\x21\x0e\x47"
+			  "\x82\xcd\x39\x6f\xec\x69\x2d\x35"
+			  "\x15\xa2\x0b\xf3\x51\xee\xc0\x11"
+			  "\xa9\x2c\x36\x78\x88\xbc\x46\x4c"
+			  "\x32\xf0\x80\x7a\xcd\x6c\x20\x3a"
+			  "\x24\x7e\x0d\xb8\x54\x14\x84\x68"
+			  "\xe9\xf9\x6b\xee\x4c\xf7\x18\xd6"
+			  "\x8d\x5f\x63\x7c\xbd\x5a\x37\x64"
+			  "\x57\x78\x8e\x6f\xae\x90\xfc\x31"
+			  "\x09\x7c\xfc",
+		.len	= 91,
+	}, { /* Taken from the ChaCha20 test vectors, appended 16 random bytes
+		to nonce, and recomputed the ciphertext with libsodium */
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x67\xc6\x69\x73"
+			  "\x51\xff\x4a\xec\x29\xcd\xba\xab"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ctext	= "\x9c\x49\x2a\xe7\x8a\x2f\x93\xc7"
+			  "\xb3\x33\x6f\x82\x17\xd8\xc4\x1e"
+			  "\xad\x80\x11\x11\x1d\x4c\x16\x18"
+			  "\x07\x73\x9b\x4f\xdb\x7c\xcb\x47"
+			  "\xfd\xef\x59\x74\xfa\x3f\xe5\x4c"
+			  "\x9b\xd0\xea\xbc\xba\x56\xad\x32"
+			  "\x03\xdc\xf8\x2b\xc1\xe1\x75\x67"
+			  "\x23\x7b\xe6\xfc\xd4\x03\x86\x54",
+		.len	= 64,
+	}, { /* Taken from the ChaCha20 test vectors, appended 16 random bytes
+		to nonce, and recomputed the ciphertext with libsodium */
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x01",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x02\xf2\xfb\xe3\x46"
+			  "\x7c\xc2\x54\xf8\x1b\xe8\xe7\x8d"
+			  "\x01\x00\x00\x00\x00\x00\x00\x00",
+		.ptext	= "\x41\x6e\x79\x20\x73\x75\x62\x6d"
+			  "\x69\x73\x73\x69\x6f\x6e\x20\x74"
+			  "\x6f\x20\x74\x68\x65\x20\x49\x45"
+			  "\x54\x46\x20\x69\x6e\x74\x65\x6e"
+			  "\x64\x65\x64\x20\x62\x79\x20\x74"
+			  "\x68\x65\x20\x43\x6f\x6e\x74\x72"
+			  "\x69\x62\x75\x74\x6f\x72\x20\x66"
+			  "\x6f\x72\x20\x70\x75\x62\x6c\x69"
+			  "\x63\x61\x74\x69\x6f\x6e\x20\x61"
+			  "\x73\x20\x61\x6c\x6c\x20\x6f\x72"
+			  "\x20\x70\x61\x72\x74\x20\x6f\x66"
+			  "\x20\x61\x6e\x20\x49\x45\x54\x46"
+			  "\x20\x49\x6e\x74\x65\x72\x6e\x65"
+			  "\x74\x2d\x44\x72\x61\x66\x74\x20"
+			  "\x6f\x72\x20\x52\x46\x43\x20\x61"
+			  "\x6e\x64\x20\x61\x6e\x79\x20\x73"
+			  "\x74\x61\x74\x65\x6d\x65\x6e\x74"
+			  "\x20\x6d\x61\x64\x65\x20\x77\x69"
+			  "\x74\x68\x69\x6e\x20\x74\x68\x65"
+			  "\x20\x63\x6f\x6e\x74\x65\x78\x74"
+			  "\x20\x6f\x66\x20\x61\x6e\x20\x49"
+			  "\x45\x54\x46\x20\x61\x63\x74\x69"
+			  "\x76\x69\x74\x79\x20\x69\x73\x20"
+			  "\x63\x6f\x6e\x73\x69\x64\x65\x72"
+			  "\x65\x64\x20\x61\x6e\x20\x22\x49"
+			  "\x45\x54\x46\x20\x43\x6f\x6e\x74"
+			  "\x72\x69\x62\x75\x74\x69\x6f\x6e"
+			  "\x22\x2e\x20\x53\x75\x63\x68\x20"
+			  "\x73\x74\x61\x74\x65\x6d\x65\x6e"
+			  "\x74\x73\x20\x69\x6e\x63\x6c\x75"
+			  "\x64\x65\x20\x6f\x72\x61\x6c\x20"
+			  "\x73\x74\x61\x74\x65\x6d\x65\x6e"
+			  "\x74\x73\x20\x69\x6e\x20\x49\x45"
+			  "\x54\x46\x20\x73\x65\x73\x73\x69"
+			  "\x6f\x6e\x73\x2c\x20\x61\x73\x20"
+			  "\x77\x65\x6c\x6c\x20\x61\x73\x20"
+			  "\x77\x72\x69\x74\x74\x65\x6e\x20"
+			  "\x61\x6e\x64\x20\x65\x6c\x65\x63"
+			  "\x74\x72\x6f\x6e\x69\x63\x20\x63"
+			  "\x6f\x6d\x6d\x75\x6e\x69\x63\x61"
+			  "\x74\x69\x6f\x6e\x73\x20\x6d\x61"
+			  "\x64\x65\x20\x61\x74\x20\x61\x6e"
+			  "\x79\x20\x74\x69\x6d\x65\x20\x6f"
+			  "\x72\x20\x70\x6c\x61\x63\x65\x2c"
+			  "\x20\x77\x68\x69\x63\x68\x20\x61"
+			  "\x72\x65\x20\x61\x64\x64\x72\x65"
+			  "\x73\x73\x65\x64\x20\x74\x6f",
+		.ctext	= "\xf9\xab\x7a\x4a\x60\xb8\x5f\xa0"
+			  "\x50\xbb\x57\xce\xef\x8c\xc1\xd9"
+			  "\x24\x15\xb3\x67\x5e\x7f\x01\xf6"
+			  "\x1c\x22\xf6\xe5\x71\xb1\x43\x64"
+			  "\x63\x05\xd5\xfc\x5c\x3d\xc0\x0e"
+			  "\x23\xef\xd3\x3b\xd9\xdc\x7f\xa8"
+			  "\x58\x26\xb3\xd0\xc2\xd5\x04\x3f"
+			  "\x0a\x0e\x8f\x17\xe4\xcd\xf7\x2a"
+			  "\xb4\x2c\x09\xe4\x47\xec\x8b\xfb"
+			  "\x59\x37\x7a\xa1\xd0\x04\x7e\xaa"
+			  "\xf1\x98\x5f\x24\x3d\x72\x9a\x43"
+			  "\xa4\x36\x51\x92\x22\x87\xff\x26"
+			  "\xce\x9d\xeb\x59\x78\x84\x5e\x74"
+			  "\x97\x2e\x63\xc0\xef\x29\xf7\x8a"
+			  "\xb9\xee\x35\x08\x77\x6a\x35\x9a"
+			  "\x3e\xe6\x4f\x06\x03\x74\x1b\xc1"
+			  "\x5b\xb3\x0b\x89\x11\x07\xd3\xb7"
+			  "\x53\xd6\x25\x04\xd9\x35\xb4\x5d"
+			  "\x4c\x33\x5a\xc2\x42\x4c\xe6\xa4"
+			  "\x97\x6e\x0e\xd2\xb2\x8b\x2f\x7f"
+			  "\x28\xe5\x9f\xac\x4b\x2e\x02\xab"
+			  "\x85\xfa\xa9\x0d\x7c\x2d\x10\xe6"
+			  "\x91\xab\x55\x63\xf0\xde\x3a\x94"
+			  "\x25\x08\x10\x03\xc2\x68\xd1\xf4"
+			  "\xaf\x7d\x9c\x99\xf7\x86\x96\x30"
+			  "\x60\xfc\x0b\xe6\xa8\x80\x15\xb0"
+			  "\x81\xb1\x0c\xbe\xb9\x12\x18\x25"
+			  "\xe9\x0e\xb1\xe7\x23\xb2\xef\x4a"
+			  "\x22\x8f\xc5\x61\x89\xd4\xe7\x0c"
+			  "\x64\x36\x35\x61\xb6\x34\x60\xf7"
+			  "\x7b\x61\x37\x37\x12\x10\xa2\xf6"
+			  "\x7e\xdb\x7f\x39\x3f\xb6\x8e\x89"
+			  "\x9e\xf3\xfe\x13\x98\xbb\x66\x5a"
+			  "\xec\xea\xab\x3f\x9c\x87\xc4\x8c"
+			  "\x8a\x04\x18\x49\xfc\x77\x11\x50"
+			  "\x16\xe6\x71\x2b\xee\xc0\x9c\xb6"
+			  "\x87\xfd\x80\xff\x0b\x1d\x73\x38"
+			  "\xa4\x1d\x6f\xae\xe4\x12\xd7\x93"
+			  "\x9d\xcd\x38\x26\x09\x40\x52\xcd"
+			  "\x67\x01\x67\x26\xe0\x3e\x98\xa8"
+			  "\xe8\x1a\x13\x41\xbb\x90\x4d\x87"
+			  "\xbb\x42\x82\x39\xce\x3a\xd0\x18"
+			  "\x6d\x7b\x71\x8f\xbb\x2c\x6a\xd1"
+			  "\xbd\xf5\xc7\x8a\x7e\xe1\x1e\x0f"
+			  "\x0d\x0d\x13\x7c\xd9\xd8\x3c\x91"
+			  "\xab\xff\x1f\x12\xc3\xee\xe5\x65"
+			  "\x12\x8d\x7b\x61\xe5\x1f\x98",
+		.len	= 375,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 375 - 20, 4, 16 },
+
+	}, { /* Taken from the ChaCha20 test vectors, appended 16 random bytes
+		to nonce, and recomputed the ciphertext with libsodium */
+		.key	= "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a"
+			  "\xf3\x33\x88\x86\x04\xf6\xb5\xf0"
+			  "\x47\x39\x17\xc1\x40\x2b\x80\x09"
+			  "\x9d\xca\x5c\xbc\x20\x70\x75\xc0",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x02\x76\x5a\x2e\x63"
+			  "\x33\x9f\xc9\x9a\x66\x32\x0d\xb7"
+			  "\x2a\x00\x00\x00\x00\x00\x00\x00",
+		.ptext	= "\x27\x54\x77\x61\x73\x20\x62\x72"
+			  "\x69\x6c\x6c\x69\x67\x2c\x20\x61"
+			  "\x6e\x64\x20\x74\x68\x65\x20\x73"
+			  "\x6c\x69\x74\x68\x79\x20\x74\x6f"
+			  "\x76\x65\x73\x0a\x44\x69\x64\x20"
+			  "\x67\x79\x72\x65\x20\x61\x6e\x64"
+			  "\x20\x67\x69\x6d\x62\x6c\x65\x20"
+			  "\x69\x6e\x20\x74\x68\x65\x20\x77"
+			  "\x61\x62\x65\x3a\x0a\x41\x6c\x6c"
+			  "\x20\x6d\x69\x6d\x73\x79\x20\x77"
+			  "\x65\x72\x65\x20\x74\x68\x65\x20"
+			  "\x62\x6f\x72\x6f\x67\x6f\x76\x65"
+			  "\x73\x2c\x0a\x41\x6e\x64\x20\x74"
+			  "\x68\x65\x20\x6d\x6f\x6d\x65\x20"
+			  "\x72\x61\x74\x68\x73\x20\x6f\x75"
+			  "\x74\x67\x72\x61\x62\x65\x2e",
+		.ctext	= "\x95\xb9\x51\xe7\x8f\xb4\xa4\x03"
+			  "\xca\x37\xcc\xde\x60\x1d\x8c\xe2"
+			  "\xf1\xbb\x8a\x13\x7f\x61\x85\xcc"
+			  "\xad\xf4\xf0\xdc\x86\xa6\x1e\x10"
+			  "\xbc\x8e\xcb\x38\x2b\xa5\xc8\x8f"
+			  "\xaa\x03\x3d\x53\x4a\x42\xb1\x33"
+			  "\xfc\xd3\xef\xf0\x8e\x7e\x10\x9c"
+			  "\x6f\x12\x5e\xd4\x96\xfe\x5b\x08"
+			  "\xb6\x48\xf0\x14\x74\x51\x18\x7c"
+			  "\x07\x92\xfc\xac\x9d\xf1\x94\xc0"
+			  "\xc1\x9d\xc5\x19\x43\x1f\x1d\xbb"
+			  "\x07\xf0\x1b\x14\x25\x45\xbb\xcb"
+			  "\x5c\xe2\x8b\x28\xf3\xcf\x47\x29"
+			  "\x27\x79\x67\x24\xa6\x87\xc2\x11"
+			  "\x65\x03\xfa\x45\xf7\x9e\x53\x7a"
+			  "\x99\xf1\x82\x25\x4f\x8d\x07",
+		.len	= 127,
+	}, { /* Taken from the ChaCha20 test vectors, appended 16 random bytes
+		to nonce, and recomputed the ciphertext with libsodium */
+		.key	= "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a"
+			  "\xf3\x33\x88\x86\x04\xf6\xb5\xf0"
+			  "\x47\x39\x17\xc1\x40\x2b\x80\x09"
+			  "\x9d\xca\x5c\xbc\x20\x70\x75\xc0",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x01\x31\x58\xa3\x5a"
+			  "\x25\x5d\x05\x17\x58\xe9\x5e\xd4"
+			  "\x1c\x00\x00\x00\x00\x00\x00\x00",
+		.ptext	= "\x49\xee\xe0\xdc\x24\x90\x40\xcd"
+			  "\xc5\x40\x8f\x47\x05\xbc\xdd\x81"
+			  "\x47\xc6\x8d\xe6\xb1\x8f\xd7\xcb"
+			  "\x09\x0e\x6e\x22\x48\x1f\xbf\xb8"
+			  "\x5c\xf7\x1e\x8a\xc1\x23\xf2\xd4"
+			  "\x19\x4b\x01\x0f\x4e\xa4\x43\xce"
+			  "\x01\xc6\x67\xda\x03\x91\x18\x90"
+			  "\xa5\xa4\x8e\x45\x03\xb3\x2d\xac"
+			  "\x74\x92\xd3\x53\x47\xc8\xdd\x25"
+			  "\x53\x6c\x02\x03\x87\x0d\x11\x0c"
+			  "\x58\xe3\x12\x18\xfd\x2a\x5b\x40"
+			  "\x0c\x30\xf0\xb8\x3f\x43\xce\xae"
+			  "\x65\x3a\x7d\x7c\xf4\x54\xaa\xcc"
+			  "\x33\x97\xc3\x77\xba\xc5\x70\xde"
+			  "\xd7\xd5\x13\xa5\x65\xc4\x5f\x0f"
+			  "\x46\x1a\x0d\x97\xb5\xf3\xbb\x3c"
+			  "\x84\x0f\x2b\xc5\xaa\xea\xf2\x6c"
+			  "\xc9\xb5\x0c\xee\x15\xf3\x7d\xbe"
+			  "\x9f\x7b\x5a\xa6\xae\x4f\x83\xb6"
+			  "\x79\x49\x41\xf4\x58\x18\xcb\x86"
+			  "\x7f\x30\x0e\xf8\x7d\x44\x36\xea"
+			  "\x75\xeb\x88\x84\x40\x3c\xad\x4f"
+			  "\x6f\x31\x6b\xaa\x5d\xe5\xa5\xc5"
+			  "\x21\x66\xe9\xa7\xe3\xb2\x15\x88"
+			  "\x78\xf6\x79\xa1\x59\x47\x12\x4e"
+			  "\x9f\x9f\x64\x1a\xa0\x22\x5b\x08"
+			  "\xbe\x7c\x36\xc2\x2b\x66\x33\x1b"
+			  "\xdd\x60\x71\xf7\x47\x8c\x61\xc3"
+			  "\xda\x8a\x78\x1e\x16\xfa\x1e\x86"
+			  "\x81\xa6\x17\x2a\xa7\xb5\xc2\xe7"
+			  "\xa4\xc7\x42\xf1\xcf\x6a\xca\xb4"
+			  "\x45\xcf\xf3\x93\xf0\xe7\xea\xf6"
+			  "\xf4\xe6\x33\x43\x84\x93\xa5\x67"
+			  "\x9b\x16\x58\x58\x80\x0f\x2b\x5c"
+			  "\x24\x74\x75\x7f\x95\x81\xb7\x30"
+			  "\x7a\x33\xa7\xf7\x94\x87\x32\x27"
+			  "\x10\x5d\x14\x4c\x43\x29\xdd\x26"
+			  "\xbd\x3e\x3c\x0e\xfe\x0e\xa5\x10"
+			  "\xea\x6b\x64\xfd\x73\xc6\xed\xec"
+			  "\xa8\xc9\xbf\xb3\xba\x0b\x4d\x07"
+			  "\x70\xfc\x16\xfd\x79\x1e\xd7\xc5"
+			  "\x49\x4e\x1c\x8b\x8d\x79\x1b\xb1"
+			  "\xec\xca\x60\x09\x4c\x6a\xd5\x09"
+			  "\x49\x46\x00\x88\x22\x8d\xce\xea"
+			  "\xb1\x17\x11\xde\x42\xd2\x23\xc1"
+			  "\x72\x11\xf5\x50\x73\x04\x40\x47"
+			  "\xf9\x5d\xe7\xa7\x26\xb1\x7e\xb0"
+			  "\x3f\x58\xc1\x52\xab\x12\x67\x9d"
+			  "\x3f\x43\x4b\x68\xd4\x9c\x68\x38"
+			  "\x07\x8a\x2d\x3e\xf3\xaf\x6a\x4b"
+			  "\xf9\xe5\x31\x69\x22\xf9\xa6\x69"
+			  "\xc6\x9c\x96\x9a\x12\x35\x95\x1d"
+			  "\x95\xd5\xdd\xbe\xbf\x93\x53\x24"
+			  "\xfd\xeb\xc2\x0a\x64\xb0\x77\x00"
+			  "\x6f\x88\xc4\x37\x18\x69\x7c\xd7"
+			  "\x41\x92\x55\x4c\x03\xa1\x9a\x4b"
+			  "\x15\xe5\xdf\x7f\x37\x33\x72\xc1"
+			  "\x8b\x10\x67\xa3\x01\x57\x94\x25"
+			  "\x7b\x38\x71\x7e\xdd\x1e\xcc\x73"
+			  "\x55\xd2\x8e\xeb\x07\xdd\xf1\xda"
+			  "\x58\xb1\x47\x90\xfe\x42\x21\x72"
+			  "\xa3\x54\x7a\xa0\x40\xec\x9f\xdd"
+			  "\xc6\x84\x6e\xca\xae\xe3\x68\xb4"
+			  "\x9d\xe4\x78\xff\x57\xf2\xf8\x1b"
+			  "\x03\xa1\x31\xd9\xde\x8d\xf5\x22"
+			  "\x9c\xdd\x20\xa4\x1e\x27\xb1\x76"
+			  "\x4f\x44\x55\xe2\x9b\xa1\x9c\xfe"
+			  "\x54\xf7\x27\x1b\xf4\xde\x02\xf5"
+			  "\x1b\x55\x48\x5c\xdc\x21\x4b\x9e"
+			  "\x4b\x6e\xed\x46\x23\xdc\x65\xb2"
+			  "\xcf\x79\x5f\x28\xe0\x9e\x8b\xe7"
+			  "\x4c\x9d\x8a\xff\xc1\xa6\x28\xb8"
+			  "\x65\x69\x8a\x45\x29\xef\x74\x85"
+			  "\xde\x79\xc7\x08\xae\x30\xb0\xf4"
+			  "\xa3\x1d\x51\x41\xab\xce\xcb\xf6"
+			  "\xb5\xd8\x6d\xe0\x85\xe1\x98\xb3"
+			  "\x43\xbb\x86\x83\x0a\xa0\xf5\xb7"
+			  "\x04\x0b\xfa\x71\x1f\xb0\xf6\xd9"
+			  "\x13\x00\x15\xf0\xc7\xeb\x0d\x5a"
+			  "\x9f\xd7\xb9\x6c\x65\x14\x22\x45"
+			  "\x6e\x45\x32\x3e\x7e\x60\x1a\x12"
+			  "\x97\x82\x14\xfb\xaa\x04\x22\xfa"
+			  "\xa0\xe5\x7e\x8c\x78\x02\x48\x5d"
+			  "\x78\x33\x5a\x7c\xad\xdb\x29\xce"
+			  "\xbb\x8b\x61\xa4\xb7\x42\xe2\xac"
+			  "\x8b\x1a\xd9\x2f\x0b\x8b\x62\x21"
+			  "\x83\x35\x7e\xad\x73\xc2\xb5\x6c"
+			  "\x10\x26\x38\x07\xe5\xc7\x36\x80"
+			  "\xe2\x23\x12\x61\xf5\x48\x4b\x2b"
+			  "\xc5\xdf\x15\xd9\x87\x01\xaa\xac"
+			  "\x1e\x7c\xad\x73\x78\x18\x63\xe0"
+			  "\x8b\x9f\x81\xd8\x12\x6a\x28\x10"
+			  "\xbe\x04\x68\x8a\x09\x7c\x1b\x1c"
+			  "\x83\x66\x80\x47\x80\xe8\xfd\x35"
+			  "\x1c\x97\x6f\xae\x49\x10\x66\xcc"
+			  "\xc6\xd8\xcc\x3a\x84\x91\x20\x77"
+			  "\x72\xe4\x24\xd2\x37\x9f\xc5\xc9"
+			  "\x25\x94\x10\x5f\x40\x00\x64\x99"
+			  "\xdc\xae\xd7\x21\x09\x78\x50\x15"
+			  "\xac\x5f\xc6\x2c\xa2\x0b\xa9\x39"
+			  "\x87\x6e\x6d\xab\xde\x08\x51\x16"
+			  "\xc7\x13\xe9\xea\xed\x06\x8e\x2c"
+			  "\xf8\x37\x8c\xf0\xa6\x96\x8d\x43"
+			  "\xb6\x98\x37\xb2\x43\xed\xde\xdf"
+			  "\x89\x1a\xe7\xeb\x9d\xa1\x7b\x0b"
+			  "\x77\xb0\xe2\x75\xc0\xf1\x98\xd9"
+			  "\x80\x55\xc9\x34\x91\xd1\x59\xe8"
+			  "\x4b\x0f\xc1\xa9\x4b\x7a\x84\x06"
+			  "\x20\xa8\x5d\xfa\xd1\xde\x70\x56"
+			  "\x2f\x9e\x91\x9c\x20\xb3\x24\xd8"
+			  "\x84\x3d\xe1\x8c\x7e\x62\x52\xe5"
+			  "\x44\x4b\x9f\xc2\x93\x03\xea\x2b"
+			  "\x59\xc5\xfa\x3f\x91\x2b\xbb\x23"
+			  "\xf5\xb2\x7b\xf5\x38\xaf\xb3\xee"
+			  "\x63\xdc\x7b\xd1\xff\xaa\x8b\xab"
+			  "\x82\x6b\x37\x04\xeb\x74\xbe\x79"
+			  "\xb9\x83\x90\xef\x20\x59\x46\xff"
+			  "\xe9\x97\x3e\x2f\xee\xb6\x64\x18"
+			  "\x38\x4c\x7a\x4a\xf9\x61\xe8\x9a"
+			  "\xa1\xb5\x01\xa6\x47\xd3\x11\xd4"
+			  "\xce\xd3\x91\x49\x88\xc7\xb8\x4d"
+			  "\xb1\xb9\x07\x6d\x16\x72\xae\x46"
+			  "\x5e\x03\xa1\x4b\xb6\x02\x30\xa8"
+			  "\x3d\xa9\x07\x2a\x7c\x19\xe7\x62"
+			  "\x87\xe3\x82\x2f\x6f\xe1\x09\xd9"
+			  "\x94\x97\xea\xdd\x58\x9e\xae\x76"
+			  "\x7e\x35\xe5\xb4\xda\x7e\xf4\xde"
+			  "\xf7\x32\x87\xcd\x93\xbf\x11\x56"
+			  "\x11\xbe\x08\x74\xe1\x69\xad\xe2"
+			  "\xd7\xf8\x86\x75\x8a\x3c\xa4\xbe"
+			  "\x70\xa7\x1b\xfc\x0b\x44\x2a\x76"
+			  "\x35\xea\x5d\x85\x81\xaf\x85\xeb"
+			  "\xa0\x1c\x61\xc2\xf7\x4f\xa5\xdc"
+			  "\x02\x7f\xf6\x95\x40\x6e\x8a\x9a"
+			  "\xf3\x5d\x25\x6e\x14\x3a\x22\xc9"
+			  "\x37\x1c\xeb\x46\x54\x3f\xa5\x91"
+			  "\xc2\xb5\x8c\xfe\x53\x08\x97\x32"
+			  "\x1b\xb2\x30\x27\xfe\x25\x5d\xdc"
+			  "\x08\x87\xd0\xe5\x94\x1a\xd4\xf1"
+			  "\xfe\xd6\xb4\xa3\xe6\x74\x81\x3c"
+			  "\x1b\xb7\x31\xa7\x22\xfd\xd4\xdd"
+			  "\x20\x4e\x7c\x51\xb0\x60\x73\xb8"
+			  "\x9c\xac\x91\x90\x7e\x01\xb0\xe1"
+			  "\x8a\x2f\x75\x1c\x53\x2a\x98\x2a"
+			  "\x06\x52\x95\x52\xb2\xe9\x25\x2e"
+			  "\x4c\xe2\x5a\x00\xb2\x13\x81\x03"
+			  "\x77\x66\x0d\xa5\x99\xda\x4e\x8c"
+			  "\xac\xf3\x13\x53\x27\x45\xaf\x64"
+			  "\x46\xdc\xea\x23\xda\x97\xd1\xab"
+			  "\x7d\x6c\x30\x96\x1f\xbc\x06\x34"
+			  "\x18\x0b\x5e\x21\x35\x11\x8d\x4c"
+			  "\xe0\x2d\xe9\x50\x16\x74\x81\xa8"
+			  "\xb4\x34\xb9\x72\x42\xa6\xcc\xbc"
+			  "\xca\x34\x83\x27\x10\x5b\x68\x45"
+			  "\x8f\x52\x22\x0c\x55\x3d\x29\x7c"
+			  "\xe3\xc0\x66\x05\x42\x91\x5f\x58"
+			  "\xfe\x4a\x62\xd9\x8c\xa9\x04\x19"
+			  "\x04\xa9\x08\x4b\x57\xfc\x67\x53"
+			  "\x08\x7c\xbc\x66\x8a\xb0\xb6\x9f"
+			  "\x92\xd6\x41\x7c\x5b\x2a\x00\x79"
+			  "\x72",
+		.ctext	= "\x3a\x92\xee\x53\x31\xaf\x2b\x60"
+			  "\x5f\x55\x8d\x00\x5d\xfc\x74\x97"
+			  "\x28\x54\xf4\xa5\x75\xf1\x9b\x25"
+			  "\x62\x1c\xc0\xe0\x13\xc8\x87\x53"
+			  "\xd0\xf3\xa7\x97\x1f\x3b\x1e\xea"
+			  "\xe0\xe5\x2a\xd1\xdd\xa4\x3b\x50"
+			  "\x45\xa3\x0d\x7e\x1b\xc9\xa0\xad"
+			  "\xb9\x2c\x54\xa6\xc7\x55\x16\xd0"
+			  "\xc5\x2e\x02\x44\x35\xd0\x7e\x67"
+			  "\xf2\xc4\x9b\xcd\x95\x10\xcc\x29"
+			  "\x4b\xfa\x86\x87\xbe\x40\x36\xbe"
+			  "\xe1\xa3\x52\x89\x55\x20\x9b\xc2"
+			  "\xab\xf2\x31\x34\x16\xad\xc8\x17"
+			  "\x65\x24\xc0\xff\x12\x37\xfe\x5a"
+			  "\x62\x3b\x59\x47\x6c\x5f\x3a\x8e"
+			  "\x3b\xd9\x30\xc8\x7f\x2f\x88\xda"
+			  "\x80\xfd\x02\xda\x7f\x9a\x7a\x73"
+			  "\x59\xc5\x34\x09\x9a\x11\xcb\xa7"
+			  "\xfc\xf6\xa1\xa0\x60\xfb\x43\xbb"
+			  "\xf1\xe9\xd7\xc6\x79\x27\x4e\xff"
+			  "\x22\xb4\x24\xbf\x76\xee\x47\xb9"
+			  "\x6d\x3f\x8b\xb0\x9c\x3c\x43\xdd"
+			  "\xff\x25\x2e\x6d\xa4\x2b\xfb\x5d"
+			  "\x1b\x97\x6c\x55\x0a\x82\x7a\x7b"
+			  "\x94\x34\xc2\xdb\x2f\x1f\xc1\xea"
+			  "\xd4\x4d\x17\x46\x3b\x51\x69\x09"
+			  "\xe4\x99\x32\x25\xfd\x94\xaf\xfb"
+			  "\x10\xf7\x4f\xdd\x0b\x3c\x8b\x41"
+			  "\xb3\x6a\xb7\xd1\x33\xa8\x0c\x2f"
+			  "\x62\x4c\x72\x11\xd7\x74\xe1\x3b"
+			  "\x38\x43\x66\x7b\x6c\x36\x48\xe7"
+			  "\xe3\xe7\x9d\xb9\x42\x73\x7a\x2a"
+			  "\x89\x20\x1a\x41\x80\x03\xf7\x8f"
+			  "\x61\x78\x13\xbf\xfe\x50\xf5\x04"
+			  "\x52\xf9\xac\x47\xf8\x62\x4b\xb2"
+			  "\x24\xa9\xbf\x64\xb0\x18\x69\xd2"
+			  "\xf5\xe4\xce\xc8\xb1\x87\x75\xd6"
+			  "\x2c\x24\x79\x00\x7d\x26\xfb\x44"
+			  "\xe7\x45\x7a\xee\x58\xa5\x83\xc1"
+			  "\xb4\x24\xab\x23\x2f\x4d\xd7\x4f"
+			  "\x1c\xc7\xaa\xa9\x50\xf4\xa3\x07"
+			  "\x12\x13\x89\x74\xdc\x31\x6a\xb2"
+			  "\xf5\x0f\x13\x8b\xb9\xdb\x85\x1f"
+			  "\xf5\xbc\x88\xd9\x95\xea\x31\x6c"
+			  "\x36\x60\xb6\x49\xdc\xc4\xf7\x55"
+			  "\x3f\x21\xc1\xb5\x92\x18\x5e\xbc"
+			  "\x9f\x87\x7f\xe7\x79\x25\x40\x33"
+			  "\xd6\xb9\x33\xd5\x50\xb3\xc7\x89"
+			  "\x1b\x12\xa0\x46\xdd\xa7\xd8\x3e"
+			  "\x71\xeb\x6f\x66\xa1\x26\x0c\x67"
+			  "\xab\xb2\x38\x58\x17\xd8\x44\x3b"
+			  "\x16\xf0\x8e\x62\x8d\x16\x10\x00"
+			  "\x32\x8b\xef\xb9\x28\xd3\xc5\xad"
+			  "\x0a\x19\xa2\xe4\x03\x27\x7d\x94"
+			  "\x06\x18\xcd\xd6\x27\x00\xf9\x1f"
+			  "\xb6\xb3\xfe\x96\x35\x5f\xc4\x1c"
+			  "\x07\x62\x10\x79\x68\x50\xf1\x7e"
+			  "\x29\xe7\xc4\xc4\xe7\xee\x54\xd6"
+			  "\x58\x76\x84\x6d\x8d\xe4\x59\x31"
+			  "\xe9\xf4\xdc\xa1\x1f\xe5\x1a\xd6"
+			  "\xe6\x64\x46\xf5\x77\x9c\x60\x7a"
+			  "\x5e\x62\xe3\x0a\xd4\x9f\x7a\x2d"
+			  "\x7a\xa5\x0a\x7b\x29\x86\x7a\x74"
+			  "\x74\x71\x6b\xca\x7d\x1d\xaa\xba"
+			  "\x39\x84\x43\x76\x35\xfe\x4f\x9b"
+			  "\xbb\xbb\xb5\x6a\x32\xb5\x5d\x41"
+			  "\x51\xf0\x5b\x68\x03\x47\x4b\x8a"
+			  "\xca\x88\xf6\x37\xbd\x73\x51\x70"
+			  "\x66\xfe\x9e\x5f\x21\x9c\xf3\xdd"
+			  "\xc3\xea\x27\xf9\x64\x94\xe1\x19"
+			  "\xa0\xa9\xab\x60\xe0\x0e\xf7\x78"
+			  "\x70\x86\xeb\xe0\xd1\x5c\x05\xd3"
+			  "\xd7\xca\xe0\xc0\x47\x47\x34\xee"
+			  "\x11\xa3\xa3\x54\x98\xb7\x49\x8e"
+			  "\x84\x28\x70\x2c\x9e\xfb\x55\x54"
+			  "\x4d\xf8\x86\xf7\x85\x7c\xbd\xf3"
+			  "\x17\xd8\x47\xcb\xac\xf4\x20\x85"
+			  "\x34\x66\xad\x37\x2d\x5e\x52\xda"
+			  "\x8a\xfe\x98\x55\x30\xe7\x2d\x2b"
+			  "\x19\x10\x8e\x7b\x66\x5e\xdc\xe0"
+			  "\x45\x1f\x7b\xb4\x08\xfb\x8f\xf6"
+			  "\x8c\x89\x21\x34\x55\x27\xb2\x76"
+			  "\xb2\x07\xd9\xd6\x68\x9b\xea\x6b"
+			  "\x2d\xb4\xc4\x35\xdd\xd2\x79\xae"
+			  "\xc7\xd6\x26\x7f\x12\x01\x8c\xa7"
+			  "\xe3\xdb\xa8\xf4\xf7\x2b\xec\x99"
+			  "\x11\x00\xf1\x35\x8c\xcf\xd5\xc9"
+			  "\xbd\x91\x36\x39\x70\xcf\x7d\x70"
+			  "\x47\x1a\xfc\x6b\x56\xe0\x3f\x9c"
+			  "\x60\x49\x01\x72\xa9\xaf\x2c\x9c"
+			  "\xe8\xab\xda\x8c\x14\x19\xf3\x75"
+			  "\x07\x17\x9d\x44\x67\x7a\x2e\xef"
+			  "\xb7\x83\x35\x4a\xd1\x3d\x1c\x84"
+			  "\x32\xdd\xaa\xea\xca\x1d\xdc\x72"
+			  "\x2c\xcc\x43\xcd\x5d\xe3\x21\xa4"
+			  "\xd0\x8a\x4b\x20\x12\xa3\xd5\x86"
+			  "\x76\x96\xff\x5f\x04\x57\x0f\xe6"
+			  "\xba\xe8\x76\x50\x0c\x64\x1d\x83"
+			  "\x9c\x9b\x9a\x9a\x58\x97\x9c\x5c"
+			  "\xb4\xa4\xa6\x3e\x19\xeb\x8f\x5a"
+			  "\x61\xb2\x03\x7b\x35\x19\xbe\xa7"
+			  "\x63\x0c\xfd\xdd\xf9\x90\x6c\x08"
+			  "\x19\x11\xd3\x65\x4a\xf5\x96\x92"
+			  "\x59\xaa\x9c\x61\x0c\x29\xa7\xf8"
+			  "\x14\x39\x37\xbf\x3c\xf2\x16\x72"
+			  "\x02\xfa\xa2\xf3\x18\x67\x5d\xcb"
+			  "\xdc\x4d\xbb\x96\xff\x70\x08\x2d"
+			  "\xc2\xa8\x52\xe1\x34\x5f\x72\xfe"
+			  "\x64\xbf\xca\xa7\x74\x38\xfb\x74"
+			  "\x55\x9c\xfa\x8a\xed\xfb\x98\xeb"
+			  "\x58\x2e\x6c\xe1\x52\x76\x86\xd7"
+			  "\xcf\xa1\xa4\xfc\xb2\x47\x41\x28"
+			  "\xa3\xc1\xe5\xfd\x53\x19\x28\x2b"
+			  "\x37\x04\x65\x96\x99\x7a\x28\x0f"
+			  "\x07\x68\x4b\xc7\x52\x0a\x55\x35"
+			  "\x40\x19\x95\x61\xe8\x59\x40\x1f"
+			  "\x9d\xbf\x78\x7d\x8f\x84\xff\x6f"
+			  "\xd0\xd5\x63\xd2\x22\xbd\xc8\x4e"
+			  "\xfb\xe7\x9f\x06\xe6\xe7\x39\x6d"
+			  "\x6a\x96\x9f\xf0\x74\x7e\xc9\x35"
+			  "\xb7\x26\xb8\x1c\x0a\xa6\x27\x2c"
+			  "\xa2\x2b\xfe\xbe\x0f\x07\x73\xae"
+			  "\x7f\x7f\x54\xf5\x7c\x6a\x0a\x56"
+			  "\x49\xd4\x81\xe5\x85\x53\x99\x1f"
+			  "\x95\x05\x13\x58\x8d\x0e\x1b\x90"
+			  "\xc3\x75\x48\x64\x58\x98\x67\x84"
+			  "\xae\xe2\x21\xa2\x8a\x04\x0a\x0b"
+			  "\x61\xaa\xb0\xd4\x28\x60\x7a\xf8"
+			  "\xbc\x52\xfb\x24\x7f\xed\x0d\x2a"
+			  "\x0a\xb2\xf9\xc6\x95\xb5\x11\xc9"
+			  "\xf4\x0f\x26\x11\xcf\x2a\x57\x87"
+			  "\x7a\xf3\xe7\x94\x65\xc2\xb5\xb3"
+			  "\xab\x98\xe3\xc1\x2b\x59\x19\x7c"
+			  "\xd6\xf3\xf9\xbf\xff\x6d\xc6\x82"
+			  "\x13\x2f\x4a\x2e\xcd\x26\xfe\x2d"
+			  "\x01\x70\xf4\xc2\x7f\x1f\x4c\xcb"
+			  "\x47\x77\x0c\xa0\xa3\x03\xec\xda"
+			  "\xa9\xbf\x0d\x2d\xae\xe4\xb8\x7b"
+			  "\xa9\xbc\x08\xb4\x68\x2e\xc5\x60"
+			  "\x8d\x87\x41\x2b\x0f\x69\xf0\xaf"
+			  "\x5f\xba\x72\x20\x0f\x33\xcd\x6d"
+			  "\x36\x7d\x7b\xd5\x05\xf1\x4b\x05"
+			  "\xc4\xfc\x7f\x80\xb9\x4d\xbd\xf7"
+			  "\x7c\x84\x07\x01\xc2\x40\x66\x5b"
+			  "\x98\xc7\x2c\xe3\x97\xfa\xdf\x87"
+			  "\xa0\x1f\xe9\x21\x42\x0f\x3b\xeb"
+			  "\x89\x1c\x3b\xca\x83\x61\x77\x68"
+			  "\x84\xbb\x60\x87\x38\x2e\x25\xd5"
+			  "\x9e\x04\x41\x70\xac\xda\xc0\x9c"
+			  "\x9c\x69\xea\x8d\x4e\x55\x2a\x29"
+			  "\xed\x05\x4b\x7b\x73\x71\x90\x59"
+			  "\x4d\xc8\xd8\x44\xf0\x4c\xe1\x5e"
+			  "\x84\x47\x55\xcc\x32\x3f\xe7\x97"
+			  "\x42\xc6\x32\xac\x40\xe5\xa5\xc7"
+			  "\x8b\xed\xdb\xf7\x83\xd6\xb1\xc2"
+			  "\x52\x5e\x34\xb7\xeb\x6e\xd9\xfc"
+			  "\xe5\x93\x9a\x97\x3e\xb0\xdc\xd9"
+			  "\xd7\x06\x10\xb6\x1d\x80\x59\xdd"
+			  "\x0d\xfe\x64\x35\xcd\x5d\xec\xf0"
+			  "\xba\xd0\x34\xc9\x2d\x91\xc5\x17"
+			  "\x11",
+		.len	= 1281,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 1200, 1, 80 },
+	},
+};
+
+/*
+ * Same as XChaCha20 test vectors above, but recomputed the ciphertext with
+ * XChaCha12, using a modified libsodium.
+ */
+static const struct cipher_testvec xchacha12_tv_template[] = {
+	{
+		.key	= "\x79\xc9\x97\x98\xac\x67\x30\x0b"
+			  "\xbb\x27\x04\xc9\x5c\x34\x1e\x32"
+			  "\x45\xf3\xdc\xb2\x17\x61\xb9\x8e"
+			  "\x52\xff\x45\xb2\x4f\x30\x4f\xc4",
+		.klen	= 32,
+		.iv	= "\xb3\x3f\xfd\x30\x96\x47\x9b\xcf"
+			  "\xbc\x9a\xee\x49\x41\x76\x88\xa0"
+			  "\xa2\x55\x4f\x8d\x95\x38\x94\x19"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00",
+		.ctext	= "\x1b\x78\x7f\xd7\xa1\x41\x68\xab"
+			  "\x3d\x3f\xd1\x7b\x69\x56\xb2\xd5"
+			  "\x43\xce\xeb\xaf\x36\xf0\x29\x9d"
+			  "\x3a\xfb\x18\xae\x1b",
+		.len	= 29,
+	}, {
+		.key	= "\x9d\x23\xbd\x41\x49\xcb\x97\x9c"
+			  "\xcf\x3c\x5c\x94\xdd\x21\x7e\x98"
+			  "\x08\xcb\x0e\x50\xcd\x0f\x67\x81"
+			  "\x22\x35\xea\xaf\x60\x1d\x62\x32",
+		.klen	= 32,
+		.iv	= "\xc0\x47\x54\x82\x66\xb7\xc3\x70"
+			  "\xd3\x35\x66\xa2\x42\x5c\xbf\x30"
+			  "\xd8\x2d\x1e\xaf\x52\x94\x10\x9e"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00",
+		.ctext	= "\xfb\x32\x09\x1d\x83\x05\xae\x4c"
+			  "\x13\x1f\x12\x71\xf2\xca\xb2\xeb"
+			  "\x5b\x83\x14\x7d\x83\xf6\x57\x77"
+			  "\x2e\x40\x1f\x92\x2c\xf9\xec\x35"
+			  "\x34\x1f\x93\xdf\xfb\x30\xd7\x35"
+			  "\x03\x05\x78\xc1\x20\x3b\x7a\xe3"
+			  "\x62\xa3\x89\xdc\x11\x11\x45\xa8"
+			  "\x82\x89\xa0\xf1\x4e\xc7\x0f\x11"
+			  "\x69\xdd\x0c\x84\x2b\x89\x5c\xdc"
+			  "\xf0\xde\x01\xef\xc5\x65\x79\x23"
+			  "\x87\x67\xd6\x50\xd9\x8d\xd9\x92"
+			  "\x54\x5b\x0e",
+		.len	= 91,
+	}, {
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x67\xc6\x69\x73"
+			  "\x51\xff\x4a\xec\x29\xcd\xba\xab"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ctext	= "\xdf\x2d\xc6\x21\x2a\x9d\xa1\xbb"
+			  "\xc2\x77\x66\x0c\x5c\x46\xef\xa7"
+			  "\x79\x1b\xb9\xdf\x55\xe2\xf9\x61"
+			  "\x4c\x7b\xa4\x52\x24\xaf\xa2\xda"
+			  "\xd1\x8f\x8f\xa2\x9e\x53\x4d\xc4"
+			  "\xb8\x55\x98\x08\x7c\x08\xd4\x18"
+			  "\x67\x8f\xef\x50\xb1\x5f\xa5\x77"
+			  "\x4c\x25\xe7\x86\x26\x42\xca\x44",
+		.len	= 64,
+	}, {
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x01",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x02\xf2\xfb\xe3\x46"
+			  "\x7c\xc2\x54\xf8\x1b\xe8\xe7\x8d"
+			  "\x01\x00\x00\x00\x00\x00\x00\x00",
+		.ptext	= "\x41\x6e\x79\x20\x73\x75\x62\x6d"
+			  "\x69\x73\x73\x69\x6f\x6e\x20\x74"
+			  "\x6f\x20\x74\x68\x65\x20\x49\x45"
+			  "\x54\x46\x20\x69\x6e\x74\x65\x6e"
+			  "\x64\x65\x64\x20\x62\x79\x20\x74"
+			  "\x68\x65\x20\x43\x6f\x6e\x74\x72"
+			  "\x69\x62\x75\x74\x6f\x72\x20\x66"
+			  "\x6f\x72\x20\x70\x75\x62\x6c\x69"
+			  "\x63\x61\x74\x69\x6f\x6e\x20\x61"
+			  "\x73\x20\x61\x6c\x6c\x20\x6f\x72"
+			  "\x20\x70\x61\x72\x74\x20\x6f\x66"
+			  "\x20\x61\x6e\x20\x49\x45\x54\x46"
+			  "\x20\x49\x6e\x74\x65\x72\x6e\x65"
+			  "\x74\x2d\x44\x72\x61\x66\x74\x20"
+			  "\x6f\x72\x20\x52\x46\x43\x20\x61"
+			  "\x6e\x64\x20\x61\x6e\x79\x20\x73"
+			  "\x74\x61\x74\x65\x6d\x65\x6e\x74"
+			  "\x20\x6d\x61\x64\x65\x20\x77\x69"
+			  "\x74\x68\x69\x6e\x20\x74\x68\x65"
+			  "\x20\x63\x6f\x6e\x74\x65\x78\x74"
+			  "\x20\x6f\x66\x20\x61\x6e\x20\x49"
+			  "\x45\x54\x46\x20\x61\x63\x74\x69"
+			  "\x76\x69\x74\x79\x20\x69\x73\x20"
+			  "\x63\x6f\x6e\x73\x69\x64\x65\x72"
+			  "\x65\x64\x20\x61\x6e\x20\x22\x49"
+			  "\x45\x54\x46\x20\x43\x6f\x6e\x74"
+			  "\x72\x69\x62\x75\x74\x69\x6f\x6e"
+			  "\x22\x2e\x20\x53\x75\x63\x68\x20"
+			  "\x73\x74\x61\x74\x65\x6d\x65\x6e"
+			  "\x74\x73\x20\x69\x6e\x63\x6c\x75"
+			  "\x64\x65\x20\x6f\x72\x61\x6c\x20"
+			  "\x73\x74\x61\x74\x65\x6d\x65\x6e"
+			  "\x74\x73\x20\x69\x6e\x20\x49\x45"
+			  "\x54\x46\x20\x73\x65\x73\x73\x69"
+			  "\x6f\x6e\x73\x2c\x20\x61\x73\x20"
+			  "\x77\x65\x6c\x6c\x20\x61\x73\x20"
+			  "\x77\x72\x69\x74\x74\x65\x6e\x20"
+			  "\x61\x6e\x64\x20\x65\x6c\x65\x63"
+			  "\x74\x72\x6f\x6e\x69\x63\x20\x63"
+			  "\x6f\x6d\x6d\x75\x6e\x69\x63\x61"
+			  "\x74\x69\x6f\x6e\x73\x20\x6d\x61"
+			  "\x64\x65\x20\x61\x74\x20\x61\x6e"
+			  "\x79\x20\x74\x69\x6d\x65\x20\x6f"
+			  "\x72\x20\x70\x6c\x61\x63\x65\x2c"
+			  "\x20\x77\x68\x69\x63\x68\x20\x61"
+			  "\x72\x65\x20\x61\x64\x64\x72\x65"
+			  "\x73\x73\x65\x64\x20\x74\x6f",
+		.ctext	= "\xe4\xa6\xc8\x30\xc4\x23\x13\xd6"
+			  "\x08\x4d\xc9\xb7\xa5\x64\x7c\xb9"
+			  "\x71\xe2\xab\x3e\xa8\x30\x8a\x1c"
+			  "\x4a\x94\x6d\x9b\xe0\xb3\x6f\xf1"
+			  "\xdc\xe3\x1b\xb3\xa9\x6d\x0d\xd6"
+			  "\xd0\xca\x12\xef\xe7\x5f\xd8\x61"
+			  "\x3c\x82\xd3\x99\x86\x3c\x6f\x66"
+			  "\x02\x06\xdc\x55\xf9\xed\xdf\x38"
+			  "\xb4\xa6\x17\x00\x7f\xef\xbf\x4f"
+			  "\xf8\x36\xf1\x60\x7e\x47\xaf\xdb"
+			  "\x55\x9b\x12\xcb\x56\x44\xa7\x1f"
+			  "\xd3\x1a\x07\x3b\x00\xec\xe6\x4c"
+			  "\xa2\x43\x27\xdf\x86\x19\x4f\x16"
+			  "\xed\xf9\x4a\xf3\x63\x6f\xfa\x7f"
+			  "\x78\x11\xf6\x7d\x97\x6f\xec\x6f"
+			  "\x85\x0f\x5c\x36\x13\x8d\x87\xe0"
+			  "\x80\xb1\x69\x0b\x98\x89\x9c\x4e"
+			  "\xf8\xdd\xee\x5c\x0a\x85\xce\xd4"
+			  "\xea\x1b\x48\xbe\x08\xf8\xe2\xa8"
+			  "\xa5\xb0\x3c\x79\xb1\x15\xb4\xb9"
+			  "\x75\x10\x95\x35\x81\x7e\x26\xe6"
+			  "\x78\xa4\x88\xcf\xdb\x91\x34\x18"
+			  "\xad\xd7\x8e\x07\x7d\xab\x39\xf9"
+			  "\xa3\x9e\xa5\x1d\xbb\xed\x61\xfd"
+			  "\xdc\xb7\x5a\x27\xfc\xb5\xc9\x10"
+			  "\xa8\xcc\x52\x7f\x14\x76\x90\xe7"
+			  "\x1b\x29\x60\x74\xc0\x98\x77\xbb"
+			  "\xe0\x54\xbb\x27\x49\x59\x1e\x62"
+			  "\x3d\xaf\x74\x06\xa4\x42\x6f\xc6"
+			  "\x52\x97\xc4\x1d\xc4\x9f\xe2\xe5"
+			  "\x38\x57\x91\xd1\xa2\x28\xcc\x40"
+			  "\xcc\x70\x59\x37\xfc\x9f\x4b\xda"
+			  "\xa0\xeb\x97\x9a\x7d\xed\x14\x5c"
+			  "\x9c\xb7\x93\x26\x41\xa8\x66\xdd"
+			  "\x87\x6a\xc0\xd3\xc2\xa9\x3e\xae"
+			  "\xe9\x72\xfe\xd1\xb3\xac\x38\xea"
+			  "\x4d\x15\xa9\xd5\x36\x61\xe9\x96"
+			  "\x6c\x23\xf8\x43\xe4\x92\x29\xd9"
+			  "\x8b\x78\xf7\x0a\x52\xe0\x19\x5b"
+			  "\x59\x69\x5b\x5d\xa1\x53\xc4\x68"
+			  "\xe1\xbb\xac\x89\x14\xe2\xe2\x85"
+			  "\x41\x18\xf5\xb3\xd1\xfa\x68\x19"
+			  "\x44\x78\xdc\xcf\xe7\x88\x2d\x52"
+			  "\x5f\x40\xb5\x7e\xf8\x88\xa2\xae"
+			  "\x4a\xb2\x07\x35\x9d\x9b\x07\x88"
+			  "\xb7\x00\xd0\x0c\xb6\xa0\x47\x59"
+			  "\xda\x4e\xc9\xab\x9b\x8a\x7b",
+		.len	= 375,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 375 - 20, 4, 16 },
+	}, {
+		.key	= "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a"
+			  "\xf3\x33\x88\x86\x04\xf6\xb5\xf0"
+			  "\x47\x39\x17\xc1\x40\x2b\x80\x09"
+			  "\x9d\xca\x5c\xbc\x20\x70\x75\xc0",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x02\x76\x5a\x2e\x63"
+			  "\x33\x9f\xc9\x9a\x66\x32\x0d\xb7"
+			  "\x2a\x00\x00\x00\x00\x00\x00\x00",
+		.ptext	= "\x27\x54\x77\x61\x73\x20\x62\x72"
+			  "\x69\x6c\x6c\x69\x67\x2c\x20\x61"
+			  "\x6e\x64\x20\x74\x68\x65\x20\x73"
+			  "\x6c\x69\x74\x68\x79\x20\x74\x6f"
+			  "\x76\x65\x73\x0a\x44\x69\x64\x20"
+			  "\x67\x79\x72\x65\x20\x61\x6e\x64"
+			  "\x20\x67\x69\x6d\x62\x6c\x65\x20"
+			  "\x69\x6e\x20\x74\x68\x65\x20\x77"
+			  "\x61\x62\x65\x3a\x0a\x41\x6c\x6c"
+			  "\x20\x6d\x69\x6d\x73\x79\x20\x77"
+			  "\x65\x72\x65\x20\x74\x68\x65\x20"
+			  "\x62\x6f\x72\x6f\x67\x6f\x76\x65"
+			  "\x73\x2c\x0a\x41\x6e\x64\x20\x74"
+			  "\x68\x65\x20\x6d\x6f\x6d\x65\x20"
+			  "\x72\x61\x74\x68\x73\x20\x6f\x75"
+			  "\x74\x67\x72\x61\x62\x65\x2e",
+		.ctext	= "\xb9\x68\xbc\x6a\x24\xbc\xcc\xd8"
+			  "\x9b\x2a\x8d\x5b\x96\xaf\x56\xe3"
+			  "\x11\x61\xe7\xa7\x9b\xce\x4e\x7d"
+			  "\x60\x02\x48\xac\xeb\xd5\x3a\x26"
+			  "\x9d\x77\x3b\xb5\x32\x13\x86\x8e"
+			  "\x20\x82\x26\x72\xae\x64\x1b\x7e"
+			  "\x2e\x01\x68\xb4\x87\x45\xa1\x24"
+			  "\xe4\x48\x40\xf0\xaa\xac\xee\xa9"
+			  "\xfc\x31\xad\x9d\x89\xa3\xbb\xd2"
+			  "\xe4\x25\x13\xad\x0f\x5e\xdf\x3c"
+			  "\x27\xab\xb8\x62\x46\x22\x30\x48"
+			  "\x55\x2c\x4e\x84\x78\x1d\x0d\x34"
+			  "\x8d\x3c\x91\x0a\x7f\x5b\x19\x9f"
+			  "\x97\x05\x4c\xa7\x62\x47\x8b\xc5"
+			  "\x44\x2e\x20\x33\xdd\xa0\x82\xa9"
+			  "\x25\x76\x37\xe6\x3c\x67\x5b",
+		.len	= 127,
+	}, {
+		.key	= "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a"
+			  "\xf3\x33\x88\x86\x04\xf6\xb5\xf0"
+			  "\x47\x39\x17\xc1\x40\x2b\x80\x09"
+			  "\x9d\xca\x5c\xbc\x20\x70\x75\xc0",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x01\x31\x58\xa3\x5a"
+			  "\x25\x5d\x05\x17\x58\xe9\x5e\xd4"
+			  "\x1c\x00\x00\x00\x00\x00\x00\x00",
+		.ptext	= "\x49\xee\xe0\xdc\x24\x90\x40\xcd"
+			  "\xc5\x40\x8f\x47\x05\xbc\xdd\x81"
+			  "\x47\xc6\x8d\xe6\xb1\x8f\xd7\xcb"
+			  "\x09\x0e\x6e\x22\x48\x1f\xbf\xb8"
+			  "\x5c\xf7\x1e\x8a\xc1\x23\xf2\xd4"
+			  "\x19\x4b\x01\x0f\x4e\xa4\x43\xce"
+			  "\x01\xc6\x67\xda\x03\x91\x18\x90"
+			  "\xa5\xa4\x8e\x45\x03\xb3\x2d\xac"
+			  "\x74\x92\xd3\x53\x47\xc8\xdd\x25"
+			  "\x53\x6c\x02\x03\x87\x0d\x11\x0c"
+			  "\x58\xe3\x12\x18\xfd\x2a\x5b\x40"
+			  "\x0c\x30\xf0\xb8\x3f\x43\xce\xae"
+			  "\x65\x3a\x7d\x7c\xf4\x54\xaa\xcc"
+			  "\x33\x97\xc3\x77\xba\xc5\x70\xde"
+			  "\xd7\xd5\x13\xa5\x65\xc4\x5f\x0f"
+			  "\x46\x1a\x0d\x97\xb5\xf3\xbb\x3c"
+			  "\x84\x0f\x2b\xc5\xaa\xea\xf2\x6c"
+			  "\xc9\xb5\x0c\xee\x15\xf3\x7d\xbe"
+			  "\x9f\x7b\x5a\xa6\xae\x4f\x83\xb6"
+			  "\x79\x49\x41\xf4\x58\x18\xcb\x86"
+			  "\x7f\x30\x0e\xf8\x7d\x44\x36\xea"
+			  "\x75\xeb\x88\x84\x40\x3c\xad\x4f"
+			  "\x6f\x31\x6b\xaa\x5d\xe5\xa5\xc5"
+			  "\x21\x66\xe9\xa7\xe3\xb2\x15\x88"
+			  "\x78\xf6\x79\xa1\x59\x47\x12\x4e"
+			  "\x9f\x9f\x64\x1a\xa0\x22\x5b\x08"
+			  "\xbe\x7c\x36\xc2\x2b\x66\x33\x1b"
+			  "\xdd\x60\x71\xf7\x47\x8c\x61\xc3"
+			  "\xda\x8a\x78\x1e\x16\xfa\x1e\x86"
+			  "\x81\xa6\x17\x2a\xa7\xb5\xc2\xe7"
+			  "\xa4\xc7\x42\xf1\xcf\x6a\xca\xb4"
+			  "\x45\xcf\xf3\x93\xf0\xe7\xea\xf6"
+			  "\xf4\xe6\x33\x43\x84\x93\xa5\x67"
+			  "\x9b\x16\x58\x58\x80\x0f\x2b\x5c"
+			  "\x24\x74\x75\x7f\x95\x81\xb7\x30"
+			  "\x7a\x33\xa7\xf7\x94\x87\x32\x27"
+			  "\x10\x5d\x14\x4c\x43\x29\xdd\x26"
+			  "\xbd\x3e\x3c\x0e\xfe\x0e\xa5\x10"
+			  "\xea\x6b\x64\xfd\x73\xc6\xed\xec"
+			  "\xa8\xc9\xbf\xb3\xba\x0b\x4d\x07"
+			  "\x70\xfc\x16\xfd\x79\x1e\xd7\xc5"
+			  "\x49\x4e\x1c\x8b\x8d\x79\x1b\xb1"
+			  "\xec\xca\x60\x09\x4c\x6a\xd5\x09"
+			  "\x49\x46\x00\x88\x22\x8d\xce\xea"
+			  "\xb1\x17\x11\xde\x42\xd2\x23\xc1"
+			  "\x72\x11\xf5\x50\x73\x04\x40\x47"
+			  "\xf9\x5d\xe7\xa7\x26\xb1\x7e\xb0"
+			  "\x3f\x58\xc1\x52\xab\x12\x67\x9d"
+			  "\x3f\x43\x4b\x68\xd4\x9c\x68\x38"
+			  "\x07\x8a\x2d\x3e\xf3\xaf\x6a\x4b"
+			  "\xf9\xe5\x31\x69\x22\xf9\xa6\x69"
+			  "\xc6\x9c\x96\x9a\x12\x35\x95\x1d"
+			  "\x95\xd5\xdd\xbe\xbf\x93\x53\x24"
+			  "\xfd\xeb\xc2\x0a\x64\xb0\x77\x00"
+			  "\x6f\x88\xc4\x37\x18\x69\x7c\xd7"
+			  "\x41\x92\x55\x4c\x03\xa1\x9a\x4b"
+			  "\x15\xe5\xdf\x7f\x37\x33\x72\xc1"
+			  "\x8b\x10\x67\xa3\x01\x57\x94\x25"
+			  "\x7b\x38\x71\x7e\xdd\x1e\xcc\x73"
+			  "\x55\xd2\x8e\xeb\x07\xdd\xf1\xda"
+			  "\x58\xb1\x47\x90\xfe\x42\x21\x72"
+			  "\xa3\x54\x7a\xa0\x40\xec\x9f\xdd"
+			  "\xc6\x84\x6e\xca\xae\xe3\x68\xb4"
+			  "\x9d\xe4\x78\xff\x57\xf2\xf8\x1b"
+			  "\x03\xa1\x31\xd9\xde\x8d\xf5\x22"
+			  "\x9c\xdd\x20\xa4\x1e\x27\xb1\x76"
+			  "\x4f\x44\x55\xe2\x9b\xa1\x9c\xfe"
+			  "\x54\xf7\x27\x1b\xf4\xde\x02\xf5"
+			  "\x1b\x55\x48\x5c\xdc\x21\x4b\x9e"
+			  "\x4b\x6e\xed\x46\x23\xdc\x65\xb2"
+			  "\xcf\x79\x5f\x28\xe0\x9e\x8b\xe7"
+			  "\x4c\x9d\x8a\xff\xc1\xa6\x28\xb8"
+			  "\x65\x69\x8a\x45\x29\xef\x74\x85"
+			  "\xde\x79\xc7\x08\xae\x30\xb0\xf4"
+			  "\xa3\x1d\x51\x41\xab\xce\xcb\xf6"
+			  "\xb5\xd8\x6d\xe0\x85\xe1\x98\xb3"
+			  "\x43\xbb\x86\x83\x0a\xa0\xf5\xb7"
+			  "\x04\x0b\xfa\x71\x1f\xb0\xf6\xd9"
+			  "\x13\x00\x15\xf0\xc7\xeb\x0d\x5a"
+			  "\x9f\xd7\xb9\x6c\x65\x14\x22\x45"
+			  "\x6e\x45\x32\x3e\x7e\x60\x1a\x12"
+			  "\x97\x82\x14\xfb\xaa\x04\x22\xfa"
+			  "\xa0\xe5\x7e\x8c\x78\x02\x48\x5d"
+			  "\x78\x33\x5a\x7c\xad\xdb\x29\xce"
+			  "\xbb\x8b\x61\xa4\xb7\x42\xe2\xac"
+			  "\x8b\x1a\xd9\x2f\x0b\x8b\x62\x21"
+			  "\x83\x35\x7e\xad\x73\xc2\xb5\x6c"
+			  "\x10\x26\x38\x07\xe5\xc7\x36\x80"
+			  "\xe2\x23\x12\x61\xf5\x48\x4b\x2b"
+			  "\xc5\xdf\x15\xd9\x87\x01\xaa\xac"
+			  "\x1e\x7c\xad\x73\x78\x18\x63\xe0"
+			  "\x8b\x9f\x81\xd8\x12\x6a\x28\x10"
+			  "\xbe\x04\x68\x8a\x09\x7c\x1b\x1c"
+			  "\x83\x66\x80\x47\x80\xe8\xfd\x35"
+			  "\x1c\x97\x6f\xae\x49\x10\x66\xcc"
+			  "\xc6\xd8\xcc\x3a\x84\x91\x20\x77"
+			  "\x72\xe4\x24\xd2\x37\x9f\xc5\xc9"
+			  "\x25\x94\x10\x5f\x40\x00\x64\x99"
+			  "\xdc\xae\xd7\x21\x09\x78\x50\x15"
+			  "\xac\x5f\xc6\x2c\xa2\x0b\xa9\x39"
+			  "\x87\x6e\x6d\xab\xde\x08\x51\x16"
+			  "\xc7\x13\xe9\xea\xed\x06\x8e\x2c"
+			  "\xf8\x37\x8c\xf0\xa6\x96\x8d\x43"
+			  "\xb6\x98\x37\xb2\x43\xed\xde\xdf"
+			  "\x89\x1a\xe7\xeb\x9d\xa1\x7b\x0b"
+			  "\x77\xb0\xe2\x75\xc0\xf1\x98\xd9"
+			  "\x80\x55\xc9\x34\x91\xd1\x59\xe8"
+			  "\x4b\x0f\xc1\xa9\x4b\x7a\x84\x06"
+			  "\x20\xa8\x5d\xfa\xd1\xde\x70\x56"
+			  "\x2f\x9e\x91\x9c\x20\xb3\x24\xd8"
+			  "\x84\x3d\xe1\x8c\x7e\x62\x52\xe5"
+			  "\x44\x4b\x9f\xc2\x93\x03\xea\x2b"
+			  "\x59\xc5\xfa\x3f\x91\x2b\xbb\x23"
+			  "\xf5\xb2\x7b\xf5\x38\xaf\xb3\xee"
+			  "\x63\xdc\x7b\xd1\xff\xaa\x8b\xab"
+			  "\x82\x6b\x37\x04\xeb\x74\xbe\x79"
+			  "\xb9\x83\x90\xef\x20\x59\x46\xff"
+			  "\xe9\x97\x3e\x2f\xee\xb6\x64\x18"
+			  "\x38\x4c\x7a\x4a\xf9\x61\xe8\x9a"
+			  "\xa1\xb5\x01\xa6\x47\xd3\x11\xd4"
+			  "\xce\xd3\x91\x49\x88\xc7\xb8\x4d"
+			  "\xb1\xb9\x07\x6d\x16\x72\xae\x46"
+			  "\x5e\x03\xa1\x4b\xb6\x02\x30\xa8"
+			  "\x3d\xa9\x07\x2a\x7c\x19\xe7\x62"
+			  "\x87\xe3\x82\x2f\x6f\xe1\x09\xd9"
+			  "\x94\x97\xea\xdd\x58\x9e\xae\x76"
+			  "\x7e\x35\xe5\xb4\xda\x7e\xf4\xde"
+			  "\xf7\x32\x87\xcd\x93\xbf\x11\x56"
+			  "\x11\xbe\x08\x74\xe1\x69\xad\xe2"
+			  "\xd7\xf8\x86\x75\x8a\x3c\xa4\xbe"
+			  "\x70\xa7\x1b\xfc\x0b\x44\x2a\x76"
+			  "\x35\xea\x5d\x85\x81\xaf\x85\xeb"
+			  "\xa0\x1c\x61\xc2\xf7\x4f\xa5\xdc"
+			  "\x02\x7f\xf6\x95\x40\x6e\x8a\x9a"
+			  "\xf3\x5d\x25\x6e\x14\x3a\x22\xc9"
+			  "\x37\x1c\xeb\x46\x54\x3f\xa5\x91"
+			  "\xc2\xb5\x8c\xfe\x53\x08\x97\x32"
+			  "\x1b\xb2\x30\x27\xfe\x25\x5d\xdc"
+			  "\x08\x87\xd0\xe5\x94\x1a\xd4\xf1"
+			  "\xfe\xd6\xb4\xa3\xe6\x74\x81\x3c"
+			  "\x1b\xb7\x31\xa7\x22\xfd\xd4\xdd"
+			  "\x20\x4e\x7c\x51\xb0\x60\x73\xb8"
+			  "\x9c\xac\x91\x90\x7e\x01\xb0\xe1"
+			  "\x8a\x2f\x75\x1c\x53\x2a\x98\x2a"
+			  "\x06\x52\x95\x52\xb2\xe9\x25\x2e"
+			  "\x4c\xe2\x5a\x00\xb2\x13\x81\x03"
+			  "\x77\x66\x0d\xa5\x99\xda\x4e\x8c"
+			  "\xac\xf3\x13\x53\x27\x45\xaf\x64"
+			  "\x46\xdc\xea\x23\xda\x97\xd1\xab"
+			  "\x7d\x6c\x30\x96\x1f\xbc\x06\x34"
+			  "\x18\x0b\x5e\x21\x35\x11\x8d\x4c"
+			  "\xe0\x2d\xe9\x50\x16\x74\x81\xa8"
+			  "\xb4\x34\xb9\x72\x42\xa6\xcc\xbc"
+			  "\xca\x34\x83\x27\x10\x5b\x68\x45"
+			  "\x8f\x52\x22\x0c\x55\x3d\x29\x7c"
+			  "\xe3\xc0\x66\x05\x42\x91\x5f\x58"
+			  "\xfe\x4a\x62\xd9\x8c\xa9\x04\x19"
+			  "\x04\xa9\x08\x4b\x57\xfc\x67\x53"
+			  "\x08\x7c\xbc\x66\x8a\xb0\xb6\x9f"
+			  "\x92\xd6\x41\x7c\x5b\x2a\x00\x79"
+			  "\x72",
+		.ctext	= "\xe1\xb6\x8b\x5c\x80\xb8\xcc\x08"
+			  "\x1b\x84\xb2\xd1\xad\xa4\x70\xac"
+			  "\x67\xa9\x39\x27\xac\xb4\x5b\xb7"
+			  "\x4c\x26\x77\x23\x1d\xce\x0a\xbe"
+			  "\x18\x9e\x42\x8b\xbd\x7f\xd6\xf1"
+			  "\xf1\x6b\xe2\x6d\x7f\x92\x0e\xcb"
+			  "\xb8\x79\xba\xb4\xac\x7e\x2d\xc0"
+			  "\x9e\x83\x81\x91\xd5\xea\xc3\x12"
+			  "\x8d\xa4\x26\x70\xa4\xf9\x71\x0b"
+			  "\xbd\x2e\xe1\xb3\x80\x42\x25\xb3"
+			  "\x0b\x31\x99\xe1\x0d\xde\xa6\x90"
+			  "\xf2\xa3\x10\xf7\xe5\xf3\x83\x1e"
+			  "\x2c\xfb\x4d\xf0\x45\x3d\x28\x3c"
+			  "\xb8\xf1\xcb\xbf\x67\xd8\x43\x5a"
+			  "\x9d\x7b\x73\x29\x88\x0f\x13\x06"
+			  "\x37\x50\x0d\x7c\xe6\x9b\x07\xdd"
+			  "\x7e\x01\x1f\x81\x90\x10\x69\xdb"
+			  "\xa4\xad\x8a\x5e\xac\x30\x72\xf2"
+			  "\x36\xcd\xe3\x23\x49\x02\x93\xfa"
+			  "\x3d\xbb\xe2\x98\x83\xeb\xe9\x8d"
+			  "\xb3\x8f\x11\xaa\x53\xdb\xaf\x2e"
+			  "\x95\x13\x99\x3d\x71\xbd\x32\x92"
+			  "\xdd\xfc\x9d\x5e\x6f\x63\x2c\xee"
+			  "\x91\x1f\x4c\x64\x3d\x87\x55\x0f"
+			  "\xcc\x3d\x89\x61\x53\x02\x57\x8f"
+			  "\xe4\x77\x29\x32\xaf\xa6\x2f\x0a"
+			  "\xae\x3c\x3f\x3f\xf4\xfb\x65\x52"
+			  "\xc5\xc1\x78\x78\x53\x28\xad\xed"
+			  "\xd1\x67\x37\xc7\x59\x70\xcd\x0a"
+			  "\xb8\x0f\x80\x51\x9f\xc0\x12\x5e"
+			  "\x06\x0a\x7e\xec\x24\x5f\x73\x00"
+			  "\xb1\x0b\x31\x47\x4f\x73\x8d\xb4"
+			  "\xce\xf3\x55\x45\x6c\x84\x27\xba"
+			  "\xb9\x6f\x03\x4a\xeb\x98\x88\x6e"
+			  "\x53\xed\x25\x19\x0d\x8f\xfe\xca"
+			  "\x60\xe5\x00\x93\x6e\x3c\xff\x19"
+			  "\xae\x08\x3b\x8a\xa6\x84\x05\xfe"
+			  "\x9b\x59\xa0\x8c\xc8\x05\x45\xf5"
+			  "\x05\x37\xdc\x45\x6f\x8b\x95\x8c"
+			  "\x4e\x11\x45\x7a\xce\x21\xa5\xf7"
+			  "\x71\x67\xb9\xce\xd7\xf9\xe9\x5e"
+			  "\x60\xf5\x53\x7a\xa8\x85\x14\x03"
+			  "\xa0\x92\xec\xf3\x51\x80\x84\xc4"
+			  "\xdc\x11\x9e\x57\xce\x4b\x45\xcf"
+			  "\x90\x95\x85\x0b\x96\xe9\xee\x35"
+			  "\x10\xb8\x9b\xf2\x59\x4a\xc6\x7e"
+			  "\x85\xe5\x6f\x38\x51\x93\x40\x0c"
+			  "\x99\xd7\x7f\x32\xa8\x06\x27\xd1"
+			  "\x2b\xd5\xb5\x3a\x1a\xe1\x5e\xda"
+			  "\xcd\x5a\x50\x30\x3c\xc7\xe7\x65"
+			  "\xa6\x07\x0b\x98\x91\xc6\x20\x27"
+			  "\x2a\x03\x63\x1b\x1e\x3d\xaf\xc8"
+			  "\x71\x48\x46\x6a\x64\x28\xf9\x3d"
+			  "\xd1\x1d\xab\xc8\x40\x76\xc2\x39"
+			  "\x4e\x00\x75\xd2\x0e\x82\x58\x8c"
+			  "\xd3\x73\x5a\xea\x46\x89\xbe\xfd"
+			  "\x4e\x2c\x0d\x94\xaa\x9b\x68\xac"
+			  "\x86\x87\x30\x7e\xa9\x16\xcd\x59"
+			  "\xd2\xa6\xbe\x0a\xd8\xf5\xfd\x2d"
+			  "\x49\x69\xd2\x1a\x90\xd2\x1b\xed"
+			  "\xff\x71\x04\x87\x87\x21\xc4\xb8"
+			  "\x1f\x5b\x51\x33\xd0\xd6\x59\x9a"
+			  "\x03\x0e\xd3\x8b\xfb\x57\x73\xfd"
+			  "\x5a\x52\x63\x82\xc8\x85\x2f\xcb"
+			  "\x74\x6d\x4e\xd9\x68\x37\x85\x6a"
+			  "\xd4\xfb\x94\xed\x8d\xd1\x1a\xaf"
+			  "\x76\xa7\xb7\x88\xd0\x2b\x4e\xda"
+			  "\xec\x99\x94\x27\x6f\x87\x8c\xdf"
+			  "\x4b\x5e\xa6\x66\xdd\xcb\x33\x7b"
+			  "\x64\x94\x31\xa8\x37\xa6\x1d\xdb"
+			  "\x0d\x5c\x93\xa4\x40\xf9\x30\x53"
+			  "\x4b\x74\x8d\xdd\xf6\xde\x3c\xac"
+			  "\x5c\x80\x01\x3a\xef\xb1\x9a\x02"
+			  "\x0c\x22\x8e\xe7\x44\x09\x74\x4c"
+			  "\xf2\x9a\x27\x69\x7f\x12\x32\x36"
+			  "\xde\x92\xdf\xde\x8f\x5b\x31\xab"
+			  "\x4a\x01\x26\xe0\xb1\xda\xe8\x37"
+			  "\x21\x64\xe8\xff\x69\xfc\x9e\x41"
+			  "\xd2\x96\x2d\x18\x64\x98\x33\x78"
+			  "\x24\x61\x73\x9b\x47\x29\xf1\xa7"
+			  "\xcb\x27\x0f\xf0\x85\x6d\x8c\x9d"
+			  "\x2c\x95\x9e\xe5\xb2\x8e\x30\x29"
+			  "\x78\x8a\x9d\x65\xb4\x8e\xde\x7b"
+			  "\xd9\x00\x50\xf5\x7f\x81\xc3\x1b"
+			  "\x25\x85\xeb\xc2\x8c\x33\x22\x1e"
+			  "\x68\x38\x22\x30\xd8\x2e\x00\x98"
+			  "\x85\x16\x06\x56\xb4\x81\x74\x20"
+			  "\x95\xdb\x1c\x05\x19\xe8\x23\x4d"
+			  "\x65\x5d\xcc\xd8\x7f\xc4\x2d\x0f"
+			  "\x57\x26\x71\x07\xad\xaa\x71\x9f"
+			  "\x19\x76\x2f\x25\x51\x88\xe4\xc0"
+			  "\x82\x6e\x08\x05\x37\x04\xee\x25"
+			  "\x23\x90\xe9\x4e\xce\x9b\x16\xc1"
+			  "\x31\xe7\x6e\x2c\x1b\xe1\x85\x9a"
+			  "\x0c\x8c\xbb\x12\x1e\x68\x7b\x93"
+			  "\xa9\x3c\x39\x56\x23\x3e\x6e\xc7"
+			  "\x77\x84\xd3\xe0\x86\x59\xaa\xb9"
+			  "\xd5\x53\x58\xc9\x0a\x83\x5f\x85"
+			  "\xd8\x47\x14\x67\x8a\x3c\x17\xe0"
+			  "\xab\x02\x51\xea\xf1\xf0\x4f\x30"
+			  "\x7d\xe0\x92\xc2\x5f\xfb\x19\x5a"
+			  "\x3f\xbd\xf4\x39\xa4\x31\x0c\x39"
+			  "\xd1\xae\x4e\xf7\x65\x7f\x1f\xce"
+			  "\xc2\x39\xd1\x84\xd4\xe5\x02\xe0"
+			  "\x58\xaa\xf1\x5e\x81\xaf\x7f\x72"
+			  "\x0f\x08\x99\x43\xb9\xd8\xac\x41"
+			  "\x35\x55\xf2\xb2\xd4\x98\xb8\x3b"
+			  "\x2b\x3c\x3e\x16\x06\x31\xfc\x79"
+			  "\x47\x38\x63\x51\xc5\xd0\x26\xd7"
+			  "\x43\xb4\x2b\xd9\xc5\x05\xf2\x9d"
+			  "\x18\xc9\x26\x82\x56\xd2\x11\x05"
+			  "\xb6\x89\xb4\x43\x9c\xb5\x9d\x11"
+			  "\x6c\x83\x37\x71\x27\x1c\xae\xbf"
+			  "\xcd\x57\xd2\xee\x0d\x5a\x15\x26"
+			  "\x67\x88\x80\x80\x1b\xdc\xc1\x62"
+			  "\xdd\x4c\xff\x92\x5c\x6c\xe1\xa0"
+			  "\xe3\x79\xa9\x65\x8c\x8c\x14\x42"
+			  "\xe5\x11\xd2\x1a\xad\xa9\x56\x6f"
+			  "\x98\xfc\x8a\x7b\x56\x1f\xc6\xc1"
+			  "\x52\x12\x92\x9b\x41\x0f\x4b\xae"
+			  "\x1b\x4a\xbc\xfe\x23\xb6\x94\x70"
+			  "\x04\x30\x9e\x69\x47\xbe\xb8\x8f"
+			  "\xca\x45\xd7\x8a\xf4\x78\x3e\xaa"
+			  "\x71\x17\xd8\x1e\xb8\x11\x8f\xbc"
+			  "\xc8\x1a\x65\x7b\x41\x89\x72\xc7"
+			  "\x5f\xbe\xc5\x2a\xdb\x5c\x54\xf9"
+			  "\x25\xa3\x7a\x80\x56\x9c\x8c\xab"
+			  "\x26\x19\x10\x36\xa6\xf3\x14\x79"
+			  "\x40\x98\x70\x68\xb7\x35\xd9\xb9"
+			  "\x27\xd4\xe7\x74\x5b\x3d\x97\xb4"
+			  "\xd9\xaa\xd9\xf2\xb5\x14\x84\x1f"
+			  "\xa9\xde\x12\x44\x5b\x00\xc0\xbc"
+			  "\xc8\x11\x25\x1b\x67\x7a\x15\x72"
+			  "\xa6\x31\x6f\xf4\x68\x7a\x86\x9d"
+			  "\x43\x1c\x5f\x16\xd3\xad\x2e\x52"
+			  "\xf3\xb4\xc3\xfa\x27\x2e\x68\x6c"
+			  "\x06\xe7\x4c\x4f\xa2\xe0\xe4\x21"
+			  "\x5d\x9e\x33\x58\x8d\xbf\xd5\x70"
+			  "\xf8\x80\xa5\xdd\xe7\x18\x79\xfa"
+			  "\x7b\xfd\x09\x69\x2c\x37\x32\xa8"
+			  "\x65\xfa\x8d\x8b\x5c\xcc\xe8\xf3"
+			  "\x37\xf6\xa6\xc6\x5c\xa2\x66\x79"
+			  "\xfa\x8a\xa7\xd1\x0b\x2e\x1b\x5e"
+			  "\x95\x35\x00\x76\xae\x42\xf7\x50"
+			  "\x51\x78\xfb\xb4\x28\x24\xde\x1a"
+			  "\x70\x8b\xed\xca\x3c\x5e\xe4\xbd"
+			  "\x28\xb5\xf3\x76\x4f\x67\x5d\x81"
+			  "\xb2\x60\x87\xd9\x7b\x19\x1a\xa7"
+			  "\x79\xa2\xfa\x3f\x9e\xa9\xd7\x25"
+			  "\x61\xe1\x74\x31\xa2\x77\xa0\x1b"
+			  "\xf6\xf7\xcb\xc5\xaa\x9e\xce\xf9"
+			  "\x9b\x96\xef\x51\xc3\x1a\x44\x96"
+			  "\xae\x17\x50\xab\x29\x08\xda\xcc"
+			  "\x1a\xb3\x12\xd0\x24\xe4\xe2\xe0"
+			  "\xc6\xe3\xcc\x82\xd0\xba\x47\x4c"
+			  "\x3f\x49\xd7\xe8\xb6\x61\xaa\x65"
+			  "\x25\x18\x40\x2d\x62\x25\x02\x71"
+			  "\x61\xa2\xc1\xb2\x13\xd2\x71\x3f"
+			  "\x43\x1a\xc9\x09\x92\xff\xd5\x57"
+			  "\xf0\xfc\x5e\x1c\xf1\xf5\xf9\xf3"
+			  "\x5b",
+		.len	= 1281,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 1200, 1, 80 },
+	},
+};
+
+/* Adiantum test vectors from https://github.com/google/adiantum */
+static const struct cipher_testvec adiantum_xchacha12_aes_tv_template[] = {
+	{
+		.key	= "\x9e\xeb\xb2\x49\x3c\x1c\xf5\xf4"
+			  "\x6a\x99\xc2\xc4\xdf\xb1\xf4\xdd"
+			  "\x75\x20\x57\xea\x2c\x4f\xcd\xb2"
+			  "\xa5\x3d\x7b\x49\x1e\xab\xfd\x0f",
+		.klen	= 32,
+		.iv	= "\xdf\x63\xd4\xab\xd2\x49\xf3\xd8"
+			  "\x33\x81\x37\x60\x7d\xfa\x73\x08"
+			  "\xd8\x49\x6d\x80\xe8\x2f\x62\x54"
+			  "\xeb\x0e\xa9\x39\x5b\x45\x7f\x8a",
+		.ptext	= "\x67\xc9\xf2\x30\x84\x41\x8e\x43"
+			  "\xfb\xf3\xb3\x3e\x79\x36\x7f\xe8",
+		.ctext	= "\x6d\x32\x86\x18\x67\x86\x0f\x3f"
+			  "\x96\x7c\x9d\x28\x0d\x53\xec\x9f",
+		.len	= 16,
+		.also_non_np = 1,
+		.np	= 2,
+		.tap	= { 14, 2 },
+	}, {
+		.key	= "\x36\x2b\x57\x97\xf8\x5d\xcd\x99"
+			  "\x5f\x1a\x5a\x44\x1d\x92\x0f\x27"
+			  "\xcc\x16\xd7\x2b\x85\x63\x99\xd3"
+			  "\xba\x96\xa1\xdb\xd2\x60\x68\xda",
+		.klen	= 32,
+		.iv	= "\xef\x58\x69\xb1\x2c\x5e\x9a\x47"
+			  "\x24\xc1\xb1\x69\xe1\x12\x93\x8f"
+			  "\x43\x3d\x6d\x00\xdb\x5e\xd8\xd9"
+			  "\x12\x9a\xfe\xd9\xff\x2d\xaa\xc4",
+		.ptext	= "\x5e\xa8\x68\x19\x85\x98\x12\x23"
+			  "\x26\x0a\xcc\xdb\x0a\x04\xb9\xdf"
+			  "\x4d\xb3\x48\x7b\xb0\xe3\xc8\x19"
+			  "\x43\x5a\x46\x06\x94\x2d\xf2",
+		.ctext	= "\xc7\xc6\xf1\x73\x8f\xc4\xff\x4a"
+			  "\x39\xbe\x78\xbe\x8d\x28\xc8\x89"
+			  "\x46\x63\xe7\x0c\x7d\x87\xe8\x4e"
+			  "\xc9\x18\x7b\xbe\x18\x60\x50",
+		.len	= 31,
+	}, {
+		.key	= "\xa5\x28\x24\x34\x1a\x3c\xd8\xf7"
+			  "\x05\x91\x8f\xee\x85\x1f\x35\x7f"
+			  "\x80\x3d\xfc\x9b\x94\xf6\xfc\x9e"
+			  "\x19\x09\x00\xa9\x04\x31\x4f\x11",
+		.klen	= 32,
+		.iv	= "\xa1\xba\x49\x95\xff\x34\x6d\xb8"
+			  "\xcd\x87\x5d\x5e\xfd\xea\x85\xdb"
+			  "\x8a\x7b\x5e\xb2\x5d\x57\xdd\x62"
+			  "\xac\xa9\x8c\x41\x42\x94\x75\xb7",
+		.ptext	= "\x69\xb4\xe8\x8c\x37\xe8\x67\x82"
+			  "\xf1\xec\x5d\x04\xe5\x14\x91\x13"
+			  "\xdf\xf2\x87\x1b\x69\x81\x1d\x71"
+			  "\x70\x9e\x9c\x3b\xde\x49\x70\x11"
+			  "\xa0\xa3\xdb\x0d\x54\x4f\x66\x69"
+			  "\xd7\xdb\x80\xa7\x70\x92\x68\xce"
+			  "\x81\x04\x2c\xc6\xab\xae\xe5\x60"
+			  "\x15\xe9\x6f\xef\xaa\x8f\xa7\xa7"
+			  "\x63\x8f\xf2\xf0\x77\xf1\xa8\xea"
+			  "\xe1\xb7\x1f\x9e\xab\x9e\x4b\x3f"
+			  "\x07\x87\x5b\x6f\xcd\xa8\xaf\xb9"
+			  "\xfa\x70\x0b\x52\xb8\xa8\xa7\x9e"
+			  "\x07\x5f\xa6\x0e\xb3\x9b\x79\x13"
+			  "\x79\xc3\x3e\x8d\x1c\x2c\x68\xc8"
+			  "\x51\x1d\x3c\x7b\x7d\x79\x77\x2a"
+			  "\x56\x65\xc5\x54\x23\x28\xb0\x03",
+		.ctext	= "\x9e\x16\xab\xed\x4b\xa7\x42\x5a"
+			  "\xc6\xfb\x4e\x76\xff\xbe\x03\xa0"
+			  "\x0f\xe3\xad\xba\xe4\x98\x2b\x0e"
+			  "\x21\x48\xa0\xb8\x65\x48\x27\x48"
+			  "\x84\x54\x54\xb2\x9a\x94\x7b\xe6"
+			  "\x4b\x29\xe9\xcf\x05\x91\x80\x1a"
+			  "\x3a\xf3\x41\x96\x85\x1d\x9f\x74"
+			  "\x51\x56\x63\xfa\x7c\x28\x85\x49"
+			  "\xf7\x2f\xf9\xf2\x18\x46\xf5\x33"
+			  "\x80\xa3\x3c\xce\xb2\x57\x93\xf5"
+			  "\xae\xbd\xa9\xf5\x7b\x30\xc4\x93"
+			  "\x66\xe0\x30\x77\x16\xe4\xa0\x31"
+			  "\xba\x70\xbc\x68\x13\xf5\xb0\x9a"
+			  "\xc1\xfc\x7e\xfe\x55\x80\x5c\x48"
+			  "\x74\xa6\xaa\xa3\xac\xdc\xc2\xf5"
+			  "\x8d\xde\x34\x86\x78\x60\x75\x8d",
+		.len	= 128,
+		.also_non_np = 1,
+		.np	= 4,
+		.tap	= { 104, 16, 4, 4 },
+	}, {
+		.key	= "\xd3\x81\x72\x18\x23\xff\x6f\x4a"
+			  "\x25\x74\x29\x0d\x51\x8a\x0e\x13"
+			  "\xc1\x53\x5d\x30\x8d\xee\x75\x0d"
+			  "\x14\xd6\x69\xc9\x15\xa9\x0c\x60",
+		.klen	= 32,
+		.iv	= "\x65\x9b\xd4\xa8\x7d\x29\x1d\xf4"
+			  "\xc4\xd6\x9b\x6a\x28\xab\x64\xe2"
+			  "\x62\x81\x97\xc5\x81\xaa\xf9\x44"
+			  "\xc1\x72\x59\x82\xaf\x16\xc8\x2c",
+		.ptext	= "\xc7\x6b\x52\x6a\x10\xf0\xcc\x09"
+			  "\xc1\x12\x1d\x6d\x21\xa6\x78\xf5"
+			  "\x05\xa3\x69\x60\x91\x36\x98\x57"
+			  "\xba\x0c\x14\xcc\xf3\x2d\x73\x03"
+			  "\xc6\xb2\x5f\xc8\x16\x27\x37\x5d"
+			  "\xd0\x0b\x87\xb2\x50\x94\x7b\x58"
+			  "\x04\xf4\xe0\x7f\x6e\x57\x8e\xc9"
+			  "\x41\x84\xc1\xb1\x7e\x4b\x91\x12"
+			  "\x3a\x8b\x5d\x50\x82\x7b\xcb\xd9"
+			  "\x9a\xd9\x4e\x18\x06\x23\x9e\xd4"
+			  "\xa5\x20\x98\xef\xb5\xda\xe5\xc0"
+			  "\x8a\x6a\x83\x77\x15\x84\x1e\xae"
+			  "\x78\x94\x9d\xdf\xb7\xd1\xea\x67"
+			  "\xaa\xb0\x14\x15\xfa\x67\x21\x84"
+			  "\xd3\x41\x2a\xce\xba\x4b\x4a\xe8"
+			  "\x95\x62\xa9\x55\xf0\x80\xad\xbd"
+			  "\xab\xaf\xdd\x4f\xa5\x7c\x13\x36"
+			  "\xed\x5e\x4f\x72\xad\x4b\xf1\xd0"
+			  "\x88\x4e\xec\x2c\x88\x10\x5e\xea"
+			  "\x12\xc0\x16\x01\x29\xa3\xa0\x55"
+			  "\xaa\x68\xf3\xe9\x9d\x3b\x0d\x3b"
+			  "\x6d\xec\xf8\xa0\x2d\xf0\x90\x8d"
+			  "\x1c\xe2\x88\xd4\x24\x71\xf9\xb3"
+			  "\xc1\x9f\xc5\xd6\x76\x70\xc5\x2e"
+			  "\x9c\xac\xdb\x90\xbd\x83\x72\xba"
+			  "\x6e\xb5\xa5\x53\x83\xa9\xa5\xbf"
+			  "\x7d\x06\x0e\x3c\x2a\xd2\x04\xb5"
+			  "\x1e\x19\x38\x09\x16\xd2\x82\x1f"
+			  "\x75\x18\x56\xb8\x96\x0b\xa6\xf9"
+			  "\xcf\x62\xd9\x32\x5d\xa9\xd7\x1d"
+			  "\xec\xe4\xdf\x1b\xbe\xf1\x36\xee"
+			  "\xe3\x7b\xb5\x2f\xee\xf8\x53\x3d"
+			  "\x6a\xb7\x70\xa9\xfc\x9c\x57\x25"
+			  "\xf2\x89\x10\xd3\xb8\xa8\x8c\x30"
+			  "\xae\x23\x4f\x0e\x13\x66\x4f\xe1"
+			  "\xb6\xc0\xe4\xf8\xef\x93\xbd\x6e"
+			  "\x15\x85\x6b\xe3\x60\x81\x1d\x68"
+			  "\xd7\x31\x87\x89\x09\xab\xd5\x96"
+			  "\x1d\xf3\x6d\x67\x80\xca\x07\x31"
+			  "\x5d\xa7\xe4\xfb\x3e\xf2\x9b\x33"
+			  "\x52\x18\xc8\x30\xfe\x2d\xca\x1e"
+			  "\x79\x92\x7a\x60\x5c\xb6\x58\x87"
+			  "\xa4\x36\xa2\x67\x92\x8b\xa4\xb7"
+			  "\xf1\x86\xdf\xdc\xc0\x7e\x8f\x63"
+			  "\xd2\xa2\xdc\x78\xeb\x4f\xd8\x96"
+			  "\x47\xca\xb8\x91\xf9\xf7\x94\x21"
+			  "\x5f\x9a\x9f\x5b\xb8\x40\x41\x4b"
+			  "\x66\x69\x6a\x72\xd0\xcb\x70\xb7"
+			  "\x93\xb5\x37\x96\x05\x37\x4f\xe5"
+			  "\x8c\xa7\x5a\x4e\x8b\xb7\x84\xea"
+			  "\xc7\xfc\x19\x6e\x1f\x5a\xa1\xac"
+			  "\x18\x7d\x52\x3b\xb3\x34\x62\x99"
+			  "\xe4\x9e\x31\x04\x3f\xc0\x8d\x84"
+			  "\x17\x7c\x25\x48\x52\x67\x11\x27"
+			  "\x67\xbb\x5a\x85\xca\x56\xb2\x5c"
+			  "\xe6\xec\xd5\x96\x3d\x15\xfc\xfb"
+			  "\x22\x25\xf4\x13\xe5\x93\x4b\x9a"
+			  "\x77\xf1\x52\x18\xfa\x16\x5e\x49"
+			  "\x03\x45\xa8\x08\xfa\xb3\x41\x92"
+			  "\x79\x50\x33\xca\xd0\xd7\x42\x55"
+			  "\xc3\x9a\x0c\x4e\xd9\xa4\x3c\x86"
+			  "\x80\x9f\x53\xd1\xa4\x2e\xd1\xbc"
+			  "\xf1\x54\x6e\x93\xa4\x65\x99\x8e"
+			  "\xdf\x29\xc0\x64\x63\x07\xbb\xea",
+		.ctext	= "\x15\x97\xd0\x86\x18\x03\x9c\x51"
+			  "\xc5\x11\x36\x62\x13\x92\xe6\x73"
+			  "\x29\x79\xde\xa1\x00\x3e\x08\x64"
+			  "\x17\x1a\xbc\xd5\xfe\x33\x0e\x0c"
+			  "\x7c\x94\xa7\xc6\x3c\xbe\xac\xa2"
+			  "\x89\xe6\xbc\xdf\x0c\x33\x27\x42"
+			  "\x46\x73\x2f\xba\x4e\xa6\x46\x8f"
+			  "\xe4\xee\x39\x63\x42\x65\xa3\x88"
+			  "\x7a\xad\x33\x23\xa9\xa7\x20\x7f"
+			  "\x0b\xe6\x6a\xc3\x60\xda\x9e\xb4"
+			  "\xd6\x07\x8a\x77\x26\xd1\xab\x44"
+			  "\x99\x55\x03\x5e\xed\x8d\x7b\xbd"
+			  "\xc8\x21\xb7\x21\x30\x3f\xc0\xb5"
+			  "\xc8\xec\x6c\x23\xa6\xa3\x6d\xf1"
+			  "\x30\x0a\xd0\xa6\xa9\x28\x69\xae"
+			  "\x2a\xe6\x54\xac\x82\x9d\x6a\x95"
+			  "\x6f\x06\x44\xc5\x5a\x77\x6e\xec"
+			  "\xf8\xf8\x63\xb2\xe6\xaa\xbd\x8e"
+			  "\x0e\x8a\x62\x00\x03\xc8\x84\xdd"
+			  "\x47\x4a\xc3\x55\xba\xb7\xe7\xdf"
+			  "\x08\xbf\x62\xf5\xe8\xbc\xb6\x11"
+			  "\xe4\xcb\xd0\x66\x74\x32\xcf\xd4"
+			  "\xf8\x51\x80\x39\x14\x05\x12\xdb"
+			  "\x87\x93\xe2\x26\x30\x9c\x3a\x21"
+			  "\xe5\xd0\x38\x57\x80\x15\xe4\x08"
+			  "\x58\x05\x49\x7d\xe6\x92\x77\x70"
+			  "\xfb\x1e\x2d\x6a\x84\x00\xc8\x68"
+			  "\xf7\x1a\xdd\xf0\x7b\x38\x1e\xd8"
+			  "\x2c\x78\x78\x61\xcf\xe3\xde\x69"
+			  "\x1f\xd5\x03\xd5\x1a\xb4\xcf\x03"
+			  "\xc8\x7a\x70\x68\x35\xb4\xf6\xbe"
+			  "\x90\x62\xb2\x28\x99\x86\xf5\x44"
+			  "\x99\xeb\x31\xcf\xca\xdf\xd0\x21"
+			  "\xd6\x60\xf7\x0f\x40\xb4\x80\xb7"
+			  "\xab\xe1\x9b\x45\xba\x66\xda\xee"
+			  "\xdd\x04\x12\x40\x98\xe1\x69\xe5"
+			  "\x2b\x9c\x59\x80\xe7\x7b\xcc\x63"
+			  "\xa6\xc0\x3a\xa9\xfe\x8a\xf9\x62"
+			  "\x11\x34\x61\x94\x35\xfe\xf2\x99"
+			  "\xfd\xee\x19\xea\x95\xb6\x12\xbf"
+			  "\x1b\xdf\x02\x1a\xcc\x3e\x7e\x65"
+			  "\x78\x74\x10\x50\x29\x63\x28\xea"
+			  "\x6b\xab\xd4\x06\x4d\x15\x24\x31"
+			  "\xc7\x0a\xc9\x16\xb6\x48\xf0\xbf"
+			  "\x49\xdb\x68\x71\x31\x8f\x87\xe2"
+			  "\x13\x05\x64\xd6\x22\x0c\xf8\x36"
+			  "\x84\x24\x3e\x69\x5e\xb8\x9e\x16"
+			  "\x73\x6c\x83\x1e\xe0\x9f\x9e\xba"
+			  "\xe5\x59\x21\x33\x1b\xa9\x26\xc2"
+			  "\xc7\xd9\x30\x73\xb6\xa6\x73\x82"
+			  "\x19\xfa\x44\x4d\x40\x8b\x69\x04"
+			  "\x94\x74\xea\x6e\xb3\x09\x47\x01"
+			  "\x2a\xb9\x78\x34\x43\x11\xed\xd6"
+			  "\x8c\x95\x65\x1b\x85\x67\xa5\x40"
+			  "\xac\x9c\x05\x4b\x57\x4a\xa9\x96"
+			  "\x0f\xdd\x4f\xa1\xe0\xcf\x6e\xc7"
+			  "\x1b\xed\xa2\xb4\x56\x8c\x09\x6e"
+			  "\xa6\x65\xd7\x55\x81\xb7\xed\x11"
+			  "\x9b\x40\x75\xa8\x6b\x56\xaf\x16"
+			  "\x8b\x3d\xf4\xcb\xfe\xd5\x1d\x3d"
+			  "\x85\xc2\xc0\xde\x43\x39\x4a\x96"
+			  "\xba\x88\x97\xc0\xd6\x00\x0e\x27"
+			  "\x21\xb0\x21\x52\xba\xa7\x37\xaa"
+			  "\xcc\xbf\x95\xa8\xf4\xd0\x91\xf6",
+		.len	= 512,
+		.also_non_np = 1,
+		.np	= 2,
+		.tap	= { 144, 368 },
+	}
+};
+
+/* Adiantum with XChaCha20 instead of XChaCha12 */
+/* Test vectors from https://github.com/google/adiantum */
+static const struct cipher_testvec adiantum_xchacha20_aes_tv_template[] = {
+	{
+		.key	= "\x9e\xeb\xb2\x49\x3c\x1c\xf5\xf4"
+			  "\x6a\x99\xc2\xc4\xdf\xb1\xf4\xdd"
+			  "\x75\x20\x57\xea\x2c\x4f\xcd\xb2"
+			  "\xa5\x3d\x7b\x49\x1e\xab\xfd\x0f",
+		.klen	= 32,
+		.iv	= "\xdf\x63\xd4\xab\xd2\x49\xf3\xd8"
+			  "\x33\x81\x37\x60\x7d\xfa\x73\x08"
+			  "\xd8\x49\x6d\x80\xe8\x2f\x62\x54"
+			  "\xeb\x0e\xa9\x39\x5b\x45\x7f\x8a",
+		.ptext	= "\x67\xc9\xf2\x30\x84\x41\x8e\x43"
+			  "\xfb\xf3\xb3\x3e\x79\x36\x7f\xe8",
+		.ctext	= "\xf6\x78\x97\xd6\xaa\x94\x01\x27"
+			  "\x2e\x4d\x83\xe0\x6e\x64\x9a\xdf",
+		.len	= 16,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 5, 2, 9 },
+	}, {
+		.key	= "\x36\x2b\x57\x97\xf8\x5d\xcd\x99"
+			  "\x5f\x1a\x5a\x44\x1d\x92\x0f\x27"
+			  "\xcc\x16\xd7\x2b\x85\x63\x99\xd3"
+			  "\xba\x96\xa1\xdb\xd2\x60\x68\xda",
+		.klen	= 32,
+		.iv	= "\xef\x58\x69\xb1\x2c\x5e\x9a\x47"
+			  "\x24\xc1\xb1\x69\xe1\x12\x93\x8f"
+			  "\x43\x3d\x6d\x00\xdb\x5e\xd8\xd9"
+			  "\x12\x9a\xfe\xd9\xff\x2d\xaa\xc4",
+		.ptext	= "\x5e\xa8\x68\x19\x85\x98\x12\x23"
+			  "\x26\x0a\xcc\xdb\x0a\x04\xb9\xdf"
+			  "\x4d\xb3\x48\x7b\xb0\xe3\xc8\x19"
+			  "\x43\x5a\x46\x06\x94\x2d\xf2",
+		.ctext	= "\x4b\xb8\x90\x10\xdf\x7f\x64\x08"
+			  "\x0e\x14\x42\x5f\x00\x74\x09\x36"
+			  "\x57\x72\xb5\xfd\xb5\x5d\xb8\x28"
+			  "\x0c\x04\x91\x14\x91\xe9\x37",
+		.len	= 31,
+		.also_non_np = 1,
+		.np	= 2,
+		.tap	= { 16, 15 },
+	}, {
+		.key	= "\xa5\x28\x24\x34\x1a\x3c\xd8\xf7"
+			  "\x05\x91\x8f\xee\x85\x1f\x35\x7f"
+			  "\x80\x3d\xfc\x9b\x94\xf6\xfc\x9e"
+			  "\x19\x09\x00\xa9\x04\x31\x4f\x11",
+		.klen	= 32,
+		.iv	= "\xa1\xba\x49\x95\xff\x34\x6d\xb8"
+			  "\xcd\x87\x5d\x5e\xfd\xea\x85\xdb"
+			  "\x8a\x7b\x5e\xb2\x5d\x57\xdd\x62"
+			  "\xac\xa9\x8c\x41\x42\x94\x75\xb7",
+		.ptext	= "\x69\xb4\xe8\x8c\x37\xe8\x67\x82"
+			  "\xf1\xec\x5d\x04\xe5\x14\x91\x13"
+			  "\xdf\xf2\x87\x1b\x69\x81\x1d\x71"
+			  "\x70\x9e\x9c\x3b\xde\x49\x70\x11"
+			  "\xa0\xa3\xdb\x0d\x54\x4f\x66\x69"
+			  "\xd7\xdb\x80\xa7\x70\x92\x68\xce"
+			  "\x81\x04\x2c\xc6\xab\xae\xe5\x60"
+			  "\x15\xe9\x6f\xef\xaa\x8f\xa7\xa7"
+			  "\x63\x8f\xf2\xf0\x77\xf1\xa8\xea"
+			  "\xe1\xb7\x1f\x9e\xab\x9e\x4b\x3f"
+			  "\x07\x87\x5b\x6f\xcd\xa8\xaf\xb9"
+			  "\xfa\x70\x0b\x52\xb8\xa8\xa7\x9e"
+			  "\x07\x5f\xa6\x0e\xb3\x9b\x79\x13"
+			  "\x79\xc3\x3e\x8d\x1c\x2c\x68\xc8"
+			  "\x51\x1d\x3c\x7b\x7d\x79\x77\x2a"
+			  "\x56\x65\xc5\x54\x23\x28\xb0\x03",
+		.ctext	= "\xb1\x8b\xa0\x05\x77\xa8\x4d\x59"
+			  "\x1b\x8e\x21\xfc\x3a\x49\xfa\xd4"
+			  "\xeb\x36\xf3\xc4\xdf\xdc\xae\x67"
+			  "\x07\x3f\x70\x0e\xe9\x66\xf5\x0c"
+			  "\x30\x4d\x66\xc9\xa4\x2f\x73\x9c"
+			  "\x13\xc8\x49\x44\xcc\x0a\x90\x9d"
+			  "\x7c\xdd\x19\x3f\xea\x72\x8d\x58"
+			  "\xab\xe7\x09\x2c\xec\xb5\x44\xd2"
+			  "\xca\xa6\x2d\x7a\x5c\x9c\x2b\x15"
+			  "\xec\x2a\xa6\x69\x91\xf9\xf3\x13"
+			  "\xf7\x72\xc1\xc1\x40\xd5\xe1\x94"
+			  "\xf4\x29\xa1\x3e\x25\x02\xa8\x3e"
+			  "\x94\xc1\x91\x14\xa1\x14\xcb\xbe"
+			  "\x67\x4c\xb9\x38\xfe\xa7\xaa\x32"
+			  "\x29\x62\x0d\xb2\xf6\x3c\x58\x57"
+			  "\xc1\xd5\x5a\xbb\xd6\xa6\x2a\xe5",
+		.len	= 128,
+		.also_non_np = 1,
+		.np	= 4,
+		.tap	= { 112, 7, 8, 1 },
+	}, {
+		.key	= "\xd3\x81\x72\x18\x23\xff\x6f\x4a"
+			  "\x25\x74\x29\x0d\x51\x8a\x0e\x13"
+			  "\xc1\x53\x5d\x30\x8d\xee\x75\x0d"
+			  "\x14\xd6\x69\xc9\x15\xa9\x0c\x60",
+		.klen	= 32,
+		.iv	= "\x65\x9b\xd4\xa8\x7d\x29\x1d\xf4"
+			  "\xc4\xd6\x9b\x6a\x28\xab\x64\xe2"
+			  "\x62\x81\x97\xc5\x81\xaa\xf9\x44"
+			  "\xc1\x72\x59\x82\xaf\x16\xc8\x2c",
+		.ptext	= "\xc7\x6b\x52\x6a\x10\xf0\xcc\x09"
+			  "\xc1\x12\x1d\x6d\x21\xa6\x78\xf5"
+			  "\x05\xa3\x69\x60\x91\x36\x98\x57"
+			  "\xba\x0c\x14\xcc\xf3\x2d\x73\x03"
+			  "\xc6\xb2\x5f\xc8\x16\x27\x37\x5d"
+			  "\xd0\x0b\x87\xb2\x50\x94\x7b\x58"
+			  "\x04\xf4\xe0\x7f\x6e\x57\x8e\xc9"
+			  "\x41\x84\xc1\xb1\x7e\x4b\x91\x12"
+			  "\x3a\x8b\x5d\x50\x82\x7b\xcb\xd9"
+			  "\x9a\xd9\x4e\x18\x06\x23\x9e\xd4"
+			  "\xa5\x20\x98\xef\xb5\xda\xe5\xc0"
+			  "\x8a\x6a\x83\x77\x15\x84\x1e\xae"
+			  "\x78\x94\x9d\xdf\xb7\xd1\xea\x67"
+			  "\xaa\xb0\x14\x15\xfa\x67\x21\x84"
+			  "\xd3\x41\x2a\xce\xba\x4b\x4a\xe8"
+			  "\x95\x62\xa9\x55\xf0\x80\xad\xbd"
+			  "\xab\xaf\xdd\x4f\xa5\x7c\x13\x36"
+			  "\xed\x5e\x4f\x72\xad\x4b\xf1\xd0"
+			  "\x88\x4e\xec\x2c\x88\x10\x5e\xea"
+			  "\x12\xc0\x16\x01\x29\xa3\xa0\x55"
+			  "\xaa\x68\xf3\xe9\x9d\x3b\x0d\x3b"
+			  "\x6d\xec\xf8\xa0\x2d\xf0\x90\x8d"
+			  "\x1c\xe2\x88\xd4\x24\x71\xf9\xb3"
+			  "\xc1\x9f\xc5\xd6\x76\x70\xc5\x2e"
+			  "\x9c\xac\xdb\x90\xbd\x83\x72\xba"
+			  "\x6e\xb5\xa5\x53\x83\xa9\xa5\xbf"
+			  "\x7d\x06\x0e\x3c\x2a\xd2\x04\xb5"
+			  "\x1e\x19\x38\x09\x16\xd2\x82\x1f"
+			  "\x75\x18\x56\xb8\x96\x0b\xa6\xf9"
+			  "\xcf\x62\xd9\x32\x5d\xa9\xd7\x1d"
+			  "\xec\xe4\xdf\x1b\xbe\xf1\x36\xee"
+			  "\xe3\x7b\xb5\x2f\xee\xf8\x53\x3d"
+			  "\x6a\xb7\x70\xa9\xfc\x9c\x57\x25"
+			  "\xf2\x89\x10\xd3\xb8\xa8\x8c\x30"
+			  "\xae\x23\x4f\x0e\x13\x66\x4f\xe1"
+			  "\xb6\xc0\xe4\xf8\xef\x93\xbd\x6e"
+			  "\x15\x85\x6b\xe3\x60\x81\x1d\x68"
+			  "\xd7\x31\x87\x89\x09\xab\xd5\x96"
+			  "\x1d\xf3\x6d\x67\x80\xca\x07\x31"
+			  "\x5d\xa7\xe4\xfb\x3e\xf2\x9b\x33"
+			  "\x52\x18\xc8\x30\xfe\x2d\xca\x1e"
+			  "\x79\x92\x7a\x60\x5c\xb6\x58\x87"
+			  "\xa4\x36\xa2\x67\x92\x8b\xa4\xb7"
+			  "\xf1\x86\xdf\xdc\xc0\x7e\x8f\x63"
+			  "\xd2\xa2\xdc\x78\xeb\x4f\xd8\x96"
+			  "\x47\xca\xb8\x91\xf9\xf7\x94\x21"
+			  "\x5f\x9a\x9f\x5b\xb8\x40\x41\x4b"
+			  "\x66\x69\x6a\x72\xd0\xcb\x70\xb7"
+			  "\x93\xb5\x37\x96\x05\x37\x4f\xe5"
+			  "\x8c\xa7\x5a\x4e\x8b\xb7\x84\xea"
+			  "\xc7\xfc\x19\x6e\x1f\x5a\xa1\xac"
+			  "\x18\x7d\x52\x3b\xb3\x34\x62\x99"
+			  "\xe4\x9e\x31\x04\x3f\xc0\x8d\x84"
+			  "\x17\x7c\x25\x48\x52\x67\x11\x27"
+			  "\x67\xbb\x5a\x85\xca\x56\xb2\x5c"
+			  "\xe6\xec\xd5\x96\x3d\x15\xfc\xfb"
+			  "\x22\x25\xf4\x13\xe5\x93\x4b\x9a"
+			  "\x77\xf1\x52\x18\xfa\x16\x5e\x49"
+			  "\x03\x45\xa8\x08\xfa\xb3\x41\x92"
+			  "\x79\x50\x33\xca\xd0\xd7\x42\x55"
+			  "\xc3\x9a\x0c\x4e\xd9\xa4\x3c\x86"
+			  "\x80\x9f\x53\xd1\xa4\x2e\xd1\xbc"
+			  "\xf1\x54\x6e\x93\xa4\x65\x99\x8e"
+			  "\xdf\x29\xc0\x64\x63\x07\xbb\xea",
+		.ctext	= "\xe0\x33\xf6\xe0\xb4\xa5\xdd\x2b"
+			  "\xdd\xce\xfc\x12\x1e\xfc\x2d\xf2"
+			  "\x8b\xc7\xeb\xc1\xc4\x2a\xe8\x44"
+			  "\x0f\x3d\x97\x19\x2e\x6d\xa2\x38"
+			  "\x9d\xa6\xaa\xe1\x96\xb9\x08\xe8"
+			  "\x0b\x70\x48\x5c\xed\xb5\x9b\xcb"
+			  "\x8b\x40\x88\x7e\x69\x73\xf7\x16"
+			  "\x71\xbb\x5b\xfc\xa3\x47\x5d\xa6"
+			  "\xae\x3a\x64\xc4\xe7\xb8\xa8\xe7"
+			  "\xb1\x32\x19\xdb\xe3\x01\xb8\xf0"
+			  "\xa4\x86\xb4\x4c\xc2\xde\x5c\xd2"
+			  "\x6c\x77\xd2\xe8\x18\xb7\x0a\xc9"
+			  "\x3d\x53\xb5\xc4\x5c\xf0\x8c\x06"
+			  "\xdc\x90\xe0\x74\x47\x1b\x0b\xf6"
+			  "\xd2\x71\x6b\xc4\xf1\x97\x00\x2d"
+			  "\x63\x57\x44\x1f\x8c\xf4\xe6\x9b"
+			  "\xe0\x7a\xdd\xec\x32\x73\x42\x32"
+			  "\x7f\x35\x67\x60\x0d\xcf\x10\x52"
+			  "\x61\x22\x53\x8d\x8e\xbb\x33\x76"
+			  "\x59\xd9\x10\xce\xdf\xef\xc0\x41"
+			  "\xd5\x33\x29\x6a\xda\x46\xa4\x51"
+			  "\xf0\x99\x3d\x96\x31\xdd\xb5\xcb"
+			  "\x3e\x2a\x1f\xc7\x5c\x79\xd3\xc5"
+			  "\x20\xa1\xb1\x39\x1b\xc6\x0a\x70"
+			  "\x26\x39\x95\x07\xad\x7a\xc9\x69"
+			  "\xfe\x81\xc7\x88\x08\x38\xaf\xad"
+			  "\x9e\x8d\xfb\xe8\x24\x0d\x22\xb8"
+			  "\x0e\xed\xbe\x37\x53\x7c\xa6\xc6"
+			  "\x78\x62\xec\xa3\x59\xd9\xc6\x9d"
+			  "\xb8\x0e\x69\x77\x84\x2d\x6a\x4c"
+			  "\xc5\xd9\xb2\xa0\x2b\xa8\x80\xcc"
+			  "\xe9\x1e\x9c\x5a\xc4\xa1\xb2\x37"
+			  "\x06\x9b\x30\x32\x67\xf7\xe7\xd2"
+			  "\x42\xc7\xdf\x4e\xd4\xcb\xa0\x12"
+			  "\x94\xa1\x34\x85\x93\x50\x4b\x0a"
+			  "\x3c\x7d\x49\x25\x01\x41\x6b\x96"
+			  "\xa9\x12\xbb\x0b\xc0\xd7\xd0\x93"
+			  "\x1f\x70\x38\xb8\x21\xee\xf6\xa7"
+			  "\xee\xeb\xe7\x81\xa4\x13\xb4\x87"
+			  "\xfa\xc1\xb0\xb5\x37\x8b\x74\xa2"
+			  "\x4e\xc7\xc2\xad\x3d\x62\x3f\xf8"
+			  "\x34\x42\xe5\xae\x45\x13\x63\xfe"
+			  "\xfc\x2a\x17\x46\x61\xa9\xd3\x1c"
+			  "\x4c\xaf\xf0\x09\x62\x26\x66\x1e"
+			  "\x74\xcf\xd6\x68\x3d\x7d\xd8\xb7"
+			  "\xe7\xe6\xf8\xf0\x08\x20\xf7\x47"
+			  "\x1c\x52\xaa\x0f\x3e\x21\xa3\xf2"
+			  "\xbf\x2f\x95\x16\xa8\xc8\xc8\x8c"
+			  "\x99\x0f\x5d\xfb\xfa\x2b\x58\x8a"
+			  "\x7e\xd6\x74\x02\x60\xf0\xd0\x5b"
+			  "\x65\xa8\xac\xea\x8d\x68\x46\x34"
+			  "\x26\x9d\x4f\xb1\x9a\x8e\xc0\x1a"
+			  "\xf1\xed\xc6\x7a\x83\xfd\x8a\x57"
+			  "\xf2\xe6\xe4\xba\xfc\xc6\x3c\xad"
+			  "\x5b\x19\x50\x2f\x3a\xcc\x06\x46"
+			  "\x04\x51\x3f\x91\x97\xf0\xd2\x07"
+			  "\xe7\x93\x89\x7e\xb5\x32\x0f\x03"
+			  "\xe5\x58\x9e\x74\x72\xeb\xc2\x38"
+			  "\x00\x0c\x91\x72\x69\xed\x7d\x6d"
+			  "\xc8\x71\xf0\xec\xff\x80\xd9\x1c"
+			  "\x9e\xd2\xfa\x15\xfc\x6c\x4e\xbc"
+			  "\xb1\xa6\xbd\xbd\x70\x40\xca\x20"
+			  "\xb8\x78\xd2\xa3\xc6\xf3\x79\x9c"
+			  "\xc7\x27\xe1\x6a\x29\xad\xa4\x03",
+		.len	= 512,
+	}
+};
+
 /*
  * CTS (Cipher Text Stealing) mode tests
  */
diff --git a/drivers/Kconfig b/drivers/Kconfig
index ab4d439..681051a 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -219,4 +219,5 @@
 
 source "drivers/slimbus/Kconfig"
 
+source "drivers/energy_model/Kconfig"
 endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 578f469..1e847e2 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -157,6 +157,8 @@
 obj-$(CONFIG_RPMSG)		+= rpmsg/
 obj-$(CONFIG_SOUNDWIRE)		+= soundwire/
 
+obj-$(CONFIG_ENERGY_MODEL)	+= energy_model/
+
 # Virtualization drivers
 obj-$(CONFIG_VIRT_DRIVERS)	+= virt/
 obj-$(CONFIG_HYPERV)		+= hv/
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 6e04e7a..71e777d 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -73,6 +73,7 @@
 #include <linux/ratelimit.h>
 
 #include <uapi/linux/android/binder.h>
+#include <uapi/linux/sched/types.h>
 
 #include <asm/cacheflush.h>
 
@@ -350,10 +351,15 @@
  *                        and by @lock)
  * @has_async_transaction: async transaction to node in progress
  *                        (protected by @lock)
+ * @sched_policy:         minimum scheduling policy for node
+ *                        (invariant after initialized)
  * @accept_fds:           file descriptor operations supported for node
  *                        (invariant after initialized)
  * @min_priority:         minimum scheduling priority
  *                        (invariant after initialized)
+ * @inherit_rt:           inherit RT scheduling policy from caller
+ * @txn_security_ctx:     require sender's security context
+ *                        (invariant after initialized)
  * @async_todo:           list of async work items
  *                        (protected by @proc->inner_lock)
  *
@@ -389,7 +395,10 @@
 		/*
 		 * invariant after initialization
 		 */
+		u8 sched_policy:2;
+		u8 inherit_rt:1;
 		u8 accept_fds:1;
+		u8 txn_security_ctx:1;
 		u8 min_priority;
 	};
 	bool has_async_transaction;
@@ -463,6 +472,22 @@
 };
 
 /**
+ * struct binder_priority - scheduler policy and priority
+ * @sched_policy            scheduler policy
+ * @prio                    [100..139] for SCHED_NORMAL, [0..99] for FIFO/RT
+ *
+ * The binder driver supports inheriting the following scheduler policies:
+ * SCHED_NORMAL
+ * SCHED_BATCH
+ * SCHED_FIFO
+ * SCHED_RR
+ */
+struct binder_priority {
+	unsigned int sched_policy;
+	int prio;
+};
+
+/**
  * struct binder_proc - binder process bookkeeping
  * @proc_node:            element for binder_procs list
  * @threads:              rbtree of binder_threads in this proc
@@ -540,7 +565,7 @@
 	int requested_threads;
 	int requested_threads_started;
 	int tmp_ref;
-	long default_priority;
+	struct binder_priority default_priority;
 	struct dentry *debugfs_entry;
 	struct binder_alloc alloc;
 	struct binder_context *context;
@@ -590,6 +615,7 @@
  * @is_dead:              thread is dead and awaiting free
  *                        when outstanding transactions are cleaned up
  *                        (protected by @proc->inner_lock)
+ * @task:                 struct task_struct for this thread
  *
  * Bookkeeping structure for binder threads.
  */
@@ -609,6 +635,7 @@
 	struct binder_stats stats;
 	atomic_t tmp_ref;
 	bool is_dead;
+	struct task_struct *task;
 };
 
 struct binder_transaction {
@@ -625,9 +652,11 @@
 	struct binder_buffer *buffer;
 	unsigned int	code;
 	unsigned int	flags;
-	long	priority;
-	long	saved_priority;
+	struct binder_priority	priority;
+	struct binder_priority	saved_priority;
+	bool    set_priority_called;
 	kuid_t	sender_euid;
+	binder_uintptr_t security_ctx;
 	/**
 	 * @lock:  protects @from, @to_proc, and @to_thread
 	 *
@@ -638,6 +667,26 @@
 };
 
 /**
+ * struct binder_object - union of flat binder object types
+ * @hdr:   generic object header
+ * @fbo:   binder object (nodes and refs)
+ * @fdo:   file descriptor object
+ * @bbo:   binder buffer pointer
+ * @fdao:  file descriptor array
+ *
+ * Used for type-independent object copies
+ */
+struct binder_object {
+	union {
+		struct binder_object_header hdr;
+		struct flat_binder_object fbo;
+		struct binder_fd_object fdo;
+		struct binder_buffer_object bbo;
+		struct binder_fd_array_object fdao;
+	};
+};
+
+/**
  * binder_proc_lock() - Acquire outer lock for given binder_proc
  * @proc:         struct binder_proc to acquire
  *
@@ -1107,22 +1156,145 @@
 	binder_wakeup_thread_ilocked(proc, thread, /* sync = */false);
 }
 
-static void binder_set_nice(long nice)
+static bool is_rt_policy(int policy)
 {
-	long min_nice;
+	return policy == SCHED_FIFO || policy == SCHED_RR;
+}
 
-	if (can_nice(current, nice)) {
-		set_user_nice(current, nice);
+static bool is_fair_policy(int policy)
+{
+	return policy == SCHED_NORMAL || policy == SCHED_BATCH;
+}
+
+static bool binder_supported_policy(int policy)
+{
+	return is_fair_policy(policy) || is_rt_policy(policy);
+}
+
+static int to_userspace_prio(int policy, int kernel_priority)
+{
+	if (is_fair_policy(policy))
+		return PRIO_TO_NICE(kernel_priority);
+	else
+		return MAX_USER_RT_PRIO - 1 - kernel_priority;
+}
+
+static int to_kernel_prio(int policy, int user_priority)
+{
+	if (is_fair_policy(policy))
+		return NICE_TO_PRIO(user_priority);
+	else
+		return MAX_USER_RT_PRIO - 1 - user_priority;
+}
+
+static void binder_do_set_priority(struct task_struct *task,
+				   struct binder_priority desired,
+				   bool verify)
+{
+	int priority; /* user-space prio value */
+	bool has_cap_nice;
+	unsigned int policy = desired.sched_policy;
+
+	if (task->policy == policy && task->normal_prio == desired.prio)
 		return;
+
+	has_cap_nice = has_capability_noaudit(task, CAP_SYS_NICE);
+
+	priority = to_userspace_prio(policy, desired.prio);
+
+	if (verify && is_rt_policy(policy) && !has_cap_nice) {
+		long max_rtprio = task_rlimit(task, RLIMIT_RTPRIO);
+
+		if (max_rtprio == 0) {
+			policy = SCHED_NORMAL;
+			priority = MIN_NICE;
+		} else if (priority > max_rtprio) {
+			priority = max_rtprio;
+		}
 	}
-	min_nice = rlimit_to_nice(rlimit(RLIMIT_NICE));
-	binder_debug(BINDER_DEBUG_PRIORITY_CAP,
-		     "%d: nice value %ld not allowed use %ld instead\n",
-		      current->pid, nice, min_nice);
-	set_user_nice(current, min_nice);
-	if (min_nice <= MAX_NICE)
+
+	if (verify && is_fair_policy(policy) && !has_cap_nice) {
+		long min_nice = rlimit_to_nice(task_rlimit(task, RLIMIT_NICE));
+
+		if (min_nice > MAX_NICE) {
+			binder_user_error("%d RLIMIT_NICE not set\n",
+					  task->pid);
+			return;
+		} else if (priority < min_nice) {
+			priority = min_nice;
+		}
+	}
+
+	if (policy != desired.sched_policy ||
+	    to_kernel_prio(policy, priority) != desired.prio)
+		binder_debug(BINDER_DEBUG_PRIORITY_CAP,
+			     "%d: priority %d not allowed, using %d instead\n",
+			      task->pid, desired.prio,
+			      to_kernel_prio(policy, priority));
+
+	trace_binder_set_priority(task->tgid, task->pid, task->normal_prio,
+				  to_kernel_prio(policy, priority),
+				  desired.prio);
+
+	/* Set the actual priority */
+	if (task->policy != policy || is_rt_policy(policy)) {
+		struct sched_param params;
+
+		params.sched_priority = is_rt_policy(policy) ? priority : 0;
+
+		sched_setscheduler_nocheck(task,
+					   policy | SCHED_RESET_ON_FORK,
+					   &params);
+	}
+	if (is_fair_policy(policy))
+		set_user_nice(task, priority);
+}
+
+static void binder_set_priority(struct task_struct *task,
+				struct binder_priority desired)
+{
+	binder_do_set_priority(task, desired, /* verify = */ true);
+}
+
+static void binder_restore_priority(struct task_struct *task,
+				    struct binder_priority desired)
+{
+	binder_do_set_priority(task, desired, /* verify = */ false);
+}
+
+static void binder_transaction_priority(struct task_struct *task,
+					struct binder_transaction *t,
+					struct binder_priority node_prio,
+					bool inherit_rt)
+{
+	struct binder_priority desired_prio = t->priority;
+
+	if (t->set_priority_called)
 		return;
-	binder_user_error("%d RLIMIT_NICE not set\n", current->pid);
+
+	t->set_priority_called = true;
+	t->saved_priority.sched_policy = task->policy;
+	t->saved_priority.prio = task->normal_prio;
+
+	if (!inherit_rt && is_rt_policy(desired_prio.sched_policy)) {
+		desired_prio.prio = NICE_TO_PRIO(0);
+		desired_prio.sched_policy = SCHED_NORMAL;
+	}
+
+	if (node_prio.prio < t->priority.prio ||
+	    (node_prio.prio == t->priority.prio &&
+	     node_prio.sched_policy == SCHED_FIFO)) {
+		/*
+		 * In case the minimum priority on the node is
+		 * higher (lower value), use that priority. If
+		 * the priority is the same, but the node uses
+		 * SCHED_FIFO, prefer SCHED_FIFO, since it can
+		 * run unbounded, unlike SCHED_RR.
+		 */
+		desired_prio = node_prio;
+	}
+
+	binder_set_priority(task, desired_prio);
 }
 
 static struct binder_node *binder_get_node_ilocked(struct binder_proc *proc,
@@ -1175,6 +1347,7 @@
 	binder_uintptr_t ptr = fp ? fp->binder : 0;
 	binder_uintptr_t cookie = fp ? fp->cookie : 0;
 	__u32 flags = fp ? fp->flags : 0;
+	s8 priority;
 
 	assert_spin_locked(&proc->inner_lock);
 
@@ -1207,8 +1380,13 @@
 	node->ptr = ptr;
 	node->cookie = cookie;
 	node->work.type = BINDER_WORK_NODE;
-	node->min_priority = flags & FLAT_BINDER_FLAG_PRIORITY_MASK;
+	priority = flags & FLAT_BINDER_FLAG_PRIORITY_MASK;
+	node->sched_policy = (flags & FLAT_BINDER_FLAG_SCHED_POLICY_MASK) >>
+		FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT;
+	node->min_priority = to_kernel_prio(node->sched_policy, priority);
 	node->accept_fds = !!(flags & FLAT_BINDER_FLAG_ACCEPTS_FDS);
+	node->inherit_rt = !!(flags & FLAT_BINDER_FLAG_INHERIT_RT);
+	node->txn_security_ctx = !!(flags & FLAT_BINDER_FLAG_TXN_SECURITY_CTX);
 	spin_lock_init(&node->lock);
 	INIT_LIST_HEAD(&node->work.entry);
 	INIT_LIST_HEAD(&node->async_todo);
@@ -2054,26 +2232,34 @@
 }
 
 /**
- * binder_validate_object() - checks for a valid metadata object in a buffer.
+ * binder_get_object() - gets object and checks for valid metadata
+ * @proc:	binder_proc owning the buffer
  * @buffer:	binder_buffer that we're parsing.
- * @offset:	offset in the buffer at which to validate an object.
+ * @offset:	offset in the @buffer at which to validate an object.
+ * @object:	struct binder_object to read into
  *
  * Return:	If there's a valid metadata object at @offset in @buffer, the
- *		size of that object. Otherwise, it returns zero.
+ *		size of that object. Otherwise, it returns zero. The object
+ *		is read into the struct binder_object pointed to by @object.
  */
-static size_t binder_validate_object(struct binder_buffer *buffer, u64 offset)
+static size_t binder_get_object(struct binder_proc *proc,
+				struct binder_buffer *buffer,
+				unsigned long offset,
+				struct binder_object *object)
 {
-	/* Check if we can read a header first */
+	size_t read_size;
 	struct binder_object_header *hdr;
 	size_t object_size = 0;
 
-	if (buffer->data_size < sizeof(*hdr) ||
-	    offset > buffer->data_size - sizeof(*hdr) ||
+	read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset);
+	if (offset > buffer->data_size || read_size < sizeof(*hdr) ||
 	    !IS_ALIGNED(offset, sizeof(u32)))
 		return 0;
+	binder_alloc_copy_from_buffer(&proc->alloc, object, buffer,
+				      offset, read_size);
 
-	/* Ok, now see if we can read a complete object. */
-	hdr = (struct binder_object_header *)(buffer->data + offset);
+	/* Ok, now see if we read a complete object. */
+	hdr = &object->hdr;
 	switch (hdr->type) {
 	case BINDER_TYPE_BINDER:
 	case BINDER_TYPE_WEAK_BINDER:
@@ -2102,10 +2288,13 @@
 
 /**
  * binder_validate_ptr() - validates binder_buffer_object in a binder_buffer.
+ * @proc:	binder_proc owning the buffer
  * @b:		binder_buffer containing the object
+ * @object:	struct binder_object to read into
  * @index:	index in offset array at which the binder_buffer_object is
  *		located
- * @start:	points to the start of the offset array
+ * @start_offset: points to the start of the offset array
+ * @object_offsetp: offset of @object read from @b
  * @num_valid:	the number of valid offsets in the offset array
  *
  * Return:	If @index is within the valid range of the offset array
@@ -2116,34 +2305,46 @@
  *		Note that the offset found in index @index itself is not
  *		verified; this function assumes that @num_valid elements
  *		from @start were previously verified to have valid offsets.
+ *		If @object_offsetp is non-NULL, then the offset within
+ *		@b is written to it.
  */
-static struct binder_buffer_object *binder_validate_ptr(struct binder_buffer *b,
-							binder_size_t index,
-							binder_size_t *start,
-							binder_size_t num_valid)
+static struct binder_buffer_object *binder_validate_ptr(
+						struct binder_proc *proc,
+						struct binder_buffer *b,
+						struct binder_object *object,
+						binder_size_t index,
+						binder_size_t start_offset,
+						binder_size_t *object_offsetp,
+						binder_size_t num_valid)
 {
-	struct binder_buffer_object *buffer_obj;
-	binder_size_t *offp;
+	size_t object_size;
+	binder_size_t object_offset;
+	unsigned long buffer_offset;
 
 	if (index >= num_valid)
 		return NULL;
 
-	offp = start + index;
-	buffer_obj = (struct binder_buffer_object *)(b->data + *offp);
-	if (buffer_obj->hdr.type != BINDER_TYPE_PTR)
+	buffer_offset = start_offset + sizeof(binder_size_t) * index;
+	binder_alloc_copy_from_buffer(&proc->alloc, &object_offset,
+				      b, buffer_offset, sizeof(object_offset));
+	object_size = binder_get_object(proc, b, object_offset, object);
+	if (!object_size || object->hdr.type != BINDER_TYPE_PTR)
 		return NULL;
+	if (object_offsetp)
+		*object_offsetp = object_offset;
 
-	return buffer_obj;
+	return &object->bbo;
 }
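
The pattern above — copy the i-th offsets-array entry out of the buffer, then resolve and size-check the object it names — recurs in every caller below. A minimal stand-alone sketch of the bounds-checked offsets-array read, in user space with simplified stand-in types (apart from binder_size_t, none of these names are real driver symbols):

/* Stand-alone sketch of the bounds-checked offsets-array read; user space,
 * simplified types, nothing here is a real driver symbol. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef uint64_t binder_size_t;

static int read_object_offset(const unsigned char *buf, size_t buf_size,
			      binder_size_t start_offset, binder_size_t index,
			      binder_size_t num_valid, binder_size_t *out)
{
	binder_size_t entry = start_offset + sizeof(binder_size_t) * index;

	if (index >= num_valid || entry + sizeof(*out) > buf_size)
		return -1;
	memcpy(out, buf + entry, sizeof(*out)); /* copy_from_buffer stand-in */
	return 0;
}

int main(void)
{
	unsigned char buf[64] = { 0 };
	binder_size_t off_array[2] = { 0, 8 };	/* two objects, at 0 and 8 */
	binder_size_t object_offset;

	memcpy(buf + 32, off_array, sizeof(off_array));	/* offsets array at 32 */
	if (!read_object_offset(buf, sizeof(buf), 32, 1, 2, &object_offset))
		printf("object #1 is at buffer offset %llu\n",
		       (unsigned long long)object_offset);
	return 0;
}
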
 
 /**
  * binder_validate_fixup() - validates pointer/fd fixups happen in order.
+ * @proc:		binder_proc owning the buffer
  * @b:			transaction buffer
- * @objects_start	start of objects buffer
- * @buffer:		binder_buffer_object in which to fix up
- * @offset:		start offset in @buffer to fix up
- * @last_obj:		last binder_buffer_object that we fixed up in
- * @last_min_offset:	minimum fixup offset in @last_obj
+ * @objects_start_offset: offset to start of objects buffer
+ * @buffer_obj_offset:	offset to binder_buffer_object in which to fix up
+ * @fixup_offset:	start offset in the object at @buffer_obj_offset to fix up
+ * @last_obj_offset:	offset to the last binder_buffer_object that we fixed up
+ * @last_min_offset:	minimum fixup offset in object at @last_obj_offset
  *
  * Return:		%true if a fixup at @fixup_offset in the object at
  *			@buffer_obj_offset is allowed.
@@ -2174,63 +2375,83 @@
  *   C (parent = A, offset = 16)
  *     D (parent = B, offset = 0) // B is not A or any of A's parents
  */
-static bool binder_validate_fixup(struct binder_buffer *b,
-				  binder_size_t *objects_start,
-				  struct binder_buffer_object *buffer,
+static bool binder_validate_fixup(struct binder_proc *proc,
+				  struct binder_buffer *b,
+				  binder_size_t objects_start_offset,
+				  binder_size_t buffer_obj_offset,
 				  binder_size_t fixup_offset,
-				  struct binder_buffer_object *last_obj,
+				  binder_size_t last_obj_offset,
 				  binder_size_t last_min_offset)
 {
-	if (!last_obj) {
+	if (!last_obj_offset) {
 		/* Nothing to fix up in */
 		return false;
 	}
 
-	while (last_obj != buffer) {
+	while (last_obj_offset != buffer_obj_offset) {
+		unsigned long buffer_offset;
+		struct binder_object last_object;
+		struct binder_buffer_object *last_bbo;
+		size_t object_size = binder_get_object(proc, b, last_obj_offset,
+						       &last_object);
+		if (object_size != sizeof(*last_bbo))
+			return false;
+
+		last_bbo = &last_object.bbo;
 		/*
 		 * Safe to retrieve the parent of last_obj, since it
 		 * was already previously verified by the driver.
 		 */
-		if ((last_obj->flags & BINDER_BUFFER_FLAG_HAS_PARENT) == 0)
+		if ((last_bbo->flags & BINDER_BUFFER_FLAG_HAS_PARENT) == 0)
 			return false;
-		last_min_offset = last_obj->parent_offset + sizeof(uintptr_t);
-		last_obj = (struct binder_buffer_object *)
-			(b->data + *(objects_start + last_obj->parent));
+		last_min_offset = last_bbo->parent_offset + sizeof(uintptr_t);
+		buffer_offset = objects_start_offset +
+			sizeof(binder_size_t) * last_bbo->parent;
+		binder_alloc_copy_from_buffer(&proc->alloc, &last_obj_offset,
+					      b, buffer_offset,
+					      sizeof(last_obj_offset));
 	}
 	return (fixup_offset >= last_min_offset);
 }
 
 static void binder_transaction_buffer_release(struct binder_proc *proc,
 					      struct binder_buffer *buffer,
-					      binder_size_t *failed_at)
+					      binder_size_t failed_at,
+					      bool is_failure)
 {
-	binder_size_t *offp, *off_start, *off_end;
 	int debug_id = buffer->debug_id;
+	binder_size_t off_start_offset, buffer_offset, off_end_offset;
 
 	binder_debug(BINDER_DEBUG_TRANSACTION,
-		     "%d buffer release %d, size %zd-%zd, failed at %pK\n",
+		     "%d buffer release %d, size %zd-%zd, failed at %llx\n",
 		     proc->pid, buffer->debug_id,
-		     buffer->data_size, buffer->offsets_size, failed_at);
+		     buffer->data_size, buffer->offsets_size,
+		     (unsigned long long)failed_at);
 
 	if (buffer->target_node)
 		binder_dec_node(buffer->target_node, 1, 0);
 
-	off_start = (binder_size_t *)(buffer->data +
-				      ALIGN(buffer->data_size, sizeof(void *)));
-	if (failed_at)
-		off_end = failed_at;
-	else
-		off_end = (void *)off_start + buffer->offsets_size;
-	for (offp = off_start; offp < off_end; offp++) {
+	off_start_offset = ALIGN(buffer->data_size, sizeof(void *));
+	off_end_offset = is_failure ? failed_at :
+				off_start_offset + buffer->offsets_size;
+	for (buffer_offset = off_start_offset; buffer_offset < off_end_offset;
+	     buffer_offset += sizeof(binder_size_t)) {
 		struct binder_object_header *hdr;
-		size_t object_size = binder_validate_object(buffer, *offp);
+		size_t object_size;
+		struct binder_object object;
+		binder_size_t object_offset;
 
+		binder_alloc_copy_from_buffer(&proc->alloc, &object_offset,
+					      buffer, buffer_offset,
+					      sizeof(object_offset));
+		object_size = binder_get_object(proc, buffer,
+						object_offset, &object);
 		if (object_size == 0) {
 			pr_err("transaction release %d bad object at offset %lld, size %zd\n",
-			       debug_id, (u64)*offp, buffer->data_size);
+			       debug_id, (u64)object_offset, buffer->data_size);
 			continue;
 		}
-		hdr = (struct binder_object_header *)(buffer->data + *offp);
+		hdr = &object.hdr;
 		switch (hdr->type) {
 		case BINDER_TYPE_BINDER:
 		case BINDER_TYPE_WEAK_BINDER: {
@@ -2288,28 +2509,25 @@
 		case BINDER_TYPE_FDA: {
 			struct binder_fd_array_object *fda;
 			struct binder_buffer_object *parent;
-			uintptr_t parent_buffer;
-			u32 *fd_array;
+			struct binder_object ptr_object;
+			binder_size_t fda_offset;
 			size_t fd_index;
 			binder_size_t fd_buf_size;
+			binder_size_t num_valid;
 
+			num_valid = (buffer_offset - off_start_offset) /
+						sizeof(binder_size_t);
 			fda = to_binder_fd_array_object(hdr);
-			parent = binder_validate_ptr(buffer, fda->parent,
-						     off_start,
-						     offp - off_start);
+			parent = binder_validate_ptr(proc, buffer, &ptr_object,
+						     fda->parent,
+						     off_start_offset,
+						     NULL,
+						     num_valid);
 			if (!parent) {
 				pr_err("transaction release %d bad parent offset\n",
 				       debug_id);
 				continue;
 			}
-			/*
-			 * Since the parent was already fixed up, convert it
-			 * back to kernel address space to access it
-			 */
-			parent_buffer = parent->buffer -
-				binder_alloc_get_user_buffer_offset(
-						&proc->alloc);
-
 			fd_buf_size = sizeof(u32) * fda->num_fds;
 			if (fda->num_fds >= SIZE_MAX / sizeof(u32)) {
 				pr_err("transaction release %d invalid number of fds (%lld)\n",
@@ -2323,9 +2541,29 @@
 				       debug_id, (u64)fda->num_fds);
 				continue;
 			}
-			fd_array = (u32 *)(parent_buffer + (uintptr_t)fda->parent_offset);
-			for (fd_index = 0; fd_index < fda->num_fds; fd_index++)
-				task_close_fd(proc, fd_array[fd_index]);
+			/*
+			 * the source data for binder_buffer_object is visible
+			 * to user-space and the @buffer element is the user
+			 * pointer to the buffer_object containing the fd_array.
+			 * Convert the address to an offset relative to
+			 * the base of the transaction buffer.
+			 */
+			fda_offset =
+			    (parent->buffer - (uintptr_t)buffer->user_data) +
+			    fda->parent_offset;
+			for (fd_index = 0; fd_index < fda->num_fds;
+			     fd_index++) {
+				u32 fd;
+				binder_size_t offset = fda_offset +
+					fd_index * sizeof(fd);
+
+				binder_alloc_copy_from_buffer(&proc->alloc,
+							      &fd,
+							      buffer,
+							      offset,
+							      sizeof(fd));
+				task_close_fd(proc, fd);
+			}
 		} break;
 		default:
 			pr_err("transaction release %d bad object type %x\n",
@@ -2522,9 +2760,8 @@
 				     struct binder_transaction *in_reply_to)
 {
 	binder_size_t fdi, fd_buf_size, num_installed_fds;
+	binder_size_t fda_offset;
 	int target_fd;
-	uintptr_t parent_buffer;
-	u32 *fd_array;
 	struct binder_proc *proc = thread->proc;
 	struct binder_proc *target_proc = t->to_proc;
 
@@ -2542,23 +2779,33 @@
 		return -EINVAL;
 	}
 	/*
-	 * Since the parent was already fixed up, convert it
-	 * back to the kernel address space to access it
+	 * the source data for binder_buffer_object is visible
+	 * to user-space and the @buffer element is the user
+	 * pointer to the buffer_object containing the fd_array.
+	 * Convert the address to an offset relative to
+	 * the base of the transaction buffer.
 	 */
-	parent_buffer = parent->buffer -
-		binder_alloc_get_user_buffer_offset(&target_proc->alloc);
-	fd_array = (u32 *)(parent_buffer + (uintptr_t)fda->parent_offset);
-	if (!IS_ALIGNED((unsigned long)fd_array, sizeof(u32))) {
+	fda_offset = (parent->buffer - (uintptr_t)t->buffer->user_data) +
+		fda->parent_offset;
+	if (!IS_ALIGNED((unsigned long)fda_offset, sizeof(u32))) {
 		binder_user_error("%d:%d parent offset not aligned correctly.\n",
 				  proc->pid, thread->pid);
 		return -EINVAL;
 	}
 	for (fdi = 0; fdi < fda->num_fds; fdi++) {
-		target_fd = binder_translate_fd(fd_array[fdi], t, thread,
-						in_reply_to);
+		u32 fd;
+
+		binder_size_t offset = fda_offset + fdi * sizeof(fd);
+
+		binder_alloc_copy_from_buffer(&target_proc->alloc,
+					      &fd, t->buffer,
+					      offset, sizeof(fd));
+		target_fd = binder_translate_fd(fd, t, thread, in_reply_to);
 		if (target_fd < 0)
 			goto err_translate_fd_failed;
-		fd_array[fdi] = target_fd;
+		binder_alloc_copy_to_buffer(&target_proc->alloc,
+					    t->buffer, offset,
+					    &target_fd, sizeof(fd));
 	}
 	return 0;
 
@@ -2568,38 +2815,48 @@
 	 * installed so far.
 	 */
 	num_installed_fds = fdi;
-	for (fdi = 0; fdi < num_installed_fds; fdi++)
-		task_close_fd(target_proc, fd_array[fdi]);
+	for (fdi = 0; fdi < num_installed_fds; fdi++) {
+		u32 fd;
+		binder_size_t offset = fda_offset + fdi * sizeof(fd);
+		binder_alloc_copy_from_buffer(&target_proc->alloc,
+					      &fd, t->buffer,
+					      offset, sizeof(fd));
+		task_close_fd(target_proc, fd);
+	}
 	return target_fd;
 }
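
The comment block above describes turning the user-visible parent->buffer pointer into an offset relative to the base of the transaction buffer before the per-fd copies. A rough worked example of that arithmetic, with invented addresses:

/* Worked example of the fda_offset conversion; every address is invented. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uintptr_t buffer_user_data = 0x100000;	/* t->buffer->user_data      */
	uintptr_t parent_buffer    = 0x100080;	/* parent->buffer (user ptr) */
	uint64_t parent_offset     = 0x10;	/* fda->parent_offset        */
	uint64_t fda_offset;

	/* same conversion as the driver: user pointer -> buffer offset */
	fda_offset = (parent_buffer - buffer_user_data) + parent_offset;
	printf("fd array begins at buffer offset 0x%llx\n",
	       (unsigned long long)fda_offset);	/* 0x90 */
	return 0;
}
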
 
 static int binder_fixup_parent(struct binder_transaction *t,
 			       struct binder_thread *thread,
 			       struct binder_buffer_object *bp,
-			       binder_size_t *off_start,
+			       binder_size_t off_start_offset,
 			       binder_size_t num_valid,
-			       struct binder_buffer_object *last_fixup_obj,
+			       binder_size_t last_fixup_obj_off,
 			       binder_size_t last_fixup_min_off)
 {
 	struct binder_buffer_object *parent;
-	u8 *parent_buffer;
 	struct binder_buffer *b = t->buffer;
 	struct binder_proc *proc = thread->proc;
 	struct binder_proc *target_proc = t->to_proc;
+	struct binder_object object;
+	binder_size_t buffer_offset;
+	binder_size_t parent_offset;
 
 	if (!(bp->flags & BINDER_BUFFER_FLAG_HAS_PARENT))
 		return 0;
 
-	parent = binder_validate_ptr(b, bp->parent, off_start, num_valid);
+	parent = binder_validate_ptr(target_proc, b, &object, bp->parent,
+				     off_start_offset, &parent_offset,
+				     num_valid);
 	if (!parent) {
 		binder_user_error("%d:%d got transaction with invalid parent offset or type\n",
 				  proc->pid, thread->pid);
 		return -EINVAL;
 	}
 
-	if (!binder_validate_fixup(b, off_start,
-				   parent, bp->parent_offset,
-				   last_fixup_obj,
+	if (!binder_validate_fixup(target_proc, b, off_start_offset,
+				   parent_offset, bp->parent_offset,
+				   last_fixup_obj_off,
 				   last_fixup_min_off)) {
 		binder_user_error("%d:%d got transaction with out-of-order buffer fixup\n",
 				  proc->pid, thread->pid);
@@ -2613,10 +2870,10 @@
 				  proc->pid, thread->pid);
 		return -EINVAL;
 	}
-	parent_buffer = (u8 *)((uintptr_t)parent->buffer -
-			binder_alloc_get_user_buffer_offset(
-				&target_proc->alloc));
-	*(binder_uintptr_t *)(parent_buffer + bp->parent_offset) = bp->buffer;
+	buffer_offset = bp->parent_offset +
+			(uintptr_t)parent->buffer - (uintptr_t)b->user_data;
+	binder_alloc_copy_to_buffer(&target_proc->alloc, b, buffer_offset,
+				    &bp->buffer, sizeof(bp->buffer));
 
 	return 0;
 }
@@ -2643,11 +2900,15 @@
 				    struct binder_thread *thread)
 {
 	struct binder_node *node = t->buffer->target_node;
+	struct binder_priority node_prio;
 	bool oneway = !!(t->flags & TF_ONE_WAY);
 	bool pending_async = false;
 
 	BUG_ON(!node);
 	binder_node_lock(node);
+	node_prio.prio = node->min_priority;
+	node_prio.sched_policy = node->sched_policy;
+
 	if (oneway) {
 		BUG_ON(thread);
 		if (node->has_async_transaction) {
@@ -2668,12 +2929,15 @@
 	if (!thread && !pending_async)
 		thread = binder_select_thread_ilocked(proc);
 
-	if (thread)
+	if (thread) {
+		binder_transaction_priority(thread->task, t, node_prio,
+					    node->inherit_rt);
 		binder_enqueue_thread_work_ilocked(thread, &t->work);
-	else if (!pending_async)
+	} else if (!pending_async) {
 		binder_enqueue_work_ilocked(&t->work, &proc->todo);
-	else
+	} else {
 		binder_enqueue_work_ilocked(&t->work, &node->async_todo);
+	}
 
 	if (!pending_async)
 		binder_wakeup_thread_ilocked(proc, thread, !oneway /* sync */);
@@ -2734,9 +2998,10 @@
 	int ret;
 	struct binder_transaction *t;
 	struct binder_work *tcomplete;
-	binder_size_t *offp, *off_end, *off_start;
+	binder_size_t buffer_offset = 0;
+	binder_size_t off_start_offset, off_end_offset;
 	binder_size_t off_min;
-	u8 *sg_bufp, *sg_buf_end;
+	binder_size_t sg_buf_offset, sg_buf_end_offset;
 	struct binder_proc *target_proc = NULL;
 	struct binder_thread *target_thread = NULL;
 	struct binder_node *target_node = NULL;
@@ -2745,10 +3010,12 @@
 	uint32_t return_error = 0;
 	uint32_t return_error_param = 0;
 	uint32_t return_error_line = 0;
-	struct binder_buffer_object *last_fixup_obj = NULL;
+	binder_size_t last_fixup_obj_off = 0;
 	binder_size_t last_fixup_min_off = 0;
 	struct binder_context *context = proc->context;
 	int t_debug_id = atomic_inc_return(&binder_last_id);
+	char *secctx = NULL;
+	u32 secctx_sz = 0;
 
 	e = binder_transaction_log_add(&binder_transaction_log);
 	e->debug_id = t_debug_id;
@@ -2790,7 +3057,6 @@
 		}
 		thread->transaction_stack = in_reply_to->to_parent;
 		binder_inner_proc_unlock(proc);
-		binder_set_nice(in_reply_to->saved_priority);
 		target_thread = binder_get_txn_from_and_acq_inner(in_reply_to);
 		if (target_thread == NULL) {
 			return_error = BR_DEAD_REPLY;
@@ -2963,7 +3229,38 @@
 	t->to_thread = target_thread;
 	t->code = tr->code;
 	t->flags = tr->flags;
-	t->priority = task_nice(current);
+	if (!(t->flags & TF_ONE_WAY) &&
+	    binder_supported_policy(current->policy)) {
+		/* Inherit supported policies for synchronous transactions */
+		t->priority.sched_policy = current->policy;
+		t->priority.prio = current->normal_prio;
+	} else {
+		/* Otherwise, fall back to the default priority */
+		t->priority = target_proc->default_priority;
+	}
+
+	if (target_node && target_node->txn_security_ctx) {
+		u32 secid;
+		size_t added_size;
+
+		security_task_getsecid(proc->tsk, &secid);
+		ret = security_secid_to_secctx(secid, &secctx, &secctx_sz);
+		if (ret) {
+			return_error = BR_FAILED_REPLY;
+			return_error_param = ret;
+			return_error_line = __LINE__;
+			goto err_get_secctx_failed;
+		}
+		added_size = ALIGN(secctx_sz, sizeof(u64));
+		extra_buffers_size += added_size;
+		if (extra_buffers_size < added_size) {
+			/* integer overflow of extra_buffers_size */
+			return_error = BR_FAILED_REPLY;
+			return_error_param = -EINVAL;
+			return_error_line = __LINE__;
+			goto err_bad_extra_size;
+		}
+	}
 
 	trace_binder_transaction(reply, t, target_node);
 
@@ -2981,16 +3278,30 @@
 		t->buffer = NULL;
 		goto err_binder_alloc_buf_failed;
 	}
+	if (secctx) {
+		size_t buf_offset = ALIGN(tr->data_size, sizeof(void *)) +
+				    ALIGN(tr->offsets_size, sizeof(void *)) +
+				    ALIGN(extra_buffers_size, sizeof(void *)) -
+				    ALIGN(secctx_sz, sizeof(u64));
+
+		t->security_ctx = (uintptr_t)t->buffer->user_data + buf_offset;
+		binder_alloc_copy_to_buffer(&target_proc->alloc,
+					    t->buffer, buf_offset,
+					    secctx, secctx_sz);
+		security_release_secctx(secctx, secctx_sz);
+		secctx = NULL;
+	}
 	t->buffer->debug_id = t->debug_id;
 	t->buffer->transaction = t;
 	t->buffer->target_node = target_node;
 	trace_binder_transaction_alloc_buf(t->buffer);
-	off_start = (binder_size_t *)(t->buffer->data +
-				      ALIGN(tr->data_size, sizeof(void *)));
-	offp = off_start;
 
-	if (copy_from_user(t->buffer->data, (const void __user *)(uintptr_t)
-			   tr->data.ptr.buffer, tr->data_size)) {
+	if (binder_alloc_copy_user_to_buffer(
+				&target_proc->alloc,
+				t->buffer, 0,
+				(const void __user *)
+					(uintptr_t)tr->data.ptr.buffer,
+				tr->data_size)) {
 		binder_user_error("%d:%d got transaction with invalid data ptr\n",
 				proc->pid, thread->pid);
 		return_error = BR_FAILED_REPLY;
@@ -2998,8 +3309,13 @@
 		return_error_line = __LINE__;
 		goto err_copy_data_failed;
 	}
-	if (copy_from_user(offp, (const void __user *)(uintptr_t)
-			   tr->data.ptr.offsets, tr->offsets_size)) {
+	if (binder_alloc_copy_user_to_buffer(
+				&target_proc->alloc,
+				t->buffer,
+				ALIGN(tr->data_size, sizeof(void *)),
+				(const void __user *)
+					(uintptr_t)tr->data.ptr.offsets,
+				tr->offsets_size)) {
 		binder_user_error("%d:%d got transaction with invalid offsets ptr\n",
 				proc->pid, thread->pid);
 		return_error = BR_FAILED_REPLY;
@@ -3024,17 +3340,31 @@
 		return_error_line = __LINE__;
 		goto err_bad_offset;
 	}
-	off_end = (void *)off_start + tr->offsets_size;
-	sg_bufp = (u8 *)(PTR_ALIGN(off_end, sizeof(void *)));
-	sg_buf_end = sg_bufp + extra_buffers_size;
+	off_start_offset = ALIGN(tr->data_size, sizeof(void *));
+	buffer_offset = off_start_offset;
+	off_end_offset = off_start_offset + tr->offsets_size;
+	sg_buf_offset = ALIGN(off_end_offset, sizeof(void *));
+	sg_buf_end_offset = sg_buf_offset + extra_buffers_size -
+		ALIGN(secctx_sz, sizeof(u64));
 	off_min = 0;
-	for (; offp < off_end; offp++) {
+	for (buffer_offset = off_start_offset; buffer_offset < off_end_offset;
+	     buffer_offset += sizeof(binder_size_t)) {
 		struct binder_object_header *hdr;
-		size_t object_size = binder_validate_object(t->buffer, *offp);
+		size_t object_size;
+		struct binder_object object;
+		binder_size_t object_offset;
 
-		if (object_size == 0 || *offp < off_min) {
+		binder_alloc_copy_from_buffer(&target_proc->alloc,
+					      &object_offset,
+					      t->buffer,
+					      buffer_offset,
+					      sizeof(object_offset));
+		object_size = binder_get_object(target_proc, t->buffer,
+						object_offset, &object);
+		if (object_size == 0 || object_offset < off_min) {
 			binder_user_error("%d:%d got transaction with invalid offset (%lld, min %lld max %lld) or object.\n",
-					  proc->pid, thread->pid, (u64)*offp,
+					  proc->pid, thread->pid,
+					  (u64)object_offset,
 					  (u64)off_min,
 					  (u64)t->buffer->data_size);
 			return_error = BR_FAILED_REPLY;
@@ -3043,8 +3373,8 @@
 			goto err_bad_offset;
 		}
 
-		hdr = (struct binder_object_header *)(t->buffer->data + *offp);
-		off_min = *offp + object_size;
+		hdr = &object.hdr;
+		off_min = object_offset + object_size;
 		switch (hdr->type) {
 		case BINDER_TYPE_BINDER:
 		case BINDER_TYPE_WEAK_BINDER: {
@@ -3058,6 +3388,9 @@
 				return_error_line = __LINE__;
 				goto err_translate_failed;
 			}
+			binder_alloc_copy_to_buffer(&target_proc->alloc,
+						    t->buffer, object_offset,
+						    fp, sizeof(*fp));
 		} break;
 		case BINDER_TYPE_HANDLE:
 		case BINDER_TYPE_WEAK_HANDLE: {
@@ -3071,6 +3404,9 @@
 				return_error_line = __LINE__;
 				goto err_translate_failed;
 			}
+			binder_alloc_copy_to_buffer(&target_proc->alloc,
+						    t->buffer, object_offset,
+						    fp, sizeof(*fp));
 		} break;
 
 		case BINDER_TYPE_FD: {
@@ -3086,14 +3422,23 @@
 			}
 			fp->pad_binder = 0;
 			fp->fd = target_fd;
+			binder_alloc_copy_to_buffer(&target_proc->alloc,
+						    t->buffer, object_offset,
+						    fp, sizeof(*fp));
 		} break;
 		case BINDER_TYPE_FDA: {
+			struct binder_object ptr_object;
+			binder_size_t parent_offset;
 			struct binder_fd_array_object *fda =
 				to_binder_fd_array_object(hdr);
+			size_t num_valid = (buffer_offset - off_start_offset) /
+						sizeof(binder_size_t);
 			struct binder_buffer_object *parent =
-				binder_validate_ptr(t->buffer, fda->parent,
-						    off_start,
-						    offp - off_start);
+				binder_validate_ptr(target_proc, t->buffer,
+						    &ptr_object, fda->parent,
+						    off_start_offset,
+						    &parent_offset,
+						    num_valid);
 			if (!parent) {
 				binder_user_error("%d:%d got transaction with invalid parent offset or type\n",
 						  proc->pid, thread->pid);
@@ -3102,9 +3447,11 @@
 				return_error_line = __LINE__;
 				goto err_bad_parent;
 			}
-			if (!binder_validate_fixup(t->buffer, off_start,
-						   parent, fda->parent_offset,
-						   last_fixup_obj,
+			if (!binder_validate_fixup(target_proc, t->buffer,
+						   off_start_offset,
+						   parent_offset,
+						   fda->parent_offset,
+						   last_fixup_obj_off,
 						   last_fixup_min_off)) {
 				binder_user_error("%d:%d got transaction with out-of-order buffer fixup\n",
 						  proc->pid, thread->pid);
@@ -3121,14 +3468,15 @@
 				return_error_line = __LINE__;
 				goto err_translate_failed;
 			}
-			last_fixup_obj = parent;
+			last_fixup_obj_off = parent_offset;
 			last_fixup_min_off =
 				fda->parent_offset + sizeof(u32) * fda->num_fds;
 		} break;
 		case BINDER_TYPE_PTR: {
 			struct binder_buffer_object *bp =
 				to_binder_buffer_object(hdr);
-			size_t buf_left = sg_buf_end - sg_bufp;
+			size_t buf_left = sg_buf_end_offset - sg_buf_offset;
+			size_t num_valid;
 
 			if (bp->length > buf_left) {
 				binder_user_error("%d:%d got transaction with too large buffer\n",
@@ -3138,9 +3486,13 @@
 				return_error_line = __LINE__;
 				goto err_bad_offset;
 			}
-			if (copy_from_user(sg_bufp,
-					   (const void __user *)(uintptr_t)
-					   bp->buffer, bp->length)) {
+			if (binder_alloc_copy_user_to_buffer(
+						&target_proc->alloc,
+						t->buffer,
+						sg_buf_offset,
+						(const void __user *)
+							(uintptr_t)bp->buffer,
+						bp->length)) {
 				binder_user_error("%d:%d got transaction with invalid offsets ptr\n",
 						  proc->pid, thread->pid);
 				return_error_param = -EFAULT;
@@ -3149,14 +3501,16 @@
 				goto err_copy_data_failed;
 			}
 			/* Fixup buffer pointer to target proc address space */
-			bp->buffer = (uintptr_t)sg_bufp +
-				binder_alloc_get_user_buffer_offset(
-						&target_proc->alloc);
-			sg_bufp += ALIGN(bp->length, sizeof(u64));
+			bp->buffer = (uintptr_t)
+				t->buffer->user_data + sg_buf_offset;
+			sg_buf_offset += ALIGN(bp->length, sizeof(u64));
 
-			ret = binder_fixup_parent(t, thread, bp, off_start,
-						  offp - off_start,
-						  last_fixup_obj,
+			num_valid = (buffer_offset - off_start_offset) /
+					sizeof(binder_size_t);
+			ret = binder_fixup_parent(t, thread, bp,
+						  off_start_offset,
+						  num_valid,
+						  last_fixup_obj_off,
 						  last_fixup_min_off);
 			if (ret < 0) {
 				return_error = BR_FAILED_REPLY;
@@ -3164,7 +3518,10 @@
 				return_error_line = __LINE__;
 				goto err_translate_failed;
 			}
-			last_fixup_obj = bp;
+			binder_alloc_copy_to_buffer(&target_proc->alloc,
+						    t->buffer, object_offset,
+						    bp, sizeof(*bp));
+			last_fixup_obj_off = object_offset;
 			last_fixup_min_off = 0;
 		} break;
 		default:
@@ -3191,6 +3548,7 @@
 		binder_enqueue_thread_work_ilocked(target_thread, &t->work);
 		binder_inner_proc_unlock(target_proc);
 		wake_up_interruptible_sync(&target_thread->wait);
+		binder_restore_priority(current, in_reply_to->saved_priority);
 		binder_free_transaction(in_reply_to);
 	} else if (!(t->flags & TF_ONE_WAY)) {
 		BUG_ON(t->buffer->async_transaction != 0);
@@ -3243,13 +3601,18 @@
 err_bad_parent:
 err_copy_data_failed:
 	trace_binder_transaction_failed_buffer_release(t->buffer);
-	binder_transaction_buffer_release(target_proc, t->buffer, offp);
+	binder_transaction_buffer_release(target_proc, t->buffer,
+					  buffer_offset, true);
 	if (target_node)
 		binder_dec_node_tmpref(target_node);
 	target_node = NULL;
 	t->buffer->transaction = NULL;
 	binder_alloc_free_buf(&target_proc->alloc, t->buffer);
 err_binder_alloc_buf_failed:
+err_bad_extra_size:
+	if (secctx)
+		security_release_secctx(secctx, secctx_sz);
+err_get_secctx_failed:
 	kfree(tcomplete);
 	binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE);
 err_alloc_tcomplete_failed:
@@ -3294,6 +3657,7 @@
 
 	BUG_ON(thread->return_error.cmd != BR_OK);
 	if (in_reply_to) {
+		binder_restore_priority(current, in_reply_to->saved_priority);
 		thread->return_error.cmd = BR_TRANSACTION_COMPLETE;
 		binder_enqueue_thread_work(thread, &thread->return_error.work);
 		binder_send_failed_reply(in_reply_to, return_error);
@@ -3520,7 +3884,7 @@
 				binder_node_inner_unlock(buf_node);
 			}
 			trace_binder_transaction_buffer_release(buffer);
-			binder_transaction_buffer_release(proc, buffer, NULL);
+			binder_transaction_buffer_release(proc, buffer, 0, false);
 			binder_alloc_free_buf(&proc->alloc, buffer);
 			break;
 		}
@@ -3880,7 +4244,7 @@
 			wait_event_interruptible(binder_user_error_wait,
 						 binder_stop_on_user_error < 2);
 		}
-		binder_set_nice(proc->default_priority);
+		binder_restore_priority(current, proc->default_priority);
 	}
 
 	if (non_block) {
@@ -3897,11 +4261,13 @@
 
 	while (1) {
 		uint32_t cmd;
-		struct binder_transaction_data tr;
+		struct binder_transaction_data_secctx tr;
+		struct binder_transaction_data *trd = &tr.transaction_data;
 		struct binder_work *w = NULL;
 		struct list_head *list = NULL;
 		struct binder_transaction *t = NULL;
 		struct binder_thread *t_from;
+		size_t trsize = sizeof(*trd);
 
 		binder_inner_proc_lock(proc);
 		if (!binder_worklist_empty_ilocked(&thread->todo))
@@ -4095,45 +4461,47 @@
 		BUG_ON(t->buffer == NULL);
 		if (t->buffer->target_node) {
 			struct binder_node *target_node = t->buffer->target_node;
+			struct binder_priority node_prio;
 
-			tr.target.ptr = target_node->ptr;
-			tr.cookie =  target_node->cookie;
-			t->saved_priority = task_nice(current);
-			if (t->priority < target_node->min_priority &&
-			    !(t->flags & TF_ONE_WAY))
-				binder_set_nice(t->priority);
-			else if (!(t->flags & TF_ONE_WAY) ||
-				 t->saved_priority > target_node->min_priority)
-				binder_set_nice(target_node->min_priority);
+			trd->target.ptr = target_node->ptr;
+			trd->cookie =  target_node->cookie;
+			node_prio.sched_policy = target_node->sched_policy;
+			node_prio.prio = target_node->min_priority;
+			binder_transaction_priority(current, t, node_prio,
+						    target_node->inherit_rt);
 			cmd = BR_TRANSACTION;
 		} else {
-			tr.target.ptr = 0;
-			tr.cookie = 0;
+			trd->target.ptr = 0;
+			trd->cookie = 0;
 			cmd = BR_REPLY;
 		}
-		tr.code = t->code;
-		tr.flags = t->flags;
-		tr.sender_euid = from_kuid(current_user_ns(), t->sender_euid);
+		trd->code = t->code;
+		trd->flags = t->flags;
+		trd->sender_euid = from_kuid(current_user_ns(), t->sender_euid);
 
 		t_from = binder_get_txn_from(t);
 		if (t_from) {
 			struct task_struct *sender = t_from->proc->tsk;
 
-			tr.sender_pid = task_tgid_nr_ns(sender,
-							task_active_pid_ns(current));
+			trd->sender_pid =
+				task_tgid_nr_ns(sender,
+						task_active_pid_ns(current));
 		} else {
-			tr.sender_pid = 0;
+			trd->sender_pid = 0;
 		}
 
-		tr.data_size = t->buffer->data_size;
-		tr.offsets_size = t->buffer->offsets_size;
-		tr.data.ptr.buffer = (binder_uintptr_t)
-			((uintptr_t)t->buffer->data +
-			binder_alloc_get_user_buffer_offset(&proc->alloc));
-		tr.data.ptr.offsets = tr.data.ptr.buffer +
+		trd->data_size = t->buffer->data_size;
+		trd->offsets_size = t->buffer->offsets_size;
+		trd->data.ptr.buffer = (uintptr_t)t->buffer->user_data;
+		trd->data.ptr.offsets = trd->data.ptr.buffer +
 					ALIGN(t->buffer->data_size,
 					    sizeof(void *));
 
+		tr.secctx = t->security_ctx;
+		if (t->security_ctx) {
+			cmd = BR_TRANSACTION_SEC_CTX;
+			trsize = sizeof(tr);
+		}
 		if (put_user(cmd, (uint32_t __user *)ptr)) {
 			if (t_from)
 				binder_thread_dec_tmpref(t_from);
@@ -4144,7 +4512,7 @@
 			return -EFAULT;
 		}
 		ptr += sizeof(uint32_t);
-		if (copy_to_user(ptr, &tr, sizeof(tr))) {
+		if (copy_to_user(ptr, &tr, trsize)) {
 			if (t_from)
 				binder_thread_dec_tmpref(t_from);
 
@@ -4153,7 +4521,7 @@
 
 			return -EFAULT;
 		}
-		ptr += sizeof(tr);
+		ptr += trsize;
 
 		trace_binder_transaction_received(t);
 		binder_stat_br(proc, thread, cmd);
@@ -4161,16 +4529,18 @@
 			     "%d:%d %s %d %d:%d, cmd %d size %zd-%zd ptr %016llx-%016llx\n",
 			     proc->pid, thread->pid,
 			     (cmd == BR_TRANSACTION) ? "BR_TRANSACTION" :
-			     "BR_REPLY",
+				(cmd == BR_TRANSACTION_SEC_CTX) ?
+				     "BR_TRANSACTION_SEC_CTX" : "BR_REPLY",
 			     t->debug_id, t_from ? t_from->proc->pid : 0,
 			     t_from ? t_from->pid : 0, cmd,
 			     t->buffer->data_size, t->buffer->offsets_size,
-			     (u64)tr.data.ptr.buffer, (u64)tr.data.ptr.offsets);
+			     (u64)trd->data.ptr.buffer,
+			     (u64)trd->data.ptr.offsets);
 
 		if (t_from)
 			binder_thread_dec_tmpref(t_from);
 		t->buffer->allow_user_free = 1;
-		if (cmd == BR_TRANSACTION && !(t->flags & TF_ONE_WAY)) {
+		if (cmd != BR_REPLY && !(t->flags & TF_ONE_WAY)) {
 			binder_inner_proc_lock(thread->proc);
 			t->to_parent = thread->transaction_stack;
 			t->to_thread = thread;
@@ -4282,6 +4652,8 @@
 	binder_stats_created(BINDER_STAT_THREAD);
 	thread->proc = proc;
 	thread->pid = current->pid;
+	get_task_struct(current);
+	thread->task = current;
 	atomic_set(&thread->tmp_ref, 0);
 	init_waitqueue_head(&thread->wait);
 	INIT_LIST_HEAD(&thread->todo);
@@ -4332,6 +4704,7 @@
 	BUG_ON(!list_empty(&thread->todo));
 	binder_stats_deleted(BINDER_STAT_THREAD);
 	binder_proc_dec_tmpref(thread->proc);
+	put_task_struct(thread->task);
 	kfree(thread);
 }
 
@@ -4512,7 +4885,8 @@
 	return ret;
 }
 
-static int binder_ioctl_set_ctx_mgr(struct file *filp)
+static int binder_ioctl_set_ctx_mgr(struct file *filp,
+				    struct flat_binder_object *fbo)
 {
 	int ret = 0;
 	struct binder_proc *proc = filp->private_data;
@@ -4541,7 +4915,7 @@
 	} else {
 		context->binder_context_mgr_uid = curr_euid;
 	}
-	new_node = binder_new_node(proc, NULL);
+	new_node = binder_new_node(proc, fbo);
 	if (!new_node) {
 		ret = -ENOMEM;
 		goto out;
@@ -4559,6 +4933,42 @@
 	return ret;
 }
 
+static int binder_ioctl_get_node_info_for_ref(struct binder_proc *proc,
+		struct binder_node_info_for_ref *info)
+{
+	struct binder_node *node;
+	struct binder_context *context = proc->context;
+	__u32 handle = info->handle;
+
+	if (info->strong_count || info->weak_count || info->reserved1 ||
+	    info->reserved2 || info->reserved3) {
+		binder_user_error("%d BINDER_GET_NODE_INFO_FOR_REF: only handle may be non-zero.",
+				  proc->pid);
+		return -EINVAL;
+	}
+
+	/* This ioctl may only be used by the context manager */
+	mutex_lock(&context->context_mgr_node_lock);
+	if (!context->binder_context_mgr_node ||
+		context->binder_context_mgr_node->proc != proc) {
+		mutex_unlock(&context->context_mgr_node_lock);
+		return -EPERM;
+	}
+	mutex_unlock(&context->context_mgr_node_lock);
+
+	node = binder_get_node_from_ref(proc, handle, true, NULL);
+	if (!node)
+		return -EINVAL;
+
+	info->strong_count = node->local_strong_refs +
+		node->internal_strong_refs;
+	info->weak_count = node->local_weak_refs;
+
+	binder_put_node(node);
+
+	return 0;
+}
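
A context manager can exercise the new BINDER_GET_NODE_INFO_FOR_REF ioctl by zeroing the struct and filling in only the handle. A hedged user-space sketch (assumes the uapi header shipped with this change is installed as <linux/android/binder.h>; the fd and handle values are illustrative):

/* Hedged user-space sketch: query ref counts for a node this process (the
 * context manager) holds a handle to. */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/android/binder.h>

static int query_node_info(int binder_fd, __u32 handle)
{
	struct binder_node_info_for_ref info;

	memset(&info, 0, sizeof(info));	/* every field but handle must be 0 */
	info.handle = handle;
	if (ioctl(binder_fd, BINDER_GET_NODE_INFO_FOR_REF, &info) < 0)
		return -1;
	printf("handle %u: strong %u weak %u\n",
	       info.handle, info.strong_count, info.weak_count);
	return 0;
}
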
+
 static int binder_ioctl_get_node_debug_info(struct binder_proc *proc,
 				struct binder_node_debug_info *info)
 {
@@ -4628,8 +5038,20 @@
 		binder_inner_proc_unlock(proc);
 		break;
 	}
+	case BINDER_SET_CONTEXT_MGR_EXT: {
+		struct flat_binder_object fbo;
+
+		if (copy_from_user(&fbo, ubuf, sizeof(fbo))) {
+			ret = -EINVAL;
+			goto err;
+		}
+		ret = binder_ioctl_set_ctx_mgr(filp, &fbo);
+		if (ret)
+			goto err;
+		break;
+	}
 	case BINDER_SET_CONTEXT_MGR:
-		ret = binder_ioctl_set_ctx_mgr(filp);
+		ret = binder_ioctl_set_ctx_mgr(filp, NULL);
 		if (ret)
 			goto err;
 		break;
@@ -4653,6 +5075,25 @@
 		}
 		break;
 	}
+	case BINDER_GET_NODE_INFO_FOR_REF: {
+		struct binder_node_info_for_ref info;
+
+		if (copy_from_user(&info, ubuf, sizeof(info))) {
+			ret = -EFAULT;
+			goto err;
+		}
+
+		ret = binder_ioctl_get_node_info_for_ref(proc, &info);
+		if (ret < 0)
+			goto err;
+
+		if (copy_to_user(ubuf, &info, sizeof(info))) {
+			ret = -EFAULT;
+			goto err;
+		}
+
+		break;
+	}
 	case BINDER_GET_NODE_DEBUG_INFO: {
 		struct binder_node_debug_info info;
 
@@ -4782,7 +5223,14 @@
 	proc->tsk = current->group_leader;
 	mutex_init(&proc->files_lock);
 	INIT_LIST_HEAD(&proc->todo);
-	proc->default_priority = task_nice(current);
+	if (binder_supported_policy(current->policy)) {
+		proc->default_priority.sched_policy = current->policy;
+		proc->default_priority.prio = current->normal_prio;
+	} else {
+		proc->default_priority.sched_policy = SCHED_NORMAL;
+		proc->default_priority.prio = NICE_TO_PRIO(0);
+	}
+
 	binder_dev = container_of(filp->private_data, struct binder_device,
 				  miscdev);
 	proc->context = &binder_dev->context;
@@ -5076,13 +5524,14 @@
 	spin_lock(&t->lock);
 	to_proc = t->to_proc;
 	seq_printf(m,
-		   "%s %d: %pK from %d:%d to %d:%d code %x flags %x pri %ld r%d",
+		   "%s %d: %pK from %d:%d to %d:%d code %x flags %x pri %d:%d r%d",
 		   prefix, t->debug_id, t,
 		   t->from ? t->from->proc->pid : 0,
 		   t->from ? t->from->pid : 0,
 		   to_proc ? to_proc->pid : 0,
 		   t->to_thread ? t->to_thread->pid : 0,
-		   t->code, t->flags, t->priority, t->need_reply);
+		   t->code, t->flags, t->priority.sched_policy,
+		   t->priority.prio, t->need_reply);
 	spin_unlock(&t->lock);
 
 	if (proc != to_proc) {
@@ -5102,7 +5551,7 @@
 		seq_printf(m, " node %d", buffer->target_node->debug_id);
 	seq_printf(m, " size %zd:%zd data %pK\n",
 		   buffer->data_size, buffer->offsets_size,
-		   buffer->data);
+		   buffer->user_data);
 }
 
 static void print_binder_work_ilocked(struct seq_file *m,
@@ -5200,8 +5649,9 @@
 	hlist_for_each_entry(ref, &node->refs, node_entry)
 		count++;
 
-	seq_printf(m, "  node %d: u%016llx c%016llx hs %d hw %d ls %d lw %d is %d iw %d tr %d",
+	seq_printf(m, "  node %d: u%016llx c%016llx pri %d:%d hs %d hw %d ls %d lw %d is %d iw %d tr %d",
 		   node->debug_id, (u64)node->ptr, (u64)node->cookie,
+		   node->sched_policy, node->min_priority,
 		   node->has_strong_ref, node->has_weak_ref,
 		   node->local_strong_refs, node->local_weak_refs,
 		   node->internal_strong_refs, count, node->tmp_refs);
@@ -5251,6 +5701,9 @@
 	for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) {
 		struct binder_node *node = rb_entry(n, struct binder_node,
 						    rb_node);
+		if (!print_all && !node->has_async_transaction)
+			continue;
+
 		/*
 		 * take a temporary reference on the node so it
 		 * survives and isn't removed from the tree
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index a654ccf..27bd921 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -29,6 +29,8 @@
 #include <linux/list_lru.h>
 #include <linux/ratelimit.h>
 #include <asm/cacheflush.h>
+#include <linux/uaccess.h>
+#include <linux/highmem.h>
 #include "binder_alloc.h"
 #include "binder_trace.h"
 
@@ -67,9 +69,8 @@
 				       struct binder_buffer *buffer)
 {
 	if (list_is_last(&buffer->entry, &alloc->buffers))
-		return (u8 *)alloc->buffer +
-			alloc->buffer_size - (u8 *)buffer->data;
-	return (u8 *)binder_buffer_next(buffer)->data - (u8 *)buffer->data;
+		return alloc->buffer + alloc->buffer_size - buffer->user_data;
+	return binder_buffer_next(buffer)->user_data - buffer->user_data;
 }
 
 static void binder_insert_free_buffer(struct binder_alloc *alloc,
@@ -119,9 +120,9 @@
 		buffer = rb_entry(parent, struct binder_buffer, rb_node);
 		BUG_ON(buffer->free);
 
-		if (new_buffer->data < buffer->data)
+		if (new_buffer->user_data < buffer->user_data)
 			p = &parent->rb_left;
-		else if (new_buffer->data > buffer->data)
+		else if (new_buffer->user_data > buffer->user_data)
 			p = &parent->rb_right;
 		else
 			BUG();
@@ -136,17 +137,17 @@
 {
 	struct rb_node *n = alloc->allocated_buffers.rb_node;
 	struct binder_buffer *buffer;
-	void *kern_ptr;
+	void __user *uptr;
 
-	kern_ptr = (void *)(user_ptr - alloc->user_buffer_offset);
+	uptr = (void __user *)user_ptr;
 
 	while (n) {
 		buffer = rb_entry(n, struct binder_buffer, rb_node);
 		BUG_ON(buffer->free);
 
-		if (kern_ptr < buffer->data)
+		if (uptr < buffer->user_data)
 			n = n->rb_left;
-		else if (kern_ptr > buffer->data)
+		else if (uptr > buffer->user_data)
 			n = n->rb_right;
 		else {
 			/*
@@ -186,9 +187,9 @@
 }
 
 static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
-				    void *start, void *end)
+				    void __user *start, void __user *end)
 {
-	void *page_addr;
+	void __user *page_addr;
 	unsigned long user_page_addr;
 	struct binder_lru_page *page;
 	struct vm_area_struct *vma = NULL;
@@ -263,18 +264,7 @@
 		page->alloc = alloc;
 		INIT_LIST_HEAD(&page->lru);
 
-		ret = map_kernel_range_noflush((unsigned long)page_addr,
-					       PAGE_SIZE, PAGE_KERNEL,
-					       &page->page_ptr);
-		flush_cache_vmap((unsigned long)page_addr,
-				(unsigned long)page_addr + PAGE_SIZE);
-		if (ret != 1) {
-			pr_err("%d: binder_alloc_buf failed to map page at %pK in kernel\n",
-			       alloc->pid, page_addr);
-			goto err_map_kernel_failed;
-		}
-		user_page_addr =
-			(uintptr_t)page_addr + alloc->user_buffer_offset;
+		user_page_addr = (uintptr_t)page_addr;
 		ret = vm_insert_page(vma, user_page_addr, page[0].page_ptr);
 		if (ret) {
 			pr_err("%d: binder_alloc_buf failed to map page at %lx in userspace\n",
@@ -312,8 +302,6 @@
 		continue;
 
 err_vm_insert_page_failed:
-		unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE);
-err_map_kernel_failed:
 		__free_page(page->page_ptr);
 		page->page_ptr = NULL;
 err_alloc_page_failed:
@@ -368,8 +356,8 @@
 	struct binder_buffer *buffer;
 	size_t buffer_size;
 	struct rb_node *best_fit = NULL;
-	void *has_page_addr;
-	void *end_page_addr;
+	void __user *has_page_addr;
+	void __user *end_page_addr;
 	size_t size, data_offsets_size;
 	int ret;
 
@@ -467,15 +455,15 @@
 		     "%d: binder_alloc_buf size %zd got buffer %pK size %zd\n",
 		      alloc->pid, size, buffer, buffer_size);
 
-	has_page_addr =
-		(void *)(((uintptr_t)buffer->data + buffer_size) & PAGE_MASK);
+	has_page_addr = (void __user *)
+		(((uintptr_t)buffer->user_data + buffer_size) & PAGE_MASK);
 	WARN_ON(n && buffer_size != size);
 	end_page_addr =
-		(void *)PAGE_ALIGN((uintptr_t)buffer->data + size);
+		(void __user *)PAGE_ALIGN((uintptr_t)buffer->user_data + size);
 	if (end_page_addr > has_page_addr)
 		end_page_addr = has_page_addr;
-	ret = binder_update_page_range(alloc, 1,
-	    (void *)PAGE_ALIGN((uintptr_t)buffer->data), end_page_addr);
+	ret = binder_update_page_range(alloc, 1, (void __user *)
+		PAGE_ALIGN((uintptr_t)buffer->user_data), end_page_addr);
 	if (ret)
 		return ERR_PTR(ret);
 
@@ -488,7 +476,7 @@
 			       __func__, alloc->pid);
 			goto err_alloc_buf_struct_failed;
 		}
-		new_buffer->data = (u8 *)buffer->data + size;
+		new_buffer->user_data = (u8 __user *)buffer->user_data + size;
 		list_add(&new_buffer->entry, &buffer->entry);
 		new_buffer->free = 1;
 		binder_insert_free_buffer(alloc, new_buffer);
@@ -514,8 +502,8 @@
 	return buffer;
 
 err_alloc_buf_struct_failed:
-	binder_update_page_range(alloc, 0,
-				 (void *)PAGE_ALIGN((uintptr_t)buffer->data),
+	binder_update_page_range(alloc, 0, (void __user *)
+				 PAGE_ALIGN((uintptr_t)buffer->user_data),
 				 end_page_addr);
 	return ERR_PTR(-ENOMEM);
 }
@@ -550,14 +538,15 @@
 	return buffer;
 }
 
-static void *buffer_start_page(struct binder_buffer *buffer)
+static void __user *buffer_start_page(struct binder_buffer *buffer)
 {
-	return (void *)((uintptr_t)buffer->data & PAGE_MASK);
+	return (void __user *)((uintptr_t)buffer->user_data & PAGE_MASK);
 }
 
-static void *prev_buffer_end_page(struct binder_buffer *buffer)
+static void __user *prev_buffer_end_page(struct binder_buffer *buffer)
 {
-	return (void *)(((uintptr_t)(buffer->data) - 1) & PAGE_MASK);
+	return (void __user *)
+		(((uintptr_t)(buffer->user_data) - 1) & PAGE_MASK);
 }
 
 static void binder_delete_free_buffer(struct binder_alloc *alloc,
@@ -572,7 +561,8 @@
 		to_free = false;
 		binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
 				   "%d: merge free, buffer %pK share page with %pK\n",
-				   alloc->pid, buffer->data, prev->data);
+				   alloc->pid, buffer->user_data,
+				   prev->user_data);
 	}
 
 	if (!list_is_last(&buffer->entry, &alloc->buffers)) {
@@ -582,23 +572,24 @@
 			binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
 					   "%d: merge free, buffer %pK share page with %pK\n",
 					   alloc->pid,
-					   buffer->data,
-					   next->data);
+					   buffer->user_data,
+					   next->user_data);
 		}
 	}
 
-	if (PAGE_ALIGNED(buffer->data)) {
+	if (PAGE_ALIGNED(buffer->user_data)) {
 		binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
 				   "%d: merge free, buffer start %pK is page aligned\n",
-				   alloc->pid, buffer->data);
+				   alloc->pid, buffer->user_data);
 		to_free = false;
 	}
 
 	if (to_free) {
 		binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
 				   "%d: merge free, buffer %pK do not share page with %pK or %pK\n",
-				   alloc->pid, buffer->data,
-				   prev->data, next ? next->data : NULL);
+				   alloc->pid, buffer->user_data,
+				   prev->user_data,
+				   next ? next->user_data : NULL);
 		binder_update_page_range(alloc, 0, buffer_start_page(buffer),
 					 buffer_start_page(buffer) + PAGE_SIZE);
 	}
@@ -624,8 +615,8 @@
 	BUG_ON(buffer->free);
 	BUG_ON(size > buffer_size);
 	BUG_ON(buffer->transaction != NULL);
-	BUG_ON(buffer->data < alloc->buffer);
-	BUG_ON(buffer->data > alloc->buffer + alloc->buffer_size);
+	BUG_ON(buffer->user_data < alloc->buffer);
+	BUG_ON(buffer->user_data > alloc->buffer + alloc->buffer_size);
 
 	if (buffer->async_transaction) {
 		alloc->free_async_space += size + sizeof(struct binder_buffer);
@@ -636,8 +627,9 @@
 	}
 
 	binder_update_page_range(alloc, 0,
-		(void *)PAGE_ALIGN((uintptr_t)buffer->data),
-		(void *)(((uintptr_t)buffer->data + buffer_size) & PAGE_MASK));
+		(void __user *)PAGE_ALIGN((uintptr_t)buffer->user_data),
+		(void __user *)(((uintptr_t)
+			  buffer->user_data + buffer_size) & PAGE_MASK));
 
 	rb_erase(&buffer->rb_node, &alloc->allocated_buffers);
 	buffer->free = 1;
@@ -693,7 +685,6 @@
 			      struct vm_area_struct *vma)
 {
 	int ret;
-	struct vm_struct *area;
 	const char *failure_string;
 	struct binder_buffer *buffer;
 
@@ -704,28 +695,9 @@
 		goto err_already_mapped;
 	}
 
-	area = get_vm_area(vma->vm_end - vma->vm_start, VM_ALLOC);
-	if (area == NULL) {
-		ret = -ENOMEM;
-		failure_string = "get_vm_area";
-		goto err_get_vm_area_failed;
-	}
-	alloc->buffer = area->addr;
-	alloc->user_buffer_offset =
-		vma->vm_start - (uintptr_t)alloc->buffer;
+	alloc->buffer = (void __user *)vma->vm_start;
 	mutex_unlock(&binder_alloc_mmap_lock);
 
-#ifdef CONFIG_CPU_CACHE_VIPT
-	if (cache_is_vipt_aliasing()) {
-		while (CACHE_COLOUR(
-				(vma->vm_start ^ (uint32_t)alloc->buffer))) {
-			pr_info("%s: %d %lx-%lx maps %pK bad alignment\n",
-				__func__, alloc->pid, vma->vm_start,
-				vma->vm_end, alloc->buffer);
-			vma->vm_start += PAGE_SIZE;
-		}
-	}
-#endif
 	alloc->pages = kcalloc((vma->vm_end - vma->vm_start) / PAGE_SIZE,
 			       sizeof(alloc->pages[0]),
 			       GFP_KERNEL);
@@ -743,7 +715,7 @@
 		goto err_alloc_buf_struct_failed;
 	}
 
-	buffer->data = alloc->buffer;
+	buffer->user_data = alloc->buffer;
 	list_add(&buffer->entry, &alloc->buffers);
 	buffer->free = 1;
 	binder_insert_free_buffer(alloc, buffer);
@@ -758,9 +730,7 @@
 	alloc->pages = NULL;
 err_alloc_pages_failed:
 	mutex_lock(&binder_alloc_mmap_lock);
-	vfree(alloc->buffer);
 	alloc->buffer = NULL;
-err_get_vm_area_failed:
 err_already_mapped:
 	mutex_unlock(&binder_alloc_mmap_lock);
 	binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
@@ -806,7 +776,7 @@
 		int i;
 
 		for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) {
-			void *page_addr;
+			void __user *page_addr;
 			bool on_lru;
 
 			if (!alloc->pages[i].page_ptr)
@@ -819,12 +789,10 @@
 				     "%s: %d: page %d at %pK %s\n",
 				     __func__, alloc->pid, i, page_addr,
 				     on_lru ? "on lru" : "active");
-			unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE);
 			__free_page(alloc->pages[i].page_ptr);
 			page_count++;
 		}
 		kfree(alloc->pages);
-		vfree(alloc->buffer);
 	}
 	mutex_unlock(&alloc->mutex);
 	if (alloc->vma_vm_mm)
@@ -839,7 +807,7 @@
 				struct binder_buffer *buffer)
 {
 	seq_printf(m, "%s %d: %pK size %zd:%zd:%zd %s\n",
-		   prefix, buffer->debug_id, buffer->data,
+		   prefix, buffer->debug_id, buffer->user_data,
 		   buffer->data_size, buffer->offsets_size,
 		   buffer->extra_buffers_size,
 		   buffer->transaction ? "active" : "delivered");
@@ -972,9 +940,7 @@
 	if (vma) {
 		trace_binder_unmap_user_start(alloc, index);
 
-		zap_page_range(vma,
-			       page_addr + alloc->user_buffer_offset,
-			       PAGE_SIZE);
+		zap_page_range(vma, page_addr, PAGE_SIZE);
 
 		trace_binder_unmap_user_end(alloc, index);
 	}
@@ -983,7 +949,6 @@
 
 	trace_binder_unmap_kernel_start(alloc, index);
 
-	unmap_kernel_range(page_addr, PAGE_SIZE);
 	__free_page(page->page_ptr);
 	page->page_ptr = NULL;
 
@@ -1050,3 +1015,173 @@
 	}
 	return ret;
 }
+
+/**
+ * check_buffer() - verify that buffer/offset is safe to access
+ * @alloc: binder_alloc for this proc
+ * @buffer: binder buffer to be accessed
+ * @offset: offset into @buffer data
+ * @bytes: bytes to access from offset
+ *
+ * Check that the @offset/@bytes are within the size of the given
+ * @buffer and that the buffer is currently active and not freeable.
+ * Offsets must also be multiples of sizeof(u32). The kernel is
+ * allowed to touch the buffer in two cases:
+ *
+ * 1) when the buffer is being created:
+ *     (buffer->free == 0 && buffer->allow_user_free == 0)
+ * 2) when the buffer is being torn down:
+ *     (buffer->free == 0 && buffer->transaction == NULL).
+ *
+ * Return: true if the buffer is safe to access
+ */
+static inline bool check_buffer(struct binder_alloc *alloc,
+				struct binder_buffer *buffer,
+				binder_size_t offset, size_t bytes)
+{
+	size_t buffer_size = binder_alloc_buffer_size(alloc, buffer);
+
+	return buffer_size >= bytes &&
+		offset <= buffer_size - bytes &&
+		IS_ALIGNED(offset, sizeof(u32)) &&
+		!buffer->free &&
+		(!buffer->allow_user_free || !buffer->transaction);
+}
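
The order of the comparisons matters: buffer_size >= bytes is established first, so buffer_size - bytes cannot wrap before offset is compared against it. A small stand-alone illustration of the same overflow-safe range check (user space, demo values only):

/* Stand-alone illustration of the overflow-safe range check; user space. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool range_ok(size_t buffer_size, size_t offset, size_t bytes)
{
	return buffer_size >= bytes &&		/* makes the subtraction safe */
	       offset <= buffer_size - bytes &&	/* cannot wrap around         */
	       offset % sizeof(uint32_t) == 0;	/* 32-bit aligned             */
}

int main(void)
{
	printf("%d\n", range_ok(128, 124, 4));		/* 1: last aligned word */
	printf("%d\n", range_ok(128, 126, 4));		/* 0: misaligned        */
	printf("%d\n", range_ok(128, SIZE_MAX, 4));	/* 0: would overflow a naive offset + bytes check */
	return 0;
}
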
+
+/**
+ * binder_alloc_get_page() - get kernel pointer for given buffer offset
+ * @alloc: binder_alloc for this proc
+ * @buffer: binder buffer to be accessed
+ * @buffer_offset: offset into @buffer data
+ * @pgoffp: address to copy final page offset to
+ *
+ * Lookup the struct page corresponding to the address
+ * at @buffer_offset into @buffer->user_data. The byte offset
+ * into that page is written to @pgoffp.
+ *
+ * The caller is responsible for ensuring that the offset points
+ * to a valid address within @buffer and that @buffer is
+ * not freeable by the user. Since it can't be freed, we are
+ * guaranteed that the corresponding elements of @alloc->pages[]
+ * cannot change.
+ *
+ * Return: struct page
+ */
+static struct page *binder_alloc_get_page(struct binder_alloc *alloc,
+					  struct binder_buffer *buffer,
+					  binder_size_t buffer_offset,
+					  pgoff_t *pgoffp)
+{
+	binder_size_t buffer_space_offset = buffer_offset +
+		(buffer->user_data - alloc->buffer);
+	pgoff_t pgoff = buffer_space_offset & ~PAGE_MASK;
+	size_t index = buffer_space_offset >> PAGE_SHIFT;
+	struct binder_lru_page *lru_page;
+
+	lru_page = &alloc->pages[index];
+	*pgoffp = pgoff;
+	return lru_page->page_ptr;
+}
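
The lookup is pure arithmetic on the buffer-space offset: dividing by the page size gives the index into alloc->pages[] and the remainder is the offset within that page. A worked example with invented values and a 4 KiB page:

/* Worked example of the index/pgoff split; page size and offsets invented. */
#include <stdint.h>
#include <stdio.h>

#define DEMO_PAGE_SHIFT	12
#define DEMO_PAGE_SIZE	(1UL << DEMO_PAGE_SHIFT)

int main(void)
{
	uint64_t buffer_offset = 0x30;		/* offset within the buffer */
	uint64_t buffer_to_base = 0x2fe0;	/* buffer->user_data - alloc->buffer */
	uint64_t space_offset = buffer_offset + buffer_to_base;	/* 0x3010 */

	printf("pages[%llu], pgoff 0x%llx\n",
	       (unsigned long long)(space_offset >> DEMO_PAGE_SHIFT),	   /* 3    */
	       (unsigned long long)(space_offset & (DEMO_PAGE_SIZE - 1))); /* 0x10 */
	return 0;
}
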
+
+/**
+ * binder_alloc_copy_user_to_buffer() - copy from user space into a binder buffer
+ * @alloc: binder_alloc for this proc
+ * @buffer: binder buffer to be accessed
+ * @buffer_offset: offset into @buffer data
+ * @from: userspace pointer to source buffer
+ * @bytes: bytes to copy
+ *
+ * Copy bytes from source userspace to target buffer.
+ *
+ * Return: bytes remaining to be copied
+ */
+unsigned long
+binder_alloc_copy_user_to_buffer(struct binder_alloc *alloc,
+				 struct binder_buffer *buffer,
+				 binder_size_t buffer_offset,
+				 const void __user *from,
+				 size_t bytes)
+{
+	if (!check_buffer(alloc, buffer, buffer_offset, bytes))
+		return bytes;
+
+	while (bytes) {
+		unsigned long size;
+		unsigned long ret;
+		struct page *page;
+		pgoff_t pgoff;
+		void *kptr;
+
+		page = binder_alloc_get_page(alloc, buffer,
+					     buffer_offset, &pgoff);
+		size = min_t(size_t, bytes, PAGE_SIZE - pgoff);
+		kptr = kmap(page) + pgoff;
+		ret = copy_from_user(kptr, from, size);
+		kunmap(page);
+		if (ret)
+			return bytes - size + ret;
+		bytes -= size;
+		from += size;
+		buffer_offset += size;
+	}
+	return 0;
+}
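
Because the transaction buffer is only mapped in user space, the copy has to walk it page by page: look up the page, map it, copy up to the page boundary, advance. The same chunking loop as a self-contained user-space sketch (memcpy stands in for the kmap()/copy_from_user() pair; DEMO_PAGE_SIZE is deliberately tiny so the copy crosses a page):

/* Page-at-a-time chunking, the same loop shape as
 * binder_alloc_copy_user_to_buffer(); purely illustrative. */
#include <stdio.h>
#include <string.h>

#define DEMO_PAGE_SIZE 8UL	/* tiny pages so the demo crosses a boundary */

static void copy_in_chunks(unsigned char *pages[], size_t buffer_offset,
			   const unsigned char *from, size_t bytes)
{
	while (bytes) {
		size_t pgoff = buffer_offset % DEMO_PAGE_SIZE;
		size_t size = bytes < DEMO_PAGE_SIZE - pgoff ?
					bytes : DEMO_PAGE_SIZE - pgoff;

		/* in the driver: kmap(page) + copy_from_user() + kunmap() */
		memcpy(pages[buffer_offset / DEMO_PAGE_SIZE] + pgoff,
		       from, size);
		bytes -= size;
		from += size;
		buffer_offset += size;
	}
}

int main(void)
{
	unsigned char p0[DEMO_PAGE_SIZE] = { 0 }, p1[DEMO_PAGE_SIZE] = { 0 };
	unsigned char *pages[] = { p0, p1 };

	copy_in_chunks(pages, 5, (const unsigned char *)"hello!", 6);
	printf("%.3s|%.3s\n", p0 + 5, p1);	/* hel|lo! */
	return 0;
}
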
+
+static void binder_alloc_do_buffer_copy(struct binder_alloc *alloc,
+					bool to_buffer,
+					struct binder_buffer *buffer,
+					binder_size_t buffer_offset,
+					void *ptr,
+					size_t bytes)
+{
+	/* All copies must be 32-bit aligned and 32-bit size */
+	BUG_ON(!check_buffer(alloc, buffer, buffer_offset, bytes));
+
+	while (bytes) {
+		unsigned long size;
+		struct page *page;
+		pgoff_t pgoff;
+		void *tmpptr;
+		void *base_ptr;
+
+		page = binder_alloc_get_page(alloc, buffer,
+					     buffer_offset, &pgoff);
+		size = min_t(size_t, bytes, PAGE_SIZE - pgoff);
+		base_ptr = kmap_atomic(page);
+		tmpptr = base_ptr + pgoff;
+		if (to_buffer)
+			memcpy(tmpptr, ptr, size);
+		else
+			memcpy(ptr, tmpptr, size);
+		/*
+		 * kunmap_atomic() takes care of flushing the cache
+		 * if this device has VIVT cache arch
+		 */
+		kunmap_atomic(base_ptr);
+		bytes -= size;
+		pgoff = 0;
+		ptr = ptr + size;
+		buffer_offset += size;
+	}
+}
+
+void binder_alloc_copy_to_buffer(struct binder_alloc *alloc,
+				 struct binder_buffer *buffer,
+				 binder_size_t buffer_offset,
+				 void *src,
+				 size_t bytes)
+{
+	binder_alloc_do_buffer_copy(alloc, true, buffer, buffer_offset,
+				    src, bytes);
+}
+
+void binder_alloc_copy_from_buffer(struct binder_alloc *alloc,
+				   void *dest,
+				   struct binder_buffer *buffer,
+				   binder_size_t buffer_offset,
+				   size_t bytes)
+{
+	binder_alloc_do_buffer_copy(alloc, false, buffer, buffer_offset,
+				    dest, bytes);
+}
+
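
binder.c uses the two wrappers as a pair around each fixup: read a small object out of the buffer, patch it, write it back at the same offset. A trivial user-space sketch of that read-modify-write pattern (memcpy stands in for both helpers):

/* User-space sketch of the read-modify-write pairing; memcpy stands in for
 * binder_alloc_copy_from_buffer()/binder_alloc_copy_to_buffer(). */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned char buffer[32] = { 0 };
	size_t offset = 8;		/* object lives at a known offset */
	uint32_t fd = 7, patched;

	memcpy(buffer + offset, &fd, sizeof(fd));	    /* ...copy_to_buffer   */
	memcpy(&patched, buffer + offset, sizeof(patched)); /* ...copy_from_buffer */
	patched += 100;			/* e.g. the translated fd */
	memcpy(buffer + offset, &patched, sizeof(patched)); /* write it back */
	printf("slot now holds %u\n", patched);	/* 107 */
	return 0;
}
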
diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h
index fb3238c7..b60d161 100644
--- a/drivers/android/binder_alloc.h
+++ b/drivers/android/binder_alloc.h
@@ -22,6 +22,7 @@
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
 #include <linux/list_lru.h>
+#include <uapi/linux/android/binder.h>
 
 extern struct list_lru binder_alloc_lru;
 struct binder_transaction;
@@ -30,16 +31,16 @@
  * struct binder_buffer - buffer used for binder transactions
  * @entry:              entry in alloc->buffers
  * @rb_node:            node for allocated_buffers/free_buffers rb trees
- * @free:               true if buffer is free
- * @allow_user_free:    describe the second member of struct blah,
- * @async_transaction:  describe the second member of struct blah,
- * @debug_id:           describe the second member of struct blah,
- * @transaction:        describe the second member of struct blah,
- * @target_node:        describe the second member of struct blah,
- * @data_size:          describe the second member of struct blah,
- * @offsets_size:       describe the second member of struct blah,
- * @extra_buffers_size: describe the second member of struct blah,
- * @data:i              describe the second member of struct blah,
+ * @free:               %true if buffer is free
+ * @allow_user_free:    %true if user is allowed to free buffer
+ * @async_transaction:  %true if buffer is in use for an async txn
+ * @debug_id:           unique ID for debugging
+ * @transaction:        pointer to associated struct binder_transaction
+ * @target_node:        struct binder_node associated with this buffer
+ * @data_size:          size of @transaction data
+ * @offsets_size:       size of array of offsets
+ * @extra_buffers_size: size of space for other objects (like sg lists)
+ * @user_data:          user pointer to base of buffer space
  *
  * Bookkeeping structure for binder transaction buffers
  */
@@ -58,7 +59,7 @@
 	size_t data_size;
 	size_t offsets_size;
 	size_t extra_buffers_size;
-	void *data;
+	void __user *user_data;
 };
 
 /**
@@ -81,7 +82,6 @@
  *                      (invariant after init)
  * @vma_vm_mm:          copy of vma->vm_mm (invariant after mmap)
  * @buffer:             base of per-proc address space mapped via mmap
- * @user_buffer_offset: offset between user and kernel VAs for buffer
  * @buffers:            list of all buffers for this proc
  * @free_buffers:       rb tree of buffers available for allocation
  *                      sorted by size
@@ -102,8 +102,7 @@
 	struct mutex mutex;
 	struct vm_area_struct *vma;
 	struct mm_struct *vma_vm_mm;
-	void *buffer;
-	ptrdiff_t user_buffer_offset;
+	void __user *buffer;
 	struct list_head buffers;
 	struct rb_root free_buffers;
 	struct rb_root allocated_buffers;
@@ -162,26 +161,24 @@
 	return free_async_space;
 }
 
-/**
- * binder_alloc_get_user_buffer_offset() - get offset between kernel/user addrs
- * @alloc:	binder_alloc for this proc
- *
- * Return:	the offset between kernel and user-space addresses to use for
- * virtual address conversion
- */
-static inline ptrdiff_t
-binder_alloc_get_user_buffer_offset(struct binder_alloc *alloc)
-{
-	/*
-	 * user_buffer_offset is constant if vma is set and
-	 * undefined if vma is not set. It is possible to
-	 * get here with !alloc->vma if the target process
-	 * is dying while a transaction is being initiated.
-	 * Returning the old value is ok in this case and
-	 * the transaction will fail.
-	 */
-	return alloc->user_buffer_offset;
-}
+unsigned long
+binder_alloc_copy_user_to_buffer(struct binder_alloc *alloc,
+				 struct binder_buffer *buffer,
+				 binder_size_t buffer_offset,
+				 const void __user *from,
+				 size_t bytes);
+
+void binder_alloc_copy_to_buffer(struct binder_alloc *alloc,
+				 struct binder_buffer *buffer,
+				 binder_size_t buffer_offset,
+				 void *src,
+				 size_t bytes);
+
+void binder_alloc_copy_from_buffer(struct binder_alloc *alloc,
+				   void *dest,
+				   struct binder_buffer *buffer,
+				   binder_size_t buffer_offset,
+				   size_t bytes);
 
 #endif /* _LINUX_BINDER_ALLOC_H */
 
diff --git a/drivers/android/binder_alloc_selftest.c b/drivers/android/binder_alloc_selftest.c
index 8bd7bce..b727089 100644
--- a/drivers/android/binder_alloc_selftest.c
+++ b/drivers/android/binder_alloc_selftest.c
@@ -102,11 +102,12 @@
 					 struct binder_buffer *buffer,
 					 size_t size)
 {
-	void *page_addr, *end;
+	void __user *page_addr;
+	void __user *end;
 	int page_index;
 
-	end = (void *)PAGE_ALIGN((uintptr_t)buffer->data + size);
-	page_addr = buffer->data;
+	end = (void __user *)PAGE_ALIGN((uintptr_t)buffer->user_data + size);
+	page_addr = buffer->user_data;
 	for (; page_addr < end; page_addr += PAGE_SIZE) {
 		page_index = (page_addr - alloc->buffer) / PAGE_SIZE;
 		if (!alloc->pages[page_index].page_ptr ||
diff --git a/drivers/android/binder_trace.h b/drivers/android/binder_trace.h
index 588eb3e..7df1e94 100644
--- a/drivers/android/binder_trace.h
+++ b/drivers/android/binder_trace.h
@@ -85,6 +85,30 @@
 DEFINE_BINDER_FUNCTION_RETURN_EVENT(binder_write_done);
 DEFINE_BINDER_FUNCTION_RETURN_EVENT(binder_read_done);
 
+TRACE_EVENT(binder_set_priority,
+	TP_PROTO(int proc, int thread, unsigned int old_prio,
+		 unsigned int desired_prio, unsigned int new_prio),
+	TP_ARGS(proc, thread, old_prio, new_prio, desired_prio),
+
+	TP_STRUCT__entry(
+		__field(int, proc)
+		__field(int, thread)
+		__field(unsigned int, old_prio)
+		__field(unsigned int, new_prio)
+		__field(unsigned int, desired_prio)
+	),
+	TP_fast_assign(
+		__entry->proc = proc;
+		__entry->thread = thread;
+		__entry->old_prio = old_prio;
+		__entry->new_prio = new_prio;
+		__entry->desired_prio = desired_prio;
+	),
+	TP_printk("proc=%d thread=%d old=%d => new=%d desired=%d",
+		  __entry->proc, __entry->thread, __entry->old_prio,
+		  __entry->new_prio, __entry->desired_prio)
+);
+
 TRACE_EVENT(binder_wait_for_work,
 	TP_PROTO(bool proc_work, bool transaction_stack, bool thread_todo),
 	TP_ARGS(proc_work, transaction_stack, thread_todo),
@@ -275,7 +299,7 @@
 
 TRACE_EVENT(binder_update_page_range,
 	TP_PROTO(struct binder_alloc *alloc, bool allocate,
-		 void *start, void *end),
+		 void __user *start, void __user *end),
 	TP_ARGS(alloc, allocate, start, end),
 	TP_STRUCT__entry(
 		__field(int, proc)
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index e7cb0c6a..b5f61f2 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -15,8 +15,11 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/sched/topology.h>
+#include <linux/cpuset.h>
 
 DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
+DEFINE_PER_CPU(unsigned long, max_cpu_freq);
+DEFINE_PER_CPU(unsigned long, max_freq_scale) = SCHED_CAPACITY_SCALE;
 
 void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
 			 unsigned long max_freq)
@@ -26,8 +29,29 @@
 
 	scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;
 
-	for_each_cpu(i, cpus)
+	for_each_cpu(i, cpus) {
 		per_cpu(freq_scale, i) = scale;
+		per_cpu(max_cpu_freq, i) = max_freq;
+	}
+}
+
+void arch_set_max_freq_scale(struct cpumask *cpus,
+			     unsigned long policy_max_freq)
+{
+	unsigned long scale, max_freq;
+	int cpu = cpumask_first(cpus);
+
+	if (cpu >= nr_cpu_ids)
+		return;
+
+	max_freq = per_cpu(max_cpu_freq, cpu);
+	if (!max_freq)
+		return;
+
+	scale = (policy_max_freq << SCHED_CAPACITY_SHIFT) / max_freq;
+
+	for_each_cpu(cpu, cpus)
+		per_cpu(max_freq_scale, cpu) = scale;
 }
 
 static DEFINE_MUTEX(cpu_scale_mutex);
@@ -47,6 +71,9 @@
 	return sprintf(buf, "%lu\n", topology_get_cpu_scale(NULL, cpu->dev.id));
 }
 
+static void update_topology_flags_workfn(struct work_struct *work);
+static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);
+
 static ssize_t cpu_capacity_store(struct device *dev,
 				  struct device_attribute *attr,
 				  const char *buf,
@@ -72,6 +99,8 @@
 		topology_set_cpu_scale(i, new_capacity);
 	mutex_unlock(&cpu_scale_mutex);
 
+	schedule_work(&update_topology_flags_work);
+
 	return count;
 }
 
@@ -96,6 +125,25 @@
 }
 subsys_initcall(register_cpu_capacity_sysctl);
 
+static int update_topology;
+
+int topology_update_cpu_topology(void)
+{
+	return update_topology;
+}
+
+/*
+ * Updating the sched_domains can't be done directly from cpufreq callbacks
+ * due to locking, so queue the work for later.
+ */
+static void update_topology_flags_workfn(struct work_struct *work)
+{
+	update_topology = 1;
+	rebuild_sched_domains();
+	pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
+	update_topology = 0;
+}
+
 static u32 capacity_scale;
 static u32 *raw_capacity;
 
@@ -201,6 +249,7 @@
 
 	if (cpumask_empty(cpus_to_visit)) {
 		topology_normalize_cpu_scale();
+		schedule_work(&update_topology_flags_work);
 		free_raw_capacity();
 		pr_debug("cpu_capacity: parsing done\n");
 		schedule_work(&parsing_done_work);
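For reference, max_freq_scale follows the same fixed-point convention as the existing freq_scale: a value of 1 << SCHED_CAPACITY_SHIFT (1024) means the policy permits the full hardware maximum frequency. A small standalone sketch of the arithmetic, with example frequencies chosen purely for illustration:

  #include <stdio.h>

  #define SCHED_CAPACITY_SHIFT 10  /* as in the kernel's scheduler headers */

  int main(void)
  {
      unsigned long max_freq = 2000000;        /* hardware max, kHz (example) */
      unsigned long policy_max_freq = 1500000; /* policy cap, kHz (example) */
      unsigned long scale;

      scale = (policy_max_freq << SCHED_CAPACITY_SHIFT) / max_freq;
      printf("max_freq_scale = %lu\n", scale); /* 768, i.e. 75% of 1024 */
      return 0;
  }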
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 4abd7c6..5544792 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -33,6 +33,7 @@
 #include <linux/cpufreq.h>
 #include <linux/cpuidle.h>
 #include <linux/timer.h>
+#include <linux/wakeup_reason.h>
 
 #include "../base.h"
 #include "power.h"
@@ -1706,6 +1707,7 @@
 	pm_callback_t callback = NULL;
 	const char *info = NULL;
 	int error = 0;
+	char suspend_abort[MAX_SUSPEND_ABORT_LEN];
 	DECLARE_DPM_WATCHDOG_ON_STACK(wd);
 
 	TRACE_DEVICE(dev);
@@ -1729,6 +1731,9 @@
 
 	if (pm_wakeup_pending()) {
 		dev->power.direct_complete = false;
+		pm_get_active_wakeup_sources(suspend_abort,
+			MAX_SUSPEND_ABORT_LEN);
+		log_suspend_abort_reason(suspend_abort);
 		async_error = -EBUSY;
 		goto Complete;
 	}
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 7c84f64..ecbe625 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -15,6 +15,7 @@
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
 #include <linux/pm_wakeirq.h>
+#include <linux/types.h>
 #include <trace/events/power.h>
 
 #include "power.h"
@@ -809,6 +810,37 @@
 }
 EXPORT_SYMBOL_GPL(pm_wakeup_dev_event);
 
+void pm_get_active_wakeup_sources(char *pending_wakeup_source, size_t max)
+{
+	struct wakeup_source *ws, *last_active_ws = NULL;
+	int len = 0;
+	bool active = false;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ws, &wakeup_sources, entry) {
+		if (ws->active && len < max) {
+			if (!active)
+				len += scnprintf(pending_wakeup_source, max,
+						"Pending Wakeup Sources: ");
+			len += scnprintf(pending_wakeup_source + len, max - len,
+				"%s ", ws->name);
+			active = true;
+		} else if (!active &&
+			   (!last_active_ws ||
+			    ktime_to_ns(ws->last_time) >
+			    ktime_to_ns(last_active_ws->last_time))) {
+			last_active_ws = ws;
+		}
+	}
+	if (!active && last_active_ws) {
+		scnprintf(pending_wakeup_source, max,
+				"Last active Wakeup Source: %s",
+				last_active_ws->name);
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(pm_get_active_wakeup_sources);
+
 void pm_print_active_wakeup_sources(void)
 {
 	struct wakeup_source *ws;
diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c
index 6e076f3..996573f 100644
--- a/drivers/base/syscore.c
+++ b/drivers/base/syscore.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/suspend.h>
 #include <trace/events/power.h>
+#include <linux/wakeup_reason.h>
 
 static LIST_HEAD(syscore_ops_list);
 static DEFINE_MUTEX(syscore_ops_lock);
@@ -74,6 +75,8 @@
 	return 0;
 
  err_out:
+	log_suspend_abort_reason("System core suspend callback %pF failed",
+		ops->suspend);
 	pr_err("PM: System core suspend callback %pF failed.\n", ops->suspend);
 
 	list_for_each_entry_continue(ops, &syscore_ops_list, node)
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
index 6352357..99a2c60 100644
--- a/drivers/block/zram/Kconfig
+++ b/drivers/block/zram/Kconfig
@@ -16,7 +16,7 @@
 	  See Documentation/blockdev/zram.txt for more information.
 
 config ZRAM_WRITEBACK
-       bool "Write back incompressible page to backing device"
+       bool "Write back incompressible or idle page to backing device"
        depends on ZRAM
        default n
        help
@@ -25,6 +25,9 @@
 	 For this feature, admin should set up backing device via
 	 /sys/block/zramX/backing_dev.
 
+	 With /sys/block/zramX/{idle,writeback}, applications can request
+	 writeback of idle pages to the backing device to save memory.
+
 	 See Documentation/blockdev/zram.txt for more information.
 
 config ZRAM_MEMORY_TRACKING
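For illustration, a minimal userspace sketch of the idle/writeback flow the help text describes: mark all slots idle, then ask zram to write the idle pages to the backing device. The device name zram0 and an already configured backing_dev are assumptions, not something this patch sets up.

  #include <fcntl.h>
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>

  /* Write a short string to a sysfs attribute; returns 0 on success. */
  static int write_attr(const char *path, const char *val)
  {
      int fd = open(path, O_WRONLY);
      ssize_t n;

      if (fd < 0)
          return -1;
      n = write(fd, val, strlen(val));
      close(fd);
      return n == (ssize_t)strlen(val) ? 0 : -1;
  }

  int main(void)
  {
      if (write_attr("/sys/block/zram0/idle", "all"))
          perror("mark idle");
      if (write_attr("/sys/block/zram0/writeback", "idle"))
          perror("write back idle pages");
      return 0;
  }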
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 70cbd0e..cb08541 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -52,20 +52,23 @@
 static size_t huge_class_size;
 
 static void zram_free_page(struct zram *zram, size_t index);
+static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
+				u32 index, int offset, struct bio *bio);
+
 
 static int zram_slot_trylock(struct zram *zram, u32 index)
 {
-	return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].value);
+	return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
 }
 
 static void zram_slot_lock(struct zram *zram, u32 index)
 {
-	bit_spin_lock(ZRAM_LOCK, &zram->table[index].value);
+	bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags);
 }
 
 static void zram_slot_unlock(struct zram *zram, u32 index)
 {
-	bit_spin_unlock(ZRAM_LOCK, &zram->table[index].value);
+	bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
 }
 
 static inline bool init_done(struct zram *zram)
@@ -73,13 +76,6 @@
 	return zram->disksize;
 }
 
-static inline bool zram_allocated(struct zram *zram, u32 index)
-{
-
-	return (zram->table[index].value >> (ZRAM_FLAG_SHIFT + 1)) ||
-					zram->table[index].handle;
-}
-
 static inline struct zram *dev_to_zram(struct device *dev)
 {
 	return (struct zram *)dev_to_disk(dev)->private_data;
@@ -99,19 +95,19 @@
 static bool zram_test_flag(struct zram *zram, u32 index,
 			enum zram_pageflags flag)
 {
-	return zram->table[index].value & BIT(flag);
+	return zram->table[index].flags & BIT(flag);
 }
 
 static void zram_set_flag(struct zram *zram, u32 index,
 			enum zram_pageflags flag)
 {
-	zram->table[index].value |= BIT(flag);
+	zram->table[index].flags |= BIT(flag);
 }
 
 static void zram_clear_flag(struct zram *zram, u32 index,
 			enum zram_pageflags flag)
 {
-	zram->table[index].value &= ~BIT(flag);
+	zram->table[index].flags &= ~BIT(flag);
 }
 
 static inline void zram_set_element(struct zram *zram, u32 index,
@@ -127,15 +123,22 @@
 
 static size_t zram_get_obj_size(struct zram *zram, u32 index)
 {
-	return zram->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
+	return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
 }
 
 static void zram_set_obj_size(struct zram *zram,
 					u32 index, size_t size)
 {
-	unsigned long flags = zram->table[index].value >> ZRAM_FLAG_SHIFT;
+	unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;
 
-	zram->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
+	zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
+}
+
+static inline bool zram_allocated(struct zram *zram, u32 index)
+{
+	return zram_get_obj_size(zram, index) ||
+			zram_test_flag(zram, index, ZRAM_SAME) ||
+			zram_test_flag(zram, index, ZRAM_WB);
 }
 
 #if PAGE_SIZE != 4096
@@ -281,17 +284,125 @@
 	return len;
 }
 
-#ifdef CONFIG_ZRAM_WRITEBACK
-static bool zram_wb_enabled(struct zram *zram)
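The count-OPPs/defer-probe/em_register_perf_domain() sequence added here recurs in the cpufreq-dt, scmi and scpi drivers later in this patch. The callback wrapped by EM_DATA_CB() has the shape shown by scmi_get_cpu_power() further below: it receives a requested frequency in kHz and fills in the matching operating point and its power cost. A hypothetical sketch, with an illustrative frequency/power table and kernel headers assumed for -EINVAL:

  /* Illustrative only: an EM active-power callback with the same shape as
   * of_dev_pm_opp_get_cpu_power() / scmi_get_cpu_power() used in this patch.
   */
  static int example_get_cpu_power(unsigned long *power, unsigned long *KHz,
                                   int cpu)
  {
      /* Hypothetical operating points (kHz) and power costs. */
      static const unsigned long freq[] = { 500000, 1000000, 1500000 };
      static const unsigned long cost[] = { 100, 300, 600 };
      int i;

      for (i = 0; i < 3; i++) {
          if (*KHz <= freq[i]) {
              *KHz = freq[i];   /* round up to a supported OPP */
              *power = cost[i];
              return 0;
          }
      }
      return -EINVAL;           /* requested frequency above the table */
  }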
+static ssize_t idle_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
 {
-	return zram->backing_dev;
+	struct zram *zram = dev_to_zram(dev);
+	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
+	int index;
+	char mode_buf[8];
+	ssize_t sz;
+
+	sz = strscpy(mode_buf, buf, sizeof(mode_buf));
+	if (sz <= 0)
+		return -EINVAL;
+
+	/* ignore trailing new line */
+	if (mode_buf[sz - 1] == '\n')
+		mode_buf[sz - 1] = 0x00;
+
+	if (strcmp(mode_buf, "all"))
+		return -EINVAL;
+
+	down_read(&zram->init_lock);
+	if (!init_done(zram)) {
+		up_read(&zram->init_lock);
+		return -EINVAL;
+	}
+
+	for (index = 0; index < nr_pages; index++) {
+		/*
+		 * Do not mark ZRAM_UNDER_WB slots as ZRAM_IDLE, to close a
+		 * race; see the comment in writeback_store.
+		 */
+		zram_slot_lock(zram, index);
+		if (zram_allocated(zram, index) &&
+				!zram_test_flag(zram, index, ZRAM_UNDER_WB))
+			zram_set_flag(zram, index, ZRAM_IDLE);
+		zram_slot_unlock(zram, index);
+	}
+
+	up_read(&zram->init_lock);
+
+	return len;
+}
+
+#ifdef CONFIG_ZRAM_WRITEBACK
+static ssize_t writeback_limit_enable_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct zram *zram = dev_to_zram(dev);
+	u64 val;
+	ssize_t ret = -EINVAL;
+
+	if (kstrtoull(buf, 10, &val))
+		return ret;
+
+	down_read(&zram->init_lock);
+	spin_lock(&zram->wb_limit_lock);
+	zram->wb_limit_enable = val;
+	spin_unlock(&zram->wb_limit_lock);
+	up_read(&zram->init_lock);
+	ret = len;
+
+	return ret;
+}
+
+static ssize_t writeback_limit_enable_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	bool val;
+	struct zram *zram = dev_to_zram(dev);
+
+	down_read(&zram->init_lock);
+	spin_lock(&zram->wb_limit_lock);
+	val = zram->wb_limit_enable;
+	spin_unlock(&zram->wb_limit_lock);
+	up_read(&zram->init_lock);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", val);
+}
+
+static ssize_t writeback_limit_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct zram *zram = dev_to_zram(dev);
+	u64 val;
+	ssize_t ret = -EINVAL;
+
+	if (kstrtoull(buf, 10, &val))
+		return ret;
+
+	down_read(&zram->init_lock);
+	spin_lock(&zram->wb_limit_lock);
+	zram->bd_wb_limit = val;
+	spin_unlock(&zram->wb_limit_lock);
+	up_read(&zram->init_lock);
+	ret = len;
+
+	return ret;
+}
+
+static ssize_t writeback_limit_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	u64 val;
+	struct zram *zram = dev_to_zram(dev);
+
+	down_read(&zram->init_lock);
+	spin_lock(&zram->wb_limit_lock);
+	val = zram->bd_wb_limit;
+	spin_unlock(&zram->wb_limit_lock);
+	up_read(&zram->init_lock);
+
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
 }
 
 static void reset_bdev(struct zram *zram)
 {
 	struct block_device *bdev;
 
-	if (!zram_wb_enabled(zram))
+	if (!zram->backing_dev)
 		return;
 
 	bdev = zram->bdev;
@@ -318,7 +429,7 @@
 	ssize_t ret;
 
 	down_read(&zram->init_lock);
-	if (!zram_wb_enabled(zram)) {
+	if (!zram->backing_dev) {
 		memcpy(buf, "none\n", 5);
 		up_read(&zram->init_lock);
 		return 5;
@@ -447,7 +558,7 @@
 	return err;
 }
 
-static unsigned long get_entry_bdev(struct zram *zram)
+static unsigned long alloc_block_bdev(struct zram *zram)
 {
 	unsigned long blk_idx = 1;
 retry:
@@ -459,15 +570,17 @@
 	if (test_and_set_bit(blk_idx, zram->bitmap))
 		goto retry;
 
+	atomic64_inc(&zram->stats.bd_count);
 	return blk_idx;
 }
 
-static void put_entry_bdev(struct zram *zram, unsigned long entry)
+static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
 {
 	int was_set;
 
-	was_set = test_and_clear_bit(entry, zram->bitmap);
+	was_set = test_and_clear_bit(blk_idx, zram->bitmap);
 	WARN_ON_ONCE(!was_set);
+	atomic64_dec(&zram->stats.bd_count);
 }
 
 static void zram_page_end_io(struct bio *bio)
@@ -510,6 +623,172 @@
 	return 1;
 }
 
+#define HUGE_WRITEBACK 1
+#define IDLE_WRITEBACK 2
+
+static ssize_t writeback_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct zram *zram = dev_to_zram(dev);
+	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
+	unsigned long index;
+	struct bio bio;
+	struct bio_vec bio_vec;
+	struct page *page;
+	ssize_t ret, sz;
+	char mode_buf[8];
+	int mode = -1;
+	unsigned long blk_idx = 0;
+
+	sz = strscpy(mode_buf, buf, sizeof(mode_buf));
+	if (sz <= 0)
+		return -EINVAL;
+
+	/* ignore trailing newline */
+	if (mode_buf[sz - 1] == '\n')
+		mode_buf[sz - 1] = 0x00;
+
+	if (!strcmp(mode_buf, "idle"))
+		mode = IDLE_WRITEBACK;
+	else if (!strcmp(mode_buf, "huge"))
+		mode = HUGE_WRITEBACK;
+
+	if (mode == -1)
+		return -EINVAL;
+
+	down_read(&zram->init_lock);
+	if (!init_done(zram)) {
+		ret = -EINVAL;
+		goto release_init_lock;
+	}
+
+	if (!zram->backing_dev) {
+		ret = -ENODEV;
+		goto release_init_lock;
+	}
+
+	page = alloc_page(GFP_KERNEL);
+	if (!page) {
+		ret = -ENOMEM;
+		goto release_init_lock;
+	}
+
+	for (index = 0; index < nr_pages; index++) {
+		struct bio_vec bvec;
+
+		bvec.bv_page = page;
+		bvec.bv_len = PAGE_SIZE;
+		bvec.bv_offset = 0;
+
+		spin_lock(&zram->wb_limit_lock);
+		if (zram->wb_limit_enable && !zram->bd_wb_limit) {
+			spin_unlock(&zram->wb_limit_lock);
+			ret = -EIO;
+			break;
+		}
+		spin_unlock(&zram->wb_limit_lock);
+
+		if (!blk_idx) {
+			blk_idx = alloc_block_bdev(zram);
+			if (!blk_idx) {
+				ret = -ENOSPC;
+				break;
+			}
+		}
+
+		zram_slot_lock(zram, index);
+		if (!zram_allocated(zram, index))
+			goto next;
+
+		if (zram_test_flag(zram, index, ZRAM_WB) ||
+				zram_test_flag(zram, index, ZRAM_SAME) ||
+				zram_test_flag(zram, index, ZRAM_UNDER_WB))
+			goto next;
+
+		if (mode == IDLE_WRITEBACK &&
+			  !zram_test_flag(zram, index, ZRAM_IDLE))
+			goto next;
+		if (mode == HUGE_WRITEBACK &&
+			  !zram_test_flag(zram, index, ZRAM_HUGE))
+			goto next;
+		/*
+		 * Clearing ZRAM_UNDER_WB is the caller's duty.
+		 * IOW, zram_free_page never clears it.
+		 */
+		zram_set_flag(zram, index, ZRAM_UNDER_WB);
+		/* Need for hugepage writeback racing */
+		zram_set_flag(zram, index, ZRAM_IDLE);
+		zram_slot_unlock(zram, index);
+		if (zram_bvec_read(zram, &bvec, index, 0, NULL)) {
+			zram_slot_lock(zram, index);
+			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
+			zram_clear_flag(zram, index, ZRAM_IDLE);
+			zram_slot_unlock(zram, index);
+			continue;
+		}
+
+		bio_init(&bio, &bio_vec, 1);
+		bio_set_dev(&bio, zram->bdev);
+		bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
+		bio.bi_opf = REQ_OP_WRITE | REQ_SYNC;
+
+		bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
+				bvec.bv_offset);
+		/*
+		 * XXX: A single page IO would be inefficient for write
+		 * but it would not be bad as a starting point.
+		 */
+		ret = submit_bio_wait(&bio);
+		if (ret) {
+			zram_slot_lock(zram, index);
+			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
+			zram_clear_flag(zram, index, ZRAM_IDLE);
+			zram_slot_unlock(zram, index);
+			continue;
+		}
+
+		atomic64_inc(&zram->stats.bd_writes);
+		/*
+		 * We released zram_slot_lock, so we need to check whether the
+		 * slot has changed. If the slot was freed, we catch that
+		 * easily via zram_allocated.
+		 * A subtle case is the slot being freed, reallocated and
+		 * marked ZRAM_IDLE again. To close that race, idle_store does
+		 * not mark a slot ZRAM_IDLE once it finds it is ZRAM_UNDER_WB,
+		 * so checking the ZRAM_IDLE bit here is sufficient.
+		 */
+		zram_slot_lock(zram, index);
+		if (!zram_allocated(zram, index) ||
+			  !zram_test_flag(zram, index, ZRAM_IDLE)) {
+			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
+			zram_clear_flag(zram, index, ZRAM_IDLE);
+			goto next;
+		}
+
+		zram_free_page(zram, index);
+		zram_clear_flag(zram, index, ZRAM_UNDER_WB);
+		zram_set_flag(zram, index, ZRAM_WB);
+		zram_set_element(zram, index, blk_idx);
+		blk_idx = 0;
+		atomic64_inc(&zram->stats.pages_stored);
+		spin_lock(&zram->wb_limit_lock);
+		if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
+			zram->bd_wb_limit -=  1UL << (PAGE_SHIFT - 12);
+		spin_unlock(&zram->wb_limit_lock);
+next:
+		zram_slot_unlock(zram, index);
+	}
+
+	if (blk_idx)
+		free_block_bdev(zram, blk_idx);
+	ret = len;
+	__free_page(page);
+release_init_lock:
+	up_read(&zram->init_lock);
+
+	return ret;
+}
+
 struct zram_work {
 	struct work_struct work;
 	struct zram *zram;
@@ -563,79 +842,21 @@
 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
 			unsigned long entry, struct bio *parent, bool sync)
 {
+	atomic64_inc(&zram->stats.bd_reads);
 	if (sync)
 		return read_from_bdev_sync(zram, bvec, entry, parent);
 	else
 		return read_from_bdev_async(zram, bvec, entry, parent);
 }
-
-static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
-					u32 index, struct bio *parent,
-					unsigned long *pentry)
-{
-	struct bio *bio;
-	unsigned long entry;
-
-	bio = bio_alloc(GFP_ATOMIC, 1);
-	if (!bio)
-		return -ENOMEM;
-
-	entry = get_entry_bdev(zram);
-	if (!entry) {
-		bio_put(bio);
-		return -ENOSPC;
-	}
-
-	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
-	bio_set_dev(bio, zram->bdev);
-	if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len,
-					bvec->bv_offset)) {
-		bio_put(bio);
-		put_entry_bdev(zram, entry);
-		return -EIO;
-	}
-
-	if (!parent) {
-		bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
-		bio->bi_end_io = zram_page_end_io;
-	} else {
-		bio->bi_opf = parent->bi_opf;
-		bio_chain(bio, parent);
-	}
-
-	submit_bio(bio);
-	*pentry = entry;
-
-	return 0;
-}
-
-static void zram_wb_clear(struct zram *zram, u32 index)
-{
-	unsigned long entry;
-
-	zram_clear_flag(zram, index, ZRAM_WB);
-	entry = zram_get_element(zram, index);
-	zram_set_element(zram, index, 0);
-	put_entry_bdev(zram, entry);
-}
-
 #else
-static bool zram_wb_enabled(struct zram *zram) { return false; }
 static inline void reset_bdev(struct zram *zram) {};
-static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
-					u32 index, struct bio *parent,
-					unsigned long *pentry)
-
-{
-	return -EIO;
-}
-
 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
 			unsigned long entry, struct bio *parent, bool sync)
 {
 	return -EIO;
 }
-static void zram_wb_clear(struct zram *zram, u32 index) {}
+
+static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
 #endif
 
 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
@@ -654,14 +875,10 @@
 
 static void zram_accessed(struct zram *zram, u32 index)
 {
+	zram_clear_flag(zram, index, ZRAM_IDLE);
 	zram->table[index].ac_time = ktime_get_boottime();
 }
 
-static void zram_reset_access(struct zram *zram, u32 index)
-{
-	zram->table[index].ac_time = 0;
-}
-
 static ssize_t read_block_state(struct file *file, char __user *buf,
 				size_t count, loff_t *ppos)
 {
@@ -691,12 +908,13 @@
 
 		ts = ktime_to_timespec64(zram->table[index].ac_time);
 		copied = snprintf(kbuf + written, count,
-			"%12zd %12lld.%06lu %c%c%c\n",
+			"%12zd %12lld.%06lu %c%c%c%c\n",
 			index, (s64)ts.tv_sec,
 			ts.tv_nsec / NSEC_PER_USEC,
 			zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
 			zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
-			zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.');
+			zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
+			zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.');
 
 		if (count < copied) {
 			zram_slot_unlock(zram, index);
@@ -741,8 +959,10 @@
 #else
 static void zram_debugfs_create(void) {};
 static void zram_debugfs_destroy(void) {};
-static void zram_accessed(struct zram *zram, u32 index) {};
-static void zram_reset_access(struct zram *zram, u32 index) {};
+static void zram_accessed(struct zram *zram, u32 index)
+{
+	zram_clear_flag(zram, index, ZRAM_IDLE);
+};
 static void zram_debugfs_register(struct zram *zram) {};
 static void zram_debugfs_unregister(struct zram *zram) {};
 #endif
@@ -879,6 +1099,26 @@
 	return ret;
 }
 
+#ifdef CONFIG_ZRAM_WRITEBACK
+#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
+static ssize_t bd_stat_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct zram *zram = dev_to_zram(dev);
+	ssize_t ret;
+
+	down_read(&zram->init_lock);
+	ret = scnprintf(buf, PAGE_SIZE,
+		"%8llu %8llu %8llu\n",
+			FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
+			FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
+			FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
+	up_read(&zram->init_lock);
+
+	return ret;
+}
+#endif
+
 static ssize_t debug_stat_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -899,6 +1139,9 @@
 
 static DEVICE_ATTR_RO(io_stat);
 static DEVICE_ATTR_RO(mm_stat);
+#ifdef CONFIG_ZRAM_WRITEBACK
+static DEVICE_ATTR_RO(bd_stat);
+#endif
 static DEVICE_ATTR_RO(debug_stat);
 
 static void zram_meta_free(struct zram *zram, u64 disksize)
@@ -943,17 +1186,21 @@
 {
 	unsigned long handle;
 
-	zram_reset_access(zram, index);
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+	zram->table[index].ac_time = 0;
+#endif
+	if (zram_test_flag(zram, index, ZRAM_IDLE))
+		zram_clear_flag(zram, index, ZRAM_IDLE);
 
 	if (zram_test_flag(zram, index, ZRAM_HUGE)) {
 		zram_clear_flag(zram, index, ZRAM_HUGE);
 		atomic64_dec(&zram->stats.huge_pages);
 	}
 
-	if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) {
-		zram_wb_clear(zram, index);
-		atomic64_dec(&zram->stats.pages_stored);
-		return;
+	if (zram_test_flag(zram, index, ZRAM_WB)) {
+		zram_clear_flag(zram, index, ZRAM_WB);
+		free_block_bdev(zram, zram_get_element(zram, index));
+		goto out;
 	}
 
 	/*
@@ -962,10 +1209,8 @@
 	 */
 	if (zram_test_flag(zram, index, ZRAM_SAME)) {
 		zram_clear_flag(zram, index, ZRAM_SAME);
-		zram_set_element(zram, index, 0);
 		atomic64_dec(&zram->stats.same_pages);
-		atomic64_dec(&zram->stats.pages_stored);
-		return;
+		goto out;
 	}
 
 	handle = zram_get_handle(zram, index);
@@ -976,10 +1221,12 @@
 
 	atomic64_sub(zram_get_obj_size(zram, index),
 			&zram->stats.compr_data_size);
+out:
 	atomic64_dec(&zram->stats.pages_stored);
-
 	zram_set_handle(zram, index, 0);
 	zram_set_obj_size(zram, index, 0);
+	WARN_ON_ONCE(zram->table[index].flags &
+		~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
 }
 
 static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
@@ -990,24 +1237,20 @@
 	unsigned int size;
 	void *src, *dst;
 
-	if (zram_wb_enabled(zram)) {
-		zram_slot_lock(zram, index);
-		if (zram_test_flag(zram, index, ZRAM_WB)) {
-			struct bio_vec bvec;
+	zram_slot_lock(zram, index);
+	if (zram_test_flag(zram, index, ZRAM_WB)) {
+		struct bio_vec bvec;
 
-			zram_slot_unlock(zram, index);
-
-			bvec.bv_page = page;
-			bvec.bv_len = PAGE_SIZE;
-			bvec.bv_offset = 0;
-			return read_from_bdev(zram, &bvec,
-					zram_get_element(zram, index),
-					bio, partial_io);
-		}
 		zram_slot_unlock(zram, index);
+
+		bvec.bv_page = page;
+		bvec.bv_len = PAGE_SIZE;
+		bvec.bv_offset = 0;
+		return read_from_bdev(zram, &bvec,
+				zram_get_element(zram, index),
+				bio, partial_io);
 	}
 
-	zram_slot_lock(zram, index);
 	handle = zram_get_handle(zram, index);
 	if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
 		unsigned long value;
@@ -1092,7 +1335,6 @@
 	struct page *page = bvec->bv_page;
 	unsigned long element = 0;
 	enum zram_pageflags flags = 0;
-	bool allow_wb = true;
 
 	mem = kmap_atomic(page);
 	if (page_same_filled(mem, &element)) {
@@ -1117,21 +1359,8 @@
 		return ret;
 	}
 
-	if (unlikely(comp_len >= huge_class_size)) {
+	if (comp_len >= huge_class_size)
 		comp_len = PAGE_SIZE;
-		if (zram_wb_enabled(zram) && allow_wb) {
-			zcomp_stream_put(zram->comp);
-			ret = write_to_bdev(zram, bvec, index, bio, &element);
-			if (!ret) {
-				flags = ZRAM_WB;
-				ret = 1;
-				goto out;
-			}
-			allow_wb = false;
-			goto compress_again;
-		}
-	}
-
 	/*
 	 * handle allocation has 2 paths:
 	 * a) fast path is executed with preemption disabled (for
@@ -1615,10 +1844,14 @@
 static DEVICE_ATTR_WO(reset);
 static DEVICE_ATTR_WO(mem_limit);
 static DEVICE_ATTR_WO(mem_used_max);
+static DEVICE_ATTR_WO(idle);
 static DEVICE_ATTR_RW(max_comp_streams);
 static DEVICE_ATTR_RW(comp_algorithm);
 #ifdef CONFIG_ZRAM_WRITEBACK
 static DEVICE_ATTR_RW(backing_dev);
+static DEVICE_ATTR_WO(writeback);
+static DEVICE_ATTR_RW(writeback_limit);
+static DEVICE_ATTR_RW(writeback_limit_enable);
 #endif
 
 static struct attribute *zram_disk_attrs[] = {
@@ -1628,13 +1861,20 @@
 	&dev_attr_compact.attr,
 	&dev_attr_mem_limit.attr,
 	&dev_attr_mem_used_max.attr,
+	&dev_attr_idle.attr,
 	&dev_attr_max_comp_streams.attr,
 	&dev_attr_comp_algorithm.attr,
 #ifdef CONFIG_ZRAM_WRITEBACK
 	&dev_attr_backing_dev.attr,
+	&dev_attr_writeback.attr,
+	&dev_attr_writeback_limit.attr,
+	&dev_attr_writeback_limit_enable.attr,
 #endif
 	&dev_attr_io_stat.attr,
 	&dev_attr_mm_stat.attr,
+#ifdef CONFIG_ZRAM_WRITEBACK
+	&dev_attr_bd_stat.attr,
+#endif
 	&dev_attr_debug_stat.attr,
 	NULL,
 };
@@ -1668,7 +1908,9 @@
 	device_id = ret;
 
 	init_rwsem(&zram->init_lock);
-
+#ifdef CONFIG_ZRAM_WRITEBACK
+	spin_lock_init(&zram->wb_limit_lock);
+#endif
 	queue = blk_alloc_queue(GFP_KERNEL);
 	if (!queue) {
 		pr_err("Error allocating disk queue for device %d\n",
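Note that the writeback budget in writeback_store() is kept in 4K units: each page written consumes 1UL << (PAGE_SHIFT - 12) units of bd_wb_limit, and bd_stat_show() reports bd_count/bd_reads/bd_writes through the FOUR_K() macro in the same units. A small standalone sketch of that arithmetic for a few example page sizes:

  #include <stdio.h>

  int main(void)
  {
      /* Example PAGE_SHIFT values; the kernel uses its build-time constant. */
      const int page_shift[] = { 12, 14, 16 };  /* 4K, 16K, 64K pages */
      int i;

      for (i = 0; i < 3; i++)
          printf("PAGE_SHIFT=%d: one written page costs %lu 4K units\n",
                 page_shift[i], 1UL << (page_shift[i] - 12));
      return 0;
  }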
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index d1095df..f2fd46d 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -30,7 +30,7 @@
 
 
 /*
- * The lower ZRAM_FLAG_SHIFT bits of table.value is for
+ * The lower ZRAM_FLAG_SHIFT bits of table.flags is for
  * object size (excluding header), the higher bits is for
  * zram_pageflags.
  *
@@ -41,13 +41,15 @@
  */
 #define ZRAM_FLAG_SHIFT 24
 
-/* Flags for zram pages (table[page_no].value) */
+/* Flags for zram pages (table[page_no].flags) */
 enum zram_pageflags {
 	/* zram slot is locked */
 	ZRAM_LOCK = ZRAM_FLAG_SHIFT,
 	ZRAM_SAME,	/* Page consists the same element */
 	ZRAM_WB,	/* page is stored on backing_device */
+	ZRAM_UNDER_WB,	/* page is under writeback */
 	ZRAM_HUGE,	/* Incompressible page */
+	ZRAM_IDLE,	/* page not accessed since last idle marking */
 
 	__NR_ZRAM_PAGEFLAGS,
 };
@@ -60,7 +62,7 @@
 		unsigned long handle;
 		unsigned long element;
 	};
-	unsigned long value;
+	unsigned long flags;
 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
 	ktime_t ac_time;
 #endif
@@ -80,6 +82,11 @@
 	atomic_long_t max_used_pages;	/* no. of maximum pages stored */
 	atomic64_t writestall;		/* no. of write slow paths */
 	atomic64_t miss_free;		/* no. of missed free */
+#ifdef	CONFIG_ZRAM_WRITEBACK
+	atomic64_t bd_count;		/* no. of pages in backing device */
+	atomic64_t bd_reads;		/* no. of reads from backing device */
+	atomic64_t bd_writes;		/* no. of writes to backing device */
+#endif
 };
 
 struct zram {
@@ -105,8 +112,11 @@
 	 * zram is claimed so open request will be failed
 	 */
 	bool claim; /* Protected by bdev->bd_mutex */
-#ifdef CONFIG_ZRAM_WRITEBACK
 	struct file *backing_dev;
+#ifdef CONFIG_ZRAM_WRITEBACK
+	spinlock_t wb_limit_lock;
+	bool wb_limit_enable;
+	u64 bd_wb_limit;
 	struct block_device *bdev;
 	unsigned int old_block_size;
 	unsigned long *bitmap;
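The renamed flags word packs the compressed object size into the low ZRAM_FLAG_SHIFT bits and the zram_pageflags above them, which is exactly how zram_set_obj_size() and zram_test_flag() in zram_drv.c interpret it. A minimal standalone sketch of that layout, with kernel types replaced by plain C for illustration:

  #include <stdio.h>

  #define ZRAM_FLAG_SHIFT 24
  #define ZRAM_LOCK       ZRAM_FLAG_SHIFT  /* first flag bit, as in the enum above */

  int main(void)
  {
      unsigned long flags = 0;

      /* store a 3000-byte object size in the low 24 bits */
      flags = (flags >> ZRAM_FLAG_SHIFT << ZRAM_FLAG_SHIFT) | 3000;
      /* set a page flag in the high bits */
      flags |= 1UL << ZRAM_LOCK;

      printf("size=%lu locked=%d\n",
             flags & ((1UL << ZRAM_FLAG_SHIFT) - 1),
             !!(flags & (1UL << ZRAM_LOCK)));
      return 0;
  }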
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 0a84b7f..af6e240 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -265,7 +265,7 @@
 #include <linux/syscalls.h>
 #include <linux/completion.h>
 #include <linux/uuid.h>
-#include <crypto/chacha20.h>
+#include <crypto/chacha.h>
 
 #include <asm/processor.h>
 #include <linux/uaccess.h>
@@ -431,11 +431,10 @@
 #define crng_ready() (likely(crng_init > 1))
 static int crng_init_cnt = 0;
 static unsigned long crng_global_init_time = 0;
-#define CRNG_INIT_CNT_THRESH (2*CHACHA20_KEY_SIZE)
-static void _extract_crng(struct crng_state *crng,
-			  __u32 out[CHACHA20_BLOCK_WORDS]);
+#define CRNG_INIT_CNT_THRESH (2*CHACHA_KEY_SIZE)
+static void _extract_crng(struct crng_state *crng, __u8 out[CHACHA_BLOCK_SIZE]);
 static void _crng_backtrack_protect(struct crng_state *crng,
-				    __u32 tmp[CHACHA20_BLOCK_WORDS], int used);
+				    __u8 tmp[CHACHA_BLOCK_SIZE], int used);
 static void process_random_ready_list(void);
 static void _get_random_bytes(void *buf, int nbytes);
 
@@ -866,7 +865,7 @@
 	}
 	p = (unsigned char *) &primary_crng.state[4];
 	while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) {
-		p[crng_init_cnt % CHACHA20_KEY_SIZE] ^= *cp;
+		p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp;
 		cp++; crng_init_cnt++; len--;
 	}
 	spin_unlock_irqrestore(&primary_crng.lock, flags);
@@ -898,7 +897,7 @@
 	unsigned long		flags;
 	static unsigned char	lfsr = 1;
 	unsigned char		tmp;
-	unsigned		i, max = CHACHA20_KEY_SIZE;
+	unsigned		i, max = CHACHA_KEY_SIZE;
 	const char *		src_buf = cp;
 	char *			dest_buf = (char *) &primary_crng.state[4];
 
@@ -916,8 +915,8 @@
 		lfsr >>= 1;
 		if (tmp & 1)
 			lfsr ^= 0xE1;
-		tmp = dest_buf[i % CHACHA20_KEY_SIZE];
-		dest_buf[i % CHACHA20_KEY_SIZE] ^= src_buf[i % len] ^ lfsr;
+		tmp = dest_buf[i % CHACHA_KEY_SIZE];
+		dest_buf[i % CHACHA_KEY_SIZE] ^= src_buf[i % len] ^ lfsr;
 		lfsr += (tmp << 3) | (tmp >> 5);
 	}
 	spin_unlock_irqrestore(&primary_crng.lock, flags);
@@ -929,7 +928,7 @@
 	unsigned long	flags;
 	int		i, num;
 	union {
-		__u32	block[CHACHA20_BLOCK_WORDS];
+		__u8	block[CHACHA_BLOCK_SIZE];
 		__u32	key[8];
 	} buf;
 
@@ -940,7 +939,7 @@
 	} else {
 		_extract_crng(&primary_crng, buf.block);
 		_crng_backtrack_protect(&primary_crng, buf.block,
-					CHACHA20_KEY_SIZE);
+					CHACHA_KEY_SIZE);
 	}
 	spin_lock_irqsave(&crng->lock, flags);
 	for (i = 0; i < 8; i++) {
@@ -976,7 +975,7 @@
 }
 
 static void _extract_crng(struct crng_state *crng,
-			  __u32 out[CHACHA20_BLOCK_WORDS])
+			  __u8 out[CHACHA_BLOCK_SIZE])
 {
 	unsigned long v, flags;
 
@@ -993,7 +992,7 @@
 	spin_unlock_irqrestore(&crng->lock, flags);
 }
 
-static void extract_crng(__u32 out[CHACHA20_BLOCK_WORDS])
+static void extract_crng(__u8 out[CHACHA_BLOCK_SIZE])
 {
 	struct crng_state *crng = NULL;
 
@@ -1011,26 +1010,26 @@
  * enough) to mutate the CRNG key to provide backtracking protection.
  */
 static void _crng_backtrack_protect(struct crng_state *crng,
-				    __u32 tmp[CHACHA20_BLOCK_WORDS], int used)
+				    __u8 tmp[CHACHA_BLOCK_SIZE], int used)
 {
 	unsigned long	flags;
 	__u32		*s, *d;
 	int		i;
 
 	used = round_up(used, sizeof(__u32));
-	if (used + CHACHA20_KEY_SIZE > CHACHA20_BLOCK_SIZE) {
+	if (used + CHACHA_KEY_SIZE > CHACHA_BLOCK_SIZE) {
 		extract_crng(tmp);
 		used = 0;
 	}
 	spin_lock_irqsave(&crng->lock, flags);
-	s = &tmp[used / sizeof(__u32)];
+	s = (__u32 *) &tmp[used];
 	d = &crng->state[4];
 	for (i=0; i < 8; i++)
 		*d++ ^= *s++;
 	spin_unlock_irqrestore(&crng->lock, flags);
 }
 
-static void crng_backtrack_protect(__u32 tmp[CHACHA20_BLOCK_WORDS], int used)
+static void crng_backtrack_protect(__u8 tmp[CHACHA_BLOCK_SIZE], int used)
 {
 	struct crng_state *crng = NULL;
 
@@ -1045,8 +1044,8 @@
 
 static ssize_t extract_crng_user(void __user *buf, size_t nbytes)
 {
-	ssize_t ret = 0, i = CHACHA20_BLOCK_SIZE;
-	__u32 tmp[CHACHA20_BLOCK_WORDS];
+	ssize_t ret = 0, i = CHACHA_BLOCK_SIZE;
+	__u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4);
 	int large_request = (nbytes > 256);
 
 	while (nbytes) {
@@ -1060,7 +1059,7 @@
 		}
 
 		extract_crng(tmp);
-		i = min_t(int, nbytes, CHACHA20_BLOCK_SIZE);
+		i = min_t(int, nbytes, CHACHA_BLOCK_SIZE);
 		if (copy_to_user(buf, tmp, i)) {
 			ret = -EFAULT;
 			break;
@@ -1625,14 +1624,14 @@
  */
 static void _get_random_bytes(void *buf, int nbytes)
 {
-	__u32 tmp[CHACHA20_BLOCK_WORDS];
+	__u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4);
 
 	trace_get_random_bytes(nbytes, _RET_IP_);
 
-	while (nbytes >= CHACHA20_BLOCK_SIZE) {
+	while (nbytes >= CHACHA_BLOCK_SIZE) {
 		extract_crng(buf);
-		buf += CHACHA20_BLOCK_SIZE;
-		nbytes -= CHACHA20_BLOCK_SIZE;
+		buf += CHACHA_BLOCK_SIZE;
+		nbytes -= CHACHA_BLOCK_SIZE;
 	}
 
 	if (nbytes > 0) {
@@ -1640,7 +1639,7 @@
 		memcpy(buf, tmp, nbytes);
 		crng_backtrack_protect(tmp, nbytes);
 	} else
-		crng_backtrack_protect(tmp, CHACHA20_BLOCK_SIZE);
+		crng_backtrack_protect(tmp, CHACHA_BLOCK_SIZE);
 	memzero_explicit(tmp, sizeof(tmp));
 }
 
@@ -2211,8 +2210,8 @@
 
 struct batched_entropy {
 	union {
-		u64 entropy_u64[CHACHA20_BLOCK_SIZE / sizeof(u64)];
-		u32 entropy_u32[CHACHA20_BLOCK_SIZE / sizeof(u32)];
+		u64 entropy_u64[CHACHA_BLOCK_SIZE / sizeof(u64)];
+		u32 entropy_u32[CHACHA_BLOCK_SIZE / sizeof(u32)];
 	};
 	unsigned int position;
 	spinlock_t batch_lock;
@@ -2251,7 +2250,7 @@
 	batch = raw_cpu_ptr(&batched_entropy_u64);
 	spin_lock_irqsave(&batch->batch_lock, flags);
 	if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
-		extract_crng((__u32 *)batch->entropy_u64);
+		extract_crng((u8 *)batch->entropy_u64);
 		batch->position = 0;
 	}
 	ret = batch->entropy_u64[batch->position++];
@@ -2278,7 +2277,7 @@
 	batch = raw_cpu_ptr(&batched_entropy_u32);
 	spin_lock_irqsave(&batch->batch_lock, flags);
 	if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) {
-		extract_crng(batch->entropy_u32);
+		extract_crng((u8 *)batch->entropy_u32);
 		batch->position = 0;
 	}
 	ret = batch->entropy_u32[batch->position++];
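The random.c changes swap __u32 arrays sized in CHACHA20_BLOCK_WORDS for __u8 arrays sized in CHACHA_BLOCK_SIZE; the ChaCha block is 64 bytes either way, and the __aligned(4) annotation keeps the remaining word-wise access in _crng_backtrack_protect ((__u32 *)&tmp[used]) well defined. A small standalone sketch of the size relationship, with the constants mirrored from crypto/chacha.h:

  #include <assert.h>
  #include <stdint.h>
  #include <stdio.h>

  #define CHACHA_BLOCK_SIZE 64  /* bytes, as in <crypto/chacha.h> */
  #define CHACHA20_BLOCK_WORDS (CHACHA_BLOCK_SIZE / sizeof(uint32_t))

  int main(void)
  {
      /* 4-byte alignment so the buffer may still be read as 32-bit words. */
      uint8_t tmp[CHACHA_BLOCK_SIZE] __attribute__((aligned(4)));
      uint32_t *words = (uint32_t *)tmp;

      assert(sizeof(tmp) == CHACHA20_BLOCK_WORDS * sizeof(*words));
      printf("block: %zu bytes = %zu words\n",
             sizeof(tmp), (size_t)CHACHA20_BLOCK_WORDS);
      return 0;
  }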
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 608af20..e131237 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -37,6 +37,13 @@
 
 	  If in doubt, say N.
 
+config CPU_FREQ_TIMES
+       bool "CPU frequency time-in-state statistics"
+       help
+         Export CPU time-in-state information through procfs.
+
+         If in doubt, say N.
+
 choice
 	prompt "Default CPUFreq governor"
 	default CPU_FREQ_DEFAULT_GOV_USERSPACE if ARM_SA1100_CPUFREQ || ARM_SA1110_CPUFREQ
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index c1ffeab..648beca 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -5,7 +5,10 @@
 # CPUfreq stats
 obj-$(CONFIG_CPU_FREQ_STAT)             += cpufreq_stats.o
 
-# CPUfreq governors 
+# CPUfreq times
+obj-$(CONFIG_CPU_FREQ_TIMES)		+= cpufreq_times.o
+
+# CPUfreq governors
 obj-$(CONFIG_CPU_FREQ_GOV_PERFORMANCE)	+= cpufreq_performance.o
 obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE)	+= cpufreq_powersave.o
 obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE)	+= cpufreq_userspace.o
diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
index cf62a1f..803d41c 100644
--- a/drivers/cpufreq/arm_big_little.c
+++ b/drivers/cpufreq/arm_big_little.c
@@ -24,6 +24,7 @@
 #include <linux/cpufreq.h>
 #include <linux/cpumask.h>
 #include <linux/cpu_cooling.h>
+#include <linux/energy_model.h>
 #include <linux/export.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
@@ -456,6 +457,7 @@
 /* Per-CPU initialization */
 static int bL_cpufreq_init(struct cpufreq_policy *policy)
 {
+	struct em_data_callback em_cb = EM_DATA_CB(of_dev_pm_opp_get_cpu_power);
 	u32 cur_cluster = cpu_to_cluster(policy->cpu);
 	struct device *cpu_dev;
 	int ret;
@@ -487,6 +489,14 @@
 	policy->cpuinfo.transition_latency =
 				arm_bL_ops->get_transition_latency(cpu_dev);
 
+	ret = dev_pm_opp_get_opp_count(cpu_dev);
+	if (ret <= 0) {
+		dev_dbg(cpu_dev, "OPP table is not ready, deferring probe\n");
+		return -EPROBE_DEFER;
+	}
+
+	em_register_perf_domain(policy->cpus, ret, &em_cb);
+
 	if (is_bL_switching_enabled())
 		per_cpu(cpu_last_req_freq, policy->cpu) = clk_get_cpu_rate(policy->cpu);
 
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index e58bfcb..7556e07e 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -16,6 +16,7 @@
 #include <linux/cpu_cooling.h>
 #include <linux/cpufreq.h>
 #include <linux/cpumask.h>
+#include <linux/energy_model.h>
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/of.h>
@@ -152,6 +153,7 @@
 
 static int cpufreq_init(struct cpufreq_policy *policy)
 {
+	struct em_data_callback em_cb = EM_DATA_CB(of_dev_pm_opp_get_cpu_power);
 	struct cpufreq_frequency_table *freq_table;
 	struct opp_table *opp_table = NULL;
 	struct private_data *priv;
@@ -160,7 +162,7 @@
 	unsigned int transition_latency;
 	bool fallback = false;
 	const char *name;
-	int ret;
+	int ret, nr_opp;
 
 	cpu_dev = get_cpu_device(policy->cpu);
 	if (!cpu_dev) {
@@ -237,6 +239,7 @@
 		ret = -EPROBE_DEFER;
 		goto out_free_opp;
 	}
+	nr_opp = ret;
 
 	if (fallback) {
 		cpumask_setall(policy->cpus);
@@ -280,6 +283,8 @@
 	policy->cpuinfo.transition_latency = transition_latency;
 	policy->dvfs_possible_from_any_cpu = true;
 
+	em_register_perf_domain(policy->cpus, nr_opp, &em_cb);
+
 	return 0;
 
 out_free_cpufreq_table:
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index d321359..f858b03 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -19,6 +19,7 @@
 
 #include <linux/cpu.h>
 #include <linux/cpufreq.h>
+#include <linux/cpufreq_times.h>
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/init.h>
@@ -158,6 +159,12 @@
 }
 EXPORT_SYMBOL_GPL(arch_set_freq_scale);
 
+__weak void arch_set_max_freq_scale(struct cpumask *cpus,
+				    unsigned long policy_max_freq)
+{
+}
+EXPORT_SYMBOL_GPL(arch_set_max_freq_scale);
+
 /*
  * This is a generic cpufreq init() routine which can be used by cpufreq
  * drivers of SMP systems. It will do following:
@@ -349,6 +356,7 @@
 		}
 
 		cpufreq_stats_record_transition(policy, freqs->new);
+		cpufreq_times_record_transition(policy, freqs->new);
 		policy->cur = freqs->new;
 	}
 }
@@ -1296,6 +1304,7 @@
 			goto out_destroy_policy;
 
 		cpufreq_stats_create_table(policy);
+		cpufreq_times_create_policy(policy);
 
 		write_lock_irqsave(&cpufreq_driver_lock, flags);
 		list_add(&policy->policy_list, &cpufreq_policy_list);
@@ -1858,9 +1867,15 @@
 unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
 					unsigned int target_freq)
 {
+	int ret;
+
 	target_freq = clamp_val(target_freq, policy->min, policy->max);
 
-	return cpufreq_driver->fast_switch(policy, target_freq);
+	ret = cpufreq_driver->fast_switch(policy, target_freq);
+	if (ret)
+		cpufreq_times_record_transition(policy, ret);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
 
@@ -2240,6 +2255,8 @@
 	policy->max = new_policy->max;
 	trace_cpu_frequency_limits(policy);
 
+	arch_set_max_freq_scale(policy->cpus, policy->max);
+
 	policy->cached_target_freq = UINT_MAX;
 
 	pr_debug("new min and max freqs are %u - %u kHz\n",
@@ -2274,6 +2291,7 @@
 		ret = cpufreq_start_governor(policy);
 		if (!ret) {
 			pr_debug("cpufreq: governor change\n");
+			sched_cpufreq_governor_change(policy, old_gov);
 			return 0;
 		}
 		cpufreq_exit_governor(policy);
diff --git a/drivers/cpufreq/cpufreq_times.c b/drivers/cpufreq/cpufreq_times.c
new file mode 100644
index 0000000..5b5248a
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_times.c
@@ -0,0 +1,632 @@
+/* drivers/cpufreq/cpufreq_times.c
+ *
+ * Copyright (C) 2018 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/cpufreq_times.h>
+#include <linux/hashtable.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/proc_fs.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/threads.h>
+
+#define UID_HASH_BITS 10
+
+static DECLARE_HASHTABLE(uid_hash_table, UID_HASH_BITS);
+
+static DEFINE_SPINLOCK(task_time_in_state_lock); /* task->time_in_state */
+static DEFINE_SPINLOCK(uid_lock); /* uid_hash_table */
+
+struct concurrent_times {
+	atomic64_t active[NR_CPUS];
+	atomic64_t policy[NR_CPUS];
+};
+
+struct uid_entry {
+	uid_t uid;
+	unsigned int max_state;
+	struct hlist_node hash;
+	struct rcu_head rcu;
+	struct concurrent_times *concurrent_times;
+	u64 time_in_state[0];
+};
+
+/**
+ * struct cpu_freqs - per-cpu frequency information
+ * @offset: start of these freqs' stats in task time_in_state array
+ * @max_state: number of entries in freq_table
+ * @last_index: index in freq_table of last frequency switched to
+ * @freq_table: list of available frequencies
+ */
+struct cpu_freqs {
+	unsigned int offset;
+	unsigned int max_state;
+	unsigned int last_index;
+	unsigned int freq_table[0];
+};
+
+static struct cpu_freqs *all_freqs[NR_CPUS];
+
+static unsigned int next_offset;
+
+
+/* Caller must hold rcu_read_lock() */
+static struct uid_entry *find_uid_entry_rcu(uid_t uid)
+{
+	struct uid_entry *uid_entry;
+
+	hash_for_each_possible_rcu(uid_hash_table, uid_entry, hash, uid) {
+		if (uid_entry->uid == uid)
+			return uid_entry;
+	}
+	return NULL;
+}
+
+/* Caller must hold uid lock */
+static struct uid_entry *find_uid_entry_locked(uid_t uid)
+{
+	struct uid_entry *uid_entry;
+
+	hash_for_each_possible(uid_hash_table, uid_entry, hash, uid) {
+		if (uid_entry->uid == uid)
+			return uid_entry;
+	}
+	return NULL;
+}
+
+/* Caller must hold uid lock */
+static struct uid_entry *find_or_register_uid_locked(uid_t uid)
+{
+	struct uid_entry *uid_entry, *temp;
+	struct concurrent_times *times;
+	unsigned int max_state = READ_ONCE(next_offset);
+	size_t alloc_size = sizeof(*uid_entry) + max_state *
+		sizeof(uid_entry->time_in_state[0]);
+
+	uid_entry = find_uid_entry_locked(uid);
+	if (uid_entry) {
+		if (uid_entry->max_state == max_state)
+			return uid_entry;
+		/* uid_entry->time_in_state is too small to track all freqs, so
+		 * expand it.
+		 */
+		temp = __krealloc(uid_entry, alloc_size, GFP_ATOMIC);
+		if (!temp)
+			return uid_entry;
+		temp->max_state = max_state;
+		memset(temp->time_in_state + uid_entry->max_state, 0,
+		       (max_state - uid_entry->max_state) *
+		       sizeof(uid_entry->time_in_state[0]));
+		if (temp != uid_entry) {
+			hlist_replace_rcu(&uid_entry->hash, &temp->hash);
+			kfree_rcu(uid_entry, rcu);
+		}
+		return temp;
+	}
+
+	uid_entry = kzalloc(alloc_size, GFP_ATOMIC);
+	if (!uid_entry)
+		return NULL;
+	times = kzalloc(sizeof(*times), GFP_ATOMIC);
+	if (!times) {
+		kfree(uid_entry);
+		return NULL;
+	}
+
+	uid_entry->uid = uid;
+	uid_entry->max_state = max_state;
+	uid_entry->concurrent_times = times;
+
+	hash_add_rcu(uid_hash_table, &uid_entry->hash, uid);
+
+	return uid_entry;
+}
+
+static int single_uid_time_in_state_show(struct seq_file *m, void *ptr)
+{
+	struct uid_entry *uid_entry;
+	unsigned int i;
+	uid_t uid = from_kuid_munged(current_user_ns(), *(kuid_t *)m->private);
+
+	if (uid == overflowuid)
+		return -EINVAL;
+
+	rcu_read_lock();
+
+	uid_entry = find_uid_entry_rcu(uid);
+	if (!uid_entry) {
+		rcu_read_unlock();
+		return 0;
+	}
+
+	for (i = 0; i < uid_entry->max_state; ++i) {
+		u64 time = nsec_to_clock_t(uid_entry->time_in_state[i]);
+		seq_write(m, &time, sizeof(time));
+	}
+
+	rcu_read_unlock();
+
+	return 0;
+}
+
+static void *uid_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	if (*pos >= HASH_SIZE(uid_hash_table))
+		return NULL;
+
+	return &uid_hash_table[*pos];
+}
+
+static void *uid_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	do {
+		(*pos)++;
+
+		if (*pos >= HASH_SIZE(uid_hash_table))
+			return NULL;
+	} while (hlist_empty(&uid_hash_table[*pos]));
+
+	return &uid_hash_table[*pos];
+}
+
+static void uid_seq_stop(struct seq_file *seq, void *v) { }
+
+static int uid_time_in_state_seq_show(struct seq_file *m, void *v)
+{
+	struct uid_entry *uid_entry;
+	struct cpu_freqs *freqs, *last_freqs = NULL;
+	int i, cpu;
+
+	if (v == uid_hash_table) {
+		seq_puts(m, "uid:");
+		for_each_possible_cpu(cpu) {
+			freqs = all_freqs[cpu];
+			if (!freqs || freqs == last_freqs)
+				continue;
+			last_freqs = freqs;
+			for (i = 0; i < freqs->max_state; i++) {
+				seq_put_decimal_ull(m, " ",
+						    freqs->freq_table[i]);
+			}
+		}
+		seq_putc(m, '\n');
+	}
+
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(uid_entry, (struct hlist_head *)v, hash) {
+		if (uid_entry->max_state) {
+			seq_put_decimal_ull(m, "", uid_entry->uid);
+			seq_putc(m, ':');
+		}
+		for (i = 0; i < uid_entry->max_state; ++i) {
+			u64 time = nsec_to_clock_t(uid_entry->time_in_state[i]);
+			seq_put_decimal_ull(m, " ", time);
+		}
+		if (uid_entry->max_state)
+			seq_putc(m, '\n');
+	}
+
+	rcu_read_unlock();
+	return 0;
+}
+
+static int concurrent_time_seq_show(struct seq_file *m, void *v,
+	atomic64_t *(*get_times)(struct concurrent_times *))
+{
+	struct uid_entry *uid_entry;
+	int i, num_possible_cpus = num_possible_cpus();
+
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(uid_entry, (struct hlist_head *)v, hash) {
+		atomic64_t *times = get_times(uid_entry->concurrent_times);
+
+		seq_put_decimal_ull(m, "", (u64)uid_entry->uid);
+		seq_putc(m, ':');
+
+		for (i = 0; i < num_possible_cpus; ++i) {
+			u64 time = nsec_to_clock_t(atomic64_read(&times[i]));
+
+			seq_put_decimal_ull(m, " ", time);
+		}
+		seq_putc(m, '\n');
+	}
+
+	rcu_read_unlock();
+
+	return 0;
+}
+
+static inline atomic64_t *get_active_times(struct concurrent_times *times)
+{
+	return times->active;
+}
+
+static int concurrent_active_time_seq_show(struct seq_file *m, void *v)
+{
+	if (v == uid_hash_table) {
+		seq_put_decimal_ull(m, "cpus: ", num_possible_cpus());
+		seq_putc(m, '\n');
+	}
+
+	return concurrent_time_seq_show(m, v, get_active_times);
+}
+
+static inline atomic64_t *get_policy_times(struct concurrent_times *times)
+{
+	return times->policy;
+}
+
+static int concurrent_policy_time_seq_show(struct seq_file *m, void *v)
+{
+	int i;
+	struct cpu_freqs *freqs, *last_freqs = NULL;
+
+	if (v == uid_hash_table) {
+		int cnt = 0;
+
+		for_each_possible_cpu(i) {
+			freqs = all_freqs[i];
+			if (!freqs)
+				continue;
+			if (freqs != last_freqs) {
+				if (last_freqs) {
+					seq_put_decimal_ull(m, ": ", cnt);
+					seq_putc(m, ' ');
+					cnt = 0;
+				}
+				seq_put_decimal_ull(m, "policy", i);
+
+				last_freqs = freqs;
+			}
+			cnt++;
+		}
+		if (last_freqs) {
+			seq_put_decimal_ull(m, ": ", cnt);
+			seq_putc(m, '\n');
+		}
+	}
+
+	return concurrent_time_seq_show(m, v, get_policy_times);
+}
+
+void cpufreq_task_times_init(struct task_struct *p)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&task_time_in_state_lock, flags);
+	p->time_in_state = NULL;
+	spin_unlock_irqrestore(&task_time_in_state_lock, flags);
+	p->max_state = 0;
+}
+
+void cpufreq_task_times_alloc(struct task_struct *p)
+{
+	void *temp;
+	unsigned long flags;
+	unsigned int max_state = READ_ONCE(next_offset);
+
+	/* We use one array to avoid multiple allocs per task */
+	temp = kcalloc(max_state, sizeof(p->time_in_state[0]), GFP_ATOMIC);
+	if (!temp)
+		return;
+
+	spin_lock_irqsave(&task_time_in_state_lock, flags);
+	p->time_in_state = temp;
+	spin_unlock_irqrestore(&task_time_in_state_lock, flags);
+	p->max_state = max_state;
+}
+
+/* Caller must hold task_time_in_state_lock */
+static int cpufreq_task_times_realloc_locked(struct task_struct *p)
+{
+	void *temp;
+	unsigned int max_state = READ_ONCE(next_offset);
+
+	temp = krealloc(p->time_in_state, max_state * sizeof(u64), GFP_ATOMIC);
+	if (!temp)
+		return -ENOMEM;
+	p->time_in_state = temp;
+	memset(p->time_in_state + p->max_state, 0,
+	       (max_state - p->max_state) * sizeof(u64));
+	p->max_state = max_state;
+	return 0;
+}
+
+void cpufreq_task_times_exit(struct task_struct *p)
+{
+	unsigned long flags;
+	void *temp;
+
+	if (!p->time_in_state)
+		return;
+
+	spin_lock_irqsave(&task_time_in_state_lock, flags);
+	temp = p->time_in_state;
+	p->time_in_state = NULL;
+	spin_unlock_irqrestore(&task_time_in_state_lock, flags);
+	kfree(temp);
+}
+
+int proc_time_in_state_show(struct seq_file *m, struct pid_namespace *ns,
+	struct pid *pid, struct task_struct *p)
+{
+	unsigned int cpu, i;
+	u64 cputime;
+	unsigned long flags;
+	struct cpu_freqs *freqs;
+	struct cpu_freqs *last_freqs = NULL;
+
+	spin_lock_irqsave(&task_time_in_state_lock, flags);
+	for_each_possible_cpu(cpu) {
+		freqs = all_freqs[cpu];
+		if (!freqs || freqs == last_freqs)
+			continue;
+		last_freqs = freqs;
+
+		seq_printf(m, "cpu%u\n", cpu);
+		for (i = 0; i < freqs->max_state; i++) {
+			cputime = 0;
+			if (freqs->offset + i < p->max_state &&
+			    p->time_in_state)
+				cputime = p->time_in_state[freqs->offset + i];
+			seq_printf(m, "%u %lu\n", freqs->freq_table[i],
+				   (unsigned long)nsec_to_clock_t(cputime));
+		}
+	}
+	spin_unlock_irqrestore(&task_time_in_state_lock, flags);
+	return 0;
+}
+
+void cpufreq_acct_update_power(struct task_struct *p, u64 cputime)
+{
+	unsigned long flags;
+	unsigned int state;
+	unsigned int active_cpu_cnt = 0;
+	unsigned int policy_cpu_cnt = 0;
+	unsigned int policy_first_cpu;
+	struct uid_entry *uid_entry;
+	struct cpu_freqs *freqs = all_freqs[task_cpu(p)];
+	struct cpufreq_policy *policy;
+	uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p));
+	int cpu = 0;
+
+	if (!freqs || is_idle_task(p) || p->flags & PF_EXITING)
+		return;
+
+	state = freqs->offset + READ_ONCE(freqs->last_index);
+
+	spin_lock_irqsave(&task_time_in_state_lock, flags);
+	if ((state < p->max_state || !cpufreq_task_times_realloc_locked(p)) &&
+	    p->time_in_state)
+		p->time_in_state[state] += cputime;
+	spin_unlock_irqrestore(&task_time_in_state_lock, flags);
+
+	spin_lock_irqsave(&uid_lock, flags);
+	uid_entry = find_or_register_uid_locked(uid);
+	if (uid_entry && state < uid_entry->max_state)
+		uid_entry->time_in_state[state] += cputime;
+	spin_unlock_irqrestore(&uid_lock, flags);
+
+	rcu_read_lock();
+	uid_entry = find_uid_entry_rcu(uid);
+	if (!uid_entry) {
+		rcu_read_unlock();
+		return;
+	}
+
+	for_each_possible_cpu(cpu)
+		if (!idle_cpu(cpu))
+			++active_cpu_cnt;
+
+	atomic64_add(cputime,
+		     &uid_entry->concurrent_times->active[active_cpu_cnt - 1]);
+
+	policy = cpufreq_cpu_get(task_cpu(p));
+	if (!policy) {
+		/*
+		 * This CPU may have just come up and not have a cpufreq policy
+		 * yet.
+		 */
+		rcu_read_unlock();
+		return;
+	}
+
+	for_each_cpu(cpu, policy->related_cpus)
+		if (!idle_cpu(cpu))
+			++policy_cpu_cnt;
+
+	policy_first_cpu = cpumask_first(policy->related_cpus);
+	cpufreq_cpu_put(policy);
+
+	atomic64_add(cputime,
+		     &uid_entry->concurrent_times->policy[policy_first_cpu +
+							  policy_cpu_cnt - 1]);
+	rcu_read_unlock();
+}
+
+static int cpufreq_times_get_index(struct cpu_freqs *freqs, unsigned int freq)
+{
+	int index;
+	for (index = 0; index < freqs->max_state; ++index) {
+		if (freqs->freq_table[index] == freq)
+			return index;
+	}
+	return -1;
+}
+
+void cpufreq_times_create_policy(struct cpufreq_policy *policy)
+{
+	int cpu, index = 0;
+	unsigned int count = 0;
+	struct cpufreq_frequency_table *pos, *table;
+	struct cpu_freqs *freqs;
+	void *tmp;
+
+	if (all_freqs[policy->cpu])
+		return;
+
+	table = policy->freq_table;
+	if (!table)
+		return;
+
+	cpufreq_for_each_valid_entry(pos, table)
+		count++;
+
+	tmp = kzalloc(sizeof(*freqs) + sizeof(freqs->freq_table[0]) * count,
+		       GFP_KERNEL);
+	if (!tmp)
+		return;
+
+	freqs = tmp;
+	freqs->max_state = count;
+
+	cpufreq_for_each_valid_entry(pos, table)
+		freqs->freq_table[index++] = pos->frequency;
+
+	index = cpufreq_times_get_index(freqs, policy->cur);
+	if (index >= 0)
+		WRITE_ONCE(freqs->last_index, index);
+
+	freqs->offset = next_offset;
+	WRITE_ONCE(next_offset, freqs->offset + count);
+	for_each_cpu(cpu, policy->related_cpus)
+		all_freqs[cpu] = freqs;
+}
+
+static void uid_entry_reclaim(struct rcu_head *rcu)
+{
+	struct uid_entry *uid_entry = container_of(rcu, struct uid_entry, rcu);
+
+	kfree(uid_entry->concurrent_times);
+	kfree(uid_entry);
+}
+
+void cpufreq_task_times_remove_uids(uid_t uid_start, uid_t uid_end)
+{
+	struct uid_entry *uid_entry;
+	struct hlist_node *tmp;
+	unsigned long flags;
+
+	spin_lock_irqsave(&uid_lock, flags);
+
+	for (; uid_start <= uid_end; uid_start++) {
+		hash_for_each_possible_safe(uid_hash_table, uid_entry, tmp,
+			hash, uid_start) {
+			if (uid_start == uid_entry->uid) {
+				hash_del_rcu(&uid_entry->hash);
+				call_rcu(&uid_entry->rcu, uid_entry_reclaim);
+			}
+		}
+	}
+
+	spin_unlock_irqrestore(&uid_lock, flags);
+}
+
+void cpufreq_times_record_transition(struct cpufreq_policy *policy,
+	unsigned int new_freq)
+{
+	int index;
+	struct cpu_freqs *freqs = all_freqs[policy->cpu];
+	if (!freqs)
+		return;
+
+	index = cpufreq_times_get_index(freqs, new_freq);
+	if (index >= 0)
+		WRITE_ONCE(freqs->last_index, index);
+}
+
+static const struct seq_operations uid_time_in_state_seq_ops = {
+	.start = uid_seq_start,
+	.next = uid_seq_next,
+	.stop = uid_seq_stop,
+	.show = uid_time_in_state_seq_show,
+};
+
+static int uid_time_in_state_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &uid_time_in_state_seq_ops);
+}
+
+int single_uid_time_in_state_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, single_uid_time_in_state_show,
+			&(inode->i_uid));
+}
+
+static const struct file_operations uid_time_in_state_fops = {
+	.open		= uid_time_in_state_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static const struct seq_operations concurrent_active_time_seq_ops = {
+	.start = uid_seq_start,
+	.next = uid_seq_next,
+	.stop = uid_seq_stop,
+	.show = concurrent_active_time_seq_show,
+};
+
+static int concurrent_active_time_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &concurrent_active_time_seq_ops);
+}
+
+static const struct file_operations concurrent_active_time_fops = {
+	.open		= concurrent_active_time_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static const struct seq_operations concurrent_policy_time_seq_ops = {
+	.start = uid_seq_start,
+	.next = uid_seq_next,
+	.stop = uid_seq_stop,
+	.show = concurrent_policy_time_seq_show,
+};
+
+static int concurrent_policy_time_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &concurrent_policy_time_seq_ops);
+}
+
+static const struct file_operations concurrent_policy_time_fops = {
+	.open		= concurrent_policy_time_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int __init cpufreq_times_init(void)
+{
+	proc_create_data("uid_time_in_state", 0444, NULL,
+			 &uid_time_in_state_fops, NULL);
+
+	proc_create_data("uid_concurrent_active_time", 0444, NULL,
+			 &concurrent_active_time_fops, NULL);
+
+	proc_create_data("uid_concurrent_policy_time", 0444, NULL,
+			 &concurrent_policy_time_fops, NULL);
+
+	return 0;
+}
+
+early_initcall(cpufreq_times_init);
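cpufreq_acct_update_power() above indexes the per-uid concurrent arrays by the count of currently non-idle CPUs: active[active_cpu_cnt - 1] system-wide and policy[policy_first_cpu + policy_cpu_cnt - 1] within the task's cpufreq policy, which is what /proc/uid_concurrent_{active,policy}_time later expose. A small standalone sketch of that index math, assuming a 4+4 topology with policies starting at CPU0 and CPU4 purely for illustration:

  #include <stdio.h>

  int main(void)
  {
      int active_cpu_cnt = 5;     /* non-idle CPUs in the system (example) */
      int policy_first_cpu = 4;   /* first CPU of the task's policy (example) */
      int policy_cpu_cnt = 3;     /* non-idle CPUs in that policy (example) */

      printf("active slot: %d\n", active_cpu_cnt - 1);                   /* active[4] */
      printf("policy slot: %d\n", policy_first_cpu + policy_cpu_cnt - 1); /* policy[6] */
      return 0;
  }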
diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c
index 3f06934..80001120 100644
--- a/drivers/cpufreq/scmi-cpufreq.c
+++ b/drivers/cpufreq/scmi-cpufreq.c
@@ -12,6 +12,7 @@
 #include <linux/cpufreq.h>
 #include <linux/cpumask.h>
 #include <linux/cpu_cooling.h>
+#include <linux/energy_model.h>
 #include <linux/export.h>
 #include <linux/module.h>
 #include <linux/pm_opp.h>
@@ -103,13 +104,42 @@
 	return 0;
 }
 
+static int __maybe_unused
+scmi_get_cpu_power(unsigned long *power, unsigned long *KHz, int cpu)
+{
+	struct device *cpu_dev = get_cpu_device(cpu);
+	unsigned long Hz;
+	int ret, domain;
+
+	if (!cpu_dev) {
+		pr_err("failed to get cpu%d device\n", cpu);
+		return -ENODEV;
+	}
+
+	domain = handle->perf_ops->device_domain_id(cpu_dev);
+	if (domain < 0)
+		return domain;
+
+	/* Get the power cost of the performance domain. */
+	Hz = *KHz * 1000;
+	ret = handle->perf_ops->est_power_get(handle, domain, &Hz, power);
+	if (ret)
+		return ret;
+
+	/* The EM framework specifies the frequency in KHz. */
+	*KHz = Hz / 1000;
+
+	return 0;
+}
+
 static int scmi_cpufreq_init(struct cpufreq_policy *policy)
 {
-	int ret;
+	int ret, nr_opp;
 	unsigned int latency;
 	struct device *cpu_dev;
 	struct scmi_data *priv;
 	struct cpufreq_frequency_table *freq_table;
+	struct em_data_callback em_cb = EM_DATA_CB(scmi_get_cpu_power);
 
 	cpu_dev = get_cpu_device(policy->cpu);
 	if (!cpu_dev) {
@@ -142,6 +172,7 @@
 		ret = -EPROBE_DEFER;
 		goto out_free_opp;
 	}
+	nr_opp = ret;
 
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (!priv) {
@@ -171,6 +202,9 @@
 	policy->cpuinfo.transition_latency = latency;
 
 	policy->fast_switch_possible = true;
+
+	em_register_perf_domain(policy->cpus, nr_opp, &em_cb);
+
 	return 0;
 
 out_free_priv:
diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c
index 87a98ec..05fc744 100644
--- a/drivers/cpufreq/scpi-cpufreq.c
+++ b/drivers/cpufreq/scpi-cpufreq.c
@@ -23,6 +23,7 @@
 #include <linux/cpufreq.h>
 #include <linux/cpumask.h>
 #include <linux/cpu_cooling.h>
+#include <linux/energy_model.h>
 #include <linux/export.h>
 #include <linux/module.h>
 #include <linux/of_platform.h>
@@ -98,11 +99,12 @@
 
 static int scpi_cpufreq_init(struct cpufreq_policy *policy)
 {
-	int ret;
+	int ret, nr_opp;
 	unsigned int latency;
 	struct device *cpu_dev;
 	struct scpi_data *priv;
 	struct cpufreq_frequency_table *freq_table;
+	struct em_data_callback em_cb = EM_DATA_CB(of_dev_pm_opp_get_cpu_power);
 
 	cpu_dev = get_cpu_device(policy->cpu);
 	if (!cpu_dev) {
@@ -135,6 +137,7 @@
 		ret = -EPROBE_DEFER;
 		goto out_free_opp;
 	}
+	nr_opp = ret;
 
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (!priv) {
@@ -170,6 +173,9 @@
 	policy->cpuinfo.transition_latency = latency;
 
 	policy->fast_switch_possible = false;
+
+	em_register_perf_domain(policy->cpus, nr_opp, &em_cb);
+
 	return 0;
 
 out_free_cpufreq_table:
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 6df894d..96a3a9b 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -221,7 +221,7 @@
 	}
 
 	/* Take note of the planned idle state. */
-	sched_idle_set_state(target_state);
+	sched_idle_set_state(target_state, index);
 
 	trace_cpu_idle_rcuidle(index, dev->cpu);
 	time_start = ns_to_ktime(local_clock());
@@ -235,7 +235,7 @@
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
 
 	/* The cpu is no longer idle or about to enter idle. */
-	sched_idle_set_state(NULL);
+	sched_idle_set_state(NULL, -1);
 
 	if (broadcast) {
 		if (WARN_ON_ONCE(!irqs_disabled()))
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index e26a409..4961f32 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -131,10 +131,6 @@
 	int		interval_ptr;
 };
 
-
-#define LOAD_INT(x) ((x) >> FSHIFT)
-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
-
 static inline int get_loadavg(unsigned long load)
 {
 	return LOAD_INT(load) * 10 + LOAD_FRAC(load) / 10;
diff --git a/drivers/energy_model/Kconfig b/drivers/energy_model/Kconfig
new file mode 100644
index 0000000..3fbf968
--- /dev/null
+++ b/drivers/energy_model/Kconfig
@@ -0,0 +1,16 @@
+config LEGACY_ENERGY_MODEL_DT
+	bool "Legacy DT-based Energy Model of CPUs"
+	default n
+	help
+	  The Energy Aware Scheduler (EAS) used to rely on Energy Models
+	  (EMs) statically defined in the Device Tree. More recent
+	  versions of EAS now rely on the EM framework to get the power
+	  costs of CPUs.
+
+	  This driver reads old-style static EMs from DT and feeds them
+	  into the EM framework, which makes it possible to use EAS on
+	  platforms with old DT files. Since EAS now uses only the active
+	  costs of CPUs, the cluster-related costs and idle costs of the
+	  old EM are ignored.
+
+	  If in doubt, say N.
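
As a hedged sketch of what feeding an energy model into the EM framework looks like (mirroring how the cpufreq drivers touched earlier in this patch use the API; the function names and values below are hypothetical, and <linux/energy_model.h> plus <linux/cpufreq.h> are assumed):

/* Hypothetical sketch only: the EM registration pattern used in this patch. */
static int example_active_power(unsigned long *mW, unsigned long *KHz, int cpu)
{
	/* Report the OPP at or above the requested *KHz and its cost. */
	*KHz = 1000000;		/* kHz, hypothetical OPP */
	*mW = 150;		/* mW, hypothetical cost */
	return 0;
}

static void example_register(struct cpufreq_policy *policy, int nr_opp)
{
	struct em_data_callback em_cb = EM_DATA_CB(example_active_power);

	em_register_perf_domain(policy->cpus, nr_opp, &em_cb);
}

The legacy driver below follows the same pattern, except that its callback walks a table rebuilt from the DT "busy-cost-data" pairs.
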
diff --git a/drivers/energy_model/Makefile b/drivers/energy_model/Makefile
new file mode 100644
index 0000000..7bc0a7e
--- /dev/null
+++ b/drivers/energy_model/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_LEGACY_ENERGY_MODEL_DT)	+= legacy_em_dt.o
diff --git a/drivers/energy_model/legacy_em_dt.c b/drivers/energy_model/legacy_em_dt.c
new file mode 100644
index 0000000..b608790
--- /dev/null
+++ b/drivers/energy_model/legacy_em_dt.c
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Legacy Energy Model loading driver
+ *
+ * Copyright (C) 2018, ARM Ltd.
+ * Written by: Quentin Perret, ARM Ltd.
+ */
+
+#define pr_fmt(fmt) "legacy-dt-em: " fmt
+
+#include <linux/cpufreq.h>
+#include <linux/cpumask.h>
+#include <linux/cpuset.h>
+#include <linux/energy_model.h>
+#include <linux/gfp.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+
+static cpumask_var_t cpus_to_visit;
+
+static DEFINE_PER_CPU(unsigned long, nr_states) = 0;
+
+struct em_state {
+	unsigned long frequency;
+	unsigned long power;
+	unsigned long capacity;
+};
+static DEFINE_PER_CPU(struct em_state*, cpu_em) = NULL;
+
+static void finish_em_loading_workfn(struct work_struct *work);
+static DECLARE_WORK(finish_em_loading_work, finish_em_loading_workfn);
+
+static DEFINE_MUTEX(em_loading_mutex);
+
+/*
+ * Callback given to the EM framework. All this does is browse the table
+ * created by init_em_dt_callback().
+ */
+static int get_power(unsigned long *mW, unsigned long *KHz, int cpu)
+{
+	unsigned long nstates = per_cpu(nr_states, cpu);
+	struct em_state *em = per_cpu(cpu_em, cpu);
+	int i;
+
+	if (!nstates || !em)
+		return -ENODEV;
+
+	for (i = 0; i < nstates - 1; i++) {
+		if (em[i].frequency > *KHz)
+			break;
+	}
+
+	*KHz = em[i].frequency;
+	*mW = em[i].power;
+
+	return 0;
+}
+
+static int init_em_dt_callback(struct notifier_block *nb, unsigned long val,
+			       void *data)
+{
+	struct em_data_callback em_cb = EM_DATA_CB(get_power);
+	unsigned long nstates, scale_cpu, max_freq;
+	struct cpufreq_policy *policy = data;
+	const struct property *prop;
+	struct device_node *cn, *cp;
+	struct em_state *em;
+	int cpu, i, ret = 0;
+	const __be32 *tmp;
+
+	if (val != CPUFREQ_NOTIFY)
+		return 0;
+
+	mutex_lock(&em_loading_mutex);
+
+	/* Do not register an energy model twice */
+	for_each_cpu(cpu, policy->cpus) {
+		if (per_cpu(nr_states, cpu) || per_cpu(cpu_em, cpu)) {
+			pr_err("EM of CPU%d already loaded\n", cpu);
+			ret = -EEXIST;
+			goto unlock;
+		}
+	}
+
+	cpu = cpumask_first(policy->cpus);
+	max_freq = policy->cpuinfo.max_freq;
+	if (!max_freq) {
+		pr_err("No cpuinfo.max_freq for CPU%d\n", cpu);
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	cn = of_get_cpu_node(cpu, NULL);
+	if (!cn) {
+		pr_err("No device_node for CPU%d\n", cpu);
+		ret = -ENODEV;
+		goto unlock;
+	}
+
+	cp = of_parse_phandle(cn, "sched-energy-costs", 0);
+	if (!cp) {
+		pr_err("CPU%d node has no sched-energy-costs\n", cpu);
+		ret = -ENODEV;
+		goto unlock;
+	}
+
+	prop = of_find_property(cp, "busy-cost-data", NULL);
+	if (!prop || !prop->value) {
+		pr_err("No busy-cost-data for CPU%d\n", cpu);
+		ret = -ENODEV;
+		goto unlock;
+	}
+
+	nstates = (prop->length / sizeof(u32)) / 2;
+	em = kcalloc(nstates, sizeof(*em), GFP_KERNEL);
+	if (!em) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	/* Copy the capacity and power cost to the table. */
+	for (i = 0, tmp = prop->value; i < nstates; i++) {
+		em[i].capacity = be32_to_cpup(tmp++);
+		em[i].power = be32_to_cpup(tmp++);
+	}
+
+	/* Get the CPU capacity (according to the EM) */
+	scale_cpu = em[nstates - 1].capacity;
+	if (!scale_cpu) {
+		pr_err("CPU%d: capacity cannot be 0\n", cpu);
+		kfree(em);
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	/* Re-compute the intermediate frequencies based on the EM. */
+	for (i = 0; i < nstates; i++)
+		em[i].frequency = em[i].capacity * max_freq / scale_cpu;
+
+	/* Assign the table to all CPUs of this policy. */
+	for_each_cpu(i, policy->cpus) {
+		per_cpu(nr_states, i) = nstates;
+		per_cpu(cpu_em, i) = em;
+	}
+
+	pr_info("Registering EM of %*pbl\n", cpumask_pr_args(policy->cpus));
+	em_register_perf_domain(policy->cpus, nstates, &em_cb);
+
+	/* Finish the work when all possible CPUs have been registered. */
+	cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->cpus);
+	if (cpumask_empty(cpus_to_visit))
+		schedule_work(&finish_em_loading_work);
+
+unlock:
+	mutex_unlock(&em_loading_mutex);
+
+	return ret;
+}
+
+static struct notifier_block init_em_dt_notifier = {
+	.notifier_call = init_em_dt_callback,
+};
+
+static void finish_em_loading_workfn(struct work_struct *work)
+{
+	cpufreq_unregister_notifier(&init_em_dt_notifier,
+				    CPUFREQ_POLICY_NOTIFIER);
+	free_cpumask_var(cpus_to_visit);
+
+	/* Let the scheduler know the Energy Model is ready. */
+	rebuild_sched_domains();
+}
+
+static int __init register_cpufreq_notifier(void)
+{
+	int ret;
+
+	if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
+		return -ENOMEM;
+
+	cpumask_copy(cpus_to_visit, cpu_possible_mask);
+
+	ret = cpufreq_register_notifier(&init_em_dt_notifier,
+					CPUFREQ_POLICY_NOTIFIER);
+
+	if (ret)
+		free_cpumask_var(cpus_to_visit);
+
+	return ret;
+}
+core_initcall(register_cpufreq_notifier);
diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c
index 6434294..c8024a3 100644
--- a/drivers/firmware/arm_scmi/perf.c
+++ b/drivers/firmware/arm_scmi/perf.c
@@ -427,6 +427,33 @@
 	return ret;
 }
 
+static int scmi_dvfs_est_power_get(const struct scmi_handle *handle, u32 domain,
+				   unsigned long *freq, unsigned long *power)
+{
+	struct scmi_perf_info *pi = handle->perf_priv;
+	struct perf_dom_info *dom;
+	unsigned long opp_freq;
+	int idx, ret = -EINVAL;
+	struct scmi_opp *opp;
+
+	dom = pi->dom_info + domain;
+	if (!dom)
+		return -EIO;
+
+	for (opp = dom->opp, idx = 0; idx < dom->opp_count; idx++, opp++) {
+		opp_freq = opp->perf * dom->mult_factor;
+		if (opp_freq < *freq)
+			continue;
+
+		*freq = opp_freq;
+		*power = opp->power;
+		ret = 0;
+		break;
+	}
+
+	return ret;
+}
+
 static struct scmi_perf_ops perf_ops = {
 	.limits_set = scmi_perf_limits_set,
 	.limits_get = scmi_perf_limits_get,
@@ -437,6 +464,7 @@
 	.device_opps_add = scmi_dvfs_device_opps_add,
 	.freq_set = scmi_dvfs_freq_set,
 	.freq_get = scmi_dvfs_freq_get,
+	.est_power_get = scmi_dvfs_est_power_get,
 };
 
 static int scmi_perf_protocol_init(struct scmi_handle *handle)
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index d984509..818d399 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -28,7 +28,8 @@
 				   -D__NO_FORTIFY \
 				   $(call cc-option,-ffreestanding) \
 				   $(call cc-option,-fno-stack-protector) \
-				   -D__DISABLE_EXPORTS
+				   -D__DISABLE_EXPORTS \
+				   $(DISABLE_LTO)
 
 GCOV_PROFILE			:= n
 KASAN_SANITIZE			:= n
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
index 95f332e..e4a8b33 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
@@ -32,6 +32,12 @@
 
 calcs_ccflags := -mhard-float -msse $(cc_stack_align)
 
+# Use -msse2 only with clang:
+#   https://bugs.freedesktop.org/show_bug.cgi?id=109487
+ifdef CONFIG_CC_IS_CLANG
+calcs_cc_flags += -msse2
+endif
+
 CFLAGS_dcn_calcs.o := $(calcs_ccflags)
 CFLAGS_dcn_calc_auto.o := $(calcs_ccflags)
 CFLAGS_dcn_calc_math.o := $(calcs_ccflags) -Wno-tautological-compare
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index d97ca65..648c80c 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -32,6 +32,12 @@
 
 dml_ccflags := -mhard-float -msse $(cc_stack_align)
 
+# Use -msse2 only with clang:
+#   https://bugs.freedesktop.org/show_bug.cgi?id=109487
+ifdef CONFIG_CC_IS_CLANG
+dml_ccflags += -msse2
+endif
+
 CFLAGS_display_mode_lib.o := $(dml_ccflags)
 CFLAGS_display_pipe_clocks.o := $(dml_ccflags)
 CFLAGS_dml1_display_rq_dlg_calc.o := $(dml_ccflags)
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 8b8c123..499e424 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -479,6 +479,15 @@
 
 	  If unsure, say N.
 
+config DM_VERITY_AVB
+	tristate "Support AVB specific verity error behavior"
+	depends on DM_VERITY
+	---help---
+	  Enables Android Verified Boot platform-specific error
+	  behavior. In particular, it will modify the vbmeta partition
+	  specified on the kernel command line when a non-transient error
+	  occurs, followed by a kernel panic.
+
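
A hedged illustration of how this behavior is configured; the parameter names match the module parameters declared in dm-verity-avb.c later in this patch, while the device path is hypothetical:

/*
 * Example kernel command line (illustrative values only):
 *
 *   androidboot.vbmeta.device=/dev/mmcblk0p5
 *   androidboot.vbmeta.invalidate_on_error=yes
 *
 * With these set, a non-transient dm-verity corruption in restart mode
 * stamps the vbmeta partition before the device is restarted.
 */
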
 config DM_VERITY_FEC
 	bool "Verity forward error correction support"
 	depends on DM_VERITY
@@ -559,4 +568,16 @@
 
 	  If unsure, say N.
 
+config DM_BOW
+	tristate "Backup block device"
+	depends on BLK_DEV_DM
+	select DM_BUFIO
+	---help---
+	  This device-mapper target takes a device and keeps a log of all
+	  changes, using free blocks identified by issuing a trim command.
+	  The original state can then be restored by running a command-line
+	  utility, or the changes can be committed by simply replacing the
+	  target.
+
+	  If unsure, say N.
+
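
As a hedged sketch of how the target added below is driven (the state values and the "free" attribute come from the dm-bow driver in this patch; the sysfs path is indicative and depends on the dm device number):

/*
 * dm-bow exposes a small state machine via sysfs, e.g.
 * /sys/block/dm-<N>/bow/state:
 *
 *   0 (TRIM)       - initial state; trims mark blocks as free backup space
 *   1 (CHECKPOINT) - writes are backed up to free space and logged
 *   2 (COMMITTED)  - backups stop and I/O passes straight through
 *
 * Transitions are only accepted one step forward (0 -> 1 -> 2), and the
 * read-only "free" attribute reports how many bytes of trimmed space are
 * currently available for backups.
 */
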
 endif # MD
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 822f4e8..8bb6800 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -68,11 +68,16 @@
 obj-$(CONFIG_DM_INTEGRITY)	+= dm-integrity.o
 obj-$(CONFIG_DM_ZONED)		+= dm-zoned.o
 obj-$(CONFIG_DM_WRITECACHE)	+= dm-writecache.o
+obj-$(CONFIG_DM_BOW)		+= dm-bow.o
 
 ifeq ($(CONFIG_DM_UEVENT),y)
 dm-mod-objs			+= dm-uevent.o
 endif
 
+ifeq ($(CONFIG_DM_VERITY_AVB),y)
+dm-verity-objs			+= dm-verity-avb.o
+endif
+
 ifeq ($(CONFIG_DM_VERITY_FEC),y)
 dm-verity-objs			+= dm-verity-fec.o
 endif
diff --git a/drivers/md/dm-bow.c b/drivers/md/dm-bow.c
new file mode 100644
index 0000000..9323c7c
--- /dev/null
+++ b/drivers/md/dm-bow.c
@@ -0,0 +1,1231 @@
+/*
+ * Copyright (C) 2018 Google Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+#include "dm-core.h"
+
+#include <linux/crc32.h>
+#include <linux/dm-bufio.h>
+#include <linux/module.h>
+
+#define DM_MSG_PREFIX "bow"
+
+struct log_entry {
+	u64 source;
+	u64 dest;
+	u32 size;
+	u32 checksum;
+} __packed;
+
+struct log_sector {
+	u32 magic;
+	u16 header_version;
+	u16 header_size;
+	u32 block_size;
+	u32 count;
+	u32 sequence;
+	sector_t sector0;
+	struct log_entry entries[];
+} __packed;
+
+/*
+ * MAGIC is "BOW" in ASCII (bytes 'B', 'O', 'W', 0 stored little-endian)
+ */
+#define MAGIC 0x00574f42
+#define HEADER_VERSION 0x0100
+
+/*
+ * A sorted set of ranges representing the state of the data on the device.
+ * An rb_tree is used for fast lookup of a given sector.
+ * Consecutive ranges are always of different types; operations on this
+ * set must merge matching consecutive ranges.
+ *
+ * The top range is always of type TOP.
+ */
+struct bow_range {
+	struct rb_node		node;
+	sector_t		sector;
+	enum {
+		INVALID,	/* Type not set */
+		SECTOR0,	/* First sector - holds log record */
+		SECTOR0_CURRENT,/* Live contents of sector0 */
+		UNCHANGED,	/* Original contents */
+		TRIMMED,	/* Range has been trimmed */
+		CHANGED,	/* Range has been changed */
+		BACKUP,		/* Range is being used as a backup */
+		TOP,		/* Final range - sector is size of device */
+	} type;
+	struct list_head	trimmed_list; /* list of TRIMMED ranges */
+};
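
A hedged illustration of the invariant described above (the sector numbers are made up): each range stores only its start sector, so its extent is implied by the next node, and the final TOP entry marks the end of the device.

/*
 * Hypothetical contents of the range tree for a 1000-sector device:
 *
 *   sector    0, UNCHANGED  -> covers [   0,  400)
 *   sector  400, CHANGED    -> covers [ 400,  432)
 *   sector  432, TRIMMED    -> covers [ 432, 1000)
 *   sector 1000, TOP        -> end marker (device size)
 *
 * range_top(br) is simply the start sector of the next node, and no two
 * neighbouring ranges ever share the same type.
 */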
+
+static const char * const readable_type[] = {
+	"Invalid",
+	"Sector0",
+	"Sector0_current",
+	"Unchanged",
+	"Free",
+	"Changed",
+	"Backup",
+	"Top",
+};
+
+enum state {
+	TRIM,
+	CHECKPOINT,
+	COMMITTED,
+};
+
+struct bow_context {
+	struct dm_dev *dev;
+	u32 block_size;
+	u32 block_shift;
+	struct workqueue_struct *workqueue;
+	struct dm_bufio_client *bufio;
+	struct mutex ranges_lock; /* Hold to access this struct and/or ranges */
+	struct rb_root ranges;
+	struct dm_kobject_holder kobj_holder;	/* for sysfs attributes */
+	atomic_t state; /* One of the enum state values above */
+	u64 trims_total;
+	struct log_sector *log_sector;
+	struct list_head trimmed_list;
+	bool forward_trims;
+};
+
+sector_t range_top(struct bow_range *br)
+{
+	return container_of(rb_next(&br->node), struct bow_range, node)
+		->sector;
+}
+
+u64 range_size(struct bow_range *br)
+{
+	return (range_top(br) - br->sector) * SECTOR_SIZE;
+}
+
+static sector_t bvec_top(struct bvec_iter *bi_iter)
+{
+	return bi_iter->bi_sector + bi_iter->bi_size / SECTOR_SIZE;
+}
+
+/*
+ * Find the first range that overlaps with bi_iter.
+ * bi_iter is truncated to the size of the overlapping sub-range.
+ */
+static struct bow_range *find_first_overlapping_range(struct rb_root *ranges,
+						      struct bvec_iter *bi_iter)
+{
+	struct rb_node *node = ranges->rb_node;
+	struct bow_range *br;
+
+	while (node) {
+		br = container_of(node, struct bow_range, node);
+
+		if (br->sector <= bi_iter->bi_sector
+		    && bi_iter->bi_sector < range_top(br))
+			break;
+
+		if (bi_iter->bi_sector < br->sector)
+			node = node->rb_left;
+		else
+			node = node->rb_right;
+	}
+
+	WARN_ON(!node);
+	if (!node)
+		return NULL;
+
+	if (range_top(br) - bi_iter->bi_sector
+	    < bi_iter->bi_size >> SECTOR_SHIFT)
+		bi_iter->bi_size = (range_top(br) - bi_iter->bi_sector)
+			<< SECTOR_SHIFT;
+
+	return br;
+}
+
+void add_before(struct rb_root *ranges, struct bow_range *new_br,
+		struct bow_range *existing)
+{
+	struct rb_node *parent = &(existing->node);
+	struct rb_node **link = &(parent->rb_left);
+
+	while (*link) {
+		parent = *link;
+		link = &((*link)->rb_right);
+	}
+
+	rb_link_node(&new_br->node, parent, link);
+	rb_insert_color(&new_br->node, ranges);
+}
+
+/*
+ * Given a range br returned by find_first_overlapping_range, split br into a
+ * leading range, a range matching the bi_iter and a trailing range.
+ * Leading and trailing may end up size 0 and will then be deleted. The
+ * new range matching the bi_iter is then returned and should have its type
+ * and type specific fields populated.
+ * If bi_iter runs off the end of the range, bi_iter is truncated accordingly
+ */
+static int split_range(struct bow_context *bc, struct bow_range **br,
+		       struct bvec_iter *bi_iter)
+{
+	struct bow_range *new_br;
+
+	if (bi_iter->bi_sector < (*br)->sector) {
+		WARN_ON(true);
+		return BLK_STS_IOERR;
+	}
+
+	if (bi_iter->bi_sector > (*br)->sector) {
+		struct bow_range *leading_br =
+			kzalloc(sizeof(*leading_br), GFP_KERNEL);
+
+		if (!leading_br)
+			return BLK_STS_RESOURCE;
+
+		*leading_br = **br;
+		if (leading_br->type == TRIMMED)
+			list_add(&leading_br->trimmed_list, &bc->trimmed_list);
+
+		add_before(&bc->ranges, leading_br, *br);
+		(*br)->sector = bi_iter->bi_sector;
+	}
+
+	if (bvec_top(bi_iter) >= range_top(*br)) {
+		bi_iter->bi_size = (range_top(*br) - (*br)->sector)
+					* SECTOR_SIZE;
+		return BLK_STS_OK;
+	}
+
+	/* new_br will be the beginning, existing br will be the tail */
+	new_br = kzalloc(sizeof(*new_br), GFP_KERNEL);
+	if (!new_br)
+		return BLK_STS_RESOURCE;
+
+	new_br->sector = (*br)->sector;
+	(*br)->sector = bvec_top(bi_iter);
+	add_before(&bc->ranges, new_br, *br);
+	*br = new_br;
+
+	return BLK_STS_OK;
+}
+
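A hedged worked example of the split performed by split_range() above (sector numbers are hypothetical):

/*
 * br covers [100, 200) and bi_iter starts at sector 120 with 40 sectors:
 *
 *   leading  [100, 120) - new node, keeps the old type
 *   middle   [120, 160) - returned to the caller to be re-typed
 *   trailing [160, 200) - the original node, with its start moved up
 *
 * If bi_iter had reached past sector 200, it would instead be truncated
 * and no trailing node would be created.
 */
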
+/*
+ * Sets the type of a range and may merge it into surrounding ranges.
+ * Since br may be invalidated, br is always set to NULL to prevent
+ * use after this function is called.
+ */
+static void set_type(struct bow_context *bc, struct bow_range **br, int type)
+{
+	struct bow_range *prev = container_of(rb_prev(&(*br)->node),
+						      struct bow_range, node);
+	struct bow_range *next = container_of(rb_next(&(*br)->node),
+						      struct bow_range, node);
+
+	if ((*br)->type == TRIMMED) {
+		bc->trims_total -= range_size(*br);
+		list_del(&(*br)->trimmed_list);
+	}
+
+	if (type == TRIMMED) {
+		bc->trims_total += range_size(*br);
+		list_add(&(*br)->trimmed_list, &bc->trimmed_list);
+	}
+
+	(*br)->type = type;
+
+	if (next->type == type) {
+		if (type == TRIMMED)
+			list_del(&next->trimmed_list);
+		rb_erase(&next->node, &bc->ranges);
+		kfree(next);
+	}
+
+	if (prev->type == type) {
+		if (type == TRIMMED)
+			list_del(&(*br)->trimmed_list);
+		rb_erase(&(*br)->node, &bc->ranges);
+		kfree(*br);
+	}
+
+	*br = NULL;
+}
+
+static struct bow_range *find_free_range(struct bow_context *bc)
+{
+	if (list_empty(&bc->trimmed_list)) {
+		DMERR("Unable to find free space to back up to");
+		return NULL;
+	}
+
+	return list_first_entry(&bc->trimmed_list, struct bow_range,
+				trimmed_list);
+}
+
+static sector_t sector_to_page(struct bow_context const *bc, sector_t sector)
+{
+	WARN_ON((sector & (((sector_t)1 << (bc->block_shift - SECTOR_SHIFT)) - 1))
+		!= 0);
+	return sector >> (bc->block_shift - SECTOR_SHIFT);
+}
+
+static int copy_data(struct bow_context const *bc,
+		     struct bow_range *source, struct bow_range *dest,
+		     u32 *checksum)
+{
+	int i;
+
+	if (range_size(source) != range_size(dest)) {
+		WARN_ON(1);
+		return BLK_STS_IOERR;
+	}
+
+	if (checksum)
+		*checksum = sector_to_page(bc, source->sector);
+
+	for (i = 0; i < range_size(source) >> bc->block_shift; ++i) {
+		struct dm_buffer *read_buffer, *write_buffer;
+		u8 *read, *write;
+		sector_t page = sector_to_page(bc, source->sector) + i;
+
+		read = dm_bufio_read(bc->bufio, page, &read_buffer);
+		if (IS_ERR(read)) {
+			DMERR("Cannot read page %llu",
+			      (unsigned long long)page);
+			return PTR_ERR(read);
+		}
+
+		if (checksum)
+			*checksum = crc32(*checksum, read, bc->block_size);
+
+		write = dm_bufio_new(bc->bufio,
+				     sector_to_page(bc, dest->sector) + i,
+				     &write_buffer);
+		if (IS_ERR(write)) {
+			DMERR("Cannot write sector");
+			dm_bufio_release(read_buffer);
+			return PTR_ERR(write);
+		}
+
+		memcpy(write, read, bc->block_size);
+
+		dm_bufio_mark_buffer_dirty(write_buffer);
+		dm_bufio_release(write_buffer);
+		dm_bufio_release(read_buffer);
+	}
+
+	dm_bufio_write_dirty_buffers(bc->bufio);
+	return BLK_STS_OK;
+}
+
+/****** logging functions ******/
+
+static int add_log_entry(struct bow_context *bc, sector_t source, sector_t dest,
+			 unsigned int size, u32 checksum);
+
+static int backup_log_sector(struct bow_context *bc)
+{
+	struct bow_range *first_br, *free_br;
+	struct bvec_iter bi_iter;
+	u32 checksum = 0;
+	int ret;
+
+	first_br = container_of(rb_first(&bc->ranges), struct bow_range, node);
+
+	if (first_br->type != SECTOR0) {
+		WARN_ON(1);
+		return BLK_STS_IOERR;
+	}
+
+	if (range_size(first_br) != bc->block_size) {
+		WARN_ON(1);
+		return BLK_STS_IOERR;
+	}
+
+	free_br = find_free_range(bc);
+	/* No space left - return this error to userspace */
+	if (!free_br)
+		return BLK_STS_NOSPC;
+	bi_iter.bi_sector = free_br->sector;
+	bi_iter.bi_size = bc->block_size;
+	ret = split_range(bc, &free_br, &bi_iter);
+	if (ret)
+		return ret;
+	if (bi_iter.bi_size != bc->block_size) {
+		WARN_ON(1);
+		return BLK_STS_IOERR;
+	}
+
+	ret = copy_data(bc, first_br, free_br, &checksum);
+	if (ret)
+		return ret;
+
+	bc->log_sector->count = 0;
+	bc->log_sector->sequence++;
+	ret = add_log_entry(bc, first_br->sector, free_br->sector,
+			    range_size(first_br), checksum);
+	if (ret)
+		return ret;
+
+	set_type(bc, &free_br, BACKUP);
+	return BLK_STS_OK;
+}
+
+static int add_log_entry(struct bow_context *bc, sector_t source, sector_t dest,
+			 unsigned int size, u32 checksum)
+{
+	struct dm_buffer *sector_buffer;
+	u8 *sector;
+
+	if (sizeof(struct log_sector)
+	    + sizeof(struct log_entry) * (bc->log_sector->count + 1)
+		> bc->block_size) {
+		int ret = backup_log_sector(bc);
+
+		if (ret)
+			return ret;
+	}
+
+	sector = dm_bufio_new(bc->bufio, 0, &sector_buffer);
+	if (IS_ERR(sector)) {
+		DMERR("Cannot write boot sector");
+		dm_bufio_release(sector_buffer);
+		return BLK_STS_NOSPC;
+	}
+
+	bc->log_sector->entries[bc->log_sector->count].source = source;
+	bc->log_sector->entries[bc->log_sector->count].dest = dest;
+	bc->log_sector->entries[bc->log_sector->count].size = size;
+	bc->log_sector->entries[bc->log_sector->count].checksum = checksum;
+	bc->log_sector->count++;
+
+	memcpy(sector, bc->log_sector, bc->block_size);
+	dm_bufio_mark_buffer_dirty(sector_buffer);
+	dm_bufio_release(sector_buffer);
+	dm_bufio_write_dirty_buffers(bc->bufio);
+	return BLK_STS_OK;
+}
+
+static int prepare_log(struct bow_context *bc)
+{
+	struct bow_range *free_br, *first_br;
+	struct bvec_iter bi_iter;
+	u32 checksum = 0;
+	int ret;
+
+	/* Carve out first sector as log sector */
+	first_br = container_of(rb_first(&bc->ranges), struct bow_range, node);
+	if (first_br->type != UNCHANGED) {
+		WARN_ON(1);
+		return BLK_STS_IOERR;
+	}
+
+	if (range_size(first_br) < bc->block_size) {
+		WARN_ON(1);
+		return BLK_STS_IOERR;
+	}
+	bi_iter.bi_sector = 0;
+	bi_iter.bi_size = bc->block_size;
+	ret = split_range(bc, &first_br, &bi_iter);
+	if (ret)
+		return ret;
+	first_br->type = SECTOR0;
+	if (range_size(first_br) != bc->block_size) {
+		WARN_ON(1);
+		return BLK_STS_IOERR;
+	}
+
+	/* Find free sector for active sector0 reads/writes */
+	free_br = find_free_range(bc);
+	if (!free_br)
+		return BLK_STS_NOSPC;
+	bi_iter.bi_sector = free_br->sector;
+	bi_iter.bi_size = bc->block_size;
+	ret = split_range(bc, &free_br, &bi_iter);
+	if (ret)
+		return ret;
+	free_br->type = SECTOR0_CURRENT;
+
+	/* Copy data */
+	ret = copy_data(bc, first_br, free_br, NULL);
+	if (ret)
+		return ret;
+
+	bc->log_sector->sector0 = free_br->sector;
+
+	/* Find free sector to back up original sector zero */
+	free_br = find_free_range(bc);
+	if (!free_br)
+		return BLK_STS_NOSPC;
+	bi_iter.bi_sector = free_br->sector;
+	bi_iter.bi_size = bc->block_size;
+	ret = split_range(bc, &free_br, &bi_iter);
+	if (ret)
+		return ret;
+
+	/* Back up */
+	ret = copy_data(bc, first_br, free_br, &checksum);
+	if (ret)
+		return ret;
+
+	/*
+	 * Set up our replacement boot sector - it will get written when we
+	 * add the first log entry, which we do immediately
+	 */
+	bc->log_sector->magic = MAGIC;
+	bc->log_sector->header_version = HEADER_VERSION;
+	bc->log_sector->header_size = sizeof(*bc->log_sector);
+	bc->log_sector->block_size = bc->block_size;
+	bc->log_sector->count = 0;
+	bc->log_sector->sequence = 0;
+
+	/* Add log entry */
+	ret = add_log_entry(bc, first_br->sector, free_br->sector,
+			    range_size(first_br), checksum);
+	if (ret)
+		return ret;
+
+	set_type(bc, &free_br, BACKUP);
+	return BLK_STS_OK;
+}
+
+static struct bow_range *find_sector0_current(struct bow_context *bc)
+{
+	struct bvec_iter bi_iter;
+
+	bi_iter.bi_sector = bc->log_sector->sector0;
+	bi_iter.bi_size = bc->block_size;
+	return find_first_overlapping_range(&bc->ranges, &bi_iter);
+}
+
+/****** sysfs interface functions ******/
+
+static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
+			  char *buf)
+{
+	struct bow_context *bc = container_of(kobj, struct bow_context,
+					      kobj_holder.kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&bc->state));
+}
+
+static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
+			   const char *buf, size_t count)
+{
+	struct bow_context *bc = container_of(kobj, struct bow_context,
+					      kobj_holder.kobj);
+	enum state state, original_state;
+	int ret;
+
+	state = buf[0] - '0';
+	if (state < TRIM || state > COMMITTED) {
+		DMERR("State value %d out of range", state);
+		return -EINVAL;
+	}
+
+	mutex_lock(&bc->ranges_lock);
+	original_state = atomic_read(&bc->state);
+	if (state != original_state + 1) {
+		DMERR("Invalid state change from %d to %d",
+		      original_state, state);
+		ret = -EINVAL;
+		goto bad;
+	}
+
+	DMINFO("Switching to state %s", state == CHECKPOINT ? "Checkpoint"
+	       : state == COMMITTED ? "Committed" : "Unknown");
+
+	if (state == CHECKPOINT) {
+		ret = prepare_log(bc);
+		if (ret) {
+			DMERR("Failed to switch to checkpoint state");
+			goto bad;
+		}
+	} else if (state == COMMITTED) {
+		struct bow_range *br = find_sector0_current(bc);
+		struct bow_range *sector0_br =
+			container_of(rb_first(&bc->ranges), struct bow_range,
+				     node);
+
+		ret = copy_data(bc, br, sector0_br, 0);
+		if (ret) {
+			DMERR("Failed to switch to committed state");
+			goto bad;
+		}
+	}
+	atomic_inc(&bc->state);
+	ret = count;
+
+bad:
+	mutex_unlock(&bc->ranges_lock);
+	return ret;
+}
+
+static ssize_t free_show(struct kobject *kobj, struct kobj_attribute *attr,
+			  char *buf)
+{
+	struct bow_context *bc = container_of(kobj, struct bow_context,
+					      kobj_holder.kobj);
+	u64 trims_total;
+
+	mutex_lock(&bc->ranges_lock);
+	trims_total = bc->trims_total;
+	mutex_unlock(&bc->ranges_lock);
+
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", trims_total);
+}
+
+static struct kobj_attribute attr_state = __ATTR_RW(state);
+static struct kobj_attribute attr_free = __ATTR_RO(free);
+
+static struct attribute *bow_attrs[] = {
+	&attr_state.attr,
+	&attr_free.attr,
+	NULL
+};
+
+static struct kobj_type bow_ktype = {
+	.sysfs_ops = &kobj_sysfs_ops,
+	.default_attrs = bow_attrs,
+	.release = dm_kobject_release
+};
+
+/****** constructor/destructor ******/
+
+static void dm_bow_dtr(struct dm_target *ti)
+{
+	struct bow_context *bc = (struct bow_context *) ti->private;
+	struct kobject *kobj;
+
+	while (rb_first(&bc->ranges)) {
+		struct bow_range *br = container_of(rb_first(&bc->ranges),
+						    struct bow_range, node);
+
+		rb_erase(&br->node, &bc->ranges);
+		kfree(br);
+	}
+	if (bc->workqueue)
+		destroy_workqueue(bc->workqueue);
+	if (bc->bufio)
+		dm_bufio_client_destroy(bc->bufio);
+
+	kobj = &bc->kobj_holder.kobj;
+	if (kobj->state_initialized) {
+		kobject_put(kobj);
+		wait_for_completion(dm_get_completion_from_kobject(kobj));
+	}
+
+	kfree(bc->log_sector);
+	kfree(bc);
+}
+
+static int dm_bow_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+	struct bow_context *bc;
+	struct bow_range *br;
+	int ret;
+	struct mapped_device *md = dm_table_get_md(ti->table);
+
+	if (argc != 1) {
+		ti->error = "Invalid argument count";
+		return -EINVAL;
+	}
+
+	bc = kzalloc(sizeof(*bc), GFP_KERNEL);
+	if (!bc) {
+		ti->error = "Cannot allocate bow context";
+		return -ENOMEM;
+	}
+
+	ti->num_flush_bios = 1;
+	ti->num_discard_bios = 1;
+	ti->num_write_same_bios = 1;
+	ti->private = bc;
+
+	ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
+			    &bc->dev);
+	if (ret) {
+		ti->error = "Device lookup failed";
+		goto bad;
+	}
+
+	if (bc->dev->bdev->bd_queue->limits.max_discard_sectors == 0) {
+		bc->dev->bdev->bd_queue->limits.discard_granularity = 1 << 12;
+		bc->dev->bdev->bd_queue->limits.max_hw_discard_sectors = 1 << 15;
+		bc->dev->bdev->bd_queue->limits.max_discard_sectors = 1 << 15;
+		bc->forward_trims = false;
+	} else {
+		bc->forward_trims = true;
+	}
+
+	bc->block_size = bc->dev->bdev->bd_queue->limits.logical_block_size;
+	bc->block_shift = ilog2(bc->block_size);
+	bc->log_sector = kzalloc(bc->block_size, GFP_KERNEL);
+	if (!bc->log_sector) {
+		ti->error = "Cannot allocate log sector";
+		ret = -ENOMEM;
+		goto bad;
+	}
+
+	init_completion(&bc->kobj_holder.completion);
+	ret = kobject_init_and_add(&bc->kobj_holder.kobj, &bow_ktype,
+				   &disk_to_dev(dm_disk(md))->kobj, "%s",
+				   "bow");
+	if (ret) {
+		ti->error = "Cannot create sysfs node";
+		goto bad;
+	}
+
+	mutex_init(&bc->ranges_lock);
+	bc->ranges = RB_ROOT;
+	bc->bufio = dm_bufio_client_create(bc->dev->bdev, bc->block_size, 1, 0,
+					   NULL, NULL);
+	if (IS_ERR(bc->bufio)) {
+		ti->error = "Cannot initialize dm-bufio";
+		ret = PTR_ERR(bc->bufio);
+		bc->bufio = NULL;
+		goto bad;
+	}
+
+	bc->workqueue = alloc_workqueue("dm-bow",
+					WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM
+					| WQ_UNBOUND, num_online_cpus());
+	if (!bc->workqueue) {
+		ti->error = "Cannot allocate workqueue";
+		ret = -ENOMEM;
+		goto bad;
+	}
+
+	INIT_LIST_HEAD(&bc->trimmed_list);
+
+	br = kzalloc(sizeof(*br), GFP_KERNEL);
+	if (!br) {
+		ti->error = "Cannot allocate ranges";
+		ret = -ENOMEM;
+		goto bad;
+	}
+
+	br->sector = ti->len;
+	br->type = TOP;
+	rb_link_node(&br->node, NULL, &bc->ranges.rb_node);
+	rb_insert_color(&br->node, &bc->ranges);
+
+	br = kzalloc(sizeof(*br), GFP_KERNEL);
+	if (!br) {
+		ti->error = "Cannot allocate ranges";
+		ret = -ENOMEM;
+		goto bad;
+	}
+
+	br->sector = 0;
+	br->type = UNCHANGED;
+	rb_link_node(&br->node, bc->ranges.rb_node,
+		     &bc->ranges.rb_node->rb_left);
+	rb_insert_color(&br->node, &bc->ranges);
+
+	ti->discards_supported = true;
+
+	return 0;
+
+bad:
+	dm_bow_dtr(ti);
+	return ret;
+}
+
+/****** Handle writes ******/
+
+static int prepare_unchanged_range(struct bow_context *bc, struct bow_range *br,
+				   struct bvec_iter *bi_iter,
+				   bool record_checksum)
+{
+	struct bow_range *backup_br;
+	struct bvec_iter backup_bi;
+	sector_t log_source, log_dest;
+	unsigned int log_size;
+	u32 checksum = 0;
+	int ret;
+	int original_type;
+	sector_t sector0;
+
+	/* Find a free range */
+	backup_br = find_free_range(bc);
+	if (!backup_br)
+		return BLK_STS_NOSPC;
+
+	/* Carve out a backup range. This may be smaller than the br given */
+	backup_bi.bi_sector = backup_br->sector;
+	backup_bi.bi_size = min(range_size(backup_br), (u64) bi_iter->bi_size);
+	ret = split_range(bc, &backup_br, &backup_bi);
+	if (ret)
+		return ret;
+
+	/*
+	 * Carve out a changed range. This will not be smaller than the backup
+	 * br since the backup br is smaller than the source range and iterator
+	 */
+	bi_iter->bi_size = backup_bi.bi_size;
+	ret = split_range(bc, &br, bi_iter);
+	if (ret)
+		return ret;
+	if (range_size(br) != range_size(backup_br)) {
+		WARN_ON(1);
+		return BLK_STS_IOERR;
+	}
+
+
+	/* Copy data over */
+	ret = copy_data(bc, br, backup_br, record_checksum ? &checksum : NULL);
+	if (ret)
+		return ret;
+
+	/* Add an entry to the log */
+	log_source = br->sector;
+	log_dest = backup_br->sector;
+	log_size = range_size(br);
+
+	/*
+	 * Set the types. Note that since set_type also amalgamates ranges
+	 * we have to set both sectors to their final type before calling
+	 * set_type on either
+	 */
+	original_type = br->type;
+	sector0 = backup_br->sector;
+	if (backup_br->type == TRIMMED)
+		list_del(&backup_br->trimmed_list);
+	backup_br->type = br->type == SECTOR0_CURRENT ? SECTOR0_CURRENT
+						      : BACKUP;
+	br->type = CHANGED;
+	set_type(bc, &backup_br, backup_br->type);
+
+	/*
+	 * Add the log entry after marking the backup sector, since adding a
+	 * log entry can itself trigger another backup.
+	 */
+	ret = add_log_entry(bc, log_source, log_dest, log_size, checksum);
+	if (ret) {
+		br->type = original_type;
+		return ret;
+	}
+
+	/* Now it is safe to mark this backup successful */
+	if (original_type == SECTOR0_CURRENT)
+		bc->log_sector->sector0 = sector0;
+
+	set_type(bc, &br, br->type);
+	return ret;
+}
+
+static int prepare_free_range(struct bow_context *bc, struct bow_range *br,
+			      struct bvec_iter *bi_iter)
+{
+	int ret;
+
+	ret = split_range(bc, &br, bi_iter);
+	if (ret)
+		return ret;
+	set_type(bc, &br, CHANGED);
+	return BLK_STS_OK;
+}
+
+static int prepare_changed_range(struct bow_context *bc, struct bow_range *br,
+				 struct bvec_iter *bi_iter)
+{
+	/* Nothing to do ... */
+	return BLK_STS_OK;
+}
+
+static int prepare_one_range(struct bow_context *bc,
+			     struct bvec_iter *bi_iter)
+{
+	struct bow_range *br = find_first_overlapping_range(&bc->ranges,
+							    bi_iter);
+	switch (br->type) {
+	case CHANGED:
+		return prepare_changed_range(bc, br, bi_iter);
+
+	case TRIMMED:
+		return prepare_free_range(bc, br, bi_iter);
+
+	case UNCHANGED:
+	case BACKUP:
+		return prepare_unchanged_range(bc, br, bi_iter, true);
+
+	/*
+	 * We cannot track the checksum for the active sector0, since it
+	 * may change at any point.
+	 */
+	case SECTOR0_CURRENT:
+		return prepare_unchanged_range(bc, br, bi_iter, false);
+
+	case SECTOR0:	/* Handled in the dm_bow_map */
+	case TOP:	/* Illegal - top is off the end of the device */
+	default:
+		WARN_ON(1);
+		return BLK_STS_IOERR;
+	}
+}
+
+struct write_work {
+	struct work_struct work;
+	struct bow_context *bc;
+	struct bio *bio;
+};
+
+static void bow_write(struct work_struct *work)
+{
+	struct write_work *ww = container_of(work, struct write_work, work);
+	struct bow_context *bc = ww->bc;
+	struct bio *bio = ww->bio;
+	struct bvec_iter bi_iter = bio->bi_iter;
+	int ret = BLK_STS_OK;
+
+	kfree(ww);
+
+	mutex_lock(&bc->ranges_lock);
+	do {
+		ret = prepare_one_range(bc, &bi_iter);
+		bi_iter.bi_sector += bi_iter.bi_size / SECTOR_SIZE;
+		bi_iter.bi_size = bio->bi_iter.bi_size
+			- (bi_iter.bi_sector - bio->bi_iter.bi_sector)
+			  * SECTOR_SIZE;
+	} while (!ret && bi_iter.bi_size);
+
+	mutex_unlock(&bc->ranges_lock);
+
+	if (!ret) {
+		bio_set_dev(bio, bc->dev->bdev);
+		submit_bio(bio);
+	} else {
+		DMERR("Write failure with error %d", -ret);
+		bio->bi_status = ret;
+		bio_endio(bio);
+	}
+}
+
+static int queue_write(struct bow_context *bc, struct bio *bio)
+{
+	struct write_work *ww = kmalloc(sizeof(*ww), GFP_NOIO | __GFP_NORETRY
+					| __GFP_NOMEMALLOC | __GFP_NOWARN);
+	if (!ww) {
+		DMERR("Failed to allocate write_work");
+		return -ENOMEM;
+	}
+
+	INIT_WORK(&ww->work, bow_write);
+	ww->bc = bc;
+	ww->bio = bio;
+	queue_work(bc->workqueue, &ww->work);
+	return DM_MAPIO_SUBMITTED;
+}
+
+static int handle_sector0(struct bow_context *bc, struct bio *bio)
+{
+	int ret = DM_MAPIO_REMAPPED;
+
+	if (bio->bi_iter.bi_size > bc->block_size) {
+		struct bio * split = bio_split(bio,
+					       bc->block_size >> SECTOR_SHIFT,
+					       GFP_NOIO,
+					       &fs_bio_set);
+		if (!split) {
+			DMERR("Failed to split bio");
+			bio->bi_status = BLK_STS_RESOURCE;
+			bio_endio(bio);
+			return DM_MAPIO_SUBMITTED;
+		}
+
+		bio_chain(split, bio);
+		split->bi_iter.bi_sector = bc->log_sector->sector0;
+		bio_set_dev(split, bc->dev->bdev);
+		submit_bio(split);
+
+		if (bio_data_dir(bio) == WRITE)
+			ret = queue_write(bc, bio);
+	} else {
+		bio->bi_iter.bi_sector = bc->log_sector->sector0;
+	}
+
+	return ret;
+}
+
+static int add_trim(struct bow_context *bc, struct bio *bio)
+{
+	struct bow_range *br;
+	struct bvec_iter bi_iter = bio->bi_iter;
+
+	DMDEBUG("add_trim: %llu, %u",
+		(unsigned long long)bio->bi_iter.bi_sector,
+		bio->bi_iter.bi_size);
+
+	do {
+		br = find_first_overlapping_range(&bc->ranges, &bi_iter);
+
+		switch (br->type) {
+		case UNCHANGED:
+			if (!split_range(bc, &br, &bi_iter))
+				set_type(bc, &br, TRIMMED);
+			break;
+
+		case TRIMMED:
+			/* Nothing to do */
+			break;
+
+		default:
+			/* No other case is legal in TRIM state */
+			WARN_ON(true);
+			break;
+		}
+
+		bi_iter.bi_sector += bi_iter.bi_size / SECTOR_SIZE;
+		bi_iter.bi_size = bio->bi_iter.bi_size
+			- (bi_iter.bi_sector - bio->bi_iter.bi_sector)
+			  * SECTOR_SIZE;
+
+	} while (bi_iter.bi_size);
+
+	bio_endio(bio);
+	return DM_MAPIO_SUBMITTED;
+}
+
+static int remove_trim(struct bow_context *bc, struct bio *bio)
+{
+	struct bow_range *br;
+	struct bvec_iter bi_iter = bio->bi_iter;
+
+	DMDEBUG("remove_trim: %llu, %u",
+		(unsigned long long)bio->bi_iter.bi_sector,
+		bio->bi_iter.bi_size);
+
+	do {
+		br = find_first_overlapping_range(&bc->ranges, &bi_iter);
+
+		switch (br->type) {
+		case UNCHANGED:
+			/* Nothing to do */
+			break;
+
+		case TRIMMED:
+			if (!split_range(bc, &br, &bi_iter))
+				set_type(bc, &br, UNCHANGED);
+			break;
+
+		default:
+			/* No other case is legal in TRIM state */
+			WARN_ON(true);
+			break;
+		}
+
+		bi_iter.bi_sector += bi_iter.bi_size / SECTOR_SIZE;
+		bi_iter.bi_size = bio->bi_iter.bi_size
+			- (bi_iter.bi_sector - bio->bi_iter.bi_sector)
+			  * SECTOR_SIZE;
+
+	} while (bi_iter.bi_size);
+
+	return DM_MAPIO_REMAPPED;
+}
+
+int remap_unless_illegal_trim(struct bow_context *bc, struct bio *bio)
+{
+	if (!bc->forward_trims && bio_op(bio) == REQ_OP_DISCARD) {
+		bio->bi_status = BLK_STS_NOTSUPP;
+		bio_endio(bio);
+		return DM_MAPIO_SUBMITTED;
+	} else {
+		bio_set_dev(bio, bc->dev->bdev);
+		return DM_MAPIO_REMAPPED;
+	}
+}
+
+/****** dm interface ******/
+
+static int dm_bow_map(struct dm_target *ti, struct bio *bio)
+{
+	int ret = DM_MAPIO_REMAPPED;
+	struct bow_context *bc = ti->private;
+
+	if (likely(atomic_read(&bc->state) == COMMITTED))
+		return remap_unless_illegal_trim(bc, bio);
+
+	if (bio_data_dir(bio) == READ && bio->bi_iter.bi_sector != 0)
+		return remap_unless_illegal_trim(bc, bio);
+
+	if (atomic_read(&bc->state) != COMMITTED) {
+		enum state state;
+
+		mutex_lock(&bc->ranges_lock);
+		state = atomic_read(&bc->state);
+		if (state == TRIM) {
+			if (bio_op(bio) == REQ_OP_DISCARD)
+				ret = add_trim(bc, bio);
+			else if (bio_data_dir(bio) == WRITE)
+				ret = remove_trim(bc, bio);
+			else
+				/* pass-through */;
+		} else if (state == CHECKPOINT) {
+			if (bio->bi_iter.bi_sector == 0)
+				ret = handle_sector0(bc, bio);
+			else if (bio_data_dir(bio) == WRITE)
+				ret = queue_write(bc, bio);
+			else
+				/* pass-through */;
+		} else {
+			/* pass-through */
+		}
+		mutex_unlock(&bc->ranges_lock);
+	}
+
+	if (ret == DM_MAPIO_REMAPPED)
+		return remap_unless_illegal_trim(bc, bio);
+
+	return ret;
+}
+
+static void dm_bow_tablestatus(struct dm_target *ti, char *result,
+			       unsigned int maxlen)
+{
+	char *end = result + maxlen;
+	struct bow_context *bc = ti->private;
+	struct rb_node *i;
+	int trimmed_list_length = 0;
+	int trimmed_range_count = 0;
+	struct bow_range *br;
+
+	if (maxlen == 0)
+		return;
+	result[0] = 0;
+
+	list_for_each_entry(br, &bc->trimmed_list, trimmed_list)
+		if (br->type == TRIMMED) {
+			++trimmed_list_length;
+		} else {
+			scnprintf(result, end - result,
+				  "ERROR: non-trimmed entry in trimmed_list");
+			return;
+		}
+
+	if (!rb_first(&bc->ranges)) {
+		scnprintf(result, end - result, "ERROR: Empty ranges");
+		return;
+	}
+
+	if (container_of(rb_first(&bc->ranges), struct bow_range, node)
+	    ->sector) {
+		scnprintf(result, end - result,
+			 "ERROR: First range does not start at sector 0");
+		return;
+	}
+
+	for (i = rb_first(&bc->ranges); i; i = rb_next(i)) {
+		struct bow_range *br = container_of(i, struct bow_range, node);
+
+		result += scnprintf(result, end - result, "%s: %llu",
+				    readable_type[br->type],
+				    (unsigned long long)br->sector);
+		if (result >= end)
+			return;
+
+		result += scnprintf(result, end - result, "\n");
+		if (result >= end)
+			return;
+
+		if (br->type == TRIMMED)
+			++trimmed_range_count;
+
+		if (br->type == TOP) {
+			if (br->sector != ti->len) {
+				scnprintf(result, end - result,
+					 "\nERROR: Top sector is incorrect");
+			}
+
+			if (&br->node != rb_last(&bc->ranges)) {
+				scnprintf(result, end - result,
+					  "\nERROR: Top sector is not last");
+			}
+
+			break;
+		}
+
+		if (!rb_next(i)) {
+			scnprintf(result, end - result,
+				  "\nERROR: Last range not of type TOP");
+			return;
+		}
+
+		if (br->sector > range_top(br)) {
+			scnprintf(result, end - result,
+				  "\nERROR: sectors out of order");
+			return;
+		}
+	}
+
+	if (trimmed_range_count != trimmed_list_length)
+		scnprintf(result, end - result,
+			  "\nERROR: not all trimmed ranges in trimmed list");
+}
+
+static void dm_bow_status(struct dm_target *ti, status_type_t type,
+			  unsigned int status_flags, char *result,
+			  unsigned int maxlen)
+{
+	switch (type) {
+	case STATUSTYPE_INFO:
+		if (maxlen)
+			result[0] = 0;
+		break;
+
+	case STATUSTYPE_TABLE:
+		dm_bow_tablestatus(ti, result, maxlen);
+		break;
+	}
+}
+
+int dm_bow_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
+{
+	struct bow_context *bc = ti->private;
+	struct dm_dev *dev = bc->dev;
+
+	*bdev = dev->bdev;
+	/* Only pass ioctls through if the device sizes match exactly. */
+	return ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
+}
+
+static int dm_bow_iterate_devices(struct dm_target *ti,
+				  iterate_devices_callout_fn fn, void *data)
+{
+	struct bow_context *bc = ti->private;
+
+	return fn(ti, bc->dev, 0, ti->len, data);
+}
+
+static struct target_type bow_target = {
+	.name   = "bow",
+	.version = {1, 1, 1},
+	.module = THIS_MODULE,
+	.ctr    = dm_bow_ctr,
+	.dtr    = dm_bow_dtr,
+	.map    = dm_bow_map,
+	.status = dm_bow_status,
+	.prepare_ioctl  = dm_bow_prepare_ioctl,
+	.iterate_devices = dm_bow_iterate_devices,
+};
+
+int __init dm_bow_init(void)
+{
+	int r = dm_register_target(&bow_target);
+
+	if (r < 0)
+		DMERR("registering bow failed %d", r);
+	return r;
+}
+
+void dm_bow_exit(void)
+{
+	dm_unregister_target(&bow_target);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(dm_bow_init);
+module_exit(dm_bow_exit);
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index f3dcc76..8f61366 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1885,6 +1885,13 @@
 		}
 	}
 
+	/*
+	 * dm-crypt performance can vary greatly depending on which crypto
+	 * algorithm implementation is used.  Help people debug performance
+	 * problems by logging the ->cra_driver_name.
+	 */
+	DMINFO("%s using implementation \"%s\"", ciphermode,
+	       crypto_skcipher_alg(any_tfm(cc))->base.cra_driver_name);
 	return 0;
 }
 
@@ -1903,6 +1910,8 @@
 		return err;
 	}
 
+	DMINFO("%s using implementation \"%s\"", ciphermode,
+	       crypto_aead_alg(any_tfm_aead(cc))->base.cra_driver_name);
 	return 0;
 }
 
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 36275c5..7767f60 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1979,6 +1979,9 @@
 	 */
 	if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random))
 		blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
+
+	/* io_pages is used for readahead */
+	q->backing_dev_info->io_pages = limits->max_sectors >> (PAGE_SHIFT - 9);
 }
 
 unsigned int dm_table_get_num_targets(struct dm_table *t)
diff --git a/drivers/md/dm-verity-avb.c b/drivers/md/dm-verity-avb.c
new file mode 100644
index 0000000..a9f102a
--- /dev/null
+++ b/drivers/md/dm-verity-avb.c
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2017 Google.
+ *
+ * This file is released under the GPLv2.
+ *
+ * Based on drivers/md/dm-verity-chromeos.c
+ */
+
+#include <linux/device-mapper.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+
+#define DM_MSG_PREFIX "verity-avb"
+
+/* Set via module parameters. */
+static char avb_vbmeta_device[64];
+static char avb_invalidate_on_error[4];
+
+static void invalidate_vbmeta_endio(struct bio *bio)
+{
+	if (bio->bi_status)
+		DMERR("invalidate_vbmeta_endio: error %d", bio->bi_status);
+	complete(bio->bi_private);
+}
+
+static int invalidate_vbmeta_submit(struct bio *bio,
+				    struct block_device *bdev,
+				    int op, int access_last_sector,
+				    struct page *page)
+{
+	DECLARE_COMPLETION_ONSTACK(wait);
+
+	bio->bi_private = &wait;
+	bio->bi_end_io = invalidate_vbmeta_endio;
+	bio_set_dev(bio, bdev);
+	bio_set_op_attrs(bio, op, REQ_SYNC);
+
+	bio->bi_iter.bi_sector = 0;
+	if (access_last_sector) {
+		sector_t last_sector;
+
+		last_sector = (i_size_read(bdev->bd_inode)>>SECTOR_SHIFT) - 1;
+		bio->bi_iter.bi_sector = last_sector;
+	}
+	if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
+		DMERR("invalidate_vbmeta_submit: bio_add_page error");
+		return -EIO;
+	}
+
+	submit_bio(bio);
+	/* Wait up to 2 seconds for completion or fail. */
+	if (!wait_for_completion_timeout(&wait, msecs_to_jiffies(2000)))
+		return -EIO;
+	return 0;
+}
+
+static int invalidate_vbmeta(dev_t vbmeta_devt)
+{
+	int ret = 0;
+	struct block_device *bdev;
+	struct bio *bio;
+	struct page *page;
+	fmode_t dev_mode;
+	/* Ensure we do synchronous unblocked I/O. We may also need
+	 * sync_bdev() on completion, but it really shouldn't be needed.
+	 */
+	int access_last_sector = 0;
+
+	DMINFO("invalidate_vbmeta: acting on device %d:%d",
+	       MAJOR(vbmeta_devt), MINOR(vbmeta_devt));
+
+	/* First we open the device for reading. */
+	dev_mode = FMODE_READ | FMODE_EXCL;
+	bdev = blkdev_get_by_dev(vbmeta_devt, dev_mode,
+				 invalidate_vbmeta);
+	if (IS_ERR(bdev)) {
+		DMERR("invalidate_vbmeta: could not open device for reading");
+		dev_mode = 0;
+		ret = -ENOENT;
+		goto failed_to_read;
+	}
+
+	bio = bio_alloc(GFP_NOIO, 1);
+	if (!bio) {
+		ret = -ENOMEM;
+		goto failed_bio_alloc;
+	}
+
+	page = alloc_page(GFP_NOIO);
+	if (!page) {
+		ret = -ENOMEM;
+		goto failed_to_alloc_page;
+	}
+
+	access_last_sector = 0;
+	ret = invalidate_vbmeta_submit(bio, bdev, REQ_OP_READ,
+				       access_last_sector, page);
+	if (ret) {
+		DMERR("invalidate_vbmeta: error reading");
+		goto failed_to_submit_read;
+	}
+
+	/* We have a page. Let's make sure it looks right. */
+	if (memcmp("AVB0", page_address(page), 4) == 0) {
+		/* Stamp it. */
+		memcpy(page_address(page), "AVE0", 4);
+		DMINFO("invalidate_vbmeta: found vbmeta partition");
+	} else {
+		/* This could be an AVB footer instead; check. Also, since the
+		 * AVB footer is in the last 64 bytes, adjust for the fact that
+		 * we're dealing with 512-byte sectors.
+		 */
+		size_t offset = (1<<SECTOR_SHIFT) - 64;
+
+		access_last_sector = 1;
+		ret = invalidate_vbmeta_submit(bio, bdev, REQ_OP_READ,
+					       access_last_sector, page);
+		if (ret) {
+			DMERR("invalidate_vbmeta: error reading");
+			goto failed_to_submit_read;
+		}
+		if (memcmp("AVBf", page_address(page) + offset, 4) != 0) {
+			DMERR("invalidate_vbmeta on non-vbmeta partition");
+			ret = -EINVAL;
+			goto invalid_header;
+		}
+		/* Stamp it. */
+		memcpy(page_address(page) + offset, "AVE0", 4);
+		DMINFO("invalidate_vbmeta: found vbmeta footer partition");
+	}
+
+	/* Now rewrite the changed page. The block dev was opened
+	 * read-only above, so reopen it for writing here.
+	 */
+	blkdev_put(bdev, dev_mode);
+	dev_mode = FMODE_WRITE | FMODE_EXCL;
+	bdev = blkdev_get_by_dev(vbmeta_devt, dev_mode,
+				 invalidate_vbmeta);
+	if (IS_ERR(bdev)) {
+		DMERR("invalidate_vbmeta: could not open device for writing");
+		dev_mode = 0;
+		ret = -ENOENT;
+		goto failed_to_write;
+	}
+
+	/* We re-use the same bio to do the write after the read. Need to reset
+	 * it to initialize bio->bi_remaining.
+	 */
+	bio_reset(bio);
+
+	ret = invalidate_vbmeta_submit(bio, bdev, REQ_OP_WRITE,
+				       access_last_sector, page);
+	if (ret) {
+		DMERR("invalidate_vbmeta: error writing");
+		goto failed_to_submit_write;
+	}
+
+	DMERR("invalidate_vbmeta: completed.");
+	ret = 0;
+failed_to_submit_write:
+failed_to_write:
+invalid_header:
+	__free_page(page);
+failed_to_submit_read:
+	/* Technically, we'll leak a page with the pending bio, but
+	 * we're about to reboot anyway.
+	 */
+failed_to_alloc_page:
+	bio_put(bio);
+failed_bio_alloc:
+	if (dev_mode)
+		blkdev_put(bdev, dev_mode);
+failed_to_read:
+	return ret;
+}
+
+void dm_verity_avb_error_handler(void)
+{
+	dev_t dev;
+
+	DMINFO("AVB error handler called for %s", avb_vbmeta_device);
+
+	if (strcmp(avb_invalidate_on_error, "yes") != 0) {
+		DMINFO("Not configured to invalidate");
+		return;
+	}
+
+	if (avb_vbmeta_device[0] == '\0') {
+		DMERR("avb_vbmeta_device parameter not set");
+		goto fail_no_dev;
+	}
+
+	dev = name_to_dev_t(avb_vbmeta_device);
+	if (!dev) {
+		DMERR("No matching partition for device: %s",
+		      avb_vbmeta_device);
+		goto fail_no_dev;
+	}
+
+	invalidate_vbmeta(dev);
+
+fail_no_dev:
+	;
+}
+
+static int __init dm_verity_avb_init(void)
+{
+	DMINFO("AVB error handler initialized with vbmeta device: %s",
+	       avb_vbmeta_device);
+	return 0;
+}
+
+static void __exit dm_verity_avb_exit(void)
+{
+}
+
+module_init(dm_verity_avb_init);
+module_exit(dm_verity_avb_exit);
+
+MODULE_AUTHOR("David Zeuthen <zeuthen@google.com>");
+MODULE_DESCRIPTION("AVB-specific error handler for dm-verity");
+MODULE_LICENSE("GPL");
+
+/* Declare parameter with no module prefix */
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX	"androidboot.vbmeta."
+module_param_string(device, avb_vbmeta_device, sizeof(avb_vbmeta_device), 0);
+module_param_string(invalidate_on_error, avb_invalidate_on_error,
+		    sizeof(avb_invalidate_on_error), 0);
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index e3599b4..758b802 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -251,8 +251,12 @@
 	if (v->mode == DM_VERITY_MODE_LOGGING)
 		return 0;
 
-	if (v->mode == DM_VERITY_MODE_RESTART)
+	if (v->mode == DM_VERITY_MODE_RESTART) {
+#ifdef CONFIG_DM_VERITY_AVB
+		dm_verity_avb_error_handler();
+#endif
 		kernel_restart("dm-verity device corrupted");
+	}
 
 	return 1;
 }
@@ -1040,6 +1044,15 @@
 		v->tfm = NULL;
 		goto bad;
 	}
+
+	/*
+	 * dm-verity performance can vary greatly depending on which hash
+	 * algorithm implementation is used.  Help people debug performance
+	 * problems by logging the ->cra_driver_name.
+	 */
+	DMINFO("%s using implementation \"%s\"", v->alg_name,
+	       crypto_hash_alg_common(v->tfm)->base.cra_driver_name);
+
 	v->digest_size = crypto_ahash_digestsize(v->tfm);
 	if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) {
 		ti->error = "Digest size too big";
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index 3441c10..0152d36 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -127,4 +127,6 @@
 extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
 				 sector_t block, u8 *digest, bool *is_zero);
 
+extern void dm_verity_avb_error_handler(void);
+
 #endif /* DM_VERITY_H */
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 3726eac..241e3d9 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -513,6 +513,21 @@
 	tristate
 	default MISC_RTSX_PCI || MISC_RTSX_USB
 
+config UID_SYS_STATS
+	bool "Per-UID statistics"
+	depends on PROFILING && TASK_XACCT && TASK_IO_ACCOUNTING
+	help
+	  Per-UID CPU time statistics exported to /proc/uid_cputime.
+	  Per-UID I/O statistics exported to /proc/uid_io.
+	  Per-UID procstat control in /proc/uid_procstat.
+
+config UID_SYS_STATS_DEBUG
+	bool "Per-TASK statistics"
+	depends on UID_SYS_STATS
+	default n
+	help
+	  Per-task I/O statistics exported to /proc/uid_io.
+
 source "drivers/misc/c2port/Kconfig"
 source "drivers/misc/eeprom/Kconfig"
 source "drivers/misc/cb710/Kconfig"
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index af22bbc..fca6aae 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -58,3 +58,4 @@
 obj-$(CONFIG_PCI_ENDPOINT_TEST)	+= pci_endpoint_test.o
 obj-$(CONFIG_OCXL)		+= ocxl/
 obj-$(CONFIG_MISC_RTSX)		+= cardreader/
+obj-$(CONFIG_UID_SYS_STATS)	+= uid_sys_stats.o
diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile
index cce47a1..a524289 100644
--- a/drivers/misc/lkdtm/Makefile
+++ b/drivers/misc/lkdtm/Makefile
@@ -10,6 +10,7 @@
 lkdtm-$(CONFIG_LKDTM)		+= usercopy.o
 
 KCOV_INSTRUMENT_rodata.o	:= n
+CFLAGS_lkdtm_rodata.o		+= $(DISABLE_LTO)
 
 OBJCOPYFLAGS :=
 OBJCOPYFLAGS_rodata_objcopy.o	:= \
diff --git a/drivers/misc/uid_sys_stats.c b/drivers/misc/uid_sys_stats.c
new file mode 100644
index 0000000..88dc1cd
--- /dev/null
+++ b/drivers/misc/uid_sys_stats.c
@@ -0,0 +1,703 @@
+/* drivers/misc/uid_sys_stats.c
+ *
+ * Copyright (C) 2014 - 2015 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/atomic.h>
+#include <linux/cpufreq_times.h>
+#include <linux/err.h>
+#include <linux/hashtable.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/profile.h>
+#include <linux/rtmutex.h>
+#include <linux/sched/cputime.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+
+#define UID_HASH_BITS	10
+DECLARE_HASHTABLE(hash_table, UID_HASH_BITS);
+
+static DEFINE_RT_MUTEX(uid_lock);
+static struct proc_dir_entry *cpu_parent;
+static struct proc_dir_entry *io_parent;
+static struct proc_dir_entry *proc_parent;
+
+struct io_stats {
+	u64 read_bytes;
+	u64 write_bytes;
+	u64 rchar;
+	u64 wchar;
+	u64 fsync;
+};
+
+#define UID_STATE_FOREGROUND	0
+#define UID_STATE_BACKGROUND	1
+#define UID_STATE_BUCKET_SIZE	2
+
+#define UID_STATE_TOTAL_CURR	2
+#define UID_STATE_TOTAL_LAST	3
+#define UID_STATE_DEAD_TASKS	4
+#define UID_STATE_SIZE		5
+
+#define MAX_TASK_COMM_LEN 256
+
+struct task_entry {
+	char comm[MAX_TASK_COMM_LEN];
+	pid_t pid;
+	struct io_stats io[UID_STATE_SIZE];
+	struct hlist_node hash;
+};
+
+struct uid_entry {
+	uid_t uid;
+	u64 utime;
+	u64 stime;
+	u64 active_utime;
+	u64 active_stime;
+	int state;
+	struct io_stats io[UID_STATE_SIZE];
+	struct hlist_node hash;
+#ifdef CONFIG_UID_SYS_STATS_DEBUG
+	DECLARE_HASHTABLE(task_entries, UID_HASH_BITS);
+#endif
+};
+
+static u64 compute_write_bytes(struct task_struct *task)
+{
+	if (task->ioac.write_bytes <= task->ioac.cancelled_write_bytes)
+		return 0;
+
+	return task->ioac.write_bytes - task->ioac.cancelled_write_bytes;
+}
+
+static void compute_io_bucket_stats(struct io_stats *io_bucket,
+					struct io_stats *io_curr,
+					struct io_stats *io_last,
+					struct io_stats *io_dead)
+{
+	/* A task could switch to another uid group, but its io_last in the
+	 * previous uid group could still be positive.
+	 * Therefore, check for a negative delta before each update.
+	 */
+	int64_t delta;
+
+	delta = io_curr->read_bytes + io_dead->read_bytes -
+		io_last->read_bytes;
+	io_bucket->read_bytes += delta > 0 ? delta : 0;
+	delta = io_curr->write_bytes + io_dead->write_bytes -
+		io_last->write_bytes;
+	io_bucket->write_bytes += delta > 0 ? delta : 0;
+	delta = io_curr->rchar + io_dead->rchar - io_last->rchar;
+	io_bucket->rchar += delta > 0 ? delta : 0;
+	delta = io_curr->wchar + io_dead->wchar - io_last->wchar;
+	io_bucket->wchar += delta > 0 ? delta : 0;
+	delta = io_curr->fsync + io_dead->fsync - io_last->fsync;
+	io_bucket->fsync += delta > 0 ? delta : 0;
+
+	io_last->read_bytes = io_curr->read_bytes;
+	io_last->write_bytes = io_curr->write_bytes;
+	io_last->rchar = io_curr->rchar;
+	io_last->wchar = io_curr->wchar;
+	io_last->fsync = io_curr->fsync;
+
+	memset(io_dead, 0, sizeof(struct io_stats));
+}
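/*
 * A worked example of the clamping above, with made-up numbers: suppose a
 * task accumulated rchar = 900 while in this uid group and then moved to
 * another uid, so io_last->rchar = 900, while the remaining tasks now report
 * io_curr->rchar = 100 and io_dead->rchar = 0.  The raw delta would be
 * 100 + 0 - 900 = -800; clamping it to 0 leaves the bucket untouched instead
 * of wrapping the u64 counter backwards.
 */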
+
+#ifdef CONFIG_UID_SYS_STATS_DEBUG
+static void get_full_task_comm(struct task_entry *task_entry,
+		struct task_struct *task)
+{
+	int i = 0, offset = 0, len = 0;
+	/* save one byte for terminating null character */
+	int unused_len = MAX_TASK_COMM_LEN - TASK_COMM_LEN - 1;
+	char buf[unused_len];
+	struct mm_struct *mm = task->mm;
+
+	/* fill the first TASK_COMM_LEN bytes with thread name */
+	__get_task_comm(task_entry->comm, TASK_COMM_LEN, task);
+	i = strlen(task_entry->comm);
+	while (i < TASK_COMM_LEN)
+		task_entry->comm[i++] = ' ';
+
+	/* next the executable file name */
+	if (mm) {
+		down_read(&mm->mmap_sem);
+		if (mm->exe_file) {
+			char *pathname = d_path(&mm->exe_file->f_path, buf,
+					unused_len);
+
+			if (!IS_ERR(pathname)) {
+				len = strlcpy(task_entry->comm + i, pathname,
+						unused_len);
+				i += len;
+				task_entry->comm[i++] = ' ';
+				unused_len--;
+			}
+		}
+		up_read(&mm->mmap_sem);
+	}
+	unused_len -= len;
+
+	/* fill the rest with the command line arguments;
+	 * replace each null or newline character
+	 * between args in argv with whitespace */
+	len = get_cmdline(task, buf, unused_len);
+	while (offset < len) {
+		if (buf[offset] != '\0' && buf[offset] != '\n')
+			task_entry->comm[i++] = buf[offset];
+		else
+			task_entry->comm[i++] = ' ';
+		offset++;
+	}
+
+	/* get rid of trailing whitespace in case the args were memset to
+	 * zero before being reset in userspace
+	 */
+	while (task_entry->comm[i-1] == ' ')
+		i--;
+	task_entry->comm[i] = '\0';
+}
+
+static struct task_entry *find_task_entry(struct uid_entry *uid_entry,
+		struct task_struct *task)
+{
+	struct task_entry *task_entry;
+
+	hash_for_each_possible(uid_entry->task_entries, task_entry, hash,
+			task->pid) {
+		if (task->pid == task_entry->pid) {
+			/* if thread name changed, update the entire command */
+			int len = strnchr(task_entry->comm, ' ', TASK_COMM_LEN)
+				- task_entry->comm;
+
+			if (strncmp(task_entry->comm, task->comm, len))
+				get_full_task_comm(task_entry, task);
+			return task_entry;
+		}
+	}
+	return NULL;
+}
+
+static struct task_entry *find_or_register_task(struct uid_entry *uid_entry,
+		struct task_struct *task)
+{
+	struct task_entry *task_entry;
+	pid_t pid = task->pid;
+
+	task_entry = find_task_entry(uid_entry, task);
+	if (task_entry)
+		return task_entry;
+
+	task_entry = kzalloc(sizeof(struct task_entry), GFP_ATOMIC);
+	if (!task_entry)
+		return NULL;
+
+	get_full_task_comm(task_entry, task);
+
+	task_entry->pid = pid;
+	hash_add(uid_entry->task_entries, &task_entry->hash, (unsigned int)pid);
+
+	return task_entry;
+}
+
+static void remove_uid_tasks(struct uid_entry *uid_entry)
+{
+	struct task_entry *task_entry;
+	unsigned long bkt_task;
+	struct hlist_node *tmp_task;
+
+	hash_for_each_safe(uid_entry->task_entries, bkt_task,
+			tmp_task, task_entry, hash) {
+		hash_del(&task_entry->hash);
+		kfree(task_entry);
+	}
+}
+
+static void set_io_uid_tasks_zero(struct uid_entry *uid_entry)
+{
+	struct task_entry *task_entry;
+	unsigned long bkt_task;
+
+	hash_for_each(uid_entry->task_entries, bkt_task, task_entry, hash) {
+		memset(&task_entry->io[UID_STATE_TOTAL_CURR], 0,
+			sizeof(struct io_stats));
+	}
+}
+
+static void add_uid_tasks_io_stats(struct uid_entry *uid_entry,
+		struct task_struct *task, int slot)
+{
+	struct task_entry *task_entry = find_or_register_task(uid_entry, task);
+	struct io_stats *task_io_slot = &task_entry->io[slot];
+
+	task_io_slot->read_bytes += task->ioac.read_bytes;
+	task_io_slot->write_bytes += compute_write_bytes(task);
+	task_io_slot->rchar += task->ioac.rchar;
+	task_io_slot->wchar += task->ioac.wchar;
+	task_io_slot->fsync += task->ioac.syscfs;
+}
+
+static void compute_io_uid_tasks(struct uid_entry *uid_entry)
+{
+	struct task_entry *task_entry;
+	unsigned long bkt_task;
+
+	hash_for_each(uid_entry->task_entries, bkt_task, task_entry, hash) {
+		compute_io_bucket_stats(&task_entry->io[uid_entry->state],
+					&task_entry->io[UID_STATE_TOTAL_CURR],
+					&task_entry->io[UID_STATE_TOTAL_LAST],
+					&task_entry->io[UID_STATE_DEAD_TASKS]);
+	}
+}
+
+static void show_io_uid_tasks(struct seq_file *m, struct uid_entry *uid_entry)
+{
+	struct task_entry *task_entry;
+	unsigned long bkt_task;
+
+	hash_for_each(uid_entry->task_entries, bkt_task, task_entry, hash) {
+		/* Comma-separated because the task comm may contain spaces */
+		seq_printf(m, "task,%s,%lu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu\n",
+				task_entry->comm,
+				(unsigned long)task_entry->pid,
+				task_entry->io[UID_STATE_FOREGROUND].rchar,
+				task_entry->io[UID_STATE_FOREGROUND].wchar,
+				task_entry->io[UID_STATE_FOREGROUND].read_bytes,
+				task_entry->io[UID_STATE_FOREGROUND].write_bytes,
+				task_entry->io[UID_STATE_BACKGROUND].rchar,
+				task_entry->io[UID_STATE_BACKGROUND].wchar,
+				task_entry->io[UID_STATE_BACKGROUND].read_bytes,
+				task_entry->io[UID_STATE_BACKGROUND].write_bytes,
+				task_entry->io[UID_STATE_FOREGROUND].fsync,
+				task_entry->io[UID_STATE_BACKGROUND].fsync);
+	}
+}
+#else
+static void remove_uid_tasks(struct uid_entry *uid_entry) {}
+static void set_io_uid_tasks_zero(struct uid_entry *uid_entry) {}
+static void add_uid_tasks_io_stats(struct uid_entry *uid_entry,
+		struct task_struct *task, int slot) {}
+static void compute_io_uid_tasks(struct uid_entry *uid_entry) {}
+static void show_io_uid_tasks(struct seq_file *m,
+		struct uid_entry *uid_entry) {}
+#endif
+
+static struct uid_entry *find_uid_entry(uid_t uid)
+{
+	struct uid_entry *uid_entry;
+	hash_for_each_possible(hash_table, uid_entry, hash, uid) {
+		if (uid_entry->uid == uid)
+			return uid_entry;
+	}
+	return NULL;
+}
+
+static struct uid_entry *find_or_register_uid(uid_t uid)
+{
+	struct uid_entry *uid_entry;
+
+	uid_entry = find_uid_entry(uid);
+	if (uid_entry)
+		return uid_entry;
+
+	uid_entry = kzalloc(sizeof(struct uid_entry), GFP_ATOMIC);
+	if (!uid_entry)
+		return NULL;
+
+	uid_entry->uid = uid;
+#ifdef CONFIG_UID_SYS_STATS_DEBUG
+	hash_init(uid_entry->task_entries);
+#endif
+	hash_add(hash_table, &uid_entry->hash, uid);
+
+	return uid_entry;
+}
+
+static int uid_cputime_show(struct seq_file *m, void *v)
+{
+	struct uid_entry *uid_entry = NULL;
+	struct task_struct *task, *temp;
+	struct user_namespace *user_ns = current_user_ns();
+	u64 utime;
+	u64 stime;
+	unsigned long bkt;
+	uid_t uid;
+
+	rt_mutex_lock(&uid_lock);
+
+	hash_for_each(hash_table, bkt, uid_entry, hash) {
+		uid_entry->active_stime = 0;
+		uid_entry->active_utime = 0;
+	}
+
+	rcu_read_lock();
+	do_each_thread(temp, task) {
+		uid = from_kuid_munged(user_ns, task_uid(task));
+		if (!uid_entry || uid_entry->uid != uid)
+			uid_entry = find_or_register_uid(uid);
+		if (!uid_entry) {
+			rcu_read_unlock();
+			rt_mutex_unlock(&uid_lock);
+			pr_err("%s: failed to find the uid_entry for uid %d\n",
+				__func__, uid);
+			return -ENOMEM;
+		}
+		task_cputime_adjusted(task, &utime, &stime);
+		uid_entry->active_utime += utime;
+		uid_entry->active_stime += stime;
+	} while_each_thread(temp, task);
+	rcu_read_unlock();
+
+	hash_for_each(hash_table, bkt, uid_entry, hash) {
+		u64 total_utime = uid_entry->utime +
+							uid_entry->active_utime;
+		u64 total_stime = uid_entry->stime +
+							uid_entry->active_stime;
+		seq_printf(m, "%d: %llu %llu\n", uid_entry->uid,
+			ktime_to_ms(total_utime), ktime_to_ms(total_stime));
+	}
+
+	rt_mutex_unlock(&uid_lock);
+	return 0;
+}
+
+static int uid_cputime_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, uid_cputime_show, PDE_DATA(inode));
+}
+
+static const struct file_operations uid_cputime_fops = {
+	.open		= uid_cputime_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int uid_remove_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, NULL, NULL);
+}
+
+static ssize_t uid_remove_write(struct file *file,
+			const char __user *buffer, size_t count, loff_t *ppos)
+{
+	struct uid_entry *uid_entry;
+	struct hlist_node *tmp;
+	char uids[128];
+	char *start_uid, *end_uid = NULL;
+	long int uid_start = 0, uid_end = 0;
+
+	if (count >= sizeof(uids))
+		count = sizeof(uids) - 1;
+
+	if (copy_from_user(uids, buffer, count))
+		return -EFAULT;
+
+	uids[count] = '\0';
+	end_uid = uids;
+	start_uid = strsep(&end_uid, "-");
+
+	if (!start_uid || !end_uid)
+		return -EINVAL;
+
+	if (kstrtol(start_uid, 10, &uid_start) != 0 ||
+		kstrtol(end_uid, 10, &uid_end) != 0) {
+		return -EINVAL;
+	}
+
+	/* Also remove uids from /proc/uid_time_in_state */
+	cpufreq_task_times_remove_uids(uid_start, uid_end);
+
+	rt_mutex_lock(&uid_lock);
+
+	for (; uid_start <= uid_end; uid_start++) {
+		hash_for_each_possible_safe(hash_table, uid_entry, tmp,
+							hash, (uid_t)uid_start) {
+			if (uid_start == uid_entry->uid) {
+				remove_uid_tasks(uid_entry);
+				hash_del(&uid_entry->hash);
+				kfree(uid_entry);
+			}
+		}
+	}
+
+	rt_mutex_unlock(&uid_lock);
+	return count;
+}
+
+static const struct file_operations uid_remove_fops = {
+	.open		= uid_remove_open,
+	.release	= single_release,
+	.write		= uid_remove_write,
+};
+
+
+static void add_uid_io_stats(struct uid_entry *uid_entry,
+			struct task_struct *task, int slot)
+{
+	struct io_stats *io_slot = &uid_entry->io[slot];
+
+	io_slot->read_bytes += task->ioac.read_bytes;
+	io_slot->write_bytes += compute_write_bytes(task);
+	io_slot->rchar += task->ioac.rchar;
+	io_slot->wchar += task->ioac.wchar;
+	io_slot->fsync += task->ioac.syscfs;
+
+	add_uid_tasks_io_stats(uid_entry, task, slot);
+}
+
+static void update_io_stats_all_locked(void)
+{
+	struct uid_entry *uid_entry = NULL;
+	struct task_struct *task, *temp;
+	struct user_namespace *user_ns = current_user_ns();
+	unsigned long bkt;
+	uid_t uid;
+
+	hash_for_each(hash_table, bkt, uid_entry, hash) {
+		memset(&uid_entry->io[UID_STATE_TOTAL_CURR], 0,
+			sizeof(struct io_stats));
+		set_io_uid_tasks_zero(uid_entry);
+	}
+
+	rcu_read_lock();
+	do_each_thread(temp, task) {
+		uid = from_kuid_munged(user_ns, task_uid(task));
+		if (!uid_entry || uid_entry->uid != uid)
+			uid_entry = find_or_register_uid(uid);
+		if (!uid_entry)
+			continue;
+		add_uid_io_stats(uid_entry, task, UID_STATE_TOTAL_CURR);
+	} while_each_thread(temp, task);
+	rcu_read_unlock();
+
+	hash_for_each(hash_table, bkt, uid_entry, hash) {
+		compute_io_bucket_stats(&uid_entry->io[uid_entry->state],
+					&uid_entry->io[UID_STATE_TOTAL_CURR],
+					&uid_entry->io[UID_STATE_TOTAL_LAST],
+					&uid_entry->io[UID_STATE_DEAD_TASKS]);
+		compute_io_uid_tasks(uid_entry);
+	}
+}
+
+static void update_io_stats_uid_locked(struct uid_entry *uid_entry)
+{
+	struct task_struct *task, *temp;
+	struct user_namespace *user_ns = current_user_ns();
+
+	memset(&uid_entry->io[UID_STATE_TOTAL_CURR], 0,
+		sizeof(struct io_stats));
+	set_io_uid_tasks_zero(uid_entry);
+
+	rcu_read_lock();
+	do_each_thread(temp, task) {
+		if (from_kuid_munged(user_ns, task_uid(task)) != uid_entry->uid)
+			continue;
+		add_uid_io_stats(uid_entry, task, UID_STATE_TOTAL_CURR);
+	} while_each_thread(temp, task);
+	rcu_read_unlock();
+
+	compute_io_bucket_stats(&uid_entry->io[uid_entry->state],
+				&uid_entry->io[UID_STATE_TOTAL_CURR],
+				&uid_entry->io[UID_STATE_TOTAL_LAST],
+				&uid_entry->io[UID_STATE_DEAD_TASKS]);
+	compute_io_uid_tasks(uid_entry);
+}
+
+
+static int uid_io_show(struct seq_file *m, void *v)
+{
+	struct uid_entry *uid_entry;
+	unsigned long bkt;
+
+	rt_mutex_lock(&uid_lock);
+
+	update_io_stats_all_locked();
+
+	hash_for_each(hash_table, bkt, uid_entry, hash) {
+		seq_printf(m, "%d %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
+				uid_entry->uid,
+				uid_entry->io[UID_STATE_FOREGROUND].rchar,
+				uid_entry->io[UID_STATE_FOREGROUND].wchar,
+				uid_entry->io[UID_STATE_FOREGROUND].read_bytes,
+				uid_entry->io[UID_STATE_FOREGROUND].write_bytes,
+				uid_entry->io[UID_STATE_BACKGROUND].rchar,
+				uid_entry->io[UID_STATE_BACKGROUND].wchar,
+				uid_entry->io[UID_STATE_BACKGROUND].read_bytes,
+				uid_entry->io[UID_STATE_BACKGROUND].write_bytes,
+				uid_entry->io[UID_STATE_FOREGROUND].fsync,
+				uid_entry->io[UID_STATE_BACKGROUND].fsync);
+
+		show_io_uid_tasks(m, uid_entry);
+	}
+
+	rt_mutex_unlock(&uid_lock);
+	return 0;
+}
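/*
 * Sketch of the resulting /proc/uid_io/stats format (values are made up):
 * one line per uid with eleven space-separated fields,
 *
 *   10010 4096 1024 8192 512 2048 256 4096 128 3 1
 *
 * i.e. uid, foreground rchar/wchar/read_bytes/write_bytes, background
 * rchar/wchar/read_bytes/write_bytes, foreground fsync, background fsync.
 * With CONFIG_UID_SYS_STATS_DEBUG, per-task "task,..." lines (see
 * show_io_uid_tasks()) follow each uid line.
 */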
+
+static int uid_io_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, uid_io_show, PDE_DATA(inode));
+}
+
+static const struct file_operations uid_io_fops = {
+	.open		= uid_io_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int uid_procstat_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, NULL, NULL);
+}
+
+static ssize_t uid_procstat_write(struct file *file,
+			const char __user *buffer, size_t count, loff_t *ppos)
+{
+	struct uid_entry *uid_entry;
+	uid_t uid;
+	int argc, state;
+	char input[128];
+
+	if (count >= sizeof(input))
+		return -EINVAL;
+
+	if (copy_from_user(input, buffer, count))
+		return -EFAULT;
+
+	input[count] = '\0';
+
+	argc = sscanf(input, "%u %d", &uid, &state);
+	if (argc != 2)
+		return -EINVAL;
+
+	if (state != UID_STATE_BACKGROUND && state != UID_STATE_FOREGROUND)
+		return -EINVAL;
+
+	rt_mutex_lock(&uid_lock);
+
+	uid_entry = find_or_register_uid(uid);
+	if (!uid_entry) {
+		rt_mutex_unlock(&uid_lock);
+		return -EINVAL;
+	}
+
+	if (uid_entry->state == state) {
+		rt_mutex_unlock(&uid_lock);
+		return count;
+	}
+
+	update_io_stats_uid_locked(uid_entry);
+
+	uid_entry->state = state;
+
+	rt_mutex_unlock(&uid_lock);
+
+	return count;
+}
+
+static const struct file_operations uid_procstat_fops = {
+	.open		= uid_procstat_open,
+	.release	= single_release,
+	.write		= uid_procstat_write,
+};
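/*
 * Usage sketch (the uid below is illustrative): userspace moves a uid between
 * foreground (0) and background (1) by writing "<uid> <state>" to
 * /proc/uid_procstat/set, e.g.
 *
 *   echo "10010 1" > /proc/uid_procstat/set
 *
 * which snapshots the uid's I/O counters into the old state's bucket via
 * update_io_stats_uid_locked() before the state switches.
 */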
+
+static int process_notifier(struct notifier_block *self,
+			unsigned long cmd, void *v)
+{
+	struct task_struct *task = v;
+	struct uid_entry *uid_entry;
+	u64 utime, stime;
+	uid_t uid;
+
+	if (!task)
+		return NOTIFY_OK;
+
+	rt_mutex_lock(&uid_lock);
+	uid = from_kuid_munged(current_user_ns(), task_uid(task));
+	uid_entry = find_or_register_uid(uid);
+	if (!uid_entry) {
+		pr_err("%s: failed to find uid %d\n", __func__, uid);
+		goto exit;
+	}
+
+	task_cputime_adjusted(task, &utime, &stime);
+	uid_entry->utime += utime;
+	uid_entry->stime += stime;
+
+	add_uid_io_stats(uid_entry, task, UID_STATE_DEAD_TASKS);
+
+exit:
+	rt_mutex_unlock(&uid_lock);
+	return NOTIFY_OK;
+}
+
+static struct notifier_block process_notifier_block = {
+	.notifier_call	= process_notifier,
+};
+
+static int __init proc_uid_sys_stats_init(void)
+{
+	hash_init(hash_table);
+
+	cpu_parent = proc_mkdir("uid_cputime", NULL);
+	if (!cpu_parent) {
+		pr_err("%s: failed to create uid_cputime proc entry\n",
+			__func__);
+		goto err;
+	}
+
+	proc_create_data("remove_uid_range", 0222, cpu_parent,
+		&uid_remove_fops, NULL);
+	proc_create_data("show_uid_stat", 0444, cpu_parent,
+		&uid_cputime_fops, NULL);
+
+	io_parent = proc_mkdir("uid_io", NULL);
+	if (!io_parent) {
+		pr_err("%s: failed to create uid_io proc entry\n",
+			__func__);
+		goto err;
+	}
+
+	proc_create_data("stats", 0444, io_parent,
+		&uid_io_fops, NULL);
+
+	proc_parent = proc_mkdir("uid_procstat", NULL);
+	if (!proc_parent) {
+		pr_err("%s: failed to create uid_procstat proc entry\n",
+			__func__);
+		goto err;
+	}
+
+	proc_create_data("set", 0222, proc_parent,
+		&uid_procstat_fops, NULL);
+
+	profile_event_register(PROFILE_TASK_EXIT, &process_notifier_block);
+
+	return 0;
+
+err:
+	remove_proc_subtree("uid_cputime", NULL);
+	remove_proc_subtree("uid_io", NULL);
+	remove_proc_subtree("uid_procstat", NULL);
+	return -ENOMEM;
+}
+
+early_initcall(proc_uid_sys_stats_init);
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index f57f5de..f3dc49fa 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -441,7 +441,8 @@
 #endif
 
 	mmc_start_host(host);
-	mmc_register_pm_notifier(host);
+	if (!(host->pm_flags & MMC_PM_IGNORE_PM_NOTIFY))
+		mmc_register_pm_notifier(host);
 
 	return 0;
 }
@@ -458,7 +459,8 @@
  */
 void mmc_remove_host(struct mmc_host *host)
 {
-	mmc_unregister_pm_notifier(host);
+	if (!(host->pm_flags & MMC_PM_IGNORE_PM_NOTIFY))
+		mmc_unregister_pm_notifier(host);
 	mmc_stop_host(host);
 
 #ifdef CONFIG_DEBUG_FS
diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c
index bdc4d23..7eae088 100644
--- a/drivers/net/ppp/ppp_async.c
+++ b/drivers/net/ppp/ppp_async.c
@@ -770,7 +770,7 @@
 {
 	struct sk_buff *skb;
 	unsigned char *p;
-	unsigned int len, fcs, proto;
+	unsigned int len, fcs;
 
 	skb = ap->rpkt;
 	if (ap->state & (SC_TOSS | SC_ESCAPE))
@@ -799,14 +799,14 @@
 			goto err;
 		p = skb_pull(skb, 2);
 	}
-	proto = p[0];
-	if (proto & 1) {
-		/* protocol is compressed */
-		*(u8 *)skb_push(skb, 1) = 0;
-	} else {
+
+	/* If protocol field is not compressed, it can be LCP packet */
+	if (!(p[0] & 0x01)) {
+		unsigned int proto;
+
 		if (skb->len < 2)
 			goto err;
-		proto = (proto << 8) + p[1];
+		proto = (p[0] << 8) + p[1];
 		if (proto == PPP_LCP)
 			async_lcp_peek(ap, p, skb->len, 1);
 	}
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 02ad03a..8b1ef1b 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -1965,6 +1965,46 @@
 	ppp_recv_unlock(ppp);
 }
 
+/**
+ * __ppp_decompress_proto - Decompress protocol field, slim version.
+ * @skb: Socket buffer where protocol field should be decompressed. It must have
+ *	 at least 1 byte of head room and 1 byte of linear data. First byte of
+ *	 data must be a protocol field byte.
+ *
+ * Decompress protocol field in PPP header if it's compressed, e.g. when
+ * Protocol-Field-Compression (PFC) was negotiated. No checks w.r.t. skb data
+ * length are done in this function.
+ */
+static void __ppp_decompress_proto(struct sk_buff *skb)
+{
+	if (skb->data[0] & 0x01)
+		*(u8 *)skb_push(skb, 1) = 0x00;
+}
+
+/**
+ * ppp_decompress_proto - Check skb data room and decompress protocol field.
+ * @skb: Socket buffer where protocol field should be decompressed. First byte
+ *	 of data must be a protocol field byte.
+ *
+ * Decompress protocol field in PPP header if it's compressed, e.g. when
+ * Protocol-Field-Compression (PFC) was negotiated. This function also makes
+ * sure that skb data room is sufficient for Protocol field, before and after
+ * decompression.
+ *
+ * Return: true - decompressed successfully, false - not enough room in skb.
+ */
+static bool ppp_decompress_proto(struct sk_buff *skb)
+{
+	/* At least one byte should be present (if protocol is compressed) */
+	if (!pskb_may_pull(skb, 1))
+		return false;
+
+	__ppp_decompress_proto(skb);
+
+	/* Protocol field should occupy 2 bytes when not compressed */
+	return pskb_may_pull(skb, 2);
+}
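/*
 * A byte-level illustration (example bytes only): with PFC negotiated, the
 * peer may send an IPv4 frame whose protocol field is a single odd byte,
 *
 *   on the wire:       21 45 00 ...     (0x21 has bit 0 set => compressed)
 *   after skb_push():  00 21 45 00 ...  (restored two-byte PPP_IP, 0x0021)
 *
 * A first byte with bit 0 clear (e.g. 0x80 for 0x8021, IPCP) means the
 * protocol field already occupies two bytes and is left as-is.
 */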
+
 void
 ppp_input(struct ppp_channel *chan, struct sk_buff *skb)
 {
@@ -1977,7 +2017,7 @@
 	}
 
 	read_lock_bh(&pch->upl);
-	if (!pskb_may_pull(skb, 2)) {
+	if (!ppp_decompress_proto(skb)) {
 		kfree_skb(skb);
 		if (pch->ppp) {
 			++pch->ppp->dev->stats.rx_length_errors;
@@ -2074,6 +2114,9 @@
 	if (ppp->flags & SC_MUST_COMP && ppp->rstate & SC_DC_FERROR)
 		goto err;
 
+	/* At this point the "Protocol" field MUST be decompressed, either in
+	 * ppp_input(), ppp_decompress_frame() or in ppp_receive_mp_frame().
+	 */
 	proto = PPP_PROTO(skb);
 	switch (proto) {
 	case PPP_VJC_COMP:
@@ -2245,6 +2288,9 @@
 		skb_put(skb, len);
 		skb_pull(skb, 2);	/* pull off the A/C bytes */
 
+		/* Don't call __ppp_decompress_proto() here, but instead rely on
+		 * corresponding algo (mppe/bsd/deflate) to decompress it.
+		 */
 	} else {
 		/* Uncompressed frame - pass to decompressor so it
 		   can update its dictionary if necessary. */
@@ -2290,9 +2336,11 @@
 
 	/*
 	 * Do protocol ID decompression on the first fragment of each packet.
+	 * We have to do that here, because ppp_receive_nonmp_frame() expects
+	 * decompressed protocol field.
 	 */
-	if ((PPP_MP_CB(skb)->BEbits & B) && (skb->data[0] & 1))
-		*(u8 *)skb_push(skb, 1) = 0;
+	if (PPP_MP_CB(skb)->BEbits & B)
+		__ppp_decompress_proto(skb);
 
 	/*
 	 * Expand sequence number to 32 bits, making it as close
diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c
index 047f6c6..d02ba24 100644
--- a/drivers/net/ppp/ppp_synctty.c
+++ b/drivers/net/ppp/ppp_synctty.c
@@ -709,11 +709,10 @@
 		p = skb_pull(skb, 2);
 	}
 
-	/* decompress protocol field if compressed */
-	if (p[0] & 1) {
-		/* protocol is compressed */
-		*(u8 *)skb_push(skb, 1) = 0;
-	} else if (skb->len < 2)
+	/* PPP packet length should be >= 2 bytes when protocol field is not
+	 * compressed.
+	 */
+	if (!(p[0] & 0x01) && skb->len < 2)
 		goto err;
 
 	/* queue the frame to be processed */
diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index 9ad3ff4..b626001 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -325,11 +325,6 @@
 			skb_pull(skb, 2);
 		}
 
-		if ((*skb->data) & 1) {
-			/* protocol is compressed */
-			*(u8 *)skb_push(skb, 1) = 0;
-		}
-
 		skb->ip_summed = CHECKSUM_NONE;
 		skb_set_network_header(skb, skb->head-skb->data);
 		ppp_input(&po->chan, skb);
diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig
index 166920a..8c456a6 100644
--- a/drivers/net/wireless/Kconfig
+++ b/drivers/net/wireless/Kconfig
@@ -114,4 +114,11 @@
 
 	  If you choose to build a module, it'll be called rndis_wlan.
 
+config VIRT_WIFI
+	tristate "Wifi wrapper for ethernet drivers"
+	depends on CFG80211
+	---help---
+	  This option adds support for ethernet connections to appear as if they
+	  are wifi connections through a special rtnetlink device.
+
 endif # WLAN
diff --git a/drivers/net/wireless/Makefile b/drivers/net/wireless/Makefile
index 7fc9630..6cfe745 100644
--- a/drivers/net/wireless/Makefile
+++ b/drivers/net/wireless/Makefile
@@ -27,3 +27,5 @@
 obj-$(CONFIG_USB_NET_RNDIS_WLAN)	+= rndis_wlan.o
 
 obj-$(CONFIG_MAC80211_HWSIM)	+= mac80211_hwsim.o
+
+obj-$(CONFIG_VIRT_WIFI)	+= virt_wifi.o
diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virt_wifi.c
new file mode 100644
index 0000000..946246c
--- /dev/null
+++ b/drivers/net/wireless/virt_wifi.c
@@ -0,0 +1,633 @@
+// SPDX-License-Identifier: GPL-2.0
+/* drivers/net/wireless/virt_wifi.c
+ *
+ * A fake implementation of cfg80211_ops that can be tacked on to an ethernet
+ * net_device to make it appear as a wireless connection.
+ *
+ * Copyright (C) 2018 Google, Inc.
+ *
+ * Author: schuffelen@google.com
+ */
+
+#include <net/cfg80211.h>
+#include <net/rtnetlink.h>
+#include <linux/etherdevice.h>
+#include <linux/module.h>
+
+static struct wiphy *common_wiphy;
+
+struct virt_wifi_wiphy_priv {
+	struct delayed_work scan_result;
+	struct cfg80211_scan_request *scan_request;
+	bool being_deleted;
+};
+
+static struct ieee80211_channel channel_2ghz = {
+	.band = NL80211_BAND_2GHZ,
+	.center_freq = 2432,
+	.hw_value = 2432,
+	.max_power = 20,
+};
+
+static struct ieee80211_rate bitrates_2ghz[] = {
+	{ .bitrate = 10 },
+	{ .bitrate = 20 },
+	{ .bitrate = 55 },
+	{ .bitrate = 110 },
+	{ .bitrate = 60 },
+	{ .bitrate = 120 },
+	{ .bitrate = 240 },
+};
+
+static struct ieee80211_supported_band band_2ghz = {
+	.channels = &channel_2ghz,
+	.bitrates = bitrates_2ghz,
+	.band = NL80211_BAND_2GHZ,
+	.n_channels = 1,
+	.n_bitrates = ARRAY_SIZE(bitrates_2ghz),
+	.ht_cap = {
+		.ht_supported = true,
+		.cap = IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
+		       IEEE80211_HT_CAP_GRN_FLD |
+		       IEEE80211_HT_CAP_SGI_20 |
+		       IEEE80211_HT_CAP_SGI_40 |
+		       IEEE80211_HT_CAP_DSSSCCK40,
+		.ampdu_factor = 0x3,
+		.ampdu_density = 0x6,
+		.mcs = {
+			.rx_mask = {0xff, 0xff},
+			.tx_params = IEEE80211_HT_MCS_TX_DEFINED,
+		},
+	},
+};
+
+static struct ieee80211_channel channel_5ghz = {
+	.band = NL80211_BAND_5GHZ,
+	.center_freq = 5240,
+	.hw_value = 5240,
+	.max_power = 20,
+};
+
+static struct ieee80211_rate bitrates_5ghz[] = {
+	{ .bitrate = 60 },
+	{ .bitrate = 120 },
+	{ .bitrate = 240 },
+};
+
+#define RX_MCS_MAP (IEEE80211_VHT_MCS_SUPPORT_0_9 << 0 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 2 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 4 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 6 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 8 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 10 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 12 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 14)
+
+#define TX_MCS_MAP (IEEE80211_VHT_MCS_SUPPORT_0_9 << 0 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 2 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 4 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 6 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 8 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 10 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 12 | \
+		    IEEE80211_VHT_MCS_SUPPORT_0_9 << 14)
+
+static struct ieee80211_supported_band band_5ghz = {
+	.channels = &channel_5ghz,
+	.bitrates = bitrates_5ghz,
+	.band = NL80211_BAND_5GHZ,
+	.n_channels = 1,
+	.n_bitrates = ARRAY_SIZE(bitrates_5ghz),
+	.ht_cap = {
+		.ht_supported = true,
+		.cap = IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
+		       IEEE80211_HT_CAP_GRN_FLD |
+		       IEEE80211_HT_CAP_SGI_20 |
+		       IEEE80211_HT_CAP_SGI_40 |
+		       IEEE80211_HT_CAP_DSSSCCK40,
+		.ampdu_factor = 0x3,
+		.ampdu_density = 0x6,
+		.mcs = {
+			.rx_mask = {0xff, 0xff},
+			.tx_params = IEEE80211_HT_MCS_TX_DEFINED,
+		},
+	},
+	.vht_cap = {
+		.vht_supported = true,
+		.cap = IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 |
+		       IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ |
+		       IEEE80211_VHT_CAP_RXLDPC |
+		       IEEE80211_VHT_CAP_SHORT_GI_80 |
+		       IEEE80211_VHT_CAP_SHORT_GI_160 |
+		       IEEE80211_VHT_CAP_TXSTBC |
+		       IEEE80211_VHT_CAP_RXSTBC_1 |
+		       IEEE80211_VHT_CAP_RXSTBC_2 |
+		       IEEE80211_VHT_CAP_RXSTBC_3 |
+		       IEEE80211_VHT_CAP_RXSTBC_4 |
+		       IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK,
+		.vht_mcs = {
+			.rx_mcs_map = cpu_to_le16(RX_MCS_MAP),
+			.tx_mcs_map = cpu_to_le16(TX_MCS_MAP),
+		}
+	},
+};
+
+/* Assigned at module init. Guaranteed locally-administered and unicast. */
+static u8 fake_router_bssid[ETH_ALEN] __ro_after_init = {};
+
+/* Called with the rtnl lock held. */
+static int virt_wifi_scan(struct wiphy *wiphy,
+			  struct cfg80211_scan_request *request)
+{
+	struct virt_wifi_wiphy_priv *priv = wiphy_priv(wiphy);
+
+	wiphy_debug(wiphy, "scan\n");
+
+	if (priv->scan_request || priv->being_deleted)
+		return -EBUSY;
+
+	priv->scan_request = request;
+	schedule_delayed_work(&priv->scan_result, HZ * 2);
+
+	return 0;
+}
+
+/* Acquires and releases the rdev BSS lock. */
+static void virt_wifi_scan_result(struct work_struct *work)
+{
+	struct {
+		u8 tag;
+		u8 len;
+		u8 ssid[8];
+	} __packed ssid = {
+		.tag = WLAN_EID_SSID, .len = 8, .ssid = "VirtWifi",
+	};
+	struct cfg80211_bss *informed_bss;
+	struct virt_wifi_wiphy_priv *priv =
+		container_of(work, struct virt_wifi_wiphy_priv,
+			     scan_result.work);
+	struct wiphy *wiphy = priv_to_wiphy(priv);
+	struct cfg80211_scan_info scan_info = { .aborted = false };
+
+	informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz,
+					   CFG80211_BSS_FTYPE_PRESP,
+					   fake_router_bssid,
+					   ktime_get_boot_ns(),
+					   WLAN_CAPABILITY_ESS, 0,
+					   (void *)&ssid, sizeof(ssid),
+					   DBM_TO_MBM(-50), GFP_KERNEL);
+	cfg80211_put_bss(wiphy, informed_bss);
+
+	/* Schedules work which acquires and releases the rtnl lock. */
+	cfg80211_scan_done(priv->scan_request, &scan_info);
+	priv->scan_request = NULL;
+}
+
+/* May acquire and release the rdev BSS lock. */
+static void virt_wifi_cancel_scan(struct wiphy *wiphy)
+{
+	struct virt_wifi_wiphy_priv *priv = wiphy_priv(wiphy);
+
+	cancel_delayed_work_sync(&priv->scan_result);
+	/* Clean up dangling callbacks if necessary. */
+	if (priv->scan_request) {
+		struct cfg80211_scan_info scan_info = { .aborted = true };
+		/* Schedules work which acquires and releases the rtnl lock. */
+		cfg80211_scan_done(priv->scan_request, &scan_info);
+		priv->scan_request = NULL;
+	}
+}
+
+struct virt_wifi_netdev_priv {
+	struct delayed_work connect;
+	struct net_device *lowerdev;
+	struct net_device *upperdev;
+	u32 tx_packets;
+	u32 tx_failed;
+	u8 connect_requested_bss[ETH_ALEN];
+	bool is_up;
+	bool is_connected;
+	bool being_deleted;
+};
+
+/* Called with the rtnl lock held. */
+static int virt_wifi_connect(struct wiphy *wiphy, struct net_device *netdev,
+			     struct cfg80211_connect_params *sme)
+{
+	struct virt_wifi_netdev_priv *priv = netdev_priv(netdev);
+	bool could_schedule;
+
+	if (priv->being_deleted || !priv->is_up)
+		return -EBUSY;
+
+	could_schedule = schedule_delayed_work(&priv->connect, HZ * 2);
+	if (!could_schedule)
+		return -EBUSY;
+
+	if (sme->bssid)
+		ether_addr_copy(priv->connect_requested_bss, sme->bssid);
+	else
+		eth_zero_addr(priv->connect_requested_bss);
+
+	wiphy_debug(wiphy, "connect\n");
+
+	return 0;
+}
+
+/* Acquires and releases the rdev event lock. */
+static void virt_wifi_connect_complete(struct work_struct *work)
+{
+	struct virt_wifi_netdev_priv *priv =
+		container_of(work, struct virt_wifi_netdev_priv, connect.work);
+	u8 *requested_bss = priv->connect_requested_bss;
+	bool has_addr = !is_zero_ether_addr(requested_bss);
+	bool right_addr = ether_addr_equal(requested_bss, fake_router_bssid);
+	u16 status = WLAN_STATUS_SUCCESS;
+
+	if (!priv->is_up || (has_addr && !right_addr))
+		status = WLAN_STATUS_UNSPECIFIED_FAILURE;
+	else
+		priv->is_connected = true;
+
+	/* Schedules an event that acquires the rtnl lock. */
+	cfg80211_connect_result(priv->upperdev, requested_bss, NULL, 0, NULL, 0,
+				status, GFP_KERNEL);
+	netif_carrier_on(priv->upperdev);
+}
+
+/* May acquire and release the rdev event lock. */
+static void virt_wifi_cancel_connect(struct net_device *netdev)
+{
+	struct virt_wifi_netdev_priv *priv = netdev_priv(netdev);
+
+	/* If there is work pending, clean up dangling callbacks. */
+	if (cancel_delayed_work_sync(&priv->connect)) {
+		/* Schedules an event that acquires the rtnl lock. */
+		cfg80211_connect_result(priv->upperdev,
+					priv->connect_requested_bss, NULL, 0,
+					NULL, 0,
+					WLAN_STATUS_UNSPECIFIED_FAILURE,
+					GFP_KERNEL);
+	}
+}
+
+/* Called with the rtnl lock held. Acquires the rdev event lock. */
+static int virt_wifi_disconnect(struct wiphy *wiphy, struct net_device *netdev,
+				u16 reason_code)
+{
+	struct virt_wifi_netdev_priv *priv = netdev_priv(netdev);
+
+	if (priv->being_deleted)
+		return -EBUSY;
+
+	wiphy_debug(wiphy, "disconnect\n");
+	virt_wifi_cancel_connect(netdev);
+
+	cfg80211_disconnected(netdev, reason_code, NULL, 0, true, GFP_KERNEL);
+	priv->is_connected = false;
+	netif_carrier_off(netdev);
+
+	return 0;
+}
+
+/* Called with the rtnl lock held. */
+static int virt_wifi_get_station(struct wiphy *wiphy, struct net_device *dev,
+				 const u8 *mac, struct station_info *sinfo)
+{
+	struct virt_wifi_netdev_priv *priv = netdev_priv(dev);
+
+	wiphy_debug(wiphy, "get_station\n");
+
+	if (!priv->is_connected || !ether_addr_equal(mac, fake_router_bssid))
+		return -ENOENT;
+
+	sinfo->filled = BIT_ULL(NL80211_STA_INFO_TX_PACKETS) |
+		BIT_ULL(NL80211_STA_INFO_TX_FAILED) |
+		BIT_ULL(NL80211_STA_INFO_SIGNAL) |
+		BIT_ULL(NL80211_STA_INFO_TX_BITRATE);
+	sinfo->tx_packets = priv->tx_packets;
+	sinfo->tx_failed = priv->tx_failed;
+	/* For CFG80211_SIGNAL_TYPE_MBM, value is expressed in _dBm_ */
+	sinfo->signal = -50;
+	sinfo->txrate = (struct rate_info) {
+		.legacy = 10, /* units are 100kbit/s */
+	};
+	return 0;
+}
+
+/* Called with the rtnl lock held. */
+static int virt_wifi_dump_station(struct wiphy *wiphy, struct net_device *dev,
+				  int idx, u8 *mac, struct station_info *sinfo)
+{
+	struct virt_wifi_netdev_priv *priv = netdev_priv(dev);
+
+	wiphy_debug(wiphy, "dump_station\n");
+
+	if (idx != 0 || !priv->is_connected)
+		return -ENOENT;
+
+	ether_addr_copy(mac, fake_router_bssid);
+	return virt_wifi_get_station(wiphy, dev, fake_router_bssid, sinfo);
+}
+
+static const struct cfg80211_ops virt_wifi_cfg80211_ops = {
+	.scan = virt_wifi_scan,
+
+	.connect = virt_wifi_connect,
+	.disconnect = virt_wifi_disconnect,
+
+	.get_station = virt_wifi_get_station,
+	.dump_station = virt_wifi_dump_station,
+};
+
+/* Acquires and releases the rtnl lock. */
+static struct wiphy *virt_wifi_make_wiphy(void)
+{
+	struct wiphy *wiphy;
+	struct virt_wifi_wiphy_priv *priv;
+	int err;
+
+	wiphy = wiphy_new(&virt_wifi_cfg80211_ops, sizeof(*priv));
+
+	if (!wiphy)
+		return NULL;
+
+	wiphy->max_scan_ssids = 4;
+	wiphy->max_scan_ie_len = 1000;
+	wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM;
+
+	wiphy->bands[NL80211_BAND_2GHZ] = &band_2ghz;
+	wiphy->bands[NL80211_BAND_5GHZ] = &band_5ghz;
+	wiphy->bands[NL80211_BAND_60GHZ] = NULL;
+
+	wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION);
+
+	priv = wiphy_priv(wiphy);
+	priv->being_deleted = false;
+	priv->scan_request = NULL;
+	INIT_DELAYED_WORK(&priv->scan_result, virt_wifi_scan_result);
+
+	err = wiphy_register(wiphy);
+	if (err < 0) {
+		wiphy_free(wiphy);
+		return NULL;
+	}
+
+	return wiphy;
+}
+
+/* Acquires and releases the rtnl lock. */
+static void virt_wifi_destroy_wiphy(struct wiphy *wiphy)
+{
+	struct virt_wifi_wiphy_priv *priv;
+
+	WARN(!wiphy, "%s called with null wiphy", __func__);
+	if (!wiphy)
+		return;
+
+	priv = wiphy_priv(wiphy);
+	priv->being_deleted = true;
+	virt_wifi_cancel_scan(wiphy);
+
+	if (wiphy->registered)
+		wiphy_unregister(wiphy);
+	wiphy_free(wiphy);
+}
+
+/* Enters and exits a RCU-bh critical section. */
+static netdev_tx_t virt_wifi_start_xmit(struct sk_buff *skb,
+					struct net_device *dev)
+{
+	struct virt_wifi_netdev_priv *priv = netdev_priv(dev);
+
+	priv->tx_packets++;
+	if (!priv->is_connected) {
+		priv->tx_failed++;
+		return NET_XMIT_DROP;
+	}
+
+	skb->dev = priv->lowerdev;
+	return dev_queue_xmit(skb);
+}
+
+/* Called with rtnl lock held. */
+static int virt_wifi_net_device_open(struct net_device *dev)
+{
+	struct virt_wifi_netdev_priv *priv = netdev_priv(dev);
+
+	priv->is_up = true;
+	return 0;
+}
+
+/* Called with rtnl lock held. */
+static int virt_wifi_net_device_stop(struct net_device *dev)
+{
+	struct virt_wifi_netdev_priv *n_priv = netdev_priv(dev);
+
+	n_priv->is_up = false;
+
+	if (!dev->ieee80211_ptr)
+		return 0;
+
+	virt_wifi_cancel_scan(dev->ieee80211_ptr->wiphy);
+	virt_wifi_cancel_connect(dev);
+	netif_carrier_off(dev);
+
+	return 0;
+}
+
+static const struct net_device_ops virt_wifi_ops = {
+	.ndo_start_xmit = virt_wifi_start_xmit,
+	.ndo_open = virt_wifi_net_device_open,
+	.ndo_stop = virt_wifi_net_device_stop,
+};
+
+/* Invoked as part of rtnl lock release. */
+static void virt_wifi_net_device_destructor(struct net_device *dev)
+{
+	/* Delayed past dellink to allow nl80211 to react to the device being
+	 * deleted.
+	 */
+	kfree(dev->ieee80211_ptr);
+	dev->ieee80211_ptr = NULL;
+	free_netdev(dev);
+}
+
+/* No lock interaction. */
+static void virt_wifi_setup(struct net_device *dev)
+{
+	ether_setup(dev);
+	dev->netdev_ops = &virt_wifi_ops;
+	dev->priv_destructor = virt_wifi_net_device_destructor;
+}
+
+/* Called in a RCU read critical section from netif_receive_skb */
+static rx_handler_result_t virt_wifi_rx_handler(struct sk_buff **pskb)
+{
+	struct sk_buff *skb = *pskb;
+	struct virt_wifi_netdev_priv *priv =
+		rcu_dereference(skb->dev->rx_handler_data);
+
+	if (!priv->is_connected)
+		return RX_HANDLER_PASS;
+
+	/* GFP_ATOMIC because this is a packet interrupt handler. */
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (!skb) {
+		dev_err(&priv->upperdev->dev, "can't skb_share_check\n");
+		return RX_HANDLER_CONSUMED;
+	}
+
+	*pskb = skb;
+	skb->dev = priv->upperdev;
+	skb->pkt_type = PACKET_HOST;
+	return RX_HANDLER_ANOTHER;
+}
+
+/* Called with rtnl lock held. */
+static int virt_wifi_newlink(struct net *src_net, struct net_device *dev,
+			     struct nlattr *tb[], struct nlattr *data[],
+			     struct netlink_ext_ack *extack)
+{
+	struct virt_wifi_netdev_priv *priv = netdev_priv(dev);
+	int err;
+
+	if (!tb[IFLA_LINK])
+		return -EINVAL;
+
+	netif_carrier_off(dev);
+
+	priv->upperdev = dev;
+	priv->lowerdev = __dev_get_by_index(src_net,
+					    nla_get_u32(tb[IFLA_LINK]));
+
+	if (!priv->lowerdev)
+		return -ENODEV;
+	if (!tb[IFLA_MTU])
+		dev->mtu = priv->lowerdev->mtu;
+	else if (dev->mtu > priv->lowerdev->mtu)
+		return -EINVAL;
+
+	err = netdev_rx_handler_register(priv->lowerdev, virt_wifi_rx_handler,
+					 priv);
+	if (err) {
+		dev_err(&priv->lowerdev->dev,
+			"can't netdev_rx_handler_register: %d\n", err);
+		return err;
+	}
+
+	eth_hw_addr_inherit(dev, priv->lowerdev);
+	netif_stacked_transfer_operstate(priv->lowerdev, dev);
+
+	SET_NETDEV_DEV(dev, &priv->lowerdev->dev);
+	dev->ieee80211_ptr = kzalloc(sizeof(*dev->ieee80211_ptr), GFP_KERNEL);
+
+	if (!dev->ieee80211_ptr) {
+		err = -ENOMEM;
+		goto remove_handler;
+	}
+
+	dev->ieee80211_ptr->iftype = NL80211_IFTYPE_STATION;
+	dev->ieee80211_ptr->wiphy = common_wiphy;
+
+	err = register_netdevice(dev);
+	if (err) {
+		dev_err(&priv->lowerdev->dev, "can't register_netdevice: %d\n",
+			err);
+		goto free_wireless_dev;
+	}
+
+	err = netdev_upper_dev_link(priv->lowerdev, dev, extack);
+	if (err) {
+		dev_err(&priv->lowerdev->dev, "can't netdev_upper_dev_link: %d\n",
+			err);
+		goto unregister_netdev;
+	}
+
+	priv->being_deleted = false;
+	priv->is_connected = false;
+	priv->is_up = false;
+	INIT_DELAYED_WORK(&priv->connect, virt_wifi_connect_complete);
+
+	return 0;
+unregister_netdev:
+	unregister_netdevice(dev);
+free_wireless_dev:
+	kfree(dev->ieee80211_ptr);
+	dev->ieee80211_ptr = NULL;
+remove_handler:
+	netdev_rx_handler_unregister(priv->lowerdev);
+
+	return err;
+}
+
+/* Called with rtnl lock held. */
+static void virt_wifi_dellink(struct net_device *dev,
+			      struct list_head *head)
+{
+	struct virt_wifi_netdev_priv *priv = netdev_priv(dev);
+
+	if (dev->ieee80211_ptr)
+		virt_wifi_cancel_scan(dev->ieee80211_ptr->wiphy);
+
+	priv->being_deleted = true;
+	virt_wifi_cancel_connect(dev);
+	netif_carrier_off(dev);
+
+	netdev_rx_handler_unregister(priv->lowerdev);
+	netdev_upper_dev_unlink(priv->lowerdev, dev);
+
+	unregister_netdevice_queue(dev, head);
+
+	/* Deleting the wiphy is handled in the module destructor. */
+}
+
+static struct rtnl_link_ops virt_wifi_link_ops = {
+	.kind		= "virt_wifi",
+	.setup		= virt_wifi_setup,
+	.newlink	= virt_wifi_newlink,
+	.dellink	= virt_wifi_dellink,
+	.priv_size	= sizeof(struct virt_wifi_netdev_priv),
+};
+
+/* Acquires and releases the rtnl lock. */
+static int __init virt_wifi_init_module(void)
+{
+	int err;
+
+	/* Guaranteed to be locally-administered and not multicast. */
+	eth_random_addr(fake_router_bssid);
+
+	common_wiphy = virt_wifi_make_wiphy();
+	if (!common_wiphy)
+		return -ENOMEM;
+
+	err = rtnl_link_register(&virt_wifi_link_ops);
+	if (err)
+		virt_wifi_destroy_wiphy(common_wiphy);
+
+	return err;
+}
+
+/* Acquires and releases the rtnl lock. */
+static void __exit virt_wifi_cleanup_module(void)
+{
+	/* Will delete any devices that depend on the wiphy. */
+	rtnl_link_unregister(&virt_wifi_link_ops);
+	virt_wifi_destroy_wiphy(common_wiphy);
+}
+
+module_init(virt_wifi_init_module);
+module_exit(virt_wifi_cleanup_module);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Cody Schuffelen <schuffelen@google.com>");
+MODULE_DESCRIPTION("Driver for a wireless wrapper of ethernet devices");
+MODULE_ALIAS_RTNL_LINK("virt_wifi");
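/*
 * Usage sketch (device names are illustrative): the wrapper is instantiated
 * on top of an existing ethernet device via rtnetlink, e.g. with iproute2:
 *
 *   ip link add link eth0 name wlan42 type virt_wifi
 *
 * IFLA_LINK is mandatory (see virt_wifi_newlink()); scans then report a
 * single fake "VirtWifi" BSS and connect/disconnect are simulated with
 * delayed work against that BSS.
 */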
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 800ad25..2aa4261d 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -1072,42 +1072,66 @@
 	return 0;
 }
 
+/*
+ * Convert configs to something easy to use in C code
+ */
+#if defined(CONFIG_CMDLINE_FORCE)
+static const int overwrite_incoming_cmdline = 1;
+static const int read_dt_cmdline;
+static const int concat_cmdline;
+#elif defined(CONFIG_CMDLINE_EXTEND)
+static const int overwrite_incoming_cmdline;
+static const int read_dt_cmdline = 1;
+static const int concat_cmdline = 1;
+#else /* CMDLINE_FROM_BOOTLOADER */
+static const int overwrite_incoming_cmdline;
+static const int read_dt_cmdline = 1;
+static const int concat_cmdline;
+#endif
+
+#ifdef CONFIG_CMDLINE
+static const char *config_cmdline = CONFIG_CMDLINE;
+#else
+static const char *config_cmdline = "";
+#endif
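/*
 * A minimal illustration of the three policies above, assuming the incoming
 * cmdline buffer is empty, CONFIG_CMDLINE="console=ttyS0" and a devicetree
 * /chosen/bootargs of "root=/dev/vda rw":
 *
 *   CMDLINE_FROM_BOOTLOADER: "root=/dev/vda rw"  (CONFIG_CMDLINE is only
 *                            used when bootargs is absent or empty)
 *   CMDLINE_EXTEND:          "console=ttyS0 root=/dev/vda rw"
 *   CMDLINE_FORCE:           "console=ttyS0"
 */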
+
 int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
 				     int depth, void *data)
 {
-	int l;
-	const char *p;
+	int l = 0;
+	const char *p = NULL;
+	char *cmdline = data;
 
 	pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname);
 
-	if (depth != 1 || !data ||
+	if (depth != 1 || !cmdline ||
 	    (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0))
 		return 0;
 
 	early_init_dt_check_for_initrd(node);
 
-	/* Retrieve command line */
-	p = of_get_flat_dt_prop(node, "bootargs", &l);
-	if (p != NULL && l > 0)
-		strlcpy(data, p, min((int)l, COMMAND_LINE_SIZE));
+	/* Use CONFIG_CMDLINE if forced, or if the incoming cmdline is empty */
+	if (overwrite_incoming_cmdline || !cmdline[0])
+		strlcpy(cmdline, config_cmdline, COMMAND_LINE_SIZE);
 
-	/*
-	 * CONFIG_CMDLINE is meant to be a default in case nothing else
-	 * managed to set the command line, unless CONFIG_CMDLINE_FORCE
-	 * is set in which case we override whatever was found earlier.
-	 */
-#ifdef CONFIG_CMDLINE
-#if defined(CONFIG_CMDLINE_EXTEND)
-	strlcat(data, " ", COMMAND_LINE_SIZE);
-	strlcat(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
-#elif defined(CONFIG_CMDLINE_FORCE)
-	strlcpy(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
-#else
-	/* No arguments from boot loader, use kernel's  cmdl*/
-	if (!((char *)data)[0])
-		strlcpy(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
-#endif
-#endif /* CONFIG_CMDLINE */
+	/* Retrieve command line unless forcing */
+	if (read_dt_cmdline)
+		p = of_get_flat_dt_prop(node, "bootargs", &l);
+
+	if (p != NULL && l > 0) {
+		if (concat_cmdline) {
+			int cmdline_len;
+			int copy_len;
+			strlcat(cmdline, " ", COMMAND_LINE_SIZE);
+			cmdline_len = strlen(cmdline);
+			copy_len = COMMAND_LINE_SIZE - cmdline_len - 1;
+			copy_len = min((int)l, copy_len);
+			strncpy(cmdline + cmdline_len, p, copy_len);
+			cmdline[cmdline_len + copy_len] = '\0';
+		} else {
+			strlcpy(cmdline, p, min((int)l, COMMAND_LINE_SIZE));
+		}
+	}
 
 	pr_debug("Command line is: %s\n", (char*)data);
 
diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index 20988c4..7830df8 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -778,3 +778,63 @@
 	return of_node_get(opp->np);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node);
+
+/**
+ * of_dev_pm_opp_get_cpu_power() - Estimates the power of a CPU
+ * @mW:		pointer to the power estimate in milli-watts
+ * @KHz:	pointer to the OPP's frequency, in kilo-hertz
+ * @cpu:	CPU for which power needs to be estimated
+ *
+ * Computes the estimated power of @cpu at the first OPP at or above @KHz
+ * (ceil), and updates @KHz and @mW accordingly.
+ *
+ * The power is estimated as P = C * V^2 * f, with C the CPU's capacitance
+ * (read from the 'dynamic-power-coefficient' devicetree binding) and V and f
+ * respectively the voltage and frequency of the OPP.
+ *
+ * Return: -ENODEV if the CPU device cannot be found, -EINVAL if the power
+ * calculation failed because of missing parameters, 0 otherwise.
+ */
+int of_dev_pm_opp_get_cpu_power(unsigned long *mW, unsigned long *KHz, int cpu)
+{
+	unsigned long mV, Hz, MHz;
+	struct device *cpu_dev;
+	struct dev_pm_opp *opp;
+	struct device_node *np;
+	u32 cap;
+	u64 tmp;
+	int ret;
+
+	cpu_dev = get_cpu_device(cpu);
+	if (!cpu_dev)
+		return -ENODEV;
+
+	np = of_node_get(cpu_dev->of_node);
+	if (!np)
+		return -EINVAL;
+
+	ret = of_property_read_u32(np, "dynamic-power-coefficient", &cap);
+	of_node_put(np);
+	if (ret)
+		return -EINVAL;
+
+	Hz = *KHz * 1000;
+	opp = dev_pm_opp_find_freq_ceil(cpu_dev, &Hz);
+	if (IS_ERR(opp))
+		return -EINVAL;
+
+	mV = dev_pm_opp_get_voltage(opp) / 1000;
+	dev_pm_opp_put(opp);
+	if (!mV)
+		return -EINVAL;
+
+	MHz = Hz / 1000000;
+	tmp = (u64)cap * mV * mV * MHz;
+	do_div(tmp, 1000000000);
+
+	*mW = (unsigned long)tmp;
+	*KHz = Hz / 1000;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(of_dev_pm_opp_get_cpu_power);
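/*
 * Worked example of the arithmetic above (numbers are made up): with a
 * dynamic-power-coefficient of 105 and a ceil OPP at 1500000 kHz / 1100 mV,
 *
 *   tmp = 105 * 1100 * 1100 * 1500 = 190575000000
 *   *mW = 190575000000 / 1000000000 = 190
 *
 * and *KHz is updated to the OPP frequency, 1500000.
 */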
diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index fdcf307..4b77fa1 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -15,6 +15,7 @@
 
 #include <linux/clk.h>
 #include <linux/dmaengine.h>
+#include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/pinctrl/consumer.h>
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index dfd2324..5c15a63 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -31,6 +31,7 @@
 #include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/cpu_cooling.h>
+#include <linux/energy_model.h>
 
 #include <trace/events/thermal.h>
 
@@ -49,19 +50,6 @@
  */
 
 /**
- * struct freq_table - frequency table along with power entries
- * @frequency:	frequency in KHz
- * @power:	power in mW
- *
- * This structure is built when the cooling device registers and helps
- * in translating frequency to power and vice versa.
- */
-struct freq_table {
-	u32 frequency;
-	u32 power;
-};
-
-/**
  * struct time_in_idle - Idle time stats
  * @time: previous reading of the absolute time that this cpu was idle
  * @timestamp: wall time of the last invocation of get_cpu_idle_time_us()
@@ -82,7 +70,7 @@
  *	frequency.
  * @max_level: maximum cooling level. One less than total number of valid
  *	cpufreq frequencies.
- * @freq_table: Freq table in descending order of frequencies
+ * @em: Reference on the Energy Model of the device
  * @cdev: thermal_cooling_device pointer to keep track of the
  *	registered cooling device.
  * @policy: cpufreq policy.
@@ -98,7 +86,7 @@
 	unsigned int cpufreq_state;
 	unsigned int clipped_freq;
 	unsigned int max_level;
-	struct freq_table *freq_table;	/* In descending order */
+	struct em_perf_domain *em;
 	struct thermal_cooling_device *cdev;
 	struct cpufreq_policy *policy;
 	struct list_head node;
@@ -112,26 +100,6 @@
 /* Below code defines functions to be used for cpufreq as cooling device */
 
 /**
- * get_level: Find the level for a particular frequency
- * @cpufreq_cdev: cpufreq_cdev for which the property is required
- * @freq: Frequency
- *
- * Return: level corresponding to the frequency.
- */
-static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
-			       unsigned int freq)
-{
-	struct freq_table *freq_table = cpufreq_cdev->freq_table;
-	unsigned long level;
-
-	for (level = 1; level <= cpufreq_cdev->max_level; level++)
-		if (freq > freq_table[level].frequency)
-			break;
-
-	return level - 1;
-}
-
-/**
  * cpufreq_thermal_notifier - notifier callback for cpufreq policy change.
  * @nb:	struct notifier_block * with callback info.
  * @event: value showing cpufreq event for which this function invoked.
@@ -184,105 +152,52 @@
 	return NOTIFY_OK;
 }
 
+#ifdef CONFIG_ENERGY_MODEL
 /**
- * update_freq_table() - Update the freq table with power numbers
- * @cpufreq_cdev:	the cpufreq cooling device in which to update the table
- * @capacitance: dynamic power coefficient for these cpus
+ * get_level: Find the level for a particular frequency
+ * @cpufreq_cdev: cpufreq_cdev for which the property is required
+ * @freq: Frequency
  *
- * Update the freq table with power numbers.  This table will be used in
- * cpu_power_to_freq() and cpu_freq_to_power() to convert between power and
- * frequency efficiently.  Power is stored in mW, frequency in KHz.  The
- * resulting table is in descending order.
- *
- * Return: 0 on success, -EINVAL if there are no OPPs for any CPUs,
- * or -ENOMEM if we run out of memory.
+ * Return: level corresponding to the frequency.
  */
-static int update_freq_table(struct cpufreq_cooling_device *cpufreq_cdev,
-			     u32 capacitance)
+static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
+			       unsigned int freq)
 {
-	struct freq_table *freq_table = cpufreq_cdev->freq_table;
-	struct dev_pm_opp *opp;
-	struct device *dev = NULL;
-	int num_opps = 0, cpu = cpufreq_cdev->policy->cpu, i;
+	int i;
 
-	dev = get_cpu_device(cpu);
-	if (unlikely(!dev)) {
-		dev_warn(&cpufreq_cdev->cdev->device,
-			 "No cpu device for cpu %d\n", cpu);
-		return -ENODEV;
+	for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) {
+		if (freq > cpufreq_cdev->em->table[i].frequency)
+			break;
 	}
 
-	num_opps = dev_pm_opp_get_opp_count(dev);
-	if (num_opps < 0)
-		return num_opps;
-
-	/*
-	 * The cpufreq table is also built from the OPP table and so the count
-	 * should match.
-	 */
-	if (num_opps != cpufreq_cdev->max_level + 1) {
-		dev_warn(dev, "Number of OPPs not matching with max_levels\n");
-		return -EINVAL;
-	}
-
-	for (i = 0; i <= cpufreq_cdev->max_level; i++) {
-		unsigned long freq = freq_table[i].frequency * 1000;
-		u32 freq_mhz = freq_table[i].frequency / 1000;
-		u64 power;
-		u32 voltage_mv;
-
-		/*
-		 * Find ceil frequency as 'freq' may be slightly lower than OPP
-		 * freq due to truncation while converting to kHz.
-		 */
-		opp = dev_pm_opp_find_freq_ceil(dev, &freq);
-		if (IS_ERR(opp)) {
-			dev_err(dev, "failed to get opp for %lu frequency\n",
-				freq);
-			return -EINVAL;
-		}
-
-		voltage_mv = dev_pm_opp_get_voltage(opp) / 1000;
-		dev_pm_opp_put(opp);
-
-		/*
-		 * Do the multiplication with MHz and millivolt so as
-		 * to not overflow.
-		 */
-		power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv;
-		do_div(power, 1000000000);
-
-		/* power is stored in mW */
-		freq_table[i].power = power;
-	}
-
-	return 0;
+	return cpufreq_cdev->max_level - i - 1;
 }
 
+
 static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev,
 			     u32 freq)
 {
 	int i;
-	struct freq_table *freq_table = cpufreq_cdev->freq_table;
 
-	for (i = 1; i <= cpufreq_cdev->max_level; i++)
-		if (freq > freq_table[i].frequency)
+	for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) {
+		if (freq > cpufreq_cdev->em->table[i].frequency)
 			break;
+	}
 
-	return freq_table[i - 1].power;
+	return cpufreq_cdev->em->table[i + 1].power;
 }
 
 static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
 			     u32 power)
 {
 	int i;
-	struct freq_table *freq_table = cpufreq_cdev->freq_table;
 
-	for (i = 1; i <= cpufreq_cdev->max_level; i++)
-		if (power > freq_table[i].power)
+	for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) {
+		if (power > cpufreq_cdev->em->table[i].power)
 			break;
+	}
 
-	return freq_table[i - 1].frequency;
+	return cpufreq_cdev->em->table[i + 1].frequency;
 }
 
 /**
@@ -332,6 +247,7 @@
 	raw_cpu_power = cpu_freq_to_power(cpufreq_cdev, freq);
 	return (raw_cpu_power * cpufreq_cdev->last_load) / 100;
 }
+#endif
 
 /* cpufreq cooling device callback functions are defined below */
 
@@ -374,6 +290,30 @@
 	return 0;
 }
 
+static unsigned int get_state_freq(struct cpufreq_cooling_device *cpufreq_cdev,
+			      unsigned long state)
+{
+	struct cpufreq_policy *policy;
+	unsigned long idx;
+
+#ifdef CONFIG_ENERGY_MODEL
+	/* Use the Energy Model table if available */
+	if (cpufreq_cdev->em) {
+		idx = cpufreq_cdev->max_level - state;
+		return cpufreq_cdev->em->table[idx].frequency;
+	}
+#endif
+
+	/* Otherwise, fall back on the CPUFreq table */
+	policy = cpufreq_cdev->policy;
+	if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING)
+		idx = cpufreq_cdev->max_level - state;
+	else
+		idx = state;
+
+	return policy->freq_table[idx].frequency;
+}
+
 /**
  * cpufreq_set_cur_state - callback function to set the current cooling state.
  * @cdev: thermal cooling device pointer.
@@ -398,7 +338,7 @@
 	if (cpufreq_cdev->cpufreq_state == state)
 		return 0;
 
-	clip_freq = cpufreq_cdev->freq_table[state].frequency;
+	clip_freq = get_state_freq(cpufreq_cdev, state);
 	cpufreq_cdev->cpufreq_state = state;
 	cpufreq_cdev->clipped_freq = clip_freq;
 
@@ -407,6 +347,7 @@
 	return 0;
 }
 
+#ifdef CONFIG_ENERGY_MODEL
 /**
  * cpufreq_get_requested_power() - get the current power
  * @cdev:	&thermal_cooling_device pointer
@@ -497,7 +438,7 @@
 			       struct thermal_zone_device *tz,
 			       unsigned long state, u32 *power)
 {
-	unsigned int freq, num_cpus;
+	unsigned int freq, num_cpus, idx;
 	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
 
 	/* Request state should be less than max_level */
@@ -506,7 +447,8 @@
 
 	num_cpus = cpumask_weight(cpufreq_cdev->policy->cpus);
 
-	freq = cpufreq_cdev->freq_table[state].frequency;
+	idx = cpufreq_cdev->max_level - state;
+	freq = cpufreq_cdev->em->table[idx].frequency;
 	*power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus;
 
 	return 0;
@@ -553,14 +495,6 @@
 	return 0;
 }
 
-/* Bind cpufreq callbacks to thermal cooling device ops */
-
-static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
-	.get_max_state = cpufreq_get_max_state,
-	.get_cur_state = cpufreq_get_cur_state,
-	.set_cur_state = cpufreq_set_cur_state,
-};
-
 static struct thermal_cooling_device_ops cpufreq_power_cooling_ops = {
 	.get_max_state		= cpufreq_get_max_state,
 	.get_cur_state		= cpufreq_get_cur_state,
@@ -569,32 +503,27 @@
 	.state2power		= cpufreq_state2power,
 	.power2state		= cpufreq_power2state,
 };
+#endif
+
+/* Bind cpufreq callbacks to thermal cooling device ops */
+
+static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
+	.get_max_state = cpufreq_get_max_state,
+	.get_cur_state = cpufreq_get_cur_state,
+	.set_cur_state = cpufreq_set_cur_state,
+};
 
 /* Notifier for cpufreq policy change */
 static struct notifier_block thermal_cpufreq_notifier_block = {
 	.notifier_call = cpufreq_thermal_notifier,
 };
 
-static unsigned int find_next_max(struct cpufreq_frequency_table *table,
-				  unsigned int prev_max)
-{
-	struct cpufreq_frequency_table *pos;
-	unsigned int max = 0;
-
-	cpufreq_for_each_valid_entry(pos, table) {
-		if (pos->frequency > max && pos->frequency < prev_max)
-			max = pos->frequency;
-	}
-
-	return max;
-}
-
 /**
  * __cpufreq_cooling_register - helper function to create cpufreq cooling device
  * @np: a valid struct device_node to the cooling device device tree node
  * @policy: cpufreq policy
  * Normally this should be same as cpufreq policy->related_cpus.
- * @capacitance: dynamic power coefficient for these cpus
+ * @try_model: true if a power model should be used
  *
  * This interface function registers the cpufreq cooling device with the name
  * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
@@ -606,12 +535,12 @@
  */
 static struct thermal_cooling_device *
 __cpufreq_cooling_register(struct device_node *np,
-			struct cpufreq_policy *policy, u32 capacitance)
+			struct cpufreq_policy *policy, bool try_model)
 {
 	struct thermal_cooling_device *cdev;
 	struct cpufreq_cooling_device *cpufreq_cdev;
 	char dev_name[THERMAL_NAME_LENGTH];
-	unsigned int freq, i, num_cpus;
+	unsigned int i, num_cpus;
 	int ret;
 	struct thermal_cooling_device_ops *cooling_ops;
 	bool first;
@@ -645,54 +574,37 @@
 	/* max_level is an index, not a counter */
 	cpufreq_cdev->max_level = i - 1;
 
-	cpufreq_cdev->freq_table = kmalloc_array(i,
-					sizeof(*cpufreq_cdev->freq_table),
-					GFP_KERNEL);
-	if (!cpufreq_cdev->freq_table) {
-		cdev = ERR_PTR(-ENOMEM);
-		goto free_idle_time;
-	}
+#ifdef CONFIG_ENERGY_MODEL
+	if (try_model) {
+		struct em_perf_domain *em = em_cpu_get(policy->cpu);
+
+		if (!em || !cpumask_equal(policy->related_cpus,
+					  to_cpumask(em->cpus))) {
+			cdev = ERR_PTR(-EINVAL);
+			goto free_idle_time;
+		}
+		cpufreq_cdev->em = em;
+		cooling_ops = &cpufreq_power_cooling_ops;
+	} else
+#endif
+		cooling_ops = &cpufreq_cooling_ops;
 
 	ret = ida_simple_get(&cpufreq_ida, 0, 0, GFP_KERNEL);
 	if (ret < 0) {
 		cdev = ERR_PTR(ret);
-		goto free_table;
+		goto free_idle_time;
 	}
 	cpufreq_cdev->id = ret;
 
 	snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
 		 cpufreq_cdev->id);
 
-	/* Fill freq-table in descending order of frequencies */
-	for (i = 0, freq = -1; i <= cpufreq_cdev->max_level; i++) {
-		freq = find_next_max(policy->freq_table, freq);
-		cpufreq_cdev->freq_table[i].frequency = freq;
-
-		/* Warn for duplicate entries */
-		if (!freq)
-			pr_warn("%s: table has duplicate entries\n", __func__);
-		else
-			pr_debug("%s: freq:%u KHz\n", __func__, freq);
-	}
-
-	if (capacitance) {
-		ret = update_freq_table(cpufreq_cdev, capacitance);
-		if (ret) {
-			cdev = ERR_PTR(ret);
-			goto remove_ida;
-		}
-
-		cooling_ops = &cpufreq_power_cooling_ops;
-	} else {
-		cooling_ops = &cpufreq_cooling_ops;
-	}
-
 	cdev = thermal_of_cooling_device_register(np, dev_name, cpufreq_cdev,
 						  cooling_ops);
 	if (IS_ERR(cdev))
 		goto remove_ida;
 
-	cpufreq_cdev->clipped_freq = cpufreq_cdev->freq_table[0].frequency;
+	cpufreq_cdev->clipped_freq = get_state_freq(cpufreq_cdev, 0);
 	cpufreq_cdev->cdev = cdev;
 
 	mutex_lock(&cooling_list_lock);
@@ -709,8 +621,6 @@
 
 remove_ida:
 	ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
-free_table:
-	kfree(cpufreq_cdev->freq_table);
 free_idle_time:
 	kfree(cpufreq_cdev->idle_time);
 free_cdev:
@@ -732,7 +642,7 @@
 struct thermal_cooling_device *
 cpufreq_cooling_register(struct cpufreq_policy *policy)
 {
-	return __cpufreq_cooling_register(NULL, policy, 0);
+	return __cpufreq_cooling_register(NULL, policy, false);
 }
 EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
 
@@ -760,7 +670,6 @@
 {
 	struct device_node *np = of_get_cpu_node(policy->cpu, NULL);
 	struct thermal_cooling_device *cdev = NULL;
-	u32 capacitance = 0;
 
 	if (!np) {
 		pr_err("cpu_cooling: OF node not available for cpu%d\n",
@@ -769,10 +678,7 @@
 	}
 
 	if (of_find_property(np, "#cooling-cells", NULL)) {
-		of_property_read_u32(np, "dynamic-power-coefficient",
-				     &capacitance);
-
-		cdev = __cpufreq_cooling_register(np, policy, capacitance);
+		cdev = __cpufreq_cooling_register(np, policy, true);
 		if (IS_ERR(cdev)) {
 			pr_err("cpu_cooling: cpu%d is not running as cooling device: %ld\n",
 			       policy->cpu, PTR_ERR(cdev));
@@ -814,7 +720,6 @@
 	thermal_cooling_device_unregister(cpufreq_cdev->cdev);
 	ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
 	kfree(cpufreq_cdev->idle_time);
-	kfree(cpufreq_cdev->freq_table);
 	kfree(cpufreq_cdev);
 }
 EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);
diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index 31cce780..d4f9a91 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -215,6 +215,12 @@
 config USB_F_TCM
 	tristate
 
+config USB_F_ACC
+	tristate
+
+config USB_F_AUDIO_SRC
+	tristate
+
 # this first set of drivers all depend on bulk-capable hardware.
 
 config USB_CONFIGFS
@@ -229,6 +235,14 @@
 	  appropriate symbolic links.
 	  For more information see Documentation/usb/gadget_configfs.txt.
 
+config USB_CONFIGFS_UEVENT
+	bool "Uevent notification of Gadget state"
+	depends on USB_CONFIGFS
+	help
+	  Enable uevent notifications to userspace when the gadget
+	  state changes. The gadget can be in one of three states:
+	  "CONNECTED", "DISCONNECTED" or "CONFIGURED".
+
 config USB_CONFIGFS_SERIAL
 	bool "Generic serial bulk in/out"
 	depends on USB_CONFIGFS
@@ -368,6 +382,22 @@
 	  implemented in kernel space (for instance Ethernet, serial or
 	  mass storage) and other are implemented in user space.
 
+config USB_CONFIGFS_F_ACC
+	bool "Accessory gadget"
+	depends on USB_CONFIGFS
+	select USB_F_ACC
+	help
+	  USB gadget Accessory support
+
+config USB_CONFIGFS_F_AUDIO_SRC
+	bool "Audio Source gadget"
+	depends on USB_CONFIGFS
+	depends on SND
+	select SND_PCM
+	select USB_F_AUDIO_SRC
+	help
+	  USB gadget Audio Source support
+
 config USB_CONFIGFS_F_UAC1
 	bool "Audio Class 1.0"
 	depends on USB_CONFIGFS
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index 0251299..b0148a4 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -10,6 +10,31 @@
 #include "u_f.h"
 #include "u_os_desc.h"
 
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+#include <linux/platform_device.h>
+#include <linux/kdev_t.h>
+#include <linux/usb/ch9.h>
+
+#ifdef CONFIG_USB_CONFIGFS_F_ACC
+extern int acc_ctrlrequest(struct usb_composite_dev *cdev,
+				const struct usb_ctrlrequest *ctrl);
+void acc_disconnect(void);
+#endif
+static struct class *android_class;
+static struct device *android_device;
+static int index;
+
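+/*
+ * Used by Android gadget function drivers to create a child device under
+ * the android_usb class for their function-specific sysfs attributes.
+ */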
+struct device *create_function_device(char *name)
+{
+	if (android_device && !IS_ERR(android_device))
+		return device_create(android_class, android_device,
+			MKDEV(0, index++), NULL, name);
+	else
+		return ERR_PTR(-EINVAL);
+}
+EXPORT_SYMBOL_GPL(create_function_device);
+#endif
+
 int check_user_usb_string(const char *name,
 		struct usb_gadget_strings *stringtab_dev)
 {
@@ -61,6 +86,12 @@
 	bool use_os_desc;
 	char b_vendor_code;
 	char qw_sign[OS_STRING_QW_SIGN_LEN];
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+	bool connected;
+	bool sw_connected;
+	struct work_struct work;
+	struct device *dev;
+#endif
 };
 
 static inline struct gadget_info *to_gadget_info(struct config_item *item)
@@ -266,7 +297,7 @@
 
 	mutex_lock(&gi->lock);
 
-	if (!strlen(name)) {
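+	/* an empty string or "none" unbinds the gadget from the UDC */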
+	if (!strlen(name) || strcmp(name, "none") == 0) {
 		ret = unregister_gadget(gi);
 		if (ret)
 			goto err;
@@ -1372,6 +1403,60 @@
 	return ret;
 }
 
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+static void android_work(struct work_struct *data)
+{
+	struct gadget_info *gi = container_of(data, struct gadget_info, work);
+	struct usb_composite_dev *cdev = &gi->cdev;
+	char *disconnected[2] = { "USB_STATE=DISCONNECTED", NULL };
+	char *connected[2]    = { "USB_STATE=CONNECTED", NULL };
+	char *configured[2]   = { "USB_STATE=CONFIGURED", NULL };
+	/* 0-connected 1-configured 2-disconnected */
+	bool status[3] = { false, false, false };
+	unsigned long flags;
+	bool uevent_sent = false;
+
+	spin_lock_irqsave(&cdev->lock, flags);
+	if (cdev->config)
+		status[1] = true;
+
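+	/* only report connect/disconnect transitions once per state change */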
+	if (gi->connected != gi->sw_connected) {
+		if (gi->connected)
+			status[0] = true;
+		else
+			status[2] = true;
+		gi->sw_connected = gi->connected;
+	}
+	spin_unlock_irqrestore(&cdev->lock, flags);
+
+	if (status[0]) {
+		kobject_uevent_env(&android_device->kobj,
+					KOBJ_CHANGE, connected);
+		pr_info("%s: sent uevent %s\n", __func__, connected[0]);
+		uevent_sent = true;
+	}
+
+	if (status[1]) {
+		kobject_uevent_env(&android_device->kobj,
+					KOBJ_CHANGE, configured);
+		pr_info("%s: sent uevent %s\n", __func__, configured[0]);
+		uevent_sent = true;
+	}
+
+	if (status[2]) {
+		kobject_uevent_env(&android_device->kobj,
+					KOBJ_CHANGE, disconnected);
+		pr_info("%s: sent uevent %s\n", __func__, disconnected[0]);
+		uevent_sent = true;
+	}
+
+	if (!uevent_sent) {
+		pr_info("%s: did not send uevent (%d %d %p)\n", __func__,
+			gi->connected, gi->sw_connected, cdev->config);
+	}
+}
+#endif
+
 static void configfs_composite_unbind(struct usb_gadget *gadget)
 {
 	struct usb_composite_dev	*cdev;
@@ -1391,14 +1476,91 @@
 	set_gadget_data(gadget, NULL);
 }
 
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+static int android_setup(struct usb_gadget *gadget,
+			const struct usb_ctrlrequest *c)
+{
+	struct usb_composite_dev *cdev = get_gadget_data(gadget);
+	unsigned long flags;
+	struct gadget_info *gi = container_of(cdev, struct gadget_info, cdev);
+	int value = -EOPNOTSUPP;
+	struct usb_function_instance *fi;
+
+	spin_lock_irqsave(&cdev->lock, flags);
+	if (!gi->connected) {
+		gi->connected = 1;
+		schedule_work(&gi->work);
+	}
+	spin_unlock_irqrestore(&cdev->lock, flags);
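+	/*
+	 * Give each bound function's setup handler a chance to handle the
+	 * request before falling back to the accessory handler and then to
+	 * the composite core.
+	 */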
+	list_for_each_entry(fi, &gi->available_func, cfs_list) {
+		if (fi != NULL && fi->f != NULL && fi->f->setup != NULL) {
+			value = fi->f->setup(fi->f, c);
+			if (value >= 0)
+				break;
+		}
+	}
+
+#ifdef CONFIG_USB_CONFIGFS_F_ACC
+	if (value < 0)
+		value = acc_ctrlrequest(cdev, c);
+#endif
+
+	if (value < 0)
+		value = composite_setup(gadget, c);
+
+	spin_lock_irqsave(&cdev->lock, flags);
+	if (c->bRequest == USB_REQ_SET_CONFIGURATION &&
+						cdev->config) {
+		schedule_work(&gi->work);
+	}
+	spin_unlock_irqrestore(&cdev->lock, flags);
+
+	return value;
+}
+
+static void android_disconnect(struct usb_gadget *gadget)
+{
+	struct usb_composite_dev        *cdev = get_gadget_data(gadget);
+	struct gadget_info *gi = container_of(cdev, struct gadget_info, cdev);
+
+	/* FIXME: There's a race between usb_gadget_udc_stop(), which is likely
+	 * to set the gadget driver to NULL in the UDC driver, and this driver's
+	 * gadget disconnect fn, which likely checks for the gadget driver to
+	 * be a null ptr. It happens that unbind (doing set_gadget_data(NULL))
+	 * is called before the gadget driver is set to NULL and the UDC driver
+	 * calls the disconnect fn, which results in cdev being a null ptr.
+	 */
+	if (cdev == NULL) {
+		WARN(1, "%s: gadget driver already disconnected\n", __func__);
+		return;
+	}
+
+	/*
+	 * Accessory HID support can be active while the accessory function
+	 * is not actually enabled, so we need to inform it when we are
+	 * disconnected.
+	 */
+
+#ifdef CONFIG_USB_CONFIGFS_F_ACC
+	acc_disconnect();
+#endif
+	gi->connected = 0;
+	schedule_work(&gi->work);
+	composite_disconnect(gadget);
+}
+#endif
+
 static const struct usb_gadget_driver configfs_driver_template = {
 	.bind           = configfs_composite_bind,
 	.unbind         = configfs_composite_unbind,
-
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+	.setup          = android_setup,
+	.reset          = android_disconnect,
+	.disconnect     = android_disconnect,
+#else
 	.setup          = composite_setup,
 	.reset          = composite_disconnect,
 	.disconnect     = composite_disconnect,
-
+#endif
 	.suspend	= composite_suspend,
 	.resume		= composite_resume,
 
@@ -1410,6 +1572,89 @@
 	.match_existing_only = 1,
 };
 
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+static ssize_t state_show(struct device *pdev, struct device_attribute *attr,
+			char *buf)
+{
+	struct gadget_info *dev = dev_get_drvdata(pdev);
+	struct usb_composite_dev *cdev;
+	char *state = "DISCONNECTED";
+	unsigned long flags;
+
+	if (!dev)
+		goto out;
+
+	cdev = &dev->cdev;
+
+	if (!cdev)
+		goto out;
+
+	spin_lock_irqsave(&cdev->lock, flags);
+	if (cdev->config)
+		state = "CONFIGURED";
+	else if (dev->connected)
+		state = "CONNECTED";
+	spin_unlock_irqrestore(&cdev->lock, flags);
+out:
+	return sprintf(buf, "%s\n", state);
+}
+
+static DEVICE_ATTR(state, S_IRUGO, state_show, NULL);
+
+static struct device_attribute *android_usb_attributes[] = {
+	&dev_attr_state,
+	NULL
+};
+
+static int android_device_create(struct gadget_info *gi)
+{
+	struct device_attribute **attrs;
+	struct device_attribute *attr;
+
+	INIT_WORK(&gi->work, android_work);
+	android_device = device_create(android_class, NULL,
+				MKDEV(0, 0), NULL, "android0");
+	if (IS_ERR(android_device))
+		return PTR_ERR(android_device);
+
+	dev_set_drvdata(android_device, gi);
+
+	attrs = android_usb_attributes;
+	while ((attr = *attrs++)) {
+		int err;
+
+		err = device_create_file(android_device, attr);
+		if (err) {
+			device_destroy(android_device->class,
+				       android_device->devt);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static void android_device_destroy(void)
+{
+	struct device_attribute **attrs;
+	struct device_attribute *attr;
+
+	attrs = android_usb_attributes;
+	while ((attr = *attrs++))
+		device_remove_file(android_device, attr);
+	device_destroy(android_device->class, android_device->devt);
+}
+#else
+static inline int android_device_create(struct gadget_info *gi)
+{
+	return 0;
+}
+
+static inline void android_device_destroy(void)
+{
+}
+#endif
+
 static struct config_group *gadgets_make(
 		struct config_group *group,
 		const char *name)
@@ -1461,7 +1706,11 @@
 	if (!gi->composite.gadget_driver.function)
 		goto err;
 
+	if (android_device_create(gi) < 0)
+		goto err;
+
 	return &gi->group;
+
 err:
 	kfree(gi);
 	return ERR_PTR(-ENOMEM);
@@ -1470,6 +1719,7 @@
 static void gadgets_drop(struct config_group *group, struct config_item *item)
 {
 	config_item_put(item);
+	android_device_destroy();
 }
 
 static struct configfs_group_operations gadgets_ops = {
@@ -1509,6 +1759,13 @@
 	config_group_init(&gadget_subsys.su_group);
 
 	ret = configfs_register_subsystem(&gadget_subsys);
+
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+	android_class = class_create(THIS_MODULE, "android_usb");
+	if (IS_ERR(android_class))
+		return PTR_ERR(android_class);
+#endif
+
 	return ret;
 }
 module_init(gadget_cfs_init);
@@ -1516,5 +1773,10 @@
 static void __exit gadget_cfs_exit(void)
 {
 	configfs_unregister_subsystem(&gadget_subsys);
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+	if (!IS_ERR(android_class))
+		class_destroy(android_class);
+#endif
+
 }
 module_exit(gadget_cfs_exit);
diff --git a/drivers/usb/gadget/function/Makefile b/drivers/usb/gadget/function/Makefile
index 5d3a6cf..dd33a12 100644
--- a/drivers/usb/gadget/function/Makefile
+++ b/drivers/usb/gadget/function/Makefile
@@ -50,3 +50,7 @@
 obj-$(CONFIG_USB_F_PRINTER)	+= usb_f_printer.o
 usb_f_tcm-y			:= f_tcm.o
 obj-$(CONFIG_USB_F_TCM)		+= usb_f_tcm.o
+usb_f_accessory-y		:= f_accessory.o
+obj-$(CONFIG_USB_F_ACC)		+= usb_f_accessory.o
+usb_f_audio_source-y		:= f_audio_source.o
+obj-$(CONFIG_USB_F_AUDIO_SRC)	+= usb_f_audio_source.o
diff --git a/drivers/usb/gadget/function/f_accessory.c b/drivers/usb/gadget/function/f_accessory.c
new file mode 100644
index 0000000..7aa2656
--- /dev/null
+++ b/drivers/usb/gadget/function/f_accessory.c
@@ -0,0 +1,1352 @@
+/*
+ * Gadget Function Driver for Android USB accessories
+ *
+ * Copyright (C) 2011 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+/* #define DEBUG */
+/* #define VERBOSE_DEBUG */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/delay.h>
+#include <linux/wait.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+
+#include <linux/types.h>
+#include <linux/file.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+
+#include <linux/hid.h>
+#include <linux/hiddev.h>
+#include <linux/usb.h>
+#include <linux/usb/ch9.h>
+#include <linux/usb/f_accessory.h>
+
+#include <linux/configfs.h>
+#include <linux/usb/composite.h>
+
+#define MAX_INST_NAME_LEN        40
+#define BULK_BUFFER_SIZE    16384
+#define ACC_STRING_SIZE     256
+
+#define PROTOCOL_VERSION    2
+
+/* String IDs */
+#define INTERFACE_STRING_INDEX	0
+
+/* number of tx and rx requests to allocate */
+#define TX_REQ_MAX 4
+#define RX_REQ_MAX 2
+
+struct acc_hid_dev {
+	struct list_head	list;
+	struct hid_device *hid;
+	struct acc_dev *dev;
+	/* accessory defined ID */
+	int id;
+	/* HID report descriptor */
+	u8 *report_desc;
+	/* length of HID report descriptor */
+	int report_desc_len;
+	/* number of bytes of report_desc we have received so far */
+	int report_desc_offset;
+};
+
+struct acc_dev {
+	struct usb_function function;
+	struct usb_composite_dev *cdev;
+	spinlock_t lock;
+
+	struct usb_ep *ep_in;
+	struct usb_ep *ep_out;
+
+	/* online indicates state of function_set_alt & function_unbind
+	 * set to 1 when we connect
+	 */
+	int online:1;
+
+	/* disconnected indicates state of open & release
+	 * Set to 1 when we disconnect.
+	 * Not cleared until our file is closed.
+	 */
+	int disconnected:1;
+
+	/* strings sent by the host */
+	char manufacturer[ACC_STRING_SIZE];
+	char model[ACC_STRING_SIZE];
+	char description[ACC_STRING_SIZE];
+	char version[ACC_STRING_SIZE];
+	char uri[ACC_STRING_SIZE];
+	char serial[ACC_STRING_SIZE];
+
+	/* for acc_complete_set_string */
+	int string_index;
+
+	/* set to 1 if we have a pending start request */
+	int start_requested;
+
+	int audio_mode;
+
+	/* synchronize access to our device file */
+	atomic_t open_excl;
+
+	struct list_head tx_idle;
+
+	wait_queue_head_t read_wq;
+	wait_queue_head_t write_wq;
+	struct usb_request *rx_req[RX_REQ_MAX];
+	int rx_done;
+
+	/* delayed work for handling ACCESSORY_START */
+	struct delayed_work start_work;
+
+	/* worker for registering and unregistering hid devices */
+	struct work_struct hid_work;
+
+	/* list of active HID devices */
+	struct list_head	hid_list;
+
+	/* list of new HID devices to register */
+	struct list_head	new_hid_list;
+
+	/* list of dead HID devices to unregister */
+	struct list_head	dead_hid_list;
+};
+
+static struct usb_interface_descriptor acc_interface_desc = {
+	.bLength                = USB_DT_INTERFACE_SIZE,
+	.bDescriptorType        = USB_DT_INTERFACE,
+	.bInterfaceNumber       = 0,
+	.bNumEndpoints          = 2,
+	.bInterfaceClass        = USB_CLASS_VENDOR_SPEC,
+	.bInterfaceSubClass     = USB_SUBCLASS_VENDOR_SPEC,
+	.bInterfaceProtocol     = 0,
+};
+
+static struct usb_endpoint_descriptor acc_highspeed_in_desc = {
+	.bLength                = USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType        = USB_DT_ENDPOINT,
+	.bEndpointAddress       = USB_DIR_IN,
+	.bmAttributes           = USB_ENDPOINT_XFER_BULK,
+	.wMaxPacketSize         = __constant_cpu_to_le16(512),
+};
+
+static struct usb_endpoint_descriptor acc_highspeed_out_desc = {
+	.bLength                = USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType        = USB_DT_ENDPOINT,
+	.bEndpointAddress       = USB_DIR_OUT,
+	.bmAttributes           = USB_ENDPOINT_XFER_BULK,
+	.wMaxPacketSize         = __constant_cpu_to_le16(512),
+};
+
+static struct usb_endpoint_descriptor acc_fullspeed_in_desc = {
+	.bLength                = USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType        = USB_DT_ENDPOINT,
+	.bEndpointAddress       = USB_DIR_IN,
+	.bmAttributes           = USB_ENDPOINT_XFER_BULK,
+};
+
+static struct usb_endpoint_descriptor acc_fullspeed_out_desc = {
+	.bLength                = USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType        = USB_DT_ENDPOINT,
+	.bEndpointAddress       = USB_DIR_OUT,
+	.bmAttributes           = USB_ENDPOINT_XFER_BULK,
+};
+
+static struct usb_descriptor_header *fs_acc_descs[] = {
+	(struct usb_descriptor_header *) &acc_interface_desc,
+	(struct usb_descriptor_header *) &acc_fullspeed_in_desc,
+	(struct usb_descriptor_header *) &acc_fullspeed_out_desc,
+	NULL,
+};
+
+static struct usb_descriptor_header *hs_acc_descs[] = {
+	(struct usb_descriptor_header *) &acc_interface_desc,
+	(struct usb_descriptor_header *) &acc_highspeed_in_desc,
+	(struct usb_descriptor_header *) &acc_highspeed_out_desc,
+	NULL,
+};
+
+static struct usb_string acc_string_defs[] = {
+	[INTERFACE_STRING_INDEX].s	= "Android Accessory Interface",
+	{  },	/* end of list */
+};
+
+static struct usb_gadget_strings acc_string_table = {
+	.language		= 0x0409,	/* en-US */
+	.strings		= acc_string_defs,
+};
+
+static struct usb_gadget_strings *acc_strings[] = {
+	&acc_string_table,
+	NULL,
+};
+
+/* temporary variable used between acc_open() and acc_gadget_bind() */
+static struct acc_dev *_acc_dev;
+
+struct acc_instance {
+	struct usb_function_instance func_inst;
+	const char *name;
+};
+
+static inline struct acc_dev *func_to_dev(struct usb_function *f)
+{
+	return container_of(f, struct acc_dev, function);
+}
+
+static struct usb_request *acc_request_new(struct usb_ep *ep, int buffer_size)
+{
+	struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL);
+
+	if (!req)
+		return NULL;
+
+	/* now allocate buffers for the requests */
+	req->buf = kmalloc(buffer_size, GFP_KERNEL);
+	if (!req->buf) {
+		usb_ep_free_request(ep, req);
+		return NULL;
+	}
+
+	return req;
+}
+
+static void acc_request_free(struct usb_request *req, struct usb_ep *ep)
+{
+	if (req) {
+		kfree(req->buf);
+		usb_ep_free_request(ep, req);
+	}
+}
+
+/* add a request to the tail of a list */
+static void req_put(struct acc_dev *dev, struct list_head *head,
+		struct usb_request *req)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	list_add_tail(&req->list, head);
+	spin_unlock_irqrestore(&dev->lock, flags);
+}
+
+/* remove a request from the head of a list */
+static struct usb_request *req_get(struct acc_dev *dev, struct list_head *head)
+{
+	unsigned long flags;
+	struct usb_request *req;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	if (list_empty(head)) {
+		req = 0;
+	} else {
+		req = list_first_entry(head, struct usb_request, list);
+		list_del(&req->list);
+	}
+	spin_unlock_irqrestore(&dev->lock, flags);
+	return req;
+}
+
+static void acc_set_disconnected(struct acc_dev *dev)
+{
+	dev->disconnected = 1;
+}
+
+static void acc_complete_in(struct usb_ep *ep, struct usb_request *req)
+{
+	struct acc_dev *dev = _acc_dev;
+
+	if (req->status == -ESHUTDOWN) {
+		pr_debug("acc_complete_in set disconnected\n");
+		acc_set_disconnected(dev);
+	}
+
+	req_put(dev, &dev->tx_idle, req);
+
+	wake_up(&dev->write_wq);
+}
+
+static void acc_complete_out(struct usb_ep *ep, struct usb_request *req)
+{
+	struct acc_dev *dev = _acc_dev;
+
+	dev->rx_done = 1;
+	if (req->status == -ESHUTDOWN) {
+		pr_debug("acc_complete_out set disconnected\n");
+		acc_set_disconnected(dev);
+	}
+
+	wake_up(&dev->read_wq);
+}
+
+static void acc_complete_set_string(struct usb_ep *ep, struct usb_request *req)
+{
+	struct acc_dev	*dev = ep->driver_data;
+	char *string_dest = NULL;
+	int length = req->actual;
+
+	if (req->status != 0) {
+		pr_err("acc_complete_set_string, err %d\n", req->status);
+		return;
+	}
+
+	switch (dev->string_index) {
+	case ACCESSORY_STRING_MANUFACTURER:
+		string_dest = dev->manufacturer;
+		break;
+	case ACCESSORY_STRING_MODEL:
+		string_dest = dev->model;
+		break;
+	case ACCESSORY_STRING_DESCRIPTION:
+		string_dest = dev->description;
+		break;
+	case ACCESSORY_STRING_VERSION:
+		string_dest = dev->version;
+		break;
+	case ACCESSORY_STRING_URI:
+		string_dest = dev->uri;
+		break;
+	case ACCESSORY_STRING_SERIAL:
+		string_dest = dev->serial;
+		break;
+	}
+	if (string_dest) {
+		unsigned long flags;
+
+		if (length >= ACC_STRING_SIZE)
+			length = ACC_STRING_SIZE - 1;
+
+		spin_lock_irqsave(&dev->lock, flags);
+		memcpy(string_dest, req->buf, length);
+		/* ensure zero termination */
+		string_dest[length] = 0;
+		spin_unlock_irqrestore(&dev->lock, flags);
+	} else {
+		pr_err("unknown accessory string index %d\n",
+			dev->string_index);
+	}
+}
+
+static void acc_complete_set_hid_report_desc(struct usb_ep *ep,
+		struct usb_request *req)
+{
+	struct acc_hid_dev *hid = req->context;
+	struct acc_dev *dev = hid->dev;
+	int length = req->actual;
+
+	if (req->status != 0) {
+		pr_err("acc_complete_set_hid_report_desc, err %d\n",
+			req->status);
+		return;
+	}
+
+	memcpy(hid->report_desc + hid->report_desc_offset, req->buf, length);
+	hid->report_desc_offset += length;
+	if (hid->report_desc_offset == hid->report_desc_len) {
+		/* After we have received the entire report descriptor
+		 * we schedule work to initialize the HID device
+		 */
+		schedule_work(&dev->hid_work);
+	}
+}
+
+static void acc_complete_send_hid_event(struct usb_ep *ep,
+		struct usb_request *req)
+{
+	struct acc_hid_dev *hid = req->context;
+	int length = req->actual;
+
+	if (req->status != 0) {
+		pr_err("acc_complete_send_hid_event, err %d\n", req->status);
+		return;
+	}
+
+	hid_report_raw_event(hid->hid, HID_INPUT_REPORT, req->buf, length, 1);
+}
+
+static int acc_hid_parse(struct hid_device *hid)
+{
+	struct acc_hid_dev *hdev = hid->driver_data;
+
+	hid_parse_report(hid, hdev->report_desc, hdev->report_desc_len);
+	return 0;
+}
+
+static int acc_hid_start(struct hid_device *hid)
+{
+	return 0;
+}
+
+static void acc_hid_stop(struct hid_device *hid)
+{
+}
+
+static int acc_hid_open(struct hid_device *hid)
+{
+	return 0;
+}
+
+static void acc_hid_close(struct hid_device *hid)
+{
+}
+
+static int acc_hid_raw_request(struct hid_device *hid, unsigned char reportnum,
+	__u8 *buf, size_t len, unsigned char rtype, int reqtype)
+{
+	return 0;
+}
+
+static struct hid_ll_driver acc_hid_ll_driver = {
+	.parse = acc_hid_parse,
+	.start = acc_hid_start,
+	.stop = acc_hid_stop,
+	.open = acc_hid_open,
+	.close = acc_hid_close,
+	.raw_request = acc_hid_raw_request,
+};
+
+static struct acc_hid_dev *acc_hid_new(struct acc_dev *dev,
+		int id, int desc_len)
+{
+	struct acc_hid_dev *hdev;
+
+	hdev = kzalloc(sizeof(*hdev), GFP_ATOMIC);
+	if (!hdev)
+		return NULL;
+	hdev->report_desc = kzalloc(desc_len, GFP_ATOMIC);
+	if (!hdev->report_desc) {
+		kfree(hdev);
+		return NULL;
+	}
+	hdev->dev = dev;
+	hdev->id = id;
+	hdev->report_desc_len = desc_len;
+
+	return hdev;
+}
+
+static struct acc_hid_dev *acc_hid_get(struct list_head *list, int id)
+{
+	struct acc_hid_dev *hid;
+
+	list_for_each_entry(hid, list, list) {
+		if (hid->id == id)
+			return hid;
+	}
+	return NULL;
+}
+
+static int acc_register_hid(struct acc_dev *dev, int id, int desc_length)
+{
+	struct acc_hid_dev *hid;
+	unsigned long flags;
+
+	/* report descriptor length must be > 0 */
+	if (desc_length <= 0)
+		return -EINVAL;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	/* replace HID if one already exists with this ID */
+	hid = acc_hid_get(&dev->hid_list, id);
+	if (!hid)
+		hid = acc_hid_get(&dev->new_hid_list, id);
+	if (hid)
+		list_move(&hid->list, &dev->dead_hid_list);
+
+	hid = acc_hid_new(dev, id, desc_length);
+	if (!hid) {
+		spin_unlock_irqrestore(&dev->lock, flags);
+		return -ENOMEM;
+	}
+
+	list_add(&hid->list, &dev->new_hid_list);
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	/* schedule work to register the HID device */
+	schedule_work(&dev->hid_work);
+	return 0;
+}
+
+static int acc_unregister_hid(struct acc_dev *dev, int id)
+{
+	struct acc_hid_dev *hid;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	hid = acc_hid_get(&dev->hid_list, id);
+	if (!hid)
+		hid = acc_hid_get(&dev->new_hid_list, id);
+	if (!hid) {
+		spin_unlock_irqrestore(&dev->lock, flags);
+		return -EINVAL;
+	}
+
+	list_move(&hid->list, &dev->dead_hid_list);
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	schedule_work(&dev->hid_work);
+	return 0;
+}
+
+static int create_bulk_endpoints(struct acc_dev *dev,
+				struct usb_endpoint_descriptor *in_desc,
+				struct usb_endpoint_descriptor *out_desc)
+{
+	struct usb_composite_dev *cdev = dev->cdev;
+	struct usb_request *req;
+	struct usb_ep *ep;
+	int i;
+
+	DBG(cdev, "create_bulk_endpoints dev: %p\n", dev);
+
+	ep = usb_ep_autoconfig(cdev->gadget, in_desc);
+	if (!ep) {
+		DBG(cdev, "usb_ep_autoconfig for ep_in failed\n");
+		return -ENODEV;
+	}
+	DBG(cdev, "usb_ep_autoconfig for ep_in got %s\n", ep->name);
+	ep->driver_data = dev;		/* claim the endpoint */
+	dev->ep_in = ep;
+
+	ep = usb_ep_autoconfig(cdev->gadget, out_desc);
+	if (!ep) {
+		DBG(cdev, "usb_ep_autoconfig for ep_out failed\n");
+		return -ENODEV;
+	}
+	DBG(cdev, "usb_ep_autoconfig for ep_out got %s\n", ep->name);
+	ep->driver_data = dev;		/* claim the endpoint */
+	dev->ep_out = ep;
+
+	/* now allocate requests for our endpoints */
+	for (i = 0; i < TX_REQ_MAX; i++) {
+		req = acc_request_new(dev->ep_in, BULK_BUFFER_SIZE);
+		if (!req)
+			goto fail;
+		req->complete = acc_complete_in;
+		req_put(dev, &dev->tx_idle, req);
+	}
+	for (i = 0; i < RX_REQ_MAX; i++) {
+		req = acc_request_new(dev->ep_out, BULK_BUFFER_SIZE);
+		if (!req)
+			goto fail;
+		req->complete = acc_complete_out;
+		dev->rx_req[i] = req;
+	}
+
+	return 0;
+
+fail:
+	pr_err("%s: could not allocate requests\n", __func__);
+	while ((req = req_get(dev, &dev->tx_idle)))
+		acc_request_free(req, dev->ep_in);
+	for (i = 0; i < RX_REQ_MAX; i++)
+		acc_request_free(dev->rx_req[i], dev->ep_out);
+	return -1;
+}
+
+static ssize_t acc_read(struct file *fp, char __user *buf,
+	size_t count, loff_t *pos)
+{
+	struct acc_dev *dev = fp->private_data;
+	struct usb_request *req;
+	ssize_t r = count;
+	unsigned xfer;
+	int ret = 0;
+
+	pr_debug("acc_read(%zu)\n", count);
+
+	if (dev->disconnected) {
+		pr_debug("acc_read disconnected\n");
+		return -ENODEV;
+	}
+
+	if (count > BULK_BUFFER_SIZE)
+		count = BULK_BUFFER_SIZE;
+
+	/* we will block until we're online */
+	pr_debug("acc_read: waiting for online\n");
+	ret = wait_event_interruptible(dev->read_wq, dev->online);
+	if (ret < 0) {
+		r = ret;
+		goto done;
+	}
+
+	if (dev->rx_done) {
+		/* last req cancelled; try to get it */
+		req = dev->rx_req[0];
+		goto copy_data;
+	}
+
+requeue_req:
+	/* queue a request */
+	req = dev->rx_req[0];
+	req->length = count;
+	dev->rx_done = 0;
+	ret = usb_ep_queue(dev->ep_out, req, GFP_KERNEL);
+	if (ret < 0) {
+		r = -EIO;
+		goto done;
+	} else {
+		pr_debug("rx %p queue\n", req);
+	}
+
+	/* wait for a request to complete */
+	ret = wait_event_interruptible(dev->read_wq, dev->rx_done);
+	if (ret < 0) {
+		r = ret;
+		ret = usb_ep_dequeue(dev->ep_out, req);
+		if (ret != 0) {
+			/* Cancel failed; data may already have been received
+			 * and will be retrieved in the next read.
+			 */
+			pr_debug("acc_read: cancelling failed %d\n", ret);
+		}
+		goto done;
+	}
+
+copy_data:
+	dev->rx_done = 0;
+	if (dev->online) {
+		/* If we got a 0-len packet, throw it back and try again. */
+		if (req->actual == 0)
+			goto requeue_req;
+
+		pr_debug("rx %p %u\n", req, req->actual);
+		xfer = (req->actual < count) ? req->actual : count;
+		r = xfer;
+		if (copy_to_user(buf, req->buf, xfer))
+			r = -EFAULT;
+	} else
+		r = -EIO;
+
+done:
+	pr_debug("acc_read returning %zd\n", r);
+	return r;
+}
+
+static ssize_t acc_write(struct file *fp, const char __user *buf,
+	size_t count, loff_t *pos)
+{
+	struct acc_dev *dev = fp->private_data;
+	struct usb_request *req = 0;
+	ssize_t r = count;
+	unsigned xfer;
+	int ret;
+
+	pr_debug("acc_write(%zu)\n", count);
+
+	if (!dev->online || dev->disconnected) {
+		pr_debug("acc_write disconnected or not online\n");
+		return -ENODEV;
+	}
+
+	while (count > 0) {
+		if (!dev->online) {
+			pr_debug("acc_write: not online\n");
+			r = -EIO;
+			break;
+		}
+
+		/* get an idle tx request to use */
+		req = 0;
+		ret = wait_event_interruptible(dev->write_wq,
+			((req = req_get(dev, &dev->tx_idle)) || !dev->online));
+		if (!req) {
+			r = ret;
+			break;
+		}
+
+		if (count > BULK_BUFFER_SIZE) {
+			xfer = BULK_BUFFER_SIZE;
+			/* There will be more TX requests, so don't send a ZLP yet. */
+			req->zero = 0;
+		} else {
+			xfer = count;
+			/* If the data length is a multiple of the
+			 * maxpacket size then send a zero length packet (ZLP).
+			 */
+			req->zero = ((xfer % dev->ep_in->maxpacket) == 0);
+		}
+		if (copy_from_user(req->buf, buf, xfer)) {
+			r = -EFAULT;
+			break;
+		}
+
+		req->length = xfer;
+		ret = usb_ep_queue(dev->ep_in, req, GFP_KERNEL);
+		if (ret < 0) {
+			pr_debug("acc_write: xfer error %d\n", ret);
+			r = -EIO;
+			break;
+		}
+
+		buf += xfer;
+		count -= xfer;
+
+		/* zero this so we don't try to free it on error exit */
+		req = 0;
+	}
+
+	if (req)
+		req_put(dev, &dev->tx_idle, req);
+
+	pr_debug("acc_write returning %zd\n", r);
+	return r;
+}
+
+static long acc_ioctl(struct file *fp, unsigned code, unsigned long value)
+{
+	struct acc_dev *dev = fp->private_data;
+	char *src = NULL;
+	int ret;
+
+	switch (code) {
+	case ACCESSORY_GET_STRING_MANUFACTURER:
+		src = dev->manufacturer;
+		break;
+	case ACCESSORY_GET_STRING_MODEL:
+		src = dev->model;
+		break;
+	case ACCESSORY_GET_STRING_DESCRIPTION:
+		src = dev->description;
+		break;
+	case ACCESSORY_GET_STRING_VERSION:
+		src = dev->version;
+		break;
+	case ACCESSORY_GET_STRING_URI:
+		src = dev->uri;
+		break;
+	case ACCESSORY_GET_STRING_SERIAL:
+		src = dev->serial;
+		break;
+	case ACCESSORY_IS_START_REQUESTED:
+		return dev->start_requested;
+	case ACCESSORY_GET_AUDIO_MODE:
+		return dev->audio_mode;
+	}
+	if (!src)
+		return -EINVAL;
+
+	ret = strlen(src) + 1;
+	if (copy_to_user((void __user *)value, src, ret))
+		ret = -EFAULT;
+	return ret;
+}
+
+static int acc_open(struct inode *ip, struct file *fp)
+{
+	printk(KERN_INFO "acc_open\n");
+	if (atomic_xchg(&_acc_dev->open_excl, 1))
+		return -EBUSY;
+
+	_acc_dev->disconnected = 0;
+	fp->private_data = _acc_dev;
+	return 0;
+}
+
+static int acc_release(struct inode *ip, struct file *fp)
+{
+	printk(KERN_INFO "acc_release\n");
+
+	WARN_ON(!atomic_xchg(&_acc_dev->open_excl, 0));
+	/* indicate that we are disconnected;
+	 * we could still be online, so don't touch the online flag
+	 */
+	_acc_dev->disconnected = 1;
+	return 0;
+}
+
+/* file operations for /dev/usb_accessory */
+static const struct file_operations acc_fops = {
+	.owner = THIS_MODULE,
+	.read = acc_read,
+	.write = acc_write,
+	.unlocked_ioctl = acc_ioctl,
+	.open = acc_open,
+	.release = acc_release,
+};
+
+static int acc_hid_probe(struct hid_device *hdev,
+		const struct hid_device_id *id)
+{
+	int ret;
+
+	ret = hid_parse(hdev);
+	if (ret)
+		return ret;
+	return hid_hw_start(hdev, HID_CONNECT_DEFAULT);
+}
+
+static struct miscdevice acc_device = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "usb_accessory",
+	.fops = &acc_fops,
+};
+
+static const struct hid_device_id acc_hid_table[] = {
+	{ HID_USB_DEVICE(HID_ANY_ID, HID_ANY_ID) },
+	{ }
+};
+
+static struct hid_driver acc_hid_driver = {
+	.name = "USB accessory",
+	.id_table = acc_hid_table,
+	.probe = acc_hid_probe,
+};
+
+static void acc_complete_setup_noop(struct usb_ep *ep, struct usb_request *req)
+{
+	/*
+	 * Default no-op function when nothing needs to be done for the
+	 * setup request
+	 */
+}
+
+int acc_ctrlrequest(struct usb_composite_dev *cdev,
+				const struct usb_ctrlrequest *ctrl)
+{
+	struct acc_dev	*dev = _acc_dev;
+	int	value = -EOPNOTSUPP;
+	struct acc_hid_dev *hid;
+	int offset;
+	u8 b_requestType = ctrl->bRequestType;
+	u8 b_request = ctrl->bRequest;
+	u16	w_index = le16_to_cpu(ctrl->wIndex);
+	u16	w_value = le16_to_cpu(ctrl->wValue);
+	u16	w_length = le16_to_cpu(ctrl->wLength);
+	unsigned long flags;
+
+/*
+	printk(KERN_INFO "acc_ctrlrequest "
+			"%02x.%02x v%04x i%04x l%u\n",
+			b_requestType, b_request,
+			w_value, w_index, w_length);
+*/
+
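+	/* Vendor-specific control requests implement the AOA protocol */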
+	if (b_requestType == (USB_DIR_OUT | USB_TYPE_VENDOR)) {
+		if (b_request == ACCESSORY_START) {
+			dev->start_requested = 1;
+			schedule_delayed_work(
+				&dev->start_work, msecs_to_jiffies(10));
+			value = 0;
+			cdev->req->complete = acc_complete_setup_noop;
+		} else if (b_request == ACCESSORY_SEND_STRING) {
+			dev->string_index = w_index;
+			cdev->gadget->ep0->driver_data = dev;
+			cdev->req->complete = acc_complete_set_string;
+			value = w_length;
+		} else if (b_request == ACCESSORY_SET_AUDIO_MODE &&
+				w_index == 0 && w_length == 0) {
+			dev->audio_mode = w_value;
+			cdev->req->complete = acc_complete_setup_noop;
+			value = 0;
+		} else if (b_request == ACCESSORY_REGISTER_HID) {
+			cdev->req->complete = acc_complete_setup_noop;
+			value = acc_register_hid(dev, w_value, w_index);
+		} else if (b_request == ACCESSORY_UNREGISTER_HID) {
+			cdev->req->complete = acc_complete_setup_noop;
+			value = acc_unregister_hid(dev, w_value);
+		} else if (b_request == ACCESSORY_SET_HID_REPORT_DESC) {
+			spin_lock_irqsave(&dev->lock, flags);
+			hid = acc_hid_get(&dev->new_hid_list, w_value);
+			spin_unlock_irqrestore(&dev->lock, flags);
+			if (!hid) {
+				value = -EINVAL;
+				goto err;
+			}
+			offset = w_index;
+			if (offset != hid->report_desc_offset
+				|| offset + w_length > hid->report_desc_len) {
+				value = -EINVAL;
+				goto err;
+			}
+			cdev->req->context = hid;
+			cdev->req->complete = acc_complete_set_hid_report_desc;
+			value = w_length;
+		} else if (b_request == ACCESSORY_SEND_HID_EVENT) {
+			spin_lock_irqsave(&dev->lock, flags);
+			hid = acc_hid_get(&dev->hid_list, w_value);
+			spin_unlock_irqrestore(&dev->lock, flags);
+			if (!hid) {
+				value = -EINVAL;
+				goto err;
+			}
+			cdev->req->context = hid;
+			cdev->req->complete = acc_complete_send_hid_event;
+			value = w_length;
+		}
+	} else if (b_requestType == (USB_DIR_IN | USB_TYPE_VENDOR)) {
+		if (b_request == ACCESSORY_GET_PROTOCOL) {
+			*((u16 *)cdev->req->buf) = PROTOCOL_VERSION;
+			value = sizeof(u16);
+			cdev->req->complete = acc_complete_setup_noop;
+			/* clear any string left over from a previous session */
+			memset(dev->manufacturer, 0, sizeof(dev->manufacturer));
+			memset(dev->model, 0, sizeof(dev->model));
+			memset(dev->description, 0, sizeof(dev->description));
+			memset(dev->version, 0, sizeof(dev->version));
+			memset(dev->uri, 0, sizeof(dev->uri));
+			memset(dev->serial, 0, sizeof(dev->serial));
+			dev->start_requested = 0;
+			dev->audio_mode = 0;
+		}
+	}
+
+	if (value >= 0) {
+		cdev->req->zero = 0;
+		cdev->req->length = value;
+		value = usb_ep_queue(cdev->gadget->ep0, cdev->req, GFP_ATOMIC);
+		if (value < 0)
+			ERROR(cdev, "%s setup response queue error\n",
+				__func__);
+	}
+
+err:
+	if (value == -EOPNOTSUPP)
+		VDBG(cdev,
+			"unknown class-specific control req "
+			"%02x.%02x v%04x i%04x l%u\n",
+			ctrl->bRequestType, ctrl->bRequest,
+			w_value, w_index, w_length);
+	return value;
+}
+EXPORT_SYMBOL_GPL(acc_ctrlrequest);
+
+static int
+__acc_function_bind(struct usb_configuration *c,
+			struct usb_function *f, bool configfs)
+{
+	struct usb_composite_dev *cdev = c->cdev;
+	struct acc_dev	*dev = func_to_dev(f);
+	int			id;
+	int			ret;
+
+	DBG(cdev, "acc_function_bind dev: %p\n", dev);
+
+	if (configfs) {
+		if (acc_string_defs[INTERFACE_STRING_INDEX].id == 0) {
+			ret = usb_string_id(c->cdev);
+			if (ret < 0)
+				return ret;
+			acc_string_defs[INTERFACE_STRING_INDEX].id = ret;
+			acc_interface_desc.iInterface = ret;
+		}
+		dev->cdev = c->cdev;
+	}
+	ret = hid_register_driver(&acc_hid_driver);
+	if (ret)
+		return ret;
+
+	dev->start_requested = 0;
+
+	/* allocate interface ID(s) */
+	id = usb_interface_id(c, f);
+	if (id < 0)
+		return id;
+	acc_interface_desc.bInterfaceNumber = id;
+
+	/* allocate endpoints */
+	ret = create_bulk_endpoints(dev, &acc_fullspeed_in_desc,
+			&acc_fullspeed_out_desc);
+	if (ret)
+		return ret;
+
+	/* support high speed hardware */
+	if (gadget_is_dualspeed(c->cdev->gadget)) {
+		acc_highspeed_in_desc.bEndpointAddress =
+			acc_fullspeed_in_desc.bEndpointAddress;
+		acc_highspeed_out_desc.bEndpointAddress =
+			acc_fullspeed_out_desc.bEndpointAddress;
+	}
+
+	DBG(cdev, "%s speed %s: IN/%s, OUT/%s\n",
+			gadget_is_dualspeed(c->cdev->gadget) ? "dual" : "full",
+			f->name, dev->ep_in->name, dev->ep_out->name);
+	return 0;
+}
+
+static int
+acc_function_bind_configfs(struct usb_configuration *c,
+			struct usb_function *f) {
+	return __acc_function_bind(c, f, true);
+}
+
+static void
+kill_all_hid_devices(struct acc_dev *dev)
+{
+	struct acc_hid_dev *hid;
+	struct list_head *entry, *temp;
+	unsigned long flags;
+
+	/* do nothing if usb accessory device doesn't exist */
+	if (!dev)
+		return;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	list_for_each_safe(entry, temp, &dev->hid_list) {
+		hid = list_entry(entry, struct acc_hid_dev, list);
+		list_del(&hid->list);
+		list_add(&hid->list, &dev->dead_hid_list);
+	}
+	list_for_each_safe(entry, temp, &dev->new_hid_list) {
+		hid = list_entry(entry, struct acc_hid_dev, list);
+		list_del(&hid->list);
+		list_add(&hid->list, &dev->dead_hid_list);
+	}
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	schedule_work(&dev->hid_work);
+}
+
+static void
+acc_hid_unbind(struct acc_dev *dev)
+{
+	hid_unregister_driver(&acc_hid_driver);
+	kill_all_hid_devices(dev);
+}
+
+static void
+acc_function_unbind(struct usb_configuration *c, struct usb_function *f)
+{
+	struct acc_dev	*dev = func_to_dev(f);
+	struct usb_request *req;
+	int i;
+
+	dev->online = 0;		/* clear online flag */
+	wake_up(&dev->read_wq);		/* unblock reads on closure */
+	wake_up(&dev->write_wq);	/* likewise for writes */
+
+	while ((req = req_get(dev, &dev->tx_idle)))
+		acc_request_free(req, dev->ep_in);
+	for (i = 0; i < RX_REQ_MAX; i++)
+		acc_request_free(dev->rx_req[i], dev->ep_out);
+
+	acc_hid_unbind(dev);
+}
+
+static void acc_start_work(struct work_struct *data)
+{
+	char *envp[2] = { "ACCESSORY=START", NULL };
+
+	kobject_uevent_env(&acc_device.this_device->kobj, KOBJ_CHANGE, envp);
+}
+
+static int acc_hid_init(struct acc_hid_dev *hdev)
+{
+	struct hid_device *hid;
+	int ret;
+
+	hid = hid_allocate_device();
+	if (IS_ERR(hid))
+		return PTR_ERR(hid);
+
+	hid->ll_driver = &acc_hid_ll_driver;
+	hid->dev.parent = acc_device.this_device;
+
+	hid->bus = BUS_USB;
+	hid->vendor = HID_ANY_ID;
+	hid->product = HID_ANY_ID;
+	hid->driver_data = hdev;
+	ret = hid_add_device(hid);
+	if (ret) {
+		pr_err("can't add hid device: %d\n", ret);
+		hid_destroy_device(hid);
+		return ret;
+	}
+
+	hdev->hid = hid;
+	return 0;
+}
+
+static void acc_hid_delete(struct acc_hid_dev *hid)
+{
+	kfree(hid->report_desc);
+	kfree(hid);
+}
+
+static void acc_hid_work(struct work_struct *data)
+{
+	struct acc_dev *dev = _acc_dev;
+	struct list_head	*entry, *temp;
+	struct acc_hid_dev *hid;
+	struct list_head	new_list, dead_list;
+	unsigned long flags;
+
+	INIT_LIST_HEAD(&new_list);
+
+	spin_lock_irqsave(&dev->lock, flags);
+
+	/* copy hids that are ready for initialization to new_list */
+	list_for_each_safe(entry, temp, &dev->new_hid_list) {
+		hid = list_entry(entry, struct acc_hid_dev, list);
+		if (hid->report_desc_offset == hid->report_desc_len)
+			list_move(&hid->list, &new_list);
+	}
+
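+	/*
+	 * Splice dev->dead_hid_list onto a local list so the devices can be
+	 * destroyed after the spinlock is released.
+	 */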
+	if (list_empty(&dev->dead_hid_list)) {
+		INIT_LIST_HEAD(&dead_list);
+	} else {
+		/* move all of dev->dead_hid_list to dead_list */
+		dead_list.prev = dev->dead_hid_list.prev;
+		dead_list.next = dev->dead_hid_list.next;
+		dead_list.next->prev = &dead_list;
+		dead_list.prev->next = &dead_list;
+		INIT_LIST_HEAD(&dev->dead_hid_list);
+	}
+
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	/* register new HID devices */
+	list_for_each_safe(entry, temp, &new_list) {
+		hid = list_entry(entry, struct acc_hid_dev, list);
+		if (acc_hid_init(hid)) {
+			pr_err("can't add HID device %p\n", hid);
+			acc_hid_delete(hid);
+		} else {
+			spin_lock_irqsave(&dev->lock, flags);
+			list_move(&hid->list, &dev->hid_list);
+			spin_unlock_irqrestore(&dev->lock, flags);
+		}
+	}
+
+	/* remove dead HID devices */
+	list_for_each_safe(entry, temp, &dead_list) {
+		hid = list_entry(entry, struct acc_hid_dev, list);
+		list_del(&hid->list);
+		if (hid->hid)
+			hid_destroy_device(hid->hid);
+		acc_hid_delete(hid);
+	}
+}
+
+static int acc_function_set_alt(struct usb_function *f,
+		unsigned intf, unsigned alt)
+{
+	struct acc_dev	*dev = func_to_dev(f);
+	struct usb_composite_dev *cdev = f->config->cdev;
+	int ret;
+
+	DBG(cdev, "acc_function_set_alt intf: %d alt: %d\n", intf, alt);
+
+	ret = config_ep_by_speed(cdev->gadget, f, dev->ep_in);
+	if (ret)
+		return ret;
+
+	ret = usb_ep_enable(dev->ep_in);
+	if (ret)
+		return ret;
+
+	ret = config_ep_by_speed(cdev->gadget, f, dev->ep_out);
+	if (ret)
+		return ret;
+
+	ret = usb_ep_enable(dev->ep_out);
+	if (ret) {
+		usb_ep_disable(dev->ep_in);
+		return ret;
+	}
+
+	dev->online = 1;
+	dev->disconnected = 0; /* if online then not disconnected */
+
+	/* readers may be blocked waiting for us to go online */
+	wake_up(&dev->read_wq);
+	return 0;
+}
+
+static void acc_function_disable(struct usb_function *f)
+{
+	struct acc_dev	*dev = func_to_dev(f);
+	struct usb_composite_dev	*cdev = dev->cdev;
+
+	DBG(cdev, "acc_function_disable\n");
+	acc_set_disconnected(dev); /* this now only sets disconnected */
+	dev->online = 0; /* so now need to clear online flag here too */
+	usb_ep_disable(dev->ep_in);
+	usb_ep_disable(dev->ep_out);
+
+	/* readers may be blocked waiting for us to go online */
+	wake_up(&dev->read_wq);
+
+	VDBG(cdev, "%s disabled\n", dev->function.name);
+}
+
+static int acc_setup(void)
+{
+	struct acc_dev *dev;
+	int ret;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+
+	spin_lock_init(&dev->lock);
+	init_waitqueue_head(&dev->read_wq);
+	init_waitqueue_head(&dev->write_wq);
+	atomic_set(&dev->open_excl, 0);
+	INIT_LIST_HEAD(&dev->tx_idle);
+	INIT_LIST_HEAD(&dev->hid_list);
+	INIT_LIST_HEAD(&dev->new_hid_list);
+	INIT_LIST_HEAD(&dev->dead_hid_list);
+	INIT_DELAYED_WORK(&dev->start_work, acc_start_work);
+	INIT_WORK(&dev->hid_work, acc_hid_work);
+
+	/* _acc_dev must be set before calling usb_gadget_register_driver */
+	_acc_dev = dev;
+
+	ret = misc_register(&acc_device);
+	if (ret)
+		goto err;
+
+	return 0;
+
+err:
+	kfree(dev);
+	pr_err("USB accessory gadget driver failed to initialize\n");
+	return ret;
+}
+
+void acc_disconnect(void)
+{
+	/* unregister all HID devices if USB is disconnected */
+	kill_all_hid_devices(_acc_dev);
+}
+EXPORT_SYMBOL_GPL(acc_disconnect);
+
+static void acc_cleanup(void)
+{
+	misc_deregister(&acc_device);
+	kfree(_acc_dev);
+	_acc_dev = NULL;
+}
+static struct acc_instance *to_acc_instance(struct config_item *item)
+{
+	return container_of(to_config_group(item), struct acc_instance,
+		func_inst.group);
+}
+
+static void acc_attr_release(struct config_item *item)
+{
+	struct acc_instance *fi_acc = to_acc_instance(item);
+
+	usb_put_function_instance(&fi_acc->func_inst);
+}
+
+static struct configfs_item_operations acc_item_ops = {
+	.release        = acc_attr_release,
+};
+
+static struct config_item_type acc_func_type = {
+	.ct_item_ops    = &acc_item_ops,
+	.ct_owner       = THIS_MODULE,
+};
+
+static struct acc_instance *to_fi_acc(struct usb_function_instance *fi)
+{
+	return container_of(fi, struct acc_instance, func_inst);
+}
+
+static int acc_set_inst_name(struct usb_function_instance *fi, const char *name)
+{
+	struct acc_instance *fi_acc;
+	char *ptr;
+	int name_len;
+
+	name_len = strlen(name) + 1;
+	if (name_len > MAX_INST_NAME_LEN)
+		return -ENAMETOOLONG;
+
+	ptr = kstrndup(name, name_len, GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	fi_acc = to_fi_acc(fi);
+	fi_acc->name = ptr;
+	return 0;
+}
+
+static void acc_free_inst(struct usb_function_instance *fi)
+{
+	struct acc_instance *fi_acc;
+
+	fi_acc = to_fi_acc(fi);
+	kfree(fi_acc->name);
+	acc_cleanup();
+}
+
+static struct usb_function_instance *acc_alloc_inst(void)
+{
+	struct acc_instance *fi_acc;
+	struct acc_dev *dev;
+	int err;
+
+	fi_acc = kzalloc(sizeof(*fi_acc), GFP_KERNEL);
+	if (!fi_acc)
+		return ERR_PTR(-ENOMEM);
+	fi_acc->func_inst.set_inst_name = acc_set_inst_name;
+	fi_acc->func_inst.free_func_inst = acc_free_inst;
+
+	err = acc_setup();
+	if (err) {
+		kfree(fi_acc);
+		pr_err("Error setting ACCESSORY\n");
+		return ERR_PTR(err);
+	}
+
+	config_group_init_type_name(&fi_acc->func_inst.group,
+					"", &acc_func_type);
+	dev = _acc_dev;
+	return  &fi_acc->func_inst;
+}
+
+static void acc_free(struct usb_function *f)
+{
+	/* NO-OP: no function-specific resource allocation in acc_alloc() */
+}
+
+int acc_ctrlrequest_configfs(struct usb_function *f,
+			const struct usb_ctrlrequest *ctrl) {
+	if (f->config != NULL && f->config->cdev != NULL)
+		return acc_ctrlrequest(f->config->cdev, ctrl);
+	else
+		return -1;
+}
+
+static struct usb_function *acc_alloc(struct usb_function_instance *fi)
+{
+	struct acc_dev *dev = _acc_dev;
+
+	pr_info("acc_alloc\n");
+
+	dev->function.name = "accessory";
+	dev->function.strings = acc_strings;
+	dev->function.fs_descriptors = fs_acc_descs;
+	dev->function.hs_descriptors = hs_acc_descs;
+	dev->function.bind = acc_function_bind_configfs;
+	dev->function.unbind = acc_function_unbind;
+	dev->function.set_alt = acc_function_set_alt;
+	dev->function.disable = acc_function_disable;
+	dev->function.free_func = acc_free;
+	dev->function.setup = acc_ctrlrequest_configfs;
+
+	return &dev->function;
+}
+DECLARE_USB_FUNCTION_INIT(accessory, acc_alloc_inst, acc_alloc);
+MODULE_LICENSE("GPL");
diff --git a/drivers/usb/gadget/function/f_audio_source.c b/drivers/usb/gadget/function/f_audio_source.c
new file mode 100644
index 0000000..8124af3
--- /dev/null
+++ b/drivers/usb/gadget/function/f_audio_source.c
@@ -0,0 +1,1071 @@
+/*
+ * Gadget Function Driver for USB audio source device
+ *
+ * Copyright (C) 2012 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/usb/audio.h>
+#include <linux/wait.h>
+#include <linux/pm_qos.h>
+#include <sound/core.h>
+#include <sound/initval.h>
+#include <sound/pcm.h>
+
+#include <linux/usb.h>
+#include <linux/usb_usual.h>
+#include <linux/usb/ch9.h>
+#include <linux/configfs.h>
+#include <linux/usb/composite.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#define SAMPLE_RATE 44100
+#define FRAMES_PER_MSEC (SAMPLE_RATE / 1000)
+
+#define IN_EP_MAX_PACKET_SIZE 256
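+/* 44100 Hz * 2 ch * 2 bytes ~= 176 bytes per 1 ms packet, so 256 suffices */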
+
+/* Number of requests to allocate */
+#define IN_EP_REQ_COUNT 4
+
+#define AUDIO_AC_INTERFACE	0
+#define AUDIO_AS_INTERFACE	1
+#define AUDIO_NUM_INTERFACES	2
+#define MAX_INST_NAME_LEN     40
+
+/* B.3.1  Standard AC Interface Descriptor */
+static struct usb_interface_descriptor ac_interface_desc = {
+	.bLength =		USB_DT_INTERFACE_SIZE,
+	.bDescriptorType =	USB_DT_INTERFACE,
+	.bNumEndpoints =	0,
+	.bInterfaceClass =	USB_CLASS_AUDIO,
+	.bInterfaceSubClass =	USB_SUBCLASS_AUDIOCONTROL,
+};
+
+DECLARE_UAC_AC_HEADER_DESCRIPTOR(2);
+
+#define UAC_DT_AC_HEADER_LENGTH	UAC_DT_AC_HEADER_SIZE(AUDIO_NUM_INTERFACES)
+/* 1 input terminal, 1 output terminal and 1 feature unit */
+#define UAC_DT_TOTAL_LENGTH (UAC_DT_AC_HEADER_LENGTH \
+	+ UAC_DT_INPUT_TERMINAL_SIZE + UAC_DT_OUTPUT_TERMINAL_SIZE \
+	+ UAC_DT_FEATURE_UNIT_SIZE(0))
+/* B.3.2  Class-Specific AC Interface Descriptor */
+static struct uac1_ac_header_descriptor_2 ac_header_desc = {
+	.bLength =		UAC_DT_AC_HEADER_LENGTH,
+	.bDescriptorType =	USB_DT_CS_INTERFACE,
+	.bDescriptorSubtype =	UAC_HEADER,
+	.bcdADC =		__constant_cpu_to_le16(0x0100),
+	.wTotalLength =		__constant_cpu_to_le16(UAC_DT_TOTAL_LENGTH),
+	.bInCollection =	AUDIO_NUM_INTERFACES,
+	.baInterfaceNr = {
+		[0] =		AUDIO_AC_INTERFACE,
+		[1] =		AUDIO_AS_INTERFACE,
+	}
+};
+
+#define INPUT_TERMINAL_ID	1
+static struct uac_input_terminal_descriptor input_terminal_desc = {
+	.bLength =		UAC_DT_INPUT_TERMINAL_SIZE,
+	.bDescriptorType =	USB_DT_CS_INTERFACE,
+	.bDescriptorSubtype =	UAC_INPUT_TERMINAL,
+	.bTerminalID =		INPUT_TERMINAL_ID,
+	.wTerminalType =	UAC_INPUT_TERMINAL_MICROPHONE,
+	.bAssocTerminal =	0,
+	.wChannelConfig =	0x3,
+};
+
+DECLARE_UAC_FEATURE_UNIT_DESCRIPTOR(0);
+
+#define FEATURE_UNIT_ID		2
+static struct uac_feature_unit_descriptor_0 feature_unit_desc = {
+	.bLength		= UAC_DT_FEATURE_UNIT_SIZE(0),
+	.bDescriptorType	= USB_DT_CS_INTERFACE,
+	.bDescriptorSubtype	= UAC_FEATURE_UNIT,
+	.bUnitID		= FEATURE_UNIT_ID,
+	.bSourceID		= INPUT_TERMINAL_ID,
+	.bControlSize		= 2,
+};
+
+#define OUTPUT_TERMINAL_ID	3
+static struct uac1_output_terminal_descriptor output_terminal_desc = {
+	.bLength		= UAC_DT_OUTPUT_TERMINAL_SIZE,
+	.bDescriptorType	= USB_DT_CS_INTERFACE,
+	.bDescriptorSubtype	= UAC_OUTPUT_TERMINAL,
+	.bTerminalID		= OUTPUT_TERMINAL_ID,
+	.wTerminalType		= UAC_TERMINAL_STREAMING,
+	.bAssocTerminal		= FEATURE_UNIT_ID,
+	.bSourceID		= FEATURE_UNIT_ID,
+};
+
+/* B.4.1  Standard AS Interface Descriptor */
+static struct usb_interface_descriptor as_interface_alt_0_desc = {
+	.bLength =		USB_DT_INTERFACE_SIZE,
+	.bDescriptorType =	USB_DT_INTERFACE,
+	.bAlternateSetting =	0,
+	.bNumEndpoints =	0,
+	.bInterfaceClass =	USB_CLASS_AUDIO,
+	.bInterfaceSubClass =	USB_SUBCLASS_AUDIOSTREAMING,
+};
+
+static struct usb_interface_descriptor as_interface_alt_1_desc = {
+	.bLength =		USB_DT_INTERFACE_SIZE,
+	.bDescriptorType =	USB_DT_INTERFACE,
+	.bAlternateSetting =	1,
+	.bNumEndpoints =	1,
+	.bInterfaceClass =	USB_CLASS_AUDIO,
+	.bInterfaceSubClass =	USB_SUBCLASS_AUDIOSTREAMING,
+};
+
+/* B.4.2  Class-Specific AS Interface Descriptor */
+static struct uac1_as_header_descriptor as_header_desc = {
+	.bLength =		UAC_DT_AS_HEADER_SIZE,
+	.bDescriptorType =	USB_DT_CS_INTERFACE,
+	.bDescriptorSubtype =	UAC_AS_GENERAL,
+	.bTerminalLink =	INPUT_TERMINAL_ID,
+	.bDelay =		1,
+	.wFormatTag =		UAC_FORMAT_TYPE_I_PCM,
+};
+
+DECLARE_UAC_FORMAT_TYPE_I_DISCRETE_DESC(1);
+
+static struct uac_format_type_i_discrete_descriptor_1 as_type_i_desc = {
+	.bLength =		UAC_FORMAT_TYPE_I_DISCRETE_DESC_SIZE(1),
+	.bDescriptorType =	USB_DT_CS_INTERFACE,
+	.bDescriptorSubtype =	UAC_FORMAT_TYPE,
+	.bFormatType =		UAC_FORMAT_TYPE_I,
+	.bSubframeSize =	2,
+	.bBitResolution =	16,
+	.bSamFreqType =		1,
+};
+
+/* Standard ISO IN Endpoint Descriptor for highspeed */
+static struct usb_endpoint_descriptor hs_as_in_ep_desc  = {
+	.bLength =		USB_DT_ENDPOINT_AUDIO_SIZE,
+	.bDescriptorType =	USB_DT_ENDPOINT,
+	.bEndpointAddress =	USB_DIR_IN,
+	.bmAttributes =		USB_ENDPOINT_SYNC_SYNC
+				| USB_ENDPOINT_XFER_ISOC,
+	.wMaxPacketSize =	__constant_cpu_to_le16(IN_EP_MAX_PACKET_SIZE),
+	.bInterval =		4, /* poll 1 per millisecond */
+};
+
+/* Standard ISO IN Endpoint Descriptor for fullspeed */
+static struct usb_endpoint_descriptor fs_as_in_ep_desc  = {
+	.bLength =		USB_DT_ENDPOINT_AUDIO_SIZE,
+	.bDescriptorType =	USB_DT_ENDPOINT,
+	.bEndpointAddress =	USB_DIR_IN,
+	.bmAttributes =		USB_ENDPOINT_SYNC_SYNC
+				| USB_ENDPOINT_XFER_ISOC,
+	.wMaxPacketSize =	__constant_cpu_to_le16(IN_EP_MAX_PACKET_SIZE),
+	.bInterval =		1, /* poll 1 per millisecond */
+};
+
+/* Class-specific AS ISO IN Endpoint Descriptor */
+static struct uac_iso_endpoint_descriptor as_iso_in_desc = {
+	.bLength =		UAC_ISO_ENDPOINT_DESC_SIZE,
+	.bDescriptorType =	USB_DT_CS_ENDPOINT,
+	.bDescriptorSubtype =	UAC_EP_GENERAL,
+	.bmAttributes =		1,
+	.bLockDelayUnits =	1,
+	.wLockDelay =		__constant_cpu_to_le16(1),
+};
+
+static struct usb_descriptor_header *hs_audio_desc[] = {
+	(struct usb_descriptor_header *)&ac_interface_desc,
+	(struct usb_descriptor_header *)&ac_header_desc,
+
+	(struct usb_descriptor_header *)&input_terminal_desc,
+	(struct usb_descriptor_header *)&output_terminal_desc,
+	(struct usb_descriptor_header *)&feature_unit_desc,
+
+	(struct usb_descriptor_header *)&as_interface_alt_0_desc,
+	(struct usb_descriptor_header *)&as_interface_alt_1_desc,
+	(struct usb_descriptor_header *)&as_header_desc,
+
+	(struct usb_descriptor_header *)&as_type_i_desc,
+
+	(struct usb_descriptor_header *)&hs_as_in_ep_desc,
+	(struct usb_descriptor_header *)&as_iso_in_desc,
+	NULL,
+};
+
+static struct usb_descriptor_header *fs_audio_desc[] = {
+	(struct usb_descriptor_header *)&ac_interface_desc,
+	(struct usb_descriptor_header *)&ac_header_desc,
+
+	(struct usb_descriptor_header *)&input_terminal_desc,
+	(struct usb_descriptor_header *)&output_terminal_desc,
+	(struct usb_descriptor_header *)&feature_unit_desc,
+
+	(struct usb_descriptor_header *)&as_interface_alt_0_desc,
+	(struct usb_descriptor_header *)&as_interface_alt_1_desc,
+	(struct usb_descriptor_header *)&as_header_desc,
+
+	(struct usb_descriptor_header *)&as_type_i_desc,
+
+	(struct usb_descriptor_header *)&fs_as_in_ep_desc,
+	(struct usb_descriptor_header *)&as_iso_in_desc,
+	NULL,
+};
+
+static struct snd_pcm_hardware audio_hw_info = {
+	.info =			SNDRV_PCM_INFO_MMAP |
+				SNDRV_PCM_INFO_MMAP_VALID |
+				SNDRV_PCM_INFO_BATCH |
+				SNDRV_PCM_INFO_INTERLEAVED |
+				SNDRV_PCM_INFO_BLOCK_TRANSFER,
+
+	.formats		= SNDRV_PCM_FMTBIT_S16_LE,
+	.channels_min		= 2,
+	.channels_max		= 2,
+	.rate_min		= SAMPLE_RATE,
+	.rate_max		= SAMPLE_RATE,
+
+	.buffer_bytes_max =	1024 * 1024,
+	.period_bytes_min =	64,
+	.period_bytes_max =	512 * 1024,
+	.periods_min =		2,
+	.periods_max =		1024,
+};
+
+/*-------------------------------------------------------------------------*/
+
+struct audio_source_config {
+	int	card;
+	int	device;
+};
+
+struct audio_dev {
+	struct usb_function		func;
+	struct snd_card			*card;
+	struct snd_pcm			*pcm;
+	struct snd_pcm_substream *substream;
+
+	struct list_head		idle_reqs;
+	struct usb_ep			*in_ep;
+
+	spinlock_t			lock;
+
+	/* beginning, end and current position in our buffer */
+	void				*buffer_start;
+	void				*buffer_end;
+	void				*buffer_pos;
+
+	/* byte size of a "period" */
+	unsigned int			period;
+	/* bytes sent since last call to snd_pcm_period_elapsed */
+	unsigned int			period_offset;
+	/* time we started playing */
+	ktime_t				start_time;
+	/* number of frames sent since start_time */
+	s64				frames_sent;
+	struct audio_source_config	*config;
+	/* for creating and issuing QoS requests */
+	struct pm_qos_request pm_qos;
+};
+
+static inline struct audio_dev *func_to_audio(struct usb_function *f)
+{
+	return container_of(f, struct audio_dev, func);
+}
+
+/*-------------------------------------------------------------------------*/
+
+struct audio_source_instance {
+	struct usb_function_instance func_inst;
+	const char *name;
+	struct audio_source_config *config;
+	struct device *audio_device;
+};
+
+static void audio_source_attr_release(struct config_item *item);
+
+static struct configfs_item_operations audio_source_item_ops = {
+	.release        = audio_source_attr_release,
+};
+
+static struct config_item_type audio_source_func_type = {
+	.ct_item_ops    = &audio_source_item_ops,
+	.ct_owner       = THIS_MODULE,
+};
+
+static ssize_t audio_source_pcm_show(struct device *dev,
+		struct device_attribute *attr, char *buf);
+
+static DEVICE_ATTR(pcm, S_IRUGO, audio_source_pcm_show, NULL);
+
+static struct device_attribute *audio_source_function_attributes[] = {
+	&dev_attr_pcm,
+	NULL
+};
+
+/*--------------------------------------------------------------------------*/
+
+static struct usb_request *audio_request_new(struct usb_ep *ep, int buffer_size)
+{
+	struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL);
+
+	if (!req)
+		return NULL;
+
+	req->buf = kmalloc(buffer_size, GFP_KERNEL);
+	if (!req->buf) {
+		usb_ep_free_request(ep, req);
+		return NULL;
+	}
+	req->length = buffer_size;
+	return req;
+}
+
+static void audio_request_free(struct usb_request *req, struct usb_ep *ep)
+{
+	if (req) {
+		kfree(req->buf);
+		usb_ep_free_request(ep, req);
+	}
+}
+
+static void audio_req_put(struct audio_dev *audio, struct usb_request *req)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&audio->lock, flags);
+	list_add_tail(&req->list, &audio->idle_reqs);
+	spin_unlock_irqrestore(&audio->lock, flags);
+}
+
+static struct usb_request *audio_req_get(struct audio_dev *audio)
+{
+	unsigned long flags;
+	struct usb_request *req;
+
+	spin_lock_irqsave(&audio->lock, flags);
+	if (list_empty(&audio->idle_reqs)) {
+		req = 0;
+	} else {
+		req = list_first_entry(&audio->idle_reqs, struct usb_request,
+				list);
+		list_del(&req->list);
+	}
+	spin_unlock_irqrestore(&audio->lock, flags);
+	return req;
+}
+
+/* send the appropriate number of packets to match our bitrate */
+static void audio_send(struct audio_dev *audio)
+{
+	struct snd_pcm_runtime *runtime;
+	struct usb_request *req;
+	int length, length1, length2, ret;
+	s64 msecs;
+	s64 frames;
+	ktime_t now;
+
+	/* audio->substream will be null if we have been closed */
+	if (!audio->substream)
+		return;
+	/* audio->buffer_pos will be null if we have been stopped */
+	if (!audio->buffer_pos)
+		return;
+
+	runtime = audio->substream->runtime;
+
+	/* compute number of frames to send */
+	now = ktime_get();
+	msecs = div_s64((ktime_to_ns(now) - ktime_to_ns(audio->start_time)),
+			1000000);
+	frames = div_s64((msecs * SAMPLE_RATE), 1000);
+
+	/* Readjust our frames_sent if we fall too far behind.
+	 * Dropping some frames is better than sending data too fast in an
+	 * attempt to catch up.
+	 */
+	if (frames - audio->frames_sent > 10 * FRAMES_PER_MSEC)
+		audio->frames_sent = frames - FRAMES_PER_MSEC;
+
+	frames -= audio->frames_sent;
+
+	/* We need to send something to keep the pipeline going */
+	if (frames <= 0)
+		frames = FRAMES_PER_MSEC;
+
+	while (frames > 0) {
+		req = audio_req_get(audio);
+		if (!req)
+			break;
+
+		length = frames_to_bytes(runtime, frames);
+		if (length > IN_EP_MAX_PACKET_SIZE)
+			length = IN_EP_MAX_PACKET_SIZE;
+
+		if (audio->buffer_pos + length > audio->buffer_end)
+			length1 = audio->buffer_end - audio->buffer_pos;
+		else
+			length1 = length;
+		memcpy(req->buf, audio->buffer_pos, length1);
+		if (length1 < length) {
+			/* Wrap around and copy remaining length
+			 * at beginning of buffer.
+			 */
+			length2 = length - length1;
+			memcpy(req->buf + length1, audio->buffer_start,
+					length2);
+			audio->buffer_pos = audio->buffer_start + length2;
+		} else {
+			audio->buffer_pos += length1;
+			if (audio->buffer_pos >= audio->buffer_end)
+				audio->buffer_pos = audio->buffer_start;
+		}
+
+		req->length = length;
+		ret = usb_ep_queue(audio->in_ep, req, GFP_ATOMIC);
+		if (ret < 0) {
+			pr_err("usb_ep_queue failed ret: %d\n", ret);
+			audio_req_put(audio, req);
+			break;
+		}
+
+		frames -= bytes_to_frames(runtime, length);
+		audio->frames_sent += bytes_to_frames(runtime, length);
+	}
+}
+
+static void audio_control_complete(struct usb_ep *ep, struct usb_request *req)
+{
+	/* nothing to do here */
+}
+
+static void audio_data_complete(struct usb_ep *ep, struct usb_request *req)
+{
+	struct audio_dev *audio = req->context;
+
+	pr_debug("audio_data_complete req->status %d req->actual %d\n",
+		req->status, req->actual);
+
+	audio_req_put(audio, req);
+
+	if (!audio->buffer_start || req->status)
+		return;
+
+	audio->period_offset += req->actual;
+	if (audio->period_offset >= audio->period) {
+		snd_pcm_period_elapsed(audio->substream);
+		audio->period_offset = 0;
+	}
+	audio_send(audio);
+}
+
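+/*
+ * Class-specific SET_CUR/MIN/MAX/RES requests on the streaming endpoint are
+ * accepted but their values are ignored; only the fixed SAMPLE_RATE is
+ * supported.
+ */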
+static int audio_set_endpoint_req(struct usb_function *f,
+		const struct usb_ctrlrequest *ctrl)
+{
+	int value = -EOPNOTSUPP;
+	u16 ep = le16_to_cpu(ctrl->wIndex);
+	u16 len = le16_to_cpu(ctrl->wLength);
+	u16 w_value = le16_to_cpu(ctrl->wValue);
+
+	pr_debug("bRequest 0x%x, w_value 0x%04x, len %d, endpoint %d\n",
+			ctrl->bRequest, w_value, len, ep);
+
+	switch (ctrl->bRequest) {
+	case UAC_SET_CUR:
+	case UAC_SET_MIN:
+	case UAC_SET_MAX:
+	case UAC_SET_RES:
+		value = len;
+		break;
+	default:
+		break;
+	}
+
+	return value;
+}
+
+static int audio_get_endpoint_req(struct usb_function *f,
+		const struct usb_ctrlrequest *ctrl)
+{
+	struct usb_composite_dev *cdev = f->config->cdev;
+	int value = -EOPNOTSUPP;
+	u8 ep = ((le16_to_cpu(ctrl->wIndex) >> 8) & 0xFF);
+	u16 len = le16_to_cpu(ctrl->wLength);
+	u16 w_value = le16_to_cpu(ctrl->wValue);
+	u8 *buf = cdev->req->buf;
+
+	pr_debug("bRequest 0x%x, w_value 0x%04x, len %d, endpoint %d\n",
+			ctrl->bRequest, w_value, len, ep);
+
+	if (w_value == UAC_EP_CS_ATTR_SAMPLE_RATE << 8) {
+		switch (ctrl->bRequest) {
+		case UAC_GET_CUR:
+		case UAC_GET_MIN:
+		case UAC_GET_MAX:
+		case UAC_GET_RES:
+			/* return our sample rate */
+			buf[0] = (u8)SAMPLE_RATE;
+			buf[1] = (u8)(SAMPLE_RATE >> 8);
+			buf[2] = (u8)(SAMPLE_RATE >> 16);
+			value = 3;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return value;
+}
+
+static int
+audio_setup(struct usb_function *f, const struct usb_ctrlrequest *ctrl)
+{
+	struct usb_composite_dev *cdev = f->config->cdev;
+	struct usb_request *req = cdev->req;
+	int value = -EOPNOTSUPP;
+	u16 w_index = le16_to_cpu(ctrl->wIndex);
+	u16 w_value = le16_to_cpu(ctrl->wValue);
+	u16 w_length = le16_to_cpu(ctrl->wLength);
+
+	/* composite driver infrastructure handles everything; interface
+	 * activation uses set_alt().
+	 */
+	switch (ctrl->bRequestType) {
+	case USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_ENDPOINT:
+		value = audio_set_endpoint_req(f, ctrl);
+		break;
+
+	case USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_ENDPOINT:
+		value = audio_get_endpoint_req(f, ctrl);
+		break;
+	}
+
+	/* respond with data transfer or status phase? */
+	if (value >= 0) {
+		pr_debug("audio req%02x.%02x v%04x i%04x l%d\n",
+			ctrl->bRequestType, ctrl->bRequest,
+			w_value, w_index, w_length);
+		req->zero = 0;
+		req->length = value;
+		req->complete = audio_control_complete;
+		value = usb_ep_queue(cdev->gadget->ep0, req, GFP_ATOMIC);
+		if (value < 0)
+			pr_err("audio response on err %d\n", value);
+	}
+
+	/* device either stalls (value < 0) or reports success */
+	return value;
+}
+
+static int audio_set_alt(struct usb_function *f, unsigned intf, unsigned alt)
+{
+	struct audio_dev *audio = func_to_audio(f);
+	struct usb_composite_dev *cdev = f->config->cdev;
+	int ret;
+
+	pr_debug("audio_set_alt intf %d, alt %d\n", intf, alt);
+
+	ret = config_ep_by_speed(cdev->gadget, f, audio->in_ep);
+	if (ret)
+		return ret;
+
+	usb_ep_enable(audio->in_ep);
+	return 0;
+}
+
+static void audio_disable(struct usb_function *f)
+{
+	struct audio_dev	*audio = func_to_audio(f);
+
+	pr_debug("audio_disable\n");
+	usb_ep_disable(audio->in_ep);
+}
+
+static void audio_free_func(struct usb_function *f)
+{
+	/* no-op */
+}
+
+/*-------------------------------------------------------------------------*/
+
+static void audio_build_desc(struct audio_dev *audio)
+{
+	u8 *sam_freq;
+	int rate;
+
+	/* Set channel numbers */
+	input_terminal_desc.bNrChannels = 2;
+	as_type_i_desc.bNrChannels = 2;
+
+	/* Set sample rates */
+	rate = SAMPLE_RATE;
+	sam_freq = as_type_i_desc.tSamFreq[0];
+	memcpy(sam_freq, &rate, 3);
+}
+
+
+static int snd_card_setup(struct usb_configuration *c,
+	struct audio_source_config *config);
+static struct audio_source_instance *to_fi_audio_source(
+	const struct usb_function_instance *fi);
+
+
+/* audio function driver setup/binding */
+static int
+audio_bind(struct usb_configuration *c, struct usb_function *f)
+{
+	struct usb_composite_dev *cdev = c->cdev;
+	struct audio_dev *audio = func_to_audio(f);
+	int status;
+	struct usb_ep *ep;
+	struct usb_request *req;
+	int i;
+	int err;
+
+	if (IS_ENABLED(CONFIG_USB_CONFIGFS)) {
+		struct audio_source_instance *fi_audio =
+				to_fi_audio_source(f->fi);
+		struct audio_source_config *config =
+				fi_audio->config;
+
+		err = snd_card_setup(c, config);
+		if (err)
+			return err;
+	}
+
+	audio_build_desc(audio);
+
+	/* allocate instance-specific interface IDs, and patch descriptors */
+	status = usb_interface_id(c, f);
+	if (status < 0)
+		goto fail;
+	ac_interface_desc.bInterfaceNumber = status;
+
+	/* AUDIO_AC_INTERFACE */
+	ac_header_desc.baInterfaceNr[0] = status;
+
+	status = usb_interface_id(c, f);
+	if (status < 0)
+		goto fail;
+	as_interface_alt_0_desc.bInterfaceNumber = status;
+	as_interface_alt_1_desc.bInterfaceNumber = status;
+
+	/* AUDIO_AS_INTERFACE */
+	ac_header_desc.baInterfaceNr[1] = status;
+
+	status = -ENODEV;
+
+	/* allocate our endpoint */
+	ep = usb_ep_autoconfig(cdev->gadget, &fs_as_in_ep_desc);
+	if (!ep)
+		goto fail;
+	audio->in_ep = ep;
+	ep->driver_data = audio; /* claim */
+
+	if (gadget_is_dualspeed(c->cdev->gadget))
+		hs_as_in_ep_desc.bEndpointAddress =
+			fs_as_in_ep_desc.bEndpointAddress;
+
+	f->fs_descriptors = fs_audio_desc;
+	f->hs_descriptors = hs_audio_desc;
+
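+	/* pre-allocate USB requests and park them on the idle list */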
+	for (i = 0, status = 0; i < IN_EP_REQ_COUNT && status == 0; i++) {
+		req = audio_request_new(ep, IN_EP_MAX_PACKET_SIZE);
+		if (req) {
+			req->context = audio;
+			req->complete = audio_data_complete;
+			audio_req_put(audio, req);
+		} else
+			status = -ENOMEM;
+	}
+
+fail:
+	return status;
+}
+
+static void
+audio_unbind(struct usb_configuration *c, struct usb_function *f)
+{
+	struct audio_dev *audio = func_to_audio(f);
+	struct usb_request *req;
+
+	while ((req = audio_req_get(audio)))
+		audio_request_free(req, audio->in_ep);
+
+	snd_card_free_when_closed(audio->card);
+	audio->card = NULL;
+	audio->pcm = NULL;
+	audio->substream = NULL;
+	audio->in_ep = NULL;
+
+	if (IS_ENABLED(CONFIG_USB_CONFIGFS)) {
+		struct audio_source_instance *fi_audio =
+				to_fi_audio_source(f->fi);
+		struct audio_source_config *config =
+				fi_audio->config;
+
+		config->card = -1;
+		config->device = -1;
+	}
+}
+
+static void audio_pcm_playback_start(struct audio_dev *audio)
+{
+	audio->start_time = ktime_get();
+	audio->frames_sent = 0;
+	audio_send(audio);
+}
+
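+/*
+ * Clear the buffer pointers under the lock so audio_send() and
+ * audio_data_complete() stop touching the PCM buffer.
+ */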
+static void audio_pcm_playback_stop(struct audio_dev *audio)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&audio->lock, flags);
+	audio->buffer_start = 0;
+	audio->buffer_end = 0;
+	audio->buffer_pos = 0;
+	spin_unlock_irqrestore(&audio->lock, flags);
+}
+
+static int audio_pcm_open(struct snd_pcm_substream *substream)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct audio_dev *audio = substream->private_data;
+
+	runtime->private_data = audio;
+	runtime->hw = audio_hw_info;
+	snd_pcm_limit_hw_rates(runtime);
+	runtime->hw.channels_max = 2;
+
+	audio->substream = substream;
+
+	/* Add the QoS request and set the latency to 0 */
+	pm_qos_add_request(&audio->pm_qos, PM_QOS_CPU_DMA_LATENCY, 0);
+
+	return 0;
+}
+
+static int audio_pcm_close(struct snd_pcm_substream *substream)
+{
+	struct audio_dev *audio = substream->private_data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&audio->lock, flags);
+
+	/* Remove the QoS request */
+	pm_qos_remove_request(&audio->pm_qos);
+
+	audio->substream = NULL;
+	spin_unlock_irqrestore(&audio->lock, flags);
+
+	return 0;
+}
+
+static int audio_pcm_hw_params(struct snd_pcm_substream *substream,
+				struct snd_pcm_hw_params *params)
+{
+	unsigned int channels = params_channels(params);
+	unsigned int rate = params_rate(params);
+
+	if (rate != SAMPLE_RATE)
+		return -EINVAL;
+	if (channels != 2)
+		return -EINVAL;
+
+	return snd_pcm_lib_alloc_vmalloc_buffer(substream,
+		params_buffer_bytes(params));
+}
+
+static int audio_pcm_hw_free(struct snd_pcm_substream *substream)
+{
+	return snd_pcm_lib_free_vmalloc_buffer(substream);
+}
+
+static int audio_pcm_prepare(struct snd_pcm_substream *substream)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct audio_dev *audio = runtime->private_data;
+
+	audio->period = snd_pcm_lib_period_bytes(substream);
+	audio->period_offset = 0;
+	audio->buffer_start = runtime->dma_area;
+	audio->buffer_end = audio->buffer_start
+		+ snd_pcm_lib_buffer_bytes(substream);
+	audio->buffer_pos = audio->buffer_start;
+
+	return 0;
+}
+
+static snd_pcm_uframes_t audio_pcm_pointer(struct snd_pcm_substream *substream)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct audio_dev *audio = runtime->private_data;
+	ssize_t bytes = audio->buffer_pos - audio->buffer_start;
+
+	/* return offset of next frame to fill in our buffer */
+	return bytes_to_frames(runtime, bytes);
+}
+
+static int audio_pcm_playback_trigger(struct snd_pcm_substream *substream,
+					int cmd)
+{
+	struct audio_dev *audio = substream->runtime->private_data;
+	int ret = 0;
+
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+	case SNDRV_PCM_TRIGGER_RESUME:
+		audio_pcm_playback_start(audio);
+		break;
+
+	case SNDRV_PCM_TRIGGER_STOP:
+	case SNDRV_PCM_TRIGGER_SUSPEND:
+		audio_pcm_playback_stop(audio);
+		break;
+
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static struct audio_dev _audio_dev = {
+	.func = {
+		.name = "audio_source",
+		.bind = audio_bind,
+		.unbind = audio_unbind,
+		.set_alt = audio_set_alt,
+		.setup = audio_setup,
+		.disable = audio_disable,
+		.free_func = audio_free_func,
+	},
+	.lock = __SPIN_LOCK_UNLOCKED(_audio_dev.lock),
+	.idle_reqs = LIST_HEAD_INIT(_audio_dev.idle_reqs),
+};
+
+static struct snd_pcm_ops audio_playback_ops = {
+	.open		= audio_pcm_open,
+	.close		= audio_pcm_close,
+	.ioctl		= snd_pcm_lib_ioctl,
+	.hw_params	= audio_pcm_hw_params,
+	.hw_free	= audio_pcm_hw_free,
+	.prepare	= audio_pcm_prepare,
+	.trigger	= audio_pcm_playback_trigger,
+	.pointer	= audio_pcm_pointer,
+};
+
+int audio_source_bind_config(struct usb_configuration *c,
+		struct audio_source_config *config)
+{
+	struct audio_dev *audio;
+	int err;
+
+	config->card = -1;
+	config->device = -1;
+
+	audio = &_audio_dev;
+
+	err = snd_card_setup(c, config);
+	if (err)
+		return err;
+
+	err = usb_add_function(c, &audio->func);
+	if (err)
+		goto add_fail;
+
+	return 0;
+
+add_fail:
+	snd_card_free(audio->card);
+	return err;
+}
+
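+/* create and register the ALSA card and PCM device backing this function */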
+static int snd_card_setup(struct usb_configuration *c,
+		struct audio_source_config *config)
+{
+	struct audio_dev *audio;
+	struct snd_card *card;
+	struct snd_pcm *pcm;
+	int err;
+
+	audio = &_audio_dev;
+
+	err = snd_card_new(&c->cdev->gadget->dev,
+			SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1,
+			THIS_MODULE, 0, &card);
+	if (err)
+		return err;
+
+	err = snd_pcm_new(card, "USB audio source", 0, 1, 0, &pcm);
+	if (err)
+		goto pcm_fail;
+
+	pcm->private_data = audio;
+	pcm->info_flags = 0;
+	audio->pcm = pcm;
+
+	strlcpy(pcm->name, "USB gadget audio", sizeof(pcm->name));
+
+	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &audio_playback_ops);
+	snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
+				NULL, 0, 64 * 1024);
+
+	strlcpy(card->driver, "audio_source", sizeof(card->driver));
+	strlcpy(card->shortname, card->driver, sizeof(card->shortname));
+	strlcpy(card->longname, "USB accessory audio source",
+		sizeof(card->longname));
+
+	err = snd_card_register(card);
+	if (err)
+		goto register_fail;
+
+	config->card = pcm->card->number;
+	config->device = pcm->device;
+	audio->card = card;
+	return 0;
+
+register_fail:
+pcm_fail:
+	snd_card_free(audio->card);
+	return err;
+}
+
+static struct audio_source_instance *to_audio_source_instance(
+					struct config_item *item)
+{
+	return container_of(to_config_group(item), struct audio_source_instance,
+		func_inst.group);
+}
+
+static struct audio_source_instance *to_fi_audio_source(
+					const struct usb_function_instance *fi)
+{
+	return container_of(fi, struct audio_source_instance, func_inst);
+}
+
+static void audio_source_attr_release(struct config_item *item)
+{
+	struct audio_source_instance *fi_audio = to_audio_source_instance(item);
+
+	usb_put_function_instance(&fi_audio->func_inst);
+}
+
+static int audio_source_set_inst_name(struct usb_function_instance *fi,
+					const char *name)
+{
+	struct audio_source_instance *fi_audio;
+	char *ptr;
+	int name_len;
+
+	name_len = strlen(name) + 1;
+	if (name_len > MAX_INST_NAME_LEN)
+		return -ENAMETOOLONG;
+
+	ptr = kstrndup(name, name_len, GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	fi_audio = to_fi_audio_source(fi);
+	fi_audio->name = ptr;
+
+	return 0;
+}
+
+static void audio_source_free_inst(struct usb_function_instance *fi)
+{
+	struct audio_source_instance *fi_audio;
+
+	fi_audio = to_fi_audio_source(fi);
+	device_destroy(fi_audio->audio_device->class,
+			fi_audio->audio_device->devt);
+	kfree(fi_audio->name);
+	kfree(fi_audio->config);
+}
+
+static ssize_t audio_source_pcm_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct audio_source_instance *fi_audio = dev_get_drvdata(dev);
+	struct audio_source_config *config = fi_audio->config;
+
+	/* print PCM card and device numbers */
+	return sprintf(buf, "%d %d\n", config->card, config->device);
+}
+
+struct device *create_function_device(char *name);
+
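+/*
+ * Allocate the configfs function instance and create the "f_audio_source"
+ * function device, whose "pcm" attribute exposes the ALSA card and device
+ * numbers to userspace.
+ */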
+static struct usb_function_instance *audio_source_alloc_inst(void)
+{
+	struct audio_source_instance *fi_audio;
+	struct device_attribute **attrs;
+	struct device_attribute *attr;
+	struct device *dev;
+	void *err_ptr;
+	int err = 0;
+
+	fi_audio = kzalloc(sizeof(*fi_audio), GFP_KERNEL);
+	if (!fi_audio)
+		return ERR_PTR(-ENOMEM);
+
+	fi_audio->func_inst.set_inst_name = audio_source_set_inst_name;
+	fi_audio->func_inst.free_func_inst = audio_source_free_inst;
+
+	fi_audio->config = kzalloc(sizeof(struct audio_source_config),
+							GFP_KERNEL);
+	if (!fi_audio->config) {
+		err_ptr = ERR_PTR(-ENOMEM);
+		goto fail_audio;
+	}
+
+	config_group_init_type_name(&fi_audio->func_inst.group, "",
+						&audio_source_func_type);
+	dev = create_function_device("f_audio_source");
+
+	if (IS_ERR(dev)) {
+		err_ptr = dev;
+		goto fail_audio_config;
+	}
+
+	fi_audio->config->card = -1;
+	fi_audio->config->device = -1;
+	fi_audio->audio_device = dev;
+
+	attrs = audio_source_function_attributes;
+	if (attrs) {
+		while ((attr = *attrs++) && !err)
+			err = device_create_file(dev, attr);
+		if (err) {
+			err_ptr = ERR_PTR(-EINVAL);
+			goto fail_device;
+		}
+	}
+
+	dev_set_drvdata(dev, fi_audio);
+	_audio_dev.config = fi_audio->config;
+
+	return  &fi_audio->func_inst;
+
+fail_device:
+	device_destroy(dev->class, dev->devt);
+fail_audio_config:
+	kfree(fi_audio->config);
+fail_audio:
+	kfree(fi_audio);
+	return err_ptr;
+
+}
+
+static struct usb_function *audio_source_alloc(struct usb_function_instance *fi)
+{
+	return &_audio_dev.func;
+}
+
+DECLARE_USB_FUNCTION_INIT(audio_source, audio_source_alloc_inst,
+			audio_source_alloc);
+MODULE_LICENSE("GPL");
diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 46af0aa..4713a1c 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -1216,6 +1216,65 @@
 	}
 }
 
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+extern struct device *create_function_device(char *name);
+static ssize_t alsa_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct usb_function_instance *fi_midi = dev_get_drvdata(dev);
+	struct f_midi *midi;
+
+	if (!fi_midi->f)
+		dev_warn(dev, "f_midi: function not set\n");
+
+	if (fi_midi && fi_midi->f) {
+		midi = func_to_midi(fi_midi->f);
+		if (midi->rmidi && midi->rmidi->card)
+			return sprintf(buf, "%d %d\n",
+			midi->rmidi->card->number, midi->rmidi->device);
+	}
+
+	/* card not available yet: report placeholder card and device numbers */
+	return sprintf(buf, "%d %d\n", -1, -1);
+}
+
+static DEVICE_ATTR(alsa, S_IRUGO, alsa_show, NULL);
+
+static struct device_attribute *alsa_function_attributes[] = {
+	&dev_attr_alsa,
+	NULL
+};
+
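+/*
+ * Create the "f_midi" function device with an "alsa" attribute so userspace
+ * can read the ALSA card and device numbers for this MIDI function.
+ */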
+static int create_alsa_device(struct usb_function_instance *fi)
+{
+	struct device *dev;
+	struct device_attribute **attrs;
+	struct device_attribute *attr;
+	int err = 0;
+
+	dev = create_function_device("f_midi");
+	if (IS_ERR(dev))
+		return PTR_ERR(dev);
+
+	attrs = alsa_function_attributes;
+	if (attrs) {
+		while ((attr = *attrs++) && !err)
+			err = device_create_file(dev, attr);
+		if (err) {
+			device_destroy(dev->class, dev->devt);
+			return -EINVAL;
+		}
+	}
+	dev_set_drvdata(dev, fi);
+	return 0;
+}
+#else
+static int create_alsa_device(struct usb_function_instance *fi)
+{
+	return 0;
+}
+#endif
+
 static struct usb_function_instance *f_midi_alloc_inst(void)
 {
 	struct f_midi_opts *opts;
@@ -1234,6 +1293,11 @@
 	opts->out_ports = 1;
 	opts->refcnt = 1;
 
+	if (create_alsa_device(&opts->func_inst)) {
+		kfree(opts);
+		return ERR_PTR(-ENODEV);
+	}
+
 	config_group_init_type_name(&opts->func_inst.group, "",
 				    &midi_func_type);
 
@@ -1254,6 +1318,7 @@
 		kfifo_free(&midi->in_req_fifo);
 		kfree(midi);
 		free = true;
+		opts->func_inst.f = NULL;
 	}
 	mutex_unlock(&opts->lock);
 
@@ -1341,6 +1406,7 @@
 	midi->func.disable	= f_midi_disable;
 	midi->func.free_func	= f_midi_free;
 
+	fi->f = &midi->func;
 	return &midi->func;
 
 setup_fail:
diff --git a/fs/Kconfig b/fs/Kconfig
index ac474a6..ffbd9a4 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -234,6 +234,7 @@
 source "fs/adfs/Kconfig"
 source "fs/affs/Kconfig"
 source "fs/ecryptfs/Kconfig"
+source "fs/sdcardfs/Kconfig"
 source "fs/hfs/Kconfig"
 source "fs/hfsplus/Kconfig"
 source "fs/befs/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index 293733f..110241a 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -84,6 +84,7 @@
 obj-$(CONFIG_HFSPLUS_FS)	+= hfsplus/ # Before hfs to find wrapped HFS+
 obj-$(CONFIG_HFS_FS)		+= hfs/
 obj-$(CONFIG_ECRYPT_FS)		+= ecryptfs/
+obj-$(CONFIG_SDCARD_FS)		+= sdcardfs/
 obj-$(CONFIG_VXFS_FS)		+= freevxfs/
 obj-$(CONFIG_NFS_FS)		+= nfs/
 obj-$(CONFIG_EXPORTFS)		+= exportfs/
diff --git a/fs/attr.c b/fs/attr.c
index d22e818..bea2f7c 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -223,7 +223,7 @@
  * the file open for write, as there can be no conflicting delegation in
  * that case.
  */
-int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode)
+int notify_change2(struct vfsmount *mnt, struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode)
 {
 	struct inode *inode = dentry->d_inode;
 	umode_t mode = inode->i_mode;
@@ -247,7 +247,7 @@
 			return -EPERM;
 
 		if (!inode_owner_or_capable(inode)) {
-			error = inode_permission(inode, MAY_WRITE);
+			error = inode_permission2(mnt, inode, MAY_WRITE);
 			if (error)
 				return error;
 		}
@@ -330,7 +330,9 @@
 	if (error)
 		return error;
 
-	if (inode->i_op->setattr)
+	if (mnt && inode->i_op->setattr2)
+		error = inode->i_op->setattr2(mnt, dentry, attr);
+	else if (inode->i_op->setattr)
 		error = inode->i_op->setattr(dentry, attr);
 	else
 		error = simple_setattr(dentry, attr);
@@ -343,4 +345,10 @@
 
 	return error;
 }
+EXPORT_SYMBOL(notify_change2);
+
+int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode)
+{
+	return notify_change2(NULL, dentry, attr, delegated_inode);
+}
 EXPORT_SYMBOL(notify_change);
diff --git a/fs/coredump.c b/fs/coredump.c
index 1e2c87a..d9b3ba0 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -742,7 +742,7 @@
 			goto close_fail;
 		if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
 			goto close_fail;
-		if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
+		if (do_truncate2(cprm.file->f_path.mnt, cprm.file->f_path.dentry, 0, 0, cprm.file))
 			goto close_fail;
 	}
 
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index c83ddff..203e8b69 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -133,15 +133,25 @@
 }
 EXPORT_SYMBOL(fscrypt_get_ctx);
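+/*
+ * Build the IV for the unit at logical block number lblk_num: zero the IV,
+ * store the block number, copy in the per-file nonce in DIRECT_KEY mode, and
+ * ESSIV-encrypt the result when an ESSIV tfm has been set up.
+ */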
 
+void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
+			 const struct fscrypt_info *ci)
+{
+	memset(iv, 0, ci->ci_mode->ivsize);
+	iv->lblk_num = cpu_to_le64(lblk_num);
+
+	if (ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY)
+		memcpy(iv->nonce, ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE);
+
+	if (ci->ci_essiv_tfm != NULL)
+		crypto_cipher_encrypt_one(ci->ci_essiv_tfm, iv->raw, iv->raw);
+}
+
 int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
 			   u64 lblk_num, struct page *src_page,
 			   struct page *dest_page, unsigned int len,
 			   unsigned int offs, gfp_t gfp_flags)
 {
-	struct {
-		__le64 index;
-		u8 padding[FS_IV_SIZE - sizeof(__le64)];
-	} iv;
+	union fscrypt_iv iv;
 	struct skcipher_request *req = NULL;
 	DECLARE_CRYPTO_WAIT(wait);
 	struct scatterlist dst, src;
@@ -154,15 +164,7 @@
 	if (WARN_ON_ONCE(len % FS_CRYPTO_BLOCK_SIZE != 0))
 		return -EINVAL;
 
-	BUILD_BUG_ON(sizeof(iv) != FS_IV_SIZE);
-	BUILD_BUG_ON(AES_BLOCK_SIZE != FS_IV_SIZE);
-	iv.index = cpu_to_le64(lblk_num);
-	memset(iv.padding, 0, sizeof(iv.padding));
-
-	if (ci->ci_essiv_tfm != NULL) {
-		crypto_cipher_encrypt_one(ci->ci_essiv_tfm, (u8 *)&iv,
-					  (u8 *)&iv);
-	}
+	fscrypt_generate_iv(&iv, lblk_num, ci);
 
 	req = skcipher_request_alloc(tfm, gfp_flags);
 	if (!req)
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index d7a0f68..7ff40a7 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -40,10 +40,11 @@
 {
 	struct skcipher_request *req = NULL;
 	DECLARE_CRYPTO_WAIT(wait);
-	struct crypto_skcipher *tfm = inode->i_crypt_info->ci_ctfm;
-	int res = 0;
-	char iv[FS_CRYPTO_BLOCK_SIZE];
+	struct fscrypt_info *ci = inode->i_crypt_info;
+	struct crypto_skcipher *tfm = ci->ci_ctfm;
+	union fscrypt_iv iv;
 	struct scatterlist sg;
+	int res;
 
 	/*
 	 * Copy the filename to the output buffer for encrypting in-place and
@@ -55,7 +56,7 @@
 	memset(out + iname->len, 0, olen - iname->len);
 
 	/* Initialize the IV */
-	memset(iv, 0, FS_CRYPTO_BLOCK_SIZE);
+	fscrypt_generate_iv(&iv, 0, ci);
 
 	/* Set up the encryption request */
 	req = skcipher_request_alloc(tfm, GFP_NOFS);
@@ -65,7 +66,7 @@
 			CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
 			crypto_req_done, &wait);
 	sg_init_one(&sg, out, olen);
-	skcipher_request_set_crypt(req, &sg, &sg, olen, iv);
+	skcipher_request_set_crypt(req, &sg, &sg, olen, &iv);
 
 	/* Do the encryption */
 	res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
@@ -94,9 +95,10 @@
 	struct skcipher_request *req = NULL;
 	DECLARE_CRYPTO_WAIT(wait);
 	struct scatterlist src_sg, dst_sg;
-	struct crypto_skcipher *tfm = inode->i_crypt_info->ci_ctfm;
-	int res = 0;
-	char iv[FS_CRYPTO_BLOCK_SIZE];
+	struct fscrypt_info *ci = inode->i_crypt_info;
+	struct crypto_skcipher *tfm = ci->ci_ctfm;
+	union fscrypt_iv iv;
+	int res;
 
 	/* Allocate request */
 	req = skcipher_request_alloc(tfm, GFP_NOFS);
@@ -107,12 +109,12 @@
 		crypto_req_done, &wait);
 
 	/* Initialize IV */
-	memset(iv, 0, FS_CRYPTO_BLOCK_SIZE);
+	fscrypt_generate_iv(&iv, 0, ci);
 
 	/* Create decryption request */
 	sg_init_one(&src_sg, iname->name, iname->len);
 	sg_init_one(&dst_sg, oname->name, oname->len);
-	skcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv);
+	skcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, &iv);
 	res = crypto_wait_req(crypto_skcipher_decrypt(req), &wait);
 	skcipher_request_free(req);
 	if (res < 0) {
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 79debfc..7424f85 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -17,7 +17,6 @@
 #include <crypto/hash.h>
 
 /* Encryption parameters */
-#define FS_IV_SIZE			16
 #define FS_KEY_DERIVATION_NONCE_SIZE	16
 
 /**
@@ -52,16 +51,42 @@
 } __packed;
 
 /*
- * A pointer to this structure is stored in the file system's in-core
- * representation of an inode.
+ * fscrypt_info - the "encryption key" for an inode
+ *
+ * When an encrypted file's key is made available, an instance of this struct is
+ * allocated and stored in ->i_crypt_info.  Once created, it remains until the
+ * inode is evicted.
  */
 struct fscrypt_info {
+
+	/* The actual crypto transform used for encryption and decryption */
+	struct crypto_skcipher *ci_ctfm;
+
+	/*
+	 * Cipher for ESSIV IV generation.  Only set for CBC contents
+	 * encryption; otherwise it is NULL.
+	 */
+	struct crypto_cipher *ci_essiv_tfm;
+
+	/*
+	 * Encryption mode used for this inode.  It corresponds to either
+	 * ci_data_mode or ci_filename_mode, depending on the inode type.
+	 */
+	struct fscrypt_mode *ci_mode;
+
+	/*
+	 * If non-NULL, then this inode uses a master key directly rather than a
+	 * derived key, and ci_ctfm will equal ci_master_key->mk_ctfm.
+	 * Otherwise, this inode uses a derived key.
+	 */
+	struct fscrypt_master_key *ci_master_key;
+
+	/* fields from the fscrypt_context */
 	u8 ci_data_mode;
 	u8 ci_filename_mode;
 	u8 ci_flags;
-	struct crypto_skcipher *ci_ctfm;
-	struct crypto_cipher *ci_essiv_tfm;
-	u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE];
+	u8 ci_master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+	u8 ci_nonce[FS_KEY_DERIVATION_NONCE_SIZE];
 };
 
 typedef enum {
@@ -83,6 +108,10 @@
 	    filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS)
 		return true;
 
+	if (contents_mode == FS_ENCRYPTION_MODE_ADIANTUM &&
+	    filenames_mode == FS_ENCRYPTION_MODE_ADIANTUM)
+		return true;
+
 	return false;
 }
 
@@ -107,6 +136,22 @@
 #define fscrypt_err(sb, fmt, ...)		\
 	fscrypt_msg(sb, KERN_ERR, fmt, ##__VA_ARGS__)
 
+#define FSCRYPT_MAX_IV_SIZE	32
+
+union fscrypt_iv {
+	struct {
+		/* logical block number within the file */
+		__le64 lblk_num;
+
+		/* per-file nonce; only set in DIRECT_KEY mode */
+		u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
+	};
+	u8 raw[FSCRYPT_MAX_IV_SIZE];
+};
+
+void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
+			 const struct fscrypt_info *ci);
+
 /* fname.c */
 extern int fname_encrypt(struct inode *inode, const struct qstr *iname,
 			 u8 *out, unsigned int olen);
@@ -115,6 +160,16 @@
 					 u32 *encrypted_len_ret);
 
 /* keyinfo.c */
+
+struct fscrypt_mode {
+	const char *friendly_name;
+	const char *cipher_str;
+	int keysize;
+	int ivsize;
+	bool logged_impl_name;
+	bool needs_essiv;
+};
+
 extern void __exit fscrypt_essiv_cleanup(void);
 
 #endif /* _FSCRYPT_PRIVATE_H */
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 7874c9b..1e11a68 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -10,15 +10,21 @@
  */
 
 #include <keys/user-type.h>
+#include <linux/hashtable.h>
 #include <linux/scatterlist.h>
 #include <linux/ratelimit.h>
 #include <crypto/aes.h>
+#include <crypto/algapi.h>
 #include <crypto/sha.h>
 #include <crypto/skcipher.h>
 #include "fscrypt_private.h"
 
 static struct crypto_shash *essiv_hash_tfm;
 
+/* Table of keys referenced by FS_POLICY_FLAG_DIRECT_KEY policies */
+static DEFINE_HASHTABLE(fscrypt_master_keys, 6); /* 6 bits = 64 buckets */
+static DEFINE_SPINLOCK(fscrypt_master_keys_lock);
+
 /*
  * Key derivation function.  This generates the derived key by encrypting the
  * master key with AES-128-ECB using the inode's nonce as the AES key.
@@ -123,56 +129,37 @@
 	return ERR_PTR(-ENOKEY);
 }
 
-/* Find the master key, then derive the inode's actual encryption key */
-static int find_and_derive_key(const struct inode *inode,
-			       const struct fscrypt_context *ctx,
-			       u8 *derived_key, unsigned int derived_keysize)
-{
-	struct key *key;
-	const struct fscrypt_key *payload;
-	int err;
-
-	key = find_and_lock_process_key(FS_KEY_DESC_PREFIX,
-					ctx->master_key_descriptor,
-					derived_keysize, &payload);
-	if (key == ERR_PTR(-ENOKEY) && inode->i_sb->s_cop->key_prefix) {
-		key = find_and_lock_process_key(inode->i_sb->s_cop->key_prefix,
-						ctx->master_key_descriptor,
-						derived_keysize, &payload);
-	}
-	if (IS_ERR(key))
-		return PTR_ERR(key);
-	err = derive_key_aes(payload->raw, ctx, derived_key, derived_keysize);
-	up_read(&key->sem);
-	key_put(key);
-	return err;
-}
-
-static struct fscrypt_mode {
-	const char *friendly_name;
-	const char *cipher_str;
-	int keysize;
-	bool logged_impl_name;
-} available_modes[] = {
+static struct fscrypt_mode available_modes[] = {
 	[FS_ENCRYPTION_MODE_AES_256_XTS] = {
 		.friendly_name = "AES-256-XTS",
 		.cipher_str = "xts(aes)",
 		.keysize = 64,
+		.ivsize = 16,
 	},
 	[FS_ENCRYPTION_MODE_AES_256_CTS] = {
 		.friendly_name = "AES-256-CTS-CBC",
 		.cipher_str = "cts(cbc(aes))",
 		.keysize = 32,
+		.ivsize = 16,
 	},
 	[FS_ENCRYPTION_MODE_AES_128_CBC] = {
 		.friendly_name = "AES-128-CBC",
 		.cipher_str = "cbc(aes)",
 		.keysize = 16,
+		.ivsize = 16,
+		.needs_essiv = true,
 	},
 	[FS_ENCRYPTION_MODE_AES_128_CTS] = {
 		.friendly_name = "AES-128-CTS-CBC",
 		.cipher_str = "cts(cbc(aes))",
 		.keysize = 16,
+		.ivsize = 16,
+	},
+	[FS_ENCRYPTION_MODE_ADIANTUM] = {
+		.friendly_name = "Adiantum",
+		.cipher_str = "adiantum(xchacha12,aes)",
+		.keysize = 32,
+		.ivsize = 32,
 	},
 };
 
@@ -198,14 +185,196 @@
 	return ERR_PTR(-EINVAL);
 }
 
-static void put_crypt_info(struct fscrypt_info *ci)
+/* Find the master key, then derive the inode's actual encryption key */
+static int find_and_derive_key(const struct inode *inode,
+			       const struct fscrypt_context *ctx,
+			       u8 *derived_key, const struct fscrypt_mode *mode)
 {
-	if (!ci)
-		return;
+	struct key *key;
+	const struct fscrypt_key *payload;
+	int err;
 
-	crypto_free_skcipher(ci->ci_ctfm);
-	crypto_free_cipher(ci->ci_essiv_tfm);
-	kmem_cache_free(fscrypt_info_cachep, ci);
+	key = find_and_lock_process_key(FS_KEY_DESC_PREFIX,
+					ctx->master_key_descriptor,
+					mode->keysize, &payload);
+	if (key == ERR_PTR(-ENOKEY) && inode->i_sb->s_cop->key_prefix) {
+		key = find_and_lock_process_key(inode->i_sb->s_cop->key_prefix,
+						ctx->master_key_descriptor,
+						mode->keysize, &payload);
+	}
+	if (IS_ERR(key))
+		return PTR_ERR(key);
+
+	if (ctx->flags & FS_POLICY_FLAG_DIRECT_KEY) {
+		if (mode->ivsize < offsetofend(union fscrypt_iv, nonce)) {
+			fscrypt_warn(inode->i_sb,
+				     "direct key mode not allowed with %s",
+				     mode->friendly_name);
+			err = -EINVAL;
+		} else if (ctx->contents_encryption_mode !=
+			   ctx->filenames_encryption_mode) {
+			fscrypt_warn(inode->i_sb,
+				     "direct key mode not allowed with different contents and filenames modes");
+			err = -EINVAL;
+		} else {
+			memcpy(derived_key, payload->raw, mode->keysize);
+			err = 0;
+		}
+	} else {
+		err = derive_key_aes(payload->raw, ctx, derived_key,
+				     mode->keysize);
+	}
+	up_read(&key->sem);
+	key_put(key);
+	return err;
+}
+
+/* Allocate and key a symmetric cipher object for the given encryption mode */
+static struct crypto_skcipher *
+allocate_skcipher_for_mode(struct fscrypt_mode *mode, const u8 *raw_key,
+			   const struct inode *inode)
+{
+	struct crypto_skcipher *tfm;
+	int err;
+
+	tfm = crypto_alloc_skcipher(mode->cipher_str, 0, 0);
+	if (IS_ERR(tfm)) {
+		fscrypt_warn(inode->i_sb,
+			     "error allocating '%s' transform for inode %lu: %ld",
+			     mode->cipher_str, inode->i_ino, PTR_ERR(tfm));
+		return tfm;
+	}
+	if (unlikely(!mode->logged_impl_name)) {
+		/*
+		 * fscrypt performance can vary greatly depending on which
+		 * crypto algorithm implementation is used.  Help people debug
+		 * performance problems by logging the ->cra_driver_name the
+		 * first time a mode is used.  Note that multiple threads can
+		 * race here, but it doesn't really matter.
+		 */
+		mode->logged_impl_name = true;
+		pr_info("fscrypt: %s using implementation \"%s\"\n",
+			mode->friendly_name,
+			crypto_skcipher_alg(tfm)->base.cra_driver_name);
+	}
+	crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+	err = crypto_skcipher_setkey(tfm, raw_key, mode->keysize);
+	if (err)
+		goto err_free_tfm;
+
+	return tfm;
+
+err_free_tfm:
+	crypto_free_skcipher(tfm);
+	return ERR_PTR(err);
+}
+
+/* Master key referenced by FS_POLICY_FLAG_DIRECT_KEY policy */
+struct fscrypt_master_key {
+	struct hlist_node mk_node;
+	refcount_t mk_refcount;
+	const struct fscrypt_mode *mk_mode;
+	struct crypto_skcipher *mk_ctfm;
+	u8 mk_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+	u8 mk_raw[FS_MAX_KEY_SIZE];
+};
+
+static void free_master_key(struct fscrypt_master_key *mk)
+{
+	if (mk) {
+		crypto_free_skcipher(mk->mk_ctfm);
+		kzfree(mk);
+	}
+}
+
+static void put_master_key(struct fscrypt_master_key *mk)
+{
+	if (!refcount_dec_and_lock(&mk->mk_refcount, &fscrypt_master_keys_lock))
+		return;
+	hash_del(&mk->mk_node);
+	spin_unlock(&fscrypt_master_keys_lock);
+
+	free_master_key(mk);
+}
+
+/*
+ * Find/insert the given master key into the fscrypt_master_keys table.  If
+ * found, it is returned with elevated refcount, and 'to_insert' is freed if
+ * non-NULL.  If not found, 'to_insert' is inserted and returned if it's
+ * non-NULL; otherwise NULL is returned.
+ */
+static struct fscrypt_master_key *
+find_or_insert_master_key(struct fscrypt_master_key *to_insert,
+			  const u8 *raw_key, const struct fscrypt_mode *mode,
+			  const struct fscrypt_info *ci)
+{
+	unsigned long hash_key;
+	struct fscrypt_master_key *mk;
+
+	/*
+	 * Careful: to avoid potentially leaking secret key bytes via timing
+	 * information, we must key the hash table by descriptor rather than by
+	 * raw key, and use crypto_memneq() when comparing raw keys.
+	 */
+
+	BUILD_BUG_ON(sizeof(hash_key) > FS_KEY_DESCRIPTOR_SIZE);
+	memcpy(&hash_key, ci->ci_master_key_descriptor, sizeof(hash_key));
+
+	spin_lock(&fscrypt_master_keys_lock);
+	hash_for_each_possible(fscrypt_master_keys, mk, mk_node, hash_key) {
+		if (memcmp(ci->ci_master_key_descriptor, mk->mk_descriptor,
+			   FS_KEY_DESCRIPTOR_SIZE) != 0)
+			continue;
+		if (mode != mk->mk_mode)
+			continue;
+		if (crypto_memneq(raw_key, mk->mk_raw, mode->keysize))
+			continue;
+		/* using existing tfm with same (descriptor, mode, raw_key) */
+		refcount_inc(&mk->mk_refcount);
+		spin_unlock(&fscrypt_master_keys_lock);
+		free_master_key(to_insert);
+		return mk;
+	}
+	if (to_insert)
+		hash_add(fscrypt_master_keys, &to_insert->mk_node, hash_key);
+	spin_unlock(&fscrypt_master_keys_lock);
+	return to_insert;
+}
+
+/* Prepare to encrypt directly using the master key in the given mode */
+static struct fscrypt_master_key *
+fscrypt_get_master_key(const struct fscrypt_info *ci, struct fscrypt_mode *mode,
+		       const u8 *raw_key, const struct inode *inode)
+{
+	struct fscrypt_master_key *mk;
+	int err;
+
+	/* Is there already a tfm for this key? */
+	mk = find_or_insert_master_key(NULL, raw_key, mode, ci);
+	if (mk)
+		return mk;
+
+	/* Nope, allocate one. */
+	mk = kzalloc(sizeof(*mk), GFP_NOFS);
+	if (!mk)
+		return ERR_PTR(-ENOMEM);
+	refcount_set(&mk->mk_refcount, 1);
+	mk->mk_mode = mode;
+	mk->mk_ctfm = allocate_skcipher_for_mode(mode, raw_key, inode);
+	if (IS_ERR(mk->mk_ctfm)) {
+		err = PTR_ERR(mk->mk_ctfm);
+		mk->mk_ctfm = NULL;
+		goto err_free_mk;
+	}
+	memcpy(mk->mk_descriptor, ci->ci_master_key_descriptor,
+	       FS_KEY_DESCRIPTOR_SIZE);
+	memcpy(mk->mk_raw, raw_key, mode->keysize);
+
+	return find_or_insert_master_key(mk, raw_key, mode, ci);
+
+err_free_mk:
+	free_master_key(mk);
+	return ERR_PTR(err);
 }
 
 static int derive_essiv_salt(const u8 *key, int keysize, u8 *salt)
@@ -275,11 +444,67 @@
 	crypto_free_shash(essiv_hash_tfm);
 }
 
+/*
+ * Given the encryption mode and key (normally the derived key, but for
+ * FS_POLICY_FLAG_DIRECT_KEY mode it's the master key), set up the inode's
+ * symmetric cipher transform object(s).
+ */
+static int setup_crypto_transform(struct fscrypt_info *ci,
+				  struct fscrypt_mode *mode,
+				  const u8 *raw_key, const struct inode *inode)
+{
+	struct fscrypt_master_key *mk;
+	struct crypto_skcipher *ctfm;
+	int err;
+
+	if (ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY) {
+		mk = fscrypt_get_master_key(ci, mode, raw_key, inode);
+		if (IS_ERR(mk))
+			return PTR_ERR(mk);
+		ctfm = mk->mk_ctfm;
+	} else {
+		mk = NULL;
+		ctfm = allocate_skcipher_for_mode(mode, raw_key, inode);
+		if (IS_ERR(ctfm))
+			return PTR_ERR(ctfm);
+	}
+	ci->ci_master_key = mk;
+	ci->ci_ctfm = ctfm;
+
+	if (mode->needs_essiv) {
+		/* ESSIV implies 16-byte IVs which implies !DIRECT_KEY */
+		WARN_ON(mode->ivsize != AES_BLOCK_SIZE);
+		WARN_ON(ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY);
+
+		err = init_essiv_generator(ci, raw_key, mode->keysize);
+		if (err) {
+			fscrypt_warn(inode->i_sb,
+				     "error initializing ESSIV generator for inode %lu: %d",
+				     inode->i_ino, err);
+			return err;
+		}
+	}
+	return 0;
+}
+
+static void put_crypt_info(struct fscrypt_info *ci)
+{
+	if (!ci)
+		return;
+
+	if (ci->ci_master_key) {
+		put_master_key(ci->ci_master_key);
+	} else {
+		crypto_free_skcipher(ci->ci_ctfm);
+		crypto_free_cipher(ci->ci_essiv_tfm);
+	}
+	kmem_cache_free(fscrypt_info_cachep, ci);
+}
+
 int fscrypt_get_encryption_info(struct inode *inode)
 {
 	struct fscrypt_info *crypt_info;
 	struct fscrypt_context ctx;
-	struct crypto_skcipher *ctfm;
 	struct fscrypt_mode *mode;
 	u8 *raw_key = NULL;
 	int res;
@@ -312,74 +537,42 @@
 	if (ctx.flags & ~FS_POLICY_FLAGS_VALID)
 		return -EINVAL;
 
-	crypt_info = kmem_cache_alloc(fscrypt_info_cachep, GFP_NOFS);
+	crypt_info = kmem_cache_zalloc(fscrypt_info_cachep, GFP_NOFS);
 	if (!crypt_info)
 		return -ENOMEM;
 
 	crypt_info->ci_flags = ctx.flags;
 	crypt_info->ci_data_mode = ctx.contents_encryption_mode;
 	crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
-	crypt_info->ci_ctfm = NULL;
-	crypt_info->ci_essiv_tfm = NULL;
-	memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
-				sizeof(crypt_info->ci_master_key));
+	memcpy(crypt_info->ci_master_key_descriptor, ctx.master_key_descriptor,
+	       FS_KEY_DESCRIPTOR_SIZE);
+	memcpy(crypt_info->ci_nonce, ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
 
 	mode = select_encryption_mode(crypt_info, inode);
 	if (IS_ERR(mode)) {
 		res = PTR_ERR(mode);
 		goto out;
 	}
+	WARN_ON(mode->ivsize > FSCRYPT_MAX_IV_SIZE);
+	crypt_info->ci_mode = mode;
 
 	/*
-	 * This cannot be a stack buffer because it is passed to the scatterlist
-	 * crypto API as part of key derivation.
+	 * This cannot be a stack buffer because it may be passed to the
+	 * scatterlist crypto API as part of key derivation.
 	 */
 	res = -ENOMEM;
 	raw_key = kmalloc(mode->keysize, GFP_NOFS);
 	if (!raw_key)
 		goto out;
 
-	res = find_and_derive_key(inode, &ctx, raw_key, mode->keysize);
+	res = find_and_derive_key(inode, &ctx, raw_key, mode);
 	if (res)
 		goto out;
 
-	ctfm = crypto_alloc_skcipher(mode->cipher_str, 0, 0);
-	if (IS_ERR(ctfm)) {
-		res = PTR_ERR(ctfm);
-		fscrypt_warn(inode->i_sb,
-			     "error allocating '%s' transform for inode %lu: %d",
-			     mode->cipher_str, inode->i_ino, res);
-		goto out;
-	}
-	if (unlikely(!mode->logged_impl_name)) {
-		/*
-		 * fscrypt performance can vary greatly depending on which
-		 * crypto algorithm implementation is used.  Help people debug
-		 * performance problems by logging the ->cra_driver_name the
-		 * first time a mode is used.  Note that multiple threads can
-		 * race here, but it doesn't really matter.
-		 */
-		mode->logged_impl_name = true;
-		pr_info("fscrypt: %s using implementation \"%s\"\n",
-			mode->friendly_name,
-			crypto_skcipher_alg(ctfm)->base.cra_driver_name);
-	}
-	crypt_info->ci_ctfm = ctfm;
-	crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_REQ_WEAK_KEY);
-	res = crypto_skcipher_setkey(ctfm, raw_key, mode->keysize);
+	res = setup_crypto_transform(crypt_info, mode, raw_key, inode);
 	if (res)
 		goto out;
 
-	if (S_ISREG(inode->i_mode) &&
-	    crypt_info->ci_data_mode == FS_ENCRYPTION_MODE_AES_128_CBC) {
-		res = init_essiv_generator(crypt_info, raw_key, mode->keysize);
-		if (res) {
-			fscrypt_warn(inode->i_sb,
-				     "error initializing ESSIV generator for inode %lu: %d",
-				     inode->i_ino, res);
-			goto out;
-		}
-	}
 	if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) == NULL)
 		crypt_info = NULL;
 out:
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 4288839..bf6590c 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -201,7 +201,8 @@
 	child_ci = child->i_crypt_info;
 
 	if (parent_ci && child_ci) {
-		return memcmp(parent_ci->ci_master_key, child_ci->ci_master_key,
+		return memcmp(parent_ci->ci_master_key_descriptor,
+			      child_ci->ci_master_key_descriptor,
 			      FS_KEY_DESCRIPTOR_SIZE) == 0 &&
 			(parent_ci->ci_data_mode == child_ci->ci_data_mode) &&
 			(parent_ci->ci_filename_mode ==
@@ -256,7 +257,7 @@
 	ctx.contents_encryption_mode = ci->ci_data_mode;
 	ctx.filenames_encryption_mode = ci->ci_filename_mode;
 	ctx.flags = ci->ci_flags;
-	memcpy(ctx.master_key_descriptor, ci->ci_master_key,
+	memcpy(ctx.master_key_descriptor, ci->ci_master_key_descriptor,
 	       FS_KEY_DESCRIPTOR_SIZE);
 	get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
 	BUILD_BUG_ON(sizeof(ctx) != FSCRYPT_SET_CONTEXT_MAX_SIZE);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 58f48ea..8b4ded9 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -34,6 +34,7 @@
 #include <linux/mutex.h>
 #include <linux/anon_inodes.h>
 #include <linux/device.h>
+#include <linux/freezer.h>
 #include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/mman.h>
@@ -1816,7 +1817,8 @@
 			}
 
 			spin_unlock_irq(&ep->wq.lock);
-			if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
+			if (!freezable_schedule_hrtimeout_range(to, slack,
+								HRTIMER_MODE_ABS))
 				timed_out = 1;
 
 			spin_lock_irq(&ep->wq.lock);
diff --git a/fs/exec.c b/fs/exec.c
index 561ea64..aa9d20c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1308,7 +1308,7 @@
 void would_dump(struct linux_binprm *bprm, struct file *file)
 {
 	struct inode *inode = file_inode(file);
-	if (inode_permission(inode, MAY_READ) < 0) {
+	if (inode_permission2(file->f_path.mnt, inode, MAY_READ) < 0) {
 		struct user_namespace *old, *user_ns;
 		bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
 
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 56f6e17..8a7394c 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -12,6 +12,7 @@
 #include "ext4.h"
 #include "xattr.h"
 #include "truncate.h"
+#include <trace/events/android_fs.h>
 
 #define EXT4_XATTR_SYSTEM_DATA	"data"
 #define EXT4_MIN_INLINE_DATA_SIZE	((sizeof(__le32) * EXT4_N_BLOCKS))
@@ -505,6 +506,17 @@
 		return -EAGAIN;
 	}
 
+	if (trace_android_fs_dataread_start_enabled()) {
+		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+		path = android_fstrace_get_pathname(pathbuf,
+						    MAX_TRACE_PATHBUF_LEN,
+						    inode);
+		trace_android_fs_dataread_start(inode, page_offset(page),
+						PAGE_SIZE, current->pid,
+						path, current->comm);
+	}
+
 	/*
 	 * Current inline data can only exist in the 1st page,
 	 * So for all the other pages, just set them uptodate.
@@ -516,6 +528,8 @@
 		SetPageUptodate(page);
 	}
 
+	trace_android_fs_dataread_end(inode, page_offset(page), PAGE_SIZE);
+
 	up_read(&EXT4_I(inode)->xattr_sem);
 
 	unlock_page(page);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e65559b..c4b6ba3 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -47,6 +47,7 @@
 #include "truncate.h"
 
 #include <trace/events/ext4.h>
+#include <trace/events/android_fs.h>
 
 #define MPAGE_DA_EXTENT_TAIL 0x01
 
@@ -1259,6 +1260,16 @@
 	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
 		return -EIO;
 
+	if (trace_android_fs_datawrite_start_enabled()) {
+		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+		path = android_fstrace_get_pathname(pathbuf,
+						    MAX_TRACE_PATHBUF_LEN,
+						    inode);
+		trace_android_fs_datawrite_start(inode, pos, len,
+						 current->pid, path,
+						 current->comm);
+	}
 	trace_ext4_write_begin(inode, pos, len, flags);
 	/*
 	 * Reserve one block more for addition to orphan list in case
@@ -1397,6 +1408,7 @@
 	int i_size_changed = 0;
 	int inline_data = ext4_has_inline_data(inode);
 
+	trace_android_fs_datawrite_end(inode, pos, len);
 	trace_ext4_write_end(inode, pos, len, copied);
 	if (inline_data) {
 		ret = ext4_write_inline_data_end(inode, pos, len,
@@ -1502,6 +1514,7 @@
 	int size_changed = 0;
 	int inline_data = ext4_has_inline_data(inode);
 
+	trace_android_fs_datawrite_end(inode, pos, len);
 	trace_ext4_journalled_write_end(inode, pos, len, copied);
 	from = pos & (PAGE_SIZE - 1);
 	to = from + len;
@@ -3031,6 +3044,16 @@
 					len, flags, pagep, fsdata);
 	}
 	*fsdata = (void *)0;
+	if (trace_android_fs_datawrite_start_enabled()) {
+		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+		path = android_fstrace_get_pathname(pathbuf,
+						    MAX_TRACE_PATHBUF_LEN,
+						    inode);
+		trace_android_fs_datawrite_start(inode, pos, len,
+						 current->pid,
+						 path, current->comm);
+	}
 	trace_ext4_da_write_begin(inode, pos, len, flags);
 
 	if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
@@ -3149,6 +3172,7 @@
 		return ext4_write_end(file, mapping, pos,
 				      len, copied, page, fsdata);
 
+	trace_android_fs_datawrite_end(inode, pos, len);
 	trace_ext4_da_write_end(inode, pos, len, copied);
 	start = pos & (PAGE_SIZE - 1);
 	end = start + copied - 1;
@@ -3854,6 +3878,7 @@
 	size_t count = iov_iter_count(iter);
 	loff_t offset = iocb->ki_pos;
 	ssize_t ret;
+	int rw = iov_iter_rw(iter);
 
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
 	if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode))
@@ -3870,12 +3895,42 @@
 	if (ext4_has_inline_data(inode))
 		return 0;
 
+	if (trace_android_fs_dataread_start_enabled() &&
+	    (rw == READ)) {
+		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+		path = android_fstrace_get_pathname(pathbuf,
+						    MAX_TRACE_PATHBUF_LEN,
+						    inode);
+		trace_android_fs_dataread_start(inode, offset, count,
+						current->pid, path,
+						current->comm);
+	}
+	if (trace_android_fs_datawrite_start_enabled() &&
+	    (rw == WRITE)) {
+		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+		path = android_fstrace_get_pathname(pathbuf,
+						    MAX_TRACE_PATHBUF_LEN,
+						    inode);
+		trace_android_fs_datawrite_start(inode, offset, count,
+						 current->pid, path,
+						 current->comm);
+	}
 	trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
 	if (iov_iter_rw(iter) == READ)
 		ret = ext4_direct_IO_read(iocb, iter);
 	else
 		ret = ext4_direct_IO_write(iocb, iter);
 	trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret);
+
+	if (trace_android_fs_dataread_start_enabled() &&
+	    (rw == READ))
+		trace_android_fs_dataread_end(inode, offset, count);
+	if (trace_android_fs_datawrite_start_enabled() &&
+	    (rw == WRITE))
+		trace_android_fs_datawrite_end(inode, offset, count);
+
 	return ret;
 }
 
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index f461d75..aa1b9e1 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -46,6 +46,7 @@
 #include <linux/cleancache.h>
 
 #include "ext4.h"
+#include <trace/events/android_fs.h>
 
 static inline bool ext4_bio_encrypted(struct bio *bio)
 {
@@ -56,6 +57,17 @@
 #endif
 }
 
+static void
+ext4_trace_read_completion(struct bio *bio)
+{
+	struct page *first_page = bio->bi_io_vec[0].bv_page;
+
+	if (first_page != NULL)
+		trace_android_fs_dataread_end(first_page->mapping->host,
+					      page_offset(first_page),
+					      bio->bi_iter.bi_size);
+}
+
 /*
  * I/O completion handler for multipage BIOs.
  *
@@ -73,6 +85,9 @@
 	struct bio_vec *bv;
 	int i;
 
+	if (trace_android_fs_dataread_start_enabled())
+		ext4_trace_read_completion(bio);
+
 	if (ext4_bio_encrypted(bio)) {
 		if (bio->bi_status) {
 			fscrypt_release_ctx(bio->bi_private);
@@ -96,6 +111,30 @@
 	bio_put(bio);
 }
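+/* emit the android_fs dataread start event for the first page, then submit */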
 
+static void
+ext4_submit_bio_read(struct bio *bio)
+{
+	if (trace_android_fs_dataread_start_enabled()) {
+		struct page *first_page = bio->bi_io_vec[0].bv_page;
+
+		if (first_page != NULL) {
+			char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+			path = android_fstrace_get_pathname(pathbuf,
+						    MAX_TRACE_PATHBUF_LEN,
+						    first_page->mapping->host);
+			trace_android_fs_dataread_start(
+				first_page->mapping->host,
+				page_offset(first_page),
+				bio->bi_iter.bi_size,
+				current->pid,
+				path,
+				current->comm);
+		}
+	}
+	submit_bio(bio);
+}
+
 int ext4_mpage_readpages(struct address_space *mapping,
 			 struct list_head *pages, struct page *page,
 			 unsigned nr_pages, bool is_readahead)
@@ -236,7 +275,7 @@
 		 */
 		if (bio && (last_block_in_bio != blocks[0] - 1)) {
 		submit_and_realloc:
-			submit_bio(bio);
+			ext4_submit_bio_read(bio);
 			bio = NULL;
 		}
 		if (bio == NULL) {
@@ -270,14 +309,14 @@
 		if (((map.m_flags & EXT4_MAP_BOUNDARY) &&
 		     (relative_block == map.m_len)) ||
 		    (first_hole != blocks_per_page)) {
-			submit_bio(bio);
+			ext4_submit_bio_read(bio);
 			bio = NULL;
 		} else
 			last_block_in_bio = blocks[blocks_per_page - 1];
 		goto next_page;
 	confused:
 		if (bio) {
-			submit_bio(bio);
+			ext4_submit_bio_read(bio);
 			bio = NULL;
 		}
 		if (!PageUptodate(page))
@@ -290,6 +329,6 @@
 	}
 	BUG_ON(pages && !list_empty(pages));
 	if (bio)
-		submit_bio(bio);
+		ext4_submit_bio_read(bio);
 	return 0;
 }
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index b9fe937..63e5995 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/acl.c
  *
@@ -7,10 +8,6 @@
  * Portions of this code from linux/fs/ext2/acl.c
  *
  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/f2fs_fs.h>
 #include "f2fs.h"
@@ -53,6 +50,9 @@
 	struct f2fs_acl_entry *entry = (struct f2fs_acl_entry *)(hdr + 1);
 	const char *end = value + size;
 
+	if (size < sizeof(struct f2fs_acl_header))
+		return ERR_PTR(-EINVAL);
+
 	if (hdr->a_version != cpu_to_le32(F2FS_ACL_VERSION))
 		return ERR_PTR(-EINVAL);
 
@@ -160,7 +160,7 @@
 	return (void *)f2fs_acl;
 
 fail:
-	kfree(f2fs_acl);
+	kvfree(f2fs_acl);
 	return ERR_PTR(-EINVAL);
 }
 
@@ -190,7 +190,7 @@
 		acl = NULL;
 	else
 		acl = ERR_PTR(retval);
-	kfree(value);
+	kvfree(value);
 
 	return acl;
 }
@@ -240,7 +240,7 @@
 
 	error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0);
 
-	kfree(value);
+	kvfree(value);
 	if (!error)
 		set_cached_acl(inode, type, acl);
 
@@ -396,12 +396,16 @@
 		error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl,
 				       ipage);
 		posix_acl_release(default_acl);
+	} else {
+		inode->i_default_acl = NULL;
 	}
 	if (acl) {
 		if (!error)
 			error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl,
 					       ipage);
 		posix_acl_release(acl);
+	} else {
+		inode->i_acl = NULL;
 	}
 
 	return error;
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h
index 2c68518..b96823c 100644
--- a/fs/f2fs/acl.h
+++ b/fs/f2fs/acl.h
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/acl.h
  *
@@ -7,10 +8,6 @@
  * Portions of this code from linux/fs/ext2/acl.h
  *
  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef __F2FS_ACL_H__
 #define __F2FS_ACL_H__
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 59d0472..a98e1b0 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/checkpoint.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/bio.h>
@@ -47,7 +44,7 @@
 		cond_resched();
 		goto repeat;
 	}
-	f2fs_wait_on_page_writeback(page, META, true);
+	f2fs_wait_on_page_writeback(page, META, true, true);
 	if (!PageUptodate(page))
 		SetPageUptodate(page);
 	return page;
@@ -122,11 +119,8 @@
 		if (PTR_ERR(page) == -EIO &&
 				++count <= DEFAULT_RETRY_IO_COUNT)
 			goto retry;
-
 		f2fs_stop_checkpoint(sbi, false);
-		f2fs_bug_on(sbi, 1);
 	}
-
 	return page;
 }
 
@@ -282,8 +276,7 @@
 	dec_page_count(sbi, F2FS_DIRTY_META);
 
 	if (wbc->for_reclaim)
-		f2fs_submit_merged_write_cond(sbi, page->mapping->host,
-						0, page->index, META);
+		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, META);
 
 	unlock_page(page);
 
@@ -313,8 +306,9 @@
 		goto skip_write;
 
 	/* collect a number of dirty meta pages and write together */
-	if (wbc->for_kupdate ||
-		get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
+	if (wbc->sync_mode != WB_SYNC_ALL &&
+			get_pages(sbi, F2FS_DIRTY_META) <
+					nr_pages_to_skip(sbi, META))
 		goto skip_write;
 
 	/* if locked failed, cp will flush dirty pages instead */
@@ -377,9 +371,8 @@
 				goto continue_unlock;
 			}
 
-			f2fs_wait_on_page_writeback(page, META, true);
+			f2fs_wait_on_page_writeback(page, META, true, true);
 
-			BUG_ON(PageWriteback(page));
 			if (!clear_page_dirty_for_io(page))
 				goto continue_unlock;
 
@@ -413,7 +406,7 @@
 	if (!PageDirty(page)) {
 		__set_page_dirty_nobuffers(page);
 		inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
-		SetPagePrivate(page);
+		f2fs_set_page_private(page, 0);
 		f2fs_trace_pid(page);
 		return 1;
 	}
@@ -918,7 +911,7 @@
 	f2fs_put_page(cp1, 1);
 	f2fs_put_page(cp2, 1);
 fail_no_cp:
-	kfree(sbi->ckpt);
+	kvfree(sbi->ckpt);
 	return -EINVAL;
 }
 
@@ -964,7 +957,7 @@
 	inode_inc_dirty_pages(inode);
 	spin_unlock(&sbi->inode_lock[type]);
 
-	SetPagePrivate(page);
+	f2fs_set_page_private(page, 0);
 	f2fs_trace_pid(page);
 }
 
@@ -1086,6 +1079,21 @@
 	ckpt->next_free_nid = cpu_to_le32(last_nid);
 }
 
+static bool __need_flush_quota(struct f2fs_sb_info *sbi)
+{
+	if (!is_journalled_quota(sbi))
+		return false;
+	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
+		return false;
+	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
+		return false;
+	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
+		return true;
+	if (get_pages(sbi, F2FS_DIRTY_QDATA))
+		return true;
+	return false;
+}
+
 /*
  * Freeze all the FS-operations for checkpoint.
  */
@@ -1097,12 +1105,36 @@
 		.for_reclaim = 0,
 	};
 	struct blk_plug plug;
-	int err = 0;
+	int err = 0, cnt = 0;
 
 	blk_start_plug(&plug);
 
-retry_flush_dents:
+retry_flush_quotas:
+	if (__need_flush_quota(sbi)) {
+		int locked;
+
+		if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) {
+			set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
+			f2fs_lock_all(sbi);
+			goto retry_flush_dents;
+		}
+		clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
+
+		/* only failed during mount/umount/freeze/quotactl */
+		locked = down_read_trylock(&sbi->sb->s_umount);
+		f2fs_quota_sync(sbi->sb, -1);
+		if (locked)
+			up_read(&sbi->sb->s_umount);
+	}
+
 	f2fs_lock_all(sbi);
+	if (__need_flush_quota(sbi)) {
+		f2fs_unlock_all(sbi);
+		cond_resched();
+		goto retry_flush_quotas;
+	}
+
+retry_flush_dents:
 	/* write all the dirty dentry pages */
 	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
 		f2fs_unlock_all(sbi);
@@ -1110,7 +1142,7 @@
 		if (err)
 			goto out;
 		cond_resched();
-		goto retry_flush_dents;
+		goto retry_flush_quotas;
 	}
 
 	/*
@@ -1119,6 +1151,12 @@
 	 */
 	down_write(&sbi->node_change);
 
+	if (__need_flush_quota(sbi)) {
+		up_write(&sbi->node_change);
+		f2fs_unlock_all(sbi);
+		goto retry_flush_quotas;
+	}
+
 	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
 		up_write(&sbi->node_change);
 		f2fs_unlock_all(sbi);
@@ -1126,7 +1164,7 @@
 		if (err)
 			goto out;
 		cond_resched();
-		goto retry_flush_dents;
+		goto retry_flush_quotas;
 	}
 
 retry_flush_nodes:
@@ -1217,6 +1255,26 @@
 	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
 		__set_ckpt_flags(ckpt, CP_FSCK_FLAG);
 
+	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
+		__set_ckpt_flags(ckpt, CP_DISABLED_FLAG);
+	else
+		__clear_ckpt_flags(ckpt, CP_DISABLED_FLAG);
+
+	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK))
+		__set_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG);
+	else
+		__clear_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG);
+
+	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
+		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
+	/*
+	 * TODO: we count on fsck.f2fs to clear this flag until we figure out
+	 * missing cases which clear it incorrectly.
+	 */
+
+	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
+		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
+
 	/* set this flag to activate crc|cp_ver for recovery */
 	__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
 	__clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);
@@ -1239,11 +1297,11 @@
 	struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
 	int err;
 
-	memcpy(page_address(page), src, PAGE_SIZE);
-	set_page_dirty(page);
+	f2fs_wait_on_page_writeback(page, META, true, true);
 
-	f2fs_wait_on_page_writeback(page, META, true);
-	f2fs_bug_on(sbi, PageWriteback(page));
+	memcpy(page_address(page), src, PAGE_SIZE);
+
+	set_page_dirty(page);
 	if (unlikely(!clear_page_dirty_for_io(page)))
 		f2fs_bug_on(sbi, 1);
 
@@ -1277,11 +1335,9 @@
 	int err;
 
 	/* Flush all the NAT/SIT pages */
-	while (get_pages(sbi, F2FS_DIRTY_META)) {
-		f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
-		if (unlikely(f2fs_cp_error(sbi)))
-			break;
-	}
+	f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
+	f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) &&
+					!f2fs_cp_error(sbi));
 
 	/*
 	 * modify checkpoint
@@ -1354,14 +1410,6 @@
 		for (i = 0; i < nm_i->nat_bits_blocks; i++)
 			f2fs_update_meta_page(sbi, nm_i->nat_bits +
 					(i << F2FS_BLKSIZE_BITS), blk + i);
-
-		/* Flush all the NAT BITS pages */
-		while (get_pages(sbi, F2FS_DIRTY_META)) {
-			f2fs_sync_meta_pages(sbi, META, LONG_MAX,
-							FS_CP_META_IO);
-			if (unlikely(f2fs_cp_error(sbi)))
-				break;
-		}
 	}
 
 	/* write out checkpoint buffer at block 0 */
@@ -1397,6 +1445,8 @@
 
 	/* Here, we have one bio having CP pack except cp pack 2 page */
 	f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
+	f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) &&
+					!f2fs_cp_error(sbi));
 
 	/* wait for previous submitted meta pages writeback */
 	f2fs_wait_on_all_pages_writeback(sbi);
@@ -1414,7 +1464,7 @@
 	 * invalidate intermediate page cache borrowed from meta inode
 	 * which are used for migration of encrypted inode's blocks.
 	 */
-	if (f2fs_sb_has_encrypt(sbi->sb))
+	if (f2fs_sb_has_encrypt(sbi))
 		invalidate_mapping_pages(META_MAPPING(sbi),
 				MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);
 
@@ -1424,6 +1474,8 @@
 
 	clear_sbi_flag(sbi, SBI_IS_DIRTY);
 	clear_sbi_flag(sbi, SBI_NEED_CP);
+	clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
+	sbi->unusable_block_count = 0;
 	__set_cp_next_pack(sbi);
 
 	/*
@@ -1448,6 +1500,12 @@
 	unsigned long long ckpt_ver;
 	int err = 0;
 
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+		if (cpc->reason != CP_PAUSE)
+			return 0;
+		f2fs_msg(sbi->sb, KERN_WARNING,
+				"Start checkpoint disabled!");
+	}
 	mutex_lock(&sbi->cp_mutex);
 
 	if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
@@ -1499,7 +1557,10 @@
 	ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
 
 	/* write cached NAT/SIT entries to NAT/SIT area */
-	f2fs_flush_nat_entries(sbi, cpc);
+	err = f2fs_flush_nat_entries(sbi, cpc);
+	if (err)
+		goto stop;
+
 	f2fs_flush_sit_entries(sbi, cpc);
 
 	/* unlock all the fs_lock[] in do_checkpoint() */
@@ -1508,7 +1569,7 @@
 		f2fs_release_discard_addrs(sbi);
 	else
 		f2fs_clear_prefree_segments(sbi, cpc);
-
+stop:
 	unblock_operations(sbi);
 	stat_inc_cp_count(sbi->stat_info);
 
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 4d02e76..acbd6d4 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/data.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
@@ -27,6 +24,7 @@
 #include "segment.h"
 #include "trace.h"
 #include <trace/events/f2fs.h>
+#include <trace/events/android_fs.h>
 
 #define NUM_PREALLOC_POST_READ_CTXS	128
 
@@ -49,12 +47,29 @@
 			inode->i_ino ==  F2FS_NODE_INO(sbi) ||
 			S_ISDIR(inode->i_mode) ||
 			(S_ISREG(inode->i_mode) &&
-			is_inode_flag_set(inode, FI_ATOMIC_FILE)) ||
+			(f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
 			is_cold_data(page))
 		return true;
 	return false;
 }
 
+static enum count_type __read_io_type(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+
+	if (mapping) {
+		struct inode *inode = mapping->host;
+		struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+		if (inode->i_ino == F2FS_META_INO(sbi))
+			return F2FS_RD_META;
+
+		if (inode->i_ino == F2FS_NODE_INO(sbi))
+			return F2FS_RD_NODE;
+	}
+	return F2FS_RD_DATA;
+}
+
 /* postprocessing steps for read bios */
 enum bio_post_read_step {
 	STEP_INITIAL = 0,
@@ -85,6 +100,7 @@
 		} else {
 			SetPageUptodate(page);
 		}
+		dec_page_count(F2FS_P_SB(page), __read_io_type(page));
 		unlock_page(page);
 	}
 	if (bio->bi_private)
@@ -127,8 +143,11 @@
 
 static void f2fs_read_end_io(struct bio *bio)
 {
-	if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)), FAULT_IO)) {
-		f2fs_show_injection_info(FAULT_IO);
+	struct page *first_page = bio->bi_io_vec[0].bv_page;
+
+	if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)),
+						FAULT_READ_IO)) {
+		f2fs_show_injection_info(FAULT_READ_IO);
 		bio->bi_status = BLK_STS_IOERR;
 	}
 
@@ -140,6 +159,13 @@
 		return;
 	}
 
+	if (first_page != NULL &&
+		__read_io_type(first_page) == F2FS_RD_DATA) {
+		trace_android_fs_dataread_end(first_page->mapping->host,
+						page_offset(first_page),
+						bio->bi_iter.bi_size);
+	}
+
 	__read_end_io(bio);
 }
 
@@ -149,6 +175,11 @@
 	struct bio_vec *bvec;
 	int i;
 
+	if (time_to_inject(sbi, FAULT_WRITE_IO)) {
+		f2fs_show_injection_info(FAULT_WRITE_IO);
+		bio->bi_status = BLK_STS_IOERR;
+	}
+
 	bio_for_each_segment_all(bvec, bio, i) {
 		struct page *page = bvec->bv_page;
 		enum count_type type = WB_DATA_TYPE(page);
@@ -283,9 +314,10 @@
 		for (; start < F2FS_IO_SIZE(sbi); start++) {
 			struct page *page =
 				mempool_alloc(sbi->write_io_dummy,
-					GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL);
+					      GFP_NOIO | __GFP_NOFAIL);
 			f2fs_bug_on(sbi, !page);
 
+			zero_user_segment(page, 0, PAGE_SIZE);
 			SetPagePrivate(page);
 			set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
 			lock_page(page);
@@ -307,6 +339,32 @@
 	submit_bio(bio);
 }
 
+static void __f2fs_submit_read_bio(struct f2fs_sb_info *sbi,
+				struct bio *bio, enum page_type type)
+{
+	if (trace_android_fs_dataread_start_enabled() && (type == DATA)) {
+		struct page *first_page = bio->bi_io_vec[0].bv_page;
+
+		if (first_page != NULL &&
+			__read_io_type(first_page) == F2FS_RD_DATA) {
+			char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+			path = android_fstrace_get_pathname(pathbuf,
+						MAX_TRACE_PATHBUF_LEN,
+						first_page->mapping->host);
+
+			trace_android_fs_dataread_start(
+				first_page->mapping->host,
+				page_offset(first_page),
+				bio->bi_iter.bi_size,
+				current->pid,
+				path,
+				current->comm);
+		}
+	}
+	__submit_bio(sbi, bio, type);
+}
+
 static void __submit_merged_bio(struct f2fs_bio_info *io)
 {
 	struct f2fs_io_info *fio = &io->fio;
@@ -325,8 +383,8 @@
 	io->bio = NULL;
 }
 
-static bool __has_merged_page(struct f2fs_bio_info *io,
-				struct inode *inode, nid_t ino, pgoff_t idx)
+static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
+						struct page *page, nid_t ino)
 {
 	struct bio_vec *bvec;
 	struct page *target;
@@ -335,7 +393,7 @@
 	if (!io->bio)
 		return false;
 
-	if (!inode && !ino)
+	if (!inode && !page && !ino)
 		return true;
 
 	bio_for_each_segment_all(bvec, io->bio, i) {
@@ -345,11 +403,10 @@
 		else
 			target = fscrypt_control_page(bvec->bv_page);
 
-		if (idx != target->index)
-			continue;
-
 		if (inode && inode == target->mapping->host)
 			return true;
+		if (page && page == target)
+			return true;
 		if (ino && ino == ino_of_node(target))
 			return true;
 	}
@@ -357,28 +414,6 @@
 	return false;
 }
 
-static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
-				nid_t ino, pgoff_t idx, enum page_type type)
-{
-	enum page_type btype = PAGE_TYPE_OF_BIO(type);
-	enum temp_type temp;
-	struct f2fs_bio_info *io;
-	bool ret = false;
-
-	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
-		io = sbi->write_io[btype] + temp;
-
-		down_read(&io->io_rwsem);
-		ret = __has_merged_page(io, inode, ino, idx);
-		up_read(&io->io_rwsem);
-
-		/* TODO: use HOT temp only for meta pages now. */
-		if (ret || btype == META)
-			break;
-	}
-	return ret;
-}
-
 static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
 				enum page_type type, enum temp_type temp)
 {
@@ -400,17 +435,23 @@
 }
 
 static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
-				struct inode *inode, nid_t ino, pgoff_t idx,
-				enum page_type type, bool force)
+				struct inode *inode, struct page *page,
+				nid_t ino, enum page_type type, bool force)
 {
 	enum temp_type temp;
-
-	if (!force && !has_merged_page(sbi, inode, ino, idx, type))
-		return;
+	bool ret = true;
 
 	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
+		if (!force)	{
+			enum page_type btype = PAGE_TYPE_OF_BIO(type);
+			struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
 
-		__f2fs_submit_merged_write(sbi, type, temp);
+			down_read(&io->io_rwsem);
+			ret = __has_merged_page(io, inode, page, ino);
+			up_read(&io->io_rwsem);
+		}
+		if (ret)
+			__f2fs_submit_merged_write(sbi, type, temp);
 
 		/* TODO: use HOT temp only for meta pages now. */
 		if (type >= META)
@@ -424,10 +465,10 @@
 }
 
 void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
-				struct inode *inode, nid_t ino, pgoff_t idx,
-				enum page_type type)
+				struct inode *inode, struct page *page,
+				nid_t ino, enum page_type type)
 {
-	__submit_merged_write_cond(sbi, inode, ino, idx, type, false);
+	__submit_merged_write_cond(sbi, inode, page, ino, type, false);
 }
 
 void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
@@ -468,10 +509,10 @@
 
 	bio_set_op_attrs(bio, fio->op, fio->op_flags);
 
-	if (!is_read_io(fio->op))
-		inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page));
+	inc_page_count(fio->sbi, is_read_io(fio->op) ?
+			__read_io_type(page): WB_DATA_TYPE(fio->page));
 
-	__submit_bio(fio->sbi, bio, fio->type);
+	__f2fs_submit_read_bio(fio->sbi, bio, fio->type);
 	return 0;
 }
 
@@ -543,6 +584,9 @@
 	if (fio->in_list)
 		goto next;
 out:
+	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
+				f2fs_is_checkpoint_ready(sbi))
+		__submit_merged_bio(io);
 	up_write(&io->io_rwsem);
 }
 
@@ -575,9 +619,6 @@
 		ctx->bio = bio;
 		ctx->enabled_steps = post_read_steps;
 		bio->bi_private = ctx;
-
-		/* wait the page to be moved by cleaning */
-		f2fs_wait_on_block_writeback(sbi, blkaddr);
 	}
 
 	return bio;
@@ -592,12 +633,16 @@
 	if (IS_ERR(bio))
 		return PTR_ERR(bio);
 
+	/* wait for GCed page writeback via META_MAPPING */
+	f2fs_wait_on_block_writeback(inode, blkaddr);
+
 	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
 		bio_put(bio);
 		return -EFAULT;
 	}
 	ClearPageError(page);
-	__submit_bio(F2FS_I_SB(inode), bio, DATA);
+	inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
+	__f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
 	return 0;
 }
 
@@ -623,7 +668,7 @@
  */
 void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
 {
-	f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
+	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
 	__set_data_blkaddr(dn);
 	if (set_page_dirty(dn->node_page))
 		dn->node_changed = true;
@@ -653,7 +698,7 @@
 	trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
 						dn->ofs_in_node, count);
 
-	f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
+	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
 
 	for (; count > 0; dn->ofs_in_node++) {
 		block_t blkaddr = datablock_addr(dn->inode,
@@ -887,7 +932,6 @@
 	struct f2fs_summary sum;
 	struct node_info ni;
 	block_t old_blkaddr;
-	pgoff_t fofs;
 	blkcnt_t count = 1;
 	int err;
 
@@ -900,7 +944,7 @@
 
 	dn->data_blkaddr = datablock_addr(dn->inode,
 				dn->node_page, dn->ofs_in_node);
-	if (dn->data_blkaddr == NEW_ADDR)
+	if (dn->data_blkaddr != NULL_ADDR)
 		goto alloc;
 
 	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
@@ -916,12 +960,10 @@
 					old_blkaddr, old_blkaddr);
 	f2fs_set_data_blkaddr(dn);
 
-	/* update i_size */
-	fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
-							dn->ofs_in_node;
-	if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT))
-		f2fs_i_size_write(dn->inode,
-				((loff_t)(fofs + 1) << PAGE_SHIFT));
+	/*
+	 * i_size will be updated by direct_IO. Otherwise, we'll get stale
+	 * data from unwritten block via dio_read.
+	 */
 	return 0;
 }
 
@@ -940,6 +982,9 @@
 			return err;
 	}
 
+	if (direct_io && allow_outplace_dio(inode, iocb, from))
+		return 0;
+
 	if (is_inode_flag_set(inode, FI_NO_PREALLOC))
 		return 0;
 
@@ -953,10 +998,11 @@
 	map.m_next_pgofs = NULL;
 	map.m_next_extent = NULL;
 	map.m_seg_type = NO_CHECK_TYPE;
+	map.m_may_create = true;
 
 	if (direct_io) {
 		map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
-		flag = f2fs_force_buffered_io(inode, WRITE) ?
+		flag = f2fs_force_buffered_io(inode, iocb, from) ?
 					F2FS_GET_BLOCK_PRE_AIO :
 					F2FS_GET_BLOCK_PRE_DIO;
 		goto map_blocks;
@@ -981,7 +1027,7 @@
 	return err;
 }
 
-static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
+void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
 {
 	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
 		if (lock)
@@ -1011,7 +1057,7 @@
 	unsigned int maxblocks = map->m_len;
 	struct dnode_of_data dn;
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	int mode = create ? ALLOC_NODE : LOOKUP_NODE;
+	int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
 	pgoff_t pgofs, end_offset, end;
 	int err = 0, ofs = 1;
 	unsigned int ofs_in_node, last_ofs_in_node;
@@ -1031,16 +1077,25 @@
 	end = pgofs + maxblocks;
 
 	if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
+		if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
+							map->m_may_create)
+			goto next_dnode;
+
 		map->m_pblk = ei.blk + pgofs - ei.fofs;
 		map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
 		map->m_flags = F2FS_MAP_MAPPED;
 		if (map->m_next_extent)
 			*map->m_next_extent = pgofs + map->m_len;
+
+		/* for hardware encryption, but to avoid potential issue in future */
+		if (flag == F2FS_GET_BLOCK_DIO)
+			f2fs_wait_on_block_writeback_range(inode,
+						map->m_pblk, map->m_len);
 		goto out;
 	}
 
 next_dnode:
-	if (create)
+	if (map->m_may_create)
 		__do_map_lock(sbi, flag, true);
 
 	/* When reading holes, we need its node page */
@@ -1075,7 +1130,17 @@
 		goto sync_out;
 	}
 
-	if (!is_valid_data_blkaddr(sbi, blkaddr)) {
+	if (is_valid_data_blkaddr(sbi, blkaddr)) {
+		/* use out-place-update for direct IO under LFS mode */
+		if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
+							map->m_may_create) {
+			err = __allocate_data_block(&dn, map->m_seg_type);
+			if (!err) {
+				blkaddr = dn.data_blkaddr;
+				set_inode_flag(inode, FI_APPEND_WRITE);
+			}
+		}
+	} else {
 		if (create) {
 			if (unlikely(f2fs_cp_error(sbi))) {
 				err = -EIO;
@@ -1087,6 +1152,8 @@
 					last_ofs_in_node = dn.ofs_in_node;
 				}
 			} else {
+				WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO &&
+					flag != F2FS_GET_BLOCK_DIO);
 				err = __allocate_data_block(&dn,
 							map->m_seg_type);
 				if (!err)
@@ -1177,13 +1244,19 @@
 
 	f2fs_put_dnode(&dn);
 
-	if (create) {
+	if (map->m_may_create) {
 		__do_map_lock(sbi, flag, false);
 		f2fs_balance_fs(sbi, dn.node_changed);
 	}
 	goto next_dnode;
 
 sync_out:
+
+	/* for hardware encryption, but to avoid potential issue in future */
+	if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED)
+		f2fs_wait_on_block_writeback_range(inode,
+						map->m_pblk, map->m_len);
+
 	if (flag == F2FS_GET_BLOCK_PRECACHE) {
 		if (map->m_flags & F2FS_MAP_MAPPED) {
 			unsigned int ofs = start_pgofs - map->m_lblk;
@@ -1197,7 +1270,7 @@
 	}
 	f2fs_put_dnode(&dn);
 unlock_out:
-	if (create) {
+	if (map->m_may_create) {
 		__do_map_lock(sbi, flag, false);
 		f2fs_balance_fs(sbi, dn.node_changed);
 	}
@@ -1219,6 +1292,7 @@
 	map.m_next_pgofs = NULL;
 	map.m_next_extent = NULL;
 	map.m_seg_type = NO_CHECK_TYPE;
+	map.m_may_create = false;
 	last_lblk = F2FS_BLK_ALIGN(pos + len);
 
 	while (map.m_lblk < last_lblk) {
@@ -1233,7 +1307,7 @@
 
 static int __get_data_block(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh, int create, int flag,
-			pgoff_t *next_pgofs, int seg_type)
+			pgoff_t *next_pgofs, int seg_type, bool may_write)
 {
 	struct f2fs_map_blocks map;
 	int err;
@@ -1243,6 +1317,7 @@
 	map.m_next_pgofs = next_pgofs;
 	map.m_next_extent = NULL;
 	map.m_seg_type = seg_type;
+	map.m_may_create = may_write;
 
 	err = f2fs_map_blocks(inode, &map, create, flag);
 	if (!err) {
@@ -1259,16 +1334,25 @@
 {
 	return __get_data_block(inode, iblock, bh_result, create,
 							flag, next_pgofs,
-							NO_CHECK_TYPE);
+							NO_CHECK_TYPE, create);
+}
+
+static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
+			struct buffer_head *bh_result, int create)
+{
+	return __get_data_block(inode, iblock, bh_result, create,
+				F2FS_GET_BLOCK_DIO, NULL,
+				f2fs_rw_hint_to_seg_type(inode->i_write_hint),
+				true);
 }
 
 static int get_data_block_dio(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh_result, int create)
 {
 	return __get_data_block(inode, iblock, bh_result, create,
-						F2FS_GET_BLOCK_DEFAULT, NULL,
-						f2fs_rw_hint_to_seg_type(
-							inode->i_write_hint));
+				F2FS_GET_BLOCK_DIO, NULL,
+				f2fs_rw_hint_to_seg_type(inode->i_write_hint),
+				false);
 }
 
 static int get_data_block_bmap(struct inode *inode, sector_t iblock,
@@ -1280,7 +1364,7 @@
 
 	return __get_data_block(inode, iblock, bh_result, create,
 						F2FS_GET_BLOCK_BMAP, NULL,
-						NO_CHECK_TYPE);
+						NO_CHECK_TYPE, create);
 }
 
 static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
@@ -1487,6 +1571,7 @@
 	map.m_next_pgofs = NULL;
 	map.m_next_extent = NULL;
 	map.m_seg_type = NO_CHECK_TYPE;
+	map.m_may_create = false;
 
 	for (; nr_pages; nr_pages--) {
 		if (pages) {
@@ -1507,6 +1592,9 @@
 		if (last_block > last_block_in_file)
 			last_block = last_block_in_file;
 
+		/* just zeroing out page which is beyond EOF */
+		if (block_in_file >= last_block)
+			goto zero_out;
 		/*
 		 * Map blocks using the previous result first.
 		 */
@@ -1519,16 +1607,11 @@
 		 * Then do more f2fs_map_blocks() calls until we are
 		 * done with this page.
 		 */
-		map.m_flags = 0;
+		map.m_lblk = block_in_file;
+		map.m_len = last_block - block_in_file;
 
-		if (block_in_file < last_block) {
-			map.m_lblk = block_in_file;
-			map.m_len = last_block - block_in_file;
-
-			if (f2fs_map_blocks(inode, &map, 0,
-						F2FS_GET_BLOCK_DEFAULT))
-				goto set_error_page;
-		}
+		if (f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT))
+			goto set_error_page;
 got_it:
 		if ((map.m_flags & F2FS_MAP_MAPPED)) {
 			block_nr = map.m_pblk + block_in_file - map.m_lblk;
@@ -1543,6 +1626,7 @@
 								DATA_GENERIC))
 				goto set_error_page;
 		} else {
+zero_out:
 			zero_user_segment(page, 0, PAGE_SIZE);
 			if (!PageUptodate(page))
 				SetPageUptodate(page);
@@ -1557,7 +1641,7 @@
 		if (bio && (last_block_in_bio != block_nr - 1 ||
 			!__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
 submit_and_realloc:
-			__submit_bio(F2FS_I_SB(inode), bio, DATA);
+			__f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
 			bio = NULL;
 		}
 		if (bio == NULL) {
@@ -1569,9 +1653,16 @@
 			}
 		}
 
+		/*
+		 * If the page is under writeback, we need to wait for
+		 * its completion to see the correct decrypted data.
+		 */
+		f2fs_wait_on_block_writeback(inode, block_nr);
+
 		if (bio_add_page(bio, page, blocksize, 0) < blocksize)
 			goto submit_and_realloc;
 
+		inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
 		ClearPageError(page);
 		last_block_in_bio = block_nr;
 		goto next_page;
@@ -1582,7 +1673,7 @@
 		goto next_page;
 confused:
 		if (bio) {
-			__submit_bio(F2FS_I_SB(inode), bio, DATA);
+			__f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
 			bio = NULL;
 		}
 		unlock_page(page);
@@ -1592,7 +1683,7 @@
 	}
 	BUG_ON(pages && !list_empty(pages));
 	if (bio)
-		__submit_bio(F2FS_I_SB(inode), bio, DATA);
+		__f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
 	return 0;
 }
 
@@ -1637,7 +1728,7 @@
 		return 0;
 
 	/* wait for GCed page writeback via META_MAPPING */
-	f2fs_wait_on_block_writeback(fio->sbi, fio->old_blkaddr);
+	f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
 
 retry_encrypt:
 	fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
@@ -1694,6 +1785,10 @@
 			is_inode_flag_set(inode, FI_NEED_IPU))
 		return true;
 
+	if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+			!f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
+		return true;
+
 	return false;
 }
 
@@ -1717,6 +1812,8 @@
 		return true;
 	if (S_ISDIR(inode->i_mode))
 		return true;
+	if (IS_NOQUOTA(inode))
+		return true;
 	if (f2fs_is_atomic_file(inode))
 		return true;
 	if (fio) {
@@ -1724,6 +1821,9 @@
 			return true;
 		if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
 			return true;
+		if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+			f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
+			return true;
 	}
 	return false;
 }
@@ -1775,6 +1875,7 @@
 	/* This page is already truncated */
 	if (fio->old_blkaddr == NULL_ADDR) {
 		ClearPageUptodate(page);
+		clear_cold_data(page);
 		goto out_writepage;
 	}
 got_it:
@@ -1800,6 +1901,13 @@
 		if (fio->need_lock == LOCK_REQ)
 			f2fs_unlock_op(fio->sbi);
 		err = f2fs_inplace_write_data(fio);
+		if (err) {
+			if (f2fs_encrypted_file(inode))
+				fscrypt_pullback_bio_page(&fio->encrypted_page,
+									true);
+			if (PageWriteback(page))
+				end_page_writeback(page);
+		}
 		trace_f2fs_do_write_data_page(fio->page, IPU);
 		set_inode_flag(inode, FI_UPDATE_WRITE);
 		return err;
@@ -1950,18 +2058,20 @@
 
 out:
 	inode_dec_dirty_pages(inode);
-	if (err)
+	if (err) {
 		ClearPageUptodate(page);
+		clear_cold_data(page);
+	}
 
 	if (wbc->for_reclaim) {
-		f2fs_submit_merged_write_cond(sbi, inode, 0, page->index, DATA);
+		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
 		clear_inode_flag(inode, FI_HOT_DATA);
 		f2fs_remove_dirty_inode(inode);
 		submitted = NULL;
 	}
 
 	unlock_page(page);
-	if (!S_ISDIR(inode->i_mode))
+	if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode))
 		f2fs_balance_fs(sbi, need_balance_fs);
 
 	if (unlikely(f2fs_cp_error(sbi))) {
@@ -2012,10 +2122,10 @@
 	pgoff_t index;
 	pgoff_t end;		/* Inclusive */
 	pgoff_t done_index;
-	pgoff_t last_idx = ULONG_MAX;
 	int cycled;
 	int range_whole = 0;
 	int tag;
+	int nwritten = 0;
 
 	pagevec_init(&pvec);
 
@@ -2085,12 +2195,11 @@
 			if (PageWriteback(page)) {
 				if (wbc->sync_mode != WB_SYNC_NONE)
 					f2fs_wait_on_page_writeback(page,
-								DATA, true);
+							DATA, true, true);
 				else
 					goto continue_unlock;
 			}
 
-			BUG_ON(PageWriteback(page));
 			if (!clear_page_dirty_for_io(page))
 				goto continue_unlock;
 
@@ -2118,7 +2227,7 @@
 				done = 1;
 				break;
 			} else if (submitted) {
-				last_idx = page->index;
+				nwritten++;
 			}
 
 			if (--wbc->nr_to_write <= 0 &&
@@ -2140,9 +2249,9 @@
 	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
 		mapping->writeback_index = done_index;
 
-	if (last_idx != ULONG_MAX)
+	if (nwritten)
 		f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
-						0, last_idx, DATA);
+								NULL, 0, DATA);
 
 	return ret;
 }
@@ -2152,6 +2261,8 @@
 {
 	if (!S_ISREG(inode->i_mode))
 		return false;
+	if (IS_NOQUOTA(inode))
+		return false;
 	if (wbc->sync_mode != WB_SYNC_ALL)
 		return true;
 	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
@@ -2181,7 +2292,8 @@
 	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
 		goto skip_write;
 
-	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
+	if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
+			wbc->sync_mode == WB_SYNC_NONE &&
 			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
 			f2fs_available_free_memory(sbi, DIRTY_DENTS))
 		goto skip_write;
@@ -2246,7 +2358,8 @@
 		down_write(&F2FS_I(inode)->i_mmap_sem);
 
 		truncate_pagecache(inode, i_size);
-		f2fs_truncate_blocks(inode, i_size, true);
+		if (!IS_NOQUOTA(inode))
+			f2fs_truncate_blocks(inode, i_size, true);
 
 		up_write(&F2FS_I(inode)->i_mmap_sem);
 		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -2350,8 +2463,22 @@
 	block_t blkaddr = NULL_ADDR;
 	int err = 0;
 
+	if (trace_android_fs_datawrite_start_enabled()) {
+		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+		path = android_fstrace_get_pathname(pathbuf,
+						    MAX_TRACE_PATHBUF_LEN,
+						    inode);
+		trace_android_fs_datawrite_start(inode, pos, len,
+						 current->pid, path,
+						 current->comm);
+	}
 	trace_f2fs_write_begin(inode, pos, len, flags);
 
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		goto fail;
+
 	if ((f2fs_is_atomic_file(inode) &&
 			!f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
 			is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
@@ -2389,7 +2516,8 @@
 	if (err)
 		goto fail;
 
-	if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) {
+	if (need_balance && !IS_NOQUOTA(inode) &&
+			has_not_enough_free_secs(sbi, 0, 0)) {
 		unlock_page(page);
 		f2fs_balance_fs(sbi, true);
 		lock_page(page);
@@ -2400,11 +2528,7 @@
 		}
 	}
 
-	f2fs_wait_on_page_writeback(page, DATA, false);
-
-	/* wait for GCed page writeback via META_MAPPING */
-	if (f2fs_post_read_required(inode))
-		f2fs_wait_on_block_writeback(sbi, blkaddr);
+	f2fs_wait_on_page_writeback(page, DATA, false, true);
 
 	if (len == PAGE_SIZE || PageUptodate(page))
 		return 0;
@@ -2449,6 +2573,7 @@
 {
 	struct inode *inode = page->mapping->host;
 
+	trace_android_fs_datawrite_end(inode, pos, len);
 	trace_f2fs_write_end(inode, pos, len, copied);
 
 	/*
@@ -2495,41 +2620,128 @@
 	return 0;
 }
 
+static void f2fs_dio_end_io(struct bio *bio)
+{
+	struct f2fs_private_dio *dio = bio->bi_private;
+
+	dec_page_count(F2FS_I_SB(dio->inode),
+			dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
+
+	bio->bi_private = dio->orig_private;
+	bio->bi_end_io = dio->orig_end_io;
+
+	kvfree(dio);
+
+	bio_endio(bio);
+}
+
+static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
+							loff_t file_offset)
+{
+	struct f2fs_private_dio *dio;
+	bool write = (bio_op(bio) == REQ_OP_WRITE);
+
+	dio = f2fs_kzalloc(F2FS_I_SB(inode),
+			sizeof(struct f2fs_private_dio), GFP_NOFS);
+	if (!dio)
+		goto out;
+
+	dio->inode = inode;
+	dio->orig_end_io = bio->bi_end_io;
+	dio->orig_private = bio->bi_private;
+	dio->write = write;
+
+	bio->bi_end_io = f2fs_dio_end_io;
+	bio->bi_private = dio;
+
+	inc_page_count(F2FS_I_SB(inode),
+			write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
+
+	submit_bio(bio);
+	return;
+out:
+	bio->bi_status = BLK_STS_IOERR;
+	bio_endio(bio);
+}
+
 static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 {
 	struct address_space *mapping = iocb->ki_filp->f_mapping;
 	struct inode *inode = mapping->host;
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct f2fs_inode_info *fi = F2FS_I(inode);
 	size_t count = iov_iter_count(iter);
 	loff_t offset = iocb->ki_pos;
 	int rw = iov_iter_rw(iter);
 	int err;
 	enum rw_hint hint = iocb->ki_hint;
 	int whint_mode = F2FS_OPTION(sbi).whint_mode;
+	bool do_opu;
 
 	err = check_direct_IO(inode, iter, offset);
 	if (err)
 		return err < 0 ? err : 0;
 
-	if (f2fs_force_buffered_io(inode, rw))
+	if (f2fs_force_buffered_io(inode, iocb, iter))
 		return 0;
 
+	do_opu = allow_outplace_dio(inode, iocb, iter);
+
 	trace_f2fs_direct_IO_enter(inode, offset, count, rw);
 
+	if (trace_android_fs_dataread_start_enabled() &&
+	    (rw == READ)) {
+		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+		path = android_fstrace_get_pathname(pathbuf,
+						    MAX_TRACE_PATHBUF_LEN,
+						    inode);
+		trace_android_fs_dataread_start(inode, offset,
+						count, current->pid, path,
+						current->comm);
+	}
+	if (trace_android_fs_datawrite_start_enabled() &&
+	    (rw == WRITE)) {
+		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+		path = android_fstrace_get_pathname(pathbuf,
+						    MAX_TRACE_PATHBUF_LEN,
+						    inode);
+		trace_android_fs_datawrite_start(inode, offset, count,
+						 current->pid, path,
+						 current->comm);
+	}
+
 	if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
 		iocb->ki_hint = WRITE_LIFE_NOT_SET;
 
-	if (!down_read_trylock(&F2FS_I(inode)->i_gc_rwsem[rw])) {
-		if (iocb->ki_flags & IOCB_NOWAIT) {
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
 			iocb->ki_hint = hint;
 			err = -EAGAIN;
 			goto out;
 		}
-		down_read(&F2FS_I(inode)->i_gc_rwsem[rw]);
+		if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
+			up_read(&fi->i_gc_rwsem[rw]);
+			iocb->ki_hint = hint;
+			err = -EAGAIN;
+			goto out;
+		}
+	} else {
+		down_read(&fi->i_gc_rwsem[rw]);
+		if (do_opu)
+			down_read(&fi->i_gc_rwsem[READ]);
 	}
 
-	err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio);
-	up_read(&F2FS_I(inode)->i_gc_rwsem[rw]);
+	err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
+			iter, rw == WRITE ? get_data_block_dio_write :
+			get_data_block_dio, NULL, f2fs_dio_submit_bio,
+			DIO_LOCKING | DIO_SKIP_HOLES);
+
+	if (do_opu)
+		up_read(&fi->i_gc_rwsem[READ]);
+
+	up_read(&fi->i_gc_rwsem[rw]);
 
 	if (rw == WRITE) {
 		if (whint_mode == WHINT_MODE_OFF)
@@ -2537,13 +2749,21 @@
 		if (err > 0) {
 			f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
 									err);
-			set_inode_flag(inode, FI_UPDATE_WRITE);
+			if (!do_opu)
+				set_inode_flag(inode, FI_UPDATE_WRITE);
 		} else if (err < 0) {
 			f2fs_write_failed(mapping, offset + count);
 		}
 	}
 
 out:
+	if (trace_android_fs_dataread_start_enabled() &&
+	    (rw == READ))
+		trace_android_fs_dataread_end(inode, offset, count);
+	if (trace_android_fs_datawrite_start_enabled() &&
+	    (rw == WRITE))
+		trace_android_fs_datawrite_end(inode, offset, count);
+
 	trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
 
 	return err;
@@ -2570,12 +2790,12 @@
 		}
 	}
 
-	/* This is atomic written page, keep Private */
+	clear_cold_data(page);
+
 	if (IS_ATOMIC_WRITTEN_PAGE(page))
 		return f2fs_drop_inmem_page(inode, page);
 
-	set_page_private(page, 0);
-	ClearPagePrivate(page);
+	f2fs_clear_page_private(page);
 }
 
 int f2fs_release_page(struct page *page, gfp_t wait)
@@ -2588,8 +2808,8 @@
 	if (IS_ATOMIC_WRITTEN_PAGE(page))
 		return 0;
 
-	set_page_private(page, 0);
-	ClearPagePrivate(page);
+	clear_cold_data(page);
+	f2fs_clear_page_private(page);
 	return 1;
 }
 
@@ -2657,12 +2877,8 @@
 			return -EAGAIN;
 	}
 
-	/*
-	 * A reference is expected if PagePrivate set when move mapping,
-	 * however F2FS breaks this for maintaining dirty page counts when
-	 * truncating pages. So here adjusting the 'extra_count' make it work.
-	 */
-	extra_count = (atomic_written ? 1 : 0) - page_has_private(page);
+	/* one extra reference was held for atomic_write page */
+	extra_count = atomic_written ? 1 : 0;
 	rc = migrate_page_move_mapping(mapping, newpage,
 				page, NULL, mode, extra_count);
 	if (rc != MIGRATEPAGE_SUCCESS) {
@@ -2683,9 +2899,10 @@
 		get_page(newpage);
 	}
 
-	if (PagePrivate(page))
-		SetPagePrivate(newpage);
-	set_page_private(newpage, page_private(page));
+	if (PagePrivate(page)) {
+		f2fs_set_page_private(newpage, page_private(page));
+		f2fs_clear_page_private(page);
+	}
 
 	if (mode != MIGRATE_SYNC_NO_COPY)
 		migrate_page_copy(newpage, page);
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index bbe1554..99e9a5c 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * f2fs debugging statistics
  *
@@ -5,10 +6,6 @@
  *             http://www.samsung.com/
  * Copyright (c) 2012 Linux Foundation
  * Copyright (c) 2012 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/fs.h>
@@ -56,13 +53,18 @@
 	si->vw_cnt = atomic_read(&sbi->vw_cnt);
 	si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
 	si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt);
+	si->nr_dio_read = get_pages(sbi, F2FS_DIO_READ);
+	si->nr_dio_write = get_pages(sbi, F2FS_DIO_WRITE);
 	si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
 	si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
+	si->nr_rd_data = get_pages(sbi, F2FS_RD_DATA);
+	si->nr_rd_node = get_pages(sbi, F2FS_RD_NODE);
+	si->nr_rd_meta = get_pages(sbi, F2FS_RD_META);
 	if (SM_I(sbi) && SM_I(sbi)->fcc_info) {
 		si->nr_flushed =
 			atomic_read(&SM_I(sbi)->fcc_info->issued_flush);
 		si->nr_flushing =
-			atomic_read(&SM_I(sbi)->fcc_info->issing_flush);
+			atomic_read(&SM_I(sbi)->fcc_info->queued_flush);
 		si->flush_list_empty =
 			llist_empty(&SM_I(sbi)->fcc_info->issue_list);
 	}
@@ -70,7 +72,7 @@
 		si->nr_discarded =
 			atomic_read(&SM_I(sbi)->dcc_info->issued_discard);
 		si->nr_discarding =
-			atomic_read(&SM_I(sbi)->dcc_info->issing_discard);
+			atomic_read(&SM_I(sbi)->dcc_info->queued_discard);
 		si->nr_discard_cmd =
 			atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt);
 		si->undiscard_blks = SM_I(sbi)->dcc_info->undiscard_blks;
@@ -106,6 +108,8 @@
 	si->avail_nids = NM_I(sbi)->available_nids;
 	si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID];
 	si->bg_gc = sbi->bg_gc;
+	si->io_skip_bggc = sbi->io_skip_bggc;
+	si->other_skip_bggc = sbi->other_skip_bggc;
 	si->skipped_atomic_files[BG_GC] = sbi->skipped_atomic_files[BG_GC];
 	si->skipped_atomic_files[FG_GC] = sbi->skipped_atomic_files[FG_GC];
 	si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
@@ -123,6 +127,9 @@
 		si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]);
 	}
 
+	for (i = META_CP; i < META_MAX; i++)
+		si->meta_count[i] = atomic_read(&sbi->meta_count[i]);
+
 	for (i = 0; i < 2; i++) {
 		si->segment_count[i] = sbi->segment_count[i];
 		si->block_count[i] = sbi->block_count[i];
@@ -193,7 +200,7 @@
 	si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
 	si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
 	si->base_mem += SIT_VBLOCK_MAP_SIZE;
-	if (sbi->segs_per_sec > 1)
+	if (__is_large_section(sbi))
 		si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry);
 	si->base_mem += __bitmap_size(sbi, SIT_BITMAP);
 
@@ -275,7 +282,8 @@
 		seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n",
 			si->sbi->sb->s_bdev, i++,
 			f2fs_readonly(si->sbi->sb) ? "RO": "RW",
-			f2fs_cp_error(si->sbi) ? "Error": "Good");
+			is_set_ckpt_flags(si->sbi, CP_DISABLED_FLAG) ?
+			"Disabled": (f2fs_cp_error(si->sbi) ? "Error": "Good"));
 		seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
 			   si->sit_area_segs, si->nat_area_segs);
 		seq_printf(s, "[SSA: %d] [MAIN: %d",
@@ -337,6 +345,13 @@
 			   si->prefree_count, si->free_segs, si->free_secs);
 		seq_printf(s, "CP calls: %d (BG: %d)\n",
 				si->cp_count, si->bg_cp_count);
+		seq_printf(s, "  - cp blocks : %u\n", si->meta_count[META_CP]);
+		seq_printf(s, "  - sit blocks : %u\n",
+				si->meta_count[META_SIT]);
+		seq_printf(s, "  - nat blocks : %u\n",
+				si->meta_count[META_NAT]);
+		seq_printf(s, "  - ssa blocks : %u\n",
+				si->meta_count[META_SSA]);
 		seq_printf(s, "GC calls: %d (BG: %d)\n",
 			   si->call_count, si->bg_gc);
 		seq_printf(s, "  - data segments : %d (%d)\n",
@@ -353,6 +368,8 @@
 				si->skipped_atomic_files[BG_GC] +
 				si->skipped_atomic_files[FG_GC],
 				si->skipped_atomic_files[BG_GC]);
+		seq_printf(s, "BG skip : IO: %u, Other: %u\n",
+				si->io_skip_bggc, si->other_skip_bggc);
 		seq_puts(s, "\nExtent Cache:\n");
 		seq_printf(s, "  - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n",
 				si->hit_largest, si->hit_cached,
@@ -364,7 +381,11 @@
 		seq_printf(s, "  - Inner Struct Count: tree: %d(%d), node: %d\n",
 				si->ext_tree, si->zombie_tree, si->ext_node);
 		seq_puts(s, "\nBalancing F2FS Async:\n");
-		seq_printf(s, "  - IO (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), "
+		seq_printf(s, "  - DIO (R: %4d, W: %4d)\n",
+			   si->nr_dio_read, si->nr_dio_write);
+		seq_printf(s, "  - IO_R (Data: %4d, Node: %4d, Meta: %4d\n",
+			   si->nr_rd_data, si->nr_rd_node, si->nr_rd_meta);
+		seq_printf(s, "  - IO_W (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), "
 			"Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n",
 			   si->nr_wb_cp_data, si->nr_wb_data,
 			   si->nr_flushing, si->nr_flushed,
@@ -432,23 +453,13 @@
 	return 0;
 }
 
-static int stat_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, stat_show, inode->i_private);
-}
-
-static const struct file_operations stat_fops = {
-	.owner = THIS_MODULE,
-	.open = stat_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(stat);
 
 int f2fs_build_stats(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
 	struct f2fs_stat_info *si;
+	int i;
 
 	si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL);
 	if (!si)
@@ -474,6 +485,8 @@
 	atomic_set(&sbi->inline_inode, 0);
 	atomic_set(&sbi->inline_dir, 0);
 	atomic_set(&sbi->inplace_count, 0);
+	for (i = META_CP; i < META_MAX; i++)
+		atomic_set(&sbi->meta_count[i], 0);
 
 	atomic_set(&sbi->aw_cnt, 0);
 	atomic_set(&sbi->vw_cnt, 0);
@@ -495,33 +508,19 @@
 	list_del(&si->stat_list);
 	mutex_unlock(&f2fs_stat_mutex);
 
-	kfree(si);
+	kvfree(si);
 }
 
-int __init f2fs_create_root_stats(void)
+void __init f2fs_create_root_stats(void)
 {
-	struct dentry *file;
-
 	f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
-	if (!f2fs_debugfs_root)
-		return -ENOMEM;
 
-	file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
-			NULL, &stat_fops);
-	if (!file) {
-		debugfs_remove(f2fs_debugfs_root);
-		f2fs_debugfs_root = NULL;
-		return -ENOMEM;
-	}
-
-	return 0;
+	debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, NULL,
+			    &stat_fops);
 }
 
 void f2fs_destroy_root_stats(void)
 {
-	if (!f2fs_debugfs_root)
-		return;
-
 	debugfs_remove_recursive(f2fs_debugfs_root);
 	f2fs_debugfs_root = NULL;
 }
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index ecc3a4e2..d3eafe9 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/dir.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
@@ -296,7 +293,7 @@
 {
 	enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA;
 	lock_page(page);
-	f2fs_wait_on_page_writeback(page, type, true);
+	f2fs_wait_on_page_writeback(page, type, true, true);
 	de->ino = cpu_to_le32(inode->i_ino);
 	set_de_type(de, inode->i_mode);
 	set_page_dirty(page);
@@ -310,7 +307,7 @@
 {
 	struct f2fs_inode *ri;
 
-	f2fs_wait_on_page_writeback(ipage, NODE, true);
+	f2fs_wait_on_page_writeback(ipage, NODE, true, true);
 
 	/* copy name info. to this inode page */
 	ri = F2FS_INODE(ipage);
@@ -553,7 +550,7 @@
 	++level;
 	goto start;
 add_dentry:
-	f2fs_wait_on_page_writeback(dentry_page, DATA, true);
+	f2fs_wait_on_page_writeback(dentry_page, DATA, true, true);
 
 	if (inode) {
 		down_write(&F2FS_I(inode)->i_sem);
@@ -658,9 +655,9 @@
 	f2fs_put_page(page, 1);
 
 	clear_inode_flag(inode, FI_NEW_INODE);
+	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
 fail:
 	up_write(&F2FS_I(inode)->i_sem);
-	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
 	return err;
 }
 
@@ -708,7 +705,7 @@
 		return f2fs_delete_inline_entry(dentry, page, dir, inode);
 
 	lock_page(page);
-	f2fs_wait_on_page_writeback(page, DATA, true);
+	f2fs_wait_on_page_writeback(page, DATA, true, true);
 
 	dentry_blk = page_address(page);
 	bit_pos = dentry - dentry_blk->dentry;
@@ -731,8 +728,9 @@
 		!f2fs_truncate_hole(dir, page->index, page->index + 1)) {
 		f2fs_clear_radix_tree_dirty_tag(page);
 		clear_page_dirty_for_io(page);
-		ClearPagePrivate(page);
+		f2fs_clear_page_private(page);
 		ClearPageUptodate(page);
+		clear_cold_data(page);
 		inode_dec_dirty_pages(dir);
 		f2fs_remove_dirty_inode(dir);
 	}
@@ -784,9 +782,15 @@
 	struct f2fs_dir_entry *de = NULL;
 	struct fscrypt_str de_name = FSTR_INIT(NULL, 0);
 	struct f2fs_sb_info *sbi = F2FS_I_SB(d->inode);
+	struct blk_plug plug;
+	bool readdir_ra = sbi->readdir_ra == 1;
+	int err = 0;
 
 	bit_pos = ((unsigned long)ctx->pos % d->max);
 
+	if (readdir_ra)
+		blk_start_plug(&plug);
+
 	while (bit_pos < d->max) {
 		bit_pos = find_next_bit_le(d->bitmap, d->max, bit_pos);
 		if (bit_pos >= d->max)
@@ -796,6 +800,10 @@
 		if (de->name_len == 0) {
 			bit_pos++;
 			ctx->pos = start_pos + bit_pos;
+			printk_ratelimited(
+				"%s, invalid namelen(0), ino:%u, run fsck to fix.",
+				KERN_WARNING, le32_to_cpu(de->ino));
+			set_sbi_flag(sbi, SBI_NEED_FSCK);
 			continue;
 		}
 
@@ -804,31 +812,46 @@
 		de_name.name = d->filename[bit_pos];
 		de_name.len = le16_to_cpu(de->name_len);
 
+		/* check memory boundary before moving forward */
+		bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
+		if (unlikely(bit_pos > d->max ||
+				le16_to_cpu(de->name_len) > F2FS_NAME_LEN)) {
+			f2fs_msg(sbi->sb, KERN_WARNING,
+				"%s: corrupted namelen=%d, run fsck to fix.",
+				__func__, le16_to_cpu(de->name_len));
+			set_sbi_flag(sbi, SBI_NEED_FSCK);
+			err = -EINVAL;
+			goto out;
+		}
+
 		if (f2fs_encrypted_inode(d->inode)) {
 			int save_len = fstr->len;
-			int err;
 
 			err = fscrypt_fname_disk_to_usr(d->inode,
 						(u32)de->hash_code, 0,
 						&de_name, fstr);
 			if (err)
-				return err;
+				goto out;
 
 			de_name = *fstr;
 			fstr->len = save_len;
 		}
 
 		if (!dir_emit(ctx, de_name.name, de_name.len,
-					le32_to_cpu(de->ino), d_type))
-			return 1;
+					le32_to_cpu(de->ino), d_type)) {
+			err = 1;
+			goto out;
+		}
 
-		if (sbi->readdir_ra == 1)
+		if (readdir_ra)
 			f2fs_ra_node_page(sbi, le32_to_cpu(de->ino));
 
-		bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
 		ctx->pos = start_pos + bit_pos;
 	}
-	return 0;
+out:
+	if (readdir_ra)
+		blk_finish_plug(&plug);
+	return err;
 }
 
 static int f2fs_readdir(struct file *file, struct dir_context *ctx)
@@ -873,7 +896,7 @@
 			page_cache_sync_readahead(inode->i_mapping, ra, file, n,
 				min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES));
 
-		dentry_page = f2fs_get_lock_data_page(inode, n, false);
+		dentry_page = f2fs_find_data_page(inode, n);
 		if (IS_ERR(dentry_page)) {
 			err = PTR_ERR(dentry_page);
 			if (err == -ENOENT) {
@@ -891,11 +914,11 @@
 		err = f2fs_fill_dentries(ctx, &d,
 				n * NR_DENTRY_IN_BLOCK, &fstr);
 		if (err) {
-			f2fs_put_page(dentry_page, 1);
+			f2fs_put_page(dentry_page, 0);
 			break;
 		}
 
-		f2fs_put_page(dentry_page, 1);
+		f2fs_put_page(dentry_page, 0);
 	}
 out_free:
 	fscrypt_fname_free_buffer(&fstr);
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index a70cd25..caf77fe 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * f2fs extent cache support
  *
@@ -5,10 +6,6 @@
  * Copyright (c) 2015 Samsung Electronics
  * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
  *          Chao Yu <chao2.yu@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/fs.h>
@@ -30,10 +27,10 @@
 	return NULL;
 }
 
-static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root,
+static struct rb_entry *__lookup_rb_tree_slow(struct rb_root_cached *root,
 							unsigned int ofs)
 {
-	struct rb_node *node = root->rb_node;
+	struct rb_node *node = root->rb_root.rb_node;
 	struct rb_entry *re;
 
 	while (node) {
@@ -49,7 +46,7 @@
 	return NULL;
 }
 
-struct rb_entry *f2fs_lookup_rb_tree(struct rb_root *root,
+struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root,
 				struct rb_entry *cached_re, unsigned int ofs)
 {
 	struct rb_entry *re;
@@ -62,22 +59,25 @@
 }
 
 struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
-				struct rb_root *root, struct rb_node **parent,
-				unsigned int ofs)
+				struct rb_root_cached *root,
+				struct rb_node **parent,
+				unsigned int ofs, bool *leftmost)
 {
-	struct rb_node **p = &root->rb_node;
+	struct rb_node **p = &root->rb_root.rb_node;
 	struct rb_entry *re;
 
 	while (*p) {
 		*parent = *p;
 		re = rb_entry(*parent, struct rb_entry, rb_node);
 
-		if (ofs < re->ofs)
+		if (ofs < re->ofs) {
 			p = &(*p)->rb_left;
-		else if (ofs >= re->ofs + re->len)
+		} else if (ofs >= re->ofs + re->len) {
 			p = &(*p)->rb_right;
-		else
+			*leftmost = false;
+		} else {
 			f2fs_bug_on(sbi, 1);
+		}
 	}
 
 	return p;
@@ -92,16 +92,16 @@
  * in order to simplify the insertion after.
  * tree must stay unchanged between lookup and insertion.
  */
-struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root *root,
+struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
 				struct rb_entry *cached_re,
 				unsigned int ofs,
 				struct rb_entry **prev_entry,
 				struct rb_entry **next_entry,
 				struct rb_node ***insert_p,
 				struct rb_node **insert_parent,
-				bool force)
+				bool force, bool *leftmost)
 {
-	struct rb_node **pnode = &root->rb_node;
+	struct rb_node **pnode = &root->rb_root.rb_node;
 	struct rb_node *parent = NULL, *tmp_node;
 	struct rb_entry *re = cached_re;
 
@@ -110,7 +110,7 @@
 	*prev_entry = NULL;
 	*next_entry = NULL;
 
-	if (RB_EMPTY_ROOT(root))
+	if (RB_EMPTY_ROOT(&root->rb_root))
 		return NULL;
 
 	if (re) {
@@ -118,16 +118,22 @@
 			goto lookup_neighbors;
 	}
 
+	if (leftmost)
+		*leftmost = true;
+
 	while (*pnode) {
 		parent = *pnode;
 		re = rb_entry(*pnode, struct rb_entry, rb_node);
 
-		if (ofs < re->ofs)
+		if (ofs < re->ofs) {
 			pnode = &(*pnode)->rb_left;
-		else if (ofs >= re->ofs + re->len)
+		} else if (ofs >= re->ofs + re->len) {
 			pnode = &(*pnode)->rb_right;
-		else
+			if (leftmost)
+				*leftmost = false;
+		} else {
 			goto lookup_neighbors;
+		}
 	}
 
 	*insert_p = pnode;
@@ -160,10 +166,10 @@
 }
 
 bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
-						struct rb_root *root)
+						struct rb_root_cached *root)
 {
 #ifdef CONFIG_F2FS_CHECK_FS
-	struct rb_node *cur = rb_first(root), *next;
+	struct rb_node *cur = rb_first_cached(root), *next;
 	struct rb_entry *cur_re, *next_re;
 
 	if (!cur)
@@ -196,7 +202,8 @@
 
 static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
 				struct extent_tree *et, struct extent_info *ei,
-				struct rb_node *parent, struct rb_node **p)
+				struct rb_node *parent, struct rb_node **p,
+				bool leftmost)
 {
 	struct extent_node *en;
 
@@ -209,7 +216,7 @@
 	en->et = et;
 
 	rb_link_node(&en->rb_node, parent, p);
-	rb_insert_color(&en->rb_node, &et->root);
+	rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
 	atomic_inc(&et->node_cnt);
 	atomic_inc(&sbi->total_ext_node);
 	return en;
@@ -218,7 +225,7 @@
 static void __detach_extent_node(struct f2fs_sb_info *sbi,
 				struct extent_tree *et, struct extent_node *en)
 {
-	rb_erase(&en->rb_node, &et->root);
+	rb_erase_cached(&en->rb_node, &et->root);
 	atomic_dec(&et->node_cnt);
 	atomic_dec(&sbi->total_ext_node);
 
@@ -257,7 +264,7 @@
 		f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
 		memset(et, 0, sizeof(struct extent_tree));
 		et->ino = ino;
-		et->root = RB_ROOT;
+		et->root = RB_ROOT_CACHED;
 		et->cached_en = NULL;
 		rwlock_init(&et->lock);
 		INIT_LIST_HEAD(&et->list);
@@ -278,10 +285,10 @@
 static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
 				struct extent_tree *et, struct extent_info *ei)
 {
-	struct rb_node **p = &et->root.rb_node;
+	struct rb_node **p = &et->root.rb_root.rb_node;
 	struct extent_node *en;
 
-	en = __attach_extent_node(sbi, et, ei, NULL, p);
+	en = __attach_extent_node(sbi, et, ei, NULL, p, true);
 	if (!en)
 		return NULL;
 
@@ -297,7 +304,7 @@
 	struct extent_node *en;
 	unsigned int count = atomic_read(&et->node_cnt);
 
-	node = rb_first(&et->root);
+	node = rb_first_cached(&et->root);
 	while (node) {
 		next = rb_next(node);
 		en = rb_entry(node, struct extent_node, rb_node);
@@ -455,7 +462,8 @@
 static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
 				struct extent_tree *et, struct extent_info *ei,
 				struct rb_node **insert_p,
-				struct rb_node *insert_parent)
+				struct rb_node *insert_parent,
+				bool leftmost)
 {
 	struct rb_node **p;
 	struct rb_node *parent = NULL;
@@ -467,9 +475,12 @@
 		goto do_insert;
 	}
 
-	p = f2fs_lookup_rb_tree_for_insert(sbi, &et->root, &parent, ei->fofs);
+	leftmost = true;
+
+	p = f2fs_lookup_rb_tree_for_insert(sbi, &et->root, &parent,
+						ei->fofs, &leftmost);
 do_insert:
-	en = __attach_extent_node(sbi, et, ei, parent, p);
+	en = __attach_extent_node(sbi, et, ei, parent, p, leftmost);
 	if (!en)
 		return NULL;
 
@@ -495,6 +506,7 @@
 	unsigned int end = fofs + len;
 	unsigned int pos = (unsigned int)fofs;
 	bool updated = false;
+	bool leftmost = false;
 
 	if (!et)
 		return;
@@ -522,7 +534,8 @@
 					(struct rb_entry *)et->cached_en, fofs,
 					(struct rb_entry **)&prev_en,
 					(struct rb_entry **)&next_en,
-					&insert_p, &insert_parent, false);
+					&insert_p, &insert_parent, false,
+					&leftmost);
 	if (!en)
 		en = next_en;
 
@@ -549,7 +562,7 @@
 						end - dei.fofs + dei.blk,
 						org_end - end);
 				en1 = __insert_extent_tree(sbi, et, &ei,
-							NULL, NULL);
+							NULL, NULL, true);
 				next_en = en1;
 			} else {
 				en->ei.fofs = end;
@@ -590,7 +603,7 @@
 		set_extent_info(&ei, fofs, blkaddr, len);
 		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
 			__insert_extent_tree(sbi, et, &ei,
-						insert_p, insert_parent);
+					insert_p, insert_parent, leftmost);
 
 		/* give up extent_cache, if split and small updates happen */
 		if (dei.len >= 1 &&
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 44ea7ac..5fdbb04 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1,16 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/f2fs.h
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef _LINUX_F2FS_H
 #define _LINUX_F2FS_H
 
+#include <linux/uio.h>
 #include <linux/types.h>
 #include <linux/page-flags.h>
 #include <linux/buffer_head.h>
@@ -53,9 +51,10 @@
 	FAULT_DIR_DEPTH,
 	FAULT_EVICT_INODE,
 	FAULT_TRUNCATE,
-	FAULT_IO,
+	FAULT_READ_IO,
 	FAULT_CHECKPOINT,
 	FAULT_DISCARD,
+	FAULT_WRITE_IO,
 	FAULT_MAX,
 };
 
@@ -68,7 +67,7 @@
 	unsigned int inject_type;
 };
 
-extern char *f2fs_fault_name[FAULT_MAX];
+extern const char *f2fs_fault_name[FAULT_MAX];
 #define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type)))
 #endif
 
@@ -100,6 +99,7 @@
 #define F2FS_MOUNT_QUOTA		0x00400000
 #define F2FS_MOUNT_INLINE_XATTR_SIZE	0x00800000
 #define F2FS_MOUNT_RESERVE_ROOT		0x01000000
+#define F2FS_MOUNT_DISABLE_CHECKPOINT	0x02000000
 
 #define F2FS_OPTION(sbi)	((sbi)->mount_opt)
 #define clear_opt(sbi, option)	(F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
@@ -150,13 +150,15 @@
 #define F2FS_FEATURE_INODE_CRTIME	0x0100
 #define F2FS_FEATURE_LOST_FOUND		0x0200
 #define F2FS_FEATURE_VERITY		0x0400	/* reserved */
+#define F2FS_FEATURE_SB_CHKSUM		0x0800
 
-#define F2FS_HAS_FEATURE(sb, mask)					\
-	((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
-#define F2FS_SET_FEATURE(sb, mask)					\
-	(F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask))
-#define F2FS_CLEAR_FEATURE(sb, mask)					\
-	(F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask))
+#define __F2FS_HAS_FEATURE(raw_super, mask)				\
+	((raw_super->feature & cpu_to_le32(mask)) != 0)
+#define F2FS_HAS_FEATURE(sbi, mask)	__F2FS_HAS_FEATURE(sbi->raw_super, mask)
+#define F2FS_SET_FEATURE(sbi, mask)					\
+	(sbi->raw_super->feature |= cpu_to_le32(mask))
+#define F2FS_CLEAR_FEATURE(sbi, mask)					\
+	(sbi->raw_super->feature &= ~cpu_to_le32(mask))
 
 /*
  * Default values for user and/or group using reserved blocks
@@ -178,6 +180,7 @@
 #define	CP_RECOVERY	0x00000008
 #define	CP_DISCARD	0x00000010
 #define CP_TRIMMED	0x00000020
+#define CP_PAUSE	0x00000040
 
 #define MAX_DISCARD_BLOCKS(sbi)		BLKS_PER_SEC(sbi)
 #define DEF_MAX_DISCARD_REQUEST		8	/* issue 8 discards per round */
@@ -187,6 +190,9 @@
 #define DEF_DISCARD_URGENT_UTIL		80	/* do more discard over 80% */
 #define DEF_CP_INTERVAL			60	/* 60 secs */
 #define DEF_IDLE_INTERVAL		5	/* 5 secs */
+#define DEF_DISABLE_INTERVAL		5	/* 5 secs */
+#define DEF_DISABLE_QUICK_INTERVAL	1	/* 1 secs */
+#define DEF_UMOUNT_DISCARD_TIMEOUT	5	/* 5 secs */
 
 struct cp_control {
 	int reason;
@@ -203,6 +209,7 @@
 	META_NAT,
 	META_SIT,
 	META_SSA,
+	META_MAX,
 	META_POR,
 	DATA_GENERIC,
 	META_GENERIC,
@@ -249,7 +256,7 @@
 /* max discard pend list number */
 #define MAX_PLIST_NUM		512
 #define plist_idx(blk_num)	((blk_num) >= MAX_PLIST_NUM ?		\
-					(MAX_PLIST_NUM - 1) : (blk_num - 1))
+					(MAX_PLIST_NUM - 1) : ((blk_num) - 1))
 
 enum {
 	D_PREP,			/* initial */
@@ -280,7 +287,7 @@
 	struct block_device *bdev;	/* bdev */
 	unsigned short ref;		/* reference count */
 	unsigned char state;		/* state */
-	unsigned char issuing;		/* issuing discard */
+	unsigned char queued;		/* queued discard */
 	int error;			/* bio error */
 	spinlock_t lock;		/* for state/bio_ref updating */
 	unsigned short bio_ref;		/* bio reference count */
@@ -305,6 +312,7 @@
 	bool sync;			/* submit discard with REQ_SYNC flag */
 	bool ordered;			/* issue discard by lba order */
 	unsigned int granularity;	/* discard granularity */
+	int timeout;			/* discard timeout for put_super */
 };
 
 struct discard_cmd_control {
@@ -322,9 +330,9 @@
 	unsigned int undiscard_blks;		/* # of undiscard blocks */
 	unsigned int next_pos;			/* next discard position */
 	atomic_t issued_discard;		/* # of issued discard */
-	atomic_t issing_discard;		/* # of issing discard */
+	atomic_t queued_discard;		/* # of queued discard */
 	atomic_t discard_cmd_cnt;		/* # of cached cmd count */
-	struct rb_root root;			/* root of discard rb-tree */
+	struct rb_root_cached root;		/* root of discard rb-tree */
 	bool rbtree_check;			/* config for consistence check */
 };
 
@@ -412,6 +420,7 @@
 #define F2FS_GOING_DOWN_METASYNC	0x1	/* going down with metadata */
 #define F2FS_GOING_DOWN_NOSYNC		0x2	/* going down */
 #define F2FS_GOING_DOWN_METAFLUSH	0x3	/* going down with meta flush */
+#define F2FS_GOING_DOWN_NEED_FSCK	0x4	/* going down to trigger fsck */
 
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /*
@@ -526,6 +535,9 @@
 
 #define DEFAULT_RETRY_IO_COUNT	8	/* maximum retry read IO count */
 
+/* maximum retry quota flush count */
+#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT		8
+
 #define F2FS_LINK_MAX	0xffffffff	/* maximum link count per file */
 
 #define MAX_DIR_RA_PAGES	4	/* maximum ra pages of dir */
@@ -549,23 +561,15 @@
 };
 
 struct extent_node {
-	struct rb_node rb_node;
-	union {
-		struct {
-			unsigned int fofs;
-			unsigned int len;
-			u32 blk;
-		};
-		struct extent_info ei;	/* extent info */
-
-	};
+	struct rb_node rb_node;		/* rb node located in rb-tree */
+	struct extent_info ei;		/* extent info */
 	struct list_head list;		/* node in global extent list of sbi */
 	struct extent_tree *et;		/* extent tree pointer */
 };
 
 struct extent_tree {
 	nid_t ino;			/* inode number */
-	struct rb_root root;		/* root of extent info rb-tree */
+	struct rb_root_cached root;	/* root of extent info rb-tree */
 	struct extent_node *cached_en;	/* recently accessed extent node */
 	struct extent_info largest;	/* largested extent info */
 	struct list_head list;		/* to be used by sbi->zombie_list */
@@ -593,6 +597,7 @@
 	pgoff_t *m_next_pgofs;		/* point next possible non-hole pgofs */
 	pgoff_t *m_next_extent;		/* point to next possible extent */
 	int m_seg_type;
+	bool m_may_create;		/* indicate it is from write path */
 };
 
 /* for flag in get_data_block */
@@ -600,6 +605,7 @@
 	F2FS_GET_BLOCK_DEFAULT,
 	F2FS_GET_BLOCK_FIEMAP,
 	F2FS_GET_BLOCK_BMAP,
+	F2FS_GET_BLOCK_DIO,
 	F2FS_GET_BLOCK_PRE_DIO,
 	F2FS_GET_BLOCK_PRE_AIO,
 	F2FS_GET_BLOCK_PRECACHE,
@@ -880,7 +886,7 @@
 	struct task_struct *f2fs_issue_flush;	/* flush thread */
 	wait_queue_head_t flush_wait_queue;	/* waiting queue for wake-up */
 	atomic_t issued_flush;			/* # of issued flushes */
-	atomic_t issing_flush;			/* # of issing flushes */
+	atomic_t queued_flush;			/* # of queued flushes */
 	struct llist_head issue_list;		/* list for command issue */
 	struct llist_node *dispatch_list;	/* list for command dispatch */
 };
@@ -944,6 +950,11 @@
 	F2FS_DIRTY_IMETA,
 	F2FS_WB_CP_DATA,
 	F2FS_WB_DATA,
+	F2FS_RD_DATA,
+	F2FS_RD_NODE,
+	F2FS_RD_META,
+	F2FS_DIO_WRITE,
+	F2FS_DIO_READ,
 	NR_COUNT_TYPE,
 };
 
@@ -1089,11 +1100,20 @@
 	SBI_NEED_CP,				/* need to checkpoint */
 	SBI_IS_SHUTDOWN,			/* shutdown by ioctl */
 	SBI_IS_RECOVERED,			/* recovered orphan/data */
+	SBI_CP_DISABLED,			/* CP was disabled last mount */
+	SBI_CP_DISABLED_QUICK,			/* CP was disabled quickly */
+	SBI_QUOTA_NEED_FLUSH,			/* need to flush quota info in CP */
+	SBI_QUOTA_SKIP_FLUSH,			/* skip flushing quota in current CP */
+	SBI_QUOTA_NEED_REPAIR,			/* quota file may be corrupted */
 };
 
 enum {
 	CP_TIME,
 	REQ_TIME,
+	DISCARD_TIME,
+	GC_TIME,
+	DISABLE_TIME,
+	UMOUNT_DISCARD_TIMEOUT,
 	MAX_TIME,
 };
 
@@ -1151,8 +1171,6 @@
 
 	/* for bio operations */
 	struct f2fs_bio_info *write_io[NR_PAGE_TYPE];	/* for write bios */
-	struct mutex wio_mutex[NR_PAGE_TYPE - 1][NR_TEMP_TYPE];
-						/* bio ordering for NODE/DATA */
 	/* keep migration IO order for LFS mode */
 	struct rw_semaphore io_order_lock;
 	mempool_t *write_io_dummy;		/* Dummy pages */
@@ -1210,7 +1228,6 @@
 	unsigned int total_valid_node_count;	/* valid node block count */
 	loff_t max_file_blocks;			/* max block index of file */
 	int dir_level;				/* directory level */
-	unsigned int trigger_ssr_threshold;	/* threshold to trigger ssr */
 	int readdir_ra;				/* readahead inode in readdir */
 
 	block_t user_block_count;		/* # of user blocks */
@@ -1220,9 +1237,10 @@
 	block_t reserved_blocks;		/* configurable reserved blocks */
 	block_t current_reserved_blocks;	/* current reserved blocks */
 
-	unsigned int nquota_files;		/* # of quota sysfile */
+	/* Additional tracking for no checkpoint mode */
+	block_t unusable_block_count;		/* # of blocks saved by last cp */
 
-	u32 s_next_generation;			/* for NFS support */
+	unsigned int nquota_files;		/* # of quota sysfile */
 
 	/* # of pages, see count_type */
 	atomic_t nr_pages[NR_COUNT_TYPE];
@@ -1242,6 +1260,7 @@
 	struct f2fs_gc_kthread	*gc_thread;	/* GC thread */
 	unsigned int cur_victim_sec;		/* current victim section num */
 	unsigned int gc_mode;			/* current GC state */
+	unsigned int next_victim_seg[2];	/* next segment in victim section */
 	/* for skip statistic */
 	unsigned long long skipped_atomic_files[2];	/* FG_GC and BG_GC */
 	unsigned long long skipped_gc_rwsem;		/* FG_GC only */
@@ -1251,6 +1270,8 @@
 
 	/* maximum # of trials to find a victim segment for SSR and GC */
 	unsigned int max_victim_search;
+	/* migration granularity of garbage collection, unit: segment */
+	unsigned int migration_granularity;
 
 	/*
 	 * for stat information.
@@ -1258,6 +1279,7 @@
 	 */
 #ifdef CONFIG_F2FS_STAT_FS
 	struct f2fs_stat_info *stat_info;	/* FS status information */
+	atomic_t meta_count[META_MAX];		/* # of meta blocks */
 	unsigned int segment_count[2];		/* # of allocated segments */
 	unsigned int block_count[2];		/* # of allocated blocks */
 	atomic_t inplace_count;		/* # of inplace update */
@@ -1273,6 +1295,8 @@
 	atomic_t max_aw_cnt;			/* max # of atomic writes */
 	atomic_t max_vw_cnt;			/* max # of volatile writes */
 	int bg_gc;				/* background gc calls */
+	unsigned int io_skip_bggc;		/* skip background gc for in-flight IO */
+	unsigned int other_skip_bggc;		/* skip background gc for other reasons */
 	unsigned int ndirty_inode[NR_INODE_TYPE];	/* # of dirty inodes */
 #endif
 	spinlock_t stat_lock;			/* lock for stat operations */
@@ -1306,10 +1330,17 @@
 	__u32 s_chksum_seed;
 };
 
+struct f2fs_private_dio {
+	struct inode *inode;
+	void *orig_private;
+	bio_end_io_t *orig_end_io;
+	bool write;
+};
+
 #ifdef CONFIG_F2FS_FAULT_INJECTION
-#define f2fs_show_injection_info(type)				\
-	printk("%sF2FS-fs : inject %s in %s of %pF\n",		\
-		KERN_INFO, f2fs_fault_name[type],		\
+#define f2fs_show_injection_info(type)					\
+	printk_ratelimited("%sF2FS-fs : inject %s in %s of %pF\n",	\
+		KERN_INFO, f2fs_fault_name[type],			\
 		__func__, __builtin_return_address(0))
 static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
 {
@@ -1356,7 +1387,15 @@
 
 static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
 {
-	sbi->last_time[type] = jiffies;
+	unsigned long now = jiffies;
+
+	sbi->last_time[type] = now;
+
+	/* DISCARD_TIME and GC_TIME are based on REQ_TIME */
+	if (type == REQ_TIME) {
+		sbi->last_time[DISCARD_TIME] = now;
+		sbi->last_time[GC_TIME] = now;
+	}
 }
 
 static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
@@ -1366,16 +1405,18 @@
 	return time_after(jiffies, sbi->last_time[type] + interval);
 }
 
-static inline bool is_idle(struct f2fs_sb_info *sbi)
+static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi,
+						int type)
 {
-	struct block_device *bdev = sbi->sb->s_bdev;
-	struct request_queue *q = bdev_get_queue(bdev);
-	struct request_list *rl = &q->root_rl;
+	unsigned long interval = sbi->interval_time[type] * HZ;
+	unsigned int wait_ms = 0;
+	long delta;
 
-	if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC])
-		return false;
+	delta = (sbi->last_time[type] + interval) - jiffies;
+	if (delta > 0)
+		wait_ms = jiffies_to_msecs(delta);
 
-	return f2fs_time_over(sbi, REQ_TIME);
+	return wait_ms;
 }
 
 /*
@@ -1585,12 +1626,16 @@
 {
 	unsigned long flags;
 
-	set_sbi_flag(sbi, SBI_NEED_FSCK);
+	/*
+	 * In order to re-enable nat_bits we need to call fsck.f2fs by
+	 * set_sbi_flag(sbi, SBI_NEED_FSCK). But it may give huge cost,
+	 * so let's rely on regular fsck or unclean shutdown.
+	 */
 
 	if (lock)
 		spin_lock_irqsave(&sbi->cp_lock, flags);
 	__clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
-	kfree(NM_I(sbi)->nat_bits);
+	kvfree(NM_I(sbi)->nat_bits);
 	NM_I(sbi)->nat_bits = NULL;
 	if (lock)
 		spin_unlock_irqrestore(&sbi->cp_lock, flags);
@@ -1716,7 +1761,8 @@
 
 	if (!__allow_reserved_blocks(sbi, inode, true))
 		avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
-
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		avail_user_block_count -= sbi->unusable_block_count;
 	if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
 		diff = sbi->total_valid_block_count - avail_user_block_count;
 		if (diff > *count)
@@ -1775,11 +1821,12 @@
 {
 	atomic_inc(&sbi->nr_pages[count_type]);
 
-	if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES ||
-		count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA)
-		return;
-
-	set_sbi_flag(sbi, SBI_IS_DIRTY);
+	if (count_type == F2FS_DIRTY_DENTS ||
+			count_type == F2FS_DIRTY_NODES ||
+			count_type == F2FS_DIRTY_META ||
+			count_type == F2FS_DIRTY_QDATA ||
+			count_type == F2FS_DIRTY_IMETA)
+		set_sbi_flag(sbi, SBI_IS_DIRTY);
 }
 
 static inline void inode_inc_dirty_pages(struct inode *inode)
@@ -1912,12 +1959,18 @@
 {
 	block_t	valid_block_count;
 	unsigned int valid_node_count;
-	bool quota = inode && !is_inode;
+	int err;
 
-	if (quota) {
-		int ret = dquot_reserve_block(inode, 1);
-		if (ret)
-			return ret;
+	if (is_inode) {
+		if (inode) {
+			err = dquot_alloc_inode(inode);
+			if (err)
+				return err;
+		}
+	} else {
+		err = dquot_reserve_block(inode, 1);
+		if (err)
+			return err;
 	}
 
 	if (time_to_inject(sbi, FAULT_BLOCK)) {
@@ -1932,6 +1985,8 @@
 
 	if (!__allow_reserved_blocks(sbi, inode, false))
 		valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		valid_block_count += sbi->unusable_block_count;
 
 	if (unlikely(valid_block_count > sbi->user_block_count)) {
 		spin_unlock(&sbi->stat_lock);
@@ -1959,8 +2014,12 @@
 	return 0;
 
 enospc:
-	if (quota)
+	if (is_inode) {
+		if (inode)
+			dquot_free_inode(inode);
+	} else {
 		dquot_release_reservation_block(inode, 1);
+	}
 	return -ENOSPC;
 }
 
@@ -1981,7 +2040,9 @@
 
 	spin_unlock(&sbi->stat_lock);
 
-	if (!is_inode)
+	if (is_inode)
+		dquot_free_inode(inode);
+	else
 		f2fs_i_blocks_write(inode, 1, false, true);
 }
 
@@ -2111,6 +2172,26 @@
 	return bio_alloc(GFP_KERNEL, npages);
 }
 
+static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
+{
+	if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) ||
+		get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) ||
+		get_pages(sbi, F2FS_WB_CP_DATA) ||
+		get_pages(sbi, F2FS_DIO_READ) ||
+		get_pages(sbi, F2FS_DIO_WRITE))
+		return false;
+
+	if (SM_I(sbi) && SM_I(sbi)->dcc_info &&
+			atomic_read(&SM_I(sbi)->dcc_info->queued_discard))
+		return false;
+
+	if (SM_I(sbi) && SM_I(sbi)->fcc_info &&
+			atomic_read(&SM_I(sbi)->fcc_info->queued_flush))
+		return false;
+
+	return f2fs_time_over(sbi, type);
+}
+
 static inline void f2fs_radix_tree_insert(struct radix_tree_root *root,
 				unsigned long index, void *item)
 {
@@ -2248,11 +2329,12 @@
 #define F2FS_EXTENTS_FL			0x00080000 /* Inode uses extents */
 #define F2FS_EA_INODE_FL	        0x00200000 /* Inode used for large EA */
 #define F2FS_EOFBLOCKS_FL		0x00400000 /* Blocks allocated beyond EOF */
+#define F2FS_NOCOW_FL			0x00800000 /* Do not cow file */
 #define F2FS_INLINE_DATA_FL		0x10000000 /* Inode has inline data. */
 #define F2FS_PROJINHERIT_FL		0x20000000 /* Create with parents projid */
 #define F2FS_RESERVED_FL		0x80000000 /* reserved for ext4 lib */
 
-#define F2FS_FL_USER_VISIBLE		0x304BDFFF /* User visible flags */
+#define F2FS_FL_USER_VISIBLE		0x30CBDFFF /* User visible flags */
 #define F2FS_FL_USER_MODIFIABLE		0x204BC0FF /* User modifiable flags */
 
 /* Flags we can manipulate with through F2FS_IOC_FSSETXATTR */
@@ -2330,6 +2412,7 @@
 	case FI_NEW_INODE:
 		if (set)
 			return;
+		/* fall through */
 	case FI_DATA_EXIST:
 	case FI_INLINE_DOTS:
 	case FI_PIN_FILE:
@@ -2653,12 +2736,18 @@
 static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi,
 					size_t size, gfp_t flags)
 {
+	void *ret;
+
 	if (time_to_inject(sbi, FAULT_KMALLOC)) {
 		f2fs_show_injection_info(FAULT_KMALLOC);
 		return NULL;
 	}
 
-	return kmalloc(size, flags);
+	ret = kmalloc(size, flags);
+	if (ret)
+		return ret;
+
+	return kvmalloc(size, flags);
 }
 
 static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi,
@@ -2704,9 +2793,9 @@
 
 #define F2FS_OLD_ATTRIBUTE_SIZE	(offsetof(struct f2fs_inode, i_addr))
 #define F2FS_FITS_IN_INODE(f2fs_inode, extra_isize, field)		\
-		((offsetof(typeof(*f2fs_inode), field) +	\
+		((offsetof(typeof(*(f2fs_inode)), field) +	\
 		sizeof((f2fs_inode)->field))			\
-		<= (F2FS_OLD_ATTRIBUTE_SIZE + extra_isize))	\
+		<= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize)))	\
 
 static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
 {
@@ -2733,8 +2822,10 @@
 	spin_unlock(&sbi->iostat_lock);
 }
 
-#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META &&	\
-				(!is_read_io(fio->op) || fio->is_meta))
+#define __is_large_section(sbi)		((sbi)->segs_per_sec > 1)
+
+#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META &&	\
+				(!is_read_io((fio)->op) || (fio)->is_meta))
 
 bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
 					block_t blkaddr, int type);
@@ -2765,6 +2856,27 @@
 	return true;
 }
 
+static inline void f2fs_set_page_private(struct page *page,
+						unsigned long data)
+{
+	if (PagePrivate(page))
+		return;
+
+	get_page(page);
+	SetPagePrivate(page);
+	set_page_private(page, data);
+}
+
+static inline void f2fs_clear_page_private(struct page *page)
+{
+	if (!PagePrivate(page))
+		return;
+
+	set_page_private(page, 0);
+	ClearPagePrivate(page);
+	f2fs_put_page(page, 0);
+}
+
 /*
  * file.c
  */
@@ -2780,6 +2892,7 @@
 int f2fs_precache_extents(struct inode *inode);
 long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
 long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid);
 int f2fs_pin_file_control(struct inode *inode, bool inc);
 
 /*
@@ -2858,6 +2971,7 @@
 int f2fs_inode_dirtied(struct inode *inode, bool sync);
 void f2fs_inode_synced(struct inode *inode);
 int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
+int f2fs_quota_sync(struct super_block *sb, int type);
 void f2fs_quota_off_umount(struct super_block *sb);
 int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
 int f2fs_sync_fs(struct super_block *sb, int sync);
@@ -2900,7 +3014,7 @@
 void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid);
 struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid);
 struct page *f2fs_get_node_page_ra(struct page *parent, int start);
-void f2fs_move_node_page(struct page *node_page, int gc_type);
+int f2fs_move_node_page(struct page *node_page, int gc_type);
 int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
 			struct writeback_control *wbc, bool atomic,
 			unsigned int *seq_id);
@@ -2917,7 +3031,7 @@
 int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
 int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
 			unsigned int segno, struct f2fs_summary_block *sum);
-void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
 int f2fs_build_node_manager(struct f2fs_sb_info *sbi);
 void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi);
 int __init f2fs_create_node_manager_caches(void);
@@ -2942,9 +3056,11 @@
 bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
 void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi);
 void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi);
-bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
+bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi);
 void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
 					struct cp_control *cpc);
+void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
+int f2fs_disable_cp_again(struct f2fs_sb_info *sbi);
 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
 void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
@@ -2972,8 +3088,10 @@
 			struct f2fs_summary *sum, int type,
 			struct f2fs_io_info *fio, bool add_list);
 void f2fs_wait_on_page_writeback(struct page *page,
-			enum page_type type, bool ordered);
-void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr);
+			enum page_type type, bool ordered, bool locked);
+void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr);
+void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
+								block_t len);
 void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
 void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
 int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
@@ -3033,8 +3151,8 @@
 void f2fs_destroy_post_read_processing(void);
 void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
 void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
-				struct inode *inode, nid_t ino, pgoff_t idx,
-				enum page_type type);
+				struct inode *inode, struct page *page,
+				nid_t ino, enum page_type type);
 void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi);
 int f2fs_submit_page_bio(struct f2fs_io_info *fio);
 void f2fs_submit_page_write(struct f2fs_io_info *fio);
@@ -3056,6 +3174,7 @@
 struct page *f2fs_get_new_data_page(struct inode *inode,
 			struct page *ipage, pgoff_t index, bool new_i_size);
 int f2fs_do_write_data_page(struct f2fs_io_info *fio);
+void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock);
 int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 			int create, int flag);
 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
@@ -3108,6 +3227,9 @@
 	int free_nids, avail_nids, alloc_nids;
 	int total_count, utilization;
 	int bg_gc, nr_wb_cp_data, nr_wb_data;
+	int nr_rd_data, nr_rd_node, nr_rd_meta;
+	int nr_dio_read, nr_dio_write;
+	unsigned int io_skip_bggc, other_skip_bggc;
 	int nr_flushing, nr_flushed, flush_list_empty;
 	int nr_discarding, nr_discarded;
 	int nr_discard_cmd;
@@ -3129,6 +3251,7 @@
 	int cursec[NR_CURSEG_TYPE];
 	int curzone[NR_CURSEG_TYPE];
 
+	unsigned int meta_count[META_MAX];
 	unsigned int segment_count[2];
 	unsigned int block_count[2];
 	unsigned int inplace_count;
@@ -3144,6 +3267,8 @@
 #define stat_inc_bg_cp_count(si)	((si)->bg_cp_count++)
 #define stat_inc_call_count(si)		((si)->call_count++)
 #define stat_inc_bggc_count(sbi)	((sbi)->bg_gc++)
+#define stat_io_skip_bggc_count(sbi)	((sbi)->io_skip_bggc++)
+#define stat_other_skip_bggc_count(sbi)	((sbi)->other_skip_bggc++)
 #define stat_inc_dirty_inode(sbi, type)	((sbi)->ndirty_inode[type]++)
 #define stat_dec_dirty_inode(sbi, type)	((sbi)->ndirty_inode[type]--)
 #define stat_inc_total_hit(sbi)		(atomic64_inc(&(sbi)->total_hit_ext))
@@ -3180,6 +3305,17 @@
 		if (f2fs_has_inline_dentry(inode))			\
 			(atomic_dec(&F2FS_I_SB(inode)->inline_dir));	\
 	} while (0)
+#define stat_inc_meta_count(sbi, blkaddr)				\
+	do {								\
+		if (blkaddr < SIT_I(sbi)->sit_base_addr)		\
+			atomic_inc(&(sbi)->meta_count[META_CP]);	\
+		else if (blkaddr < NM_I(sbi)->nat_blkaddr)		\
+			atomic_inc(&(sbi)->meta_count[META_SIT]);	\
+		else if (blkaddr < SM_I(sbi)->ssa_blkaddr)		\
+			atomic_inc(&(sbi)->meta_count[META_NAT]);	\
+		else if (blkaddr < SM_I(sbi)->main_blkaddr)		\
+			atomic_inc(&(sbi)->meta_count[META_SSA]);	\
+	} while (0)
 #define stat_inc_seg_type(sbi, curseg)					\
 		((sbi)->segment_count[(curseg)->alloc_type]++)
 #define stat_inc_block_count(sbi, curseg)				\
@@ -3242,13 +3378,15 @@
 
 int f2fs_build_stats(struct f2fs_sb_info *sbi);
 void f2fs_destroy_stats(struct f2fs_sb_info *sbi);
-int __init f2fs_create_root_stats(void);
+void __init f2fs_create_root_stats(void);
 void f2fs_destroy_root_stats(void);
 #else
 #define stat_inc_cp_count(si)				do { } while (0)
 #define stat_inc_bg_cp_count(si)			do { } while (0)
 #define stat_inc_call_count(si)				do { } while (0)
 #define stat_inc_bggc_count(si)				do { } while (0)
+#define stat_io_skip_bggc_count(sbi)			do { } while (0)
+#define stat_other_skip_bggc_count(sbi)			do { } while (0)
 #define stat_inc_dirty_inode(sbi, type)			do { } while (0)
 #define stat_dec_dirty_inode(sbi, type)			do { } while (0)
 #define stat_inc_total_hit(sb)				do { } while (0)
@@ -3267,6 +3405,7 @@
 #define stat_inc_volatile_write(inode)			do { } while (0)
 #define stat_dec_volatile_write(inode)			do { } while (0)
 #define stat_update_max_volatile_write(inode)		do { } while (0)
+#define stat_inc_meta_count(sbi, blkaddr)		do { } while (0)
 #define stat_inc_seg_type(sbi, curseg)			do { } while (0)
 #define stat_inc_block_count(sbi, curseg)		do { } while (0)
 #define stat_inc_inplace_blocks(sbi)			do { } while (0)
@@ -3277,7 +3416,7 @@
 
 static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
 static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
-static inline int __init f2fs_create_root_stats(void) { return 0; }
+static inline void __init f2fs_create_root_stats(void) { }
 static inline void f2fs_destroy_root_stats(void) { }
 #endif
 
@@ -3336,18 +3475,19 @@
 /*
  * extent_cache.c
  */
-struct rb_entry *f2fs_lookup_rb_tree(struct rb_root *root,
+struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root,
 				struct rb_entry *cached_re, unsigned int ofs);
 struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
-				struct rb_root *root, struct rb_node **parent,
-				unsigned int ofs);
-struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root *root,
+				struct rb_root_cached *root,
+				struct rb_node **parent,
+				unsigned int ofs, bool *leftmost);
+struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
 		struct rb_entry *cached_re, unsigned int ofs,
 		struct rb_entry **prev_entry, struct rb_entry **next_entry,
 		struct rb_node ***insert_p, struct rb_node **insert_parent,
-		bool force);
+		bool force, bool *leftmost);
 bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
-						struct rb_root *root);
+						struct rb_root_cached *root);
 unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
 bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
 void f2fs_drop_extent_tree(struct inode *inode);
@@ -3387,7 +3527,7 @@
 {
 #ifdef CONFIG_F2FS_FS_ENCRYPTION
 	file_set_encrypt(inode);
-	inode->i_flags |= S_ENCRYPTED;
+	f2fs_set_inode_flags(inode);
 #endif
 }
 
@@ -3401,9 +3541,9 @@
 }
 
 #define F2FS_FEATURE_FUNCS(name, flagname) \
-static inline int f2fs_sb_has_##name(struct super_block *sb) \
+static inline int f2fs_sb_has_##name(struct f2fs_sb_info *sbi) \
 { \
-	return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_##flagname); \
+	return F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_##flagname); \
 }
 
 F2FS_FEATURE_FUNCS(encrypt, ENCRYPT);
@@ -3415,6 +3555,7 @@
 F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO);
 F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME);
 F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND);
+F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM);
 
 #ifdef CONFIG_BLK_DEV_ZONED
 static inline int get_blkz_type(struct f2fs_sb_info *sbi,
@@ -3432,7 +3573,7 @@
 
 static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi)
 {
-	return f2fs_sb_has_blkzoned(sbi->sb);
+	return f2fs_sb_has_blkzoned(sbi);
 }
 
 static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi)
@@ -3472,11 +3613,50 @@
 #endif
 }
 
-static inline bool f2fs_force_buffered_io(struct inode *inode, int rw)
+static inline int block_unaligned_IO(struct inode *inode,
+				struct kiocb *iocb, struct iov_iter *iter)
 {
-	return (f2fs_post_read_required(inode) ||
-			(rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) ||
-			f2fs_is_multi_device(F2FS_I_SB(inode)));
+	unsigned int i_blkbits = READ_ONCE(inode->i_blkbits);
+	unsigned int blocksize_mask = (1 << i_blkbits) - 1;
+	loff_t offset = iocb->ki_pos;
+	unsigned long align = offset | iov_iter_alignment(iter);
+
+	return align & blocksize_mask;
+}
+
+static inline int allow_outplace_dio(struct inode *inode,
+				struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	int rw = iov_iter_rw(iter);
+
+	return (test_opt(sbi, LFS) && (rw == WRITE) &&
+				!block_unaligned_IO(inode, iocb, iter));
+}
+
+static inline bool f2fs_force_buffered_io(struct inode *inode,
+				struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	int rw = iov_iter_rw(iter);
+
+	if (f2fs_post_read_required(inode))
+		return true;
+	if (sbi->s_ndevs)
+		return true;
+	/*
+	 * for blkzoned device, fallback direct IO to buffered IO, so
+	 * all IOs can be serialized by log-structured write.
+	 */
+	if (f2fs_sb_has_blkzoned(sbi))
+		return true;
+	if (test_opt(sbi, LFS) && (rw == WRITE) &&
+				block_unaligned_IO(inode, iocb, iter))
+		return true;
+	if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED))
+		return true;
+
+	return false;
 }
 
 #ifdef CONFIG_F2FS_FAULT_INJECTION
@@ -3486,4 +3666,17 @@
 #define f2fs_build_fault_attr(sbi, rate, type)		do { } while (0)
 #endif
 
+static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
+{
+#ifdef CONFIG_QUOTA
+	if (f2fs_sb_has_quota_ino(sbi))
+		return true;
+	if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
+		F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
+		F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
+		return true;
+#endif
+	return false;
+}
+
 #endif
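
The f2fs.h hunks above replace the old request-queue based is_idle() with per-type timestamps: f2fs_update_time() refreshes DISCARD_TIME and GC_TIME whenever REQ_TIME is touched, and f2fs_time_to_wait() reports how long a background thread may still sleep before its interval expires. A minimal userspace sketch of that bookkeeping, with milliseconds standing in for jiffies (struct sbi_model, update_time_model() and time_to_wait_model() are illustrative names, not kernel APIs; the intervals loosely follow the DEF_*_INTERVAL defaults above):

/* Illustrative userspace model of the per-type idle timers; not kernel code. */
#include <stdio.h>
#include <time.h>

enum { CP_TIME, REQ_TIME, DISCARD_TIME, GC_TIME, MAX_TIME };

struct sbi_model {
	long long last_time[MAX_TIME];		/* last activity, in ms */
	long long interval_time[MAX_TIME];	/* configured interval, in ms */
};

static long long now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000LL + ts.tv_nsec / 1000000LL;
}

static void update_time_model(struct sbi_model *sbi, int type)
{
	long long now = now_ms();

	sbi->last_time[type] = now;
	/* DISCARD_TIME and GC_TIME piggyback on REQ_TIME, as in the hunk above */
	if (type == REQ_TIME) {
		sbi->last_time[DISCARD_TIME] = now;
		sbi->last_time[GC_TIME] = now;
	}
}

static long long time_to_wait_model(struct sbi_model *sbi, int type)
{
	long long delta = sbi->last_time[type] + sbi->interval_time[type] - now_ms();

	return delta > 0 ? delta : 0;	/* 0: the interval has already expired */
}

int main(void)
{
	struct sbi_model sbi = {
		.interval_time = { 60000, 5000, 5000, 5000 },
	};

	update_time_model(&sbi, REQ_TIME);
	printf("discard thread may sleep ~%lld ms\n",
	       time_to_wait_model(&sbi, DISCARD_TIME));
	return 0;
}
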
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 8d1eb8d..012815d 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/file.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
@@ -50,7 +47,7 @@
 	struct page *page = vmf->page;
 	struct inode *inode = file_inode(vmf->vma->vm_file);
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct dnode_of_data dn;
+	struct dnode_of_data dn = { .node_changed = false };
 	int err;
 
 	if (unlikely(f2fs_cp_error(sbi))) {
@@ -62,19 +59,6 @@
 
 	f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
 
-	/* block allocation */
-	f2fs_lock_op(sbi);
-	set_new_dnode(&dn, inode, NULL, NULL, 0);
-	err = f2fs_reserve_block(&dn, page->index);
-	if (err) {
-		f2fs_unlock_op(sbi);
-		goto out;
-	}
-	f2fs_put_dnode(&dn);
-	f2fs_unlock_op(sbi);
-
-	f2fs_balance_fs(sbi, dn.node_changed);
-
 	file_update_time(vmf->vma->vm_file);
 	down_read(&F2FS_I(inode)->i_mmap_sem);
 	lock_page(page);
@@ -86,11 +70,28 @@
 		goto out_sem;
 	}
 
+	/* block allocation */
+	__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
+	set_new_dnode(&dn, inode, NULL, NULL, 0);
+	err = f2fs_get_block(&dn, page->index);
+	f2fs_put_dnode(&dn);
+	__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
+	if (err) {
+		unlock_page(page);
+		goto out_sem;
+	}
+
+	/* fill the page */
+	f2fs_wait_on_page_writeback(page, DATA, false, true);
+
+	/* wait for GCed page writeback via META_MAPPING */
+	f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
+
 	/*
 	 * check to see if the page is mapped already (no holes)
 	 */
 	if (PageMappedToDisk(page))
-		goto mapped;
+		goto out_sem;
 
 	/* page is wholly or partially inside EOF */
 	if (((loff_t)(page->index + 1) << PAGE_SHIFT) >
@@ -105,21 +106,15 @@
 		SetPageUptodate(page);
 
 	f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE);
+	f2fs_update_time(sbi, REQ_TIME);
 
 	trace_f2fs_vm_page_mkwrite(page, DATA);
-mapped:
-	/* fill the page */
-	f2fs_wait_on_page_writeback(page, DATA, false);
-
-	/* wait for GCed page writeback via META_MAPPING */
-	if (f2fs_post_read_required(inode))
-		f2fs_wait_on_block_writeback(sbi, dn.data_blkaddr);
-
 out_sem:
 	up_read(&F2FS_I(inode)->i_mmap_sem);
-out:
+
+	f2fs_balance_fs(sbi, dn.node_changed);
+
 	sb_end_pagefault(inode->i_sb);
-	f2fs_update_time(sbi, REQ_TIME);
 err:
 	return block_page_mkwrite_return(err);
 }
@@ -215,7 +210,8 @@
 	};
 	unsigned int seq_id = 0;
 
-	if (unlikely(f2fs_readonly(inode->i_sb)))
+	if (unlikely(f2fs_readonly(inode->i_sb) ||
+				is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
 		return 0;
 
 	trace_f2fs_sync_file_enter(inode);
@@ -582,7 +578,7 @@
 	if (IS_ERR(page))
 		return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page);
 truncate_out:
-	f2fs_wait_on_page_writeback(page, DATA, true);
+	f2fs_wait_on_page_writeback(page, DATA, true, true);
 	zero_user(page, offset, PAGE_SIZE - offset);
 
 	/* An encrypted inode should have a key and truncate the last page. */
@@ -701,7 +697,7 @@
 	unsigned int flags;
 
 	if (f2fs_has_extra_attr(inode) &&
-			f2fs_sb_has_inode_crtime(inode->i_sb) &&
+			f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) &&
 			F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
 		stat->result_mask |= STATX_BTIME;
 		stat->btime.tv_sec = fi->i_crtime.tv_sec;
@@ -791,9 +787,24 @@
 		!uid_eq(attr->ia_uid, inode->i_uid)) ||
 		(attr->ia_valid & ATTR_GID &&
 		!gid_eq(attr->ia_gid, inode->i_gid))) {
+		f2fs_lock_op(F2FS_I_SB(inode));
 		err = dquot_transfer(inode, attr);
-		if (err)
+		if (err) {
+			set_sbi_flag(F2FS_I_SB(inode),
+					SBI_QUOTA_NEED_REPAIR);
+			f2fs_unlock_op(F2FS_I_SB(inode));
 			return err;
+		}
+		/*
+		 * update uid/gid under lock_op(), so that dquot and inode can
+		 * be updated atomically.
+		 */
+		if (attr->ia_valid & ATTR_UID)
+			inode->i_uid = attr->ia_uid;
+		if (attr->ia_valid & ATTR_GID)
+			inode->i_gid = attr->ia_gid;
+		f2fs_mark_inode_dirty_sync(inode, true);
+		f2fs_unlock_op(F2FS_I_SB(inode));
 	}
 
 	if (attr->ia_valid & ATTR_SIZE) {
@@ -879,7 +890,7 @@
 	if (IS_ERR(page))
 		return PTR_ERR(page);
 
-	f2fs_wait_on_page_writeback(page, DATA, true);
+	f2fs_wait_on_page_writeback(page, DATA, true, true);
 	zero_user(page, start, len);
 	set_page_dirty(page);
 	f2fs_put_page(page, 1);
@@ -1483,7 +1494,8 @@
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct f2fs_map_blocks map = { .m_next_pgofs = NULL,
-			.m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE };
+			.m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE,
+			.m_may_create = true };
 	pgoff_t pg_end;
 	loff_t new_size = i_size_read(inode);
 	loff_t off_end;
@@ -1634,6 +1646,8 @@
 		flags |= F2FS_ENCRYPT_FL;
 	if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
 		flags |= F2FS_INLINE_DATA_FL;
+	if (is_inode_flag_set(inode, FI_PIN_FILE))
+		flags |= F2FS_NOCOW_FL;
 
 	flags &= F2FS_FL_USER_VISIBLE;
 
@@ -1668,7 +1682,7 @@
 
 	inode->i_ctime = current_time(inode);
 	f2fs_set_inode_flags(inode);
-	f2fs_mark_inode_dirty_sync(inode, false);
+	f2fs_mark_inode_dirty_sync(inode, true);
 	return 0;
 }
 
@@ -1951,6 +1965,13 @@
 		f2fs_stop_checkpoint(sbi, false);
 		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
 		break;
+	case F2FS_GOING_DOWN_NEED_FSCK:
+		set_sbi_flag(sbi, SBI_NEED_FSCK);
+		set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
+		set_sbi_flag(sbi, SBI_IS_DIRTY);
+		/* do checkpoint only */
+		ret = f2fs_sync_fs(sb, 1);
+		goto out;
 	default:
 		ret = -EINVAL;
 		goto out;
@@ -1966,6 +1987,9 @@
 out:
 	if (in != F2FS_GOING_DOWN_FULLSYNC)
 		mnt_drop_write_file(filp);
+
+	trace_f2fs_shutdown(sbi, in, ret);
+
 	return ret;
 }
 
@@ -2019,7 +2043,7 @@
 {
 	struct inode *inode = file_inode(filp);
 
-	if (!f2fs_sb_has_encrypt(inode->i_sb))
+	if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode)))
 		return -EOPNOTSUPP;
 
 	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
@@ -2029,7 +2053,7 @@
 
 static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
 {
-	if (!f2fs_sb_has_encrypt(file_inode(filp)->i_sb))
+	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
 		return -EOPNOTSUPP;
 	return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
 }
@@ -2040,7 +2064,7 @@
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	int err;
 
-	if (!f2fs_sb_has_encrypt(inode->i_sb))
+	if (!f2fs_sb_has_encrypt(sbi))
 		return -EOPNOTSUPP;
 
 	err = mnt_want_write_file(filp);
@@ -2144,7 +2168,7 @@
 	}
 
 	ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start));
-	range.start += sbi->blocks_per_seg;
+	range.start += BLKS_PER_SEC(sbi);
 	if (range.start <= end)
 		goto do_more;
 out:
@@ -2164,6 +2188,12 @@
 	if (f2fs_readonly(sbi->sb))
 		return -EROFS;
 
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+		f2fs_msg(sbi->sb, KERN_INFO,
+			"Skipping Checkpoint. Checkpoints currently disabled.");
+		return -EINVAL;
+	}
+
 	ret = mnt_want_write_file(filp);
 	if (ret)
 		return ret;
@@ -2180,7 +2210,8 @@
 {
 	struct inode *inode = file_inode(filp);
 	struct f2fs_map_blocks map = { .m_next_extent = NULL,
-					.m_seg_type = NO_CHECK_TYPE };
+					.m_seg_type = NO_CHECK_TYPE ,
+					.m_may_create = false };
 	struct extent_info ei = {0, 0, 0};
 	pgoff_t pg_start, pg_end, next_pgofs;
 	unsigned int blk_per_seg = sbi->blocks_per_seg;
@@ -2535,12 +2566,15 @@
 	if (f2fs_readonly(sbi->sb))
 		return -EROFS;
 
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		return -EINVAL;
+
 	if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
 							sizeof(range)))
 		return -EFAULT;
 
-	if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num ||
-			sbi->segs_per_sec != 1) {
+	if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num ||
+			__is_large_section(sbi)) {
 		f2fs_msg(sbi->sb, KERN_WARNING,
 			"Can't flush %u in %d for segs_per_sec %u != 1\n",
 				range.dev_num, sbi->s_ndevs,
@@ -2593,18 +2627,33 @@
 }
 
 #ifdef CONFIG_QUOTA
+int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
+{
+	struct dquot *transfer_to[MAXQUOTAS] = {};
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct super_block *sb = sbi->sb;
+	int err = 0;
+
+	transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
+	if (!IS_ERR(transfer_to[PRJQUOTA])) {
+		err = __dquot_transfer(inode, transfer_to);
+		if (err)
+			set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+		dqput(transfer_to[PRJQUOTA]);
+	}
+	return err;
+}
+
 static int f2fs_ioc_setproject(struct file *filp, __u32 projid)
 {
 	struct inode *inode = file_inode(filp);
 	struct f2fs_inode_info *fi = F2FS_I(inode);
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct super_block *sb = sbi->sb;
-	struct dquot *transfer_to[MAXQUOTAS] = {};
 	struct page *ipage;
 	kprojid_t kprojid;
 	int err;
 
-	if (!f2fs_sb_has_project_quota(sb)) {
+	if (!f2fs_sb_has_project_quota(sbi)) {
 		if (projid != F2FS_DEF_PROJID)
 			return -EOPNOTSUPP;
 		else
@@ -2619,53 +2668,45 @@
 	if (projid_eq(kprojid, F2FS_I(inode)->i_projid))
 		return 0;
 
-	err = mnt_want_write_file(filp);
-	if (err)
-		return err;
-
 	err = -EPERM;
-	inode_lock(inode);
-
 	/* Is it quota file? Do not allow user to mess with it */
 	if (IS_NOQUOTA(inode))
-		goto out_unlock;
+		return err;
 
 	ipage = f2fs_get_node_page(sbi, inode->i_ino);
-	if (IS_ERR(ipage)) {
-		err = PTR_ERR(ipage);
-		goto out_unlock;
-	}
+	if (IS_ERR(ipage))
+		return PTR_ERR(ipage);
 
 	if (!F2FS_FITS_IN_INODE(F2FS_INODE(ipage), fi->i_extra_isize,
 								i_projid)) {
 		err = -EOVERFLOW;
 		f2fs_put_page(ipage, 1);
-		goto out_unlock;
+		return err;
 	}
 	f2fs_put_page(ipage, 1);
 
 	err = dquot_initialize(inode);
 	if (err)
-		goto out_unlock;
+		return err;
 
-	transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
-	if (!IS_ERR(transfer_to[PRJQUOTA])) {
-		err = __dquot_transfer(inode, transfer_to);
-		dqput(transfer_to[PRJQUOTA]);
-		if (err)
-			goto out_dirty;
-	}
+	f2fs_lock_op(sbi);
+	err = f2fs_transfer_project_quota(inode, kprojid);
+	if (err)
+		goto out_unlock;
 
 	F2FS_I(inode)->i_projid = kprojid;
 	inode->i_ctime = current_time(inode);
-out_dirty:
 	f2fs_mark_inode_dirty_sync(inode, true);
 out_unlock:
-	inode_unlock(inode);
-	mnt_drop_write_file(filp);
+	f2fs_unlock_op(sbi);
 	return err;
 }
 #else
+int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
+{
+	return 0;
+}
+
 static int f2fs_ioc_setproject(struct file *filp, __u32 projid)
 {
 	if (projid != F2FS_DEF_PROJID)
@@ -2729,7 +2770,7 @@
 	fa.fsx_xflags = f2fs_iflags_to_xflags(fi->i_flags &
 				F2FS_FL_USER_VISIBLE);
 
-	if (f2fs_sb_has_project_quota(inode->i_sb))
+	if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)))
 		fa.fsx_projid = (__u32)from_kprojid(&init_user_ns,
 							fi->i_projid);
 
@@ -2738,6 +2779,30 @@
 	return 0;
 }
 
+static int f2fs_ioctl_check_project(struct inode *inode, struct fsxattr *fa)
+{
+	/*
+	 * Project Quota ID state is only allowed to change from within the init
+	 * namespace. Enforce that restriction only if we are trying to change
+	 * the quota ID state. Everything else is allowed in user namespaces.
+	 */
+	if (current_user_ns() == &init_user_ns)
+		return 0;
+
+	if (__kprojid_val(F2FS_I(inode)->i_projid) != fa->fsx_projid)
+		return -EINVAL;
+
+	if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) {
+		if (!(fa->fsx_xflags & FS_XFLAG_PROJINHERIT))
+			return -EINVAL;
+	} else {
+		if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg)
 {
 	struct inode *inode = file_inode(filp);
@@ -2765,19 +2830,20 @@
 		return err;
 
 	inode_lock(inode);
+	err = f2fs_ioctl_check_project(inode, &fa);
+	if (err)
+		goto out;
 	flags = (fi->i_flags & ~F2FS_FL_XFLAG_VISIBLE) |
 				(flags & F2FS_FL_XFLAG_VISIBLE);
 	err = __f2fs_ioc_setflags(inode, flags);
-	inode_unlock(inode);
-	mnt_drop_write_file(filp);
 	if (err)
-		return err;
+		goto out;
 
 	err = f2fs_ioc_setproject(filp, fa.fsx_projid);
-	if (err)
-		return err;
-
-	return 0;
+out:
+	inode_unlock(inode);
+	mnt_drop_write_file(filp);
+	return err;
 }
 
 int f2fs_pin_file_control(struct inode *inode, bool inc)
@@ -2807,8 +2873,8 @@
 	__u32 pin;
 	int ret = 0;
 
-	if (!inode_owner_or_capable(inode))
-		return -EACCES;
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
 
 	if (get_user(pin, (__u32 __user *)arg))
 		return -EFAULT;
@@ -2879,6 +2945,7 @@
 	map.m_next_pgofs = NULL;
 	map.m_next_extent = &m_next_extent;
 	map.m_seg_type = NO_CHECK_TYPE;
+	map.m_may_create = false;
 	end = F2FS_I_SB(inode)->max_file_blocks;
 
 	while (map.m_lblk < end) {
@@ -2994,7 +3061,8 @@
 				if (!f2fs_overwrite_io(inode, iocb->ki_pos,
 						iov_iter_count(from)) ||
 					f2fs_has_inline_data(inode) ||
-					f2fs_force_buffered_io(inode, WRITE)) {
+					f2fs_force_buffered_io(inode,
+							iocb, from)) {
 						clear_inode_flag(inode,
 								FI_NO_PREALLOC);
 						inode_unlock(inode);
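
The file.c hunk above switches f2fs_file_write_iter() to the new f2fs_force_buffered_io(inode, iocb, from) helper, whose LFS case falls back to buffered I/O when a direct write is not block-aligned. The alignment test reduces to ORing the file offset with the iov_iter alignment and masking by the block size; below is a standalone sketch with the iov_iter simplified to a single (offset, length) pair (unaligned_io_model() is an illustrative name, not a kernel API):

/* Illustrative model of the block-alignment check; not kernel code. */
#include <stdbool.h>
#include <stdio.h>

static bool unaligned_io_model(long long offset, long long len,
			       unsigned int blkbits)
{
	unsigned long long mask = (1ULL << blkbits) - 1;

	/* mirrors: (iocb->ki_pos | iov_iter_alignment(iter)) & blocksize_mask */
	return ((unsigned long long)offset | (unsigned long long)len) & mask;
}

int main(void)
{
	/* 4KiB blocks, i.e. i_blkbits == 12 */
	printf("offset 0,    len 8192: %s\n",
	       unaligned_io_model(0, 8192, 12) ? "buffered" : "direct");
	printf("offset 4096, len 512:  %s\n",
	       unaligned_io_model(4096, 512, 12) ? "buffered" : "direct");
	printf("offset 100,  len 4096: %s\n",
	       unaligned_io_model(100, 4096, 12) ? "buffered" : "direct");
	return 0;
}
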
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index d44b57a..195cf0f 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/gc.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/module.h>
@@ -43,13 +40,16 @@
 		if (gc_th->gc_wake)
 			gc_th->gc_wake = 0;
 
-		if (try_to_freeze())
+		if (try_to_freeze()) {
+			stat_other_skip_bggc_count(sbi);
 			continue;
+		}
 		if (kthread_should_stop())
 			break;
 
 		if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) {
 			increase_sleep_time(gc_th, &wait_ms);
+			stat_other_skip_bggc_count(sbi);
 			continue;
 		}
 
@@ -58,8 +58,10 @@
 			f2fs_stop_checkpoint(sbi, false);
 		}
 
-		if (!sb_start_write_trylock(sbi->sb))
+		if (!sb_start_write_trylock(sbi->sb)) {
+			stat_other_skip_bggc_count(sbi);
 			continue;
+		}
 
 		/*
 		 * [GC triggering condition]
@@ -80,12 +82,15 @@
 			goto do_gc;
 		}
 
-		if (!mutex_trylock(&sbi->gc_mutex))
+		if (!mutex_trylock(&sbi->gc_mutex)) {
+			stat_other_skip_bggc_count(sbi);
 			goto next;
+		}
 
-		if (!is_idle(sbi)) {
+		if (!is_idle(sbi, GC_TIME)) {
 			increase_sleep_time(gc_th, &wait_ms);
 			mutex_unlock(&sbi->gc_mutex);
+			stat_io_skip_bggc_count(sbi);
 			goto next;
 		}
 
@@ -137,7 +142,7 @@
 			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
 	if (IS_ERR(gc_th->f2fs_gc_task)) {
 		err = PTR_ERR(gc_th->f2fs_gc_task);
-		kfree(gc_th);
+		kvfree(gc_th);
 		sbi->gc_thread = NULL;
 	}
 out:
@@ -150,7 +155,7 @@
 	if (!gc_th)
 		return;
 	kthread_stop(gc_th->f2fs_gc_task);
-	kfree(gc_th);
+	kvfree(gc_th);
 	sbi->gc_thread = NULL;
 }
 
@@ -318,8 +323,7 @@
 	p.min_cost = get_max_cost(sbi, &p);
 
 	if (*result != NULL_SEGNO) {
-		if (IS_DATASEG(get_seg_entry(sbi, *result)->type) &&
-			get_valid_blocks(sbi, *result, false) &&
+		if (get_valid_blocks(sbi, *result, false) &&
 			!sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
 			p.min_segno = *result;
 		goto out;
@@ -328,6 +332,22 @@
 	if (p.max_search == 0)
 		goto out;
 
+	if (__is_large_section(sbi) && p.alloc_mode == LFS) {
+		if (sbi->next_victim_seg[BG_GC] != NULL_SEGNO) {
+			p.min_segno = sbi->next_victim_seg[BG_GC];
+			*result = p.min_segno;
+			sbi->next_victim_seg[BG_GC] = NULL_SEGNO;
+			goto got_result;
+		}
+		if (gc_type == FG_GC &&
+				sbi->next_victim_seg[FG_GC] != NULL_SEGNO) {
+			p.min_segno = sbi->next_victim_seg[FG_GC];
+			*result = p.min_segno;
+			sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
+			goto got_result;
+		}
+	}
+
 	last_victim = sm->last_victim[p.gc_mode];
 	if (p.alloc_mode == LFS && gc_type == FG_GC) {
 		p.min_segno = check_bg_victims(sbi);
@@ -365,6 +385,10 @@
 
 		if (sec_usage_check(sbi, secno))
 			goto next;
+		/* Don't touch checkpointed data */
+		if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+					get_ckpt_valid_blocks(sbi, segno)))
+			goto next;
 		if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
 			goto next;
 
@@ -386,6 +410,8 @@
 	}
 	if (p.min_segno != NULL_SEGNO) {
 got_it:
+		*result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
+got_result:
 		if (p.alloc_mode == LFS) {
 			secno = GET_SEC_FROM_SEG(sbi, p.min_segno);
 			if (gc_type == FG_GC)
@@ -393,13 +419,13 @@
 			else
 				set_bit(secno, dirty_i->victim_secmap);
 		}
-		*result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
 
+	}
+out:
+	if (p.min_segno != NULL_SEGNO)
 		trace_f2fs_get_victim(sbi->sb, type, gc_type, &p,
 				sbi->cur_victim_sec,
 				prefree_segments(sbi), free_segments(sbi));
-	}
-out:
 	mutex_unlock(&dirty_i->seglist_lock);
 
 	return (p.min_segno == NULL_SEGNO) ? 0 : 1;
@@ -464,7 +490,7 @@
  * On validity, copy that node with cold status, otherwise (invalid node)
  * ignore that.
  */
-static void gc_node_segment(struct f2fs_sb_info *sbi,
+static int gc_node_segment(struct f2fs_sb_info *sbi,
 		struct f2fs_summary *sum, unsigned int segno, int gc_type)
 {
 	struct f2fs_summary *entry;
@@ -472,6 +498,7 @@
 	int off;
 	int phase = 0;
 	bool fggc = (gc_type == FG_GC);
+	int submitted = 0;
 
 	start_addr = START_BLOCK(sbi, segno);
 
@@ -485,10 +512,11 @@
 		nid_t nid = le32_to_cpu(entry->nid);
 		struct page *node_page;
 		struct node_info ni;
+		int err;
 
 		/* stop BG_GC if there is not enough free sections. */
 		if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0))
-			return;
+			return submitted;
 
 		if (check_valid_map(sbi, segno, off) == 0)
 			continue;
@@ -525,7 +553,9 @@
 			continue;
 		}
 
-		f2fs_move_node_page(node_page, gc_type);
+		err = f2fs_move_node_page(node_page, gc_type);
+		if (!err && gc_type == FG_GC)
+			submitted++;
 		stat_inc_node_blk_count(sbi, 1, gc_type);
 	}
 
@@ -534,6 +564,7 @@
 
 	if (fggc)
 		atomic_dec(&sbi->wb_sync_req[NODE]);
+	return submitted;
 }
 
 /*
@@ -644,6 +675,14 @@
 	fio.page = page;
 	fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
 
+	/*
+	 * don't cache encrypted data into meta inode until previous dirty
+	 * data were writebacked to avoid racing between GC and flush.
+	 */
+	f2fs_wait_on_page_writeback(page, DATA, true, true);
+
+	f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
+
 	fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(sbi),
 					dn.data_blkaddr,
 					FGP_LOCK | FGP_CREAT, GFP_NOFS);
@@ -669,7 +708,7 @@
  * Move data block via META_MAPPING while keeping locked data page.
  * This can be used to move blocks, aka LBAs, directly on disk.
  */
-static void move_data_block(struct inode *inode, block_t bidx,
+static int move_data_block(struct inode *inode, block_t bidx,
 				int gc_type, unsigned int segno, int off)
 {
 	struct f2fs_io_info fio = {
@@ -688,25 +727,29 @@
 	struct node_info ni;
 	struct page *page, *mpage;
 	block_t newaddr;
-	int err;
+	int err = 0;
 	bool lfs_mode = test_opt(fio.sbi, LFS);
 
 	/* do not read out */
 	page = f2fs_grab_cache_page(inode->i_mapping, bidx, false);
 	if (!page)
-		return;
+		return -ENOMEM;
 
-	if (!check_valid_map(F2FS_I_SB(inode), segno, off))
+	if (!check_valid_map(F2FS_I_SB(inode), segno, off)) {
+		err = -ENOENT;
 		goto out;
+	}
 
 	if (f2fs_is_atomic_file(inode)) {
 		F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++;
 		F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++;
+		err = -EAGAIN;
 		goto out;
 	}
 
 	if (f2fs_is_pinned_file(inode)) {
 		f2fs_pin_file_control(inode, true);
+		err = -EAGAIN;
 		goto out;
 	}
 
@@ -717,6 +760,7 @@
 
 	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
 		ClearPageUptodate(page);
+		err = -ENOENT;
 		goto put_out;
 	}
 
@@ -724,7 +768,9 @@
 	 * don't cache encrypted data into meta inode until previous dirty
 	 * data were writebacked to avoid racing between GC and flush.
 	 */
-	f2fs_wait_on_page_writeback(page, DATA, true);
+	f2fs_wait_on_page_writeback(page, DATA, true, true);
+
+	f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
 
 	err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
 	if (err)
@@ -783,8 +829,8 @@
 	}
 
 write_page:
+	f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true, true);
 	set_page_dirty(fio.encrypted_page);
-	f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true);
 	if (clear_page_dirty_for_io(fio.encrypted_page))
 		dec_page_count(fio.sbi, F2FS_DIRTY_META);
 
@@ -792,13 +838,14 @@
 	ClearPageError(page);
 
 	/* allocate block address */
-	f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
+	f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true);
 
 	fio.op = REQ_OP_WRITE;
 	fio.op_flags = REQ_SYNC;
 	fio.new_blkaddr = newaddr;
 	f2fs_submit_page_write(&fio);
 	if (fio.retry) {
+		err = -EAGAIN;
 		if (PageWriteback(fio.encrypted_page))
 			end_page_writeback(fio.encrypted_page);
 		goto put_page_out;
@@ -822,34 +869,42 @@
 	f2fs_put_dnode(&dn);
 out:
 	f2fs_put_page(page, 1);
+	return err;
 }
 
-static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
+static int move_data_page(struct inode *inode, block_t bidx, int gc_type,
 							unsigned int segno, int off)
 {
 	struct page *page;
+	int err = 0;
 
 	page = f2fs_get_lock_data_page(inode, bidx, true);
 	if (IS_ERR(page))
-		return;
+		return PTR_ERR(page);
 
-	if (!check_valid_map(F2FS_I_SB(inode), segno, off))
+	if (!check_valid_map(F2FS_I_SB(inode), segno, off)) {
+		err = -ENOENT;
 		goto out;
+	}
 
 	if (f2fs_is_atomic_file(inode)) {
 		F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++;
 		F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++;
+		err = -EAGAIN;
 		goto out;
 	}
 	if (f2fs_is_pinned_file(inode)) {
 		if (gc_type == FG_GC)
 			f2fs_pin_file_control(inode, true);
+		err = -EAGAIN;
 		goto out;
 	}
 
 	if (gc_type == BG_GC) {
-		if (PageWriteback(page))
+		if (PageWriteback(page)) {
+			err = -EAGAIN;
 			goto out;
+		}
 		set_page_dirty(page);
 		set_cold_data(page);
 	} else {
@@ -867,11 +922,11 @@
 			.io_type = FS_GC_DATA_IO,
 		};
 		bool is_dirty = PageDirty(page);
-		int err;
 
 retry:
+		f2fs_wait_on_page_writeback(page, DATA, true, true);
+
 		set_page_dirty(page);
-		f2fs_wait_on_page_writeback(page, DATA, true);
 		if (clear_page_dirty_for_io(page)) {
 			inode_dec_dirty_pages(inode);
 			f2fs_remove_dirty_inode(inode);
@@ -892,6 +947,7 @@
 	}
 out:
 	f2fs_put_page(page, 1);
+	return err;
 }
 
 /*
@@ -901,7 +957,7 @@
  * If the parent node is not valid or the data block address is different,
  * the victim data block is ignored.
  */
-static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
+static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
 		struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
 {
 	struct super_block *sb = sbi->sb;
@@ -909,6 +965,7 @@
 	block_t start_addr;
 	int off;
 	int phase = 0;
+	int submitted = 0;
 
 	start_addr = START_BLOCK(sbi, segno);
 
@@ -925,7 +982,7 @@
 
 		/* stop BG_GC if there is not enough free sections. */
 		if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0))
-			return;
+			return submitted;
 
 		if (check_valid_map(sbi, segno, off) == 0)
 			continue;
@@ -997,6 +1054,7 @@
 		if (inode) {
 			struct f2fs_inode_info *fi = F2FS_I(inode);
 			bool locked = false;
+			int err;
 
 			if (S_ISREG(inode->i_mode)) {
 				if (!down_write_trylock(&fi->i_gc_rwsem[READ]))
@@ -1016,12 +1074,16 @@
 			start_bidx = f2fs_start_bidx_of_node(nofs, inode)
 								+ ofs_in_node;
 			if (f2fs_post_read_required(inode))
-				move_data_block(inode, start_bidx, gc_type,
-								segno, off);
+				err = move_data_block(inode, start_bidx,
+							gc_type, segno, off);
 			else
-				move_data_page(inode, start_bidx, gc_type,
+				err = move_data_page(inode, start_bidx, gc_type,
 								segno, off);
 
+			if (!err && (gc_type == FG_GC ||
+					f2fs_post_read_required(inode)))
+				submitted++;
+
 			if (locked) {
 				up_write(&fi->i_gc_rwsem[WRITE]);
 				up_write(&fi->i_gc_rwsem[READ]);
@@ -1033,6 +1095,8 @@
 
 	if (++phase < 5)
 		goto next_step;
+
+	return submitted;
 }
 
 static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
@@ -1057,18 +1121,34 @@
 	struct blk_plug plug;
 	unsigned int segno = start_segno;
 	unsigned int end_segno = start_segno + sbi->segs_per_sec;
-	int seg_freed = 0;
+	int seg_freed = 0, migrated = 0;
 	unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
 						SUM_TYPE_DATA : SUM_TYPE_NODE;
+	int submitted = 0;
+
+	if (__is_large_section(sbi))
+		end_segno = rounddown(end_segno, sbi->segs_per_sec);
 
 	/* readahead multi ssa blocks those have contiguous address */
-	if (sbi->segs_per_sec > 1)
+	if (__is_large_section(sbi))
 		f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
-					sbi->segs_per_sec, META_SSA, true);
+					end_segno - segno, META_SSA, true);
 
 	/* reference all summary page */
 	while (segno < end_segno) {
 		sum_page = f2fs_get_sum_page(sbi, segno++);
+		if (IS_ERR(sum_page)) {
+			int err = PTR_ERR(sum_page);
+
+			end_segno = segno - 1;
+			for (segno = start_segno; segno < end_segno; segno++) {
+				sum_page = find_get_page(META_MAPPING(sbi),
+						GET_SUM_BLOCK(sbi, segno));
+				f2fs_put_page(sum_page, 0);
+				f2fs_put_page(sum_page, 0);
+			}
+			return err;
+		}
 		unlock_page(sum_page);
 	}
 
@@ -1081,10 +1161,13 @@
 					GET_SUM_BLOCK(sbi, segno));
 		f2fs_put_page(sum_page, 0);
 
-		if (get_valid_blocks(sbi, segno, false) == 0 ||
-				!PageUptodate(sum_page) ||
-				unlikely(f2fs_cp_error(sbi)))
-			goto next;
+		if (get_valid_blocks(sbi, segno, false) == 0)
+			goto freed;
+		if (__is_large_section(sbi) &&
+				migrated >= sbi->migration_granularity)
+			goto skip;
+		if (!PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi)))
+			goto skip;
 
 		sum = page_address(sum_page);
 		if (type != GET_SUM_TYPE((&sum->footer))) {
@@ -1092,7 +1175,7 @@
 				"type [%d, %d] in SSA and SIT",
 				segno, type, GET_SUM_TYPE((&sum->footer)));
 			set_sbi_flag(sbi, SBI_NEED_FSCK);
-			goto next;
+			goto skip;
 		}
 
 		/*
@@ -1103,21 +1186,27 @@
 		 *                                  - lock_page(sum_page)
 		 */
 		if (type == SUM_TYPE_NODE)
-			gc_node_segment(sbi, sum->entries, segno, gc_type);
-		else
-			gc_data_segment(sbi, sum->entries, gc_list, segno,
+			submitted += gc_node_segment(sbi, sum->entries, segno,
 								gc_type);
+		else
+			submitted += gc_data_segment(sbi, sum->entries, gc_list,
+							segno, gc_type);
 
 		stat_inc_seg_count(sbi, type, gc_type);
 
+freed:
 		if (gc_type == FG_GC &&
 				get_valid_blocks(sbi, segno, false) == 0)
 			seg_freed++;
-next:
+		migrated++;
+
+		if (__is_large_section(sbi) && segno + 1 < end_segno)
+			sbi->next_victim_seg[gc_type] = segno + 1;
+skip:
 		f2fs_put_page(sum_page, 0);
 	}
 
-	if (gc_type == FG_GC)
+	if (submitted)
 		f2fs_submit_merged_write(sbi,
 				(type == SUM_TYPE_NODE) ? NODE : DATA);
 
@@ -1172,7 +1261,8 @@
 		 * threshold, we can make them free by checkpoint. Then, we
 		 * secure free segments which doesn't need fggc any more.
 		 */
-		if (prefree_segments(sbi)) {
+		if (prefree_segments(sbi) &&
+				!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
 			ret = f2fs_write_checkpoint(sbi, &cpc);
 			if (ret)
 				goto stop;
@@ -1224,7 +1314,7 @@
 			segno = NULL_SEGNO;
 			goto gc_more;
 		}
-		if (gc_type == FG_GC)
+		if (gc_type == FG_GC && !is_sbi_flag_set(sbi, SBI_CP_DISABLED))
 			ret = f2fs_write_checkpoint(sbi, &cpc);
 	}
 stop:
@@ -1244,7 +1334,7 @@
 
 	put_gc_inode(&gc_list);
 
-	if (sync)
+	if (sync && !ret)
 		ret = sec_freed ? 0 : -EAGAIN;
 	return ret;
 }
@@ -1256,7 +1346,7 @@
 	sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES;
 
 	/* give warm/cold data area from slower device */
-	if (f2fs_is_multi_device(sbi) && sbi->segs_per_sec == 1)
+	if (sbi->s_ndevs && !__is_large_section(sbi))
 		SIT_I(sbi)->last_victim[ALLOC_NEXT] =
 				GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
 }
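
For large sections (__is_large_section(), i.e. segs_per_sec > 1), the gc.c hunks above cap each do_garbage_collect() round at migration_granularity segments and record the resume point in next_victim_seg so the following round continues inside the same section. A simplified standalone model of that resume loop (struct gc_model, gc_one_round() and rounddown_model() are illustrative names; victim selection, valid-block checks and the per-gc-type arrays are omitted):

/* Illustrative model of capped per-round migration with resume; not kernel code. */
#include <stdio.h>

#define NULL_SEGNO	(~0U)

struct gc_model {
	unsigned int segs_per_sec;		/* segments per section */
	unsigned int migration_granularity;	/* max segments per round */
	unsigned int next_victim_seg;		/* resume point, NULL_SEGNO if none */
};

static unsigned int rounddown_model(unsigned int x, unsigned int y)
{
	return x - (x % y);
}

/* Clean (part of) the section containing start_segno; return segments moved. */
static unsigned int gc_one_round(struct gc_model *gc, unsigned int start_segno)
{
	unsigned int end_segno = rounddown_model(start_segno + gc->segs_per_sec,
						 gc->segs_per_sec);
	unsigned int segno, migrated = 0;

	gc->next_victim_seg = NULL_SEGNO;
	for (segno = start_segno; segno < end_segno; segno++) {
		if (migrated >= gc->migration_granularity) {
			gc->next_victim_seg = segno;	/* resume here next round */
			break;
		}
		printf("  migrate segment %u\n", segno);
		migrated++;
	}
	return migrated;
}

int main(void)
{
	struct gc_model gc = {
		.segs_per_sec = 8,		/* one "large" section of 8 segments */
		.migration_granularity = 3,	/* small cap, just for the demo */
		.next_victim_seg = NULL_SEGNO,
	};
	unsigned int start = 64;		/* first segment of the victim section */
	int round = 0;

	do {
		printf("round %d:\n", ++round);
		gc_one_round(&gc, start);
		start = gc.next_victim_seg;
	} while (start != NULL_SEGNO);

	return 0;
}
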
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index c8619e4..bbac9d3 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/gc.h
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #define GC_THREAD_MIN_WB_PAGES		1	/*
 						 * a threshold to determine
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index eb2e031..cc82f14 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/hash.c
  *
@@ -7,10 +8,6 @@
  * Portions of this code from linux/fs/ext3/hash.c
  *
  * Copyright (C) 2002 by Theodore Ts'o
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/types.h>
 #include <linux/fs.h>
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 92703ef..8422a13 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/inline.c
  * Copyright (c) 2013, Intel Corporation
  * Authors: Huajun Li <huajun.li@intel.com>
  *          Haicheng Li <haicheng.li@intel.com>
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/fs.h>
@@ -13,6 +11,7 @@
 
 #include "f2fs.h"
 #include "node.h"
+#include <trace/events/android_fs.h>
 
 bool f2fs_may_inline_data(struct inode *inode)
 {
@@ -74,7 +73,7 @@
 
 	addr = inline_data_addr(inode, ipage);
 
-	f2fs_wait_on_page_writeback(ipage, NODE, true);
+	f2fs_wait_on_page_writeback(ipage, NODE, true, true);
 	memset(addr + from, 0, MAX_INLINE_DATA(inode) - from);
 	set_page_dirty(ipage);
 
@@ -86,14 +85,29 @@
 {
 	struct page *ipage;
 
+	if (trace_android_fs_dataread_start_enabled()) {
+		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+		path = android_fstrace_get_pathname(pathbuf,
+						    MAX_TRACE_PATHBUF_LEN,
+						    inode);
+		trace_android_fs_dataread_start(inode, page_offset(page),
+						PAGE_SIZE, current->pid,
+						path, current->comm);
+	}
+
 	ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
 	if (IS_ERR(ipage)) {
+		trace_android_fs_dataread_end(inode, page_offset(page),
+					      PAGE_SIZE);
 		unlock_page(page);
 		return PTR_ERR(ipage);
 	}
 
 	if (!f2fs_has_inline_data(inode)) {
 		f2fs_put_page(ipage, 1);
+		trace_android_fs_dataread_end(inode, page_offset(page),
+					      PAGE_SIZE);
 		return -EAGAIN;
 	}
 
@@ -105,6 +119,8 @@
 	if (!PageUptodate(page))
 		SetPageUptodate(page);
 	f2fs_put_page(ipage, 1);
+	trace_android_fs_dataread_end(inode, page_offset(page),
+				      PAGE_SIZE);
 	unlock_page(page);
 	return 0;
 }
@@ -163,7 +179,7 @@
 	fio.old_blkaddr = dn->data_blkaddr;
 	set_inode_flag(dn->inode, FI_HOT_DATA);
 	f2fs_outplace_write_data(dn, &fio);
-	f2fs_wait_on_page_writeback(page, DATA, true);
+	f2fs_wait_on_page_writeback(page, DATA, true, true);
 	if (dirty) {
 		inode_dec_dirty_pages(dn->inode);
 		f2fs_remove_dirty_inode(dn->inode);
@@ -238,7 +254,7 @@
 
 	f2fs_bug_on(F2FS_I_SB(inode), page->index);
 
-	f2fs_wait_on_page_writeback(dn.inode_page, NODE, true);
+	f2fs_wait_on_page_writeback(dn.inode_page, NODE, true, true);
 	src_addr = kmap_atomic(page);
 	dst_addr = inline_data_addr(inode, dn.inode_page);
 	memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode));
@@ -279,7 +295,7 @@
 		ipage = f2fs_get_node_page(sbi, inode->i_ino);
 		f2fs_bug_on(sbi, IS_ERR(ipage));
 
-		f2fs_wait_on_page_writeback(ipage, NODE, true);
+		f2fs_wait_on_page_writeback(ipage, NODE, true, true);
 
 		src_addr = inline_data_addr(inode, npage);
 		dst_addr = inline_data_addr(inode, ipage);
@@ -393,7 +409,7 @@
 		goto out;
 	}
 
-	f2fs_wait_on_page_writeback(page, DATA, true);
+	f2fs_wait_on_page_writeback(page, DATA, true, true);
 
 	dentry_blk = page_address(page);
 
@@ -503,18 +519,18 @@
 
 	stat_dec_inline_dir(dir);
 	clear_inode_flag(dir, FI_INLINE_DENTRY);
-	kfree(backup_dentry);
+	kvfree(backup_dentry);
 	return 0;
 recover:
 	lock_page(ipage);
-	f2fs_wait_on_page_writeback(ipage, NODE, true);
+	f2fs_wait_on_page_writeback(ipage, NODE, true, true);
 	memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir));
 	f2fs_i_depth_write(dir, 0);
 	f2fs_i_size_write(dir, MAX_INLINE_DATA(dir));
 	set_page_dirty(ipage);
 	f2fs_put_page(ipage, 1);
 
-	kfree(backup_dentry);
+	kvfree(backup_dentry);
 	return err;
 }
 
@@ -567,7 +583,7 @@
 		}
 	}
 
-	f2fs_wait_on_page_writeback(ipage, NODE, true);
+	f2fs_wait_on_page_writeback(ipage, NODE, true, true);
 
 	name_hash = f2fs_dentry_hash(new_name, NULL);
 	f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos);
@@ -599,7 +615,7 @@
 	int i;
 
 	lock_page(page);
-	f2fs_wait_on_page_writeback(page, NODE, true);
+	f2fs_wait_on_page_writeback(page, NODE, true, true);
 
 	inline_dentry = inline_data_addr(dir, page);
 	make_dentry_ptr_inline(dir, &d, inline_dentry);
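
Note on the tracing hunks above: the android_fs read tracepoints are meant to be emitted in start/end pairs, with the end event fired on every exit path (error or success) of the read. The sketch below is illustrative only and not part of the patch; f2fs_traced_read() and do_inline_read() are hypothetical names, while the trace helpers and MAX_TRACE_PATHBUF_LEN are the ones the hunks themselves use.

/* Sketch only (assumes fs/f2fs and trace/events/android_fs.h context):
 * shows the start/end pairing the inline.c hunks add around the inline
 * data read path. do_inline_read() is a hypothetical placeholder. */
static int f2fs_traced_read(struct inode *inode, struct page *page)
{
	int ret;

	if (trace_android_fs_dataread_start_enabled()) {
		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];

		path = android_fstrace_get_pathname(pathbuf,
						    MAX_TRACE_PATHBUF_LEN,
						    inode);
		trace_android_fs_dataread_start(inode, page_offset(page),
						PAGE_SIZE, current->pid,
						path, current->comm);
	}

	ret = do_inline_read(inode, page);	/* hypothetical placeholder */

	/* the matching end event must fire on every return path */
	trace_android_fs_dataread_end(inode, page_offset(page), PAGE_SIZE);
	return ret;
}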
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 0f31df0..2812f19 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/inode.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
@@ -17,6 +14,7 @@
 #include "f2fs.h"
 #include "node.h"
 #include "segment.h"
+#include "xattr.h"
 
 #include <trace/events/f2fs.h>
 
@@ -106,7 +104,7 @@
 
 	while (start < end) {
 		if (*start++) {
-			f2fs_wait_on_page_writeback(ipage, NODE, true);
+			f2fs_wait_on_page_writeback(ipage, NODE, true, true);
 
 			set_inode_flag(inode, FI_DATA_EXIST);
 			set_raw_inline(inode, F2FS_INODE(ipage));
@@ -121,7 +119,7 @@
 {
 	struct f2fs_inode *ri = &F2FS_NODE(page)->i;
 
-	if (!f2fs_sb_has_inode_chksum(sbi->sb))
+	if (!f2fs_sb_has_inode_chksum(sbi))
 		return false;
 
 	if (!IS_INODE(page) || !(ri->i_inline & F2FS_EXTRA_ATTR))
@@ -221,7 +219,7 @@
 		return false;
 	}
 
-	if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)
+	if (f2fs_sb_has_flexible_inline_xattr(sbi)
 			&& !f2fs_has_extra_attr(inode)) {
 		set_sbi_flag(sbi, SBI_NEED_FSCK);
 		f2fs_msg(sbi->sb, KERN_WARNING,
@@ -231,7 +229,7 @@
 	}
 
 	if (f2fs_has_extra_attr(inode) &&
-			!f2fs_sb_has_extra_attr(sbi->sb)) {
+			!f2fs_sb_has_extra_attr(sbi)) {
 		set_sbi_flag(sbi, SBI_NEED_FSCK);
 		f2fs_msg(sbi->sb, KERN_WARNING,
 			"%s: inode (ino=%lx) is with extra_attr, "
@@ -251,6 +249,20 @@
 		return false;
 	}
 
+	if (f2fs_has_extra_attr(inode) &&
+		f2fs_sb_has_flexible_inline_xattr(sbi) &&
+		f2fs_has_inline_xattr(inode) &&
+		(!fi->i_inline_xattr_size ||
+		fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) {
+		set_sbi_flag(sbi, SBI_NEED_FSCK);
+		f2fs_msg(sbi->sb, KERN_WARNING,
+			"%s: inode (ino=%lx) has corrupted "
+			"i_inline_xattr_size: %d, max: %zu",
+			__func__, inode->i_ino, fi->i_inline_xattr_size,
+			MAX_INLINE_XATTR_SIZE);
+		return false;
+	}
+
 	if (F2FS_I(inode)->extent_tree) {
 		struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest;
 
@@ -343,7 +355,7 @@
 	fi->i_extra_isize = f2fs_has_extra_attr(inode) ?
 					le16_to_cpu(ri->i_extra_isize) : 0;
 
-	if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) {
+	if (f2fs_sb_has_flexible_inline_xattr(sbi)) {
 		fi->i_inline_xattr_size = le16_to_cpu(ri->i_inline_xattr_size);
 	} else if (f2fs_has_inline_xattr(inode) ||
 				f2fs_has_inline_dentry(inode)) {
@@ -393,14 +405,14 @@
 	if (fi->i_flags & F2FS_PROJINHERIT_FL)
 		set_inode_flag(inode, FI_PROJ_INHERIT);
 
-	if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi->sb) &&
+	if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi) &&
 			F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid))
 		i_projid = (projid_t)le32_to_cpu(ri->i_projid);
 	else
 		i_projid = F2FS_DEF_PROJID;
 	fi->i_projid = make_kprojid(&init_user_ns, i_projid);
 
-	if (f2fs_has_extra_attr(inode) && f2fs_sb_has_inode_crtime(sbi->sb) &&
+	if (f2fs_has_extra_attr(inode) && f2fs_sb_has_inode_crtime(sbi) &&
 			F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
 		fi->i_crtime.tv_sec = le64_to_cpu(ri->i_crtime);
 		fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec);
@@ -501,7 +513,7 @@
 	struct f2fs_inode *ri;
 	struct extent_tree *et = F2FS_I(inode)->extent_tree;
 
-	f2fs_wait_on_page_writeback(node_page, NODE, true);
+	f2fs_wait_on_page_writeback(node_page, NODE, true, true);
 	set_page_dirty(node_page);
 
 	f2fs_inode_synced(inode);
@@ -546,11 +558,11 @@
 	if (f2fs_has_extra_attr(inode)) {
 		ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize);
 
-		if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode)->sb))
+		if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode)))
 			ri->i_inline_xattr_size =
 				cpu_to_le16(F2FS_I(inode)->i_inline_xattr_size);
 
-		if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) &&
+		if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) &&
 			F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
 								i_projid)) {
 			projid_t i_projid;
@@ -560,7 +572,7 @@
 			ri->i_projid = cpu_to_le32(i_projid);
 		}
 
-		if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)->sb) &&
+		if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) &&
 			F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
 								i_crtime)) {
 			ri->i_crtime =
@@ -617,6 +629,9 @@
 	if (!is_inode_flag_set(inode, FI_DIRTY_INODE))
 		return 0;
 
+	if (f2fs_is_checkpoint_ready(sbi))
+		return -ENOSPC;
+
 	/*
 	 * We need to balance fs here to prevent from producing dirty node pages
 	 * during the urgent cleaning time when running out of free sections.
@@ -655,7 +670,11 @@
 	if (inode->i_nlink || is_bad_inode(inode))
 		goto no_delete;
 
-	dquot_initialize(inode);
+	err = dquot_initialize(inode);
+	if (err) {
+		err = 0;
+		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+	}
 
 	f2fs_remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
 	f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
@@ -687,9 +706,10 @@
 		goto retry;
 	}
 
-	if (err)
+	if (err) {
 		f2fs_update_inode_page(inode);
-	dquot_free_inode(inode);
+		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+	}
 	sb_end_intwrite(inode->i_sb);
 no_delete:
 	dquot_drop(inode);
@@ -698,7 +718,8 @@
 	stat_dec_inline_dir(inode);
 	stat_dec_inline_inode(inode);
 
-	if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG)))
+	if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG) &&
+				!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
 		f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE));
 	else
 		f2fs_inode_synced(inode);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 1f67e38..f218d94 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -1,24 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/namei.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
 #include <linux/pagemap.h>
 #include <linux/sched.h>
 #include <linux/ctype.h>
+#include <linux/random.h>
 #include <linux/dcache.h>
 #include <linux/namei.h>
 #include <linux/quotaops.h>
 
 #include "f2fs.h"
 #include "node.h"
+#include "segment.h"
 #include "xattr.h"
 #include "acl.h"
 #include <trace/events/f2fs.h>
@@ -52,7 +51,7 @@
 	inode->i_blocks = 0;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
 	F2FS_I(inode)->i_crtime = inode->i_mtime;
-	inode->i_generation = sbi->s_next_generation++;
+	inode->i_generation = prandom_u32();
 
 	if (S_ISDIR(inode->i_mode))
 		F2FS_I(inode)->i_current_depth = 1;
@@ -63,7 +62,7 @@
 		goto fail;
 	}
 
-	if (f2fs_sb_has_project_quota(sbi->sb) &&
+	if (f2fs_sb_has_project_quota(sbi) &&
 		(F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL))
 		F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid;
 	else
@@ -74,10 +73,6 @@
 	if (err)
 		goto fail_drop;
 
-	err = dquot_alloc_inode(inode);
-	if (err)
-		goto fail_drop;
-
 	set_inode_flag(inode, FI_NEW_INODE);
 
 	/* If the directory is encrypted, then we should encrypt the inode. */
@@ -85,7 +80,7 @@
 				f2fs_may_encrypt(inode))
 		f2fs_set_encrypted_inode(inode);
 
-	if (f2fs_sb_has_extra_attr(sbi->sb)) {
+	if (f2fs_sb_has_extra_attr(sbi)) {
 		set_inode_flag(inode, FI_EXTRA_ATTR);
 		F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE;
 	}
@@ -98,7 +93,7 @@
 	if (f2fs_may_inline_dentry(inode))
 		set_inode_flag(inode, FI_INLINE_DENTRY);
 
-	if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) {
+	if (f2fs_sb_has_flexible_inline_xattr(sbi)) {
 		f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode));
 		if (f2fs_has_inline_xattr(inode))
 			xattr_size = F2FS_OPTION(sbi).inline_xattr_size;
@@ -124,6 +119,8 @@
 	if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL)
 		set_inode_flag(inode, FI_PROJ_INHERIT);
 
+	f2fs_set_inode_flags(inode);
+
 	trace_f2fs_new_inode(inode, 0);
 	return inode;
 
@@ -184,16 +181,19 @@
 	hot_count = sbi->raw_super->hot_ext_count;
 
 	for (i = 0; i < cold_count + hot_count; i++) {
-		if (!is_extension_exist(name, extlist[i]))
-			continue;
-		if (i < cold_count)
-			file_set_cold(inode);
-		else
-			file_set_hot(inode);
-		break;
+		if (is_extension_exist(name, extlist[i]))
+			break;
 	}
 
 	up_read(&sbi->sb_lock);
+
+	if (i == cold_count + hot_count)
+		return;
+
+	if (i < cold_count)
+		file_set_cold(inode);
+	else
+		file_set_hot(inode);
 }
 
 int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
@@ -272,6 +272,9 @@
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	err = dquot_initialize(dir);
 	if (err)
@@ -318,6 +321,9 @@
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	err = fscrypt_prepare_link(old_dentry, dir, dentry);
 	if (err)
@@ -564,6 +570,9 @@
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize,
 				      &disk_link);
@@ -627,7 +636,7 @@
 	f2fs_handle_failed_inode(inode);
 out_free_encrypted_link:
 	if (disk_link.name != (unsigned char *)symname)
-		kfree(disk_link.name);
+		kvfree(disk_link.name);
 	return err;
 }
 
@@ -693,6 +702,9 @@
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	err = dquot_initialize(dir);
 	if (err)
@@ -823,10 +835,13 @@
 	struct f2fs_dir_entry *old_entry;
 	struct f2fs_dir_entry *new_entry;
 	bool is_old_inline = f2fs_has_inline_dentry(old_dir);
-	int err = -ENOENT;
+	int err;
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
 			(!projid_eq(F2FS_I(new_dir)->i_projid,
@@ -847,6 +862,7 @@
 			goto out;
 	}
 
+	err = -ENOENT;
 	old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
 	if (!old_entry) {
 		if (IS_ERR(old_page))
@@ -983,6 +999,8 @@
 
 	if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
 		f2fs_sync_fs(sbi->sb, 1);
+
+	f2fs_update_time(sbi, REQ_TIME);
 	return 0;
 
 put_out_dir:
@@ -1012,10 +1030,13 @@
 	struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL;
 	struct f2fs_dir_entry *old_entry, *new_entry;
 	int old_nlink = 0, new_nlink = 0;
-	int err = -ENOENT;
+	int err;
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
 			!projid_eq(F2FS_I(new_dir)->i_projid,
@@ -1033,6 +1054,7 @@
 	if (err)
 		goto out;
 
+	err = -ENOENT;
 	old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
 	if (!old_entry) {
 		if (IS_ERR(old_page))
@@ -1136,6 +1158,8 @@
 
 	if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
 		f2fs_sync_fs(sbi->sb, 1);
+
+	f2fs_update_time(sbi, REQ_TIME);
 	return 0;
 out_new_dir:
 	if (new_dir_entry) {
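
The three-line guard added to each namespace operation above follows one pattern: bail out before touching any metadata if a checkpoint cannot currently be guaranteed. A minimal sketch of that entry check follows; f2fs_do_namespace_op() is a hypothetical stand-in for create/link/symlink/mkdir/rename, while the helpers it calls are the ones this patch introduces or already uses.

/* Sketch only: the common entry guard the namei.c hunks add. */
static int f2fs_do_namespace_op(struct inode *dir)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
	int err;

	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;			/* fs already hit a CP error */

	/* returns -ENOSPC while checkpoint=disable and free sections run low */
	err = f2fs_is_checkpoint_ready(sbi);
	if (err)
		return err;

	/* ... the actual operation starts only after both checks pass ... */
	return 0;
}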
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index e2d9eda..e662b5d 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/node.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
@@ -129,6 +126,8 @@
 
 	/* get current nat block page with lock */
 	src_page = get_current_nat_page(sbi, nid);
+	if (IS_ERR(src_page))
+		return src_page;
 	dst_page = f2fs_grab_meta_page(sbi, dst_off);
 	f2fs_bug_on(sbi, PageDirty(src_page));
 
@@ -1107,7 +1106,7 @@
 				ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
 			lock_page(page);
 			BUG_ON(page->mapping != NODE_MAPPING(sbi));
-			f2fs_wait_on_page_writeback(page, NODE, true);
+			f2fs_wait_on_page_writeback(page, NODE, true, true);
 			ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
 			set_page_dirty(page);
 			unlock_page(page);
@@ -1241,7 +1240,7 @@
 	new_ni.version = 0;
 	set_node_addr(sbi, &new_ni, NEW_ADDR, false);
 
-	f2fs_wait_on_page_writeback(page, NODE, true);
+	f2fs_wait_on_page_writeback(page, NODE, true, true);
 	fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
 	set_cold_node(page, S_ISDIR(dn->inode->i_mode));
 	if (!PageUptodate(page))
@@ -1575,8 +1574,7 @@
 	up_read(&sbi->node_write);
 
 	if (wbc->for_reclaim) {
-		f2fs_submit_merged_write_cond(sbi, page->mapping->host, 0,
-						page->index, NODE);
+		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, NODE);
 		submitted = NULL;
 	}
 
@@ -1598,8 +1596,10 @@
 	return AOP_WRITEPAGE_ACTIVATE;
 }
 
-void f2fs_move_node_page(struct page *node_page, int gc_type)
+int f2fs_move_node_page(struct page *node_page, int gc_type)
 {
+	int err = 0;
+
 	if (gc_type == FG_GC) {
 		struct writeback_control wbc = {
 			.sync_mode = WB_SYNC_ALL,
@@ -1607,16 +1607,20 @@
 			.for_reclaim = 0,
 		};
 
-		set_page_dirty(node_page);
-		f2fs_wait_on_page_writeback(node_page, NODE, true);
+		f2fs_wait_on_page_writeback(node_page, NODE, true, true);
 
-		f2fs_bug_on(F2FS_P_SB(node_page), PageWriteback(node_page));
-		if (!clear_page_dirty_for_io(node_page))
+		set_page_dirty(node_page);
+
+		if (!clear_page_dirty_for_io(node_page)) {
+			err = -EAGAIN;
 			goto out_page;
+		}
 
 		if (__write_node_page(node_page, false, NULL,
-					&wbc, false, FS_GC_NODE_IO, NULL))
+					&wbc, false, FS_GC_NODE_IO, NULL)) {
+			err = -EAGAIN;
 			unlock_page(node_page);
+		}
 		goto release_page;
 	} else {
 		/* set page dirty and write it */
@@ -1627,6 +1631,7 @@
 	unlock_page(node_page);
 release_page:
 	f2fs_put_page(node_page, 0);
+	return err;
 }
 
 static int f2fs_write_node_page(struct page *page,
@@ -1641,13 +1646,13 @@
 			unsigned int *seq_id)
 {
 	pgoff_t index;
-	pgoff_t last_idx = ULONG_MAX;
 	struct pagevec pvec;
 	int ret = 0;
 	struct page *last_page = NULL;
 	bool marked = false;
 	nid_t ino = inode->i_ino;
 	int nr_pages;
+	int nwritten = 0;
 
 	if (atomic) {
 		last_page = last_fsync_dnode(sbi, ino);
@@ -1693,8 +1698,7 @@
 				goto continue_unlock;
 			}
 
-			f2fs_wait_on_page_writeback(page, NODE, true);
-			BUG_ON(PageWriteback(page));
+			f2fs_wait_on_page_writeback(page, NODE, true, true);
 
 			set_fsync_mark(page, 0);
 			set_dentry_mark(page, 0);
@@ -1725,7 +1729,7 @@
 				f2fs_put_page(last_page, 0);
 				break;
 			} else if (submitted) {
-				last_idx = page->index;
+				nwritten++;
 			}
 
 			if (page == last_page) {
@@ -1745,14 +1749,14 @@
 			"Retry to write fsync mark: ino=%u, idx=%lx",
 					ino, last_page->index);
 		lock_page(last_page);
-		f2fs_wait_on_page_writeback(last_page, NODE, true);
+		f2fs_wait_on_page_writeback(last_page, NODE, true, true);
 		set_page_dirty(last_page);
 		unlock_page(last_page);
 		goto retry;
 	}
 out:
-	if (last_idx != ULONG_MAX)
-		f2fs_submit_merged_write_cond(sbi, NULL, ino, last_idx, NODE);
+	if (nwritten)
+		f2fs_submit_merged_write_cond(sbi, NULL, NULL, ino, NODE);
 	return ret ? -EIO: 0;
 }
 
@@ -1826,9 +1830,8 @@
 				goto lock_node;
 			}
 
-			f2fs_wait_on_page_writeback(page, NODE, true);
+			f2fs_wait_on_page_writeback(page, NODE, true, true);
 
-			BUG_ON(PageWriteback(page));
 			if (!clear_page_dirty_for_io(page))
 				goto continue_unlock;
 
@@ -1895,7 +1898,7 @@
 		get_page(page);
 		spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
 
-		f2fs_wait_on_page_writeback(page, NODE, true);
+		f2fs_wait_on_page_writeback(page, NODE, true, false);
 		if (TestClearPageError(page))
 			ret = -EIO;
 
@@ -1926,7 +1929,9 @@
 	f2fs_balance_fs_bg(sbi);
 
 	/* collect a number of dirty node pages and write together */
-	if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
+	if (wbc->sync_mode != WB_SYNC_ALL &&
+			get_pages(sbi, F2FS_DIRTY_NODES) <
+					nr_pages_to_skip(sbi, NODE))
 		goto skip_write;
 
 	if (wbc->sync_mode == WB_SYNC_ALL)
@@ -1965,7 +1970,7 @@
 	if (!PageDirty(page)) {
 		__set_page_dirty_nobuffers(page);
 		inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
-		SetPagePrivate(page);
+		f2fs_set_page_private(page, 0);
 		f2fs_trace_pid(page);
 		return 1;
 	}
@@ -2282,15 +2287,19 @@
 						nm_i->nat_block_bitmap)) {
 			struct page *page = get_current_nat_page(sbi, nid);
 
-			ret = scan_nat_page(sbi, page, nid);
-			f2fs_put_page(page, 1);
+			if (IS_ERR(page)) {
+				ret = PTR_ERR(page);
+			} else {
+				ret = scan_nat_page(sbi, page, nid);
+				f2fs_put_page(page, 1);
+			}
 
 			if (ret) {
 				up_read(&nm_i->nat_tree_lock);
 				f2fs_bug_on(sbi, !mount);
 				f2fs_msg(sbi->sb, KERN_ERR,
 					"NAT is corrupt, run fsck to fix it");
-				return -EINVAL;
+				return ret;
 			}
 		}
 
@@ -2367,8 +2376,9 @@
 	spin_unlock(&nm_i->nid_list_lock);
 
 	/* Let's scan nat pages and their caches to get free nids */
-	f2fs_build_free_nids(sbi, true, false);
-	goto retry;
+	if (!f2fs_build_free_nids(sbi, true, false))
+		goto retry;
+	return false;
 }
 
 /*
@@ -2471,7 +2481,7 @@
 	src_addr = inline_xattr_addr(inode, page);
 	inline_size = inline_xattr_size(inode);
 
-	f2fs_wait_on_page_writeback(ipage, NODE, true);
+	f2fs_wait_on_page_writeback(ipage, NODE, true, true);
 	memcpy(dst_addr, src_addr, inline_size);
 update_inode:
 	f2fs_update_inode(inode, ipage);
@@ -2565,17 +2575,17 @@
 	if (dst->i_inline & F2FS_EXTRA_ATTR) {
 		dst->i_extra_isize = src->i_extra_isize;
 
-		if (f2fs_sb_has_flexible_inline_xattr(sbi->sb) &&
+		if (f2fs_sb_has_flexible_inline_xattr(sbi) &&
 			F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
 							i_inline_xattr_size))
 			dst->i_inline_xattr_size = src->i_inline_xattr_size;
 
-		if (f2fs_sb_has_project_quota(sbi->sb) &&
+		if (f2fs_sb_has_project_quota(sbi) &&
 			F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
 								i_projid))
 			dst->i_projid = src->i_projid;
 
-		if (f2fs_sb_has_inode_crtime(sbi->sb) &&
+		if (f2fs_sb_has_inode_crtime(sbi) &&
 			F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
 							i_crtime_nsec)) {
 			dst->i_crtime = src->i_crtime;
@@ -2724,7 +2734,7 @@
 		__clear_bit_le(nat_index, nm_i->full_nat_bits);
 }
 
-static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
+static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 		struct nat_entry_set *set, struct cp_control *cpc)
 {
 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -2748,6 +2758,9 @@
 		down_write(&curseg->journal_rwsem);
 	} else {
 		page = get_next_nat_page(sbi, start_nid);
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+
 		nat_blk = page_address(page);
 		f2fs_bug_on(sbi, !nat_blk);
 	}
@@ -2793,12 +2806,13 @@
 		radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
 		kmem_cache_free(nat_entry_set_slab, set);
 	}
+	return 0;
 }
 
 /*
  * This function is called during the checkpointing process.
  */
-void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -2808,6 +2822,7 @@
 	unsigned int found;
 	nid_t set_idx = 0;
 	LIST_HEAD(sets);
+	int err = 0;
 
 	/* during unmount, let's flush nat_bits before checking dirty_nat_cnt */
 	if (enabled_nat_bits(sbi, cpc)) {
@@ -2817,7 +2832,7 @@
 	}
 
 	if (!nm_i->dirty_nat_cnt)
-		return;
+		return 0;
 
 	down_write(&nm_i->nat_tree_lock);
 
@@ -2840,11 +2855,16 @@
 	}
 
 	/* flush dirty nats in nat entry set */
-	list_for_each_entry_safe(set, tmp, &sets, set_list)
-		__flush_nat_entry_set(sbi, set, cpc);
+	list_for_each_entry_safe(set, tmp, &sets, set_list) {
+		err = __flush_nat_entry_set(sbi, set, cpc);
+		if (err)
+			break;
+	}
 
 	up_write(&nm_i->nat_tree_lock);
 	/* Allow dirty nats by node block allocation in write_begin */
+
+	return err;
 }
 
 static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
@@ -2871,10 +2891,8 @@
 		struct page *page;
 
 		page = f2fs_get_meta_page(sbi, nat_bits_addr++);
-		if (IS_ERR(page)) {
-			disable_nat_bits(sbi, true);
+		if (IS_ERR(page))
 			return PTR_ERR(page);
-		}
 
 		memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
 					page_address(page), F2FS_BLKSIZE);
@@ -3109,17 +3127,17 @@
 
 		for (i = 0; i < nm_i->nat_blocks; i++)
 			kvfree(nm_i->free_nid_bitmap[i]);
-		kfree(nm_i->free_nid_bitmap);
+		kvfree(nm_i->free_nid_bitmap);
 	}
 	kvfree(nm_i->free_nid_count);
 
-	kfree(nm_i->nat_bitmap);
-	kfree(nm_i->nat_bits);
+	kvfree(nm_i->nat_bitmap);
+	kvfree(nm_i->nat_bits);
 #ifdef CONFIG_F2FS_CHECK_FS
-	kfree(nm_i->nat_bitmap_mir);
+	kvfree(nm_i->nat_bitmap_mir);
 #endif
 	sbi->nm_info = NULL;
-	kfree(nm_i);
+	kvfree(nm_i);
 }
 
 int __init f2fs_create_node_manager_caches(void)
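
Several node.c hunks above change metadata page getters so that failures propagate as ERR_PTR() values instead of being swallowed or turned into a bug-on. A minimal caller-side sketch of that convention follows; sketch_use_nat_page() is a hypothetical wrapper, the helpers are the ones from the patch.

/* Sketch only: how callers consume get_current_nat_page() now that it can
 * return ERR_PTR() on a failed meta read (assumes fs/f2fs/f2fs.h context). */
static int sketch_use_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct page *page = get_current_nat_page(sbi, nid);

	if (IS_ERR(page))
		return PTR_ERR(page);	/* e.g. -EIO, propagated to the caller */

	/* ... consume page_address(page) ... */
	f2fs_put_page(page, 1);
	return 0;
}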
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 0f4db7a..e05af5d 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/node.h
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 /* start node id of a node block dedicated to the given node id */
 #define	START_NID(nid) (((nid) / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK)
@@ -364,7 +361,7 @@
 {
 	struct f2fs_node *rn = F2FS_NODE(p);
 
-	f2fs_wait_on_page_writeback(p, NODE, true);
+	f2fs_wait_on_page_writeback(p, NODE, true, true);
 
 	if (i)
 		rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index bf5c5f4..73338c4 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/recovery.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
@@ -198,6 +195,33 @@
 	return err;
 }
 
+static int recover_quota_data(struct inode *inode, struct page *page)
+{
+	struct f2fs_inode *raw = F2FS_INODE(page);
+	struct iattr attr;
+	uid_t i_uid = le32_to_cpu(raw->i_uid);
+	gid_t i_gid = le32_to_cpu(raw->i_gid);
+	int err;
+
+	memset(&attr, 0, sizeof(attr));
+
+	attr.ia_uid = make_kuid(inode->i_sb->s_user_ns, i_uid);
+	attr.ia_gid = make_kgid(inode->i_sb->s_user_ns, i_gid);
+
+	if (!uid_eq(attr.ia_uid, inode->i_uid))
+		attr.ia_valid |= ATTR_UID;
+	if (!gid_eq(attr.ia_gid, inode->i_gid))
+		attr.ia_valid |= ATTR_GID;
+
+	if (!attr.ia_valid)
+		return 0;
+
+	err = dquot_transfer(inode, &attr);
+	if (err)
+		set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR);
+	return err;
+}
+
 static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
 {
 	if (ri->i_inline & F2FS_PIN_FILE)
@@ -210,12 +234,41 @@
 		clear_inode_flag(inode, FI_DATA_EXIST);
 }
 
-static void recover_inode(struct inode *inode, struct page *page)
+static int recover_inode(struct inode *inode, struct page *page)
 {
 	struct f2fs_inode *raw = F2FS_INODE(page);
 	char *name;
+	int err;
 
 	inode->i_mode = le16_to_cpu(raw->i_mode);
+
+	err = recover_quota_data(inode, page);
+	if (err)
+		return err;
+
+	i_uid_write(inode, le32_to_cpu(raw->i_uid));
+	i_gid_write(inode, le32_to_cpu(raw->i_gid));
+
+	if (raw->i_inline & F2FS_EXTRA_ATTR) {
+		if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) &&
+			F2FS_FITS_IN_INODE(raw, le16_to_cpu(raw->i_extra_isize),
+								i_projid)) {
+			projid_t i_projid;
+			kprojid_t kprojid;
+
+			i_projid = (projid_t)le32_to_cpu(raw->i_projid);
+			kprojid = make_kprojid(&init_user_ns, i_projid);
+
+			if (!projid_eq(kprojid, F2FS_I(inode)->i_projid)) {
+				err = f2fs_transfer_project_quota(inode,
+								kprojid);
+				if (err)
+					return err;
+				F2FS_I(inode)->i_projid = kprojid;
+			}
+		}
+	}
+
 	f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
 	inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
 	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
@@ -226,9 +279,14 @@
 
 	F2FS_I(inode)->i_advise = raw->i_advise;
 	F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags);
+	f2fs_set_inode_flags(inode);
+	F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] =
+				le16_to_cpu(raw->i_gc_failures);
 
 	recover_inline_flags(inode, raw);
 
+	f2fs_mark_inode_dirty_sync(inode, true);
+
 	if (file_enc_name(inode))
 		name = "<encrypted>";
 	else
@@ -237,6 +295,7 @@
 	f2fs_msg(inode->i_sb, KERN_NOTICE,
 		"recover_inode: ino = %x, name = %s, inline = %x",
 			ino_of_node(page), name, raw->i_inline);
+	return 0;
 }
 
 static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
@@ -363,6 +422,8 @@
 	}
 
 	sum_page = f2fs_get_sum_page(sbi, segno);
+	if (IS_ERR(sum_page))
+		return PTR_ERR(sum_page);
 	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
 	sum = sum_node->entries[blkoff];
 	f2fs_put_page(sum_page, 1);
@@ -478,7 +539,7 @@
 		goto out;
 	}
 
-	f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
+	f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true);
 
 	err = f2fs_get_node_info(sbi, dn.nid, &ni);
 	if (err)
@@ -611,8 +672,11 @@
 		 * In this case, we can lose the latest inode(x).
 		 * So, call recover_inode for the inode update.
 		 */
-		if (IS_INODE(page))
-			recover_inode(entry->inode, page);
+		if (IS_INODE(page)) {
+			err = recover_inode(entry->inode, page);
+			if (err)
+				break;
+		}
 		if (entry->last_dentry == blkaddr) {
 			err = recover_dentry(entry->inode, page, dir_list);
 			if (err) {
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 92f72bb..6fcc04a 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/segment.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
@@ -179,6 +176,8 @@
 		return false;
 	if (sbi->gc_mode == GC_URGENT)
 		return true;
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		return true;
 
 	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
 			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
@@ -192,8 +191,7 @@
 
 	f2fs_trace_pid(page);
 
-	set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
-	SetPagePrivate(page);
+	f2fs_set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
 
 	new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
 
@@ -240,7 +238,7 @@
 			lock_page(page);
 		}
 
-		f2fs_wait_on_page_writeback(page, DATA, true);
+		f2fs_wait_on_page_writeback(page, DATA, true, true);
 
 		if (recover) {
 			struct dnode_of_data dn;
@@ -277,10 +275,11 @@
 		}
 next:
 		/* we don't need to invalidate this in the successful status */
-		if (drop || recover)
+		if (drop || recover) {
 			ClearPageUptodate(page);
-		set_page_private(page, 0);
-		ClearPagePrivate(page);
+			clear_cold_data(page);
+		}
+		f2fs_clear_page_private(page);
 		f2fs_put_page(page, 1);
 
 		list_del(&cur->list);
@@ -369,8 +368,7 @@
 	kmem_cache_free(inmem_entry_slab, cur);
 
 	ClearPageUptodate(page);
-	set_page_private(page, 0);
-	ClearPagePrivate(page);
+	f2fs_clear_page_private(page);
 	f2fs_put_page(page, 0);
 
 	trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
@@ -390,7 +388,7 @@
 		.io_type = FS_DATA_IO,
 	};
 	struct list_head revoke_list;
-	pgoff_t last_idx = ULONG_MAX;
+	bool submit_bio = false;
 	int err = 0;
 
 	INIT_LIST_HEAD(&revoke_list);
@@ -402,8 +400,9 @@
 		if (page->mapping == inode->i_mapping) {
 			trace_f2fs_commit_inmem_page(page, INMEM);
 
+			f2fs_wait_on_page_writeback(page, DATA, true, true);
+
 			set_page_dirty(page);
-			f2fs_wait_on_page_writeback(page, DATA, true);
 			if (clear_page_dirty_for_io(page)) {
 				inode_dec_dirty_pages(inode);
 				f2fs_remove_dirty_inode(inode);
@@ -425,14 +424,14 @@
 			}
 			/* record old blkaddr for revoking */
 			cur->old_addr = fio.old_blkaddr;
-			last_idx = page->index;
+			submit_bio = true;
 		}
 		unlock_page(page);
 		list_move_tail(&cur->list, &revoke_list);
 	}
 
-	if (last_idx != ULONG_MAX)
-		f2fs_submit_merged_write_cond(sbi, inode, 0, last_idx, DATA);
+	if (submit_bio)
+		f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA);
 
 	if (err) {
 		/*
@@ -502,6 +501,9 @@
 	if (need && excess_cached_nats(sbi))
 		f2fs_balance_fs_bg(sbi);
 
+	if (f2fs_is_checkpoint_ready(sbi))
+		return;
+
 	/*
 	 * We should do GC or end up with checkpoint, if there are so many dirty
 	 * dir/node pages without enough free segments.
@@ -530,7 +532,7 @@
 	else
 		f2fs_build_free_nids(sbi, false, false);
 
-	if (!is_idle(sbi) &&
+	if (!is_idle(sbi, REQ_TIME) &&
 		(!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
 		return;
 
@@ -556,9 +558,13 @@
 static int __submit_flush_wait(struct f2fs_sb_info *sbi,
 				struct block_device *bdev)
 {
-	struct bio *bio = f2fs_bio_alloc(sbi, 0, true);
+	struct bio *bio;
 	int ret;
 
+	bio = f2fs_bio_alloc(sbi, 0, false);
+	if (!bio)
+		return -ENOMEM;
+
 	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
 	bio_set_dev(bio, bdev);
 	ret = submit_bio_wait(bio);
@@ -635,15 +641,16 @@
 		return 0;
 
 	if (!test_opt(sbi, FLUSH_MERGE)) {
+		atomic_inc(&fcc->queued_flush);
 		ret = submit_flush_wait(sbi, ino);
+		atomic_dec(&fcc->queued_flush);
 		atomic_inc(&fcc->issued_flush);
 		return ret;
 	}
 
-	if (atomic_inc_return(&fcc->issing_flush) == 1 ||
-	    f2fs_is_multi_device(sbi)) {
+	if (atomic_inc_return(&fcc->queued_flush) == 1 || sbi->s_ndevs > 1) {
 		ret = submit_flush_wait(sbi, ino);
-		atomic_dec(&fcc->issing_flush);
+		atomic_dec(&fcc->queued_flush);
 
 		atomic_inc(&fcc->issued_flush);
 		return ret;
@@ -662,14 +669,14 @@
 
 	if (fcc->f2fs_issue_flush) {
 		wait_for_completion(&cmd.wait);
-		atomic_dec(&fcc->issing_flush);
+		atomic_dec(&fcc->queued_flush);
 	} else {
 		struct llist_node *list;
 
 		list = llist_del_all(&fcc->issue_list);
 		if (!list) {
 			wait_for_completion(&cmd.wait);
-			atomic_dec(&fcc->issing_flush);
+			atomic_dec(&fcc->queued_flush);
 		} else {
 			struct flush_cmd *tmp, *next;
 
@@ -678,7 +685,7 @@
 			llist_for_each_entry_safe(tmp, next, list, llnode) {
 				if (tmp == &cmd) {
 					cmd.ret = ret;
-					atomic_dec(&fcc->issing_flush);
+					atomic_dec(&fcc->queued_flush);
 					continue;
 				}
 				tmp->ret = ret;
@@ -707,7 +714,7 @@
 	if (!fcc)
 		return -ENOMEM;
 	atomic_set(&fcc->issued_flush, 0);
-	atomic_set(&fcc->issing_flush, 0);
+	atomic_set(&fcc->queued_flush, 0);
 	init_waitqueue_head(&fcc->flush_wait_queue);
 	init_llist_head(&fcc->issue_list);
 	SM_I(sbi)->fcc_info = fcc;
@@ -719,7 +726,7 @@
 				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
 	if (IS_ERR(fcc->f2fs_issue_flush)) {
 		err = PTR_ERR(fcc->f2fs_issue_flush);
-		kfree(fcc);
+		kvfree(fcc);
 		SM_I(sbi)->fcc_info = NULL;
 		return err;
 	}
@@ -738,7 +745,7 @@
 		kthread_stop(flush_thread);
 	}
 	if (free) {
-		kfree(fcc);
+		kvfree(fcc);
 		SM_I(sbi)->fcc_info = NULL;
 	}
 }
@@ -819,7 +826,7 @@
 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 {
 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
-	unsigned short valid_blocks;
+	unsigned short valid_blocks, ckpt_valid_blocks;
 
 	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
 		return;
@@ -827,8 +834,10 @@
 	mutex_lock(&dirty_i->seglist_lock);
 
 	valid_blocks = get_valid_blocks(sbi, segno, false);
+	ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
 
-	if (valid_blocks == 0) {
+	if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
+				ckpt_valid_blocks == sbi->blocks_per_seg)) {
 		__locate_dirty_segment(sbi, segno, PRE);
 		__remove_dirty_segment(sbi, segno, DIRTY);
 	} else if (valid_blocks < sbi->blocks_per_seg) {
@@ -841,6 +850,69 @@
 	mutex_unlock(&dirty_i->seglist_lock);
 }
 
+/* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */
+void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	unsigned int segno;
+
+	mutex_lock(&dirty_i->seglist_lock);
+	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+		if (get_valid_blocks(sbi, segno, false))
+			continue;
+		if (IS_CURSEG(sbi, segno))
+			continue;
+		__locate_dirty_segment(sbi, segno, PRE);
+		__remove_dirty_segment(sbi, segno, DIRTY);
+	}
+	mutex_unlock(&dirty_i->seglist_lock);
+}
+
+int f2fs_disable_cp_again(struct f2fs_sb_info *sbi)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	block_t ovp = overprovision_segments(sbi) << sbi->log_blocks_per_seg;
+	block_t holes[2] = {0, 0};	/* DATA and NODE */
+	struct seg_entry *se;
+	unsigned int segno;
+
+	mutex_lock(&dirty_i->seglist_lock);
+	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+		se = get_seg_entry(sbi, segno);
+		if (IS_NODESEG(se->type))
+			holes[NODE] += sbi->blocks_per_seg - se->valid_blocks;
+		else
+			holes[DATA] += sbi->blocks_per_seg - se->valid_blocks;
+	}
+	mutex_unlock(&dirty_i->seglist_lock);
+
+	if (holes[DATA] > ovp || holes[NODE] > ovp)
+		return -EAGAIN;
+	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
+		dirty_segments(sbi) > overprovision_segments(sbi))
+		return -EAGAIN;
+	return 0;
+}
+
+/* This is only used by SBI_CP_DISABLED */
+static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	unsigned int segno = 0;
+
+	mutex_lock(&dirty_i->seglist_lock);
+	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+		if (get_valid_blocks(sbi, segno, false))
+			continue;
+		if (get_ckpt_valid_blocks(sbi, segno))
+			continue;
+		mutex_unlock(&dirty_i->seglist_lock);
+		return segno;
+	}
+	mutex_unlock(&dirty_i->seglist_lock);
+	return NULL_SEGNO;
+}
+
 static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
 		struct block_device *bdev, block_t lstart,
 		block_t start, block_t len)
@@ -861,7 +933,7 @@
 	dc->len = len;
 	dc->ref = 0;
 	dc->state = D_PREP;
-	dc->issuing = 0;
+	dc->queued = 0;
 	dc->error = 0;
 	init_completion(&dc->wait);
 	list_add_tail(&dc->list, pend_list);
@@ -876,7 +948,8 @@
 static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
 				struct block_device *bdev, block_t lstart,
 				block_t start, block_t len,
-				struct rb_node *parent, struct rb_node **p)
+				struct rb_node *parent, struct rb_node **p,
+				bool leftmost)
 {
 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
 	struct discard_cmd *dc;
@@ -884,7 +957,7 @@
 	dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
 
 	rb_link_node(&dc->rb_node, parent, p);
-	rb_insert_color(&dc->rb_node, &dcc->root);
+	rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
 
 	return dc;
 }
@@ -893,10 +966,10 @@
 							struct discard_cmd *dc)
 {
 	if (dc->state == D_DONE)
-		atomic_sub(dc->issuing, &dcc->issing_discard);
+		atomic_sub(dc->queued, &dcc->queued_discard);
 
 	list_del(&dc->list);
-	rb_erase(&dc->rb_node, &dcc->root);
+	rb_erase_cached(&dc->rb_node, &dcc->root);
 	dcc->undiscard_blks -= dc->len;
 
 	kmem_cache_free(discard_cmd_slab, dc);
@@ -925,9 +998,9 @@
 		dc->error = 0;
 
 	if (dc->error)
-		f2fs_msg(sbi->sb, KERN_INFO,
-			"Issue discard(%u, %u, %u) failed, ret: %d",
-			dc->lstart, dc->start, dc->len, dc->error);
+		printk_ratelimited(
+			"%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d",
+			KERN_INFO, dc->lstart, dc->start, dc->len, dc->error);
 	__detach_discard_cmd(dcc, dc);
 }
 
@@ -987,6 +1060,7 @@
 
 	dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
 	dpolicy->io_aware_gran = MAX_PLIST_NUM;
+	dpolicy->timeout = 0;
 
 	if (discard_type == DPOLICY_BG) {
 		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
@@ -1009,6 +1083,8 @@
 	} else if (discard_type == DPOLICY_UMOUNT) {
 		dpolicy->max_requests = UINT_MAX;
 		dpolicy->io_aware = false;
+		/* we need to issue all to keep CP_TRIMMED_FLAG */
+		dpolicy->granularity = 1;
 	}
 }
 
@@ -1096,12 +1172,12 @@
 		dc->bio_ref++;
 		spin_unlock_irqrestore(&dc->lock, flags);
 
-		atomic_inc(&dcc->issing_discard);
-		dc->issuing++;
+		atomic_inc(&dcc->queued_discard);
+		dc->queued++;
 		list_move_tail(&dc->list, wait_list);
 
 		/* sanity check on discard range */
-		__check_sit_bitmap(sbi, start, start + len);
+		__check_sit_bitmap(sbi, lstart, lstart + len);
 
 		bio->bi_private = dc;
 		bio->bi_end_io = f2fs_submit_discard_endio;
@@ -1133,6 +1209,7 @@
 	struct rb_node **p;
 	struct rb_node *parent = NULL;
 	struct discard_cmd *dc = NULL;
+	bool leftmost = true;
 
 	if (insert_p && insert_parent) {
 		parent = insert_parent;
@@ -1140,9 +1217,11 @@
 		goto do_insert;
 	}
 
-	p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart);
+	p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent,
+							lstart, &leftmost);
 do_insert:
-	dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p);
+	dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
+								p, leftmost);
 	if (!dc)
 		return NULL;
 
@@ -1210,7 +1289,7 @@
 					NULL, lstart,
 					(struct rb_entry **)&prev_dc,
 					(struct rb_entry **)&next_dc,
-					&insert_p, &insert_parent, true);
+					&insert_p, &insert_parent, true, NULL);
 	if (dc)
 		prev_dc = dc;
 
@@ -1318,7 +1397,7 @@
 					NULL, pos,
 					(struct rb_entry **)&prev_dc,
 					(struct rb_entry **)&next_dc,
-					&insert_p, &insert_parent, true);
+					&insert_p, &insert_parent, true, NULL);
 	if (!dc)
 		dc = next_dc;
 
@@ -1331,7 +1410,7 @@
 		if (dc->state != D_PREP)
 			goto next;
 
-		if (dpolicy->io_aware && !is_idle(sbi)) {
+		if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
 			io_interrupted = true;
 			break;
 		}
@@ -1371,7 +1450,14 @@
 	int i, issued = 0;
 	bool io_interrupted = false;
 
+	if (dpolicy->timeout != 0)
+		f2fs_update_time(sbi, dpolicy->timeout);
+
 	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
+		if (dpolicy->timeout != 0 &&
+				f2fs_time_over(sbi, dpolicy->timeout))
+			break;
+
 		if (i + 1 < dpolicy->granularity)
 			break;
 
@@ -1391,7 +1477,7 @@
 			f2fs_bug_on(sbi, dc->state != D_PREP);
 
 			if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
-								!is_idle(sbi)) {
+						!is_idle(sbi, DISCARD_TIME)) {
 				io_interrupted = true;
 				break;
 			}
@@ -1558,7 +1644,7 @@
 }
 
 /* This comes from f2fs_put_super */
-bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
+bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
 {
 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
 	struct discard_policy dpolicy;
@@ -1566,6 +1652,7 @@
 
 	__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
 					dcc->discard_granularity);
+	dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT;
 	__issue_discard_cmd(sbi, &dpolicy);
 	dropped = __drop_discard_cmd(sbi);
 
@@ -1599,6 +1686,10 @@
 		if (dcc->discard_wake)
 			dcc->discard_wake = 0;
 
+		/* clean up pending candidates before going to sleep */
+		if (atomic_read(&dcc->queued_discard))
+			__wait_all_discard_cmd(sbi, NULL);
+
 		if (try_to_freeze())
 			continue;
 		if (f2fs_readonly(sbi->sb))
@@ -1620,7 +1711,9 @@
 			__wait_all_discard_cmd(sbi, &dpolicy);
 			wait_ms = dpolicy.min_interval;
 		} else if (issued == -1){
-			wait_ms = dpolicy.mid_interval;
+			wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
+			if (!wait_ms)
+				wait_ms = dpolicy.mid_interval;
 		} else {
 			wait_ms = dpolicy.max_interval;
 		}
@@ -1682,7 +1775,7 @@
 		struct block_device *bdev, block_t blkstart, block_t blklen)
 {
 #ifdef CONFIG_BLK_DEV_ZONED
-	if (f2fs_sb_has_blkzoned(sbi->sb) &&
+	if (f2fs_sb_has_blkzoned(sbi) &&
 				bdev_zoned_model(bdev) != BLK_ZONED_NONE)
 		return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
 #endif
@@ -1830,7 +1923,7 @@
 	unsigned int start = 0, end = -1;
 	unsigned int secno, start_segno;
 	bool force = (cpc->reason & CP_DISCARD);
-	bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;
+	bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
 
 	mutex_lock(&dirty_i->seglist_lock);
 
@@ -1862,7 +1955,7 @@
 					(end - 1) <= cpc->trim_end)
 				continue;
 
-		if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) {
+		if (!test_opt(sbi, LFS) || !__is_large_section(sbi)) {
 			f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
 				(end - start) << sbi->log_blocks_per_seg);
 			continue;
@@ -1894,7 +1987,7 @@
 					sbi->blocks_per_seg, cur_pos);
 			len = next_pos - cur_pos;
 
-			if (f2fs_sb_has_blkzoned(sbi->sb) ||
+			if (f2fs_sb_has_blkzoned(sbi) ||
 			    (force && len < cpc->trim_minlen))
 				goto skip;
 
@@ -1942,13 +2035,13 @@
 	INIT_LIST_HEAD(&dcc->fstrim_list);
 	mutex_init(&dcc->cmd_lock);
 	atomic_set(&dcc->issued_discard, 0);
-	atomic_set(&dcc->issing_discard, 0);
+	atomic_set(&dcc->queued_discard, 0);
 	atomic_set(&dcc->discard_cmd_cnt, 0);
 	dcc->nr_discards = 0;
 	dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
 	dcc->undiscard_blks = 0;
 	dcc->next_pos = 0;
-	dcc->root = RB_ROOT;
+	dcc->root = RB_ROOT_CACHED;
 	dcc->rbtree_check = false;
 
 	init_waitqueue_head(&dcc->discard_wait_queue);
@@ -1958,7 +2051,7 @@
 				"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
 	if (IS_ERR(dcc->f2fs_issue_discard)) {
 		err = PTR_ERR(dcc->f2fs_issue_discard);
-		kfree(dcc);
+		kvfree(dcc);
 		SM_I(sbi)->dcc_info = NULL;
 		return err;
 	}
@@ -1975,7 +2068,7 @@
 
 	f2fs_stop_discard_thread(sbi);
 
-	kfree(dcc);
+	kvfree(dcc);
 	SM_I(sbi)->dcc_info = NULL;
 }
 
@@ -2049,7 +2142,8 @@
 			sbi->discard_blks--;
 
 		/* don't overwrite by SSR to keep node chain */
-		if (IS_NODESEG(se->type)) {
+		if (IS_NODESEG(se->type) &&
+				!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
 			if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
 				se->ckpt_valid_blocks++;
 		}
@@ -2071,6 +2165,15 @@
 			f2fs_bug_on(sbi, 1);
 			se->valid_blocks++;
 			del = 0;
+		} else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+			/*
+			 * If checkpoints are off, we must not reuse data that
+			 * was used in the previous checkpoint. If it was used
+			 * before, we must track that to know how much space we
+			 * really have.
+			 */
+			if (f2fs_test_bit(offset, se->ckpt_valid_map))
+				sbi->unusable_block_count++;
 		}
 
 		if (f2fs_test_and_clear_bit(offset, se->discard_map))
@@ -2084,7 +2187,7 @@
 	/* update total number of valid blocks to be written in ckpt area */
 	SIT_I(sbi)->written_valid_blocks += del;
 
-	if (sbi->segs_per_sec > 1)
+	if (__is_large_section(sbi))
 		get_sec_entry(sbi, segno)->valid_blocks += del;
 }
 
@@ -2350,9 +2453,12 @@
 static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
 {
 	/* if segs_per_sec is larger than 1, we need to keep original policy. */
-	if (sbi->segs_per_sec != 1)
+	if (__is_large_section(sbi))
 		return CURSEG_I(sbi, type)->segno;
 
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		return 0;
+
 	if (test_opt(sbi, NOHEAP) &&
 		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
 		return 0;
@@ -2450,6 +2556,7 @@
 	__next_free_blkoff(sbi, curseg, 0);
 
 	sum_page = f2fs_get_sum_page(sbi, new_segno);
+	f2fs_bug_on(sbi, IS_ERR(sum_page));
 	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
 	memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
 	f2fs_put_page(sum_page, 1);
@@ -2496,6 +2603,15 @@
 			return 1;
 		}
 	}
+
+	/* find valid_blocks=0 in dirty list */
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+		segno = get_free_segment(sbi);
+		if (segno != NULL_SEGNO) {
+			curseg->next_segno = segno;
+			return 1;
+		}
+	}
 	return 0;
 }
 
@@ -2513,7 +2629,8 @@
 	else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
 					type == CURSEG_WARM_NODE)
 		new_curseg(sbi, type, false);
-	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
+	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
+			likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
 		new_curseg(sbi, type, false);
 	else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
 		change_curseg(sbi, type);
@@ -2588,7 +2705,7 @@
 					NULL, start,
 					(struct rb_entry **)&prev_dc,
 					(struct rb_entry **)&next_dc,
-					&insert_p, &insert_parent, true);
+					&insert_p, &insert_parent, true, NULL);
 	if (!dc)
 		dc = next_dc;
 
@@ -2646,7 +2763,7 @@
 	struct discard_policy dpolicy;
 	unsigned long long trimmed = 0;
 	int err = 0;
-	bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;
+	bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
 
 	if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
 		return -EINVAL;
@@ -3038,6 +3155,7 @@
 	ClearPageError(page);
 	f2fs_submit_page_write(&fio);
 
+	stat_inc_meta_count(sbi, page->index);
 	f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
 }
 
@@ -3085,10 +3203,10 @@
 	stat_inc_inplace_blocks(fio->sbi);
 
 	err = f2fs_submit_page_bio(fio);
-	if (!err)
+	if (!err) {
 		update_device_state(fio);
-
-	f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
+		f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
+	}
 
 	return err;
 }
@@ -3200,34 +3318,48 @@
 }
 
 void f2fs_wait_on_page_writeback(struct page *page,
-				enum page_type type, bool ordered)
+				enum page_type type, bool ordered, bool locked)
 {
 	if (PageWriteback(page)) {
 		struct f2fs_sb_info *sbi = F2FS_P_SB(page);
 
-		f2fs_submit_merged_write_cond(sbi, page->mapping->host,
-						0, page->index, type);
-		if (ordered)
+		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
+		if (ordered) {
 			wait_on_page_writeback(page);
-		else
+			f2fs_bug_on(sbi, locked && PageWriteback(page));
+		} else {
 			wait_for_stable_page(page);
+		}
 	}
 }
 
-void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr)
+void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
 {
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct page *cpage;
 
+	if (!f2fs_post_read_required(inode))
+		return;
+
 	if (!is_valid_data_blkaddr(sbi, blkaddr))
 		return;
 
 	cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
 	if (cpage) {
-		f2fs_wait_on_page_writeback(cpage, DATA, true);
+		f2fs_wait_on_page_writeback(cpage, DATA, true, true);
 		f2fs_put_page(cpage, 1);
 	}
 }
 
+void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
+								block_t len)
+{
+	block_t i;
+
+	for (i = 0; i < len; i++)
+		f2fs_wait_on_block_writeback(inode, blkaddr + i);
+}
+
 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
@@ -3801,7 +3933,7 @@
 	if (!sit_i->tmp_map)
 		return -ENOMEM;
 
-	if (sbi->segs_per_sec > 1) {
+	if (__is_large_section(sbi)) {
 		sit_i->sec_entries =
 			f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
 						      MAIN_SECS(sbi)),
@@ -3930,6 +4062,8 @@
 
 			se = &sit_i->sentries[start];
 			page = get_current_sit_page(sbi, start);
+			if (IS_ERR(page))
+				return PTR_ERR(page);
 			sit_blk = (struct f2fs_sit_block *)page_address(page);
 			sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
 			f2fs_put_page(page, 1);
@@ -3954,7 +4088,7 @@
 					se->valid_blocks;
 			}
 
-			if (sbi->segs_per_sec > 1)
+			if (__is_large_section(sbi))
 				get_sec_entry(sbi, start)->valid_blocks +=
 							se->valid_blocks;
 		}
@@ -3998,7 +4132,7 @@
 			sbi->discard_blks -= se->valid_blocks;
 		}
 
-		if (sbi->segs_per_sec > 1) {
+		if (__is_large_section(sbi)) {
 			get_sec_entry(sbi, start)->valid_blocks +=
 							se->valid_blocks;
 			get_sec_entry(sbi, start)->valid_blocks -=
@@ -4233,7 +4367,7 @@
 
 	destroy_victim_secmap(sbi);
 	SM_I(sbi)->dirty_info = NULL;
-	kfree(dirty_i);
+	kvfree(dirty_i);
 }
 
 static void destroy_curseg(struct f2fs_sb_info *sbi)
@@ -4245,10 +4379,10 @@
 		return;
 	SM_I(sbi)->curseg_array = NULL;
 	for (i = 0; i < NR_CURSEG_TYPE; i++) {
-		kfree(array[i].sum_blk);
-		kfree(array[i].journal);
+		kvfree(array[i].sum_blk);
+		kvfree(array[i].journal);
 	}
-	kfree(array);
+	kvfree(array);
 }
 
 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
@@ -4259,7 +4393,7 @@
 	SM_I(sbi)->free_info = NULL;
 	kvfree(free_i->free_segmap);
 	kvfree(free_i->free_secmap);
-	kfree(free_i);
+	kvfree(free_i);
 }
 
 static void destroy_sit_info(struct f2fs_sb_info *sbi)
@@ -4272,26 +4406,26 @@
 
 	if (sit_i->sentries) {
 		for (start = 0; start < MAIN_SEGS(sbi); start++) {
-			kfree(sit_i->sentries[start].cur_valid_map);
+			kvfree(sit_i->sentries[start].cur_valid_map);
 #ifdef CONFIG_F2FS_CHECK_FS
-			kfree(sit_i->sentries[start].cur_valid_map_mir);
+			kvfree(sit_i->sentries[start].cur_valid_map_mir);
 #endif
-			kfree(sit_i->sentries[start].ckpt_valid_map);
-			kfree(sit_i->sentries[start].discard_map);
+			kvfree(sit_i->sentries[start].ckpt_valid_map);
+			kvfree(sit_i->sentries[start].discard_map);
 		}
 	}
-	kfree(sit_i->tmp_map);
+	kvfree(sit_i->tmp_map);
 
 	kvfree(sit_i->sentries);
 	kvfree(sit_i->sec_entries);
 	kvfree(sit_i->dirty_sentries_bitmap);
 
 	SM_I(sbi)->sit_info = NULL;
-	kfree(sit_i->sit_bitmap);
+	kvfree(sit_i->sit_bitmap);
 #ifdef CONFIG_F2FS_CHECK_FS
-	kfree(sit_i->sit_bitmap_mir);
+	kvfree(sit_i->sit_bitmap_mir);
 #endif
-	kfree(sit_i);
+	kvfree(sit_i);
 }
 
 void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
@@ -4307,7 +4441,7 @@
 	destroy_free_segmap(sbi);
 	destroy_sit_info(sbi);
 	sbi->sm_info = NULL;
-	kfree(sm_info);
+	kvfree(sm_info);
 }
 
 int __init f2fs_create_segment_manager_caches(void)
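
The discard command tree in segment.c above moves from struct rb_root to struct rb_root_cached, so the leftmost node (the command with the smallest lstart) can be fetched in O(1) instead of walking down the tree. A small sketch of the cached-rbtree calls involved; the sketch_* wrappers are hypothetical, the rbtree calls are the stock <linux/rbtree.h> API the patch uses.

#include <linux/rbtree.h>

/* Sketch only: the rb_root_cached pattern the discard code switches to
 * (the tree itself is initialized with dcc->root = RB_ROOT_CACHED). */
static void sketch_insert(struct rb_root_cached *root, struct rb_node *node,
			  struct rb_node *parent, struct rb_node **link,
			  bool leftmost)
{
	rb_link_node(node, parent, link);
	/* 'leftmost' is true when the lookup never went right, i.e. the new
	 * node becomes the smallest key and the cached pointer is updated */
	rb_insert_color_cached(node, root, leftmost);
}

static void sketch_erase(struct rb_root_cached *root, struct rb_node *node)
{
	rb_erase_cached(node, root);	/* keeps the leftmost cache coherent */
}

static struct rb_node *sketch_first(struct rb_root_cached *root)
{
	return rb_first_cached(root);	/* O(1): the cached leftmost node */
}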
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 5079532..6f48e07 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/segment.h
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
@@ -336,12 +333,18 @@
 	 * In order to get # of valid blocks in a section instantly from many
 	 * segments, f2fs manages two counting structures separately.
 	 */
-	if (use_section && sbi->segs_per_sec > 1)
+	if (use_section && __is_large_section(sbi))
 		return get_sec_entry(sbi, segno)->valid_blocks;
 	else
 		return get_seg_entry(sbi, segno)->valid_blocks;
 }
 
+static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi,
+				unsigned int segno)
+{
+	return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+}
+
 static inline void seg_info_from_raw_sit(struct seg_entry *se,
 					struct f2fs_sit_entry *rs)
 {
@@ -579,6 +582,15 @@
 		reserved_sections(sbi) + needed);
 }
 
+static inline int f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi)
+{
+	if (likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		return 0;
+	if (likely(!has_not_enough_free_secs(sbi, 0, 0)))
+		return 0;
+	return -ENOSPC;
+}
+
 static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
 {
 	return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments;
@@ -852,7 +864,7 @@
 		}
 	}
 	mutex_unlock(&dcc->cmd_lock);
-	if (!wakeup)
+	if (!wakeup || !is_idle(sbi, DISCARD_TIME))
 		return;
 wake_up:
 	dcc->discard_wake = 1;
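
Most of the segs_per_sec checks in the segment.c/segment.h hunks above are replaced by the __is_large_section() helper. Its definition is not part of the hunks shown here (it lives in fs/f2fs/f2fs.h), but the one-for-one replacements make it equivalent to:

/* Shown for reference only; the real definition lives in fs/f2fs/f2fs.h. */
#define __is_large_section(sbi)	((sbi)->segs_per_sec > 1)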
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 29042e6..a467aca 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * f2fs shrinker support
  *   the basic infra was copied from fs/ubifs/shrinker.c
  *
  * Copyright (c) 2015 Motorola Mobility
  * Copyright (c) 2015 Jaegeuk Kim <jaegeuk@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 1871031..21750df 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/super.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/module.h>
 #include <linux/init.h>
@@ -41,7 +38,7 @@
 
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 
-char *f2fs_fault_name[FAULT_MAX] = {
+const char *f2fs_fault_name[FAULT_MAX] = {
 	[FAULT_KMALLOC]		= "kmalloc",
 	[FAULT_KVMALLOC]	= "kvmalloc",
 	[FAULT_PAGE_ALLOC]	= "page alloc",
@@ -53,9 +50,10 @@
 	[FAULT_DIR_DEPTH]	= "too big dir depth",
 	[FAULT_EVICT_INODE]	= "evict_inode fail",
 	[FAULT_TRUNCATE]	= "truncate fail",
-	[FAULT_IO]		= "IO error",
+	[FAULT_READ_IO]		= "read IO error",
 	[FAULT_CHECKPOINT]	= "checkpoint error",
 	[FAULT_DISCARD]		= "discard error",
+	[FAULT_WRITE_IO]	= "write IO error",
 };
 
 void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
@@ -138,6 +136,7 @@
 	Opt_alloc,
 	Opt_fsync,
 	Opt_test_dummy_encryption,
+	Opt_checkpoint,
 	Opt_err,
 };
 
@@ -196,6 +195,7 @@
 	{Opt_alloc, "alloc_mode=%s"},
 	{Opt_fsync, "fsync_mode=%s"},
 	{Opt_test_dummy_encryption, "test_dummy_encryption"},
+	{Opt_checkpoint, "checkpoint=%s"},
 	{Opt_err, NULL},
 };
 
@@ -207,7 +207,7 @@
 	va_start(args, fmt);
 	vaf.fmt = fmt;
 	vaf.va = &args;
-	printk_ratelimited("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf);
+	printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf);
 	va_end(args);
 }
 
@@ -259,7 +259,7 @@
 			"quota options when quota turned on");
 		return -EINVAL;
 	}
-	if (f2fs_sb_has_quota_ino(sb)) {
+	if (f2fs_sb_has_quota_ino(sbi)) {
 		f2fs_msg(sb, KERN_INFO,
 			"QUOTA feature is enabled, so ignore qf_name");
 		return 0;
@@ -269,7 +269,7 @@
 	if (!qname) {
 		f2fs_msg(sb, KERN_ERR,
 			"Not enough memory for storing quotafile name");
-		return -EINVAL;
+		return -ENOMEM;
 	}
 	if (F2FS_OPTION(sbi).s_qf_names[qtype]) {
 		if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0)
@@ -289,7 +289,7 @@
 	set_opt(sbi, QUOTA);
 	return 0;
 errout:
-	kfree(qname);
+	kvfree(qname);
 	return ret;
 }
 
@@ -302,7 +302,7 @@
 			" when quota turned on");
 		return -EINVAL;
 	}
-	kfree(F2FS_OPTION(sbi).s_qf_names[qtype]);
+	kvfree(F2FS_OPTION(sbi).s_qf_names[qtype]);
 	F2FS_OPTION(sbi).s_qf_names[qtype] = NULL;
 	return 0;
 }
@@ -314,7 +314,7 @@
 	 * 'grpquota' mount options are allowed even without quota feature
 	 * to support legacy quotas in quota files.
 	 */
-	if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi->sb)) {
+	if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi)) {
 		f2fs_msg(sbi->sb, KERN_ERR, "Project quota feature not enabled. "
 			 "Cannot enable project quota enforcement.");
 		return -1;
@@ -348,7 +348,7 @@
 		}
 	}
 
-	if (f2fs_sb_has_quota_ino(sbi->sb) && F2FS_OPTION(sbi).s_jquota_fmt) {
+	if (f2fs_sb_has_quota_ino(sbi) && F2FS_OPTION(sbi).s_jquota_fmt) {
 		f2fs_msg(sbi->sb, KERN_INFO,
 			"QUOTA feature is enabled, so ignore jquota_fmt");
 		F2FS_OPTION(sbi).s_jquota_fmt = 0;
@@ -399,10 +399,10 @@
 				set_opt(sbi, BG_GC);
 				set_opt(sbi, FORCE_FG_GC);
 			} else {
-				kfree(name);
+				kvfree(name);
 				return -EINVAL;
 			}
-			kfree(name);
+			kvfree(name);
 			break;
 		case Opt_disable_roll_forward:
 			set_opt(sbi, DISABLE_ROLL_FORWARD);
@@ -417,7 +417,7 @@
 			set_opt(sbi, DISCARD);
 			break;
 		case Opt_nodiscard:
-			if (f2fs_sb_has_blkzoned(sb)) {
+			if (f2fs_sb_has_blkzoned(sbi)) {
 				f2fs_msg(sb, KERN_WARNING,
 					"discard is required for zoned block devices");
 				return -EINVAL;
@@ -566,11 +566,11 @@
 				return -ENOMEM;
 			if (strlen(name) == 8 &&
 					!strncmp(name, "adaptive", 8)) {
-				if (f2fs_sb_has_blkzoned(sb)) {
+				if (f2fs_sb_has_blkzoned(sbi)) {
 					f2fs_msg(sb, KERN_WARNING,
 						 "adaptive mode is not allowed with "
 						 "zoned block device feature");
-					kfree(name);
+					kvfree(name);
 					return -EINVAL;
 				}
 				set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
@@ -578,15 +578,15 @@
 					!strncmp(name, "lfs", 3)) {
 				set_opt_mode(sbi, F2FS_MOUNT_LFS);
 			} else {
-				kfree(name);
+				kvfree(name);
 				return -EINVAL;
 			}
-			kfree(name);
+			kvfree(name);
 			break;
 		case Opt_io_size_bits:
 			if (args->from && match_int(args, &arg))
 				return -EINVAL;
-			if (arg > __ilog2_u32(BIO_MAX_PAGES)) {
+			if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_PAGES)) {
 				f2fs_msg(sb, KERN_WARNING,
 					"Not support %d, larger than %d",
 					1 << arg, BIO_MAX_PAGES);
@@ -594,28 +594,31 @@
 			}
 			F2FS_OPTION(sbi).write_io_size_bits = arg;
 			break;
+#ifdef CONFIG_F2FS_FAULT_INJECTION
 		case Opt_fault_injection:
 			if (args->from && match_int(args, &arg))
 				return -EINVAL;
-#ifdef CONFIG_F2FS_FAULT_INJECTION
 			f2fs_build_fault_attr(sbi, arg, F2FS_ALL_FAULT_TYPE);
 			set_opt(sbi, FAULT_INJECTION);
-#else
-			f2fs_msg(sb, KERN_INFO,
-				"FAULT_INJECTION was not selected");
-#endif
 			break;
+
 		case Opt_fault_type:
 			if (args->from && match_int(args, &arg))
 				return -EINVAL;
-#ifdef CONFIG_F2FS_FAULT_INJECTION
 			f2fs_build_fault_attr(sbi, 0, arg);
 			set_opt(sbi, FAULT_INJECTION);
-#else
-			f2fs_msg(sb, KERN_INFO,
-				"FAULT_INJECTION was not selected");
-#endif
 			break;
+#else
+		case Opt_fault_injection:
+			f2fs_msg(sb, KERN_INFO,
+				"fault_injection options not supported");
+			break;
+
+		case Opt_fault_type:
+			f2fs_msg(sb, KERN_INFO,
+				"fault_type options not supported");
+			break;
+#endif
 		case Opt_lazytime:
 			sb->s_flags |= SB_LAZYTIME;
 			break;
@@ -711,10 +714,10 @@
 					!strncmp(name, "fs-based", 8)) {
 				F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS;
 			} else {
-				kfree(name);
+				kvfree(name);
 				return -EINVAL;
 			}
-			kfree(name);
+			kvfree(name);
 			break;
 		case Opt_alloc:
 			name = match_strdup(&args[0]);
@@ -728,10 +731,10 @@
 					!strncmp(name, "reuse", 5)) {
 				F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
 			} else {
-				kfree(name);
+				kvfree(name);
 				return -EINVAL;
 			}
-			kfree(name);
+			kvfree(name);
 			break;
 		case Opt_fsync:
 			name = match_strdup(&args[0]);
@@ -748,14 +751,14 @@
 				F2FS_OPTION(sbi).fsync_mode =
 							FSYNC_MODE_NOBARRIER;
 			} else {
-				kfree(name);
+				kvfree(name);
 				return -EINVAL;
 			}
-			kfree(name);
+			kvfree(name);
 			break;
 		case Opt_test_dummy_encryption:
 #ifdef CONFIG_F2FS_FS_ENCRYPTION
-			if (!f2fs_sb_has_encrypt(sb)) {
+			if (!f2fs_sb_has_encrypt(sbi)) {
 				f2fs_msg(sb, KERN_ERR, "Encrypt feature is off");
 				return -EINVAL;
 			}
@@ -768,6 +771,23 @@
 					"Test dummy encryption mount option ignored");
 #endif
 			break;
+		case Opt_checkpoint:
+			name = match_strdup(&args[0]);
+			if (!name)
+				return -ENOMEM;
+
+			if (strlen(name) == 6 &&
+					!strncmp(name, "enable", 6)) {
+				clear_opt(sbi, DISABLE_CHECKPOINT);
+			} else if (strlen(name) == 7 &&
+					!strncmp(name, "disable", 7)) {
+				set_opt(sbi, DISABLE_CHECKPOINT);
+			} else {
+				kvfree(name);
+				return -EINVAL;
+			}
+			kvfree(name);
+			break;
 		default:
 			f2fs_msg(sb, KERN_ERR,
 				"Unrecognized mount option \"%s\" or missing value",
@@ -779,13 +799,13 @@
 	if (f2fs_check_quota_options(sbi))
 		return -EINVAL;
 #else
-	if (f2fs_sb_has_quota_ino(sbi->sb) && !f2fs_readonly(sbi->sb)) {
+	if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) {
 		f2fs_msg(sbi->sb, KERN_INFO,
 			 "Filesystem with quota feature cannot be mounted RDWR "
 			 "without CONFIG_QUOTA");
 		return -EINVAL;
 	}
-	if (f2fs_sb_has_project_quota(sbi->sb) && !f2fs_readonly(sbi->sb)) {
+	if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) {
 		f2fs_msg(sb, KERN_ERR,
 			"Filesystem with project quota feature cannot be "
 			"mounted RDWR without CONFIG_QUOTA");
@@ -801,8 +821,10 @@
 	}
 
 	if (test_opt(sbi, INLINE_XATTR_SIZE)) {
-		if (!f2fs_sb_has_extra_attr(sb) ||
-			!f2fs_sb_has_flexible_inline_xattr(sb)) {
+		int min_size, max_size;
+
+		if (!f2fs_sb_has_extra_attr(sbi) ||
+			!f2fs_sb_has_flexible_inline_xattr(sbi)) {
 			f2fs_msg(sb, KERN_ERR,
 					"extra_attr or flexible_inline_xattr "
 					"feature is off");
@@ -814,19 +836,25 @@
 					"set with inline_xattr option");
 			return -EINVAL;
 		}
-		if (F2FS_OPTION(sbi).inline_xattr_size <
-			sizeof(struct f2fs_xattr_header) / sizeof(__le32) ||
-			F2FS_OPTION(sbi).inline_xattr_size >
-			DEF_ADDRS_PER_INODE -
-			F2FS_TOTAL_EXTRA_ATTR_SIZE / sizeof(__le32) -
-			DEF_INLINE_RESERVED_SIZE -
-			MIN_INLINE_DENTRY_SIZE / sizeof(__le32)) {
+
+		min_size = sizeof(struct f2fs_xattr_header) / sizeof(__le32);
+		max_size = MAX_INLINE_XATTR_SIZE;
+
+		if (F2FS_OPTION(sbi).inline_xattr_size < min_size ||
+				F2FS_OPTION(sbi).inline_xattr_size > max_size) {
 			f2fs_msg(sb, KERN_ERR,
-					"inline xattr size is out of range");
+				"inline xattr size is out of range: %d ~ %d",
+				min_size, max_size);
 			return -EINVAL;
 		}
 	}
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) {
+		f2fs_msg(sb, KERN_ERR,
+				"LFS not compatible with checkpoint=disable\n");
+		return -EINVAL;
+	}
+
 	/* Not pass down write hints if the number of active logs is lesser
 	 * than NR_CURSEG_TYPE.
 	 */
@@ -890,6 +918,10 @@
 			sb_start_intwrite(inode->i_sb);
 			f2fs_i_size_write(inode, 0);
 
+			f2fs_submit_merged_write_cond(F2FS_I_SB(inode),
+					inode, NULL, 0, DATA);
+			truncate_inode_pages_final(inode->i_mapping);
+
 			if (F2FS_HAS_BLOCKS(inode))
 				f2fs_truncate(inode);
 
@@ -992,10 +1024,10 @@
 	for (i = 0; i < sbi->s_ndevs; i++) {
 		blkdev_put(FDEV(i).bdev, FMODE_EXCL);
 #ifdef CONFIG_BLK_DEV_ZONED
-		kfree(FDEV(i).blkz_type);
+		kvfree(FDEV(i).blkz_type);
 #endif
 	}
-	kfree(sbi->devs);
+	kvfree(sbi->devs);
 }
 
 static void f2fs_put_super(struct super_block *sb)
@@ -1014,8 +1046,8 @@
 	 * But, the previous checkpoint was not done by umount, it needs to do
 	 * clean checkpoint again.
 	 */
-	if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
-			!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+	if ((is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
+			!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG))) {
 		struct cp_control cpc = {
 			.reason = CP_UMOUNT,
 		};
@@ -1023,7 +1055,7 @@
 	}
 
 	/* be sure to wait for any on-going discard commands */
-	dropped = f2fs_wait_discard_bios(sbi);
+	dropped = f2fs_issue_discard_timeout(sbi);
 
 	if ((f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) &&
 					!sbi->discard_blks && !dropped) {
@@ -1065,25 +1097,25 @@
 	f2fs_destroy_node_manager(sbi);
 	f2fs_destroy_segment_manager(sbi);
 
-	kfree(sbi->ckpt);
+	kvfree(sbi->ckpt);
 
 	f2fs_unregister_sysfs(sbi);
 
 	sb->s_fs_info = NULL;
 	if (sbi->s_chksum_driver)
 		crypto_free_shash(sbi->s_chksum_driver);
-	kfree(sbi->raw_super);
+	kvfree(sbi->raw_super);
 
 	destroy_device_list(sbi);
 	mempool_destroy(sbi->write_io_dummy);
 #ifdef CONFIG_QUOTA
 	for (i = 0; i < MAXQUOTAS; i++)
-		kfree(F2FS_OPTION(sbi).s_qf_names[i]);
+		kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
 #endif
 	destroy_percpu_info(sbi);
 	for (i = 0; i < NR_PAGE_TYPE; i++)
-		kfree(sbi->write_io[i]);
-	kfree(sbi);
+		kvfree(sbi->write_io[i]);
+	kvfree(sbi);
 }
 
 int f2fs_sync_fs(struct super_block *sb, int sync)
@@ -1093,6 +1125,8 @@
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return 0;
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		return 0;
 
 	trace_f2fs_sync_fs(sb, sync);
 
@@ -1192,6 +1226,11 @@
 	buf->f_blocks = total_count - start_count;
 	buf->f_bfree = user_block_count - valid_user_blocks(sbi) -
 						sbi->current_reserved_blocks;
+	if (unlikely(buf->f_bfree <= sbi->unusable_block_count))
+		buf->f_bfree = 0;
+	else
+		buf->f_bfree -= sbi->unusable_block_count;
+
 	if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks)
 		buf->f_bavail = buf->f_bfree -
 				F2FS_OPTION(sbi).root_reserved_blocks;
@@ -1336,7 +1375,8 @@
 				from_kgid_munged(&init_user_ns,
 					F2FS_OPTION(sbi).s_resgid));
 	if (F2FS_IO_SIZE_BITS(sbi))
-		seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
+		seq_printf(seq, ",io_bits=%u",
+				F2FS_OPTION(sbi).write_io_size_bits);
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 	if (test_opt(sbi, FAULT_INJECTION)) {
 		seq_printf(seq, ",fault_injection=%u",
@@ -1370,6 +1410,9 @@
 	else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
 		seq_printf(seq, ",alloc_mode=%s", "reuse");
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		seq_puts(seq, ",checkpoint=disable");
+
 	if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX)
 		seq_printf(seq, ",fsync_mode=%s", "posix");
 	else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
@@ -1397,10 +1440,11 @@
 	set_opt(sbi, INLINE_DENTRY);
 	set_opt(sbi, EXTENT_CACHE);
 	set_opt(sbi, NOHEAP);
+	clear_opt(sbi, DISABLE_CHECKPOINT);
 	sbi->sb->s_flags |= SB_LAZYTIME;
 	set_opt(sbi, FLUSH_MERGE);
 	set_opt(sbi, DISCARD);
-	if (f2fs_sb_has_blkzoned(sbi->sb))
+	if (f2fs_sb_has_blkzoned(sbi))
 		set_opt_mode(sbi, F2FS_MOUNT_LFS);
 	else
 		set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
@@ -1418,6 +1462,69 @@
 #ifdef CONFIG_QUOTA
 static int f2fs_enable_quotas(struct super_block *sb);
 #endif
+
+static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
+{
+	unsigned int s_flags = sbi->sb->s_flags;
+	struct cp_control cpc;
+	int err = 0;
+	int ret;
+
+	if (s_flags & SB_RDONLY) {
+		f2fs_msg(sbi->sb, KERN_ERR,
+				"checkpoint=disable on readonly fs");
+		return -EINVAL;
+	}
+	sbi->sb->s_flags |= SB_ACTIVE;
+
+	f2fs_update_time(sbi, DISABLE_TIME);
+
+	while (!f2fs_time_over(sbi, DISABLE_TIME)) {
+		mutex_lock(&sbi->gc_mutex);
+		err = f2fs_gc(sbi, true, false, NULL_SEGNO);
+		if (err == -ENODATA) {
+			err = 0;
+			break;
+		}
+		if (err && err != -EAGAIN)
+			break;
+	}
+
+	ret = sync_filesystem(sbi->sb);
+	if (ret || err) {
+		err = ret ? ret: err;
+		goto restore_flag;
+	}
+
+	if (f2fs_disable_cp_again(sbi)) {
+		err = -EAGAIN;
+		goto restore_flag;
+	}
+
+	mutex_lock(&sbi->gc_mutex);
+	cpc.reason = CP_PAUSE;
+	set_sbi_flag(sbi, SBI_CP_DISABLED);
+	f2fs_write_checkpoint(sbi, &cpc);
+
+	sbi->unusable_block_count = 0;
+	mutex_unlock(&sbi->gc_mutex);
+restore_flag:
+	sbi->sb->s_flags = s_flags;	/* Restore MS_RDONLY status */
+	return err;
+}
+
+static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
+{
+	mutex_lock(&sbi->gc_mutex);
+	f2fs_dirty_to_prefree(sbi);
+
+	clear_sbi_flag(sbi, SBI_CP_DISABLED);
+	set_sbi_flag(sbi, SBI_IS_DIRTY);
+	mutex_unlock(&sbi->gc_mutex);
+
+	f2fs_sync_fs(sbi->sb, 1);
+}
+
 static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -1427,6 +1534,8 @@
 	bool need_restart_gc = false;
 	bool need_stop_gc = false;
 	bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
+	bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
+	bool checkpoint_changed;
 #ifdef CONFIG_QUOTA
 	int i, j;
 #endif
@@ -1447,7 +1556,7 @@
 				GFP_KERNEL);
 			if (!org_mount_opt.s_qf_names[i]) {
 				for (j = 0; j < i; j++)
-					kfree(org_mount_opt.s_qf_names[j]);
+					kvfree(org_mount_opt.s_qf_names[j]);
 				return -ENOMEM;
 			}
 		} else {
@@ -1471,6 +1580,8 @@
 	err = parse_options(sb, data);
 	if (err)
 		goto restore_opts;
+	checkpoint_changed =
+			disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT);
 
 	/*
 	 * Previous and new state of filesystem is RO,
@@ -1484,12 +1595,12 @@
 		err = dquot_suspend(sb, -1);
 		if (err < 0)
 			goto restore_opts;
-	} else if (f2fs_readonly(sb) && !(*flags & MS_RDONLY)) {
+	} else if (f2fs_readonly(sb) && !(*flags & SB_RDONLY)) {
 		/* dquot_resume needs RW */
 		sb->s_flags &= ~SB_RDONLY;
 		if (sb_any_quota_suspended(sb)) {
 			dquot_resume(sb, -1);
-		} else if (f2fs_sb_has_quota_ino(sb)) {
+		} else if (f2fs_sb_has_quota_ino(sbi)) {
 			err = f2fs_enable_quotas(sb);
 			if (err)
 				goto restore_opts;
@@ -1504,6 +1615,13 @@
 		goto restore_opts;
 	}
 
+	if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
+		err = -EINVAL;
+		f2fs_msg(sbi->sb, KERN_WARNING,
+			"disabling checkpoint not compatible with read-only");
+		goto restore_opts;
+	}
+
 	/*
 	 * We stop the GC thread if FS is mounted as RO
 	 * or if background_gc = off is passed in mount
@@ -1532,6 +1650,16 @@
 		clear_sbi_flag(sbi, SBI_IS_CLOSE);
 	}
 
+	if (checkpoint_changed) {
+		if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+			err = f2fs_disable_checkpoint(sbi);
+			if (err)
+				goto restore_gc;
+		} else {
+			f2fs_enable_checkpoint(sbi);
+		}
+	}
+
 	/*
 	 * We stop issue flush thread if FS is mounted as RO
 	 * or if flush_merge is not passed in mount option.
@@ -1548,13 +1676,14 @@
 #ifdef CONFIG_QUOTA
 	/* Release old quota file names */
 	for (i = 0; i < MAXQUOTAS; i++)
-		kfree(org_mount_opt.s_qf_names[i]);
+		kvfree(org_mount_opt.s_qf_names[i]);
 #endif
 	/* Update the POSIXACL Flag */
 	sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
 		(test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0);
 
 	limit_reserve_root(sbi);
+	*flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
 	return 0;
 restore_gc:
 	if (need_restart_gc) {
@@ -1568,7 +1697,7 @@
 #ifdef CONFIG_QUOTA
 	F2FS_OPTION(sbi).s_jquota_fmt = org_mount_opt.s_jquota_fmt;
 	for (i = 0; i < MAXQUOTAS; i++) {
-		kfree(F2FS_OPTION(sbi).s_qf_names[i]);
+		kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
 		F2FS_OPTION(sbi).s_qf_names[i] = org_mount_opt.s_qf_names[i];
 	}
 #endif
@@ -1607,6 +1736,7 @@
 				congestion_wait(BLK_RW_ASYNC, HZ/50);
 				goto repeat;
 			}
+			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
 			return PTR_ERR(page);
 		}
 
@@ -1618,6 +1748,7 @@
 		}
 		if (unlikely(!PageUptodate(page))) {
 			f2fs_put_page(page, 1);
+			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
 			return -EIO;
 		}
 
@@ -1659,6 +1790,7 @@
 				congestion_wait(BLK_RW_ASYNC, HZ/50);
 				goto retry;
 			}
+			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
 			break;
 		}
 
@@ -1695,6 +1827,12 @@
 
 static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
 {
+	if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) {
+		f2fs_msg(sbi->sb, KERN_ERR,
+			"quota sysfile may be corrupted, skip loading it");
+		return 0;
+	}
+
 	return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type],
 					F2FS_OPTION(sbi).s_jquota_fmt, type);
 }
@@ -1704,7 +1842,7 @@
 	int enabled = 0;
 	int i, err;
 
-	if (f2fs_sb_has_quota_ino(sbi->sb) && rdonly) {
+	if (f2fs_sb_has_quota_ino(sbi) && rdonly) {
 		err = f2fs_enable_quotas(sbi->sb);
 		if (err) {
 			f2fs_msg(sbi->sb, KERN_ERR,
@@ -1735,7 +1873,7 @@
 	unsigned long qf_inum;
 	int err;
 
-	BUG_ON(!f2fs_sb_has_quota_ino(sb));
+	BUG_ON(!f2fs_sb_has_quota_ino(F2FS_SB(sb)));
 
 	qf_inum = f2fs_qf_ino(sb, type);
 	if (!qf_inum)
@@ -1765,7 +1903,14 @@
 		test_opt(F2FS_SB(sb), PRJQUOTA),
 	};
 
-	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
+	if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) {
+		f2fs_msg(sb, KERN_ERR,
+			"quota file may be corrupted, skip loading it");
+		return 0;
+	}
+
+	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
+
 	for (type = 0; type < MAXQUOTAS; type++) {
 		qf_inum = f2fs_qf_ino(sb, type);
 		if (qf_inum) {
@@ -1779,6 +1924,8 @@
 					"fsck to fix.", type, err);
 				for (type--; type >= 0; type--)
 					dquot_quota_off(sb, type);
+				set_sbi_flag(F2FS_SB(sb),
+						SBI_QUOTA_NEED_REPAIR);
 				return err;
 			}
 		}
@@ -1786,35 +1933,51 @@
 	return 0;
 }
 
-static int f2fs_quota_sync(struct super_block *sb, int type)
+int f2fs_quota_sync(struct super_block *sb, int type)
 {
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
 	struct quota_info *dqopt = sb_dqopt(sb);
 	int cnt;
 	int ret;
 
 	ret = dquot_writeback_dquots(sb, type);
 	if (ret)
-		return ret;
+		goto out;
 
 	/*
 	 * Now when everything is written we can discard the pagecache so
 	 * that userspace sees the changes.
 	 */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		struct address_space *mapping;
+
 		if (type != -1 && cnt != type)
 			continue;
 		if (!sb_has_quota_active(sb, cnt))
 			continue;
 
-		ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping);
+		mapping = dqopt->files[cnt]->i_mapping;
+
+		ret = filemap_fdatawrite(mapping);
 		if (ret)
-			return ret;
+			goto out;
+
+		/* if we are using journalled quota */
+		if (is_journalled_quota(sbi))
+			continue;
+
+		ret = filemap_fdatawait(mapping);
+		if (ret)
+			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
 
 		inode_lock(dqopt->files[cnt]);
 		truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
 		inode_unlock(dqopt->files[cnt]);
 	}
-	return 0;
+out:
+	if (ret)
+		set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
+	return ret;
 }
 
 static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
@@ -1835,8 +1998,7 @@
 
 	inode_lock(inode);
 	F2FS_I(inode)->i_flags |= F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL;
-	inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
-					S_NOATIME | S_IMMUTABLE);
+	f2fs_set_inode_flags(inode);
 	inode_unlock(inode);
 	f2fs_mark_inode_dirty_sync(inode, false);
 
@@ -1856,12 +2018,12 @@
 		goto out_put;
 
 	err = dquot_quota_off(sb, type);
-	if (err || f2fs_sb_has_quota_ino(sb))
+	if (err || f2fs_sb_has_quota_ino(F2FS_SB(sb)))
 		goto out_put;
 
 	inode_lock(inode);
 	F2FS_I(inode)->i_flags &= ~(F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL);
-	inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
+	f2fs_set_inode_flags(inode);
 	inode_unlock(inode);
 	f2fs_mark_inode_dirty_sync(inode, false);
 out_put:
@@ -1883,9 +2045,15 @@
 				"Fail to turn off disk quota "
 				"(type: %d, err: %d, ret:%d), Please "
 				"run fsck to fix it.", type, err, ret);
-			set_sbi_flag(F2FS_SB(sb), SBI_NEED_FSCK);
+			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
 		}
 	}
+	/*
+	 * In case of checkpoint=disable, we must flush quota blocks.
+	 * This can cause NULL exception for node_inode in end_io, since
+	 * put_super already dropped it.
+	 */
+	sync_filesystem(sb);
 }
 
 static void f2fs_truncate_quota_inode_pages(struct super_block *sb)
@@ -1900,6 +2068,61 @@
 	}
 }
 
+static int f2fs_dquot_commit(struct dquot *dquot)
+{
+	int ret;
+
+	ret = dquot_commit(dquot);
+	if (ret < 0)
+		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
+	return ret;
+}
+
+static int f2fs_dquot_acquire(struct dquot *dquot)
+{
+	int ret;
+
+	ret = dquot_acquire(dquot);
+	if (ret < 0)
+		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
+
+	return ret;
+}
+
+static int f2fs_dquot_release(struct dquot *dquot)
+{
+	int ret;
+
+	ret = dquot_release(dquot);
+	if (ret < 0)
+		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
+	return ret;
+}
+
+static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
+{
+	struct super_block *sb = dquot->dq_sb;
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	int ret;
+
+	ret = dquot_mark_dquot_dirty(dquot);
+
+	/* if we are using journalled quota */
+	if (is_journalled_quota(sbi))
+		set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
+
+	return ret;
+}
+
+static int f2fs_dquot_commit_info(struct super_block *sb, int type)
+{
+	int ret;
+
+	ret = dquot_commit_info(sb, type);
+	if (ret < 0)
+		set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
+	return ret;
+}
 
 static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
 {
@@ -1909,11 +2132,11 @@
 
 static const struct dquot_operations f2fs_quota_operations = {
 	.get_reserved_space = f2fs_get_reserved_space,
-	.write_dquot	= dquot_commit,
-	.acquire_dquot	= dquot_acquire,
-	.release_dquot	= dquot_release,
-	.mark_dirty	= dquot_mark_dquot_dirty,
-	.write_info	= dquot_commit_info,
+	.write_dquot	= f2fs_dquot_commit,
+	.acquire_dquot	= f2fs_dquot_acquire,
+	.release_dquot	= f2fs_dquot_release,
+	.mark_dirty	= f2fs_dquot_mark_dquot_dirty,
+	.write_info	= f2fs_dquot_commit_info,
 	.alloc_dquot	= dquot_alloc,
 	.destroy_dquot	= dquot_destroy,
 	.get_projid	= f2fs_get_projid,
@@ -1931,6 +2154,11 @@
 	.get_nextdqblk	= dquot_get_next_dqblk,
 };
 #else
+int f2fs_quota_sync(struct super_block *sb, int type)
+{
+	return 0;
+}
+
 void f2fs_quota_off_umount(struct super_block *sb)
 {
 }
@@ -1976,7 +2204,7 @@
 	 * if LOST_FOUND feature is enabled.
 	 *
 	 */
-	if (f2fs_sb_has_lost_found(sbi->sb) &&
+	if (f2fs_sb_has_lost_found(sbi) &&
 			inode->i_ino == F2FS_ROOT_INO(sbi))
 		return -EPERM;
 
@@ -2195,6 +2423,26 @@
 					(bh->b_data + F2FS_SUPER_OFFSET);
 	struct super_block *sb = sbi->sb;
 	unsigned int blocksize;
+	size_t crc_offset = 0;
+	__u32 crc = 0;
+
+	/* Check checksum_offset and crc in superblock */
+	if (__F2FS_HAS_FEATURE(raw_super, F2FS_FEATURE_SB_CHKSUM)) {
+		crc_offset = le32_to_cpu(raw_super->checksum_offset);
+		if (crc_offset !=
+			offsetof(struct f2fs_super_block, crc)) {
+			f2fs_msg(sb, KERN_INFO,
+				"Invalid SB checksum offset: %zu",
+				crc_offset);
+			return 1;
+		}
+		crc = le32_to_cpu(raw_super->crc);
+		if (!f2fs_crc_valid(sbi, crc, raw_super, crc_offset)) {
+			f2fs_msg(sb, KERN_INFO,
+				"Invalid SB checksum value: %u", crc);
+			return 1;
+		}
+	}
 
 	if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) {
 		f2fs_msg(sb, KERN_INFO,
@@ -2457,7 +2705,7 @@
 static void init_sb_info(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_super_block *raw_super = sbi->raw_super;
-	int i, j;
+	int i;
 
 	sbi->log_sectors_per_block =
 		le32_to_cpu(raw_super->log_sectors_per_block);
@@ -2475,11 +2723,19 @@
 	sbi->node_ino_num = le32_to_cpu(raw_super->node_ino);
 	sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino);
 	sbi->cur_victim_sec = NULL_SECNO;
+	sbi->next_victim_seg[BG_GC] = NULL_SEGNO;
+	sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
 	sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
+	sbi->migration_granularity = sbi->segs_per_sec;
 
 	sbi->dir_level = DEF_DIR_LEVEL;
 	sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
 	sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL;
+	sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL;
+	sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL;
+	sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL;
+	sbi->interval_time[UMOUNT_DISCARD_TIMEOUT] =
+				DEF_UMOUNT_DISCARD_TIMEOUT;
 	clear_sbi_flag(sbi, SBI_NEED_FSCK);
 
 	for (i = 0; i < NR_COUNT_TYPE; i++)
@@ -2490,9 +2746,6 @@
 
 	INIT_LIST_HEAD(&sbi->s_list);
 	mutex_init(&sbi->umount_mutex);
-	for (i = 0; i < NR_PAGE_TYPE - 1; i++)
-		for (j = HOT; j < NR_TEMP_TYPE; j++)
-			mutex_init(&sbi->wio_mutex[i][j]);
 	init_rwsem(&sbi->io_order_lock);
 	spin_lock_init(&sbi->cp_lock);
 
@@ -2510,8 +2763,12 @@
 	if (err)
 		return err;
 
-	return percpu_counter_init(&sbi->total_valid_inode_count, 0,
+	err = percpu_counter_init(&sbi->total_valid_inode_count, 0,
 								GFP_KERNEL);
+	if (err)
+		percpu_counter_destroy(&sbi->alloc_valid_block_count);
+
+	return err;
 }
 
 #ifdef CONFIG_BLK_DEV_ZONED
@@ -2525,7 +2782,7 @@
 	unsigned int n = 0;
 	int err = -EIO;
 
-	if (!f2fs_sb_has_blkzoned(sbi->sb))
+	if (!f2fs_sb_has_blkzoned(sbi))
 		return 0;
 
 	if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
@@ -2576,7 +2833,7 @@
 		}
 	}
 
-	kfree(zones);
+	kvfree(zones);
 
 	return err;
 }
@@ -2636,7 +2893,7 @@
 
 	/* No valid superblock */
 	if (!*raw_super)
-		kfree(super);
+		kvfree(super);
 	else
 		err = 0;
 
@@ -2646,6 +2903,7 @@
 int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
 {
 	struct buffer_head *bh;
+	__u32 crc = 0;
 	int err;
 
 	if ((recover && f2fs_readonly(sbi->sb)) ||
@@ -2654,6 +2912,13 @@
 		return -EROFS;
 	}
 
+	/* we should update superblock crc here */
+	if (!recover && f2fs_sb_has_sb_chksum(sbi)) {
+		crc = f2fs_crc32(sbi, F2FS_RAW_SUPER(sbi),
+				offsetof(struct f2fs_super_block, crc));
+		F2FS_RAW_SUPER(sbi)->crc = cpu_to_le32(crc);
+	}
+
 	/* write back-up superblock first */
 	bh = sb_bread(sbi->sb, sbi->valid_super_block ? 0 : 1);
 	if (!bh)
@@ -2736,7 +3001,7 @@
 
 #ifdef CONFIG_BLK_DEV_ZONED
 		if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
-				!f2fs_sb_has_blkzoned(sbi->sb)) {
+				!f2fs_sb_has_blkzoned(sbi)) {
 			f2fs_msg(sbi->sb, KERN_ERR,
 				"Zoned block device feature not enabled\n");
 			return -EINVAL;
@@ -2790,10 +3055,11 @@
 	struct f2fs_super_block *raw_super;
 	struct inode *root;
 	int err;
-	bool retry = true, need_fsck = false;
+	bool skip_recovery = false, need_fsck = false;
 	char *options = NULL;
 	int recovery, i, valid_super_block;
 	struct curseg_info *seg_i;
+	int retry_cnt = 1;
 
 try_onemore:
 	err = -EINVAL;
@@ -2832,7 +3098,7 @@
 	sbi->raw_super = raw_super;
 
 	/* precompute checksum seed for metadata */
-	if (f2fs_sb_has_inode_chksum(sb))
+	if (f2fs_sb_has_inode_chksum(sbi))
 		sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid,
 						sizeof(raw_super->uuid));
 
@@ -2842,7 +3108,7 @@
 	 * devices, but mandatory for host-managed zoned block devices.
 	 */
 #ifndef CONFIG_BLK_DEV_ZONED
-	if (f2fs_sb_has_blkzoned(sb)) {
+	if (f2fs_sb_has_blkzoned(sbi)) {
 		f2fs_msg(sb, KERN_ERR,
 			 "Zoned block device support is not enabled\n");
 		err = -EOPNOTSUPP;
@@ -2865,17 +3131,16 @@
 	sb->s_maxbytes = sbi->max_file_blocks <<
 				le32_to_cpu(raw_super->log_blocksize);
 	sb->s_max_links = F2FS_LINK_MAX;
-	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
 
 #ifdef CONFIG_QUOTA
 	sb->dq_op = &f2fs_quota_operations;
-	if (f2fs_sb_has_quota_ino(sb))
+	if (f2fs_sb_has_quota_ino(sbi))
 		sb->s_qcop = &dquot_quotactl_sysfile_ops;
 	else
 		sb->s_qcop = &f2fs_quotactl_ops;
 	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
 
-	if (f2fs_sb_has_quota_ino(sbi->sb)) {
+	if (f2fs_sb_has_quota_ino(sbi)) {
 		for (i = 0; i < MAXQUOTAS; i++) {
 			if (f2fs_qf_ino(sbi->sb, i))
 				sbi->nquota_files++;
@@ -2923,7 +3188,7 @@
 				     GFP_KERNEL);
 		if (!sbi->write_io[i]) {
 			err = -ENOMEM;
-			goto free_options;
+			goto free_bio_info;
 		}
 
 		for (j = HOT; j < n; j++) {
@@ -2966,6 +3231,13 @@
 		goto free_meta_inode;
 	}
 
+	if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG))
+		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+	if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_DISABLED_QUICK_FLAG)) {
+		set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
+		sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_QUICK_INTERVAL;
+	}
+
 	/* Initialize device list */
 	err = f2fs_scan_devices(sbi);
 	if (err) {
@@ -3053,7 +3325,7 @@
 	sb->s_root = d_make_root(root); /* allocate root dentry */
 	if (!sb->s_root) {
 		err = -ENOMEM;
-		goto free_root_inode;
+		goto free_node_inode;
 	}
 
 	err = f2fs_register_sysfs(sbi);
@@ -3062,13 +3334,11 @@
 
 #ifdef CONFIG_QUOTA
 	/* Enable quota usage during mount */
-	if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) {
+	if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) {
 		err = f2fs_enable_quotas(sb);
-		if (err) {
+		if (err)
 			f2fs_msg(sb, KERN_ERR,
 				"Cannot turn on quotas: error %d", err);
-			goto free_sysfs;
-		}
 	}
 #endif
 	/* if there are nt orphan nodes free them */
@@ -3076,6 +3346,9 @@
 	if (err)
 		goto free_meta;
 
+	if (unlikely(is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)))
+		goto reset_checkpoint;
+
 	/* recover fsynced data */
 	if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
 		/*
@@ -3091,11 +3364,13 @@
 		if (need_fsck)
 			set_sbi_flag(sbi, SBI_NEED_FSCK);
 
-		if (!retry)
-			goto skip_recovery;
+		if (skip_recovery)
+			goto reset_checkpoint;
 
 		err = f2fs_recover_fsync_data(sbi, false);
 		if (err < 0) {
+			if (err != -ENOMEM)
+				skip_recovery = true;
 			need_fsck = true;
 			f2fs_msg(sb, KERN_ERR,
 				"Cannot recover all fsync data errno=%d", err);
@@ -3111,10 +3386,18 @@
 			goto free_meta;
 		}
 	}
-skip_recovery:
+reset_checkpoint:
 	/* f2fs_recover_fsync_data() cleared this already */
 	clear_sbi_flag(sbi, SBI_POR_DOING);
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+		err = f2fs_disable_checkpoint(sbi);
+		if (err)
+			goto sync_free_meta;
+	} else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) {
+		f2fs_enable_checkpoint(sbi);
+	}
+
 	/*
 	 * If filesystem is not mounted as read-only then
 	 * do start the gc_thread.
@@ -3123,9 +3406,9 @@
 		/* After POR, we can run background GC thread.*/
 		err = f2fs_start_gc_thread(sbi);
 		if (err)
-			goto free_meta;
+			goto sync_free_meta;
 	}
-	kfree(options);
+	kvfree(options);
 
 	/* recover broken superblock */
 	if (recovery) {
@@ -3143,12 +3426,18 @@
 				cur_cp_version(F2FS_CKPT(sbi)));
 	f2fs_update_time(sbi, CP_TIME);
 	f2fs_update_time(sbi, REQ_TIME);
+	clear_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
 	return 0;
 
+sync_free_meta:
+	/* safe to flush all the data */
+	sync_filesystem(sbi->sb);
+	retry_cnt = 0;
+
 free_meta:
 #ifdef CONFIG_QUOTA
 	f2fs_truncate_quota_inode_pages(sb);
-	if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb))
+	if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb))
 		f2fs_quota_off_umount(sbi->sb);
 #endif
 	/*
@@ -3158,9 +3447,8 @@
 	 * falls into an infinite loop in f2fs_sync_meta_pages().
 	 */
 	truncate_inode_pages_final(META_MAPPING(sbi));
-#ifdef CONFIG_QUOTA
-free_sysfs:
-#endif
+	/* evict some inodes being cached by GC */
+	evict_inodes(sb);
 	f2fs_unregister_sysfs(sbi);
 free_root_inode:
 	dput(sb->s_root);
@@ -3178,7 +3466,7 @@
 	f2fs_destroy_segment_manager(sbi);
 free_devices:
 	destroy_device_list(sbi);
-	kfree(sbi->ckpt);
+	kvfree(sbi->ckpt);
 free_meta_inode:
 	make_bad_inode(sbi->meta_inode);
 	iput(sbi->meta_inode);
@@ -3189,23 +3477,23 @@
 	destroy_percpu_info(sbi);
 free_bio_info:
 	for (i = 0; i < NR_PAGE_TYPE; i++)
-		kfree(sbi->write_io[i]);
+		kvfree(sbi->write_io[i]);
 free_options:
 #ifdef CONFIG_QUOTA
 	for (i = 0; i < MAXQUOTAS; i++)
-		kfree(F2FS_OPTION(sbi).s_qf_names[i]);
+		kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
 #endif
-	kfree(options);
+	kvfree(options);
 free_sb_buf:
-	kfree(raw_super);
+	kvfree(raw_super);
 free_sbi:
 	if (sbi->s_chksum_driver)
 		crypto_free_shash(sbi->s_chksum_driver);
-	kfree(sbi);
+	kvfree(sbi);
 
 	/* give only one another chance */
-	if (retry) {
-		retry = false;
+	if (retry_cnt > 0 && skip_recovery) {
+		retry_cnt--;
 		shrink_dcache_sb(sb);
 		goto try_onemore;
 	}
@@ -3306,9 +3594,7 @@
 	err = register_filesystem(&f2fs_fs_type);
 	if (err)
 		goto free_shrinker;
-	err = f2fs_create_root_stats();
-	if (err)
-		goto free_filesystem;
+	f2fs_create_root_stats();
 	err = f2fs_init_post_read_processing();
 	if (err)
 		goto free_root_stats;
@@ -3316,7 +3602,6 @@
 
 free_root_stats:
 	f2fs_destroy_root_stats();
-free_filesystem:
 	unregister_filesystem(&f2fs_fs_type);
 free_shrinker:
 	unregister_shrinker(&f2fs_shrinker_info);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 9888718..18c627e 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * f2fs sysfs interface
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
  * Copyright (c) 2017 Chao Yu <chao@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/compiler.h>
 #include <linux/proc_fs.h>
@@ -93,33 +90,36 @@
 	if (!sb->s_bdev->bd_part)
 		return snprintf(buf, PAGE_SIZE, "0\n");
 
-	if (f2fs_sb_has_encrypt(sb))
+	if (f2fs_sb_has_encrypt(sbi))
 		len += snprintf(buf, PAGE_SIZE - len, "%s",
 						"encryption");
-	if (f2fs_sb_has_blkzoned(sb))
+	if (f2fs_sb_has_blkzoned(sbi))
 		len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
 				len ? ", " : "", "blkzoned");
-	if (f2fs_sb_has_extra_attr(sb))
+	if (f2fs_sb_has_extra_attr(sbi))
 		len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
 				len ? ", " : "", "extra_attr");
-	if (f2fs_sb_has_project_quota(sb))
+	if (f2fs_sb_has_project_quota(sbi))
 		len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
 				len ? ", " : "", "projquota");
-	if (f2fs_sb_has_inode_chksum(sb))
+	if (f2fs_sb_has_inode_chksum(sbi))
 		len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
 				len ? ", " : "", "inode_checksum");
-	if (f2fs_sb_has_flexible_inline_xattr(sb))
+	if (f2fs_sb_has_flexible_inline_xattr(sbi))
 		len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
 				len ? ", " : "", "flexible_inline_xattr");
-	if (f2fs_sb_has_quota_ino(sb))
+	if (f2fs_sb_has_quota_ino(sbi))
 		len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
 				len ? ", " : "", "quota_ino");
-	if (f2fs_sb_has_inode_crtime(sb))
+	if (f2fs_sb_has_inode_crtime(sbi))
 		len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
 				len ? ", " : "", "inode_crtime");
-	if (f2fs_sb_has_lost_found(sb))
+	if (f2fs_sb_has_lost_found(sbi))
 		len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
 				len ? ", " : "", "lost_found");
+	if (f2fs_sb_has_sb_chksum(sbi))
+		len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
+				len ? ", " : "", "sb_checksum");
 	len += snprintf(buf + len, PAGE_SIZE - len, "\n");
 	return len;
 }
@@ -222,6 +222,8 @@
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 	if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX))
 		return -EINVAL;
+	if (a->struct_type == FAULT_INFO_RATE && t >= UINT_MAX)
+		return -EINVAL;
 #endif
 	if (a->struct_type == RESERVED_BLOCKS) {
 		spin_lock(&sbi->stat_lock);
@@ -246,6 +248,11 @@
 		return count;
 	}
 
+	if (!strcmp(a->attr.name, "migration_granularity")) {
+		if (t == 0 || t > sbi->segs_per_sec)
+			return -EINVAL;
+	}
+
 	if (!strcmp(a->attr.name, "trim_sections"))
 		return -EINVAL;
 
@@ -343,6 +350,7 @@
 	FEAT_QUOTA_INO,
 	FEAT_INODE_CRTIME,
 	FEAT_LOST_FOUND,
+	FEAT_SB_CHECKSUM,
 };
 
 static ssize_t f2fs_feature_show(struct f2fs_attr *a,
@@ -359,6 +367,7 @@
 	case FEAT_QUOTA_INO:
 	case FEAT_INODE_CRTIME:
 	case FEAT_LOST_FOUND:
+	case FEAT_SB_CHECKSUM:
 		return snprintf(buf, PAGE_SIZE, "supported\n");
 	}
 	return 0;
@@ -410,9 +419,15 @@
 F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
 F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, migration_granularity, migration_granularity);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, discard_idle_interval,
+					interval_time[DISCARD_TIME]);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info,
+		umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
@@ -440,6 +455,7 @@
 F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO);
 F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME);
 F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND);
+F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM);
 
 #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
 static struct attribute *f2fs_attrs[] = {
@@ -460,12 +476,16 @@
 	ATTR_LIST(min_hot_blocks),
 	ATTR_LIST(min_ssr_sections),
 	ATTR_LIST(max_victim_search),
+	ATTR_LIST(migration_granularity),
 	ATTR_LIST(dir_level),
 	ATTR_LIST(ram_thresh),
 	ATTR_LIST(ra_nid_pages),
 	ATTR_LIST(dirty_nats_ratio),
 	ATTR_LIST(cp_interval),
 	ATTR_LIST(idle_interval),
+	ATTR_LIST(discard_idle_interval),
+	ATTR_LIST(gc_idle_interval),
+	ATTR_LIST(umount_discard_timeout),
 	ATTR_LIST(iostat_enable),
 	ATTR_LIST(readdir_ra),
 	ATTR_LIST(gc_pin_file_thresh),
@@ -497,6 +517,7 @@
 	ATTR_LIST(quota_ino),
 	ATTR_LIST(inode_crtime),
 	ATTR_LIST(lost_found),
+	ATTR_LIST(sb_checksum),
 	NULL,
 };
 
diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c
index 8ac1851..d0ab533 100644
--- a/fs/f2fs/trace.c
+++ b/fs/f2fs/trace.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * f2fs IO tracer
  *
  * Copyright (c) 2014 Motorola Mobility
  * Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h
index 67db24a..e8075fc 100644
--- a/fs/f2fs/trace.h
+++ b/fs/f2fs/trace.h
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * f2fs IO tracer
  *
  * Copyright (c) 2014 Motorola Mobility
  * Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef __F2FS_TRACE_H__
 #define __F2FS_TRACE_H__
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 88e30f7..e791741 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/xattr.c
  *
@@ -13,10 +14,6 @@
  *  suggestion of Luka Renko <luka.renko@hermes.si>.
  * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
  *  Red Hat Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/rwsem.h>
 #include <linux/f2fs_fs.h>
@@ -361,7 +358,7 @@
 	*base_addr = txattr_addr;
 	return 0;
 out:
-	kzfree(txattr_addr);
+	kvfree(txattr_addr);
 	return err;
 }
 
@@ -404,7 +401,7 @@
 	*base_addr = txattr_addr;
 	return 0;
 fail:
-	kzfree(txattr_addr);
+	kvfree(txattr_addr);
 	return err;
 }
 
@@ -438,7 +435,7 @@
 		}
 
 		f2fs_wait_on_page_writeback(ipage ? ipage : in_page,
-							NODE, true);
+							NODE, true, true);
 		/* no need to use xattr node block */
 		if (hsize <= inline_size) {
 			err = f2fs_truncate_xattr_node(inode);
@@ -462,7 +459,7 @@
 			goto in_page_out;
 		}
 		f2fs_bug_on(sbi, new_nid);
-		f2fs_wait_on_page_writeback(xpage, NODE, true);
+		f2fs_wait_on_page_writeback(xpage, NODE, true, true);
 	} else {
 		struct dnode_of_data dn;
 		set_new_dnode(&dn, inode, NULL, NULL, new_nid);
@@ -531,7 +528,7 @@
 	}
 	error = size;
 out:
-	kzfree(base_addr);
+	kvfree(base_addr);
 	return error;
 }
 
@@ -559,7 +556,7 @@
 		if (!handler || (handler->list && !handler->list(dentry)))
 			continue;
 
-		prefix = handler->prefix ?: handler->name;
+		prefix = xattr_prefix(handler);
 		prefix_len = strlen(prefix);
 		size = prefix_len + entry->e_name_len + 1;
 		if (buffer) {
@@ -577,7 +574,7 @@
 	}
 	error = buffer_size - rest;
 cleanup:
-	kzfree(base_addr);
+	kvfree(base_addr);
 	return error;
 }
 
@@ -715,7 +712,7 @@
 	if (!error && S_ISDIR(inode->i_mode))
 		set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP);
 exit:
-	kzfree(base_addr);
+	kvfree(base_addr);
 	return error;
 }
 
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index 2a4ecaf..a90920e 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/xattr.h
  *
@@ -9,10 +10,6 @@
  * On-disk format of extended attributes for the ext2 filesystem.
  *
  * (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef __F2FS_XATTR_H__
 #define __F2FS_XATTR_H__
@@ -83,6 +80,12 @@
 				sizeof(struct f2fs_xattr_header) -	\
 				sizeof(struct f2fs_xattr_entry))
 
+#define MAX_INLINE_XATTR_SIZE						\
+			(DEF_ADDRS_PER_INODE -				\
+			F2FS_TOTAL_EXTRA_ATTR_SIZE / sizeof(__le32) -	\
+			DEF_INLINE_RESERVED_SIZE -			\
+			MIN_INLINE_DENTRY_SIZE / sizeof(__le32))
+
 /*
  * On-disk structure of f2fs_xattr
  * We use inline xattrs space + 1 block for xattr.
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index be02507..987c95b 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -45,6 +45,7 @@
 	if (old_pwd.dentry)
 		path_put(&old_pwd);
 }
+EXPORT_SYMBOL(set_fs_pwd);
 
 static inline int replace_path(struct path *p, const struct path *old, const struct path *new)
 {
@@ -90,6 +91,7 @@
 	path_put(&fs->pwd);
 	kmem_cache_free(fs_cachep, fs);
 }
+EXPORT_SYMBOL(free_fs_struct);
 
 void exit_fs(struct task_struct *tsk)
 {
@@ -128,6 +130,7 @@
 	}
 	return fs;
 }
+EXPORT_SYMBOL_GPL(copy_fs_struct);
 
 int unshare_fs_struct(void)
 {
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 7a24f91a..5c12fed 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -528,7 +528,7 @@
 		inode_lock(inode);
 
 		/* protected by i_mutex */
-		if (info->seals & F_SEAL_WRITE) {
+		if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
 			inode_unlock(inode);
 			return -EPERM;
 		}
diff --git a/fs/inode.c b/fs/inode.c
index 5c63693..88e78f8 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1794,7 +1794,7 @@
 	return mask;
 }
 
-static int __remove_privs(struct dentry *dentry, int kill)
+static int __remove_privs(struct vfsmount *mnt, struct dentry *dentry, int kill)
 {
 	struct iattr newattrs;
 
@@ -1803,7 +1803,7 @@
 	 * Note we call this on write, so notify_change will not
 	 * encounter any conflicting delegations:
 	 */
-	return notify_change(dentry, &newattrs, NULL);
+	return notify_change2(mnt, dentry, &newattrs, NULL);
 }
 
 /*
@@ -1830,7 +1830,7 @@
 	if (kill < 0)
 		return kill;
 	if (kill)
-		error = __remove_privs(dentry, kill);
+		error = __remove_privs(file->f_path.mnt, dentry, kill);
 	if (!error)
 		inode_has_no_xattr(inode);
 
diff --git a/fs/internal.h b/fs/internal.h
index d109665..8d0ca52 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -55,8 +55,6 @@
  * namei.c
  */
 extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *);
-extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
-			   const char *, unsigned int, struct path *);
 long do_mknodat(int dfd, const char __user *filename, umode_t mode,
 		unsigned int dev);
 long do_mkdirat(int dfd, const char __user *pathname, umode_t mode);
@@ -98,9 +96,11 @@
  * super.c
  */
 extern int do_remount_sb(struct super_block *, int, void *, int);
+extern int do_remount_sb2(struct vfsmount *, struct super_block *, int,
+								void *, int);
 extern bool trylock_super(struct super_block *sb);
 extern struct dentry *mount_fs(struct file_system_type *,
-			       int, const char *, void *);
+			       int, const char *, struct vfsmount *, void *);
 extern struct super_block *user_get_super(dev_t);
 
 /*
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 7d8654a..f8fb89b 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -109,9 +109,9 @@
 	return ret;
 }
 
-int jffs2_do_readpage_unlock(struct inode *inode, struct page *pg)
+int jffs2_do_readpage_unlock(void *data, struct page *pg)
 {
-	int ret = jffs2_do_readpage_nolock(inode, pg);
+	int ret = jffs2_do_readpage_nolock(data, pg);
 	unlock_page(pg);
 	return ret;
 }
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index eab04ec..7fbe8a7 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -686,7 +686,7 @@
 	struct page *pg;
 
 	pg = read_cache_page(inode->i_mapping, offset >> PAGE_SHIFT,
-			     (void *)jffs2_do_readpage_unlock, inode);
+			     jffs2_do_readpage_unlock, inode);
 	if (IS_ERR(pg))
 		return (void *)pg;
 
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index a2dbbb3..bd3d5f0d 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -155,7 +155,7 @@
 extern const struct inode_operations jffs2_file_inode_operations;
 extern const struct address_space_operations jffs2_file_address_operations;
 int jffs2_fsync(struct file *, loff_t, loff_t, int);
-int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg);
+int jffs2_do_readpage_unlock(void *data, struct page *pg);
 
 /* ioctl.c */
 long jffs2_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index dbf5bc2..2d8b91f 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -832,26 +832,35 @@
  * to see if it supports poll (Neither 'poll' nor 'select' return
  * an appropriate error code).  When in doubt, set a suitable timeout value.
  */
+__poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait)
+{
+	struct kernfs_node *kn = kernfs_dentry_node(of->file->f_path.dentry);
+	struct kernfs_open_node *on = kn->attr.open;
+
+	poll_wait(of->file, &on->poll, wait);
+
+	if (of->event != atomic_read(&on->event))
+		return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
+
+	return DEFAULT_POLLMASK;
+}
+
 static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
 {
 	struct kernfs_open_file *of = kernfs_of(filp);
 	struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
-	struct kernfs_open_node *on = kn->attr.open;
+	__poll_t ret;
 
 	if (!kernfs_get_active(kn))
-		goto trigger;
+		return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
 
-	poll_wait(filp, &on->poll, wait);
+	if (kn->attr.ops->poll)
+		ret = kn->attr.ops->poll(of, wait);
+	else
+		ret = kernfs_generic_poll(of, wait);
 
 	kernfs_put_active(kn);
-
-	if (of->event != atomic_read(&on->event))
-		goto trigger;
-
-	return DEFAULT_POLLMASK;
-
- trigger:
-	return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
+	return ret;
 }
 
 static void kernfs_notify_workfn(struct work_struct *work)
diff --git a/fs/mpage.c b/fs/mpage.c
index c820dc9..a526582 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -32,6 +32,14 @@
 #include <linux/cleancache.h>
 #include "internal.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/android_fs.h>
+
+EXPORT_TRACEPOINT_SYMBOL(android_fs_datawrite_start);
+EXPORT_TRACEPOINT_SYMBOL(android_fs_datawrite_end);
+EXPORT_TRACEPOINT_SYMBOL(android_fs_dataread_start);
+EXPORT_TRACEPOINT_SYMBOL(android_fs_dataread_end);
+
 /*
  * I/O completion handler for multipage BIOs.
  *
@@ -49,6 +57,16 @@
 	struct bio_vec *bv;
 	int i;
 
+	if (trace_android_fs_dataread_end_enabled() &&
+	    (bio_data_dir(bio) == READ)) {
+		struct page *first_page = bio->bi_io_vec[0].bv_page;
+
+		if (first_page != NULL)
+			trace_android_fs_dataread_end(first_page->mapping->host,
+						      page_offset(first_page),
+						      bio->bi_iter.bi_size);
+	}
+
 	bio_for_each_segment_all(bv, bio, i) {
 		struct page *page = bv->bv_page;
 		page_endio(page, bio_op(bio),
@@ -60,6 +78,24 @@
 
 static struct bio *mpage_bio_submit(int op, int op_flags, struct bio *bio)
 {
+	if (trace_android_fs_dataread_start_enabled() && (op == REQ_OP_READ)) {
+		struct page *first_page = bio->bi_io_vec[0].bv_page;
+
+		if (first_page != NULL) {
+			char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+			path = android_fstrace_get_pathname(pathbuf,
+						    MAX_TRACE_PATHBUF_LEN,
+						    first_page->mapping->host);
+			trace_android_fs_dataread_start(
+				first_page->mapping->host,
+				page_offset(first_page),
+				bio->bi_iter.bi_size,
+				current->pid,
+				path,
+				current->comm);
+		}
+	}
 	bio->bi_end_io = mpage_end_io;
 	bio_set_op_attrs(bio, op, op_flags);
 	guard_bio_eod(op, bio);
diff --git a/fs/namei.c b/fs/namei.c
index 914178c..83c9b42 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -379,9 +379,11 @@
  * flag in inode->i_opflags, that says "this has not special
  * permission function, use the fast case".
  */
-static inline int do_inode_permission(struct inode *inode, int mask)
+static inline int do_inode_permission(struct vfsmount *mnt, struct inode *inode, int mask)
 {
 	if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) {
+		if (likely(mnt && inode->i_op->permission2))
+			return inode->i_op->permission2(mnt, inode, mask);
 		if (likely(inode->i_op->permission))
 			return inode->i_op->permission(inode, mask);
 
@@ -414,7 +416,8 @@
 }
 
 /**
- * inode_permission - Check for access rights to a given inode
+ * inode_permission2 - Check for access rights to a given inode
+ * @mnt:
  * @inode: Inode to check permission on
  * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
  *
@@ -424,7 +427,7 @@
  *
  * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
  */
-int inode_permission(struct inode *inode, int mask)
+int inode_permission2(struct vfsmount *mnt, struct inode *inode, int mask)
 {
 	int retval;
 
@@ -448,7 +451,7 @@
 			return -EACCES;
 	}
 
-	retval = do_inode_permission(inode, mask);
+	retval = do_inode_permission(mnt, inode, mask);
 	if (retval)
 		return retval;
 
@@ -456,7 +459,14 @@
 	if (retval)
 		return retval;
 
-	return security_inode_permission(inode, mask);
+	retval = security_inode_permission(inode, mask);
+	return retval;
+}
+EXPORT_SYMBOL(inode_permission2);
+
+int inode_permission(struct inode *inode, int mask)
+{
+	return inode_permission2(NULL, inode, mask);
 }
 EXPORT_SYMBOL(inode_permission);
 
@@ -1693,13 +1703,13 @@
 static inline int may_lookup(struct nameidata *nd)
 {
 	if (nd->flags & LOOKUP_RCU) {
-		int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
+		int err = inode_permission2(nd->path.mnt, nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
 		if (err != -ECHILD)
 			return err;
 		if (unlazy_walk(nd))
 			return -ECHILD;
 	}
-	return inode_permission(nd->inode, MAY_EXEC);
+	return inode_permission2(nd->path.mnt, nd->inode, MAY_EXEC);
 }
 
 static inline int handle_dots(struct nameidata *nd, int type)
@@ -2455,8 +2465,8 @@
 }
 EXPORT_SYMBOL(vfs_path_lookup);
 
-static int lookup_one_len_common(const char *name, struct dentry *base,
-				 int len, struct qstr *this)
+static int lookup_one_len_common(const char *name, struct vfsmount *mnt,
+				 struct dentry *base, int len, struct qstr *this)
 {
 	this->name = name;
 	this->len = len;
@@ -2484,7 +2494,7 @@
 			return err;
 	}
 
-	return inode_permission(base->d_inode, MAY_EXEC);
+	return inode_permission2(mnt, base->d_inode, MAY_EXEC);
 }
 
 /**
@@ -2508,7 +2518,7 @@
 
 	WARN_ON_ONCE(!inode_is_locked(base->d_inode));
 
-	err = lookup_one_len_common(name, base, len, &this);
+	err = lookup_one_len_common(name, NULL, base, len, &this);
 	if (err)
 		return ERR_PTR(err);
 
@@ -2527,7 +2537,7 @@
  *
  * The caller must hold base->i_mutex.
  */
-struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
+struct dentry *lookup_one_len2(const char *name, struct vfsmount *mnt, struct dentry *base, int len)
 {
 	struct dentry *dentry;
 	struct qstr this;
@@ -2535,13 +2545,19 @@
 
 	WARN_ON_ONCE(!inode_is_locked(base->d_inode));
 
-	err = lookup_one_len_common(name, base, len, &this);
+	err = lookup_one_len_common(name, mnt, base, len, &this);
 	if (err)
 		return ERR_PTR(err);
 
 	dentry = lookup_dcache(&this, base, 0);
 	return dentry ? dentry : __lookup_slow(&this, base, 0);
 }
+EXPORT_SYMBOL(lookup_one_len2);
+
+struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
+{
+	return lookup_one_len2(name, NULL, base, len);
+}
 EXPORT_SYMBOL(lookup_one_len);
 
 /**
@@ -2563,7 +2579,7 @@
 	int err;
 	struct dentry *ret;
 
-	err = lookup_one_len_common(name, base, len, &this);
+	err = lookup_one_len_common(name, NULL, base, len, &this);
 	if (err)
 		return ERR_PTR(err);
 
@@ -2787,7 +2803,7 @@
  * 11. We don't allow removal of NFS sillyrenamed files; it's handled by
  *     nfs_async_unlink().
  */
-static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
+static int may_delete(struct vfsmount *mnt, struct inode *dir, struct dentry *victim, bool isdir)
 {
 	struct inode *inode = d_backing_inode(victim);
 	int error;
@@ -2804,7 +2820,7 @@
 
 	audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
 
-	error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+	error = inode_permission2(mnt, dir, MAY_WRITE | MAY_EXEC);
 	if (error)
 		return error;
 	if (IS_APPEND(dir))
@@ -2836,7 +2852,7 @@
  *  4. We should have write and exec permissions on dir
  *  5. We can't do it if dir is immutable (done in permission())
  */
-static inline int may_create(struct inode *dir, struct dentry *child)
+static inline int may_create(struct vfsmount *mnt, struct inode *dir, struct dentry *child)
 {
 	struct user_namespace *s_user_ns;
 	audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
@@ -2848,7 +2864,7 @@
 	if (!kuid_has_mapping(s_user_ns, current_fsuid()) ||
 	    !kgid_has_mapping(s_user_ns, current_fsgid()))
 		return -EOVERFLOW;
-	return inode_permission(dir, MAY_WRITE | MAY_EXEC);
+	return inode_permission2(mnt, dir, MAY_WRITE | MAY_EXEC);
 }
 
 /*
@@ -2895,10 +2911,10 @@
 }
 EXPORT_SYMBOL(unlock_rename);
 
-int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-		bool want_excl)
+int vfs_create2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry,
+		umode_t mode, bool want_excl)
 {
-	int error = may_create(dir, dentry);
+	int error = may_create(mnt, dir, dentry);
 	if (error)
 		return error;
 
@@ -2914,14 +2930,21 @@
 		fsnotify_create(dir, dentry);
 	return error;
 }
+EXPORT_SYMBOL(vfs_create2);
+
+int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+		bool want_excl)
+{
+	return vfs_create2(NULL, dir, dentry, mode, want_excl);
+}
 EXPORT_SYMBOL(vfs_create);
 
-int vfs_mkobj(struct dentry *dentry, umode_t mode,
+int vfs_mkobj2(struct vfsmount *mnt, struct dentry *dentry, umode_t mode,
 		int (*f)(struct dentry *, umode_t, void *),
 		void *arg)
 {
 	struct inode *dir = dentry->d_parent->d_inode;
-	int error = may_create(dir, dentry);
+	int error = may_create(mnt, dir, dentry);
 	if (error)
 		return error;
 
@@ -2935,6 +2958,15 @@
 		fsnotify_create(dir, dentry);
 	return error;
 }
+EXPORT_SYMBOL(vfs_mkobj2);
+
+int vfs_mkobj(struct dentry *dentry, umode_t mode,
+		int (*f)(struct dentry *, umode_t, void *),
+		void *arg)
+{
+	return vfs_mkobj2(NULL, dentry, mode, f, arg);
+}
 EXPORT_SYMBOL(vfs_mkobj);
 
 bool may_open_dev(const struct path *path)
@@ -2946,6 +2978,7 @@
 static int may_open(const struct path *path, int acc_mode, int flag)
 {
 	struct dentry *dentry = path->dentry;
+	struct vfsmount *mnt = path->mnt;
 	struct inode *inode = dentry->d_inode;
 	int error;
 
@@ -2970,7 +3003,7 @@
 		break;
 	}
 
-	error = inode_permission(inode, MAY_OPEN | acc_mode);
+	error = inode_permission2(mnt, inode, MAY_OPEN | acc_mode);
 	if (error)
 		return error;
 
@@ -3005,7 +3038,7 @@
 	if (!error)
 		error = security_path_truncate(path);
 	if (!error) {
-		error = do_truncate(path->dentry, 0,
+		error = do_truncate2(path->mnt, path->dentry, 0,
 				    ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
 				    filp);
 	}
@@ -3032,7 +3065,7 @@
 	    !kgid_has_mapping(s_user_ns, current_fsgid()))
 		return -EOVERFLOW;
 
-	error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC);
+	error = inode_permission2(dir->mnt, dir->dentry->d_inode, MAY_WRITE | MAY_EXEC);
 	if (error)
 		return error;
 
@@ -3440,7 +3473,8 @@
 	int error;
 
 	/* we want directory to be writable */
-	error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+	error = inode_permission2(ERR_PTR(-EOPNOTSUPP), dir,
+					MAY_WRITE | MAY_EXEC);
 	if (error)
 		goto out_err;
 	error = -EOPNOTSUPP;
@@ -3694,9 +3728,9 @@
 }
 EXPORT_SYMBOL(user_path_create);
 
-int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
+int vfs_mknod2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
 {
-	int error = may_create(dir, dentry);
+	int error = may_create(mnt, dir, dentry);
 
 	if (error)
 		return error;
@@ -3720,6 +3754,12 @@
 		fsnotify_create(dir, dentry);
 	return error;
 }
+EXPORT_SYMBOL(vfs_mknod2);
+
+int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
+{
+	return vfs_mknod2(NULL, dir, dentry, mode, dev);
+}
 EXPORT_SYMBOL(vfs_mknod);
 
 static int may_mknod(umode_t mode)
@@ -3762,12 +3802,12 @@
 		goto out;
 	switch (mode & S_IFMT) {
 		case 0: case S_IFREG:
-			error = vfs_create(path.dentry->d_inode,dentry,mode,true);
+			error = vfs_create2(path.mnt, path.dentry->d_inode,dentry,mode,true);
 			if (!error)
 				ima_post_path_mknod(dentry);
 			break;
 		case S_IFCHR: case S_IFBLK:
-			error = vfs_mknod(path.dentry->d_inode,dentry,mode,
+			error = vfs_mknod2(path.mnt, path.dentry->d_inode,dentry,mode,
 					new_decode_dev(dev));
 			break;
 		case S_IFIFO: case S_IFSOCK:
@@ -3794,9 +3834,9 @@
 	return do_mknodat(AT_FDCWD, filename, mode, dev);
 }
 
-int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+int vfs_mkdir2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, umode_t mode)
 {
-	int error = may_create(dir, dentry);
+	int error = may_create(mnt, dir, dentry);
 	unsigned max_links = dir->i_sb->s_max_links;
 
 	if (error)
@@ -3818,6 +3858,12 @@
 		fsnotify_mkdir(dir, dentry);
 	return error;
 }
+EXPORT_SYMBOL(vfs_mkdir2);
+
+int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	return vfs_mkdir2(NULL, dir, dentry, mode);
+}
 EXPORT_SYMBOL(vfs_mkdir);
 
 long do_mkdirat(int dfd, const char __user *pathname, umode_t mode)
@@ -3836,7 +3882,7 @@
 		mode &= ~current_umask();
 	error = security_path_mkdir(&path, dentry, mode);
 	if (!error)
-		error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
+		error = vfs_mkdir2(path.mnt, path.dentry->d_inode, dentry, mode);
 	done_path_create(&path, dentry);
 	if (retry_estale(error, lookup_flags)) {
 		lookup_flags |= LOOKUP_REVAL;
@@ -3855,9 +3901,9 @@
 	return do_mkdirat(AT_FDCWD, pathname, mode);
 }
 
-int vfs_rmdir(struct inode *dir, struct dentry *dentry)
+int vfs_rmdir2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry)
 {
-	int error = may_delete(dir, dentry, 1);
+	int error = may_delete(mnt, dir, dentry, 1);
 
 	if (error)
 		return error;
@@ -3892,6 +3938,12 @@
 		d_delete(dentry);
 	return error;
 }
+EXPORT_SYMBOL(vfs_rmdir2);
+
+int vfs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	return vfs_rmdir2(NULL, dir, dentry);
+}
 EXPORT_SYMBOL(vfs_rmdir);
 
 long do_rmdir(int dfd, const char __user *pathname)
@@ -3937,7 +3989,7 @@
 	error = security_path_rmdir(&path, dentry);
 	if (error)
 		goto exit3;
-	error = vfs_rmdir(path.dentry->d_inode, dentry);
+	error = vfs_rmdir2(path.mnt, path.dentry->d_inode, dentry);
 exit3:
 	dput(dentry);
 exit2:
@@ -3976,10 +4028,10 @@
  * be appropriate for callers that expect the underlying filesystem not
  * to be NFS exported.
  */
-int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
+int vfs_unlink2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
 {
 	struct inode *target = dentry->d_inode;
-	int error = may_delete(dir, dentry, 0);
+	int error = may_delete(mnt, dir, dentry, 0);
 
 	if (error)
 		return error;
@@ -4014,6 +4066,12 @@
 
 	return error;
 }
+EXPORT_SYMBOL(vfs_unlink2);
+
+int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
+{
+	return vfs_unlink2(NULL, dir, dentry, delegated_inode);
+}
 EXPORT_SYMBOL(vfs_unlink);
 
 /*
@@ -4059,7 +4117,7 @@
 		error = security_path_unlink(&path, dentry);
 		if (error)
 			goto exit2;
-		error = vfs_unlink(path.dentry->d_inode, dentry, &delegated_inode);
+		error = vfs_unlink2(path.mnt, path.dentry->d_inode, dentry, &delegated_inode);
 exit2:
 		dput(dentry);
 	}
@@ -4109,9 +4167,9 @@
 	return do_unlinkat(AT_FDCWD, getname(pathname));
 }
 
-int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
+int vfs_symlink2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, const char *oldname)
 {
-	int error = may_create(dir, dentry);
+	int error = may_create(mnt, dir, dentry);
 
 	if (error)
 		return error;
@@ -4128,6 +4186,12 @@
 		fsnotify_create(dir, dentry);
 	return error;
 }
+EXPORT_SYMBOL(vfs_symlink2);
+
+int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
+{
+	return vfs_symlink2(NULL, dir, dentry, oldname);
+}
 EXPORT_SYMBOL(vfs_symlink);
 
 long do_symlinkat(const char __user *oldname, int newdfd,
@@ -4150,7 +4214,7 @@
 
 	error = security_path_symlink(&path, dentry, from->name);
 	if (!error)
-		error = vfs_symlink(path.dentry->d_inode, dentry, from->name);
+		error = vfs_symlink2(path.mnt, path.dentry->d_inode, dentry, from->name);
 	done_path_create(&path, dentry);
 	if (retry_estale(error, lookup_flags)) {
 		lookup_flags |= LOOKUP_REVAL;
@@ -4191,7 +4255,7 @@
  * be appropriate for callers that expect the underlying filesystem not
  * to be NFS exported.
  */
-int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
+int vfs_link2(struct vfsmount *mnt, struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
 {
 	struct inode *inode = old_dentry->d_inode;
 	unsigned max_links = dir->i_sb->s_max_links;
@@ -4200,7 +4264,7 @@
 	if (!inode)
 		return -ENOENT;
 
-	error = may_create(dir, new_dentry);
+	error = may_create(mnt, dir, new_dentry);
 	if (error)
 		return error;
 
@@ -4250,6 +4314,12 @@
 		fsnotify_link(dir, inode, new_dentry);
 	return error;
 }
+EXPORT_SYMBOL(vfs_link2);
+
+int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
+{
+	return vfs_link2(NULL, old_dentry, dir, new_dentry, delegated_inode);
+}
 EXPORT_SYMBOL(vfs_link);
 
 /*
@@ -4305,7 +4375,7 @@
 	error = security_path_link(old_path.dentry, &new_path, new_dentry);
 	if (error)
 		goto out_dput;
-	error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
+	error = vfs_link2(old_path.mnt, old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
 out_dput:
 	done_path_create(&new_path, new_dentry);
 	if (delegated_inode) {
@@ -4387,7 +4457,8 @@
  *	   ->i_mutex on parents, which works but leads to some truly excessive
  *	   locking].
  */
-int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+int vfs_rename2(struct vfsmount *mnt,
+	       struct inode *old_dir, struct dentry *old_dentry,
 	       struct inode *new_dir, struct dentry *new_dentry,
 	       struct inode **delegated_inode, unsigned int flags)
 {
@@ -4402,19 +4473,19 @@
 	if (source == target)
 		return 0;
 
-	error = may_delete(old_dir, old_dentry, is_dir);
+	error = may_delete(mnt, old_dir, old_dentry, is_dir);
 	if (error)
 		return error;
 
 	if (!target) {
-		error = may_create(new_dir, new_dentry);
+		error = may_create(mnt, new_dir, new_dentry);
 	} else {
 		new_is_dir = d_is_dir(new_dentry);
 
 		if (!(flags & RENAME_EXCHANGE))
-			error = may_delete(new_dir, new_dentry, is_dir);
+			error = may_delete(mnt, new_dir, new_dentry, is_dir);
 		else
-			error = may_delete(new_dir, new_dentry, new_is_dir);
+			error = may_delete(mnt, new_dir, new_dentry, new_is_dir);
 	}
 	if (error)
 		return error;
@@ -4428,12 +4499,12 @@
 	 */
 	if (new_dir != old_dir) {
 		if (is_dir) {
-			error = inode_permission(source, MAY_WRITE);
+			error = inode_permission2(mnt, source, MAY_WRITE);
 			if (error)
 				return error;
 		}
 		if ((flags & RENAME_EXCHANGE) && new_is_dir) {
-			error = inode_permission(target, MAY_WRITE);
+			error = inode_permission2(mnt, target, MAY_WRITE);
 			if (error)
 				return error;
 		}
@@ -4510,6 +4581,14 @@
 
 	return error;
 }
+EXPORT_SYMBOL(vfs_rename2);
+
+int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+	       struct inode *new_dir, struct dentry *new_dentry,
+	       struct inode **delegated_inode, unsigned int flags)
+{
+	return vfs_rename2(NULL, old_dir, old_dentry, new_dir, new_dentry, delegated_inode, flags);
+}
 EXPORT_SYMBOL(vfs_rename);
 
 static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
@@ -4623,7 +4702,7 @@
 				     &new_path, new_dentry, flags);
 	if (error)
 		goto exit5;
-	error = vfs_rename(old_path.dentry->d_inode, old_dentry,
+	error = vfs_rename2(old_path.mnt, old_path.dentry->d_inode, old_dentry,
 			   new_path.dentry->d_inode, new_dentry,
 			   &delegated_inode, flags);
 exit5:
@@ -4674,7 +4753,7 @@
 
 int vfs_whiteout(struct inode *dir, struct dentry *dentry)
 {
-	int error = may_create(dir, dentry);
+	int error = may_create(NULL, dir, dentry);
 	if (error)
 		return error;
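
The fs/namei.c hunks above all apply one pattern: each exported helper gains a *2 variant that threads a struct vfsmount down to inode_permission2(), may_create() and may_delete(), while the original entry point stays as a thin wrapper passing a NULL mount. A minimal sketch of how a caller that already holds a struct path would use the new variants; example_remove() is hypothetical and parent-directory locking is omitted:

/*
 * Sketch only: relies on the vfs_unlink2() signature introduced above;
 * a real caller must hold the parent inode lock and handle delegations.
 */
static int example_remove(struct path *parent, struct dentry *victim)
{
	/* passing parent->mnt lets mount-aware permission checks see the vfsmount */
	return vfs_unlink2(parent->mnt, d_inode(parent->dentry), victim, NULL);
}
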
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 1fce41b..6b14059 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -200,6 +200,7 @@
 		mnt->mnt_count = 1;
 		mnt->mnt_writers = 0;
 #endif
+		mnt->mnt.data = NULL;
 
 		INIT_HLIST_NODE(&mnt->mnt_hash);
 		INIT_LIST_HEAD(&mnt->mnt_child);
@@ -552,6 +553,7 @@
 
 static void free_vfsmnt(struct mount *mnt)
 {
+	kfree(mnt->mnt.data);
 	kfree_const(mnt->mnt_devname);
 #ifdef CONFIG_SMP
 	free_percpu(mnt->mnt_pcp);
@@ -955,10 +957,18 @@
 	if (!mnt)
 		return ERR_PTR(-ENOMEM);
 
+	if (type->alloc_mnt_data) {
+		mnt->mnt.data = type->alloc_mnt_data();
+		if (!mnt->mnt.data) {
+			mnt_free_id(mnt);
+			free_vfsmnt(mnt);
+			return ERR_PTR(-ENOMEM);
+		}
+	}
 	if (flags & SB_KERNMOUNT)
 		mnt->mnt.mnt_flags = MNT_INTERNAL;
 
-	root = mount_fs(type, flags, name, data);
+	root = mount_fs(type, flags, name, &mnt->mnt, data);
 	if (IS_ERR(root)) {
 		mnt_free_id(mnt);
 		free_vfsmnt(mnt);
@@ -1002,6 +1012,14 @@
 	if (!mnt)
 		return ERR_PTR(-ENOMEM);
 
+	if (sb->s_op->clone_mnt_data) {
+		mnt->mnt.data = sb->s_op->clone_mnt_data(old->mnt.data);
+		if (!mnt->mnt.data) {
+			err = -ENOMEM;
+			goto out_free;
+		}
+	}
+
 	if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
 		mnt->mnt_group_id = 0; /* not a peer of original */
 	else
@@ -2286,8 +2304,14 @@
 		err = change_mount_flags(path->mnt, ms_flags);
 	else if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
 		err = -EPERM;
-	else
-		err = do_remount_sb(sb, sb_flags, data, 0);
+	else {
+		err = do_remount_sb2(path->mnt, sb, sb_flags, data, 0);
+		namespace_lock();
+		lock_mount_hash();
+		propagate_remount(mnt);
+		unlock_mount_hash();
+		namespace_unlock();
+	}
 	if (!err) {
 		lock_mount_hash();
 		mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
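
The fs/namespace.c changes wire per-mount private data through the mount code: the mount path allocates mnt->mnt.data from the filesystem's alloc_mnt_data() hook before calling mount_fs(), the clone path duplicates it via s_op->clone_mnt_data(), and free_vfsmnt() frees it. Roughly the shape a filesystem would supply, assuming the alloc_mnt_data field this hunk calls on struct file_system_type (added elsewhere in this change); the examplefs_* names are hypothetical:

struct examplefs_mount_opts {
	kgid_t gid;
	unsigned int mask;
};

/* called on the mount path above; the result ends up in mnt->mnt.data */
static void *examplefs_alloc_mnt_data(void)
{
	return kzalloc(sizeof(struct examplefs_mount_opts), GFP_KERNEL);
}

static struct file_system_type examplefs_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "examplefs",
	.alloc_mnt_data	= examplefs_alloc_mnt_data,
	/* .mount, .kill_sb, ... */
};
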
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 71b2e39..3baeed0 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -664,8 +664,9 @@
  * We only need to convert from xdr once so future lookups are much simpler
  */
 static
-int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
+int nfs_readdir_filler(void *data, struct page* page)
 {
+	nfs_readdir_descriptor_t *desc = data;
 	struct inode	*inode = file_inode(desc->file);
 	int ret;
 
@@ -697,8 +698,8 @@
 static
 struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
 {
-	return read_cache_page(desc->file->f_mapping,
-			desc->page_index, (filler_t *)nfs_readdir_filler, desc);
+	return read_cache_page(desc->file->f_mapping, desc->page_index,
+			nfs_readdir_filler, desc);
 }
 
 /*
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 06eb44b..25ba299 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -26,8 +26,9 @@
  * and straight-forward than readdir caching.
  */
 
-static int nfs_symlink_filler(struct inode *inode, struct page *page)
+static int nfs_symlink_filler(void *data, struct page *page)
 {
+	struct inode *inode = data;
 	int error;
 
 	error = NFS_PROTO(inode)->readlink(inode, page, 0, PAGE_SIZE);
@@ -65,8 +66,8 @@
 		err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
 		if (err)
 			return err;
-		page = read_cache_page(&inode->i_data, 0,
-					(filler_t *)nfs_symlink_filler, inode);
+		page = read_cache_page(&inode->i_data, 0, nfs_symlink_filler,
+				inode);
 		if (IS_ERR(page))
 			return ERR_CAST(page);
 	}
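
Both NFS hunks exist because casting a typed function to filler_t breaks indirect-call checking (e.g. CFI); the fillers are reworked to take the void * context that read_cache_page() actually passes, so the casts can go away. A filesystem following the same pattern would look roughly like this; the examplefs_* names and the examplefs_read_meta() helper are hypothetical:

struct examplefs_meta_desc {
	struct file *file;
	unsigned long index;
};

static int examplefs_meta_filler(void *data, struct page *page)
{
	struct examplefs_meta_desc *desc = data;
	int err;

	err = examplefs_read_meta(desc, page);	/* hypothetical helper */
	if (!err)
		SetPageUptodate(page);
	unlock_page(page);
	return err;
}

static struct page *examplefs_get_meta_page(struct address_space *mapping,
					    pgoff_t index,
					    struct examplefs_meta_desc *desc)
{
	/* no (filler_t *) cast needed once the filler takes void * */
	return read_cache_page(mapping, index, examplefs_meta_filler, desc);
}
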
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 6905488..06c5729 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -490,7 +490,7 @@
 	}
 
 	/* you can only watch an inode if you have read permissions on it */
-	ret = inode_permission(path->dentry->d_inode, MAY_READ);
+	ret = inode_permission2(path->mnt, path->dentry->d_inode, MAY_READ);
 	if (ret)
 		path_put(path);
 out:
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 97a5169..a415ffa 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -350,7 +350,7 @@
 	if (error)
 		return error;
 	/* you can only watch an inode if you have read permissions on it */
-	error = inode_permission(path->dentry->d_inode, MAY_READ);
+	error = inode_permission2(path->mnt, path->dentry->d_inode, MAY_READ);
 	if (error)
 		path_put(path);
 	return error;
@@ -702,6 +702,8 @@
 	struct fsnotify_group *group;
 	struct inode *inode;
 	struct path path;
+	struct path alteredpath;
+	struct path *canonical_path = &path;
 	struct fd f;
 	int ret;
 	unsigned flags = 0;
@@ -747,13 +749,22 @@
 	if (ret)
 		goto fput_and_out;
 
+	/* support stacked filesystems */
+	if (path.dentry && path.dentry->d_op) {
+		if (path.dentry->d_op->d_canonical_path) {
+			path.dentry->d_op->d_canonical_path(&path, &alteredpath);
+			canonical_path = &alteredpath;
+			path_put(&path);
+		}
+	}
+
 	/* inode held in place by reference to path; group by fget on fd */
-	inode = path.dentry->d_inode;
+	inode = canonical_path->dentry->d_inode;
 	group = f.file->private_data;
 
 	/* create/update an inode mark */
 	ret = inotify_update_watch(group, inode, mask);
-	path_put(&path);
+	path_put(canonical_path);
 fput_and_out:
 	fdput(f);
 	return ret;
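
The inotify hunk above resolves the watch target through the d_canonical_path dentry operation so that marks land on the backing inode of a stacked filesystem. Assuming the operation's prototype matches its use above (it takes the user-visible path and fills in a referenced canonical path), a stacking filesystem would provide roughly the following; examplefs_get_lower_path() is a hypothetical helper that returns a held reference to the lower struct path:

static void examplefs_canonical_path(const struct path *path,
				     struct path *canonical_path)
{
	/* return a referenced lower path; inotify drops it via path_put() */
	examplefs_get_lower_path(path->dentry, canonical_path);
}

static const struct dentry_operations examplefs_dops = {
	.d_canonical_path	= examplefs_canonical_path,
	/* .d_revalidate, .d_release, ... */
};
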
diff --git a/fs/open.c b/fs/open.c
index 8784787..886da56 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -34,8 +34,8 @@
 
 #include "internal.h"
 
-int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
-	struct file *filp)
+int do_truncate2(struct vfsmount *mnt, struct dentry *dentry, loff_t length,
+		unsigned int time_attrs, struct file *filp)
 {
 	int ret;
 	struct iattr newattrs;
@@ -60,17 +60,24 @@
 
 	inode_lock(dentry->d_inode);
 	/* Note any delegations or leases have already been broken: */
-	ret = notify_change(dentry, &newattrs, NULL);
+	ret = notify_change2(mnt, dentry, &newattrs, NULL);
 	inode_unlock(dentry->d_inode);
 	return ret;
 }
+int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
+	struct file *filp)
+{
+	return do_truncate2(NULL, dentry, length, time_attrs, filp);
+}
 
 long vfs_truncate(const struct path *path, loff_t length)
 {
 	struct inode *inode;
+	struct vfsmount *mnt;
 	long error;
 
 	inode = path->dentry->d_inode;
+	mnt = path->mnt;
 
 	/* For directories it's -EISDIR, for other non-regulars - -EINVAL */
 	if (S_ISDIR(inode->i_mode))
@@ -82,7 +89,7 @@
 	if (error)
 		goto out;
 
-	error = inode_permission(inode, MAY_WRITE);
+	error = inode_permission2(mnt, inode, MAY_WRITE);
 	if (error)
 		goto mnt_drop_write_and_out;
 
@@ -106,7 +113,7 @@
 	if (!error)
 		error = security_path_truncate(path);
 	if (!error)
-		error = do_truncate(path->dentry, length, 0, NULL);
+		error = do_truncate2(mnt, path->dentry, length, 0, NULL);
 
 put_write_and_out:
 	put_write_access(inode);
@@ -155,6 +162,7 @@
 {
 	struct inode *inode;
 	struct dentry *dentry;
+	struct vfsmount *mnt;
 	struct fd f;
 	int error;
 
@@ -171,6 +179,7 @@
 		small = 0;
 
 	dentry = f.file->f_path.dentry;
+	mnt = f.file->f_path.mnt;
 	inode = dentry->d_inode;
 	error = -EINVAL;
 	if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE))
@@ -191,7 +200,7 @@
 	if (!error)
 		error = security_path_truncate(&f.file->f_path);
 	if (!error)
-		error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file);
+		error = do_truncate2(mnt, dentry, length, ATTR_MTIME|ATTR_CTIME, f.file);
 	sb_end_write(inode->i_sb);
 out_putf:
 	fdput(f);
@@ -350,6 +359,7 @@
 	struct cred *override_cred;
 	struct path path;
 	struct inode *inode;
+	struct vfsmount *mnt;
 	int res;
 	unsigned int lookup_flags = LOOKUP_FOLLOW;
 
@@ -399,6 +409,7 @@
 		goto out;
 
 	inode = d_backing_inode(path.dentry);
+	mnt = path.mnt;
 
 	if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
 		/*
@@ -410,7 +421,7 @@
 			goto out_path_release;
 	}
 
-	res = inode_permission(inode, mode | MAY_ACCESS);
+	res = inode_permission2(mnt, inode, mode | MAY_ACCESS);
 	/* SuS v2 requires we report a read only fs too */
 	if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
 		goto out_path_release;
@@ -459,7 +470,7 @@
 	if (error)
 		goto out;
 
-	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
+	error = inode_permission2(path.mnt, path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
 	if (error)
 		goto dput_and_out;
 
@@ -493,7 +504,8 @@
 	if (!d_can_lookup(f.file->f_path.dentry))
 		goto out_putf;
 
-	error = inode_permission(file_inode(f.file), MAY_EXEC | MAY_CHDIR);
+	error = inode_permission2(f.file->f_path.mnt, file_inode(f.file),
+				MAY_EXEC | MAY_CHDIR);
 	if (!error)
 		set_fs_pwd(current->fs, &f.file->f_path);
 out_putf:
@@ -512,7 +524,7 @@
 	if (error)
 		goto out;
 
-	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
+	error = inode_permission2(path.mnt, path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
 	if (error)
 		goto dput_and_out;
 
@@ -557,7 +569,7 @@
 		goto out_unlock;
 	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-	error = notify_change(path->dentry, &newattrs, &delegated_inode);
+	error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode);
 out_unlock:
 	inode_unlock(inode);
 	if (delegated_inode) {
@@ -648,7 +660,7 @@
 	inode_lock(inode);
 	error = security_path_chown(path, uid, gid);
 	if (!error)
-		error = notify_change(path->dentry, &newattrs, &delegated_inode);
+		error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode);
 	inode_unlock(inode);
 	if (delegated_inode) {
 		error = break_deleg_wait(&delegated_inode);
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index ffc7360..076fd2d 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -858,7 +858,7 @@
 		dput(parent);
 		dput(next);
 	}
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 
 	return err;
 }
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 336f04d..d0a8920 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -567,7 +567,8 @@
 		override_cred->fsgid = inode->i_gid;
 		if (!attr->hardlink) {
 			err = security_dentry_create_files_as(dentry,
-					attr->mode, &dentry->d_name, old_cred,
+					attr->mode, &dentry->d_name,
+					old_cred ? old_cred : current_cred(),
 					override_cred);
 			if (err) {
 				put_cred(override_cred);
@@ -583,7 +584,7 @@
 			err = ovl_create_over_whiteout(dentry, inode, attr);
 	}
 out_revert_creds:
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 	return err;
 }
 
@@ -659,7 +660,7 @@
 
 	old_cred = ovl_override_creds(dentry->d_sb);
 	err = ovl_set_redirect(dentry, false);
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 
 	return err;
 }
@@ -857,7 +858,7 @@
 		err = ovl_remove_upper(dentry, is_dir, &list);
 	else
 		err = ovl_remove_and_whiteout(dentry, &list);
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 	if (!err) {
 		if (is_dir)
 			clear_nlink(dentry->d_inode);
@@ -1225,7 +1226,7 @@
 out_unlock:
 	unlock_rename(new_upperdir, old_upperdir);
 out_revert_creds:
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 	ovl_nlink_end(new, locked);
 out_drop_write:
 	ovl_drop_write(old);
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 0bd276e4c..8cc2413 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -35,7 +35,7 @@
 	old_cred = ovl_override_creds(inode->i_sb);
 	realfile = open_with_fake_path(&file->f_path, flags, realinode,
 				       current_cred());
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 
 	pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
 		 file, file, ovl_whatisit(inode, realinode), file->f_flags,
@@ -179,7 +179,7 @@
 
 	old_cred = ovl_override_creds(inode->i_sb);
 	ret = vfs_llseek(real.file, offset, whence);
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 
 	file->f_pos = real.file->f_pos;
 	inode_unlock(inode);
@@ -245,7 +245,7 @@
 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
 	ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
 			    ovl_iocb_to_rwf(iocb));
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 
 	ovl_file_accessed(file);
 
@@ -281,7 +281,7 @@
 	ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
 			     ovl_iocb_to_rwf(iocb));
 	file_end_write(real.file);
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 
 	/* Update size */
 	ovl_copyattr(ovl_inode_real(inode), inode);
@@ -308,7 +308,7 @@
 	if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) {
 		old_cred = ovl_override_creds(file_inode(file)->i_sb);
 		ret = vfs_fsync_range(real.file, start, end, datasync);
-		revert_creds(old_cred);
+		ovl_revert_creds(old_cred);
 	}
 
 	fdput(real);
@@ -332,7 +332,7 @@
 
 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
 	ret = call_mmap(vma->vm_file, vma);
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 
 	if (ret) {
 		/* Drop reference count from new vm_file value */
@@ -360,7 +360,7 @@
 
 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
 	ret = vfs_fallocate(real.file, mode, offset, len);
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 
 	/* Update size */
 	ovl_copyattr(ovl_inode_real(inode), inode);
@@ -382,7 +382,7 @@
 
 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
 	ret = vfs_fadvise(real.file, offset, len, advice);
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 
 	fdput(real);
 
@@ -402,7 +402,7 @@
 
 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
 	ret = vfs_ioctl(real.file, cmd, arg);
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 
 	fdput(real);
 
@@ -591,7 +591,7 @@
 						real_out.file, pos_out, len);
 		break;
 	}
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 
 	/* Update size */
 	ovl_copyattr(ovl_inode_real(inode_out), inode_out);
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index f038984..08881de 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -64,7 +64,7 @@
 		inode_lock(upperdentry->d_inode);
 		old_cred = ovl_override_creds(dentry->d_sb);
 		err = notify_change(upperdentry, attr, NULL);
-		revert_creds(old_cred);
+		ovl_revert_creds(old_cred);
 		if (!err)
 			ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
 		inode_unlock(upperdentry->d_inode);
@@ -260,7 +260,7 @@
 		stat->nlink = dentry->d_inode->i_nlink;
 
 out:
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 
 	return err;
 }
@@ -294,7 +294,7 @@
 		mask |= MAY_READ;
 	}
 	err = inode_permission(realinode, mask);
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 
 	return err;
 }
@@ -311,7 +311,7 @@
 
 	old_cred = ovl_override_creds(dentry->d_sb);
 	p = vfs_get_link(ovl_dentry_real(dentry), done);
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 	return p;
 }
 
@@ -354,7 +354,7 @@
 		WARN_ON(flags != XATTR_REPLACE);
 		err = vfs_removexattr(realdentry, name);
 	}
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 
 	/* copy c/mtime */
 	ovl_copyattr(d_inode(realdentry), inode);
@@ -365,6 +365,21 @@
 	return err;
 }
 
+int __ovl_xattr_get(struct dentry *dentry, struct inode *inode,
+		    const char *name, void *value, size_t size)
+{
+	ssize_t res;
+	const struct cred *old_cred;
+	struct dentry *realdentry =
+		ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry);
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	res = __vfs_getxattr(realdentry, d_inode(realdentry), name, value,
+			     size);
+	ovl_revert_creds(old_cred);
+	return res;
+}
+
 int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
 		  void *value, size_t size)
 {
@@ -375,7 +390,7 @@
 
 	old_cred = ovl_override_creds(dentry->d_sb);
 	res = vfs_getxattr(realdentry, name, value, size);
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 	return res;
 }
 
@@ -399,7 +414,7 @@
 
 	old_cred = ovl_override_creds(dentry->d_sb);
 	res = vfs_listxattr(realdentry, list, size);
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 	if (res <= 0 || size == 0)
 		return res;
 
@@ -434,7 +449,7 @@
 
 	old_cred = ovl_override_creds(inode->i_sb);
 	acl = get_acl(realinode, type);
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 
 	return acl;
 }
@@ -472,7 +487,7 @@
 		filemap_write_and_wait(realinode->i_mapping);
 
 	err = realinode->i_op->fiemap(realinode, fieinfo, start, len);
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 
 	return err;
 }
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index badf039..499cd1f 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -109,10 +109,11 @@
 
 static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name)
 {
-	int res, err;
+	ssize_t res;
+	int err;
 	struct ovl_fh *fh = NULL;
 
-	res = vfs_getxattr(dentry, name, NULL, 0);
+	res = ovl_vfs_getxattr(dentry, name, NULL, 0);
 	if (res < 0) {
 		if (res == -ENODATA || res == -EOPNOTSUPP)
 			return NULL;
@@ -126,7 +127,7 @@
 	if (!fh)
 		return ERR_PTR(-ENOMEM);
 
-	res = vfs_getxattr(dentry, name, fh, res);
+	res = ovl_vfs_getxattr(dentry, name, fh, res);
 	if (res < 0)
 		goto fail;
 
@@ -144,10 +145,11 @@
 	return NULL;
 
 fail:
-	pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
+	pr_warn_ratelimited("overlayfs: failed to get origin (%zi)\n", res);
 	goto out;
 invalid:
-	pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
+	pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n",
+			    (int)res, fh);
 	goto out;
 }
 
@@ -1077,7 +1079,7 @@
 			goto out_free_oe;
 	}
 
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 	if (origin_path) {
 		dput(origin_path->dentry);
 		kfree(origin_path);
@@ -1104,7 +1106,7 @@
 	kfree(upperredirect);
 out:
 	kfree(d.redirect);
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 	return ERR_PTR(err);
 }
 
@@ -1158,7 +1160,7 @@
 			dput(this);
 		}
 	}
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 
 	return positive;
 }
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 265bf9c..3227f21 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -208,6 +208,9 @@
 void ovl_drop_write(struct dentry *dentry);
 struct dentry *ovl_workdir(struct dentry *dentry);
 const struct cred *ovl_override_creds(struct super_block *sb);
+void ovl_revert_creds(const struct cred *oldcred);
+ssize_t ovl_vfs_getxattr(struct dentry *dentry, const char *name, void *buf,
+			 size_t size);
 struct super_block *ovl_same_sb(struct super_block *sb);
 int ovl_can_decode_fh(struct super_block *sb);
 struct dentry *ovl_indexdir(struct super_block *sb);
@@ -350,6 +353,8 @@
 		  const void *value, size_t size, int flags);
 int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
 		  void *value, size_t size);
+int __ovl_xattr_get(struct dentry *dentry, struct inode *inode,
+		    const char *name, void *value, size_t size);
 ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
 struct posix_acl *ovl_get_acl(struct inode *inode, int type);
 int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 6ed1ace..c5a78db 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -20,6 +20,7 @@
 	bool nfs_export;
 	int xino;
 	bool metacopy;
+	bool override_creds;
 };
 
 struct ovl_sb {
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index cc8303a..ec591b4 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -289,7 +289,7 @@
 		}
 		inode_unlock(dir->d_inode);
 	}
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 
 	return err;
 }
@@ -921,7 +921,7 @@
 
 	old_cred = ovl_override_creds(dentry->d_sb);
 	err = ovl_dir_read_merged(dentry, list, &root);
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 	if (err)
 		return err;
 
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 2d028c0..5fcdab4a 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -56,6 +56,11 @@
 MODULE_PARM_DESC(ovl_xino_auto_def,
 		 "Auto enable xino feature");
 
+static bool __read_mostly ovl_override_creds_def = true;
+module_param_named(override_creds, ovl_override_creds_def, bool, 0644);
+MODULE_PARM_DESC(ovl_override_creds_def,
+		 "Use mounter's credentials for accesses");
+
 static void ovl_entry_stack_free(struct ovl_entry *oe)
 {
 	unsigned int i;
@@ -367,6 +372,9 @@
 	if (ofs->config.metacopy != ovl_metacopy_def)
 		seq_printf(m, ",metacopy=%s",
 			   ofs->config.metacopy ? "on" : "off");
+	if (ofs->config.override_creds != ovl_override_creds_def)
+		seq_show_option(m, "override_creds",
+				ofs->config.override_creds ? "on" : "off");
 	return 0;
 }
 
@@ -406,6 +414,8 @@
 	OPT_XINO_AUTO,
 	OPT_METACOPY_ON,
 	OPT_METACOPY_OFF,
+	OPT_OVERRIDE_CREDS_ON,
+	OPT_OVERRIDE_CREDS_OFF,
 	OPT_ERR,
 };
 
@@ -424,6 +434,8 @@
 	{OPT_XINO_AUTO,			"xino=auto"},
 	{OPT_METACOPY_ON,		"metacopy=on"},
 	{OPT_METACOPY_OFF,		"metacopy=off"},
+	{OPT_OVERRIDE_CREDS_ON,		"override_creds=on"},
+	{OPT_OVERRIDE_CREDS_OFF,	"override_creds=off"},
 	{OPT_ERR,			NULL}
 };
 
@@ -482,6 +494,7 @@
 	config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
 	if (!config->redirect_mode)
 		return -ENOMEM;
+	config->override_creds = ovl_override_creds_def;
 
 	while ((p = ovl_next_opt(&opt)) != NULL) {
 		int token;
@@ -562,6 +575,14 @@
 			config->metacopy = false;
 			break;
 
+		case OPT_OVERRIDE_CREDS_ON:
+			config->override_creds = true;
+			break;
+
+		case OPT_OVERRIDE_CREDS_OFF:
+			config->override_creds = false;
+			break;
+
 		default:
 			pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
 			return -EINVAL;
@@ -864,6 +885,14 @@
 }
 
 static int __maybe_unused
+__ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
+			  struct dentry *dentry, struct inode *inode,
+			  const char *name, void *buffer, size_t size)
+{
+	return __ovl_xattr_get(dentry, inode, handler->name, buffer, size);
+}
+
+static int __maybe_unused
 ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
 			struct dentry *dentry, struct inode *inode,
 			const char *name, const void *value,
@@ -943,6 +972,13 @@
 	return ovl_xattr_get(dentry, inode, name, buffer, size);
 }
 
+static int __ovl_other_xattr_get(const struct xattr_handler *handler,
+				 struct dentry *dentry, struct inode *inode,
+				 const char *name, void *buffer, size_t size)
+{
+	return __ovl_xattr_get(dentry, inode, name, buffer, size);
+}
+
 static int ovl_other_xattr_set(const struct xattr_handler *handler,
 			       struct dentry *dentry, struct inode *inode,
 			       const char *name, const void *value,
@@ -956,6 +992,7 @@
 	.name = XATTR_NAME_POSIX_ACL_ACCESS,
 	.flags = ACL_TYPE_ACCESS,
 	.get = ovl_posix_acl_xattr_get,
+	.__get = __ovl_posix_acl_xattr_get,
 	.set = ovl_posix_acl_xattr_set,
 };
 
@@ -964,6 +1001,7 @@
 	.name = XATTR_NAME_POSIX_ACL_DEFAULT,
 	.flags = ACL_TYPE_DEFAULT,
 	.get = ovl_posix_acl_xattr_get,
+	.__get = __ovl_posix_acl_xattr_get,
 	.set = ovl_posix_acl_xattr_set,
 };
 
@@ -976,6 +1014,7 @@
 static const struct xattr_handler ovl_other_xattr_handler = {
 	.prefix	= "", /* catch all */
 	.get = ovl_other_xattr_get,
+	.__get = __ovl_other_xattr_get,
 	.set = ovl_other_xattr_set,
 };
 
@@ -1648,7 +1687,6 @@
 		       ovl_dentry_lower(root_dentry), NULL);
 
 	sb->s_root = root_dentry;
-
 	return 0;
 
 out_free_oe:
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index db8bdb2..0937b07 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -40,9 +40,23 @@
 {
 	struct ovl_fs *ofs = sb->s_fs_info;
 
+	if (!ofs->config.override_creds)
+		return NULL;
 	return override_creds(ofs->creator_cred);
 }
 
+void ovl_revert_creds(const struct cred *old_cred)
+{
+	if (old_cred)
+		revert_creds(old_cred);
+}
+
+ssize_t ovl_vfs_getxattr(struct dentry *dentry, const char *name, void *buf,
+			 size_t size)
+{
+	return __vfs_getxattr(dentry, d_inode(dentry), name, buf, size);
+}
+
 struct super_block *ovl_same_sb(struct super_block *sb)
 {
 	struct ovl_fs *ofs = sb->s_fs_info;
@@ -541,9 +555,9 @@
 
 bool ovl_check_origin_xattr(struct dentry *dentry)
 {
-	int res;
+	ssize_t res;
 
-	res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0);
+	res = ovl_vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0);
 
 	/* Zero size value means "copied up but origin unknown" */
 	if (res >= 0)
@@ -554,13 +568,13 @@
 
 bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
 {
-	int res;
+	ssize_t res;
 	char val;
 
 	if (!d_is_dir(dentry))
 		return false;
 
-	res = vfs_getxattr(dentry, name, &val, 1);
+	res = ovl_vfs_getxattr(dentry, name, &val, 1);
 	if (res == 1 && val == 'y')
 		return true;
 
@@ -795,7 +809,7 @@
 	 * value relative to the upper inode nlink in an upper inode xattr.
 	 */
 	err = ovl_set_nlink_upper(dentry);
-	revert_creds(old_cred);
+	ovl_revert_creds(old_cred);
 
 out:
 	if (err)
@@ -815,7 +829,7 @@
 
 			old_cred = ovl_override_creds(dentry->d_sb);
 			ovl_cleanup_index(dentry);
-			revert_creds(old_cred);
+			ovl_revert_creds(old_cred);
 		}
 
 		mutex_unlock(&OVL_I(d_inode(dentry))->lock);
@@ -844,13 +858,13 @@
 /* err < 0, 0 if no metacopy xattr, 1 if metacopy xattr found */
 int ovl_check_metacopy_xattr(struct dentry *dentry)
 {
-	int res;
+	ssize_t res;
 
 	/* Only regular files can have metacopy xattr */
 	if (!S_ISREG(d_inode(dentry)->i_mode))
 		return 0;
 
-	res = vfs_getxattr(dentry, OVL_XATTR_METACOPY, NULL, 0);
+	res = ovl_vfs_getxattr(dentry, OVL_XATTR_METACOPY, NULL, 0);
 	if (res < 0) {
 		if (res == -ENODATA || res == -EOPNOTSUPP)
 			return 0;
@@ -859,7 +873,7 @@
 
 	return 1;
 out:
-	pr_warn_ratelimited("overlayfs: failed to get metacopy (%i)\n", res);
+	pr_warn_ratelimited("overlayfs: failed to get metacopy (%zi)\n", res);
 	return res;
 }
 
@@ -885,7 +899,7 @@
 	ssize_t res;
 	char *buf = NULL;
 
-	res = vfs_getxattr(dentry, name, NULL, 0);
+	res = ovl_vfs_getxattr(dentry, name, NULL, 0);
 	if (res < 0) {
 		if (res == -ENODATA || res == -EOPNOTSUPP)
 			return -ENODATA;
@@ -897,7 +911,7 @@
 		if (!buf)
 			return -ENOMEM;
 
-		res = vfs_getxattr(dentry, name, buf, res);
+		res = ovl_vfs_getxattr(dentry, name, buf, res);
 		if (res < 0)
 			goto fail;
 	}
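
Every revert_creds() call in the overlayfs hunks becomes ovl_revert_creds() because ovl_override_creds() now returns NULL when the mount was created with override_creds=off; the NULL check above lets both configurations share one exit path. The pairing each call site follows, as a sketch (ovl_example_op() is hypothetical):

static int ovl_example_op(struct dentry *dentry)
{
	const struct cred *old_cred;
	int err;

	/* NULL when override_creds=off; the mounter's creds otherwise */
	old_cred = ovl_override_creds(dentry->d_sb);

	err = 0;	/* ... act on the upper/lower layers here ... */

	ovl_revert_creds(old_cred);	/* no-op for NULL */
	return err;
}
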
diff --git a/fs/pnode.c b/fs/pnode.c
index 53d411a..681916df 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -607,3 +607,19 @@
 
 	return 0;
 }
+
+void propagate_remount(struct mount *mnt)
+{
+	struct mount *parent = mnt->mnt_parent;
+	struct mount *p = mnt, *m;
+	struct super_block *sb = mnt->mnt.mnt_sb;
+
+	if (!sb->s_op->copy_mnt_data)
+		return;
+	for (p = propagation_next(parent, parent); p;
+				p = propagation_next(p, parent)) {
+		m = __lookup_mnt(&p->mnt, mnt->mnt_mountpoint);
+		if (m)
+			sb->s_op->copy_mnt_data(m->mnt.data, mnt->mnt.data);
+	}
+}
diff --git a/fs/pnode.h b/fs/pnode.h
index dc87e65..a9a6576 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -44,6 +44,7 @@
 int propagate_umount(struct list_head *);
 int propagate_mount_busy(struct mount *, int);
 void propagate_mount_unlock(struct mount *);
+void propagate_remount(struct mount *);
 void mnt_release_group_id(struct mount *);
 int get_dominating_id(struct mount *mnt, const struct path *root);
 unsigned int mnt_get_count(struct mount *mnt);
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 817c02b1..cad2c60 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -97,3 +97,9 @@
 
 	  Say Y if you are running any user-space software which takes benefit from
 	  this interface. For example, rkt is such a piece of software.
+
+config PROC_UID
+	bool "Include /proc/uid/ files"
+	depends on PROC_FS && RT_MUTEXES
+	help
+	  Provides aggregated per-uid information under /proc/uid.
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index ead487e..3f849ca 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -27,6 +27,7 @@
 proc-y	+= namespaces.o
 proc-y	+= self.o
 proc-y	+= thread_self.o
+proc-$(CONFIG_PROC_UID)  += uid.o
 proc-$(CONFIG_PROC_SYSCTL)	+= proc_sysctl.o
 proc-$(CONFIG_NET)		+= proc_net.o
 proc-$(CONFIG_PROC_KCORE)	+= kcore.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3b9b726..86881d2 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -94,6 +94,7 @@
 #include <linux/sched/stat.h>
 #include <linux/flex_array.h>
 #include <linux/posix-timers.h>
+#include <linux/cpufreq_times.h>
 #include <trace/events/oom.h>
 #include "internal.h"
 #include "fd.h"
@@ -3041,6 +3042,9 @@
 #ifdef CONFIG_LIVEPATCH
 	ONE("patch_state",  S_IRUSR, proc_pid_patch_state),
 #endif
+#ifdef CONFIG_CPU_FREQ_TIMES
+	ONE("time_in_state", 0444, proc_time_in_state_show),
+#endif
 };
 
 static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@@ -3419,6 +3423,9 @@
 #ifdef CONFIG_LIVEPATCH
 	ONE("patch_state",  S_IRUSR, proc_pid_patch_state),
 #endif
+#ifdef CONFIG_CPU_FREQ_TIMES
+	ONE("time_in_state", 0444, proc_time_in_state_show),
+#endif
 };
 
 static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index fc5306a..1b4f336 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -491,17 +491,13 @@
 	return inode;
 }
 
-int proc_fill_super(struct super_block *s, void *data, int silent)
+int proc_fill_super(struct super_block *s)
 {
-	struct pid_namespace *ns = get_pid_ns(s->s_fs_info);
 	struct inode *root_inode;
 	int ret;
 
-	if (!proc_parse_options(data, ns))
-		return -EINVAL;
-
 	/* User space would break if executables or devices appear on proc */
-	s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV;
+	s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NODEV;
 	s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC;
 	s->s_blocksize = 1024;
 	s->s_blocksize_bits = 10;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 95b1419..bacad3e 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -212,7 +212,7 @@
 void proc_init_kmemcache(void);
 void set_proc_pid_nlink(void);
 extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
-extern int proc_fill_super(struct super_block *, void *data, int flags);
+extern int proc_fill_super(struct super_block *);
 extern void proc_entry_rundown(struct proc_dir_entry *);
 
 /*
@@ -258,6 +258,15 @@
 #endif
 
 /*
+ * uid.c
+ */
+#ifdef CONFIG_PROC_UID
+extern int proc_uid_init(void);
+#else
+static inline int proc_uid_init(void) { return 0; }
+#endif
+
+/*
  * proc_tty.c
  */
 #ifdef CONFIG_TTY
@@ -270,7 +279,6 @@
  * root.c
  */
 extern struct proc_dir_entry proc_root;
-extern int proc_parse_options(char *options, struct pid_namespace *pid);
 
 extern void proc_self_init(void);
 extern int proc_remount(struct super_block *, int *, char *);
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index d066947..8468bae 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -10,9 +10,6 @@
 #include <linux/seqlock.h>
 #include <linux/time.h>
 
-#define LOAD_INT(x) ((x) >> FSHIFT)
-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
-
 static int loadavg_proc_show(struct seq_file *m, void *v)
 {
 	unsigned long avnrun[3];
diff --git a/fs/proc/root.c b/fs/proc/root.c
index f4b1a9d..f672dbac 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -26,6 +26,21 @@
 
 #include "internal.h"
 
+static int proc_test_super(struct super_block *sb, void *data)
+{
+	return sb->s_fs_info == data;
+}
+
+static int proc_set_super(struct super_block *sb, void *data)
+{
+	int err = set_anon_super(sb, NULL);
+	if (!err) {
+		struct pid_namespace *ns = (struct pid_namespace *)data;
+		sb->s_fs_info = get_pid_ns(ns);
+	}
+	return err;
+}
+
 enum {
 	Opt_gid, Opt_hidepid, Opt_err,
 };
@@ -36,7 +51,7 @@
 	{Opt_err, NULL},
 };
 
-int proc_parse_options(char *options, struct pid_namespace *pid)
+static int proc_parse_options(char *options, struct pid_namespace *pid)
 {
 	char *p;
 	substring_t args[MAX_OPT_ARGS];
@@ -89,16 +104,45 @@
 static struct dentry *proc_mount(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data)
 {
+	int err;
+	struct super_block *sb;
 	struct pid_namespace *ns;
+	char *options;
 
 	if (flags & SB_KERNMOUNT) {
-		ns = data;
-		data = NULL;
+		ns = (struct pid_namespace *)data;
+		options = NULL;
 	} else {
 		ns = task_active_pid_ns(current);
+		options = data;
+
+		/* Does the mounter have privilege over the pid namespace? */
+		if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
+			return ERR_PTR(-EPERM);
 	}
 
-	return mount_ns(fs_type, flags, data, ns, ns->user_ns, proc_fill_super);
+	sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns);
+	if (IS_ERR(sb))
+		return ERR_CAST(sb);
+
+	if (!proc_parse_options(options, ns)) {
+		deactivate_locked_super(sb);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (!sb->s_root) {
+		err = proc_fill_super(sb);
+		if (err) {
+			deactivate_locked_super(sb);
+			return ERR_PTR(err);
+		}
+
+		sb->s_flags |= MS_ACTIVE;
+		/* User space would break if executables appear on proc */
+		sb->s_iflags |= SB_I_NOEXEC;
+	}
+
+	return dget(sb->s_root);
 }
 
 static void proc_kill_sb(struct super_block *sb)
@@ -130,6 +174,7 @@
 	proc_symlink("mounts", NULL, "self/mounts");
 
 	proc_net_init();
+	proc_uid_init();
 	proc_mkdir("fs", NULL);
 	proc_mkdir("driver", NULL);
 	proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 71aba44..1b784b4 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -123,6 +123,56 @@
 }
 #endif
 
+static void seq_print_vma_name(struct seq_file *m, struct vm_area_struct *vma)
+{
+	const char __user *name = vma_get_anon_name(vma);
+	struct mm_struct *mm = vma->vm_mm;
+
+	unsigned long page_start_vaddr;
+	unsigned long page_offset;
+	unsigned long num_pages;
+	unsigned long max_len = NAME_MAX;
+	int i;
+
+	page_start_vaddr = (unsigned long)name & PAGE_MASK;
+	page_offset = (unsigned long)name - page_start_vaddr;
+	num_pages = DIV_ROUND_UP(page_offset + max_len, PAGE_SIZE);
+
+	seq_puts(m, "[anon:");
+
+	for (i = 0; i < num_pages; i++) {
+		int len;
+		int write_len;
+		const char *kaddr;
+		long pages_pinned;
+		struct page *page;
+
+		pages_pinned = get_user_pages_remote(current, mm,
+				page_start_vaddr, 1, 0, &page, NULL, NULL);
+		if (pages_pinned < 1) {
+			seq_puts(m, "<fault>]");
+			return;
+		}
+
+		kaddr = (const char *)kmap(page);
+		len = min(max_len, PAGE_SIZE - page_offset);
+		write_len = strnlen(kaddr + page_offset, len);
+		seq_write(m, kaddr + page_offset, write_len);
+		kunmap(page);
+		put_page(page);
+
+		/* if strnlen hit a null terminator then we're done */
+		if (write_len != len)
+			break;
+
+		max_len -= len;
+		page_offset = 0;
+		page_start_vaddr += PAGE_SIZE;
+	}
+
+	seq_putc(m, ']');
+}
+
 static void vma_stop(struct proc_maps_private *priv)
 {
 	struct mm_struct *mm = priv->mm;
@@ -348,8 +398,15 @@
 			goto done;
 		}
 
-		if (is_stack(vma))
+		if (is_stack(vma)) {
 			name = "[stack]";
+			goto done;
+		}
+
+		if (vma_get_anon_name(vma)) {
+			seq_pad(m, ' ');
+			seq_print_vma_name(m, vma);
+		}
 	}
 
 done:
@@ -792,6 +849,11 @@
 	smap_gather_stats(vma, &mss);
 
 	show_map_vma(m, vma);
+	if (vma_get_anon_name(vma)) {
+		seq_puts(m, "Name:           ");
+		seq_print_vma_name(m, vma);
+		seq_putc(m, '\n');
+	}
 
 	SEQ_PUT_DEC("Size:           ", vma->vm_end - vma->vm_start);
 	SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma));
diff --git a/fs/proc/uid.c b/fs/proc/uid.c
new file mode 100644
index 0000000..311717e
--- /dev/null
+++ b/fs/proc/uid.c
@@ -0,0 +1,304 @@
+/*
+ * /proc/uid support
+ */
+
+#include <linux/cpufreq_times.h>
+#include <linux/fs.h>
+#include <linux/hashtable.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/rtmutex.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+static struct proc_dir_entry *proc_uid;
+
+#define UID_HASH_BITS 10
+
+static DECLARE_HASHTABLE(proc_uid_hash_table, UID_HASH_BITS);
+
+/*
+ * use rt_mutex here to avoid priority inversion between high-priority readers
+ * of these files and tasks calling proc_register_uid().
+ */
+static DEFINE_RT_MUTEX(proc_uid_lock); /* proc_uid_hash_table */
+
+struct uid_hash_entry {
+	uid_t uid;
+	struct hlist_node hash;
+};
+
+/* Caller must hold proc_uid_lock */
+static bool uid_hash_entry_exists_locked(uid_t uid)
+{
+	struct uid_hash_entry *entry;
+
+	hash_for_each_possible(proc_uid_hash_table, entry, hash, uid) {
+		if (entry->uid == uid)
+			return true;
+	}
+	return false;
+}
+
+void proc_register_uid(kuid_t kuid)
+{
+	struct uid_hash_entry *entry;
+	bool exists;
+	uid_t uid = from_kuid_munged(current_user_ns(), kuid);
+
+	rt_mutex_lock(&proc_uid_lock);
+	exists = uid_hash_entry_exists_locked(uid);
+	rt_mutex_unlock(&proc_uid_lock);
+	if (exists)
+		return;
+
+	entry = kzalloc(sizeof(struct uid_hash_entry), GFP_KERNEL);
+	if (!entry)
+		return;
+	entry->uid = uid;
+
+	rt_mutex_lock(&proc_uid_lock);
+	if (uid_hash_entry_exists_locked(uid))
+		kfree(entry);
+	else
+		hash_add(proc_uid_hash_table, &entry->hash, uid);
+	rt_mutex_unlock(&proc_uid_lock);
+}
+
+struct uid_entry {
+	const char *name;
+	int len;
+	umode_t mode;
+	const struct inode_operations *iop;
+	const struct file_operations *fop;
+};
+
+#define NOD(NAME, MODE, IOP, FOP) {			\
+	.name	= (NAME),				\
+	.len	= sizeof(NAME) - 1,			\
+	.mode	= MODE,					\
+	.iop	= IOP,					\
+	.fop	= FOP,					\
+}
+
+#ifdef CONFIG_CPU_FREQ_TIMES
+static const struct file_operations proc_uid_time_in_state_operations = {
+	.open		= single_uid_time_in_state_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+#endif
+
+static const struct uid_entry uid_base_stuff[] = {
+#ifdef CONFIG_CPU_FREQ_TIMES
+	NOD("time_in_state", 0444, NULL, &proc_uid_time_in_state_operations),
+#endif
+};
+
+static const struct inode_operations proc_uid_def_inode_operations = {
+	.setattr	= proc_setattr,
+};
+
+static struct inode *proc_uid_make_inode(struct super_block *sb, kuid_t kuid)
+{
+	struct inode *inode;
+
+	inode = new_inode(sb);
+	if (!inode)
+		return NULL;
+
+	inode->i_ino = get_next_ino();
+	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_op = &proc_uid_def_inode_operations;
+	inode->i_uid = kuid;
+
+	return inode;
+}
+
+static struct dentry *proc_uident_instantiate(struct dentry *dentry,
+				   struct task_struct *unused, const void *ptr)
+{
+	const struct uid_entry *u = ptr;
+	struct inode *inode;
+
+	uid_t uid = name_to_int(&dentry->d_name);
+	kuid_t kuid;
+	bool uid_exists;
+	rt_mutex_lock(&proc_uid_lock);
+	uid_exists = uid_hash_entry_exists_locked(uid);
+	rt_mutex_unlock(&proc_uid_lock);
+	if (uid_exists) {
+		kuid = make_kuid(current_user_ns(), uid);
+		inode = proc_uid_make_inode(dentry->d_sb, kuid);
+		if (!inode)
+			return ERR_PTR(-ENOENT);
+	} else {
+		return ERR_PTR(-ENOENT);
+	}
+
+	inode->i_mode = u->mode;
+	if (S_ISDIR(inode->i_mode))
+		set_nlink(inode, 2);
+	if (u->iop)
+		inode->i_op = u->iop;
+	if (u->fop)
+		inode->i_fop = u->fop;
+
+	return d_splice_alias(inode, dentry);
+}
+
+static struct dentry *proc_uid_base_lookup(struct inode *dir,
+					   struct dentry *dentry,
+					   unsigned int flags)
+{
+	const struct uid_entry *u, *last;
+	unsigned int nents = ARRAY_SIZE(uid_base_stuff);
+
+	if (nents == 0)
+		return ERR_PTR(-ENOENT);
+
+	last = &uid_base_stuff[nents - 1];
+	for (u = uid_base_stuff; u <= last; u++) {
+		if (u->len != dentry->d_name.len)
+			continue;
+		if (!memcmp(dentry->d_name.name, u->name, u->len))
+			break;
+	}
+	if (u > last)
+		return ERR_PTR(-ENOENT);
+
+	return proc_uident_instantiate(dentry, NULL, u);
+}
+
+static int proc_uid_base_readdir(struct file *file, struct dir_context *ctx)
+{
+	unsigned int nents = ARRAY_SIZE(uid_base_stuff);
+	const struct uid_entry *u;
+
+	if (!dir_emit_dots(file, ctx))
+		return 0;
+
+	if (ctx->pos >= nents + 2)
+		return 0;
+
+	for (u = uid_base_stuff + (ctx->pos - 2);
+	     u < uid_base_stuff + nents; u++) {
+		if (!proc_fill_cache(file, ctx, u->name, u->len,
+				     proc_uident_instantiate, NULL, u))
+			break;
+		ctx->pos++;
+	}
+
+	return 0;
+}
+
+static const struct inode_operations proc_uid_base_inode_operations = {
+	.lookup		= proc_uid_base_lookup,
+	.setattr	= proc_setattr,
+};
+
+static const struct file_operations proc_uid_base_operations = {
+	.read		= generic_read_dir,
+	.iterate	= proc_uid_base_readdir,
+	.llseek		= default_llseek,
+};
+
+static struct dentry *proc_uid_instantiate(struct dentry *dentry,
+				struct task_struct *unused, const void *ptr)
+{
+	unsigned int i, len;
+	nlink_t nlinks;
+	kuid_t *kuid = (kuid_t *)ptr;
+	struct inode *inode = proc_uid_make_inode(dentry->d_sb, *kuid);
+
+	if (!inode)
+		return ERR_PTR(-ENOENT);
+
+	inode->i_mode = S_IFDIR | 0555;
+	inode->i_op = &proc_uid_base_inode_operations;
+	inode->i_fop = &proc_uid_base_operations;
+	inode->i_flags |= S_IMMUTABLE;
+
+	nlinks = 2;
+	len = ARRAY_SIZE(uid_base_stuff);
+	for (i = 0; i < len; ++i) {
+		if (S_ISDIR(uid_base_stuff[i].mode))
+			++nlinks;
+	}
+	set_nlink(inode, nlinks);
+
+	return d_splice_alias(inode, dentry);
+}
+
+static int proc_uid_readdir(struct file *file, struct dir_context *ctx)
+{
+	int last_shown, i;
+	unsigned long bkt;
+	struct uid_hash_entry *entry;
+
+	if (!dir_emit_dots(file, ctx))
+		return 0;
+
+	i = 0;
+	last_shown = ctx->pos - 2;
+	rt_mutex_lock(&proc_uid_lock);
+	hash_for_each(proc_uid_hash_table, bkt, entry, hash) {
+		int len;
+		char buf[PROC_NUMBUF];
+
+		if (i++ < last_shown)
+			continue;
+		len = snprintf(buf, sizeof(buf), "%u", entry->uid);
+		if (!proc_fill_cache(file, ctx, buf, len,
+				     proc_uid_instantiate, NULL, &entry->uid))
+			break;
+		ctx->pos++;
+	}
+	rt_mutex_unlock(&proc_uid_lock);
+	return 0;
+}
+
+static struct dentry *proc_uid_lookup(struct inode *dir, struct dentry *dentry,
+				      unsigned int flags)
+{
+	int result = -ENOENT;
+
+	uid_t uid = name_to_int(&dentry->d_name);
+	bool uid_exists;
+
+	rt_mutex_lock(&proc_uid_lock);
+	uid_exists = uid_hash_entry_exists_locked(uid);
+	rt_mutex_unlock(&proc_uid_lock);
+	if (uid_exists) {
+		kuid_t kuid = make_kuid(current_user_ns(), uid);
+
+		return proc_uid_instantiate(dentry, NULL, &kuid);
+	}
+	return ERR_PTR(result);
+}
+
+static const struct file_operations proc_uid_operations = {
+	.read		= generic_read_dir,
+	.iterate	= proc_uid_readdir,
+	.llseek		= default_llseek,
+};
+
+static const struct inode_operations proc_uid_inode_operations = {
+	.lookup		= proc_uid_lookup,
+	.setattr	= proc_setattr,
+};
+
+int __init proc_uid_init(void)
+{
+	proc_uid = proc_mkdir("uid", NULL);
+	if (!proc_uid)
+		return -ENOMEM;
+	proc_uid->proc_iops = &proc_uid_inode_operations;
+	proc_uid->proc_fops = &proc_uid_operations;
+
+	return 0;
+}
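
fs/proc/uid.c only lists uids that were added to proc_uid_hash_table; the callers of proc_register_uid() are expected to live elsewhere in this series, and duplicate registrations are tolerated, so callers need not track which uids they have already reported. The expected call shape is simply the following; example_note_task() is a hypothetical caller:

/* register the owner of a task so /proc/uid/<uid>/ appears */
static void example_note_task(struct task_struct *task)
{
	proc_register_uid(task_uid(task));
}
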
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index e16fb8f..bd07f0f 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -121,7 +121,9 @@
 	if (err)
 		goto out;
 	show_mnt_opts(m, mnt);
-	if (sb->s_op->show_options)
+	if (sb->s_op->show_options2)
+		err = sb->s_op->show_options2(mnt, m, mnt_path.dentry);
+	else if (sb->s_op->show_options)
 		err = sb->s_op->show_options(m, mnt_path.dentry);
 	seq_puts(m, " 0 0\n");
 out:
@@ -183,7 +185,9 @@
 	err = show_sb_opts(m, sb);
 	if (err)
 		goto out;
-	if (sb->s_op->show_options)
+	if (sb->s_op->show_options2)
+		err = sb->s_op->show_options2(mnt, m, mnt->mnt_root);
+	else if (sb->s_op->show_options)
 		err = sb->s_op->show_options(m, mnt->mnt_root);
 	seq_putc(m, '\n');
 out:
diff --git a/fs/read_write.c b/fs/read_write.c
index 85fd7a8..40b9b07 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -460,6 +460,8 @@
 	return ret;
 }
 
+EXPORT_SYMBOL(vfs_read);
+
 static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
 {
 	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
@@ -558,6 +560,8 @@
 	return ret;
 }
 
+EXPORT_SYMBOL(vfs_write);
+
 static inline loff_t file_pos_read(struct file *file)
 {
 	return file->f_mode & FMODE_STREAM ? 0 : file->f_pos;
diff --git a/fs/sdcardfs/Kconfig b/fs/sdcardfs/Kconfig
new file mode 100644
index 0000000..a1c1033
--- /dev/null
+++ b/fs/sdcardfs/Kconfig
@@ -0,0 +1,13 @@
+config SDCARD_FS
+	tristate "sdcard file system"
+	depends on CONFIGFS_FS
+	default n
+	help
+	  Sdcardfs is based on the Wrapfs file system.
+
+config SDCARD_FS_FADV_NOACTIVE
+	bool "sdcardfs fadvise noactive support"
+	depends on FADV_NOACTIVE
+	default y
+	help
+	  Sdcardfs supports fadvise noactive mode.
diff --git a/fs/sdcardfs/Makefile b/fs/sdcardfs/Makefile
new file mode 100644
index 0000000..b84fbb2
--- /dev/null
+++ b/fs/sdcardfs/Makefile
@@ -0,0 +1,7 @@
+SDCARDFS_VERSION="0.1"
+
+EXTRA_CFLAGS += -DSDCARDFS_VERSION=\"$(SDCARDFS_VERSION)\"
+
+obj-$(CONFIG_SDCARD_FS) += sdcardfs.o
+
+sdcardfs-y := dentry.o file.o inode.o main.o super.o lookup.o mmap.o packagelist.o derived_perm.o
diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c
new file mode 100644
index 0000000..cb573f1
--- /dev/null
+++ b/fs/sdcardfs/dentry.c
@@ -0,0 +1,196 @@
+/*
+ * fs/sdcardfs/dentry.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which was written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+#include "linux/ctype.h"
+
+/*
+ * returns: -ERRNO if error (returned to user)
+ *          0: tell VFS to invalidate dentry
+ *          1: dentry is valid
+ */
+static int sdcardfs_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+	int err = 1;
+	struct path parent_lower_path, lower_path;
+	struct dentry *parent_dentry = NULL;
+	struct dentry *parent_lower_dentry = NULL;
+	struct dentry *lower_cur_parent_dentry = NULL;
+	struct dentry *lower_dentry = NULL;
+	struct inode *inode;
+	struct sdcardfs_inode_data *data;
+
+	if (flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	spin_lock(&dentry->d_lock);
+	if (IS_ROOT(dentry)) {
+		spin_unlock(&dentry->d_lock);
+		return 1;
+	}
+	spin_unlock(&dentry->d_lock);
+
+	/* check for an uninitialized obb_dentry and
+	 * whether the base obbpath has been changed or not
+	 */
+	if (is_obbpath_invalid(dentry)) {
+		return 0;
+	}
+
+	parent_dentry = dget_parent(dentry);
+	sdcardfs_get_lower_path(parent_dentry, &parent_lower_path);
+	sdcardfs_get_real_lower(dentry, &lower_path);
+	parent_lower_dentry = parent_lower_path.dentry;
+	lower_dentry = lower_path.dentry;
+	lower_cur_parent_dentry = dget_parent(lower_dentry);
+
+	if ((lower_dentry->d_flags & DCACHE_OP_REVALIDATE)) {
+		err = lower_dentry->d_op->d_revalidate(lower_dentry, flags);
+		if (err == 0) {
+			goto out;
+		}
+	}
+
+	spin_lock(&lower_dentry->d_lock);
+	if (d_unhashed(lower_dentry)) {
+		spin_unlock(&lower_dentry->d_lock);
+		err = 0;
+		goto out;
+	}
+	spin_unlock(&lower_dentry->d_lock);
+
+	if (parent_lower_dentry != lower_cur_parent_dentry) {
+		err = 0;
+		goto out;
+	}
+
+	if (dentry < lower_dentry) {
+		spin_lock(&dentry->d_lock);
+		spin_lock_nested(&lower_dentry->d_lock, DENTRY_D_LOCK_NESTED);
+	} else {
+		spin_lock(&lower_dentry->d_lock);
+		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+	}
+
+	if (!qstr_case_eq(&dentry->d_name, &lower_dentry->d_name)) {
+		err = 0;
+	}
+
+	if (dentry < lower_dentry) {
+		spin_unlock(&lower_dentry->d_lock);
+		spin_unlock(&dentry->d_lock);
+	} else {
+		spin_unlock(&dentry->d_lock);
+		spin_unlock(&lower_dentry->d_lock);
+	}
+	if (!err)
+		goto out;
+
+	/* If our top's inode is gone, we may be out of date */
+	inode = igrab(d_inode(dentry));
+	if (inode) {
+		data = top_data_get(SDCARDFS_I(inode));
+		if (!data || data->abandoned) {
+			err = 0;
+		}
+		if (data)
+			data_put(data);
+		iput(inode);
+	}
+
+out:
+	dput(parent_dentry);
+	dput(lower_cur_parent_dentry);
+	sdcardfs_put_lower_path(parent_dentry, &parent_lower_path);
+	sdcardfs_put_real_lower(dentry, &lower_path);
+	return err;
+}
+
+/* 1 = delete, 0 = cache */
+static int sdcardfs_d_delete(const struct dentry *d)
+{
+	return SDCARDFS_SB(d->d_sb)->options.nocache ? 1 : 0;
+}
+
+static void sdcardfs_d_release(struct dentry *dentry)
+{
+	if (!dentry || !dentry->d_fsdata)
+		return;
+	/* release and reset the lower paths */
+	if (has_graft_path(dentry))
+		sdcardfs_put_reset_orig_path(dentry);
+	sdcardfs_put_reset_lower_path(dentry);
+	free_dentry_private_data(dentry);
+}
+
+static int sdcardfs_hash_ci(const struct dentry *dentry,
+				struct qstr *qstr)
+{
+	/*
+	 * This function is a copy of vfat_hashi.
+	 * FIXME Should we support national language?
+	 *       Refer to vfat_hashi()
+	 * struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io;
+	 */
+	const unsigned char *name;
+	unsigned int len;
+	unsigned long hash;
+
+	name = qstr->name;
+	len = qstr->len;
+
+	hash = init_name_hash(dentry);
+	while (len--)
+		hash = partial_name_hash(tolower(*name++), hash);
+	qstr->hash = end_name_hash(hash);
+
+	return 0;
+}
+
+/*
+ * Case insensitive compare of two vfat names.
+ */
+static int sdcardfs_cmp_ci(const struct dentry *dentry,
+		unsigned int len, const char *str, const struct qstr *name)
+{
+	/* FIXME Should we support national language? */
+
+	if (name->len == len) {
+		if (str_n_case_eq(name->name, str, len))
+			return 0;
+	}
+	return 1;
+}
+
+static void sdcardfs_canonical_path(const struct path *path,
+				struct path *actual_path)
+{
+	sdcardfs_get_real_lower(path->dentry, actual_path);
+}
+
+const struct dentry_operations sdcardfs_ci_dops = {
+	.d_revalidate	= sdcardfs_d_revalidate,
+	.d_delete	= sdcardfs_d_delete,
+	.d_release	= sdcardfs_d_release,
+	.d_hash	= sdcardfs_hash_ci,
+	.d_compare	= sdcardfs_cmp_ci,
+	.d_canonical_path = sdcardfs_canonical_path,
+};
+
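
sdcardfs_ci_dops above makes dentry hashing and comparison case-insensitive: d_hash lowercases every byte before mixing it in, and d_compare requires equal length plus a case-insensitive byte match, so "Android" and "android" resolve to the same dentry. A rough userspace model of the same scheme (partial_name_hash() and init_name_hash() are kernel helpers, so a simple shift-and-add hash stands in for them here):

#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>

/* stand-in for the kernel's partial_name_hash(); any mixing step illustrates the point */
static unsigned long ci_hash(const char *name, size_t len)
{
	unsigned long hash = 0;

	while (len--)
		hash = (hash << 4) + hash + tolower((unsigned char)*name++);
	return hash;
}

/* mirrors sdcardfs_cmp_ci(): equal length plus case-insensitive byte compare */
static int ci_equal(const char *a, const char *b)
{
	size_t len = strlen(a);

	return len == strlen(b) && strncasecmp(a, b, len) == 0;
}

int main(void)
{
	const char *x = "Android", *y = "android";

	printf("hash(%s)=%lu hash(%s)=%lu equal=%d\n",
	       x, ci_hash(x, strlen(x)),
	       y, ci_hash(y, strlen(y)),
	       ci_equal(x, y));
	return 0;
}
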
diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c
new file mode 100644
index 0000000..78a669c
--- /dev/null
+++ b/fs/sdcardfs/derived_perm.c
@@ -0,0 +1,477 @@
+/*
+ * fs/sdcardfs/derived_perm.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which was written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+
+/* copy derived state from parent inode */
+static void inherit_derived_state(struct inode *parent, struct inode *child)
+{
+	struct sdcardfs_inode_info *pi = SDCARDFS_I(parent);
+	struct sdcardfs_inode_info *ci = SDCARDFS_I(child);
+
+	ci->data->perm = PERM_INHERIT;
+	ci->data->userid = pi->data->userid;
+	ci->data->d_uid = pi->data->d_uid;
+	ci->data->under_android = pi->data->under_android;
+	ci->data->under_cache = pi->data->under_cache;
+	ci->data->under_obb = pi->data->under_obb;
+}
+
+/* helper function for derived state */
+void setup_derived_state(struct inode *inode, perm_t perm, userid_t userid,
+					uid_t uid)
+{
+	struct sdcardfs_inode_info *info = SDCARDFS_I(inode);
+
+	info->data->perm = perm;
+	info->data->userid = userid;
+	info->data->d_uid = uid;
+	info->data->under_android = false;
+	info->data->under_cache = false;
+	info->data->under_obb = false;
+}
+
+/* While renaming, there is a point where we want the path from dentry,
+ * but the name from newdentry
+ */
+void get_derived_permission_new(struct dentry *parent, struct dentry *dentry,
+				const struct qstr *name)
+{
+	struct sdcardfs_inode_info *info = SDCARDFS_I(d_inode(dentry));
+	struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent));
+	struct sdcardfs_inode_data *parent_data = parent_info->data;
+	appid_t appid;
+	unsigned long user_num;
+	int err;
+	struct qstr q_Android = QSTR_LITERAL("Android");
+	struct qstr q_data = QSTR_LITERAL("data");
+	struct qstr q_sandbox = QSTR_LITERAL("sandbox");
+	struct qstr q_obb = QSTR_LITERAL("obb");
+	struct qstr q_media = QSTR_LITERAL("media");
+	struct qstr q_cache = QSTR_LITERAL("cache");
+
+	/* By default, each inode inherits from its parent.
+	 * The properties are maintained in its private fields
+	 * because the inode attributes will be overwritten with those of
+	 * its lower inode.
+	 * These values are used by our custom permission call instead
+	 * of using the inode permissions.
+	 */
+
+	inherit_derived_state(d_inode(parent), d_inode(dentry));
+
+	/* Files don't get special labels */
+	if (!S_ISDIR(d_inode(dentry)->i_mode)) {
+		set_top(info, parent_info);
+		return;
+	}
+	/* Derive custom permissions based on parent and current node */
+	switch (parent_data->perm) {
+	case PERM_INHERIT:
+	case PERM_ANDROID_PACKAGE_CACHE:
+		set_top(info, parent_info);
+		break;
+	case PERM_PRE_ROOT:
+		/* Legacy internal layout places users at top level */
+		info->data->perm = PERM_ROOT;
+		err = kstrtoul(name->name, 10, &user_num);
+		if (err)
+			info->data->userid = 0;
+		else
+			info->data->userid = user_num;
+		break;
+	case PERM_ROOT:
+		/* Assume masked off by default. */
+		if (qstr_case_eq(name, &q_Android)) {
+			/* App-specific directories inside; let anyone traverse */
+			info->data->perm = PERM_ANDROID;
+			info->data->under_android = true;
+		} else {
+			set_top(info, parent_info);
+		}
+		break;
+	case PERM_ANDROID:
+		if (qstr_case_eq(name, &q_data)) {
+			/* App-specific directories inside; let anyone traverse */
+			info->data->perm = PERM_ANDROID_DATA;
+		} else if (qstr_case_eq(name, &q_sandbox)) {
+			/* App-specific directories inside; let anyone traverse */
+			info->data->perm = PERM_ANDROID_DATA;
+		} else if (qstr_case_eq(name, &q_obb)) {
+			/* App-specific directories inside; let anyone traverse */
+			info->data->perm = PERM_ANDROID_OBB;
+			info->data->under_obb = true;
+			/* Single OBB directory is always shared */
+		} else if (qstr_case_eq(name, &q_media)) {
+			/* App-specific directories inside; let anyone traverse */
+			info->data->perm = PERM_ANDROID_MEDIA;
+		} else {
+			set_top(info, parent_info);
+		}
+		break;
+	case PERM_ANDROID_OBB:
+	case PERM_ANDROID_DATA:
+	case PERM_ANDROID_MEDIA:
+		info->data->perm = PERM_ANDROID_PACKAGE;
+		appid = get_appid(name->name);
+		if (appid != 0 && !is_excluded(name->name, parent_data->userid))
+			info->data->d_uid =
+				multiuser_get_uid(parent_data->userid, appid);
+		break;
+	case PERM_ANDROID_PACKAGE:
+		if (qstr_case_eq(name, &q_cache)) {
+			info->data->perm = PERM_ANDROID_PACKAGE_CACHE;
+			info->data->under_cache = true;
+		}
+		set_top(info, parent_info);
+		break;
+	}
+}
+
+void get_derived_permission(struct dentry *parent, struct dentry *dentry)
+{
+	get_derived_permission_new(parent, dentry, &dentry->d_name);
+}
+
+static appid_t get_type(const char *name)
+{
+	const char *ext = strrchr(name, '.');
+	appid_t id;
+
+	if (ext && ext[0]) {
+		ext = &ext[1];
+		id = get_ext_gid(ext);
+		return id?:AID_MEDIA_RW;
+	}
+	return AID_MEDIA_RW;
+}
+
+void fixup_lower_ownership(struct dentry *dentry, const char *name)
+{
+	struct path path;
+	struct inode *inode;
+	struct inode *delegated_inode = NULL;
+	int error;
+	struct sdcardfs_inode_info *info;
+	struct sdcardfs_inode_data *info_d;
+	struct sdcardfs_inode_data *info_top;
+	perm_t perm;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+	uid_t uid = sbi->options.fs_low_uid;
+	gid_t gid = sbi->options.fs_low_gid;
+	struct iattr newattrs;
+
+	if (!sbi->options.gid_derivation)
+		return;
+
+	info = SDCARDFS_I(d_inode(dentry));
+	info_d = info->data;
+	perm = info_d->perm;
+	if (info_d->under_obb) {
+		perm = PERM_ANDROID_OBB;
+	} else if (info_d->under_cache) {
+		perm = PERM_ANDROID_PACKAGE_CACHE;
+	} else if (perm == PERM_INHERIT) {
+		info_top = top_data_get(info);
+		perm = info_top->perm;
+		data_put(info_top);
+	}
+
+	switch (perm) {
+	case PERM_ROOT:
+	case PERM_ANDROID:
+	case PERM_ANDROID_DATA:
+	case PERM_ANDROID_MEDIA:
+	case PERM_ANDROID_PACKAGE:
+	case PERM_ANDROID_PACKAGE_CACHE:
+		uid = multiuser_get_uid(info_d->userid, uid);
+		break;
+	case PERM_ANDROID_OBB:
+		uid = AID_MEDIA_OBB;
+		break;
+	case PERM_PRE_ROOT:
+	default:
+		break;
+	}
+	switch (perm) {
+	case PERM_ROOT:
+	case PERM_ANDROID:
+	case PERM_ANDROID_DATA:
+	case PERM_ANDROID_MEDIA:
+		if (S_ISDIR(d_inode(dentry)->i_mode))
+			gid = multiuser_get_uid(info_d->userid, AID_MEDIA_RW);
+		else
+			gid = multiuser_get_uid(info_d->userid, get_type(name));
+		break;
+	case PERM_ANDROID_OBB:
+		gid = AID_MEDIA_OBB;
+		break;
+	case PERM_ANDROID_PACKAGE:
+		if (uid_is_app(info_d->d_uid))
+			gid = multiuser_get_ext_gid(info_d->d_uid);
+		else
+			gid = multiuser_get_uid(info_d->userid, AID_MEDIA_RW);
+		break;
+	case PERM_ANDROID_PACKAGE_CACHE:
+		if (uid_is_app(info_d->d_uid))
+			gid = multiuser_get_ext_cache_gid(info_d->d_uid);
+		else
+			gid = multiuser_get_uid(info_d->userid, AID_MEDIA_RW);
+		break;
+	case PERM_PRE_ROOT:
+	default:
+		break;
+	}
+
+	sdcardfs_get_lower_path(dentry, &path);
+	inode = d_inode(path.dentry);
+	if (d_inode(path.dentry)->i_gid.val != gid || d_inode(path.dentry)->i_uid.val != uid) {
+retry_deleg:
+		newattrs.ia_valid = ATTR_GID | ATTR_UID | ATTR_FORCE;
+		newattrs.ia_uid = make_kuid(current_user_ns(), uid);
+		newattrs.ia_gid = make_kgid(current_user_ns(), gid);
+		if (!S_ISDIR(inode->i_mode))
+			newattrs.ia_valid |=
+				ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
+		inode_lock(inode);
+		error = security_path_chown(&path, newattrs.ia_uid, newattrs.ia_gid);
+		if (!error)
+			error = notify_change2(path.mnt, path.dentry, &newattrs, &delegated_inode);
+		inode_unlock(inode);
+		if (delegated_inode) {
+			error = break_deleg_wait(&delegated_inode);
+			if (!error)
+				goto retry_deleg;
+		}
+		if (error)
+			pr_debug("sdcardfs: Failed to touch up lower fs gid/uid for %s\n", name);
+	}
+	sdcardfs_put_lower_path(dentry, &path);
+}
+
+static int descendant_may_need_fixup(struct sdcardfs_inode_data *data,
+		struct limit_search *limit)
+{
+	if (data->perm == PERM_ROOT)
+		return (limit->flags & BY_USERID) ?
+				data->userid == limit->userid : 1;
+	if (data->perm == PERM_PRE_ROOT || data->perm == PERM_ANDROID)
+		return 1;
+	return 0;
+}
+
+static int needs_fixup(perm_t perm)
+{
+	if (perm == PERM_ANDROID_DATA || perm == PERM_ANDROID_OBB
+			|| perm == PERM_ANDROID_MEDIA)
+		return 1;
+	return 0;
+}
+
+static void __fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit, int depth)
+{
+	struct dentry *child;
+	struct sdcardfs_inode_info *info;
+
+	/*
+	 * All paths will terminate their recursion on hitting PERM_ANDROID_OBB,
+	 * PERM_ANDROID_MEDIA, or PERM_ANDROID_DATA. This happens at a depth of
+	 * at most 3.
+	 */
+	WARN(depth > 3, "%s: Max expected depth exceeded!\n", __func__);
+	spin_lock_nested(&dentry->d_lock, depth);
+	if (!d_inode(dentry)) {
+		spin_unlock(&dentry->d_lock);
+		return;
+	}
+	info = SDCARDFS_I(d_inode(dentry));
+
+	if (needs_fixup(info->data->perm)) {
+		list_for_each_entry(child, &dentry->d_subdirs, d_child) {
+			spin_lock_nested(&child->d_lock, depth + 1);
+			if (!(limit->flags & BY_NAME) || qstr_case_eq(&child->d_name, &limit->name)) {
+				if (d_inode(child)) {
+					get_derived_permission(dentry, child);
+					fixup_tmp_permissions(d_inode(child));
+					spin_unlock(&child->d_lock);
+					break;
+				}
+			}
+			spin_unlock(&child->d_lock);
+		}
+	} else if (descendant_may_need_fixup(info->data, limit)) {
+		list_for_each_entry(child, &dentry->d_subdirs, d_child) {
+			__fixup_perms_recursive(child, limit, depth + 1);
+		}
+	}
+	spin_unlock(&dentry->d_lock);
+}
+
+void fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit)
+{
+	__fixup_perms_recursive(dentry, limit, 0);
+}
+
+/* main function for updating derived permission */
+inline void update_derived_permission_lock(struct dentry *dentry)
+{
+	struct dentry *parent;
+
+	if (!dentry || !d_inode(dentry)) {
+		pr_err("sdcardfs: %s: invalid dentry\n", __func__);
+		return;
+	}
+	/* FIXME:
+	 * 1. need to check whether the dentry is updated or not
+	 * 2. remove the root dentry update
+	 */
+	if (!IS_ROOT(dentry)) {
+		parent = dget_parent(dentry);
+		if (parent) {
+			get_derived_permission(parent, dentry);
+			dput(parent);
+		}
+	}
+	fixup_tmp_permissions(d_inode(dentry));
+}
+
+int need_graft_path(struct dentry *dentry)
+{
+	int ret = 0;
+	struct dentry *parent = dget_parent(dentry);
+	struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent));
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+	struct qstr obb = QSTR_LITERAL("obb");
+
+	if (!sbi->options.unshared_obb &&
+			parent_info->data->perm == PERM_ANDROID &&
+			qstr_case_eq(&dentry->d_name, &obb)) {
+
+		/* /Android/obb is the base obbpath of DERIVED_UNIFIED */
+		if (!(sbi->options.multiuser == false
+				&& parent_info->data->userid == 0)) {
+			ret = 1;
+		}
+	}
+	dput(parent);
+	return ret;
+}
+
+int is_obbpath_invalid(struct dentry *dent)
+{
+	int ret = 0;
+	struct sdcardfs_dentry_info *di = SDCARDFS_D(dent);
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dent->d_sb);
+	char *path_buf, *obbpath_s;
+	int need_put = 0;
+	struct path lower_path;
+
+	/* check whether the base obbpath has been changed.
+	 * This routine can check an uninitialized obb dentry as well.
+	 * Regarding the uninitialized obb, refer to sdcardfs_mkdir().
+	 */
+	spin_lock(&di->lock);
+	if (di->orig_path.dentry) {
+		if (!di->lower_path.dentry) {
+			ret = 1;
+		} else {
+			path_get(&di->lower_path);
+
+			path_buf = kmalloc(PATH_MAX, GFP_ATOMIC);
+			if (!path_buf) {
+				ret = 1;
+				pr_err("sdcardfs: failed to allocate path_buf in %s.\n", __func__);
+			} else {
+				obbpath_s = d_path(&di->lower_path, path_buf, PATH_MAX);
+				if (d_unhashed(di->lower_path.dentry) ||
+					!str_case_eq(sbi->obbpath_s, obbpath_s)) {
+					ret = 1;
+				}
+				kfree(path_buf);
+			}
+
+			pathcpy(&lower_path, &di->lower_path);
+			need_put = 1;
+		}
+	}
+	spin_unlock(&di->lock);
+	if (need_put)
+		path_put(&lower_path);
+	return ret;
+}
+
+int is_base_obbpath(struct dentry *dentry)
+{
+	int ret = 0;
+	struct dentry *parent = dget_parent(dentry);
+	struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent));
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+	struct qstr q_obb = QSTR_LITERAL("obb");
+
+	spin_lock(&SDCARDFS_D(dentry)->lock);
+	if (sbi->options.multiuser) {
+		if (parent_info->data->perm == PERM_PRE_ROOT &&
+				qstr_case_eq(&dentry->d_name, &q_obb)) {
+			ret = 1;
+		}
+	} else  if (parent_info->data->perm == PERM_ANDROID &&
+			qstr_case_eq(&dentry->d_name, &q_obb)) {
+		ret = 1;
+	}
+	spin_unlock(&SDCARDFS_D(dentry)->lock);
+	return ret;
+}
+
+/* The lower_path will be stored to the dentry's orig_path
+ * and the base obbpath will be copied to the lower_path variable.
+ * If an error is returned, there is no change in the lower_path.
+ * returns: -ERRNO if error (0: no error)
+ */
+int setup_obb_dentry(struct dentry *dentry, struct path *lower_path)
+{
+	int err = 0;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+	struct path obbpath;
+
+	/* A local obb dentry must have its own orig_path to support rmdir
+	 * and mkdir of itself. Usually, we expect that the sbi->obbpath
+	 * is available at this stage.
+	 */
+	sdcardfs_set_orig_path(dentry, lower_path);
+
+	err = kern_path(sbi->obbpath_s,
+			LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &obbpath);
+
+	if (!err) {
+		/* the obbpath base has been found */
+		pathcpy(lower_path, &obbpath);
+	} else {
+		/* if the sbi->obbpath is not available, we can optionally
+		 * setup the lower_path with its orig_path.
+		 * but, the current implementation just returns an error
+		 * because the sdcard daemon also regards this case as
+		 * a lookup fail.
+		 */
+		pr_info("sdcardfs: the sbi->obbpath is not available\n");
+	}
+	return err;
+}
+
+
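
fixup_lower_ownership() and the PERM_ANDROID_* handling above fold an Android user id and an app id into one uid/gid via multiuser_get_uid() and its ext/cache variants. A hedged userspace sketch of that composition, assuming the conventional Android layout (AID_USER_OFFSET = 100000, app ids starting at AID_APP = 10000); the real constants and helpers live in the kernel's Android uid headers, so treat these values as illustrative only:

#include <stdio.h>

/* assumed values mirroring the usual Android multi-user uid layout */
#define AID_USER_OFFSET 100000u   /* uid range reserved per Android user */
#define AID_APP          10000u   /* first uid handed to installed apps */

/* sketch of multiuser_get_uid(): combine user id and per-app id */
static unsigned int multiuser_uid_sketch(unsigned int user_id, unsigned int app_id)
{
	return user_id * AID_USER_OFFSET + app_id % AID_USER_OFFSET;
}

int main(void)
{
	unsigned int app_id = AID_APP + 57;     /* e.g. the 57th installed app */

	/* user 0 -> 10057, user 10 -> 1010057 */
	printf("user 0  -> uid %u\n", multiuser_uid_sketch(0, app_id));
	printf("user 10 -> uid %u\n", multiuser_uid_sketch(10, app_id));
	return 0;
}
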
diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c
new file mode 100644
index 0000000..271c4c4
--- /dev/null
+++ b/fs/sdcardfs/file.c
@@ -0,0 +1,467 @@
+/*
+ * fs/sdcardfs/file.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which was written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+#ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE
+#include <linux/backing-dev.h>
+#endif
+
+static ssize_t sdcardfs_read(struct file *file, char __user *buf,
+			   size_t count, loff_t *ppos)
+{
+	int err;
+	struct file *lower_file;
+	struct dentry *dentry = file->f_path.dentry;
+#ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE
+	struct backing_dev_info *bdi;
+#endif
+
+	lower_file = sdcardfs_lower_file(file);
+
+#ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE
+	if (file->f_mode & FMODE_NOACTIVE) {
+		if (!(lower_file->f_mode & FMODE_NOACTIVE)) {
+			bdi = lower_file->f_mapping->backing_dev_info;
+			lower_file->f_ra.ra_pages = bdi->ra_pages * 2;
+			spin_lock(&lower_file->f_lock);
+			lower_file->f_mode |= FMODE_NOACTIVE;
+			spin_unlock(&lower_file->f_lock);
+		}
+	}
+#endif
+
+	err = vfs_read(lower_file, buf, count, ppos);
+	/* update our inode atime upon a successful lower read */
+	if (err >= 0)
+		fsstack_copy_attr_atime(d_inode(dentry),
+					file_inode(lower_file));
+
+	return err;
+}
+
+static ssize_t sdcardfs_write(struct file *file, const char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	int err;
+	struct file *lower_file;
+	struct dentry *dentry = file->f_path.dentry;
+	struct inode *inode = d_inode(dentry);
+
+	/* check disk space */
+	if (!check_min_free_space(dentry, count, 0)) {
+		pr_err("No minimum free space.\n");
+		return -ENOSPC;
+	}
+
+	lower_file = sdcardfs_lower_file(file);
+	err = vfs_write(lower_file, buf, count, ppos);
+	/* update our inode times+sizes upon a successful lower write */
+	if (err >= 0) {
+		if (sizeof(loff_t) > sizeof(long))
+			inode_lock(inode);
+		fsstack_copy_inode_size(inode, file_inode(lower_file));
+		fsstack_copy_attr_times(inode, file_inode(lower_file));
+		if (sizeof(loff_t) > sizeof(long))
+			inode_unlock(inode);
+	}
+
+	return err;
+}
+
+static int sdcardfs_readdir(struct file *file, struct dir_context *ctx)
+{
+	int err;
+	struct file *lower_file = NULL;
+	struct dentry *dentry = file->f_path.dentry;
+
+	lower_file = sdcardfs_lower_file(file);
+
+	lower_file->f_pos = file->f_pos;
+	err = iterate_dir(lower_file, ctx);
+	file->f_pos = lower_file->f_pos;
+	if (err >= 0)		/* copy the atime */
+		fsstack_copy_attr_atime(d_inode(dentry),
+					file_inode(lower_file));
+	return err;
+}
+
+static long sdcardfs_unlocked_ioctl(struct file *file, unsigned int cmd,
+				  unsigned long arg)
+{
+	long err = -ENOTTY;
+	struct file *lower_file;
+	const struct cred *saved_cred = NULL;
+	struct dentry *dentry = file->f_path.dentry;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+
+	lower_file = sdcardfs_lower_file(file);
+
+	/* XXX: use vfs_ioctl if/when VFS exports it */
+	if (!lower_file || !lower_file->f_op)
+		goto out;
+
+	/* save current_cred and override it */
+	saved_cred = override_fsids(sbi, SDCARDFS_I(file_inode(file))->data);
+	if (!saved_cred) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if (lower_file->f_op->unlocked_ioctl)
+		err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
+
+	/* some ioctls can change inode attributes (EXT2_IOC_SETFLAGS) */
+	if (!err)
+		sdcardfs_copy_and_fix_attrs(file_inode(file),
+				      file_inode(lower_file));
+	revert_fsids(saved_cred);
+out:
+	return err;
+}
+
+#ifdef CONFIG_COMPAT
+static long sdcardfs_compat_ioctl(struct file *file, unsigned int cmd,
+				unsigned long arg)
+{
+	long err = -ENOTTY;
+	struct file *lower_file;
+	const struct cred *saved_cred = NULL;
+	struct dentry *dentry = file->f_path.dentry;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+
+	lower_file = sdcardfs_lower_file(file);
+
+	/* XXX: use vfs_ioctl if/when VFS exports it */
+	if (!lower_file || !lower_file->f_op)
+		goto out;
+
+	/* save current_cred and override it */
+	saved_cred = override_fsids(sbi, SDCARDFS_I(file_inode(file))->data);
+	if (!saved_cred) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if (lower_file->f_op->compat_ioctl)
+		err = lower_file->f_op->compat_ioctl(lower_file, cmd, arg);
+
+	revert_fsids(saved_cred);
+out:
+	return err;
+}
+#endif
+
+static int sdcardfs_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	int err = 0;
+	bool willwrite;
+	struct file *lower_file;
+	const struct vm_operations_struct *saved_vm_ops = NULL;
+
+	/* this might be deferred to mmap's writepage */
+	willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags);
+
+	/*
+	 * File systems which do not implement ->writepage may use
+	 * generic_file_readonly_mmap as their ->mmap op.  If you call
+	 * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL.
+	 * But we cannot call the lower ->mmap op, so we can't tell that
+	 * writeable mappings won't work.  Therefore, our only choice is to
+	 * check if the lower file system supports the ->writepage, and if
+	 * not, return EINVAL (the same error that
+	 * generic_file_readonly_mmap returns in that case).
+	 */
+	lower_file = sdcardfs_lower_file(file);
+	if (willwrite && !lower_file->f_mapping->a_ops->writepage) {
+		err = -EINVAL;
+		pr_err("sdcardfs: lower file system does not support writeable mmap\n");
+		goto out;
+	}
+
+	/*
+	 * find and save lower vm_ops.
+	 *
+	 * XXX: the VFS should have a cleaner way of finding the lower vm_ops
+	 */
+	if (!SDCARDFS_F(file)->lower_vm_ops) {
+		err = lower_file->f_op->mmap(lower_file, vma);
+		if (err) {
+			pr_err("sdcardfs: lower mmap failed %d\n", err);
+			goto out;
+		}
+		saved_vm_ops = vma->vm_ops; /* save: came from lower ->mmap */
+	}
+
+	/*
+	 * Next 3 lines are all I need from generic_file_mmap.  I definitely
+	 * don't want its test for ->readpage which returns -ENOEXEC.
+	 */
+	file_accessed(file);
+	vma->vm_ops = &sdcardfs_vm_ops;
+
+	file->f_mapping->a_ops = &sdcardfs_aops; /* set our aops */
+	if (!SDCARDFS_F(file)->lower_vm_ops) /* save for our ->fault */
+		SDCARDFS_F(file)->lower_vm_ops = saved_vm_ops;
+	vma->vm_private_data = file;
+	get_file(lower_file);
+	vma->vm_file = lower_file;
+
+out:
+	return err;
+}
+
+static int sdcardfs_open(struct inode *inode, struct file *file)
+{
+	int err = 0;
+	struct file *lower_file = NULL;
+	struct path lower_path;
+	struct dentry *dentry = file->f_path.dentry;
+	struct dentry *parent = dget_parent(dentry);
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+	const struct cred *saved_cred = NULL;
+
+	/* don't open unhashed/deleted files */
+	if (d_unhashed(dentry)) {
+		err = -ENOENT;
+		goto out_err;
+	}
+
+	if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) {
+		err = -EACCES;
+		goto out_err;
+	}
+
+	/* save current_cred and override it */
+	saved_cred = override_fsids(sbi, SDCARDFS_I(inode)->data);
+	if (!saved_cred) {
+		err = -ENOMEM;
+		goto out_err;
+	}
+
+	file->private_data =
+		kzalloc(sizeof(struct sdcardfs_file_info), GFP_KERNEL);
+	if (!SDCARDFS_F(file)) {
+		err = -ENOMEM;
+		goto out_revert_cred;
+	}
+
+	/* open lower object and link sdcardfs's file struct to lower's */
+	sdcardfs_get_lower_path(file->f_path.dentry, &lower_path);
+	lower_file = dentry_open(&lower_path, file->f_flags, current_cred());
+	path_put(&lower_path);
+	if (IS_ERR(lower_file)) {
+		err = PTR_ERR(lower_file);
+		lower_file = sdcardfs_lower_file(file);
+		if (lower_file) {
+			sdcardfs_set_lower_file(file, NULL);
+			fput(lower_file); /* fput calls dput for lower_dentry */
+		}
+	} else {
+		sdcardfs_set_lower_file(file, lower_file);
+	}
+
+	if (err)
+		kfree(SDCARDFS_F(file));
+	else
+		sdcardfs_copy_and_fix_attrs(inode, sdcardfs_lower_inode(inode));
+
+out_revert_cred:
+	revert_fsids(saved_cred);
+out_err:
+	dput(parent);
+	return err;
+}
+
+static int sdcardfs_flush(struct file *file, fl_owner_t id)
+{
+	int err = 0;
+	struct file *lower_file = NULL;
+
+	lower_file = sdcardfs_lower_file(file);
+	if (lower_file && lower_file->f_op && lower_file->f_op->flush) {
+		filemap_write_and_wait(file->f_mapping);
+		err = lower_file->f_op->flush(lower_file, id);
+	}
+
+	return err;
+}
+
+/* release all lower object references & free the file info structure */
+static int sdcardfs_file_release(struct inode *inode, struct file *file)
+{
+	struct file *lower_file;
+
+	lower_file = sdcardfs_lower_file(file);
+	if (lower_file) {
+		sdcardfs_set_lower_file(file, NULL);
+		fput(lower_file);
+	}
+
+	kfree(SDCARDFS_F(file));
+	return 0;
+}
+
+static int sdcardfs_fsync(struct file *file, loff_t start, loff_t end,
+			int datasync)
+{
+	int err;
+	struct file *lower_file;
+	struct path lower_path;
+	struct dentry *dentry = file->f_path.dentry;
+
+	err = __generic_file_fsync(file, start, end, datasync);
+	if (err)
+		goto out;
+
+	lower_file = sdcardfs_lower_file(file);
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	err = vfs_fsync_range(lower_file, start, end, datasync);
+	sdcardfs_put_lower_path(dentry, &lower_path);
+out:
+	return err;
+}
+
+static int sdcardfs_fasync(int fd, struct file *file, int flag)
+{
+	int err = 0;
+	struct file *lower_file = NULL;
+
+	lower_file = sdcardfs_lower_file(file);
+	if (lower_file->f_op && lower_file->f_op->fasync)
+		err = lower_file->f_op->fasync(fd, lower_file, flag);
+
+	return err;
+}
+
+/*
+ * Sdcardfs cannot use generic_file_llseek as ->llseek, because it would
+ * only set the offset of the upper file.  So we have to implement our
+ * own method to set both the upper and lower file offsets
+ * consistently.
+ */
+static loff_t sdcardfs_file_llseek(struct file *file, loff_t offset, int whence)
+{
+	int err;
+	struct file *lower_file;
+
+	err = generic_file_llseek(file, offset, whence);
+	if (err < 0)
+		goto out;
+
+	lower_file = sdcardfs_lower_file(file);
+	err = generic_file_llseek(lower_file, offset, whence);
+
+out:
+	return err;
+}
+
+/*
+ * Sdcardfs read_iter, redirect modified iocb to lower read_iter
+ */
+ssize_t sdcardfs_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+	int err;
+	struct file *file = iocb->ki_filp, *lower_file;
+
+	lower_file = sdcardfs_lower_file(file);
+	if (!lower_file->f_op->read_iter) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	get_file(lower_file); /* prevent lower_file from being released */
+	iocb->ki_filp = lower_file;
+	err = lower_file->f_op->read_iter(iocb, iter);
+	iocb->ki_filp = file;
+	fput(lower_file);
+	/* update upper inode atime as needed */
+	if (err >= 0 || err == -EIOCBQUEUED)
+		fsstack_copy_attr_atime(file->f_path.dentry->d_inode,
+					file_inode(lower_file));
+out:
+	return err;
+}
+
+/*
+ * Sdcardfs write_iter, redirect modified iocb to lower write_iter
+ */
+ssize_t sdcardfs_write_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+	int err;
+	struct file *file = iocb->ki_filp, *lower_file;
+	struct inode *inode = file->f_path.dentry->d_inode;
+
+	lower_file = sdcardfs_lower_file(file);
+	if (!lower_file->f_op->write_iter) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	get_file(lower_file); /* prevent lower_file from being released */
+	iocb->ki_filp = lower_file;
+	err = lower_file->f_op->write_iter(iocb, iter);
+	iocb->ki_filp = file;
+	fput(lower_file);
+	/* update upper inode times/sizes as needed */
+	if (err >= 0 || err == -EIOCBQUEUED) {
+		if (sizeof(loff_t) > sizeof(long))
+			inode_lock(inode);
+		fsstack_copy_inode_size(inode, file_inode(lower_file));
+		fsstack_copy_attr_times(inode, file_inode(lower_file));
+		if (sizeof(loff_t) > sizeof(long))
+			inode_unlock(inode);
+	}
+out:
+	return err;
+}
+
+const struct file_operations sdcardfs_main_fops = {
+	.llseek		= generic_file_llseek,
+	.read		= sdcardfs_read,
+	.write		= sdcardfs_write,
+	.unlocked_ioctl	= sdcardfs_unlocked_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= sdcardfs_compat_ioctl,
+#endif
+	.mmap		= sdcardfs_mmap,
+	.open		= sdcardfs_open,
+	.flush		= sdcardfs_flush,
+	.release	= sdcardfs_file_release,
+	.fsync		= sdcardfs_fsync,
+	.fasync		= sdcardfs_fasync,
+	.read_iter	= sdcardfs_read_iter,
+	.write_iter	= sdcardfs_write_iter,
+};
+
+/* trimmed directory options */
+const struct file_operations sdcardfs_dir_fops = {
+	.llseek		= sdcardfs_file_llseek,
+	.read		= generic_read_dir,
+	.iterate	= sdcardfs_readdir,
+	.unlocked_ioctl	= sdcardfs_unlocked_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= sdcardfs_compat_ioctl,
+#endif
+	.open		= sdcardfs_open,
+	.release	= sdcardfs_file_release,
+	.flush		= sdcardfs_flush,
+	.fsync		= sdcardfs_fsync,
+	.fasync		= sdcardfs_fasync,
+};
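
sdcardfs_write() above (and sdcardfs_mkdir() later in inode.c) refuses to proceed with -ENOSPC when check_min_free_space() reports that the lower filesystem has fallen below a reserved threshold. A rough userspace analogue of that gate built on statvfs(); the 50 MiB reserve used below is an arbitrary example value, not the module's actual default:

#include <stdio.h>
#include <sys/statvfs.h>

/* return 1 if writing 'count' more bytes still leaves 'reserved' bytes free */
static int has_min_free_space(const char *path, unsigned long long count,
			      unsigned long long reserved)
{
	struct statvfs st;
	unsigned long long avail;

	if (statvfs(path, &st) != 0)
		return 0;                       /* be conservative on error */
	avail = (unsigned long long)st.f_bavail * st.f_frsize;
	return avail > count + reserved;
}

int main(void)
{
	/* example: keep at least 50 MiB free on the lower filesystem */
	if (!has_min_free_space("/tmp", 4096, 50ULL << 20)) {
		fprintf(stderr, "No minimum free space.\n");
		return 1;                       /* mirrors the -ENOSPC path */
	}
	puts("ok to write");
	return 0;
}
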
diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c
new file mode 100644
index 0000000..4dd681e
--- /dev/null
+++ b/fs/sdcardfs/inode.c
@@ -0,0 +1,821 @@
+/*
+ * fs/sdcardfs/inode.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which was written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+#include <linux/fs_struct.h>
+#include <linux/ratelimit.h>
+#include <linux/sched/task.h>
+
+const struct cred *override_fsids(struct sdcardfs_sb_info *sbi,
+		struct sdcardfs_inode_data *data)
+{
+	struct cred *cred;
+	const struct cred *old_cred;
+	uid_t uid;
+
+	cred = prepare_creds();
+	if (!cred)
+		return NULL;
+
+	if (sbi->options.gid_derivation) {
+		if (data->under_obb)
+			uid = AID_MEDIA_OBB;
+		else
+			uid = multiuser_get_uid(data->userid, sbi->options.fs_low_uid);
+	} else {
+		uid = sbi->options.fs_low_uid;
+	}
+	cred->fsuid = make_kuid(&init_user_ns, uid);
+	cred->fsgid = make_kgid(&init_user_ns, sbi->options.fs_low_gid);
+
+	old_cred = override_creds(cred);
+
+	return old_cred;
+}
+
+void revert_fsids(const struct cred *old_cred)
+{
+	const struct cred *cur_cred;
+
+	cur_cred = current->cred;
+	revert_creds(old_cred);
+	put_cred(cur_cred);
+}
+
+static int sdcardfs_create(struct inode *dir, struct dentry *dentry,
+			 umode_t mode, bool want_excl)
+{
+	int err;
+	struct dentry *lower_dentry;
+	struct vfsmount *lower_dentry_mnt;
+	struct dentry *lower_parent_dentry = NULL;
+	struct path lower_path;
+	const struct cred *saved_cred = NULL;
+	struct fs_struct *saved_fs;
+	struct fs_struct *copied_fs;
+
+	if (!check_caller_access_to_name(dir, &dentry->d_name)) {
+		err = -EACCES;
+		goto out_eacces;
+	}
+
+	/* save current_cred and override it */
+	saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb),
+					SDCARDFS_I(dir)->data);
+	if (!saved_cred)
+		return -ENOMEM;
+
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	lower_dentry_mnt = lower_path.mnt;
+	lower_parent_dentry = lock_parent(lower_dentry);
+
+	/* keep the file type bits and set the permission bits to 0664 */
+	mode = (mode & S_IFMT) | 00664;
+
+	/* temporarily change umask for lower fs write */
+	saved_fs = current->fs;
+	copied_fs = copy_fs_struct(current->fs);
+	if (!copied_fs) {
+		err = -ENOMEM;
+		goto out_unlock;
+	}
+	copied_fs->umask = 0;
+	task_lock(current);
+	current->fs = copied_fs;
+	task_unlock(current);
+
+	err = vfs_create2(lower_dentry_mnt, d_inode(lower_parent_dentry), lower_dentry, mode, want_excl);
+	if (err)
+		goto out;
+
+	err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path,
+			SDCARDFS_I(dir)->data->userid);
+	if (err)
+		goto out;
+	fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir));
+	fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry));
+	fixup_lower_ownership(dentry, dentry->d_name.name);
+
+out:
+	task_lock(current);
+	current->fs = saved_fs;
+	task_unlock(current);
+	free_fs_struct(copied_fs);
+out_unlock:
+	unlock_dir(lower_parent_dentry);
+	sdcardfs_put_lower_path(dentry, &lower_path);
+	revert_fsids(saved_cred);
+out_eacces:
+	return err;
+}
+
+static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	int err;
+	struct dentry *lower_dentry;
+	struct vfsmount *lower_mnt;
+	struct inode *lower_dir_inode = sdcardfs_lower_inode(dir);
+	struct dentry *lower_dir_dentry;
+	struct path lower_path;
+	const struct cred *saved_cred = NULL;
+
+	if (!check_caller_access_to_name(dir, &dentry->d_name)) {
+		err = -EACCES;
+		goto out_eacces;
+	}
+
+	/* save current_cred and override it */
+	saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb),
+						SDCARDFS_I(dir)->data);
+	if (!saved_cred)
+		return -ENOMEM;
+
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	lower_mnt = lower_path.mnt;
+	dget(lower_dentry);
+	lower_dir_dentry = lock_parent(lower_dentry);
+
+	err = vfs_unlink2(lower_mnt, lower_dir_inode, lower_dentry, NULL);
+
+	/*
+	 * Note: unlinking on top of NFS can cause silly-renamed files.
+	 * Trying to delete such files results in EBUSY from NFS
+	 * below.  Silly-renamed files will get deleted by NFS later on, so
+	 * we just need to detect them here and treat such EBUSY errors as
+	 * if the upper file was successfully deleted.
+	 */
+	if (err == -EBUSY && lower_dentry->d_flags & DCACHE_NFSFS_RENAMED)
+		err = 0;
+	if (err)
+		goto out;
+	fsstack_copy_attr_times(dir, lower_dir_inode);
+	fsstack_copy_inode_size(dir, lower_dir_inode);
+	set_nlink(d_inode(dentry),
+		  sdcardfs_lower_inode(d_inode(dentry))->i_nlink);
+	d_inode(dentry)->i_ctime = dir->i_ctime;
+	d_drop(dentry); /* this is needed, else LTP fails (VFS won't do it) */
+out:
+	unlock_dir(lower_dir_dentry);
+	dput(lower_dentry);
+	sdcardfs_put_lower_path(dentry, &lower_path);
+	revert_fsids(saved_cred);
+out_eacces:
+	return err;
+}
+
+static int touch(char *abs_path, mode_t mode)
+{
+	struct file *filp = filp_open(abs_path, O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW, mode);
+
+	if (IS_ERR(filp)) {
+		if (PTR_ERR(filp) == -EEXIST) {
+			return 0;
+		} else {
+			pr_err("sdcardfs: failed to open(%s): %ld\n",
+						abs_path, PTR_ERR(filp));
+			return PTR_ERR(filp);
+		}
+	}
+	filp_close(filp, current->files);
+	return 0;
+}
+
+static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	int err;
+	int make_nomedia_in_obb = 0;
+	struct dentry *lower_dentry;
+	struct vfsmount *lower_mnt;
+	struct dentry *lower_parent_dentry = NULL;
+	struct dentry *parent_dentry = NULL;
+	struct path lower_path;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+	const struct cred *saved_cred = NULL;
+	struct sdcardfs_inode_data *pd = SDCARDFS_I(dir)->data;
+	int touch_err = 0;
+	struct fs_struct *saved_fs;
+	struct fs_struct *copied_fs;
+	struct qstr q_obb = QSTR_LITERAL("obb");
+	struct qstr q_data = QSTR_LITERAL("data");
+
+	if (!check_caller_access_to_name(dir, &dentry->d_name)) {
+		err = -EACCES;
+		goto out_eacces;
+	}
+
+	/* save current_cred and override it */
+	saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb),
+						SDCARDFS_I(dir)->data);
+	if (!saved_cred)
+		return -ENOMEM;
+
+	/* check disk space */
+	parent_dentry = dget_parent(dentry);
+	if (!check_min_free_space(parent_dentry, 0, 1)) {
+		pr_err("sdcardfs: No minimum free space.\n");
+		err = -ENOSPC;
+		dput(parent_dentry);
+		goto out_revert;
+	}
+	dput(parent_dentry);
+
+	/* the lower_dentry is negative here */
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	lower_mnt = lower_path.mnt;
+	lower_parent_dentry = lock_parent(lower_dentry);
+
+	/* keep the file type bits and set the permission bits to 0775 */
+	mode = (mode & S_IFMT) | 00775;
+
+	/* temporarily change umask for lower fs write */
+	saved_fs = current->fs;
+	copied_fs = copy_fs_struct(current->fs);
+	if (!copied_fs) {
+		err = -ENOMEM;
+		unlock_dir(lower_parent_dentry);
+		goto out_unlock;
+	}
+	copied_fs->umask = 0;
+	task_lock(current);
+	current->fs = copied_fs;
+	task_unlock(current);
+
+	err = vfs_mkdir2(lower_mnt, d_inode(lower_parent_dentry), lower_dentry, mode);
+
+	if (err) {
+		unlock_dir(lower_parent_dentry);
+		goto out;
+	}
+
+	/* if it is a local obb dentry, set it up with the base obbpath */
+	if (need_graft_path(dentry)) {
+
+		err = setup_obb_dentry(dentry, &lower_path);
+		if (err) {
+			/* If the sbi->obbpath is not available, the lower_path won't be
+			 * changed by setup_obb_dentry(), but the lower path is saved to
+			 * its orig_path. This dentry will be revalidated later;
+			 * for now, the lower_path should be NULL.
+			 */
+			sdcardfs_put_reset_lower_path(dentry);
+
+			/* Either the newly created lower path, which was saved to its
+			 * orig_path, or the lower_path is the base obbpath;
+			 * therefore, an additional path_get is required.
+			 */
+			path_get(&lower_path);
+		} else
+			make_nomedia_in_obb = 1;
+	}
+
+	err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path, pd->userid);
+	if (err) {
+		unlock_dir(lower_parent_dentry);
+		goto out;
+	}
+
+	fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir));
+	fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry));
+	/* update number of links on parent directory */
+	set_nlink(dir, sdcardfs_lower_inode(dir)->i_nlink);
+	fixup_lower_ownership(dentry, dentry->d_name.name);
+	unlock_dir(lower_parent_dentry);
+	if ((!sbi->options.multiuser) && (qstr_case_eq(&dentry->d_name, &q_obb))
+		&& (pd->perm == PERM_ANDROID) && (pd->userid == 0))
+		make_nomedia_in_obb = 1;
+
+	/* When creating /Android/data and /Android/obb, create .nomedia inside them */
+	if (make_nomedia_in_obb ||
+		((pd->perm == PERM_ANDROID)
+				&& (qstr_case_eq(&dentry->d_name, &q_data)))) {
+		revert_fsids(saved_cred);
+		saved_cred = override_fsids(sbi,
+					SDCARDFS_I(d_inode(dentry))->data);
+		if (!saved_cred) {
+			pr_err("sdcardfs: failed to set up .nomedia in %s: %d\n",
+						lower_path.dentry->d_name.name,
+						-ENOMEM);
+			goto out;
+		}
+		set_fs_pwd(current->fs, &lower_path);
+		touch_err = touch(".nomedia", 0664);
+		if (touch_err) {
+			pr_err("sdcardfs: failed to create .nomedia in %s: %d\n",
+						lower_path.dentry->d_name.name,
+						touch_err);
+			goto out;
+		}
+	}
+out:
+	task_lock(current);
+	current->fs = saved_fs;
+	task_unlock(current);
+
+	free_fs_struct(copied_fs);
+out_unlock:
+	sdcardfs_put_lower_path(dentry, &lower_path);
+out_revert:
+	revert_fsids(saved_cred);
+out_eacces:
+	return err;
+}
+
+static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	struct dentry *lower_dentry;
+	struct dentry *lower_dir_dentry;
+	struct vfsmount *lower_mnt;
+	int err;
+	struct path lower_path;
+	const struct cred *saved_cred = NULL;
+
+	if (!check_caller_access_to_name(dir, &dentry->d_name)) {
+		err = -EACCES;
+		goto out_eacces;
+	}
+
+	/* save current_cred and override it */
+	saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb),
+						SDCARDFS_I(dir)->data);
+	if (!saved_cred)
+		return -ENOMEM;
+
+	/* sdcardfs_get_real_lower(): in case of removing a user's obb dentry,
+	 * the dentry on the original path should be deleted.
+	 */
+	sdcardfs_get_real_lower(dentry, &lower_path);
+
+	lower_dentry = lower_path.dentry;
+	lower_mnt = lower_path.mnt;
+	lower_dir_dentry = lock_parent(lower_dentry);
+
+	err = vfs_rmdir2(lower_mnt, d_inode(lower_dir_dentry), lower_dentry);
+	if (err)
+		goto out;
+
+	d_drop(dentry);	/* drop our dentry on success (why not VFS's job?) */
+	if (d_inode(dentry))
+		clear_nlink(d_inode(dentry));
+	fsstack_copy_attr_times(dir, d_inode(lower_dir_dentry));
+	fsstack_copy_inode_size(dir, d_inode(lower_dir_dentry));
+	set_nlink(dir, d_inode(lower_dir_dentry)->i_nlink);
+
+out:
+	unlock_dir(lower_dir_dentry);
+	sdcardfs_put_real_lower(dentry, &lower_path);
+	revert_fsids(saved_cred);
+out_eacces:
+	return err;
+}
+
+/*
+ * The locking rules in sdcardfs_rename are complex.  We could use a simpler
+ * superblock-level name-space lock for renames and copy-ups.
+ */
+static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+			 struct inode *new_dir, struct dentry *new_dentry,
+			 unsigned int flags)
+{
+	int err = 0;
+	struct dentry *lower_old_dentry = NULL;
+	struct dentry *lower_new_dentry = NULL;
+	struct dentry *lower_old_dir_dentry = NULL;
+	struct dentry *lower_new_dir_dentry = NULL;
+	struct vfsmount *lower_mnt = NULL;
+	struct dentry *trap = NULL;
+	struct path lower_old_path, lower_new_path;
+	const struct cred *saved_cred = NULL;
+
+	if (flags)
+		return -EINVAL;
+
+	if (!check_caller_access_to_name(old_dir, &old_dentry->d_name) ||
+		!check_caller_access_to_name(new_dir, &new_dentry->d_name)) {
+		err = -EACCES;
+		goto out_eacces;
+	}
+
+	/* save current_cred and override it */
+	saved_cred = override_fsids(SDCARDFS_SB(old_dir->i_sb),
+						SDCARDFS_I(new_dir)->data);
+	if (!saved_cred)
+		return -ENOMEM;
+
+	sdcardfs_get_real_lower(old_dentry, &lower_old_path);
+	sdcardfs_get_lower_path(new_dentry, &lower_new_path);
+	lower_old_dentry = lower_old_path.dentry;
+	lower_new_dentry = lower_new_path.dentry;
+	lower_mnt = lower_old_path.mnt;
+	lower_old_dir_dentry = dget_parent(lower_old_dentry);
+	lower_new_dir_dentry = dget_parent(lower_new_dentry);
+
+	trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+	/* source should not be ancestor of target */
+	if (trap == lower_old_dentry) {
+		err = -EINVAL;
+		goto out;
+	}
+	/* target should not be ancestor of source */
+	if (trap == lower_new_dentry) {
+		err = -ENOTEMPTY;
+		goto out;
+	}
+
+	err = vfs_rename2(lower_mnt,
+			 d_inode(lower_old_dir_dentry), lower_old_dentry,
+			 d_inode(lower_new_dir_dentry), lower_new_dentry,
+			 NULL, 0);
+	if (err)
+		goto out;
+
+	/* Copy attrs from lower dir, but not i_uid/i_gid */
+	sdcardfs_copy_and_fix_attrs(new_dir, d_inode(lower_new_dir_dentry));
+	fsstack_copy_inode_size(new_dir, d_inode(lower_new_dir_dentry));
+
+	if (new_dir != old_dir) {
+		sdcardfs_copy_and_fix_attrs(old_dir, d_inode(lower_old_dir_dentry));
+		fsstack_copy_inode_size(old_dir, d_inode(lower_old_dir_dentry));
+	}
+	get_derived_permission_new(new_dentry->d_parent, old_dentry, &new_dentry->d_name);
+	fixup_tmp_permissions(d_inode(old_dentry));
+	fixup_lower_ownership(old_dentry, new_dentry->d_name.name);
+	d_invalidate(old_dentry); /* Can't fixup ownership recursively :( */
+out:
+	unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+	dput(lower_old_dir_dentry);
+	dput(lower_new_dir_dentry);
+	sdcardfs_put_real_lower(old_dentry, &lower_old_path);
+	sdcardfs_put_lower_path(new_dentry, &lower_new_path);
+	revert_fsids(saved_cred);
+out_eacces:
+	return err;
+}
+
+#if 0
+static int sdcardfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
+{
+	int err;
+	struct dentry *lower_dentry;
+	struct path lower_path;
+	/* XXX readlink does not require overriding credentials */
+
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	if (!d_inode(lower_dentry)->i_op ||
+	    !d_inode(lower_dentry)->i_op->readlink) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = d_inode(lower_dentry)->i_op->readlink(lower_dentry,
+						    buf, bufsiz);
+	if (err < 0)
+		goto out;
+	fsstack_copy_attr_atime(d_inode(dentry), d_inode(lower_dentry));
+
+out:
+	sdcardfs_put_lower_path(dentry, &lower_path);
+	return err;
+}
+#endif
+
+#if 0
+static const char *sdcardfs_follow_link(struct dentry *dentry, void **cookie)
+{
+	char *buf;
+	int len = PAGE_SIZE, err;
+	mm_segment_t old_fs;
+
+	/* This is freed by the put_link method assuming a successful call. */
+	buf = kmalloc(len, GFP_KERNEL);
+	if (!buf) {
+		buf = ERR_PTR(-ENOMEM);
+		return buf;
+	}
+
+	/* read the symlink, and then we will follow it */
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	err = sdcardfs_readlink(dentry, buf, len);
+	set_fs(old_fs);
+	if (err < 0) {
+		kfree(buf);
+		buf = ERR_PTR(err);
+	} else {
+		buf[err] = '\0';
+	}
+	return *cookie = buf;
+}
+#endif
+
+static int sdcardfs_permission_wrn(struct inode *inode, int mask)
+{
+	WARN_RATELIMIT(1, "sdcardfs does not support permission. Use permission2.\n");
+	return -EINVAL;
+}
+
+void copy_attrs(struct inode *dest, const struct inode *src)
+{
+	dest->i_mode = src->i_mode;
+	dest->i_uid = src->i_uid;
+	dest->i_gid = src->i_gid;
+	dest->i_rdev = src->i_rdev;
+	dest->i_atime = src->i_atime;
+	dest->i_mtime = src->i_mtime;
+	dest->i_ctime = src->i_ctime;
+	dest->i_blkbits = src->i_blkbits;
+	dest->i_flags = src->i_flags;
+#ifdef CONFIG_FS_POSIX_ACL
+	dest->i_acl = src->i_acl;
+#endif
+#ifdef CONFIG_SECURITY
+	dest->i_security = src->i_security;
+#endif
+}
+
+static int sdcardfs_permission(struct vfsmount *mnt, struct inode *inode, int mask)
+{
+	int err;
+	struct inode tmp;
+	struct sdcardfs_inode_data *top = top_data_get(SDCARDFS_I(inode));
+
+	if (IS_ERR(mnt))
+		return PTR_ERR(mnt);
+
+	if (!top)
+		return -EINVAL;
+
+	/*
+	 * Permission check on sdcardfs inode.
+	 * Calling process should have AID_SDCARD_RW permission
+	 * The calling process should have AID_SDCARD_RW permission.
+	 * i_gid, and i_sb, we can create a fake inode to pass
+	 * this information down in.
+	 *
+	 * The underlying code may attempt to take locks in some
+	 * cases for features we're not using, but if that changes,
+	 * locks must be dealt with to avoid undefined behavior.
+	 */
+	copy_attrs(&tmp, inode);
+	tmp.i_uid = make_kuid(&init_user_ns, top->d_uid);
+	tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, inode->i_sb, top));
+	tmp.i_mode = (inode->i_mode & S_IFMT)
+			| get_mode(mnt, SDCARDFS_I(inode), top);
+	data_put(top);
+	tmp.i_sb = inode->i_sb;
+	if (IS_POSIXACL(inode))
+		pr_warn("%s: This may be undefined behavior...\n", __func__);
+	err = generic_permission(&tmp, mask);
+	return err;
+}
+
+static int sdcardfs_setattr_wrn(struct dentry *dentry, struct iattr *ia)
+{
+	WARN_RATELIMIT(1, "sdcardfs does not support setattr. Use setattr2.\n");
+	return -EINVAL;
+}
+
+static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct iattr *ia)
+{
+	int err;
+	struct dentry *lower_dentry;
+	struct vfsmount *lower_mnt;
+	struct inode *inode;
+	struct inode *lower_inode;
+	struct path lower_path;
+	struct iattr lower_ia;
+	struct dentry *parent;
+	struct inode tmp;
+	struct dentry tmp_d;
+	struct sdcardfs_inode_data *top;
+
+	const struct cred *saved_cred = NULL;
+
+	inode = d_inode(dentry);
+	top = top_data_get(SDCARDFS_I(inode));
+
+	if (!top)
+		return -EINVAL;
+
+	/*
+	 * Permission check on sdcardfs inode.
+	 * Calling process should have AID_SDCARD_RW permission
+	 * The calling process should have AID_SDCARD_RW permission.
+	 * i_gid, and i_sb, we can create a fake inode to pass
+	 * this information down in.
+	 *
+	 * The underlying code may attempt to take locks in some
+	 * cases for features we're not using, but if that changes,
+	 * locks must be dealt with to avoid undefined behavior.
+	 *
+	 */
+	copy_attrs(&tmp, inode);
+	tmp.i_uid = make_kuid(&init_user_ns, top->d_uid);
+	tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, dentry->d_sb, top));
+	tmp.i_mode = (inode->i_mode & S_IFMT)
+			| get_mode(mnt, SDCARDFS_I(inode), top);
+	tmp.i_size = i_size_read(inode);
+	data_put(top);
+	tmp.i_sb = inode->i_sb;
+	tmp_d.d_inode = &tmp;
+
+	/*
+	 * Check if user has permission to change dentry.  We don't check if
+	 * this user can change the lower inode: that should happen when
+	 * calling notify_change on the lower inode.
+	 */
+	/* prepare our own lower struct iattr (with the lower file) */
+	memcpy(&lower_ia, ia, sizeof(lower_ia));
+	/* Allow touch updating timestamps. A previous permission check ensures
+	 * we have write access. Changes to mode, owner, and group are ignored
+	 */
+	ia->ia_valid |= ATTR_FORCE;
+	err = setattr_prepare(&tmp_d, ia);
+
+	if (!err) {
+		/* check the Android group ID */
+		parent = dget_parent(dentry);
+		if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name))
+			err = -EACCES;
+		dput(parent);
+	}
+
+	if (err)
+		goto out_err;
+
+	/* save current_cred and override it */
+	saved_cred = override_fsids(SDCARDFS_SB(dentry->d_sb),
+						SDCARDFS_I(inode)->data);
+	if (!saved_cred)
+		return -ENOMEM;
+
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	lower_mnt = lower_path.mnt;
+	lower_inode = sdcardfs_lower_inode(inode);
+
+	if (ia->ia_valid & ATTR_FILE)
+		lower_ia.ia_file = sdcardfs_lower_file(ia->ia_file);
+
+	lower_ia.ia_valid &= ~(ATTR_UID | ATTR_GID | ATTR_MODE);
+
+	/*
+	 * If shrinking, first truncate upper level to cancel writing dirty
+	 * pages beyond the new eof; and also if its maxbytes is more
+	 * limiting (fail with -EFBIG before making any change to the lower
+	 * level).  There is no need to vmtruncate the upper level
+	 * afterwards in the other cases: we fsstack_copy_inode_size from
+	 * the lower level.
+	 */
+	if (ia->ia_valid & ATTR_SIZE) {
+		err = inode_newsize_ok(&tmp, ia->ia_size);
+		if (err) {
+			goto out;
+		}
+		truncate_setsize(inode, ia->ia_size);
+	}
+
+	/*
+	 * mode change is for clearing setuid/setgid bits. Allow lower fs
+	 * to interpret this in its own way.
+	 */
+	if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
+		lower_ia.ia_valid &= ~ATTR_MODE;
+
+	/* notify the (possibly copied-up) lower inode */
+	/*
+	 * Note: we use d_inode(lower_dentry), because lower_inode may be
+	 * unlinked (no inode->i_sb and i_ino==0).  This happens if someone
+	 * tries to open(), unlink(), then ftruncate() a file.
+	 */
+	inode_lock(d_inode(lower_dentry));
+	err = notify_change2(lower_mnt, lower_dentry, &lower_ia, /* note: lower_ia */
+			NULL);
+	inode_unlock(d_inode(lower_dentry));
+	if (err)
+		goto out;
+
+	/* get attributes from the lower inode and update derived permissions */
+	sdcardfs_copy_and_fix_attrs(inode, lower_inode);
+
+	/*
+	 * Not running fsstack_copy_inode_size(inode, lower_inode), because
+	 * VFS should update our inode size, and notify_change on
+	 * lower_inode should update its size.
+	 */
+
+out:
+	sdcardfs_put_lower_path(dentry, &lower_path);
+	revert_fsids(saved_cred);
+out_err:
+	return err;
+}
+
+static int sdcardfs_fillattr(struct vfsmount *mnt, struct inode *inode,
+				struct kstat *lower_stat, struct kstat *stat)
+{
+	struct sdcardfs_inode_info *info = SDCARDFS_I(inode);
+	struct sdcardfs_inode_data *top = top_data_get(info);
+	struct super_block *sb = inode->i_sb;
+
+	if (!top)
+		return -EINVAL;
+
+	stat->dev = inode->i_sb->s_dev;
+	stat->ino = inode->i_ino;
+	stat->mode = (inode->i_mode  & S_IFMT) | get_mode(mnt, info, top);
+	stat->nlink = inode->i_nlink;
+	stat->uid = make_kuid(&init_user_ns, top->d_uid);
+	stat->gid = make_kgid(&init_user_ns, get_gid(mnt, sb, top));
+	stat->rdev = inode->i_rdev;
+	stat->size = lower_stat->size;
+	stat->atime = lower_stat->atime;
+	stat->mtime = lower_stat->mtime;
+	stat->ctime = lower_stat->ctime;
+	stat->blksize = lower_stat->blksize;
+	stat->blocks = lower_stat->blocks;
+	data_put(top);
+	return 0;
+}
+static int sdcardfs_getattr(const struct path *path, struct kstat *stat,
+				u32 request_mask, unsigned int flags)
+{
+	struct vfsmount *mnt = path->mnt;
+	struct dentry *dentry = path->dentry;
+	struct kstat lower_stat;
+	struct path lower_path;
+	struct dentry *parent;
+	int err;
+
+	parent = dget_parent(dentry);
+	if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) {
+		dput(parent);
+		return -EACCES;
+	}
+	dput(parent);
+
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	err = vfs_getattr(&lower_path, &lower_stat, request_mask, flags);
+	if (err)
+		goto out;
+	sdcardfs_copy_and_fix_attrs(d_inode(dentry),
+			      d_inode(lower_path.dentry));
+	err = sdcardfs_fillattr(mnt, d_inode(dentry), &lower_stat, stat);
+out:
+	sdcardfs_put_lower_path(dentry, &lower_path);
+	return err;
+}
+
+const struct inode_operations sdcardfs_symlink_iops = {
+	.permission2	= sdcardfs_permission,
+	.setattr2	= sdcardfs_setattr,
+	/* XXX Following operations are implemented,
+	 *     but FUSE(sdcard) or FAT does not support them.
+	 *     These methods are *NOT* perfectly tested.
+	.readlink	= sdcardfs_readlink,
+	.follow_link	= sdcardfs_follow_link,
+	.put_link	= kfree_put_link,
+	 */
+};
+
+const struct inode_operations sdcardfs_dir_iops = {
+	.create		= sdcardfs_create,
+	.lookup		= sdcardfs_lookup,
+	.permission	= sdcardfs_permission_wrn,
+	.permission2	= sdcardfs_permission,
+	.unlink		= sdcardfs_unlink,
+	.mkdir		= sdcardfs_mkdir,
+	.rmdir		= sdcardfs_rmdir,
+	.rename		= sdcardfs_rename,
+	.setattr	= sdcardfs_setattr_wrn,
+	.setattr2	= sdcardfs_setattr,
+	.getattr	= sdcardfs_getattr,
+};
+
+const struct inode_operations sdcardfs_main_iops = {
+	.permission	= sdcardfs_permission_wrn,
+	.permission2	= sdcardfs_permission,
+	.setattr	= sdcardfs_setattr_wrn,
+	.setattr2	= sdcardfs_setattr,
+	.getattr	= sdcardfs_getattr,
+};
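
sdcardfs_permission() and sdcardfs_setattr() above build a temporary inode that carries the derived uid, gid and mode and hand it to generic_permission()/setattr_prepare(), so the access decision reduces to ordinary owner/group/other mode bits applied to the derived identity. A simplified userspace model of that check (no capabilities, ACLs or supplementary groups, which the real generic_permission() does consider); the uid/gid values in main() are only illustrative:

#include <stdio.h>
#include <sys/types.h>

#define MAY_EXEC  1
#define MAY_WRITE 2
#define MAY_READ  4

/* pick the owner, group, or other permission bits, then test the requested mask */
static int permission_sketch(uid_t fsuid, gid_t fsgid,
			     uid_t i_uid, gid_t i_gid,
			     unsigned int i_mode, unsigned int mask)
{
	unsigned int mode = i_mode;

	if (fsuid == i_uid)
		mode >>= 6;             /* owner bits */
	else if (fsgid == i_gid)
		mode >>= 3;             /* group bits */
	mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
	return (mode & mask) == mask ? 0 : -13 /* -EACCES */;
}

int main(void)
{
	/* e.g. derived owner uid 10057, gid 1015, derived mode 0660 */
	uid_t d_uid = 10057;
	gid_t d_gid = 1015;

	printf("owner write: %d\n",
	       permission_sketch(10057, 10057, d_uid, d_gid, 0660, MAY_WRITE));
	printf("other write: %d\n",
	       permission_sketch(10099, 10099, d_uid, d_gid, 0660, MAY_WRITE));
	return 0;
}
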
diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c
new file mode 100644
index 0000000..a5c9686
--- /dev/null
+++ b/fs/sdcardfs/lookup.c
@@ -0,0 +1,470 @@
+/*
+ * fs/sdcardfs/lookup.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which was written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+#include "linux/delay.h"
+
+/* The dentry cache is just so we have properly sized dentries */
+static struct kmem_cache *sdcardfs_dentry_cachep;
+
+int sdcardfs_init_dentry_cache(void)
+{
+	sdcardfs_dentry_cachep =
+		kmem_cache_create("sdcardfs_dentry",
+				  sizeof(struct sdcardfs_dentry_info),
+				  0, SLAB_RECLAIM_ACCOUNT, NULL);
+
+	return sdcardfs_dentry_cachep ? 0 : -ENOMEM;
+}
+
+void sdcardfs_destroy_dentry_cache(void)
+{
+	kmem_cache_destroy(sdcardfs_dentry_cachep);
+}
+
+void free_dentry_private_data(struct dentry *dentry)
+{
+	kmem_cache_free(sdcardfs_dentry_cachep, dentry->d_fsdata);
+	dentry->d_fsdata = NULL;
+}
+
+/* allocate new dentry private data */
+int new_dentry_private_data(struct dentry *dentry)
+{
+	struct sdcardfs_dentry_info *info;
+
+	/* use zalloc to init dentry_info.lower_path */
+	info = kmem_cache_zalloc(sdcardfs_dentry_cachep, GFP_ATOMIC);
+	if (!info)
+		return -ENOMEM;
+
+	spin_lock_init(&info->lock);
+	dentry->d_fsdata = info;
+
+	return 0;
+}
+
+struct inode_data {
+	struct inode *lower_inode;
+	userid_t id;
+};
+
+static int sdcardfs_inode_test(struct inode *inode, void *candidate_data)
+{
+	struct inode *current_lower_inode = sdcardfs_lower_inode(inode);
+	userid_t current_userid = SDCARDFS_I(inode)->data->userid;
+
+	if (current_lower_inode == ((struct inode_data *)candidate_data)->lower_inode &&
+			current_userid == ((struct inode_data *)candidate_data)->id)
+		return 1; /* found a match */
+	else
+		return 0; /* no match */
+}
+
+static int sdcardfs_inode_set(struct inode *inode, void *lower_inode)
+{
+	/* we do actual inode initialization in sdcardfs_iget */
+	return 0;
+}
+
+struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode, userid_t id)
+{
+	struct sdcardfs_inode_info *info;
+	struct inode_data data;
+	struct inode *inode; /* the new inode to return */
+
+	if (!igrab(lower_inode))
+		return ERR_PTR(-ESTALE);
+
+	data.id = id;
+	data.lower_inode = lower_inode;
+	inode = iget5_locked(sb, /* our superblock */
+			     /*
+			      * hashval: we use inode number, but we can
+			      * also use "(unsigned long)lower_inode"
+			      * instead.
+			      */
+			     lower_inode->i_ino, /* hashval */
+			     sdcardfs_inode_test, /* inode comparison function */
+			     sdcardfs_inode_set, /* inode init function */
+			     &data); /* data passed to test+set fxns */
+	if (!inode) {
+		iput(lower_inode);
+		return ERR_PTR(-ENOMEM);
+	}
+	/* if found a cached inode, then just return it (after iput) */
+	if (!(inode->i_state & I_NEW)) {
+		iput(lower_inode);
+		return inode;
+	}
+
+	/* initialize new inode */
+	info = SDCARDFS_I(inode);
+
+	inode->i_ino = lower_inode->i_ino;
+	sdcardfs_set_lower_inode(inode, lower_inode);
+
+	inode_inc_iversion_raw(inode);
+
+	/* use different set of inode ops for symlinks & directories */
+	if (S_ISDIR(lower_inode->i_mode))
+		inode->i_op = &sdcardfs_dir_iops;
+	else if (S_ISLNK(lower_inode->i_mode))
+		inode->i_op = &sdcardfs_symlink_iops;
+	else
+		inode->i_op = &sdcardfs_main_iops;
+
+	/* use different set of file ops for directories */
+	if (S_ISDIR(lower_inode->i_mode))
+		inode->i_fop = &sdcardfs_dir_fops;
+	else
+		inode->i_fop = &sdcardfs_main_fops;
+
+	inode->i_mapping->a_ops = &sdcardfs_aops;
+
+	inode->i_atime.tv_sec = 0;
+	inode->i_atime.tv_nsec = 0;
+	inode->i_mtime.tv_sec = 0;
+	inode->i_mtime.tv_nsec = 0;
+	inode->i_ctime.tv_sec = 0;
+	inode->i_ctime.tv_nsec = 0;
+
+	/* properly initialize special inodes */
+	if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) ||
+	    S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode))
+		init_special_inode(inode, lower_inode->i_mode,
+				   lower_inode->i_rdev);
+
+	/* all well, copy inode attributes */
+	sdcardfs_copy_and_fix_attrs(inode, lower_inode);
+	fsstack_copy_inode_size(inode, lower_inode);
+
+	unlock_new_inode(inode);
+	return inode;
+}
+
+/*
+ * Helper interpose routine, called directly by ->lookup to handle
+ * spliced dentries.
+ */
+static struct dentry *__sdcardfs_interpose(struct dentry *dentry,
+					 struct super_block *sb,
+					 struct path *lower_path,
+					 userid_t id)
+{
+	struct inode *inode;
+	struct inode *lower_inode;
+	struct super_block *lower_sb;
+	struct dentry *ret_dentry;
+
+	lower_inode = d_inode(lower_path->dentry);
+	lower_sb = sdcardfs_lower_super(sb);
+
+	/* check that the lower file system didn't cross a mount point */
+	if (lower_inode->i_sb != lower_sb) {
+		ret_dentry = ERR_PTR(-EXDEV);
+		goto out;
+	}
+
+	/*
+	 * We allocate our new inode below by calling sdcardfs_iget,
+	 * which will initialize some of the new inode's fields
+	 */
+
+	/* inherit lower inode number for sdcardfs's inode */
+	inode = sdcardfs_iget(sb, lower_inode, id);
+	if (IS_ERR(inode)) {
+		ret_dentry = ERR_CAST(inode);
+		goto out;
+	}
+
+	ret_dentry = d_splice_alias(inode, dentry);
+	dentry = ret_dentry ?: dentry;
+	if (!IS_ERR(dentry))
+		update_derived_permission_lock(dentry);
+out:
+	return ret_dentry;
+}
+
+/*
+ * Connect an sdcardfs dentry/inode with the corresponding lower ones.  This is
+ * the classic stackable file system "vnode interposition" action.
+ *
+ * @dentry: sdcardfs's dentry which interposes on lower one
+ * @sb: sdcardfs's super_block
+ * @lower_path: the lower path (caller does path_get/put)
+ */
+int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb,
+		     struct path *lower_path, userid_t id)
+{
+	struct dentry *ret_dentry;
+
+	ret_dentry = __sdcardfs_interpose(dentry, sb, lower_path, id);
+	return PTR_ERR(ret_dentry);
+}
+
+struct sdcardfs_name_data {
+	struct dir_context ctx;
+	const struct qstr *to_find;
+	char *name;
+	bool found;
+};
+
+static int sdcardfs_name_match(struct dir_context *ctx, const char *name,
+		int namelen, loff_t offset, u64 ino, unsigned int d_type)
+{
+	struct sdcardfs_name_data *buf = container_of(ctx, struct sdcardfs_name_data, ctx);
+	struct qstr candidate = QSTR_INIT(name, namelen);
+
+	if (qstr_case_eq(buf->to_find, &candidate)) {
+		memcpy(buf->name, name, namelen);
+		buf->name[namelen] = 0;
+		buf->found = true;
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Main driver function for sdcardfs's lookup.
+ *
+ * Returns: NULL (ok), ERR_PTR if an error occurred.
+ * Fills in lower_parent_path with <dentry,mnt> on success.
+ */
+static struct dentry *__sdcardfs_lookup(struct dentry *dentry,
+		unsigned int flags, struct path *lower_parent_path, userid_t id)
+{
+	int err = 0;
+	struct vfsmount *lower_dir_mnt;
+	struct dentry *lower_dir_dentry = NULL;
+	struct dentry *lower_dentry;
+	const struct qstr *name;
+	struct path lower_path;
+	struct qstr dname;
+	struct dentry *ret_dentry = NULL;
+	struct sdcardfs_sb_info *sbi;
+
+	sbi = SDCARDFS_SB(dentry->d_sb);
+	/* must initialize dentry operations */
+	d_set_d_op(dentry, &sdcardfs_ci_dops);
+
+	if (IS_ROOT(dentry))
+		goto out;
+
+	name = &dentry->d_name;
+
+	/* now start the actual lookup procedure */
+	lower_dir_dentry = lower_parent_path->dentry;
+	lower_dir_mnt = lower_parent_path->mnt;
+
+	/* Use vfs_path_lookup to check if the dentry exists or not */
+	err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name->name, 0,
+				&lower_path);
+	/* not found: retry with a case-insensitive scan of the directory */
+	if (err == -ENOENT) {
+		struct file *file;
+		const struct cred *cred = current_cred();
+
+		struct sdcardfs_name_data buffer = {
+			.ctx.actor = sdcardfs_name_match,
+			.to_find = name,
+			.name = __getname(),
+			.found = false,
+		};
+
+		if (!buffer.name) {
+			err = -ENOMEM;
+			goto out;
+		}
+		file = dentry_open(lower_parent_path, O_RDONLY, cred);
+		if (IS_ERR(file)) {
+			err = PTR_ERR(file);
+			goto put_name;
+		}
+		err = iterate_dir(file, &buffer.ctx);
+		fput(file);
+		if (err)
+			goto put_name;
+
+		if (buffer.found)
+			err = vfs_path_lookup(lower_dir_dentry,
+						lower_dir_mnt,
+						buffer.name, 0,
+						&lower_path);
+		else
+			err = -ENOENT;
+put_name:
+		__putname(buffer.name);
+	}
+
+	/* no error: handle positive dentries */
+	if (!err) {
+		/* check if the dentry is an obb dentry
+		 * if true, the lower_inode must be replaced with
+		 * the inode of the graft path
+		 */
+
+		if (need_graft_path(dentry)) {
+
+			/* setup_obb_dentry()
+			 * The lower_path will be stored in the dentry's orig_path,
+			 * and the base obbpath will be copied to the lower_path variable.
+			 * If an error is returned, lower_path is left unchanged.
+			 * Returns 0 on success or a negative errno on failure.
+			 */
+			err = setup_obb_dentry(dentry, &lower_path);
+
+			if (err) {
+				/* if the sbi->obbpath is not available, we can optionally
+				 * setup the lower_path with its orig_path.
+				 * but, the current implementation just returns an error
+				 * because the sdcard daemon also regards this case as
+				 * a lookup fail.
+				 */
+				pr_info("sdcardfs: base obbpath is not available\n");
+				sdcardfs_put_reset_orig_path(dentry);
+				goto out;
+			}
+		}
+
+		sdcardfs_set_lower_path(dentry, &lower_path);
+		ret_dentry =
+			__sdcardfs_interpose(dentry, dentry->d_sb, &lower_path, id);
+		if (IS_ERR(ret_dentry)) {
+			err = PTR_ERR(ret_dentry);
+			 /* path_put underlying path on error */
+			sdcardfs_put_reset_lower_path(dentry);
+		}
+		goto out;
+	}
+
+	/*
+	 * We don't consider ENOENT an error, and we want to return a
+	 * negative dentry.
+	 */
+	if (err && err != -ENOENT)
+		goto out;
+
+	/* instantiate a new negative dentry */
+	dname.name = name->name;
+	dname.len = name->len;
+
+	/* See if the low-level filesystem might want
+	 * to use its own hash
+	 */
+	lower_dentry = d_hash_and_lookup(lower_dir_dentry, &dname);
+	if (IS_ERR(lower_dentry))
+		return lower_dentry;
+
+	if (!lower_dentry) {
+		/* We called vfs_path_lookup earlier, and did not get a negative
+		 * dentry then. Don't confuse the lower filesystem by forcing
+		 * one on it now...
+		 */
+		err = -ENOENT;
+		goto out;
+	}
+
+	lower_path.dentry = lower_dentry;
+	lower_path.mnt = mntget(lower_dir_mnt);
+	sdcardfs_set_lower_path(dentry, &lower_path);
+
+	/*
+	 * If the intent is to create a file, then don't return an error, so
+	 * the VFS will continue the process of making this negative dentry
+	 * into a positive one.
+	 */
+	if (flags & (LOOKUP_CREATE|LOOKUP_RENAME_TARGET))
+		err = 0;
+
+out:
+	if (err)
+		return ERR_PTR(err);
+	return ret_dentry;
+}
+
+/*
+ * On success:
+ * fills the dentry with appropriate values and returns NULL.
+ * On failure:
+ * returns an error pointer.
+ *
+ * @dir : Parent inode.
+ * @dentry : Target dentry to look up. We should set each of its fields.
+ *	     (dentry->d_name is initialized already)
+ * @flags : Lookup flags passed down by the VFS.
+ */
+struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry,
+			     unsigned int flags)
+{
+	struct dentry *ret = NULL, *parent;
+	struct path lower_parent_path;
+	int err = 0;
+	const struct cred *saved_cred = NULL;
+
+	parent = dget_parent(dentry);
+
+	if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) {
+		ret = ERR_PTR(-EACCES);
+		goto out_err;
+	}
+
+	/* save current_cred and override it */
+	saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb),
+						SDCARDFS_I(dir)->data);
+	if (!saved_cred) {
+		ret = ERR_PTR(-ENOMEM);
+		goto out_err;
+	}
+
+	sdcardfs_get_lower_path(parent, &lower_parent_path);
+
+	/* allocate dentry private data.  We free it in ->d_release */
+	err = new_dentry_private_data(dentry);
+	if (err) {
+		ret = ERR_PTR(err);
+		goto out;
+	}
+
+	ret = __sdcardfs_lookup(dentry, flags, &lower_parent_path,
+				SDCARDFS_I(dir)->data->userid);
+	if (IS_ERR(ret))
+		goto out;
+	if (ret)
+		dentry = ret;
+	if (d_inode(dentry)) {
+		fsstack_copy_attr_times(d_inode(dentry),
+					sdcardfs_lower_inode(d_inode(dentry)));
+		/* get derived permission */
+		get_derived_permission(parent, dentry);
+		fixup_tmp_permissions(d_inode(dentry));
+		fixup_lower_ownership(dentry, dentry->d_name.name);
+	}
+	/* update parent directory's atime */
+	fsstack_copy_attr_atime(d_inode(parent),
+				sdcardfs_lower_inode(d_inode(parent)));
+
+out:
+	sdcardfs_put_lower_path(parent, &lower_parent_path);
+	revert_fsids(saved_cred);
+out_err:
+	dput(parent);
+	return ret;
+}
diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c
new file mode 100644
index 0000000..d890c57
--- /dev/null
+++ b/fs/sdcardfs/main.c
@@ -0,0 +1,511 @@
+/*
+ * fs/sdcardfs/main.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which was written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/parser.h>
+
+enum {
+	Opt_fsuid,
+	Opt_fsgid,
+	Opt_gid,
+	Opt_debug,
+	Opt_mask,
+	Opt_multiuser,
+	Opt_userid,
+	Opt_reserved_mb,
+	Opt_gid_derivation,
+	Opt_default_normal,
+	Opt_nocache,
+	Opt_unshared_obb,
+	Opt_err,
+};
+
+static const match_table_t sdcardfs_tokens = {
+	{Opt_fsuid, "fsuid=%u"},
+	{Opt_fsgid, "fsgid=%u"},
+	{Opt_gid, "gid=%u"},
+	{Opt_debug, "debug"},
+	{Opt_mask, "mask=%u"},
+	{Opt_userid, "userid=%d"},
+	{Opt_multiuser, "multiuser"},
+	{Opt_gid_derivation, "derive_gid"},
+	{Opt_default_normal, "default_normal"},
+	{Opt_unshared_obb, "unshared_obb"},
+	{Opt_reserved_mb, "reserved_mb=%u"},
+	{Opt_nocache, "nocache"},
+	{Opt_err, NULL}
+};
+
+static int parse_options(struct super_block *sb, char *options, int silent,
+				int *debug, struct sdcardfs_vfsmount_options *vfsopts,
+				struct sdcardfs_mount_options *opts)
+{
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+	int option;
+
+	/* by default, we use AID_MEDIA_RW as uid, gid */
+	opts->fs_low_uid = AID_MEDIA_RW;
+	opts->fs_low_gid = AID_MEDIA_RW;
+	vfsopts->mask = 0;
+	opts->multiuser = false;
+	opts->fs_user_id = 0;
+	vfsopts->gid = 0;
+	/* by default, 0MB is reserved */
+	opts->reserved_mb = 0;
+	/* by default, gid derivation is off */
+	opts->gid_derivation = false;
+	opts->default_normal = false;
+	opts->nocache = false;
+
+	*debug = 0;
+
+	if (!options)
+		return 0;
+
+	while ((p = strsep(&options, ",")) != NULL) {
+		int token;
+
+		if (!*p)
+			continue;
+
+		token = match_token(p, sdcardfs_tokens, args);
+
+		switch (token) {
+		case Opt_debug:
+			*debug = 1;
+			break;
+		case Opt_fsuid:
+			if (match_int(&args[0], &option))
+				return 0;
+			opts->fs_low_uid = option;
+			break;
+		case Opt_fsgid:
+			if (match_int(&args[0], &option))
+				return 0;
+			opts->fs_low_gid = option;
+			break;
+		case Opt_gid:
+			if (match_int(&args[0], &option))
+				return 0;
+			vfsopts->gid = option;
+			break;
+		case Opt_userid:
+			if (match_int(&args[0], &option))
+				return 0;
+			opts->fs_user_id = option;
+			break;
+		case Opt_mask:
+			if (match_int(&args[0], &option))
+				return 0;
+			vfsopts->mask = option;
+			break;
+		case Opt_multiuser:
+			opts->multiuser = true;
+			break;
+		case Opt_reserved_mb:
+			if (match_int(&args[0], &option))
+				return 0;
+			opts->reserved_mb = option;
+			break;
+		case Opt_gid_derivation:
+			opts->gid_derivation = true;
+			break;
+		case Opt_default_normal:
+			opts->default_normal = true;
+			break;
+		case Opt_nocache:
+			opts->nocache = true;
+			break;
+		case Opt_unshared_obb:
+			opts->unshared_obb = true;
+			break;
+		/* unknown option */
+		default:
+			if (!silent)
+				pr_err("Unrecognized mount option \"%s\" or missing value", p);
+			return -EINVAL;
+		}
+	}
+
+	if (*debug) {
+		pr_info("sdcardfs : options - debug:%d\n", *debug);
+		pr_info("sdcardfs : options - uid:%d\n",
+							opts->fs_low_uid);
+		pr_info("sdcardfs : options - gid:%d\n",
+							opts->fs_low_gid);
+	}
+
+	return 0;
+}
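
For reference, the options recognized above arrive as the data string of a mount(2) call. The sketch below is illustrative only: the lower directory (/data/media) and the mount point are assumed Android conventions, and the call only reaches parse_options() on a kernel that routes it through the AOSP ->mount2 hook registered later in this file. The numeric ids reuse AID_MEDIA_RW (1023) and AID_SDCARD_RW (1015) from sdcardfs.h.

    /* Illustrative userspace mount of sdcardfs; paths are assumptions. */
    #include <stdio.h>
    #include <sys/mount.h>

    int main(void)
    {
        const char *opts =
            "fsuid=1023,fsgid=1023,gid=1015,mask=6,userid=0,multiuser,derive_gid";

        /* lower (backing) directory -> sdcardfs mount point */
        if (mount("/data/media", "/mnt/runtime/default/emulated",
                  "sdcardfs", MS_NOSUID | MS_NODEV | MS_NOEXEC, opts)) {
            perror("mount sdcardfs");
            return 1;
        }
        return 0;
    }
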
+
+int parse_options_remount(struct super_block *sb, char *options, int silent,
+				struct sdcardfs_vfsmount_options *vfsopts)
+{
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+	int option;
+	int debug = 0;
+
+	if (!options)
+		return 0;
+
+	while ((p = strsep(&options, ",")) != NULL) {
+		int token;
+
+		if (!*p)
+			continue;
+
+		token = match_token(p, sdcardfs_tokens, args);
+
+		switch (token) {
+		case Opt_debug:
+			debug = 1;
+			break;
+		case Opt_gid:
+			if (match_int(&args[0], &option))
+				return 0;
+			vfsopts->gid = option;
+
+			break;
+		case Opt_mask:
+			if (match_int(&args[0], &option))
+				return 0;
+			vfsopts->mask = option;
+			break;
+		case Opt_unshared_obb:
+		case Opt_default_normal:
+		case Opt_multiuser:
+		case Opt_userid:
+		case Opt_fsuid:
+		case Opt_fsgid:
+		case Opt_reserved_mb:
+		case Opt_gid_derivation:
+			if (!silent)
+				pr_warn("Option \"%s\" can't be changed during remount\n", p);
+			break;
+		/* unknown option */
+		default:
+			if (!silent)
+				pr_err("Unrecognized mount option \"%s\" or missing value", p);
+			return -EINVAL;
+		}
+	}
+
+	if (debug) {
+		pr_info("sdcardfs : options - debug:%d\n", debug);
+		pr_info("sdcardfs : options - gid:%d\n", vfsopts->gid);
+		pr_info("sdcardfs : options - mask:%d\n", vfsopts->mask);
+	}
+
+	return 0;
+}
+
+#if 0
+/*
+ * our custom d_alloc_root work-alike
+ *
+ * we can't use d_alloc_root if we want to use our own interpose function
+ * unchanged, so we simply call our own "fake" d_alloc_root
+ */
+static struct dentry *sdcardfs_d_alloc_root(struct super_block *sb)
+{
+	struct dentry *ret = NULL;
+
+	if (sb) {
+		static const struct qstr name = {
+			.name = "/",
+			.len = 1
+		};
+
+		ret = d_alloc(NULL, &name);
+		if (ret) {
+			d_set_d_op(ret, &sdcardfs_ci_dops);
+			ret->d_sb = sb;
+			ret->d_parent = ret;
+		}
+	}
+	return ret;
+}
+#endif
+
+DEFINE_MUTEX(sdcardfs_super_list_lock);
+EXPORT_SYMBOL_GPL(sdcardfs_super_list_lock);
+LIST_HEAD(sdcardfs_super_list);
+EXPORT_SYMBOL_GPL(sdcardfs_super_list);
+
+/*
+ * There is no need to lock the sdcardfs_sb_info here, as there is no
+ * way anyone can have a reference to the superblock at this point in time.
+ */
+static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb,
+		const char *dev_name, void *raw_data, int silent)
+{
+	int err = 0;
+	int debug;
+	struct super_block *lower_sb;
+	struct path lower_path;
+	struct sdcardfs_sb_info *sb_info;
+	struct sdcardfs_vfsmount_options *mnt_opt = mnt->data;
+	struct inode *inode;
+
+	pr_info("sdcardfs version 2.0\n");
+
+	if (!dev_name) {
+		pr_err("sdcardfs: read_super: missing dev_name argument\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	pr_info("sdcardfs: dev_name -> %s\n", dev_name);
+	pr_info("sdcardfs: options -> %s\n", (char *)raw_data);
+	pr_info("sdcardfs: mnt -> %p\n", mnt);
+
+	/* parse lower path */
+	err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
+			&lower_path);
+	if (err) {
+		pr_err("sdcardfs: error accessing lower directory '%s'\n", dev_name);
+		goto out;
+	}
+
+	/* allocate superblock private data */
+	sb->s_fs_info = kzalloc(sizeof(struct sdcardfs_sb_info), GFP_KERNEL);
+	if (!SDCARDFS_SB(sb)) {
+		pr_crit("sdcardfs: read_super: out of memory\n");
+		err = -ENOMEM;
+		goto out_free;
+	}
+
+	sb_info = sb->s_fs_info;
+	/* parse options */
+	err = parse_options(sb, raw_data, silent, &debug, mnt_opt, &sb_info->options);
+	if (err) {
+		pr_err("sdcardfs: invalid options\n");
+		goto out_freesbi;
+	}
+
+	/* set the lower superblock field of upper superblock */
+	lower_sb = lower_path.dentry->d_sb;
+	atomic_inc(&lower_sb->s_active);
+	sdcardfs_set_lower_super(sb, lower_sb);
+
+	sb->s_stack_depth = lower_sb->s_stack_depth + 1;
+	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
+		pr_err("sdcardfs: maximum fs stacking depth exceeded\n");
+		err = -EINVAL;
+		goto out_sput;
+	}
+
+	/* inherit maxbytes from lower file system */
+	sb->s_maxbytes = lower_sb->s_maxbytes;
+
+	/*
+	 * Our c/m/atime granularity is 1 ns because we may stack on file
+	 * systems whose granularity is that fine.
+	 */
+	sb->s_time_gran = 1;
+
+	sb->s_magic = SDCARDFS_SUPER_MAGIC;
+	sb->s_op = &sdcardfs_sops;
+
+	/* get a new inode and allocate our root dentry */
+	inode = sdcardfs_iget(sb, d_inode(lower_path.dentry), 0);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		goto out_sput;
+	}
+	sb->s_root = d_make_root(inode);
+	if (!sb->s_root) {
+		err = -ENOMEM;
+		goto out_sput;
+	}
+	d_set_d_op(sb->s_root, &sdcardfs_ci_dops);
+
+	/* link the upper and lower dentries */
+	sb->s_root->d_fsdata = NULL;
+	err = new_dentry_private_data(sb->s_root);
+	if (err)
+		goto out_freeroot;
+
+	/* set the lower dentries for s_root */
+	sdcardfs_set_lower_path(sb->s_root, &lower_path);
+
+	/*
+	 * No need to call interpose because we already have a positive
+	 * dentry, which was instantiated by d_make_root.  Just need to
+	 * d_rehash it.
+	 */
+	d_rehash(sb->s_root);
+
+	/* setup permission policy */
+	sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL);
+	mutex_lock(&sdcardfs_super_list_lock);
+	if (sb_info->options.multiuser) {
+		setup_derived_state(d_inode(sb->s_root), PERM_PRE_ROOT,
+				sb_info->options.fs_user_id, AID_ROOT);
+		snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name);
+	} else {
+		setup_derived_state(d_inode(sb->s_root), PERM_ROOT,
+				sb_info->options.fs_user_id, AID_ROOT);
+		snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name);
+	}
+	fixup_tmp_permissions(d_inode(sb->s_root));
+	sb_info->sb = sb;
+	list_add(&sb_info->list, &sdcardfs_super_list);
+	mutex_unlock(&sdcardfs_super_list_lock);
+
+	if (!silent)
+		pr_info("sdcardfs: mounted on top of %s type %s\n",
+				dev_name, lower_sb->s_type->name);
+	goto out; /* all is well */
+
+	/* no longer needed: free_dentry_private_data(sb->s_root); */
+out_freeroot:
+	dput(sb->s_root);
+	sb->s_root = NULL;
+out_sput:
+	/* drop refs we took earlier */
+	atomic_dec(&lower_sb->s_active);
+out_freesbi:
+	kfree(SDCARDFS_SB(sb));
+	sb->s_fs_info = NULL;
+out_free:
+	path_put(&lower_path);
+
+out:
+	return err;
+}
+
+struct sdcardfs_mount_private {
+	struct vfsmount *mnt;
+	const char *dev_name;
+	void *raw_data;
+};
+
+static int __sdcardfs_fill_super(
+	struct super_block *sb,
+	void *_priv, int silent)
+{
+	struct sdcardfs_mount_private *priv = _priv;
+
+	return sdcardfs_read_super(priv->mnt,
+		sb, priv->dev_name, priv->raw_data, silent);
+}
+
+static struct dentry *sdcardfs_mount(struct vfsmount *mnt,
+		struct file_system_type *fs_type, int flags,
+			    const char *dev_name, void *raw_data)
+{
+	struct sdcardfs_mount_private priv = {
+		.mnt = mnt,
+		.dev_name = dev_name,
+		.raw_data = raw_data
+	};
+
+	return mount_nodev(fs_type, flags,
+		&priv, __sdcardfs_fill_super);
+}
+
+static struct dentry *sdcardfs_mount_wrn(struct file_system_type *fs_type,
+		    int flags, const char *dev_name, void *raw_data)
+{
+	WARN(1, "sdcardfs does not support mount. Use mount2.\n");
+	return ERR_PTR(-EINVAL);
+}
+
+void *sdcardfs_alloc_mnt_data(void)
+{
+	return kmalloc(sizeof(struct sdcardfs_vfsmount_options), GFP_KERNEL);
+}
+
+void sdcardfs_kill_sb(struct super_block *sb)
+{
+	struct sdcardfs_sb_info *sbi;
+
+	if (sb->s_magic == SDCARDFS_SUPER_MAGIC && sb->s_fs_info) {
+		sbi = SDCARDFS_SB(sb);
+		mutex_lock(&sdcardfs_super_list_lock);
+		list_del(&sbi->list);
+		mutex_unlock(&sdcardfs_super_list_lock);
+	}
+	kill_anon_super(sb);
+}
+
+static struct file_system_type sdcardfs_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= SDCARDFS_NAME,
+	.mount		= sdcardfs_mount_wrn,
+	.mount2		= sdcardfs_mount,
+	.alloc_mnt_data = sdcardfs_alloc_mnt_data,
+	.kill_sb	= sdcardfs_kill_sb,
+	.fs_flags	= 0,
+};
+MODULE_ALIAS_FS(SDCARDFS_NAME);
+
+static int __init init_sdcardfs_fs(void)
+{
+	int err;
+
+	pr_info("Registering sdcardfs " SDCARDFS_VERSION "\n");
+
+	err = sdcardfs_init_inode_cache();
+	if (err)
+		goto out;
+	err = sdcardfs_init_dentry_cache();
+	if (err)
+		goto out;
+	err = packagelist_init();
+	if (err)
+		goto out;
+	err = register_filesystem(&sdcardfs_fs_type);
+out:
+	if (err) {
+		sdcardfs_destroy_inode_cache();
+		sdcardfs_destroy_dentry_cache();
+		packagelist_exit();
+	}
+	return err;
+}
+
+static void __exit exit_sdcardfs_fs(void)
+{
+	sdcardfs_destroy_inode_cache();
+	sdcardfs_destroy_dentry_cache();
+	packagelist_exit();
+	unregister_filesystem(&sdcardfs_fs_type);
+	pr_info("Completed sdcardfs module unload\n");
+}
+
+/* Original wrapfs authors */
+MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University (http://www.fsl.cs.sunysb.edu/)");
+
+/* Original sdcardfs authors */
+MODULE_AUTHOR("Woojoong Lee, Daeho Jeong, Kitae Lee, Yeongjin Gil System Memory Lab., Samsung Electronics");
+
+/* Current maintainer */
+MODULE_AUTHOR("Daniel Rosenberg, Google");
+MODULE_DESCRIPTION("Sdcardfs " SDCARDFS_VERSION);
+MODULE_LICENSE("GPL");
+
+module_init(init_sdcardfs_fs);
+module_exit(exit_sdcardfs_fs);
diff --git a/fs/sdcardfs/mmap.c b/fs/sdcardfs/mmap.c
new file mode 100644
index 0000000..2847c0e
--- /dev/null
+++ b/fs/sdcardfs/mmap.c
@@ -0,0 +1,87 @@
+/*
+ * fs/sdcardfs/mmap.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which was written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+
+static int sdcardfs_fault(struct vm_fault *vmf)
+{
+	int err;
+	struct file *file;
+	const struct vm_operations_struct *lower_vm_ops;
+
+	file = (struct file *)vmf->vma->vm_private_data;
+	lower_vm_ops = SDCARDFS_F(file)->lower_vm_ops;
+	BUG_ON(!lower_vm_ops);
+
+	err = lower_vm_ops->fault(vmf);
+	return err;
+}
+
+static void sdcardfs_vm_open(struct vm_area_struct *vma)
+{
+	struct file *file = (struct file *)vma->vm_private_data;
+
+	get_file(file);
+}
+
+static void sdcardfs_vm_close(struct vm_area_struct *vma)
+{
+	struct file *file = (struct file *)vma->vm_private_data;
+
+	fput(file);
+}
+
+static int sdcardfs_page_mkwrite(struct vm_fault *vmf)
+{
+	int err = 0;
+	struct file *file;
+	const struct vm_operations_struct *lower_vm_ops;
+
+	file = (struct file *)vmf->vma->vm_private_data;
+	lower_vm_ops = SDCARDFS_F(file)->lower_vm_ops;
+	BUG_ON(!lower_vm_ops);
+	if (!lower_vm_ops->page_mkwrite)
+		goto out;
+
+	err = lower_vm_ops->page_mkwrite(vmf);
+out:
+	return err;
+}
+
+static ssize_t sdcardfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+{
+	/*
+	 * This function should never be called directly.  It only needs
+	 * to exist so that the VFS open-time O_DIRECT check finds a
+	 * ->direct_IO method and allows the open to proceed.
+	 */
+	return -EINVAL;
+}
+
+const struct address_space_operations sdcardfs_aops = {
+	.direct_IO	= sdcardfs_direct_IO,
+};
+
+const struct vm_operations_struct sdcardfs_vm_ops = {
+	.fault		= sdcardfs_fault,
+	.page_mkwrite	= sdcardfs_page_mkwrite,
+	.open		= sdcardfs_vm_open,
+	.close		= sdcardfs_vm_close,
+};
diff --git a/fs/sdcardfs/multiuser.h b/fs/sdcardfs/multiuser.h
new file mode 100644
index 0000000..85341e7
--- /dev/null
+++ b/fs/sdcardfs/multiuser.h
@@ -0,0 +1,53 @@
+/*
+ * fs/sdcardfs/multiuser.h
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which was written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#define AID_USER_OFFSET     100000 /* offset for uid ranges for each user */
+#define AID_APP_START        10000 /* first app user */
+#define AID_APP_END          19999 /* last app user */
+#define AID_CACHE_GID_START  20000 /* start of gids for apps to mark cached data */
+#define AID_EXT_GID_START    30000 /* start of gids for apps to mark external data */
+#define AID_EXT_CACHE_GID_START 40000 /* start of gids for apps to mark external cached data */
+#define AID_EXT_CACHE_GID_END 49999   /* end of gids for apps to mark external cached data */
+#define AID_SHARED_GID_START 50000 /* start of gids for apps in each user to share */
+
+typedef uid_t userid_t;
+typedef uid_t appid_t;
+
+static inline uid_t multiuser_get_uid(userid_t user_id, appid_t app_id)
+{
+	return (user_id * AID_USER_OFFSET) + (app_id % AID_USER_OFFSET);
+}
+
+static inline bool uid_is_app(uid_t uid)
+{
+	appid_t appid = uid % AID_USER_OFFSET;
+
+	return appid >= AID_APP_START && appid <= AID_APP_END;
+}
+
+static inline gid_t multiuser_get_ext_cache_gid(uid_t uid)
+{
+	return uid - AID_APP_START + AID_EXT_CACHE_GID_START;
+}
+
+static inline gid_t multiuser_get_ext_gid(uid_t uid)
+{
+	return uid - AID_APP_START + AID_EXT_GID_START;
+}
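
The helpers above are plain arithmetic over the AID_* ranges. The following stand-alone check (not part of the patch; constants copied from this header) makes the uid/gid layout concrete:

    #include <stdio.h>

    #define AID_USER_OFFSET         100000
    #define AID_APP_START            10000
    #define AID_APP_END              19999
    #define AID_EXT_GID_START        30000
    #define AID_EXT_CACHE_GID_START  40000

    int main(void)
    {
        /* app 10057 as seen by user 0 and by user 10 */
        unsigned int uid0  = 0  * AID_USER_OFFSET + 10057;   /* 10057   */
        unsigned int uid10 = 10 * AID_USER_OFFSET + 10057;   /* 1010057 */

        /* same formulas as multiuser_get_ext_gid()/..._ext_cache_gid() */
        printf("uid %u -> ext gid %u, ext cache gid %u\n",
               uid0,
               uid0 - AID_APP_START + AID_EXT_GID_START,        /* 30057 */
               uid0 - AID_APP_START + AID_EXT_CACHE_GID_START); /* 40057 */

        /* same check as uid_is_app() */
        printf("uid %u is app-range: %d\n",
               uid10, (uid10 % AID_USER_OFFSET) >= AID_APP_START
                   && (uid10 % AID_USER_OFFSET) <= AID_APP_END); /* 1 */
        return 0;
    }

So multiuser_get_uid(10, 10057) yields 1010057, and for the user-0 uid 10057 the derived ext and ext-cache gids are 30057 and 40057.
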
diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c
new file mode 100644
index 0000000..4b9a563
--- /dev/null
+++ b/fs/sdcardfs/packagelist.c
@@ -0,0 +1,882 @@
+/*
+ * fs/sdcardfs/packagelist.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which was written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+#include <linux/hashtable.h>
+#include <linux/ctype.h>
+#include <linux/delay.h>
+#include <linux/radix-tree.h>
+#include <linux/dcache.h>
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <linux/configfs.h>
+
+struct hashtable_entry {
+	struct hlist_node hlist;
+	struct hlist_node dlist; /* for deletion cleanup */
+	struct qstr key;
+	atomic_t value;
+};
+
+static DEFINE_HASHTABLE(package_to_appid, 8);
+static DEFINE_HASHTABLE(package_to_userid, 8);
+static DEFINE_HASHTABLE(ext_to_groupid, 8);
+
+
+static struct kmem_cache *hashtable_entry_cachep;
+
+static unsigned int full_name_case_hash(const void *salt, const unsigned char *name, unsigned int len)
+{
+	unsigned long hash = init_name_hash(salt);
+
+	while (len--)
+		hash = partial_name_hash(tolower(*name++), hash);
+	return end_name_hash(hash);
+}
+
+static inline void qstr_init(struct qstr *q, const char *name)
+{
+	q->name = name;
+	q->len = strlen(q->name);
+	q->hash = full_name_case_hash(0, q->name, q->len);
+}
+
+static inline int qstr_copy(const struct qstr *src, struct qstr *dest)
+{
+	dest->name = kstrdup(src->name, GFP_KERNEL);
+	dest->hash_len = src->hash_len;
+	return !!dest->name;
+}
+
+
+static appid_t __get_appid(const struct qstr *key)
+{
+	struct hashtable_entry *hash_cur;
+	unsigned int hash = key->hash;
+	appid_t ret_id;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(package_to_appid, hash_cur, hlist, hash) {
+		if (qstr_case_eq(key, &hash_cur->key)) {
+			ret_id = atomic_read(&hash_cur->value);
+			rcu_read_unlock();
+			return ret_id;
+		}
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
+appid_t get_appid(const char *key)
+{
+	struct qstr q;
+
+	qstr_init(&q, key);
+	return __get_appid(&q);
+}
+
+static appid_t __get_ext_gid(const struct qstr *key)
+{
+	struct hashtable_entry *hash_cur;
+	unsigned int hash = key->hash;
+	appid_t ret_id;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(ext_to_groupid, hash_cur, hlist, hash) {
+		if (qstr_case_eq(key, &hash_cur->key)) {
+			ret_id = atomic_read(&hash_cur->value);
+			rcu_read_unlock();
+			return ret_id;
+		}
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
+appid_t get_ext_gid(const char *key)
+{
+	struct qstr q;
+
+	qstr_init(&q, key);
+	return __get_ext_gid(&q);
+}
+
+static appid_t __is_excluded(const struct qstr *app_name, userid_t user)
+{
+	struct hashtable_entry *hash_cur;
+	unsigned int hash = app_name->hash;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) {
+		if (atomic_read(&hash_cur->value) == user &&
+				qstr_case_eq(app_name, &hash_cur->key)) {
+			rcu_read_unlock();
+			return 1;
+		}
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
+appid_t is_excluded(const char *key, userid_t user)
+{
+	struct qstr q;
+	qstr_init(&q, key);
+	return __is_excluded(&q, user);
+}
+
+/* Kernel has already enforced everything we returned through
+ * derive_permissions_locked(), so this is used to lock down access
+ * even further, such as enforcing that apps hold sdcard_rw.
+ */
+int check_caller_access_to_name(struct inode *parent_node, const struct qstr *name)
+{
+	struct qstr q_autorun = QSTR_LITERAL("autorun.inf");
+	struct qstr q__android_secure = QSTR_LITERAL(".android_secure");
+	struct qstr q_android_secure = QSTR_LITERAL("android_secure");
+
+	/* Always block security-sensitive files at root */
+	if (parent_node && SDCARDFS_I(parent_node)->data->perm == PERM_ROOT) {
+		if (qstr_case_eq(name, &q_autorun)
+			|| qstr_case_eq(name, &q__android_secure)
+			|| qstr_case_eq(name, &q_android_secure)) {
+			return 0;
+		}
+	}
+
+	/* Root always has access; access for any other UIDs should always
+	 * be controlled through packages.list.
+	 */
+	if (from_kuid(&init_user_ns, current_fsuid()) == 0)
+		return 1;
+
+	/* No extra permissions to enforce */
+	return 1;
+}
+
+static struct hashtable_entry *alloc_hashtable_entry(const struct qstr *key,
+		appid_t value)
+{
+	struct hashtable_entry *ret = kmem_cache_alloc(hashtable_entry_cachep,
+			GFP_KERNEL);
+	if (!ret)
+		return NULL;
+	INIT_HLIST_NODE(&ret->dlist);
+	INIT_HLIST_NODE(&ret->hlist);
+
+	if (!qstr_copy(key, &ret->key)) {
+		kmem_cache_free(hashtable_entry_cachep, ret);
+		return NULL;
+	}
+
+	atomic_set(&ret->value, value);
+	return ret;
+}
+
+static int insert_packagelist_appid_entry_locked(const struct qstr *key, appid_t value)
+{
+	struct hashtable_entry *hash_cur;
+	struct hashtable_entry *new_entry;
+	unsigned int hash = key->hash;
+
+	hash_for_each_possible_rcu(package_to_appid, hash_cur, hlist, hash) {
+		if (qstr_case_eq(key, &hash_cur->key)) {
+			atomic_set(&hash_cur->value, value);
+			return 0;
+		}
+	}
+	new_entry = alloc_hashtable_entry(key, value);
+	if (!new_entry)
+		return -ENOMEM;
+	hash_add_rcu(package_to_appid, &new_entry->hlist, hash);
+	return 0;
+}
+
+static int insert_ext_gid_entry_locked(const struct qstr *key, appid_t value)
+{
+	struct hashtable_entry *hash_cur;
+	struct hashtable_entry *new_entry;
+	unsigned int hash = key->hash;
+
+	/* An extension can only belong to one gid */
+	hash_for_each_possible_rcu(ext_to_groupid, hash_cur, hlist, hash) {
+		if (qstr_case_eq(key, &hash_cur->key))
+			return -EINVAL;
+	}
+	new_entry = alloc_hashtable_entry(key, value);
+	if (!new_entry)
+		return -ENOMEM;
+	hash_add_rcu(ext_to_groupid, &new_entry->hlist, hash);
+	return 0;
+}
+
+static int insert_userid_exclude_entry_locked(const struct qstr *key, userid_t value)
+{
+	struct hashtable_entry *hash_cur;
+	struct hashtable_entry *new_entry;
+	unsigned int hash = key->hash;
+
+	/* Only insert if not already present */
+	hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) {
+		if (atomic_read(&hash_cur->value) == value &&
+				qstr_case_eq(key, &hash_cur->key))
+			return 0;
+	}
+	new_entry = alloc_hashtable_entry(key, value);
+	if (!new_entry)
+		return -ENOMEM;
+	hash_add_rcu(package_to_userid, &new_entry->hlist, hash);
+	return 0;
+}
+
+static void fixup_all_perms_name(const struct qstr *key)
+{
+	struct sdcardfs_sb_info *sbinfo;
+	struct limit_search limit = {
+		.flags = BY_NAME,
+		.name = QSTR_INIT(key->name, key->len),
+	};
+	list_for_each_entry(sbinfo, &sdcardfs_super_list, list) {
+		if (sbinfo_has_sdcard_magic(sbinfo))
+			fixup_perms_recursive(sbinfo->sb->s_root, &limit);
+	}
+}
+
+static void fixup_all_perms_name_userid(const struct qstr *key, userid_t userid)
+{
+	struct sdcardfs_sb_info *sbinfo;
+	struct limit_search limit = {
+		.flags = BY_NAME | BY_USERID,
+		.name = QSTR_INIT(key->name, key->len),
+		.userid = userid,
+	};
+	list_for_each_entry(sbinfo, &sdcardfs_super_list, list) {
+		if (sbinfo_has_sdcard_magic(sbinfo))
+			fixup_perms_recursive(sbinfo->sb->s_root, &limit);
+	}
+}
+
+static void fixup_all_perms_userid(userid_t userid)
+{
+	struct sdcardfs_sb_info *sbinfo;
+	struct limit_search limit = {
+		.flags = BY_USERID,
+		.userid = userid,
+	};
+	list_for_each_entry(sbinfo, &sdcardfs_super_list, list) {
+		if (sbinfo_has_sdcard_magic(sbinfo))
+			fixup_perms_recursive(sbinfo->sb->s_root, &limit);
+	}
+}
+
+static int insert_packagelist_entry(const struct qstr *key, appid_t value)
+{
+	int err;
+
+	mutex_lock(&sdcardfs_super_list_lock);
+	err = insert_packagelist_appid_entry_locked(key, value);
+	if (!err)
+		fixup_all_perms_name(key);
+	mutex_unlock(&sdcardfs_super_list_lock);
+
+	return err;
+}
+
+static int insert_ext_gid_entry(const struct qstr *key, appid_t value)
+{
+	int err;
+
+	mutex_lock(&sdcardfs_super_list_lock);
+	err = insert_ext_gid_entry_locked(key, value);
+	mutex_unlock(&sdcardfs_super_list_lock);
+
+	return err;
+}
+
+static int insert_userid_exclude_entry(const struct qstr *key, userid_t value)
+{
+	int err;
+
+	mutex_lock(&sdcardfs_super_list_lock);
+	err = insert_userid_exclude_entry_locked(key, value);
+	if (!err)
+		fixup_all_perms_name_userid(key, value);
+	mutex_unlock(&sdcardfs_super_list_lock);
+
+	return err;
+}
+
+static void free_hashtable_entry(struct hashtable_entry *entry)
+{
+	kfree(entry->key.name);
+	kmem_cache_free(hashtable_entry_cachep, entry);
+}
+
+static void remove_packagelist_entry_locked(const struct qstr *key)
+{
+	struct hashtable_entry *hash_cur;
+	unsigned int hash = key->hash;
+	struct hlist_node *h_t;
+	HLIST_HEAD(free_list);
+
+	hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) {
+		if (qstr_case_eq(key, &hash_cur->key)) {
+			hash_del_rcu(&hash_cur->hlist);
+			hlist_add_head(&hash_cur->dlist, &free_list);
+		}
+	}
+	hash_for_each_possible_rcu(package_to_appid, hash_cur, hlist, hash) {
+		if (qstr_case_eq(key, &hash_cur->key)) {
+			hash_del_rcu(&hash_cur->hlist);
+			hlist_add_head(&hash_cur->dlist, &free_list);
+			break;
+		}
+	}
+	synchronize_rcu();
+	hlist_for_each_entry_safe(hash_cur, h_t, &free_list, dlist)
+		free_hashtable_entry(hash_cur);
+}
+
+static void remove_packagelist_entry(const struct qstr *key)
+{
+	mutex_lock(&sdcardfs_super_list_lock);
+	remove_packagelist_entry_locked(key);
+	fixup_all_perms_name(key);
+	mutex_unlock(&sdcardfs_super_list_lock);
+}
+
+static void remove_ext_gid_entry_locked(const struct qstr *key, gid_t group)
+{
+	struct hashtable_entry *hash_cur;
+	unsigned int hash = key->hash;
+
+	hash_for_each_possible_rcu(ext_to_groupid, hash_cur, hlist, hash) {
+		if (qstr_case_eq(key, &hash_cur->key) && atomic_read(&hash_cur->value) == group) {
+			hash_del_rcu(&hash_cur->hlist);
+			synchronize_rcu();
+			free_hashtable_entry(hash_cur);
+			break;
+		}
+	}
+}
+
+static void remove_ext_gid_entry(const struct qstr *key, gid_t group)
+{
+	mutex_lock(&sdcardfs_super_list_lock);
+	remove_ext_gid_entry_locked(key, group);
+	mutex_unlock(&sdcardfs_super_list_lock);
+}
+
+static void remove_userid_all_entry_locked(userid_t userid)
+{
+	struct hashtable_entry *hash_cur;
+	struct hlist_node *h_t;
+	HLIST_HEAD(free_list);
+	int i;
+
+	hash_for_each_rcu(package_to_userid, i, hash_cur, hlist) {
+		if (atomic_read(&hash_cur->value) == userid) {
+			hash_del_rcu(&hash_cur->hlist);
+			hlist_add_head(&hash_cur->dlist, &free_list);
+		}
+	}
+	synchronize_rcu();
+	hlist_for_each_entry_safe(hash_cur, h_t, &free_list, dlist) {
+		free_hashtable_entry(hash_cur);
+	}
+}
+
+static void remove_userid_all_entry(userid_t userid)
+{
+	mutex_lock(&sdcardfs_super_list_lock);
+	remove_userid_all_entry_locked(userid);
+	fixup_all_perms_userid(userid);
+	mutex_unlock(&sdcardfs_super_list_lock);
+}
+
+static void remove_userid_exclude_entry_locked(const struct qstr *key, userid_t userid)
+{
+	struct hashtable_entry *hash_cur;
+	unsigned int hash = key->hash;
+
+	hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) {
+		if (qstr_case_eq(key, &hash_cur->key) &&
+				atomic_read(&hash_cur->value) == userid) {
+			hash_del_rcu(&hash_cur->hlist);
+			synchronize_rcu();
+			free_hashtable_entry(hash_cur);
+			break;
+		}
+	}
+}
+
+static void remove_userid_exclude_entry(const struct qstr *key, userid_t userid)
+{
+	mutex_lock(&sdcardfs_super_list_lock);
+	remove_userid_exclude_entry_locked(key, userid);
+	fixup_all_perms_name_userid(key, userid);
+	mutex_unlock(&sdcardfs_super_list_lock);
+}
+
+static void packagelist_destroy(void)
+{
+	struct hashtable_entry *hash_cur;
+	struct hlist_node *h_t;
+	HLIST_HEAD(free_list);
+	int i;
+
+	mutex_lock(&sdcardfs_super_list_lock);
+	hash_for_each_rcu(package_to_appid, i, hash_cur, hlist) {
+		hash_del_rcu(&hash_cur->hlist);
+		hlist_add_head(&hash_cur->dlist, &free_list);
+	}
+	hash_for_each_rcu(package_to_userid, i, hash_cur, hlist) {
+		hash_del_rcu(&hash_cur->hlist);
+		hlist_add_head(&hash_cur->dlist, &free_list);
+	}
+	synchronize_rcu();
+	hlist_for_each_entry_safe(hash_cur, h_t, &free_list, dlist)
+		free_hashtable_entry(hash_cur);
+	mutex_unlock(&sdcardfs_super_list_lock);
+	pr_info("sdcardfs: destroyed packagelist pkgld\n");
+}
+
+#define SDCARDFS_CONFIGFS_ATTR(_pfx, _name)			\
+static struct configfs_attribute _pfx##attr_##_name = {	\
+	.ca_name	= __stringify(_name),		\
+	.ca_mode	= S_IRUGO | S_IWUGO,		\
+	.ca_owner	= THIS_MODULE,			\
+	.show		= _pfx##_name##_show,		\
+	.store		= _pfx##_name##_store,		\
+}
+
+#define SDCARDFS_CONFIGFS_ATTR_RO(_pfx, _name)			\
+static struct configfs_attribute _pfx##attr_##_name = {	\
+	.ca_name	= __stringify(_name),		\
+	.ca_mode	= S_IRUGO,			\
+	.ca_owner	= THIS_MODULE,			\
+	.show		= _pfx##_name##_show,		\
+}
+
+#define SDCARDFS_CONFIGFS_ATTR_WO(_pfx, _name)			\
+static struct configfs_attribute _pfx##attr_##_name = {	\
+	.ca_name	= __stringify(_name),		\
+	.ca_mode	= S_IWUGO,			\
+	.ca_owner	= THIS_MODULE,			\
+	.store		= _pfx##_name##_store,		\
+}
+
+struct package_details {
+	struct config_item item;
+	struct qstr name;
+};
+
+static inline struct package_details *to_package_details(struct config_item *item)
+{
+	return item ? container_of(item, struct package_details, item) : NULL;
+}
+
+static ssize_t package_details_appid_show(struct config_item *item, char *page)
+{
+	return scnprintf(page, PAGE_SIZE, "%u\n", __get_appid(&to_package_details(item)->name));
+}
+
+static ssize_t package_details_appid_store(struct config_item *item,
+				       const char *page, size_t count)
+{
+	unsigned int tmp;
+	int ret;
+
+	ret = kstrtouint(page, 10, &tmp);
+	if (ret)
+		return ret;
+
+	ret = insert_packagelist_entry(&to_package_details(item)->name, tmp);
+
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+static ssize_t package_details_excluded_userids_show(struct config_item *item,
+				      char *page)
+{
+	struct package_details *package_details = to_package_details(item);
+	struct hashtable_entry *hash_cur;
+	unsigned int hash = package_details->name.hash;
+	int count = 0;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) {
+		if (qstr_case_eq(&package_details->name, &hash_cur->key))
+			count += scnprintf(page + count, PAGE_SIZE - count,
+					"%d ", atomic_read(&hash_cur->value));
+	}
+	rcu_read_unlock();
+	if (count)
+		count--;
+	count += scnprintf(page + count, PAGE_SIZE - count, "\n");
+	return count;
+}
+
+static ssize_t package_details_excluded_userids_store(struct config_item *item,
+				       const char *page, size_t count)
+{
+	unsigned int tmp;
+	int ret;
+
+	ret = kstrtouint(page, 10, &tmp);
+	if (ret)
+		return ret;
+
+	ret = insert_userid_exclude_entry(&to_package_details(item)->name, tmp);
+
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+static ssize_t package_details_clear_userid_store(struct config_item *item,
+				       const char *page, size_t count)
+{
+	unsigned int tmp;
+	int ret;
+
+	ret = kstrtouint(page, 10, &tmp);
+	if (ret)
+		return ret;
+	remove_userid_exclude_entry(&to_package_details(item)->name, tmp);
+	return count;
+}
+
+static void package_details_release(struct config_item *item)
+{
+	struct package_details *package_details = to_package_details(item);
+
+	pr_info("sdcardfs: removing %s\n", package_details->name.name);
+	remove_packagelist_entry(&package_details->name);
+	kfree(package_details->name.name);
+	kfree(package_details);
+}
+
+SDCARDFS_CONFIGFS_ATTR(package_details_, appid);
+SDCARDFS_CONFIGFS_ATTR(package_details_, excluded_userids);
+SDCARDFS_CONFIGFS_ATTR_WO(package_details_, clear_userid);
+
+static struct configfs_attribute *package_details_attrs[] = {
+	&package_details_attr_appid,
+	&package_details_attr_excluded_userids,
+	&package_details_attr_clear_userid,
+	NULL,
+};
+
+static struct configfs_item_operations package_details_item_ops = {
+	.release = package_details_release,
+};
+
+static struct config_item_type package_appid_type = {
+	.ct_item_ops	= &package_details_item_ops,
+	.ct_attrs	= package_details_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+struct extensions_value {
+	struct config_group group;
+	unsigned int num;
+};
+
+struct extension_details {
+	struct config_item item;
+	struct qstr name;
+	unsigned int num;
+};
+
+static inline struct extensions_value *to_extensions_value(struct config_item *item)
+{
+	return item ? container_of(to_config_group(item), struct extensions_value, group) : NULL;
+}
+
+static inline struct extension_details *to_extension_details(struct config_item *item)
+{
+	return item ? container_of(item, struct extension_details, item) : NULL;
+}
+
+static void extension_details_release(struct config_item *item)
+{
+	struct extension_details *extension_details = to_extension_details(item);
+
+	pr_info("sdcardfs: No longer mapping %s files to gid %d\n",
+			extension_details->name.name, extension_details->num);
+	remove_ext_gid_entry(&extension_details->name, extension_details->num);
+	kfree(extension_details->name.name);
+	kfree(extension_details);
+}
+
+static struct configfs_item_operations extension_details_item_ops = {
+	.release = extension_details_release,
+};
+
+static struct config_item_type extension_details_type = {
+	.ct_item_ops = &extension_details_item_ops,
+	.ct_owner = THIS_MODULE,
+};
+
+static struct config_item *extension_details_make_item(struct config_group *group, const char *name)
+{
+	struct extensions_value *extensions_value = to_extensions_value(&group->cg_item);
+	struct extension_details *extension_details = kzalloc(sizeof(struct extension_details), GFP_KERNEL);
+	const char *tmp;
+	int ret;
+
+	if (!extension_details)
+		return ERR_PTR(-ENOMEM);
+
+	tmp = kstrdup(name, GFP_KERNEL);
+	if (!tmp) {
+		kfree(extension_details);
+		return ERR_PTR(-ENOMEM);
+	}
+	qstr_init(&extension_details->name, tmp);
+	extension_details->num = extensions_value->num;
+	ret = insert_ext_gid_entry(&extension_details->name, extensions_value->num);
+
+	if (ret) {
+		kfree(extension_details->name.name);
+		kfree(extension_details);
+		return ERR_PTR(ret);
+	}
+	config_item_init_type_name(&extension_details->item, name, &extension_details_type);
+
+	return &extension_details->item;
+}
+
+static struct configfs_group_operations extensions_value_group_ops = {
+	.make_item = extension_details_make_item,
+};
+
+static struct config_item_type extensions_name_type = {
+	.ct_group_ops	= &extensions_value_group_ops,
+	.ct_owner	= THIS_MODULE,
+};
+
+static struct config_group *extensions_make_group(struct config_group *group, const char *name)
+{
+	struct extensions_value *extensions_value;
+	unsigned int tmp;
+	int ret;
+
+	extensions_value = kzalloc(sizeof(struct extensions_value), GFP_KERNEL);
+	if (!extensions_value)
+		return ERR_PTR(-ENOMEM);
+	ret = kstrtouint(name, 10, &tmp);
+	if (ret) {
+		kfree(extensions_value);
+		return ERR_PTR(ret);
+	}
+
+	extensions_value->num = tmp;
+	config_group_init_type_name(&extensions_value->group, name,
+						&extensions_name_type);
+	return &extensions_value->group;
+}
+
+static void extensions_drop_group(struct config_group *group, struct config_item *item)
+{
+	struct extensions_value *value = to_extensions_value(item);
+
+	pr_info("sdcardfs: No longer mapping any files to gid %d\n", value->num);
+	kfree(value);
+}
+
+static struct configfs_group_operations extensions_group_ops = {
+	.make_group	= extensions_make_group,
+	.drop_item	= extensions_drop_group,
+};
+
+static struct config_item_type extensions_type = {
+	.ct_group_ops	= &extensions_group_ops,
+	.ct_owner	= THIS_MODULE,
+};
+
+struct config_group extension_group = {
+	.cg_item = {
+		.ci_namebuf = "extensions",
+		.ci_type = &extensions_type,
+	},
+};
+
+static struct config_item *packages_make_item(struct config_group *group, const char *name)
+{
+	struct package_details *package_details;
+	const char *tmp;
+
+	package_details = kzalloc(sizeof(struct package_details), GFP_KERNEL);
+	if (!package_details)
+		return ERR_PTR(-ENOMEM);
+	tmp = kstrdup(name, GFP_KERNEL);
+	if (!tmp) {
+		kfree(package_details);
+		return ERR_PTR(-ENOMEM);
+	}
+	qstr_init(&package_details->name, tmp);
+	config_item_init_type_name(&package_details->item, name,
+						&package_appid_type);
+
+	return &package_details->item;
+}
+
+static ssize_t packages_list_show(struct config_item *item, char *page)
+{
+	struct hashtable_entry *hash_cur_app;
+	struct hashtable_entry *hash_cur_user;
+	int i;
+	int count = 0, written = 0;
+	const char errormsg[] = "<truncated>\n";
+	unsigned int hash;
+
+	rcu_read_lock();
+	hash_for_each_rcu(package_to_appid, i, hash_cur_app, hlist) {
+		written = scnprintf(page + count, PAGE_SIZE - sizeof(errormsg) - count, "%s %d\n",
+					hash_cur_app->key.name, atomic_read(&hash_cur_app->value));
+		hash = hash_cur_app->key.hash;
+		hash_for_each_possible_rcu(package_to_userid, hash_cur_user, hlist, hash) {
+			if (qstr_case_eq(&hash_cur_app->key, &hash_cur_user->key)) {
+				written += scnprintf(page + count + written - 1,
+					PAGE_SIZE - sizeof(errormsg) - count - written + 1,
+					" %d\n", atomic_read(&hash_cur_user->value)) - 1;
+			}
+		}
+		if (count + written == PAGE_SIZE - sizeof(errormsg) - 1) {
+			count += scnprintf(page + count, PAGE_SIZE - count, errormsg);
+			break;
+		}
+		count += written;
+	}
+	rcu_read_unlock();
+
+	return count;
+}
+
+static ssize_t packages_remove_userid_store(struct config_item *item,
+				       const char *page, size_t count)
+{
+	unsigned int tmp;
+	int ret;
+
+	ret = kstrtouint(page, 10, &tmp);
+	if (ret)
+		return ret;
+	remove_userid_all_entry(tmp);
+	return count;
+}
+
+static struct configfs_attribute packages_attr_packages_gid_list = {
+	.ca_name	= "packages_gid.list",
+	.ca_mode	= S_IRUGO,
+	.ca_owner	= THIS_MODULE,
+	.show		= packages_list_show,
+};
+
+SDCARDFS_CONFIGFS_ATTR_WO(packages_, remove_userid);
+
+static struct configfs_attribute *packages_attrs[] = {
+	&packages_attr_packages_gid_list,
+	&packages_attr_remove_userid,
+	NULL,
+};
+
+/*
+ * Note that, since no extra work is required on ->drop_item(),
+ * no ->drop_item() is provided.
+ */
+static struct configfs_group_operations packages_group_ops = {
+	.make_item	= packages_make_item,
+};
+
+static struct config_item_type packages_type = {
+	.ct_group_ops	= &packages_group_ops,
+	.ct_attrs	= packages_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+struct config_group *sd_default_groups[] = {
+	&extension_group,
+	NULL,
+};
+
+static struct configfs_subsystem sdcardfs_packages = {
+	.su_group = {
+		.cg_item = {
+			.ci_namebuf = "sdcardfs",
+			.ci_type = &packages_type,
+		},
+	},
+};
+
+static int configfs_sdcardfs_init(void)
+{
+	int ret, i;
+	struct configfs_subsystem *subsys = &sdcardfs_packages;
+
+	config_group_init(&subsys->su_group);
+	for (i = 0; sd_default_groups[i]; i++) {
+		config_group_init(sd_default_groups[i]);
+		configfs_add_default_group(sd_default_groups[i], &subsys->su_group);
+	}
+	mutex_init(&subsys->su_mutex);
+	ret = configfs_register_subsystem(subsys);
+	if (ret) {
+		pr_err("Error %d while registering subsystem %s\n",
+		       ret,
+		       subsys->su_group.cg_item.ci_namebuf);
+	}
+	return ret;
+}
+
+static void configfs_sdcardfs_exit(void)
+{
+	configfs_unregister_subsystem(&sdcardfs_packages);
+}
+
+int packagelist_init(void)
+{
+	hashtable_entry_cachep =
+		kmem_cache_create("packagelist_hashtable_entry",
+					sizeof(struct hashtable_entry), 0, 0, NULL);
+	if (!hashtable_entry_cachep) {
+		pr_err("sdcardfs: failed creating pkgl_hashtable entry slab cache\n");
+		return -ENOMEM;
+	}
+
+	configfs_sdcardfs_init();
+	return 0;
+}
+
+void packagelist_exit(void)
+{
+	configfs_sdcardfs_exit();
+	packagelist_destroy();
+	kmem_cache_destroy(hashtable_entry_cachep);
+}
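
The configfs subsystem registered above is the control interface for the package list. The rough userspace sketch below is illustrative only: the /config mount point, the daemon performing the writes, and the example package/appid/gid values are assumptions, while the directory layout ("sdcardfs", "extensions") and attribute names (appid, excluded_userids, packages_gid.list, ...) come from the code above.

    /* Illustrative driver of the sdcardfs configfs interface. */
    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/stat.h>
    #include <unistd.h>

    static int write_str(const char *path, const char *val)
    {
        int fd = open(path, O_WRONLY);

        if (fd < 0)
            return -1;
        if (write(fd, val, strlen(val)) < 0) {
            close(fd);
            return -1;
        }
        return close(fd);
    }

    int main(void)
    {
        /* register package "com.example.app" as appid 10057 */
        if (mkdir("/config/sdcardfs/com.example.app", 0775) && errno != EEXIST)
            perror("mkdir package");
        write_str("/config/sdcardfs/com.example.app/appid", "10057");

        /* mark the package as excluded for user 10 */
        write_str("/config/sdcardfs/com.example.app/excluded_userids", "10");

        /* map files with the "png" extension to gid 1057 (AID_SDCARD_IMAGE) */
        mkdir("/config/sdcardfs/extensions/1057", 0775);
        mkdir("/config/sdcardfs/extensions/1057/png", 0775);
        return 0;
    }

The current mappings can then be read back from /config/sdcardfs/packages_gid.list, which packages_list_show() emits one package per line as "<name> <appid>" followed by any excluded userids.
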
diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h
new file mode 100644
index 0000000..9ccf62c
--- /dev/null
+++ b/fs/sdcardfs/sdcardfs.h
@@ -0,0 +1,655 @@
+/*
+ * fs/sdcardfs/sdcardfs.h
+ *
+ * The sdcardfs v2.0
+ *   This file system replaces the sdcard daemon on Android
+ *   On version 2.0, some of the daemon functions have been ported
+ *   to support the multi-user concepts of Android 4.4
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which was written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#ifndef _SDCARDFS_H_
+#define _SDCARDFS_H_
+
+#include <linux/dcache.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/aio.h>
+#include <linux/kref.h>
+#include <linux/mm.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/seq_file.h>
+#include <linux/statfs.h>
+#include <linux/fs_stack.h>
+#include <linux/magic.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/security.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/iversion.h>
+#include "multiuser.h"
+
+/* the file system name */
+#define SDCARDFS_NAME "sdcardfs"
+
+/* sdcardfs root inode number */
+#define SDCARDFS_ROOT_INO     1
+
+/* useful for tracking code reachability */
+#define UDBG pr_default("DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__)
+
+#define SDCARDFS_DIRENT_SIZE 256
+
+/* temporary static uid settings for development */
+#define AID_ROOT             0	/* uid for accessing /mnt/sdcard & extSdcard */
+#define AID_MEDIA_RW      1023	/* internal media storage write access */
+
+#define AID_SDCARD_RW     1015	/* external storage write access */
+#define AID_SDCARD_R      1028	/* external storage read access */
+#define AID_SDCARD_PICS   1033	/* external storage photos access */
+#define AID_SDCARD_AV     1034	/* external storage audio/video access */
+#define AID_SDCARD_ALL    1035	/* access all users external storage */
+#define AID_MEDIA_OBB     1059  /* obb files */
+
+#define AID_SDCARD_IMAGE  1057
+
+#define AID_PACKAGE_INFO  1027
+
+
+/*
+ * Permissions are handled by our permission function.
+ * We don't want anyone who happens to look at our inode value to prematurely
+ * block access, so store more permissive values. These are probably never
+ * used.
+ */
+#define fixup_tmp_permissions(x)	\
+	do {						\
+		(x)->i_uid = make_kuid(&init_user_ns,	\
+				SDCARDFS_I(x)->data->d_uid);	\
+		(x)->i_gid = make_kgid(&init_user_ns, AID_SDCARD_RW);	\
+		(x)->i_mode = ((x)->i_mode & S_IFMT) | 0775;\
+	} while (0)
+
+/* Android 5.0 support */
+
+/* Permission mode for a specific node. Controls how file permissions
+ * are derived for children nodes.
+ */
+typedef enum {
+	/* Nothing special; this node should just inherit from its parent. */
+	PERM_INHERIT,
+	/* This node is one level above a normal root; used for legacy layouts
+	 * which use the first level to represent user_id.
+	 */
+	PERM_PRE_ROOT,
+	/* This node is "/" */
+	PERM_ROOT,
+	/* This node is "/Android" */
+	PERM_ANDROID,
+	/* This node is "/Android/data" */
+	PERM_ANDROID_DATA,
+	/* This node is "/Android/obb" */
+	PERM_ANDROID_OBB,
+	/* This node is "/Android/media" */
+	PERM_ANDROID_MEDIA,
+	/* This node is "/Android/[data|media|obb]/[package]" */
+	PERM_ANDROID_PACKAGE,
+	/* This node is "/Android/[data|media|obb]/[package]/cache" */
+	PERM_ANDROID_PACKAGE_CACHE,
+} perm_t;
+
+struct sdcardfs_sb_info;
+struct sdcardfs_mount_options;
+struct sdcardfs_inode_info;
+struct sdcardfs_inode_data;
+
+/* Do not directly use this function. Use OVERRIDE_CRED() instead. */
+const struct cred *override_fsids(struct sdcardfs_sb_info *sbi,
+			struct sdcardfs_inode_data *data);
+/* Do not directly use this function, use REVERT_CRED() instead. */
+void revert_fsids(const struct cred *old_cred);
+
+/* operations vectors defined in specific files */
+extern const struct file_operations sdcardfs_main_fops;
+extern const struct file_operations sdcardfs_dir_fops;
+extern const struct inode_operations sdcardfs_main_iops;
+extern const struct inode_operations sdcardfs_dir_iops;
+extern const struct inode_operations sdcardfs_symlink_iops;
+extern const struct super_operations sdcardfs_sops;
+extern const struct dentry_operations sdcardfs_ci_dops;
+extern const struct address_space_operations sdcardfs_aops, sdcardfs_dummy_aops;
+extern const struct vm_operations_struct sdcardfs_vm_ops;
+
+extern int sdcardfs_init_inode_cache(void);
+extern void sdcardfs_destroy_inode_cache(void);
+extern int sdcardfs_init_dentry_cache(void);
+extern void sdcardfs_destroy_dentry_cache(void);
+extern int new_dentry_private_data(struct dentry *dentry);
+extern void free_dentry_private_data(struct dentry *dentry);
+extern struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry,
+				unsigned int flags);
+extern struct inode *sdcardfs_iget(struct super_block *sb,
+				 struct inode *lower_inode, userid_t id);
+extern int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb,
+			    struct path *lower_path, userid_t id);
+
+/* file private data */
+struct sdcardfs_file_info {
+	struct file *lower_file;
+	const struct vm_operations_struct *lower_vm_ops;
+};
+
+struct sdcardfs_inode_data {
+	struct kref refcount;
+	bool abandoned;
+
+	perm_t perm;
+	userid_t userid;
+	uid_t d_uid;
+	bool under_android;
+	bool under_cache;
+	bool under_obb;
+};
+
+/* sdcardfs inode data in memory */
+struct sdcardfs_inode_info {
+	struct inode *lower_inode;
+	/* state derived based on current position in hierarchy */
+	struct sdcardfs_inode_data *data;
+
+	/* top folder for ownership */
+	spinlock_t top_lock;
+	struct sdcardfs_inode_data *top_data;
+
+	struct inode vfs_inode;
+};
+
+
+/* sdcardfs dentry data in memory */
+struct sdcardfs_dentry_info {
+	spinlock_t lock;	/* protects lower_path */
+	struct path lower_path;
+	struct path orig_path;
+};
+
+struct sdcardfs_mount_options {
+	uid_t fs_low_uid;
+	gid_t fs_low_gid;
+	userid_t fs_user_id;
+	bool multiuser;
+	bool gid_derivation;
+	bool default_normal;
+	bool unshared_obb;
+	unsigned int reserved_mb;
+	bool nocache;
+};
+
+struct sdcardfs_vfsmount_options {
+	gid_t gid;
+	mode_t mask;
+};
+
+extern int parse_options_remount(struct super_block *sb, char *options, int silent,
+		struct sdcardfs_vfsmount_options *vfsopts);
+
+/* sdcardfs super-block data in memory */
+struct sdcardfs_sb_info {
+	struct super_block *sb;
+	struct super_block *lower_sb;
+	/* derived perm policy : some of the options have been added
+	 * to sdcardfs_mount_options (Android 4.4 support)
+	 */
+	struct sdcardfs_mount_options options;
+	spinlock_t lock;	/* protects obbpath */
+	char *obbpath_s;
+	struct path obbpath;
+	void *pkgl_id;
+	struct list_head list;
+};
+
+/*
+ * inode to private data
+ *
+ * Since we use containers and the struct inode is _inside_ the
+ * sdcardfs_inode_info structure, SDCARDFS_I will always (given a non-NULL
+ * inode pointer) return a valid non-NULL pointer.
+ */
+static inline struct sdcardfs_inode_info *SDCARDFS_I(const struct inode *inode)
+{
+	return container_of(inode, struct sdcardfs_inode_info, vfs_inode);
+}
+
+/* dentry to private data */
+#define SDCARDFS_D(dent) ((struct sdcardfs_dentry_info *)(dent)->d_fsdata)
+
+/* superblock to private data */
+#define SDCARDFS_SB(super) ((struct sdcardfs_sb_info *)(super)->s_fs_info)
+
+/* file to private Data */
+#define SDCARDFS_F(file) ((struct sdcardfs_file_info *)((file)->private_data))
+
+/* file to lower file */
+static inline struct file *sdcardfs_lower_file(const struct file *f)
+{
+	return SDCARDFS_F(f)->lower_file;
+}
+
+static inline void sdcardfs_set_lower_file(struct file *f, struct file *val)
+{
+	SDCARDFS_F(f)->lower_file = val;
+}
+
+/* inode to lower inode. */
+static inline struct inode *sdcardfs_lower_inode(const struct inode *i)
+{
+	return SDCARDFS_I(i)->lower_inode;
+}
+
+static inline void sdcardfs_set_lower_inode(struct inode *i, struct inode *val)
+{
+	SDCARDFS_I(i)->lower_inode = val;
+}
+
+/* superblock to lower superblock */
+static inline struct super_block *sdcardfs_lower_super(
+	const struct super_block *sb)
+{
+	return SDCARDFS_SB(sb)->lower_sb;
+}
+
+static inline void sdcardfs_set_lower_super(struct super_block *sb,
+					  struct super_block *val)
+{
+	SDCARDFS_SB(sb)->lower_sb = val;
+}
+
+/* path based (dentry/mnt) macros */
+static inline void pathcpy(struct path *dst, const struct path *src)
+{
+	dst->dentry = src->dentry;
+	dst->mnt = src->mnt;
+}
+
+/* The sdcardfs_get_##pname functions call path_get();
+ * the caller must therefore call the matching path_put function.
+ * A usage sketch follows the macro expansions below.
+ */
+#define SDCARDFS_DENT_FUNC(pname) \
+static inline void sdcardfs_get_##pname(const struct dentry *dent, \
+					struct path *pname) \
+{ \
+	spin_lock(&SDCARDFS_D(dent)->lock); \
+	pathcpy(pname, &SDCARDFS_D(dent)->pname); \
+	path_get(pname); \
+	spin_unlock(&SDCARDFS_D(dent)->lock); \
+	return; \
+} \
+static inline void sdcardfs_put_##pname(const struct dentry *dent, \
+					struct path *pname) \
+{ \
+	path_put(pname); \
+	return; \
+} \
+static inline void sdcardfs_set_##pname(const struct dentry *dent, \
+					struct path *pname) \
+{ \
+	spin_lock(&SDCARDFS_D(dent)->lock); \
+	pathcpy(&SDCARDFS_D(dent)->pname, pname); \
+	spin_unlock(&SDCARDFS_D(dent)->lock); \
+	return; \
+} \
+static inline void sdcardfs_reset_##pname(const struct dentry *dent) \
+{ \
+	spin_lock(&SDCARDFS_D(dent)->lock); \
+	SDCARDFS_D(dent)->pname.dentry = NULL; \
+	SDCARDFS_D(dent)->pname.mnt = NULL; \
+	spin_unlock(&SDCARDFS_D(dent)->lock); \
+	return; \
+} \
+static inline void sdcardfs_put_reset_##pname(const struct dentry *dent) \
+{ \
+	struct path pname; \
+	spin_lock(&SDCARDFS_D(dent)->lock); \
+	if (SDCARDFS_D(dent)->pname.dentry) { \
+		pathcpy(&pname, &SDCARDFS_D(dent)->pname); \
+		SDCARDFS_D(dent)->pname.dentry = NULL; \
+		SDCARDFS_D(dent)->pname.mnt = NULL; \
+		spin_unlock(&SDCARDFS_D(dent)->lock); \
+		path_put(&pname); \
+	} else \
+		spin_unlock(&SDCARDFS_D(dent)->lock); \
+	return; \
+}
+
+SDCARDFS_DENT_FUNC(lower_path)
+SDCARDFS_DENT_FUNC(orig_path)
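For reference, a minimal sketch of how the generated get/put helpers are expected to pair up; the wrapper below (example_stat_lower) is hypothetical and only illustrates the reference discipline:

/* Illustrative caller: every sdcardfs_get_lower_path() must be balanced
 * by sdcardfs_put_lower_path() once the lower path is no longer needed.
 */
static inline int example_stat_lower(struct dentry *dent, struct kstatfs *buf)
{
	struct path lower_path;
	int err;

	sdcardfs_get_lower_path(dent, &lower_path);	/* takes a path reference */
	err = vfs_statfs(&lower_path, buf);
	sdcardfs_put_lower_path(dent, &lower_path);	/* drops it again */
	return err;
}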
+
+static inline bool sbinfo_has_sdcard_magic(struct sdcardfs_sb_info *sbinfo)
+{
+	return sbinfo && sbinfo->sb
+			&& sbinfo->sb->s_magic == SDCARDFS_SUPER_MAGIC;
+}
+
+static inline struct sdcardfs_inode_data *data_get(
+		struct sdcardfs_inode_data *data)
+{
+	if (data)
+		kref_get(&data->refcount);
+	return data;
+}
+
+static inline struct sdcardfs_inode_data *top_data_get(
+		struct sdcardfs_inode_info *info)
+{
+	struct sdcardfs_inode_data *top_data;
+
+	spin_lock(&info->top_lock);
+	top_data = data_get(info->top_data);
+	spin_unlock(&info->top_lock);
+	return top_data;
+}
+
+extern void data_release(struct kref *ref);
+
+static inline void data_put(struct sdcardfs_inode_data *data)
+{
+	kref_put(&data->refcount, data_release);
+}
+
+static inline void release_own_data(struct sdcardfs_inode_info *info)
+{
+	/*
+	 * This happens exactly once per inode. At this point, the inode that
+	 * originally held this data is about to be freed, and all references
	 * to it are held as top values, which will likely be released soon.
+	 */
+	info->data->abandoned = true;
+	data_put(info->data);
+}
+
+static inline void set_top(struct sdcardfs_inode_info *info,
+			struct sdcardfs_inode_info *top_owner)
+{
+	struct sdcardfs_inode_data *old_top;
+	struct sdcardfs_inode_data *new_top = NULL;
+
+	if (top_owner)
+		new_top = top_data_get(top_owner);
+
+	spin_lock(&info->top_lock);
+	old_top = info->top_data;
+	info->top_data = new_top;
+	if (old_top)
+		data_put(old_top);
+	spin_unlock(&info->top_lock);
+}
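The helpers above follow the usual kref discipline: every successful top_data_get() must be balanced by data_put(). A minimal sketch, using a hypothetical accessor:

/* Illustrative only: read one field of the current top data. */
static inline uid_t example_top_uid(struct sdcardfs_inode_info *info)
{
	struct sdcardfs_inode_data *top = top_data_get(info);
	uid_t uid = top ? top->d_uid : 0;

	if (top)
		data_put(top);	/* balances top_data_get() */
	return uid;
}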
+
+static inline int get_gid(struct vfsmount *mnt,
+		struct super_block *sb,
+		struct sdcardfs_inode_data *data)
+{
+	struct sdcardfs_vfsmount_options *vfsopts = mnt->data;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(sb);
+
+	if (vfsopts->gid == AID_SDCARD_RW && !sbi->options.default_normal)
+		/* As an optimization, certain trusted system components only run
+		 * as owner but operate across all users. Since we're now handing
+		 * out the sdcard_rw GID only to trusted apps, we're okay relaxing
+		 * the user boundary enforcement for the default view. The UIDs
+		 * assigned to app directories are still multiuser aware.
+		 */
+		return AID_SDCARD_RW;
+	else
+		return multiuser_get_uid(data->userid, vfsopts->gid);
+}
+
+static inline int get_mode(struct vfsmount *mnt,
+		struct sdcardfs_inode_info *info,
+		struct sdcardfs_inode_data *data)
+{
+	int owner_mode;
+	int filtered_mode;
+	struct sdcardfs_vfsmount_options *opts = mnt->data;
+	int visible_mode = 0775 & ~opts->mask;
+
+	if (data->perm == PERM_PRE_ROOT) {
+		/* Top of multi-user view should always be visible to ensure
+		 * secondary users can traverse inside.
+		 */
+		visible_mode = 0711;
+	} else if (data->under_android) {
+		/* Block "other" access to Android directories, since only apps
+		 * belonging to a specific user should be in there; we still
+		 * leave +x open for the default view.
+		 */
+		if (opts->gid == AID_SDCARD_RW)
+			visible_mode = visible_mode & ~0006;
+		else
+			visible_mode = visible_mode & ~0007;
+	}
+	owner_mode = info->lower_inode->i_mode & 0700;
+	filtered_mode = visible_mode & (owner_mode | (owner_mode >> 3) | (owner_mode >> 6));
+	return filtered_mode;
+}
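A worked example of the filtering above, with assumed inputs: for mask = 0 on a node that is neither PERM_PRE_ROOT nor under Android, visible_mode is 0775; if the lower inode's owner bits are 0500, then owner_mode | (owner_mode >> 3) | (owner_mode >> 6) = 0555 and the reported mode is 0775 & 0555 = 0555, i.e. the lower owner's permissions cap what sdcardfs exposes.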
+
+static inline int has_graft_path(const struct dentry *dent)
+{
+	int ret = 0;
+
+	spin_lock(&SDCARDFS_D(dent)->lock);
+	if (SDCARDFS_D(dent)->orig_path.dentry != NULL)
+		ret = 1;
+	spin_unlock(&SDCARDFS_D(dent)->lock);
+
+	return ret;
+}
+
+static inline void sdcardfs_get_real_lower(const struct dentry *dent,
+						struct path *real_lower)
+{
+	/* in case of a local obb dentry
+	 * the orig_path should be returned
+	 */
+	if (has_graft_path(dent))
+		sdcardfs_get_orig_path(dent, real_lower);
+	else
+		sdcardfs_get_lower_path(dent, real_lower);
+}
+
+static inline void sdcardfs_put_real_lower(const struct dentry *dent,
+						struct path *real_lower)
+{
+	if (has_graft_path(dent))
+		sdcardfs_put_orig_path(dent, real_lower);
+	else
+		sdcardfs_put_lower_path(dent, real_lower);
+}
+
+extern struct mutex sdcardfs_super_list_lock;
+extern struct list_head sdcardfs_super_list;
+
+/* for packagelist.c */
+extern appid_t get_appid(const char *app_name);
+extern appid_t get_ext_gid(const char *app_name);
+extern appid_t is_excluded(const char *app_name, userid_t userid);
+extern int check_caller_access_to_name(struct inode *parent_node, const struct qstr *name);
+extern int packagelist_init(void);
+extern void packagelist_exit(void);
+
+/* for derived_perm.c */
+#define BY_NAME		(1 << 0)
+#define BY_USERID	(1 << 1)
+struct limit_search {
+	unsigned int flags;
+	struct qstr name;
+	userid_t userid;
+};
+
+extern void setup_derived_state(struct inode *inode, perm_t perm,
+			userid_t userid, uid_t uid);
+extern void get_derived_permission(struct dentry *parent, struct dentry *dentry);
+extern void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, const struct qstr *name);
+extern void fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit);
+
+extern void update_derived_permission_lock(struct dentry *dentry);
+void fixup_lower_ownership(struct dentry *dentry, const char *name);
+extern int need_graft_path(struct dentry *dentry);
+extern int is_base_obbpath(struct dentry *dentry);
+extern int is_obbpath_invalid(struct dentry *dentry);
+extern int setup_obb_dentry(struct dentry *dentry, struct path *lower_path);
+
+/* locking helpers */
+static inline struct dentry *lock_parent(struct dentry *dentry)
+{
+	struct dentry *dir = dget_parent(dentry);
+
+	inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
+	return dir;
+}
+
+static inline void unlock_dir(struct dentry *dir)
+{
+	inode_unlock(d_inode(dir));
+	dput(dir);
+}
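A short sketch of how the two locking helpers are meant to pair; the surrounding function is hypothetical:

static inline int example_with_parent_locked(struct dentry *dentry)
{
	struct dentry *parent = lock_parent(dentry);	/* dget_parent() + lock */
	int err = 0;

	/* ... operate on d_inode(parent) and dentry while the lock is held ... */

	unlock_dir(parent);	/* unlock + dput(), balancing lock_parent() */
	return err;
}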
+
+static inline int prepare_dir(const char *path_s, uid_t uid, gid_t gid, mode_t mode)
+{
+	int err;
+	struct dentry *dent;
+	struct iattr attrs;
+	struct path parent;
+
+	dent = kern_path_locked(path_s, &parent);
+	if (IS_ERR(dent)) {
+		err = PTR_ERR(dent);
+		if (err == -EEXIST)
+			err = 0;
+		goto out_unlock;
+	}
+
+	err = vfs_mkdir2(parent.mnt, d_inode(parent.dentry), dent, mode);
+	if (err) {
+		if (err == -EEXIST)
+			err = 0;
+		goto out_dput;
+	}
+
+	attrs.ia_uid = make_kuid(&init_user_ns, uid);
+	attrs.ia_gid = make_kgid(&init_user_ns, gid);
+	attrs.ia_valid = ATTR_UID | ATTR_GID;
+	inode_lock(d_inode(dent));
+	notify_change2(parent.mnt, dent, &attrs, NULL);
+	inode_unlock(d_inode(dent));
+
+out_dput:
+	dput(dent);
+
+out_unlock:
+	/* parent dentry locked by lookup_create */
+	inode_unlock(d_inode(parent.dentry));
+	path_put(&parent);
+	return err;
+}
+
+/*
+ * Return 1 if the disk has enough free space, otherwise 0.
+ * We assume that no existing file is overwritten.
+ */
+static inline int check_min_free_space(struct dentry *dentry, size_t size, int dir)
+{
+	int err;
+	struct path lower_path;
+	struct kstatfs statfs;
+	u64 avail;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+
+	if (sbi->options.reserved_mb) {
+		/* Get fs stat of lower filesystem. */
+		sdcardfs_get_lower_path(dentry, &lower_path);
+		err = vfs_statfs(&lower_path, &statfs);
+		sdcardfs_put_lower_path(dentry, &lower_path);
+
+		if (unlikely(err))
+			return 0;
+
+		/* Invalid statfs information. */
+		if (unlikely(statfs.f_bsize == 0))
+			return 0;
+
+		/* If checking a directory, set size to f_bsize. */
+		if (unlikely(dir))
+			size = statfs.f_bsize;
+
+		/* available size */
+		avail = statfs.f_bavail * statfs.f_bsize;
+
+		/* not enough space */
+		if ((u64)size > avail)
+			return 0;
+
+		/* enough space */
+		if ((avail - size) > (sbi->options.reserved_mb * 1024 * 1024))
+			return 1;
+
+		return 0;
+	} else
+		return 1;
+}
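With illustrative numbers: for reserved_mb = 100, a 4096-byte file write is allowed only while f_bavail * f_bsize - 4096 exceeds 100 * 1024 * 1024, i.e. while more than roughly 100 MB of the lower filesystem would remain free afterwards; for a directory the size checked is a single block (f_bsize).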
+
+/*
+ * Copies attrs and maintains sdcardfs managed attrs
+ * Since our permission check handles all special permissions, set those to be open
+ */
+static inline void sdcardfs_copy_and_fix_attrs(struct inode *dest, const struct inode *src)
+{
+	dest->i_mode = (src->i_mode  & S_IFMT) | S_IRWXU | S_IRWXG |
+			S_IROTH | S_IXOTH; /* 0775 */
+	dest->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(dest)->data->d_uid);
+	dest->i_gid = make_kgid(&init_user_ns, AID_SDCARD_RW);
+	dest->i_rdev = src->i_rdev;
+	dest->i_atime = src->i_atime;
+	dest->i_mtime = src->i_mtime;
+	dest->i_ctime = src->i_ctime;
+	dest->i_blkbits = src->i_blkbits;
+	dest->i_flags = src->i_flags;
+	set_nlink(dest, src->i_nlink);
+}
+
+static inline bool str_case_eq(const char *s1, const char *s2)
+{
+	return !strcasecmp(s1, s2);
+}
+
+static inline bool str_n_case_eq(const char *s1, const char *s2, size_t len)
+{
+	return !strncasecmp(s1, s2, len);
+}
+
+static inline bool qstr_case_eq(const struct qstr *q1, const struct qstr *q2)
+{
+	return q1->len == q2->len && str_n_case_eq(q1->name, q2->name, q2->len);
+}
+
+#define QSTR_LITERAL(string) QSTR_INIT(string, sizeof(string)-1)
+
+#endif	/* not _SDCARDFS_H_ */
diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c
new file mode 100644
index 0000000..5e0c39a
--- /dev/null
+++ b/fs/sdcardfs/super.c
@@ -0,0 +1,333 @@
+/*
+ * fs/sdcardfs/super.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which was written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+
+/*
+ * The inode cache is used with alloc_inode for both our inode info and the
+ * vfs inode.
+ */
+static struct kmem_cache *sdcardfs_inode_cachep;
+
+/*
+ * To support the top references, we must track some data separately.
+ * An sdcardfs_inode_info always has a reference to its data, and once set up,
+ * also has a reference to its top. The top may be itself, in which case it
+ * holds two references to its data. When top is changed, it takes a ref to the
+ * new data and then drops the ref to the old data.
+ */
+static struct kmem_cache *sdcardfs_inode_data_cachep;
+
+void data_release(struct kref *ref)
+{
+	struct sdcardfs_inode_data *data =
+		container_of(ref, struct sdcardfs_inode_data, refcount);
+
+	kmem_cache_free(sdcardfs_inode_data_cachep, data);
+}
+
+/* final actions when unmounting a file system */
+static void sdcardfs_put_super(struct super_block *sb)
+{
+	struct sdcardfs_sb_info *spd;
+	struct super_block *s;
+
+	spd = SDCARDFS_SB(sb);
+	if (!spd)
+		return;
+
+	if (spd->obbpath_s) {
+		kfree(spd->obbpath_s);
+		path_put(&spd->obbpath);
+	}
+
+	/* decrement lower super references */
+	s = sdcardfs_lower_super(sb);
+	sdcardfs_set_lower_super(sb, NULL);
+	atomic_dec(&s->s_active);
+
+	kfree(spd);
+	sb->s_fs_info = NULL;
+}
+
+static int sdcardfs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	int err;
+	struct path lower_path;
+	u32 min_blocks;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	err = vfs_statfs(&lower_path, buf);
+	sdcardfs_put_lower_path(dentry, &lower_path);
+
+	if (sbi->options.reserved_mb) {
+		/* Invalid statfs information. */
+		if (buf->f_bsize == 0) {
+			pr_err("Returned block size is zero.\n");
+			return -EINVAL;
+		}
+
+		min_blocks = ((sbi->options.reserved_mb * 1024 * 1024)/buf->f_bsize);
+		buf->f_blocks -= min_blocks;
+
+		if (buf->f_bavail > min_blocks)
+			buf->f_bavail -= min_blocks;
+		else
+			buf->f_bavail = 0;
+
+		/* Make reserved blocks invisible to media storage */
+		buf->f_bfree = buf->f_bavail;
+	}
+
+	/* set return buf to our f/s to avoid confusing user-level utils */
+	buf->f_type = SDCARDFS_SUPER_MAGIC;
+
+	return err;
+}
+
+/*
+ * @flags: numeric mount options
+ * @options: mount options string
+ */
+static int sdcardfs_remount_fs(struct super_block *sb, int *flags, char *options)
+{
+	int err = 0;
+
+	/*
+	 * The VFS will take care of "ro" and "rw" flags among others.  We
+	 * can safely accept a few flags (RDONLY, MANDLOCK), and honor
+	 * SILENT, but anything else left over is an error.
+	 */
+	if ((*flags & ~(MS_RDONLY | MS_MANDLOCK | MS_SILENT)) != 0) {
+		pr_err("sdcardfs: remount flags 0x%x unsupported\n", *flags);
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
+/*
+ * @mnt: mount point we are remounting
+ * @sb: superblock we are remounting
+ * @flags: numeric mount options
+ * @options: mount options string
+ */
+static int sdcardfs_remount_fs2(struct vfsmount *mnt, struct super_block *sb,
+						int *flags, char *options)
+{
+	int err = 0;
+
+	/*
+	 * The VFS will take care of "ro" and "rw" flags among others.  We
+	 * can safely accept a few flags (RDONLY, MANDLOCK), and honor
+	 * SILENT, but anything else left over is an error.
+	 */
+	if ((*flags & ~(MS_RDONLY | MS_MANDLOCK | MS_SILENT | MS_REMOUNT)) != 0) {
+		pr_err("sdcardfs: remount flags 0x%x unsupported\n", *flags);
+		err = -EINVAL;
+	}
+	pr_info("Remount options were %s for vfsmnt %p.\n", options, mnt);
+	err = parse_options_remount(sb, options, *flags & ~MS_SILENT, mnt->data);
+
+	return err;
+}
+
+static void *sdcardfs_clone_mnt_data(void *data)
+{
+	struct sdcardfs_vfsmount_options *opt = kmalloc(sizeof(struct sdcardfs_vfsmount_options), GFP_KERNEL);
+	struct sdcardfs_vfsmount_options *old = data;
+
+	if (!opt)
+		return NULL;
+	opt->gid = old->gid;
+	opt->mask = old->mask;
+	return opt;
+}
+
+static void sdcardfs_copy_mnt_data(void *data, void *newdata)
+{
+	struct sdcardfs_vfsmount_options *old = data;
+	struct sdcardfs_vfsmount_options *new = newdata;
+
+	old->gid = new->gid;
+	old->mask = new->mask;
+}
+
+/*
+ * Called by iput() when the inode reference count reaches zero
+ * and the inode is not hashed anywhere.  Used to clear anything
+ * that needs clearing before the inode is completely destroyed and
+ * put on the inode free list.
+ */
+static void sdcardfs_evict_inode(struct inode *inode)
+{
+	struct inode *lower_inode;
+
+	truncate_inode_pages(&inode->i_data, 0);
+	set_top(SDCARDFS_I(inode), NULL);
+	clear_inode(inode);
+	/*
+	 * Decrement a reference to a lower_inode, which was incremented
+	 * by our read_inode when it was created initially.
+	 */
+	lower_inode = sdcardfs_lower_inode(inode);
+	sdcardfs_set_lower_inode(inode, NULL);
+	iput(lower_inode);
+}
+
+static struct inode *sdcardfs_alloc_inode(struct super_block *sb)
+{
+	struct sdcardfs_inode_info *i;
+	struct sdcardfs_inode_data *d;
+
+	i = kmem_cache_alloc(sdcardfs_inode_cachep, GFP_KERNEL);
+	if (!i)
+		return NULL;
+
+	/* memset everything up to the inode to 0 */
+	memset(i, 0, offsetof(struct sdcardfs_inode_info, vfs_inode));
+
+	d = kmem_cache_alloc(sdcardfs_inode_data_cachep,
+					GFP_KERNEL | __GFP_ZERO);
+	if (!d) {
+		kmem_cache_free(sdcardfs_inode_cachep, i);
+		return NULL;
+	}
+
+	i->data = d;
+	kref_init(&d->refcount);
+	i->top_data = d;
+	spin_lock_init(&i->top_lock);
+	kref_get(&d->refcount);
+
+	inode_set_iversion(&i->vfs_inode, 1);
+	return &i->vfs_inode;
+}
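To make the lifetime described in the comments above concrete, a sketch of the reference counts as set up here, assuming no intervening set_top() calls:

/*
 * alloc_inode:   kref_init()          -> refcount = 1  (info->data)
 *                kref_get()           -> refcount = 2  (info->top_data)
 * evict_inode:   set_top(info, NULL)  -> refcount = 1  (top reference dropped)
 * destroy_inode: release_own_data()   -> refcount = 0, data_release() frees it
 */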
+
+static void i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+
+	release_own_data(SDCARDFS_I(inode));
+	kmem_cache_free(sdcardfs_inode_cachep, SDCARDFS_I(inode));
+}
+
+static void sdcardfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, i_callback);
+}
+
+/* sdcardfs inode cache constructor */
+static void init_once(void *obj)
+{
+	struct sdcardfs_inode_info *i = obj;
+
+	inode_init_once(&i->vfs_inode);
+}
+
+int sdcardfs_init_inode_cache(void)
+{
+	sdcardfs_inode_cachep =
+		kmem_cache_create("sdcardfs_inode_cache",
+				  sizeof(struct sdcardfs_inode_info), 0,
+				  SLAB_RECLAIM_ACCOUNT, init_once);
+
+	if (!sdcardfs_inode_cachep)
+		return -ENOMEM;
+
+	sdcardfs_inode_data_cachep =
+		kmem_cache_create("sdcardfs_inode_data_cache",
+				  sizeof(struct sdcardfs_inode_data), 0,
+				  SLAB_RECLAIM_ACCOUNT, NULL);
+	if (!sdcardfs_inode_data_cachep) {
+		kmem_cache_destroy(sdcardfs_inode_cachep);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/* sdcardfs inode cache destructor */
+void sdcardfs_destroy_inode_cache(void)
+{
+	kmem_cache_destroy(sdcardfs_inode_data_cachep);
+	kmem_cache_destroy(sdcardfs_inode_cachep);
+}
+
+/*
+ * Used only in nfs, to kill any pending RPC tasks, so that subsequent
+ * code can actually succeed and won't leave tasks that need handling.
+ */
+static void sdcardfs_umount_begin(struct super_block *sb)
+{
+	struct super_block *lower_sb;
+
+	lower_sb = sdcardfs_lower_super(sb);
+	if (lower_sb && lower_sb->s_op && lower_sb->s_op->umount_begin)
+		lower_sb->s_op->umount_begin(lower_sb);
+}
+
+static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m,
+			struct dentry *root)
+{
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(root->d_sb);
+	struct sdcardfs_mount_options *opts = &sbi->options;
+	struct sdcardfs_vfsmount_options *vfsopts = mnt->data;
+
+	if (opts->fs_low_uid != 0)
+		seq_printf(m, ",fsuid=%u", opts->fs_low_uid);
+	if (opts->fs_low_gid != 0)
+		seq_printf(m, ",fsgid=%u", opts->fs_low_gid);
+	if (vfsopts->gid != 0)
+		seq_printf(m, ",gid=%u", vfsopts->gid);
+	if (opts->multiuser)
+		seq_puts(m, ",multiuser");
+	if (vfsopts->mask)
+		seq_printf(m, ",mask=%u", vfsopts->mask);
+	if (opts->fs_user_id)
+		seq_printf(m, ",userid=%u", opts->fs_user_id);
+	if (opts->gid_derivation)
+		seq_puts(m, ",derive_gid");
+	if (opts->default_normal)
+		seq_puts(m, ",default_normal");
+	if (opts->reserved_mb != 0)
+		seq_printf(m, ",reserved=%uMB", opts->reserved_mb);
+	if (opts->nocache)
+		seq_puts(m, ",nocache");
+
+	return 0;
+}
+
+const struct super_operations sdcardfs_sops = {
+	.put_super	= sdcardfs_put_super,
+	.statfs		= sdcardfs_statfs,
+	.remount_fs	= sdcardfs_remount_fs,
+	.remount_fs2	= sdcardfs_remount_fs2,
+	.clone_mnt_data	= sdcardfs_clone_mnt_data,
+	.copy_mnt_data	= sdcardfs_copy_mnt_data,
+	.evict_inode	= sdcardfs_evict_inode,
+	.umount_begin	= sdcardfs_umount_begin,
+	.show_options2	= sdcardfs_show_options,
+	.alloc_inode	= sdcardfs_alloc_inode,
+	.destroy_inode	= sdcardfs_destroy_inode,
+	.drop_inode	= generic_delete_inode,
+};
diff --git a/fs/super.c b/fs/super.c
index f3a8c00..b24987b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -834,7 +834,8 @@
 }
 
 /**
- *	do_remount_sb - asks filesystem to change mount options.
+ *	do_remount_sb2 - asks filesystem to change mount options.
+ *	@mnt:   mount we are looking at
  *	@sb:	superblock in question
  *	@sb_flags: revised superblock flags
  *	@data:	the rest of options
@@ -842,7 +843,7 @@
  *
  *	Alters the mount options of a mounted file system.
  */
-int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force)
+int do_remount_sb2(struct vfsmount *mnt, struct super_block *sb, int sb_flags, void *data, int force)
 {
 	int retval;
 	int remount_ro;
@@ -884,7 +885,16 @@
 		}
 	}
 
-	if (sb->s_op->remount_fs) {
+	if (mnt && sb->s_op->remount_fs2) {
+		retval = sb->s_op->remount_fs2(mnt, sb, &sb_flags, data);
+		if (retval) {
+			if (!force)
+				goto cancel_readonly;
+			/* If forced remount, go ahead despite any errors */
+			WARN(1, "forced remount of a %s fs returned %i\n",
+			     sb->s_type->name, retval);
+		}
+	} else if (sb->s_op->remount_fs) {
 		retval = sb->s_op->remount_fs(sb, &sb_flags, data);
 		if (retval) {
 			if (!force)
@@ -916,6 +926,11 @@
 	return retval;
 }
 
+int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
+{
+	return do_remount_sb2(NULL, sb, flags, data, force);
+}
+
 static void do_emergency_remount_callback(struct super_block *sb)
 {
 	down_write(&sb->s_umount);
@@ -1241,7 +1256,7 @@
 EXPORT_SYMBOL(mount_single);
 
 struct dentry *
-mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
+mount_fs(struct file_system_type *type, int flags, const char *name, struct vfsmount *mnt, void *data)
 {
 	struct dentry *root;
 	struct super_block *sb;
@@ -1258,7 +1273,10 @@
 			goto out_free_secdata;
 	}
 
-	root = type->mount(type, flags, name, data);
+	if (type->mount2)
+		root = type->mount2(mnt, type, flags, name, data);
+	else
+		root = type->mount(type, flags, name, data);
 	if (IS_ERR(root)) {
 		error = PTR_ERR(root);
 		goto out_free_secdata;
diff --git a/fs/sync.c b/fs/sync.c
index b54e054..055daab 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -9,7 +9,7 @@
 #include <linux/slab.h>
 #include <linux/export.h>
 #include <linux/namei.h>
-#include <linux/sched.h>
+#include <linux/sched/xacct.h>
 #include <linux/writeback.h>
 #include <linux/syscalls.h>
 #include <linux/linkage.h>
@@ -220,6 +220,7 @@
 	if (f.file) {
 		ret = vfs_fsync(f.file, datasync);
 		fdput(f);
+		inc_syscfs(current);
 	}
 	return ret;
 }
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 9c2955f..65e23dd 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -913,7 +913,8 @@
 					 new_flags, vma->anon_vma,
 					 vma->vm_file, vma->vm_pgoff,
 					 vma_policy(vma),
-					 NULL_VM_UFFD_CTX);
+					 NULL_VM_UFFD_CTX,
+					 vma_get_anon_name(vma));
 			if (prev)
 				vma = prev;
 			else
@@ -1463,7 +1464,8 @@
 		prev = vma_merge(mm, prev, start, vma_end, new_flags,
 				 vma->anon_vma, vma->vm_file, vma->vm_pgoff,
 				 vma_policy(vma),
-				 ((struct vm_userfaultfd_ctx){ ctx }));
+				 ((struct vm_userfaultfd_ctx){ ctx }),
+				 vma_get_anon_name(vma));
 		if (prev) {
 			vma = prev;
 			goto next;
@@ -1625,7 +1627,8 @@
 		prev = vma_merge(mm, prev, start, vma_end, new_flags,
 				 vma->anon_vma, vma->vm_file, vma->vm_pgoff,
 				 vma_policy(vma),
-				 NULL_VM_UFFD_CTX);
+				 NULL_VM_UFFD_CTX,
+				 vma_get_anon_name(vma));
 		if (prev) {
 			vma = prev;
 			goto next;
diff --git a/fs/utimes.c b/fs/utimes.c
index 69d4b6b..1039ef7 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -88,7 +88,7 @@
 	}
 retry_deleg:
 	inode_lock(inode);
-	error = notify_change(path->dentry, &newattrs, &delegated_inode);
+	error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode);
 	inode_unlock(inode);
 	if (delegated_inode) {
 		error = break_deleg_wait(&delegated_inode);
diff --git a/fs/xattr.c b/fs/xattr.c
index 0d6a6a4..fd7bafb 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -130,7 +130,7 @@
 			return -EPERM;
 	}
 
-	return inode_permission(inode, mask);
+	return inode_permission2(ERR_PTR(-EOPNOTSUPP), inode, mask);
 }
 
 int
@@ -305,6 +305,9 @@
 	handler = xattr_resolve_name(inode, &name);
 	if (IS_ERR(handler))
 		return PTR_ERR(handler);
+	if (unlikely(handler->__get))
+		return handler->__get(handler, dentry, inode, name, value,
+				      size);
 	if (!handler->get)
 		return -EOPNOTSUPP;
 	return handler->get(handler, dentry, inode, name, value, size);
@@ -316,6 +319,7 @@
 {
 	struct inode *inode = dentry->d_inode;
 	int error;
+	const struct xattr_handler *handler;
 
 	error = xattr_permission(inode, name, MAY_READ);
 	if (error)
@@ -338,7 +342,12 @@
 		return ret;
 	}
 nolsm:
-	return __vfs_getxattr(dentry, inode, name, value, size);
+	handler = xattr_resolve_name(inode, &name);
+	if (IS_ERR(handler))
+		return PTR_ERR(handler);
+	if (!handler->get)
+		return -EOPNOTSUPP;
+	return handler->get(handler, dentry, inode, name, value, size);
 }
 EXPORT_SYMBOL_GPL(vfs_getxattr);
 
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index dd38c97..8667538 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -66,8 +66,9 @@
  * RODATA_MAIN is not used because existing code already defines .rodata.x
  * sections to be brought in with rodata.
  */
-#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
+#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG)
 #define TEXT_MAIN .text .text.[0-9a-zA-Z_]*
+#define TEXT_CFI_MAIN .text.[0-9a-zA-Z_]*.cfi
 #define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..LPBX*
 #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]*
 #define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]*
@@ -75,6 +76,7 @@
 #define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]*
 #else
 #define TEXT_MAIN .text
+#define TEXT_CFI_MAIN .text.cfi
 #define DATA_MAIN .data
 #define SDATA_MAIN .sdata
 #define RODATA_MAIN .rodata
@@ -491,7 +493,9 @@
 #define TEXT_TEXT							\
 		ALIGN_FUNCTION();					\
 		*(.text.hot TEXT_MAIN .text.fixup .text.unlikely)	\
+		*(TEXT_CFI_MAIN) 					\
 		*(.text..refcount)					\
+		*(.text..ftrace)					\
 		*(.ref.text)						\
 	MEM_KEEP(init.text*)						\
 	MEM_KEEP(exit.text*)						\
diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h
new file mode 100644
index 0000000..3d261f5
--- /dev/null
+++ b/include/crypto/chacha.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common values and helper functions for the ChaCha and XChaCha stream ciphers.
+ *
+ * XChaCha extends ChaCha's nonce to 192 bits, while provably retaining ChaCha's
+ * security.  Here they share the same key size, tfm context, and setkey
+ * function; only their IV size and encrypt/decrypt function differ.
+ *
+ * The ChaCha paper specifies 20, 12, and 8-round variants.  In general, it is
+ * recommended to use the 20-round variant ChaCha20.  However, the other
+ * variants can be needed in some performance-sensitive scenarios.  The generic
+ * ChaCha code currently allows only the 20 and 12-round variants.
+ */
+
+#ifndef _CRYPTO_CHACHA_H
+#define _CRYPTO_CHACHA_H
+
+#include <crypto/skcipher.h>
+#include <linux/types.h>
+#include <linux/crypto.h>
+
+/* 32-bit stream position, then 96-bit nonce (RFC7539 convention) */
+#define CHACHA_IV_SIZE		16
+
+#define CHACHA_KEY_SIZE		32
+#define CHACHA_BLOCK_SIZE	64
+
+/* 192-bit nonce, then 64-bit stream position */
+#define XCHACHA_IV_SIZE		32
+
+struct chacha_ctx {
+	u32 key[8];
+	int nrounds;
+};
+
+void chacha_block(u32 *state, u8 *stream, int nrounds);
+static inline void chacha20_block(u32 *state, u8 *stream)
+{
+	chacha_block(state, stream, 20);
+}
+void hchacha_block(const u32 *in, u32 *out, int nrounds);
+
+void crypto_chacha_init(u32 *state, struct chacha_ctx *ctx, u8 *iv);
+
+int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			   unsigned int keysize);
+int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			   unsigned int keysize);
+
+int crypto_chacha_crypt(struct skcipher_request *req);
+int crypto_xchacha_crypt(struct skcipher_request *req);
+
+#endif /* _CRYPTO_CHACHA_H */
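A rough usage sketch for the 20-round wrapper above; the function below is illustrative and assumes the state has already been set up with crypto_chacha_init():

/* Expand one 64-byte keystream block; chacha_block() also advances the
 * block counter held in the state.
 */
static inline void example_one_block(u32 state[16], u8 stream[CHACHA_BLOCK_SIZE])
{
	chacha20_block(state, stream);
}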
diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h
deleted file mode 100644
index b83d660..0000000
--- a/include/crypto/chacha20.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Common values for the ChaCha20 algorithm
- */
-
-#ifndef _CRYPTO_CHACHA20_H
-#define _CRYPTO_CHACHA20_H
-
-#include <crypto/skcipher.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-
-#define CHACHA20_IV_SIZE	16
-#define CHACHA20_KEY_SIZE	32
-#define CHACHA20_BLOCK_SIZE	64
-#define CHACHA20_BLOCK_WORDS	(CHACHA20_BLOCK_SIZE / sizeof(u32))
-
-struct chacha20_ctx {
-	u32 key[8];
-};
-
-void chacha20_block(u32 *state, u32 *stream);
-void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv);
-int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
-			   unsigned int keysize);
-int crypto_chacha20_crypt(struct skcipher_request *req);
-
-#endif
diff --git a/include/crypto/nhpoly1305.h b/include/crypto/nhpoly1305.h
new file mode 100644
index 0000000..53c0442
--- /dev/null
+++ b/include/crypto/nhpoly1305.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common values and helper functions for the NHPoly1305 hash function.
+ */
+
+#ifndef _NHPOLY1305_H
+#define _NHPOLY1305_H
+
+#include <crypto/hash.h>
+#include <crypto/poly1305.h>
+
+/* NH parameterization: */
+
+/* Endianness: little */
+/* Word size: 32 bits (works well on NEON, SSE2, AVX2) */
+
+/* Stride: 2 words (optimal on ARM32 NEON; works okay on other CPUs too) */
+#define NH_PAIR_STRIDE		2
+#define NH_MESSAGE_UNIT		(NH_PAIR_STRIDE * 2 * sizeof(u32))
+
+/* Num passes (Toeplitz iteration count): 4, to give ε = 2^{-128} */
+#define NH_NUM_PASSES		4
+#define NH_HASH_BYTES		(NH_NUM_PASSES * sizeof(u64))
+
+/* Max message size: 1024 bytes (32x compression factor) */
+#define NH_NUM_STRIDES		64
+#define NH_MESSAGE_WORDS	(NH_PAIR_STRIDE * 2 * NH_NUM_STRIDES)
+#define NH_MESSAGE_BYTES	(NH_MESSAGE_WORDS * sizeof(u32))
+#define NH_KEY_WORDS		(NH_MESSAGE_WORDS + \
+				 NH_PAIR_STRIDE * 2 * (NH_NUM_PASSES - 1))
+#define NH_KEY_BYTES		(NH_KEY_WORDS * sizeof(u32))
+
+#define NHPOLY1305_KEY_SIZE	(POLY1305_BLOCK_SIZE + NH_KEY_BYTES)
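Spelled out, the parameterization gives NH_MESSAGE_UNIT = 2 * 2 * 4 = 16 bytes, NH_MESSAGE_BYTES = 256 * 4 = 1024 bytes and NH_KEY_BYTES = (256 + 12) * 4 = 1072 bytes, so with the 16-byte Poly1305 block size NHPOLY1305_KEY_SIZE works out to 1088 bytes.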
+
+struct nhpoly1305_key {
+	struct poly1305_key poly_key;
+	u32 nh_key[NH_KEY_WORDS];
+};
+
+struct nhpoly1305_state {
+
+	/* Running total of polynomial evaluation */
+	struct poly1305_state poly_state;
+
+	/* Partial block buffer */
+	u8 buffer[NH_MESSAGE_UNIT];
+	unsigned int buflen;
+
+	/*
+	 * Number of bytes remaining until the current NH message reaches
+	 * NH_MESSAGE_BYTES.  When nonzero, 'nh_hash' holds the partial NH hash.
+	 */
+	unsigned int nh_remaining;
+
+	__le64 nh_hash[NH_NUM_PASSES];
+};
+
+typedef void (*nh_t)(const u32 *key, const u8 *message, size_t message_len,
+		     __le64 hash[NH_NUM_PASSES]);
+
+int crypto_nhpoly1305_setkey(struct crypto_shash *tfm,
+			     const u8 *key, unsigned int keylen);
+
+int crypto_nhpoly1305_init(struct shash_desc *desc);
+int crypto_nhpoly1305_update(struct shash_desc *desc,
+			     const u8 *src, unsigned int srclen);
+int crypto_nhpoly1305_update_helper(struct shash_desc *desc,
+				    const u8 *src, unsigned int srclen,
+				    nh_t nh_fn);
+int crypto_nhpoly1305_final(struct shash_desc *desc, u8 *dst);
+int crypto_nhpoly1305_final_helper(struct shash_desc *desc, u8 *dst,
+				   nh_t nh_fn);
+
+#endif /* _NHPOLY1305_H */
diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
index f718a19..34317ed 100644
--- a/include/crypto/poly1305.h
+++ b/include/crypto/poly1305.h
@@ -13,13 +13,21 @@
 #define POLY1305_KEY_SIZE	32
 #define POLY1305_DIGEST_SIZE	16
 
+struct poly1305_key {
+	u32 r[5];	/* key, base 2^26 */
+};
+
+struct poly1305_state {
+	u32 h[5];	/* accumulator, base 2^26 */
+};
+
 struct poly1305_desc_ctx {
 	/* key */
-	u32 r[5];
+	struct poly1305_key r;
 	/* finalize key */
 	u32 s[4];
 	/* accumulator */
-	u32 h[5];
+	struct poly1305_state h;
 	/* partial buffer */
 	u8 buf[POLY1305_BLOCK_SIZE];
 	/* bytes used in partial buffer */
@@ -30,6 +38,22 @@
 	bool sset;
 };
 
+/*
+ * Poly1305 core functions.  These implement the ε-almost-∆-universal hash
+ * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce
+ * ("s key") at the end.  They also only support block-aligned inputs.
+ */
+void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key);
+static inline void poly1305_core_init(struct poly1305_state *state)
+{
+	memset(state->h, 0, sizeof(state->h));
+}
+void poly1305_core_blocks(struct poly1305_state *state,
+			  const struct poly1305_key *key,
+			  const void *src, unsigned int nblocks);
+void poly1305_core_emit(const struct poly1305_state *state, void *dst);
+
+/* Crypto API helper functions for the Poly1305 MAC */
 int crypto_poly1305_init(struct shash_desc *desc);
 unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
 					const u8 *src, unsigned int srclen);
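A minimal sketch of the core interface declared above; the caller is hypothetical and the input is assumed to be a whole number of 16-byte blocks:

static void example_poly1305_core(const u8 raw_key[16], const u8 *src,
				  unsigned int nblocks, u8 digest[16])
{
	struct poly1305_key key;
	struct poly1305_state state;

	poly1305_core_setkey(&key, raw_key);	/* "r" part of the key only */
	poly1305_core_init(&state);
	poly1305_core_blocks(&state, &key, src, nblocks);
	poly1305_core_emit(&state, digest);	/* no encrypted "s" nonce added */
}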
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index 2b70941..7e5a33e 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -9,6 +9,7 @@
 #include <linux/percpu.h>
 
 void topology_normalize_cpu_scale(void);
+int topology_update_cpu_topology(void);
 
 struct device_node;
 bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu);
@@ -32,4 +33,12 @@
 	return per_cpu(freq_scale, cpu);
 }
 
+DECLARE_PER_CPU(unsigned long, max_freq_scale);
+
+static inline
+unsigned long topology_get_max_freq_scale(struct sched_domain *sd, int cpu)
+{
+	return per_cpu(max_freq_scale, cpu);
+}
+
 #endif /* _LINUX_ARCH_TOPOLOGY_H_ */
diff --git a/include/linux/cfi.h b/include/linux/cfi.h
new file mode 100644
index 0000000..e27033d
--- /dev/null
+++ b/include/linux/cfi.h
@@ -0,0 +1,38 @@
+#ifndef _LINUX_CFI_H
+#define _LINUX_CFI_H
+
+#include <linux/stringify.h>
+
+#ifdef CONFIG_CFI_CLANG
+#ifdef CONFIG_MODULES
+
+typedef void (*cfi_check_fn)(uint64_t, void *, void *);
+
+/* Compiler-generated function in each module, and the kernel */
+#define CFI_CHECK_FN		__cfi_check
+#define CFI_CHECK_FN_NAME	__stringify(CFI_CHECK_FN)
+
+extern void CFI_CHECK_FN(uint64_t, void *, void *);
+
+#ifdef CONFIG_CFI_CLANG_SHADOW
+extern void cfi_module_add(struct module *mod, unsigned long min_addr,
+	unsigned long max_addr);
+
+extern void cfi_module_remove(struct module *mod, unsigned long min_addr,
+	unsigned long max_addr);
+#else
+static inline void cfi_module_add(struct module *mod, unsigned long min_addr,
+	unsigned long max_addr)
+{
+}
+
+static inline void cfi_module_remove(struct module *mod, unsigned long min_addr,
+	unsigned long max_addr)
+{
+}
+#endif /* CONFIG_CFI_CLANG_SHADOW */
+
+#endif /* CONFIG_MODULES */
+#endif /* CONFIG_CFI_CLANG */
+
+#endif /* _LINUX_CFI_H */
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index a01ebb630..e78086c 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -20,6 +20,7 @@
 #include <linux/u64_stats_sync.h>
 #include <linux/workqueue.h>
 #include <linux/bpf-cgroup.h>
+#include <linux/psi_types.h>
 
 #ifdef CONFIG_CGROUPS
 
@@ -31,6 +32,7 @@
 struct kernfs_ops;
 struct kernfs_open_file;
 struct seq_file;
+struct poll_table_struct;
 
 #define MAX_CGROUP_TYPE_NAMELEN 32
 #define MAX_CGROUP_ROOT_NAMELEN 64
@@ -442,6 +444,9 @@
 	/* used to schedule release agent */
 	struct work_struct release_agent_work;
 
+	/* used to track pressure stalls */
+	struct psi_group psi;
+
 	/* used to store eBPF programs */
 	struct cgroup_bpf bpf;
 
@@ -575,6 +580,9 @@
 	ssize_t (*write)(struct kernfs_open_file *of,
 			 char *buf, size_t nbytes, loff_t off);
 
+	__poll_t (*poll)(struct kernfs_open_file *of,
+			 struct poll_table_struct *pt);
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lock_class_key	lockdep_key;
 #endif
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b4854b4..7fd81b7 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -668,6 +668,11 @@
 	pr_cont_kernfs_path(cgrp->kn);
 }
 
+static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
+{
+	return &cgrp->psi;
+}
+
 static inline void cgroup_init_kthreadd(void)
 {
 	/*
@@ -722,6 +727,16 @@
 	return NULL;
 }
 
+static inline struct cgroup *cgroup_parent(struct cgroup *cgrp)
+{
+	return NULL;
+}
+
+static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
+{
+	return NULL;
+}
+
 static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
 					       struct cgroup *ancestor)
 {
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index acb77dcf..8996c09 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -21,6 +21,10 @@
 SUBSYS(cpuacct)
 #endif
 
+#if IS_ENABLED(CONFIG_SCHED_TUNE)
+SUBSYS(schedtune)
+#endif
+
 #if IS_ENABLED(CONFIG_BLK_CGROUP)
 SUBSYS(io)
 #endif
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index d756f23..ed0f2ad 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -43,3 +43,12 @@
 #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
 #define __assume_aligned(a, ...)	\
 	__attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
+
+#ifdef CONFIG_LTO_CLANG
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+#define __norecordmcount \
+	__attribute__((__section__(".text..ftrace")))
+#endif
+
+#define __nocfi		__attribute__((no_sanitize("cfi")))
+#endif
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 2b8ed70..b444840 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -256,6 +256,14 @@
 # define __gnu_inline
 #endif
 
+#ifndef __norecordmcount
+#define __norecordmcount
+#endif
+
+#ifndef __nocfi
+#define __nocfi
+#endif
+
 /*
  * Force always-inline if the user requests it so via the .config.
  * GCC does not warn about unused static inline functions for
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 72f59e8..dae9863 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -942,11 +942,21 @@
 }
 #endif
 
+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
+void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
+			struct cpufreq_governor *old_gov);
+#else
+static inline void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
+			struct cpufreq_governor *old_gov) { }
+#endif
+
 extern void arch_freq_prepare_all(void);
 extern unsigned int arch_freq_get_on_cpu(int cpu);
 
 extern void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
 				unsigned long max_freq);
+extern void arch_set_max_freq_scale(struct cpumask *cpus,
+				    unsigned long policy_max_freq);
 
 /* the following are really really optional */
 extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs;
diff --git a/include/linux/cpufreq_times.h b/include/linux/cpufreq_times.h
new file mode 100644
index 0000000..0eb6dc9
--- /dev/null
+++ b/include/linux/cpufreq_times.h
@@ -0,0 +1,46 @@
+/* drivers/cpufreq/cpufreq_times.c
+ *
+ * Copyright (C) 2018 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_CPUFREQ_TIMES_H
+#define _LINUX_CPUFREQ_TIMES_H
+
+#include <linux/cpufreq.h>
+#include <linux/pid.h>
+
+#ifdef CONFIG_CPU_FREQ_TIMES
+void cpufreq_task_times_init(struct task_struct *p);
+void cpufreq_task_times_alloc(struct task_struct *p);
+void cpufreq_task_times_exit(struct task_struct *p);
+int proc_time_in_state_show(struct seq_file *m, struct pid_namespace *ns,
+			    struct pid *pid, struct task_struct *p);
+void cpufreq_acct_update_power(struct task_struct *p, u64 cputime);
+void cpufreq_times_create_policy(struct cpufreq_policy *policy);
+void cpufreq_times_record_transition(struct cpufreq_policy *policy,
+                                     unsigned int new_freq);
+void cpufreq_task_times_remove_uids(uid_t uid_start, uid_t uid_end);
+int single_uid_time_in_state_open(struct inode *inode, struct file *file);
+#else
+static inline void cpufreq_task_times_init(struct task_struct *p) {}
+static inline void cpufreq_task_times_alloc(struct task_struct *p) {}
+static inline void cpufreq_task_times_exit(struct task_struct *p) {}
+static inline void cpufreq_acct_update_power(struct task_struct *p,
+					     u64 cputime) {}
+static inline void cpufreq_times_create_policy(struct cpufreq_policy *policy) {}
+static inline void cpufreq_times_record_transition(
+	struct cpufreq_policy *policy, unsigned int new_freq) {}
+static inline void cpufreq_task_times_remove_uids(uid_t uid_start,
+						  uid_t uid_end) {}
+#endif /* CONFIG_CPU_FREQ_TIMES */
+#endif /* _LINUX_CPUFREQ_TIMES_H */
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 4325d6f..35b06712 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -219,7 +219,7 @@
 #endif
 
 /* kernel/sched/idle.c */
-extern void sched_idle_set_state(struct cpuidle_state *idle_state);
+extern void sched_idle_set_state(struct cpuidle_state *idle_state, int index);
 extern void default_idle_call(void);
 
 #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 0880bae..bd19969 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -146,6 +146,7 @@
 	struct vfsmount *(*d_automount)(struct path *);
 	int (*d_manage)(const struct path *, bool);
 	struct dentry *(*d_real)(struct dentry *, const struct inode *);
+	void (*d_canonical_path)(const struct path *, struct path *);
 } ____cacheline_aligned;
 
 /*
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 31c865d..577d1b2 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -57,7 +57,12 @@
 
 	u64 freepages_start;
 	u64 freepages_delay;	/* wait for memory reclaim */
+
+	u64 thrashing_start;
+	u64 thrashing_delay;	/* wait for thrashing page */
+
 	u32 freepages_count;	/* total count of memory reclaim */
+	u32 thrashing_count;	/* total count of thrash waits */
 };
 #endif
 
@@ -76,6 +81,8 @@
 extern __u64 __delayacct_blkio_ticks(struct task_struct *);
 extern void __delayacct_freepages_start(void);
 extern void __delayacct_freepages_end(void);
+extern void __delayacct_thrashing_start(void);
+extern void __delayacct_thrashing_end(void);
 
 static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
 {
@@ -156,6 +163,18 @@
 		__delayacct_freepages_end();
 }
 
+static inline void delayacct_thrashing_start(void)
+{
+	if (current->delays)
+		__delayacct_thrashing_start();
+}
+
+static inline void delayacct_thrashing_end(void)
+{
+	if (current->delays)
+		__delayacct_thrashing_end();
+}
+
 #else
 static inline void delayacct_set_flag(int flag)
 {}
@@ -182,6 +201,10 @@
 {}
 static inline void delayacct_freepages_end(void)
 {}
+static inline void delayacct_thrashing_start(void)
+{}
+static inline void delayacct_thrashing_end(void)
+{}
 
 #endif /* CONFIG_TASK_DELAY_ACCT */
 
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
new file mode 100644
index 0000000..aa027f7
--- /dev/null
+++ b/include/linux/energy_model.h
@@ -0,0 +1,187 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_ENERGY_MODEL_H
+#define _LINUX_ENERGY_MODEL_H
+#include <linux/cpumask.h>
+#include <linux/jump_label.h>
+#include <linux/kobject.h>
+#include <linux/rcupdate.h>
+#include <linux/sched/cpufreq.h>
+#include <linux/sched/topology.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_ENERGY_MODEL
+/**
+ * em_cap_state - Capacity state of a performance domain
+ * @frequency:	The CPU frequency in KHz, for consistency with CPUFreq
+ * @power:	The power consumed by 1 CPU at this level, in milli-watts
+ * @cost:	The cost coefficient associated with this level, used during
+ *		energy calculation. Equal to: power * max_frequency / frequency
+ */
+struct em_cap_state {
+	unsigned long frequency;
+	unsigned long power;
+	unsigned long cost;
+};
+
+/**
+ * em_perf_domain - Performance domain
+ * @table:		List of capacity states, in ascending order
+ * @nr_cap_states:	Number of capacity states
+ * @cpus:		Cpumask covering the CPUs of the domain
+ *
+ * A "performance domain" represents a group of CPUs whose performance is
+ * scaled together. All CPUs of a performance domain must have the same
+ * micro-architecture. Performance domains often have a 1-to-1 mapping with
+ * CPUFreq policies.
+ */
+struct em_perf_domain {
+	struct em_cap_state *table;
+	int nr_cap_states;
+	unsigned long cpus[0];
+};
+
+#define EM_CPU_MAX_POWER 0xFFFF
+
+struct em_data_callback {
+	/**
+	 * active_power() - Provide power at the next capacity state of a CPU
+	 * @power	: Active power at the capacity state in mW (modified)
+	 * @freq	: Frequency at the capacity state in kHz (modified)
+	 * @cpu		: CPU for which we do this operation
+	 *
+	 * active_power() must find the lowest capacity state of 'cpu' above
+	 * 'freq' and update 'power' and 'freq' to the matching active power
+	 * and frequency.
+	 *
+	 * The power is the one of a single CPU in the domain, expressed in
+	 * milli-watts. It is expected to fit in the [0, EM_CPU_MAX_POWER]
+	 * range.
+	 *
+	 * Return 0 on success.
+	 */
+	int (*active_power)(unsigned long *power, unsigned long *freq, int cpu);
+};
+#define EM_DATA_CB(_active_power_cb) { .active_power = &_active_power_cb }
+
+struct em_perf_domain *em_cpu_get(int cpu);
+int em_register_perf_domain(cpumask_t *span, unsigned int nr_states,
+						struct em_data_callback *cb);
+
+/**
+ * em_pd_energy() - Estimates the energy consumed by the CPUs of a perf. domain
+ * @pd		: performance domain for which energy has to be estimated
+ * @max_util	: highest utilization among CPUs of the domain
+ * @sum_util	: sum of the utilization of all CPUs in the domain
+ *
+ * Return: the sum of the energy consumed by the CPUs of the domain assuming
+ * a capacity state satisfying the max utilization of the domain.
+ */
+static inline unsigned long em_pd_energy(struct em_perf_domain *pd,
+				unsigned long max_util, unsigned long sum_util)
+{
+	unsigned long freq, scale_cpu;
+	struct em_cap_state *cs;
+	int i, cpu;
+
+	/*
+	 * In order to predict the capacity state, map the utilization of the
+	 * most utilized CPU of the performance domain to a requested frequency,
+	 * like schedutil.
+	 */
+	cpu = cpumask_first(to_cpumask(pd->cpus));
+	scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
+	cs = &pd->table[pd->nr_cap_states - 1];
+	freq = map_util_freq(max_util, cs->frequency, scale_cpu);
+
+	/*
+	 * Find the lowest capacity state of the Energy Model above the
+	 * requested frequency.
+	 */
+	for (i = 0; i < pd->nr_cap_states; i++) {
+		cs = &pd->table[i];
+		if (cs->frequency >= freq)
+			break;
+	}
+
+	/*
+	 * The capacity of a CPU in the domain at that capacity state (cs)
+	 * can be computed as:
+	 *
+	 *             cs->freq * scale_cpu
+	 *   cs->cap = --------------------                          (1)
+	 *                 cpu_max_freq
+	 *
+	 * So, ignoring the costs of idle states (which are not available in
+	 * the EM), the energy consumed by this CPU at that capacity state is
+	 * estimated as:
+	 *
+	 *             cs->power * cpu_util
+	 *   cpu_nrg = --------------------                          (2)
+	 *                   cs->cap
+	 *
+	 * since 'cpu_util / cs->cap' represents its percentage of busy time.
+	 *
+	 *   NOTE: Although the result of this computation actually is in
+	 *         units of power, it can be manipulated as an energy value
+	 *         over a scheduling period, since it is assumed to be
+	 *         constant during that interval.
+	 *
+	 * By injecting (1) in (2), 'cpu_nrg' can be re-expressed as a product
+	 * of two terms:
+	 *
+	 *             cs->power * cpu_max_freq   cpu_util
+	 *   cpu_nrg = ------------------------ * ---------          (3)
+	 *                    cs->freq            scale_cpu
+	 *
+	 * The first term is static, and is stored in the em_cap_state struct
+	 * as 'cs->cost'.
+	 *
+	 * Since all CPUs of the domain have the same micro-architecture, they
+	 * share the same 'cs->cost', and the same CPU capacity. Hence, the
+	 * total energy of the domain (which is the simple sum of the energy of
+	 * all of its CPUs) can be factorized as:
+	 *
+	 *            cs->cost * \Sum cpu_util
+	 *   pd_nrg = ------------------------                       (4)
+	 *                  scale_cpu
+	 */
+	return cs->cost * sum_util / scale_cpu;
+}
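A worked example with assumed numbers: for a capacity state with power = 300 mW and frequency = 1000 kHz on a CPU whose maximum frequency is 2000 kHz, cost = 300 * 2000 / 1000 = 600. With scale_cpu = 1024 and sum_util = 512, formula (4) gives 600 * 512 / 1024 = 300 mW, which equals cs->power, as expected when the utilization sum matches the capacity of exactly one CPU at that state.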
+
+/**
+ * em_pd_nr_cap_states() - Get the number of capacity states of a perf. domain
+ * @pd		: performance domain for which this must be done
+ *
+ * Return: the number of capacity states in the performance domain table
+ */
+static inline int em_pd_nr_cap_states(struct em_perf_domain *pd)
+{
+	return pd->nr_cap_states;
+}
+
+#else
+struct em_perf_domain {};
+struct em_data_callback {};
+#define EM_DATA_CB(_active_power_cb) { }
+
+static inline int em_register_perf_domain(cpumask_t *span,
+			unsigned int nr_states, struct em_data_callback *cb)
+{
+	return -EINVAL;
+}
+static inline struct em_perf_domain *em_cpu_get(int cpu)
+{
+	return NULL;
+}
+static inline unsigned long em_pd_energy(struct em_perf_domain *pd,
+			unsigned long max_util, unsigned long sum_util)
+{
+	return 0;
+}
+static inline int em_pd_nr_cap_states(struct em_perf_domain *pd)
+{
+	return 0;
+}
+#endif
+
+#endif
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 40fec5f..f574042 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /**
  * include/linux/f2fs_fs.h
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef _LINUX_F2FS_FS_H
 #define _LINUX_F2FS_FS_H
@@ -112,12 +109,16 @@
 	struct f2fs_device devs[MAX_DEVICES];	/* device list */
 	__le32 qf_ino[F2FS_MAX_QUOTAS];	/* quota inode numbers */
 	__u8 hot_ext_count;		/* # of hot file extension */
-	__u8 reserved[314];		/* valid reserved region */
+	__u8 reserved[310];		/* valid reserved region */
+	__le32 crc;			/* checksum of superblock */
 } __packed;
 
 /*
  * For checkpoint
  */
+#define CP_DISABLED_QUICK_FLAG		0x00002000
+#define CP_DISABLED_FLAG		0x00001000
+#define CP_QUOTA_NEED_FSCK_FLAG		0x00000800
 #define CP_LARGE_NAT_BITMAP_FLAG	0x00000400
 #define CP_NOCRC_RECOVERY_FLAG	0x00000200
 #define CP_TRIMMED_FLAG		0x00000100
@@ -186,7 +187,7 @@
 struct f2fs_extent {
 	__le32 fofs;		/* start file offset of the extent */
 	__le32 blk;		/* start block address of the extent */
-	__le32 len;		/* lengh of the extent */
+	__le32 len;		/* length of the extent */
 } __packed;
 
 #define F2FS_NAME_LEN		255
@@ -284,7 +285,7 @@
 
 struct node_footer {
 	__le32 nid;		/* node id */
-	__le32 ino;		/* inode nunmber */
+	__le32 ino;		/* inode number */
 	__le32 flag;		/* include cold/fsync/dentry marks and offset */
 	__le64 cp_ver;		/* checkpoint version */
 	__le32 next_blkaddr;	/* next node page block address */
@@ -512,7 +513,7 @@
 struct f2fs_dir_entry {
 	__le32 hash_code;	/* hash code of file name */
 	__le32 ino;		/* inode number */
-	__le16 name_len;	/* lengh of file name */
+	__le16 name_len;	/* length of file name */
 	__u8 file_type;		/* file type */
 } __packed;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9242000..edada8f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1637,13 +1637,21 @@
  * VFS helper functions..
  */
 extern int vfs_create(struct inode *, struct dentry *, umode_t, bool);
+extern int vfs_create2(struct vfsmount *, struct inode *, struct dentry *, umode_t, bool);
 extern int vfs_mkdir(struct inode *, struct dentry *, umode_t);
+extern int vfs_mkdir2(struct vfsmount *, struct inode *, struct dentry *, umode_t);
 extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
+extern int vfs_mknod2(struct vfsmount *, struct inode *, struct dentry *, umode_t, dev_t);
 extern int vfs_symlink(struct inode *, struct dentry *, const char *);
+extern int vfs_symlink2(struct vfsmount *, struct inode *, struct dentry *, const char *);
 extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **);
+extern int vfs_link2(struct vfsmount *, struct dentry *, struct inode *, struct dentry *, struct inode **);
 extern int vfs_rmdir(struct inode *, struct dentry *);
+extern int vfs_rmdir2(struct vfsmount *, struct inode *, struct dentry *);
 extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
+extern int vfs_unlink2(struct vfsmount *, struct inode *, struct dentry *, struct inode **);
 extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
+extern int vfs_rename2(struct vfsmount *, struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
 extern int vfs_whiteout(struct inode *, struct dentry *);
 
 extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode,
@@ -1652,6 +1660,9 @@
 int vfs_mkobj(struct dentry *, umode_t,
 		int (*f)(struct dentry *, umode_t, void *),
 		void *);
+int vfs_mkobj2(struct vfsmount *, struct dentry *, umode_t,
+		int (*f)(struct dentry *, umode_t, void *),
+		void *);
 
 extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 
@@ -1782,6 +1793,7 @@
 	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
 	const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
 	int (*permission) (struct inode *, int);
+	int (*permission2) (struct vfsmount *, struct inode *, int);
 	struct posix_acl * (*get_acl)(struct inode *, int);
 
 	int (*readlink) (struct dentry *, char __user *,int);
@@ -1796,7 +1808,8 @@
 	int (*rename) (struct inode *, struct dentry *,
 			struct inode *, struct dentry *, unsigned int);
 	int (*setattr) (struct dentry *, struct iattr *);
-	int (*getattr) (const struct path *, struct kstat *, u32, unsigned int);
+	int (*setattr2) (struct vfsmount *, struct dentry *, struct iattr *);
+	int (*getattr) (const struct path *, struct kstat *, u32, unsigned int);
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
 	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
 		      u64 len);
@@ -1870,9 +1883,13 @@
 	int (*unfreeze_fs) (struct super_block *);
 	int (*statfs) (struct dentry *, struct kstatfs *);
 	int (*remount_fs) (struct super_block *, int *, char *);
+	int (*remount_fs2) (struct vfsmount *, struct super_block *, int *, char *);
+	void *(*clone_mnt_data) (void *);
+	void (*copy_mnt_data) (void *, void *);
 	void (*umount_begin) (struct super_block *);
 
 	int (*show_options)(struct seq_file *, struct dentry *);
+	int (*show_options2)(struct vfsmount *,struct seq_file *, struct dentry *);
 	int (*show_devname)(struct seq_file *, struct dentry *);
 	int (*show_path)(struct seq_file *, struct dentry *);
 	int (*show_stats)(struct seq_file *, struct dentry *);
@@ -2132,6 +2149,9 @@
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move() during rename() internally. */
 	struct dentry *(*mount) (struct file_system_type *, int,
 		       const char *, void *);
+	struct dentry *(*mount2) (struct vfsmount *, struct file_system_type *, int,
+			       const char *, void *);
+	void *(*alloc_mnt_data) (void);
 	void (*kill_sb) (struct super_block *);
 	struct module *owner;
 	struct file_system_type * next;
@@ -2448,6 +2468,8 @@
 extern long vfs_truncate(const struct path *, loff_t);
 extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
 		       struct file *filp);
+extern int do_truncate2(struct vfsmount *, struct dentry *, loff_t start,
+			unsigned int time_attrs, struct file *filp);
 extern int vfs_fallocate(struct file *file, int mode, loff_t offset,
 			loff_t len);
 extern long do_sys_open(int dfd, const char __user *filename, int flags,
@@ -2759,7 +2781,9 @@
 extern sector_t bmap(struct inode *, sector_t);
 #endif
 extern int notify_change(struct dentry *, struct iattr *, struct inode **);
+extern int notify_change2(struct vfsmount *, struct dentry *, struct iattr *, struct inode **);
 extern int inode_permission(struct inode *, int);
+extern int inode_permission2(struct vfsmount *, struct inode *, int);
 extern int generic_permission(struct inode *, int);
 extern int __check_sticky(struct inode *dir, struct inode *inode);
 
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index fd1ce10..61b7251 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -210,12 +210,19 @@
 static inline void fsnotify_open(struct file *file)
 {
 	const struct path *path = &file->f_path;
+	struct path lower_path;
 	struct inode *inode = file_inode(file);
 	__u32 mask = FS_OPEN;
 
 	if (S_ISDIR(inode->i_mode))
 		mask |= FS_ISDIR;
 
+	if (path->dentry->d_op && path->dentry->d_op->d_canonical_path) {
+		path->dentry->d_op->d_canonical_path(path, &lower_path);
+		fsnotify_parent(&lower_path, NULL, mask);
+		fsnotify(lower_path.dentry->d_inode, mask, &lower_path, FSNOTIFY_EVENT_PATH, NULL, 0);
+		path_put(&lower_path);
+	}
 	fsnotify_parent(path, NULL, mask);
 	fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0);
 }
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index dd16e82..677a5ea 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -224,8 +224,16 @@
 int register_ftrace_function(struct ftrace_ops *ops);
 int unregister_ftrace_function(struct ftrace_ops *ops);
 
+#ifdef CONFIG_CFI_CLANG
+/* Use a C stub with the correct type for CFI */
+static inline void ftrace_stub(unsigned long a0, unsigned long a1,
+			       struct ftrace_ops *op, struct pt_regs *regs)
+{
+}
+#else
 extern void ftrace_stub(unsigned long a0, unsigned long a1,
 			struct ftrace_ops *op, struct pt_regs *regs);
+#endif
 
 #else /* !CONFIG_FUNCTION_TRACER */
 /*
diff --git a/include/linux/init.h b/include/linux/init.h
index 2538d17..847e853 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -47,7 +47,7 @@
 
 /* These are for everybody (although not all archs will actually
    discard it in modules) */
-#define __init		__section(.init.text) __cold  __latent_entropy __noinitretpoline
+#define __init		__section(.init.text) __cold  __latent_entropy __noinitretpoline __nocfi
 #define __initdata	__section(.init.data)
 #define __initconst	__section(.init.rodata)
 #define __exitdata	__section(.exit.data)
@@ -167,6 +167,20 @@
 
 #ifndef __ASSEMBLY__
 
+#ifdef CONFIG_LTO_CLANG
+  /*
+   * Use __COUNTER__ prefix in the variable to help ensure ordering
+   * inside a compilation unit that defines multiple initcalls, and
+   * __LINE__ to help prevent naming collisions.
+   */
+  #define ___initcall_name2(c, l, fn, id) __initcall_##c##_##l##_##fn##id
+  #define ___initcall_name1(c, l, fn, id) ___initcall_name2(c, l, fn, id)
+  #define __initcall_name(fn, id) \
+		___initcall_name1(__COUNTER__, __LINE__, fn, id)
+#else
+  #define __initcall_name(fn, id) 	__initcall_##fn##id
+#endif
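A quick way to see what the CONFIG_LTO_CLANG branch buys is to expand the macro by hand; the userspace snippet below reuses the three helper macros verbatim, while my_init and the stringify helpers are stand-ins added only for this demonstration.

#include <stdio.h>

#define ___initcall_name2(c, l, fn, id) __initcall_##c##_##l##_##fn##id
#define ___initcall_name1(c, l, fn, id) ___initcall_name2(c, l, fn, id)
#define __initcall_name(fn, id) ___initcall_name1(__COUNTER__, __LINE__, fn, id)

#define __str1(x) #x
#define __str(x) __str1(x)

int main(void)
{
	/* Prints something like "__initcall_0_13_my_init6": __COUNTER__
	 * preserves ordering inside one translation unit and __LINE__
	 * avoids collisions between identically named initcalls. */
	puts(__str(__initcall_name(my_init, 6)));
	return 0;
}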
+
 /*
  * initcalls are now grouped by functionality into separate
  * subsections. Ordering inside the subsections is determined
@@ -187,12 +201,12 @@
 #define ___define_initcall(fn, id, __sec)			\
 	__ADDRESSABLE(fn)					\
 	asm(".section	\"" #__sec ".init\", \"a\"	\n"	\
-	"__initcall_" #fn #id ":			\n"	\
+	__stringify(__initcall_name(fn, id)) ":		\n"	\
 	    ".long	" #fn " - .			\n"	\
 	    ".previous					\n");
 #else
 #define ___define_initcall(fn, id, __sec) \
-	static initcall_t __initcall_##fn##id __used \
+	static initcall_t __initcall_name(fn, id) __used \
 		__attribute__((__section__(#__sec ".init"))) = fn;
 #endif
 
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 8415bf1..4a0fc3b 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -42,6 +42,7 @@
 	__s32		accept_ra_rt_info_max_plen;
 #endif
 #endif
+	__s32		accept_ra_rt_table;
 	__s32		proxy_ndp;
 	__s32		accept_source_route;
 	__s32		accept_ra_from_local;
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 814643f..444869d 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -25,6 +25,7 @@
 struct vm_area_struct;
 struct super_block;
 struct file_system_type;
+struct poll_table_struct;
 
 struct kernfs_open_node;
 struct kernfs_iattrs;
@@ -261,6 +262,9 @@
 	ssize_t (*write)(struct kernfs_open_file *of, char *buf, size_t bytes,
 			 loff_t off);
 
+	__poll_t (*poll)(struct kernfs_open_file *of,
+			 struct poll_table_struct *pt);
+
 	int (*mmap)(struct kernfs_open_file *of, struct vm_area_struct *vma);
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -350,6 +354,8 @@
 int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
 		     const char *new_name, const void *new_ns);
 int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr);
+__poll_t kernfs_generic_poll(struct kernfs_open_file *of,
+			     struct poll_table_struct *pt);
 void kernfs_notify(struct kernfs_node *kn);
 
 const void *kernfs_super_ns(struct super_block *sb);
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index c196176..edf8f86 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -4,7 +4,6 @@
 /* Simple interface for creating and stopping kernel threads without mess. */
 #include <linux/err.h>
 #include <linux/sched.h>
-#include <linux/cgroup.h>
 
 __printf(4, 5)
 struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
@@ -198,6 +197,8 @@
 
 void kthread_destroy_worker(struct kthread_worker *worker);
 
+struct cgroup_subsys_state;
+
 #ifdef CONFIG_BLK_CGROUP
 void kthread_associate_blkcg(struct cgroup_subsys_state *css);
 struct cgroup_subsys_state *kthread_blkcg(void);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bdec425..e4bb76e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2248,7 +2248,7 @@
 extern struct vm_area_struct *vma_merge(struct mm_struct *,
 	struct vm_area_struct *prev, unsigned long addr, unsigned long end,
 	unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
-	struct mempolicy *, struct vm_userfaultfd_ctx);
+	struct mempolicy *, struct vm_userfaultfd_ctx, const char __user *);
 extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
 extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
 	unsigned long addr, int new_below);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5ed8f62..7dfd40d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -290,11 +290,18 @@
 	/*
 	 * For areas with an address space and backing store,
 	 * linkage into the address_space->i_mmap interval tree.
+	 *
+	 * For private anonymous mappings, a pointer to a null-terminated string
+	 * in the user process containing the name given to the vma, or NULL
+	 * if unnamed.
 	 */
-	struct {
-		struct rb_node rb;
-		unsigned long rb_subtree_last;
-	} shared;
+	union {
+		struct {
+			struct rb_node rb;
+			unsigned long rb_subtree_last;
+		} shared;
+		const char __user *anon_name;
+	};
 
 	/*
 	 * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
@@ -649,4 +656,13 @@
 	unsigned long val;
 } swp_entry_t;
 
+/* Return the name for an anonymous mapping or NULL for a file-backed mapping */
+static inline const char __user *vma_get_anon_name(struct vm_area_struct *vma)
+{
+	if (vma->vm_file)
+		return NULL;
+
+	return vma->anon_name;
+}
+
 #endif /* _LINUX_MM_TYPES_H */
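The anon_name pointer above is populated from userspace; the following is a hedged sketch of how a process would typically label one of its anonymous mappings. The PR_SET_VMA / PR_SET_VMA_ANON_NAME constants are the values used by the Android patchset (the prctl hook itself is not part of this hunk), so treat them as assumptions.

#include <stdio.h>
#include <sys/mman.h>
#include <sys/prctl.h>

#ifndef PR_SET_VMA
#define PR_SET_VMA		0x53564d41
#define PR_SET_VMA_ANON_NAME	0
#endif

int main(void)
{
	size_t len = 4096;
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;

	/* On kernels with the hook, the region then shows up in
	 * /proc/self/maps as "[anon:demo buffer]". */
	if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
		  (unsigned long)p, len, "demo buffer"))
		perror("prctl(PR_SET_VMA)");

	munmap(p, len);
	return 0;
}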
diff --git a/include/linux/mmc/pm.h b/include/linux/mmc/pm.h
index 4a13920..6e2d6a1 100644
--- a/include/linux/mmc/pm.h
+++ b/include/linux/mmc/pm.h
@@ -26,5 +26,6 @@
 
 #define MMC_PM_KEEP_POWER	(1 << 0)	/* preserve card power during suspend */
 #define MMC_PM_WAKE_SDIO_IRQ	(1 << 1)	/* wake up host system on SDIO IRQ assertion */
+#define MMC_PM_IGNORE_PM_NOTIFY	(1 << 2)	/* ignore mmc pm notify */
 
 #endif /* LINUX_MMC_PM_H */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d4b0c79..4e46ff2 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -163,6 +163,7 @@
 	NR_ISOLATED_FILE,	/* Temporary isolated pages from file lru */
 	WORKINGSET_REFAULT,
 	WORKINGSET_ACTIVATE,
+	WORKINGSET_RESTORE,
 	WORKINGSET_NODERECLAIM,
 	NR_ANON_MAPPED,	/* Mapped anonymous pages */
 	NR_FILE_MAPPED,	/* pagecache pages mapped into pagetables.
diff --git a/include/linux/module.h b/include/linux/module.h
index 9915397..ff1b6de 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -21,6 +21,7 @@
 #include <linux/rbtree_latch.h>
 #include <linux/error-injection.h>
 #include <linux/tracepoint-defs.h>
+#include <linux/cfi.h>
 
 #include <linux/percpu.h>
 #include <asm/module.h>
@@ -348,6 +349,10 @@
 	const s32 *crcs;
 	unsigned int num_syms;
 
+#ifdef CONFIG_CFI_CLANG
+	cfi_check_fn cfi_check;
+#endif
+
 	/* Kernel parameters. */
 #ifdef CONFIG_SYSFS
 	struct mutex param_lock;
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 4b0db44..6645650 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -68,6 +68,7 @@
 	struct dentry *mnt_root;	/* root of the mounted tree */
 	struct super_block *mnt_sb;	/* pointer to superblock */
 	int mnt_flags;
+	void *data;
 } __randomize_layout;
 
 struct file; /* forward dec */
diff --git a/include/linux/namei.h b/include/linux/namei.h
index a78606e..e53a912 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -79,10 +79,13 @@
 extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int);
 extern void done_path_create(struct path *, struct dentry *);
 extern struct dentry *kern_path_locked(const char *, struct path *);
+extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
+			   const char *, unsigned int, struct path *);
 extern int kern_path_mountpoint(int, const char *, struct path *, unsigned int);
 
 extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int);
 extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
+extern struct dentry *lookup_one_len2(const char *, struct vfsmount *mnt, struct dentry *, int);
 extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int);
 
 extern int follow_down_one(struct path *);
diff --git a/include/linux/netfilter/xt_quota2.h b/include/linux/netfilter/xt_quota2.h
new file mode 100644
index 0000000..eadc69033
--- /dev/null
+++ b/include/linux/netfilter/xt_quota2.h
@@ -0,0 +1,25 @@
+#ifndef _XT_QUOTA_H
+#define _XT_QUOTA_H
+
+enum xt_quota_flags {
+	XT_QUOTA_INVERT    = 1 << 0,
+	XT_QUOTA_GROW      = 1 << 1,
+	XT_QUOTA_PACKET    = 1 << 2,
+	XT_QUOTA_NO_CHANGE = 1 << 3,
+	XT_QUOTA_MASK      = 0x0F,
+};
+
+struct xt_quota_counter;
+
+struct xt_quota_mtinfo2 {
+	char name[15];
+	u_int8_t flags;
+
+	/* Comparison-invariant */
+	aligned_u64 quota;
+
+	/* Used internally by the kernel */
+	struct xt_quota_counter *master __attribute__((aligned(8)));
+};
+
+#endif /* _XT_QUOTA_H */
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index b9cd9eb..587c147 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -69,6 +69,27 @@
 extern int of_get_flat_dt_size(void);
 extern uint32_t of_get_flat_dt_phandle(unsigned long node);
 
+/*
+ * early_init_dt_scan_chosen - scan the device tree for ramdisk and bootargs
+ *
+ * The boot arguments will be placed into the memory pointed to by @data.
+ * That memory should be COMMAND_LINE_SIZE big and initialized to be a valid
+ * (possibly empty) string.  Logic for what will be in @data after this
+ * function finishes:
+ *
+ * - CONFIG_CMDLINE_FORCE=true
+ *     CONFIG_CMDLINE
+ * - CONFIG_CMDLINE_EXTEND=true, @data is non-empty string
+ *     @data + dt bootargs (even if dt bootargs are empty)
+ * - CONFIG_CMDLINE_EXTEND=true, @data is empty string
+ *     CONFIG_CMDLINE + dt bootargs (even if dt bootargs are empty)
+ * - CMDLINE_FROM_BOOTLOADER=true, dt bootargs=non-empty
+ *     dt bootargs
+ * - CMDLINE_FROM_BOOTLOADER=true, dt bootargs=empty, @data is non-empty string
+ *     @data is left unchanged
+ * - CMDLINE_FROM_BOOTLOADER=true, dt bootargs=empty, @data is empty string
+ *     CONFIG_CMDLINE (or "" if that's not defined)
+ */
 extern int early_init_dt_scan_chosen(unsigned long node, const char *uname,
 				     int depth, void *data);
 extern int early_init_dt_scan_memory(unsigned long node, const char *uname,
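As a concrete illustration of the CONFIG_CMDLINE_EXTEND case documented above (the strings are hypothetical): with CONFIG_CMDLINE="console=ttyS0,115200" and a device tree /chosen/bootargs of "root=/dev/mmcblk0p2 rw", an initially empty @data buffer comes back as CONFIG_CMDLINE followed by the DT bootargs, i.e. "console=ttyS0,115200 root=/dev/mmcblk0p2 rw"; if the bootloader had already placed a string in @data, that string would be extended with the DT bootargs instead.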
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 74bee8c..4d99504 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -69,13 +69,14 @@
  */
 enum pageflags {
 	PG_locked,		/* Page is locked. Don't touch. */
-	PG_error,
 	PG_referenced,
 	PG_uptodate,
 	PG_dirty,
 	PG_lru,
 	PG_active,
+	PG_workingset,
 	PG_waiters,		/* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */
+	PG_error,
 	PG_slab,
 	PG_owner_priv_1,	/* Owner use. If pagecache, fs may use*/
 	PG_arch_1,
@@ -280,6 +281,8 @@
 PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD)
 PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD)
 	TESTCLEARFLAG(Active, active, PF_HEAD)
+PAGEFLAG(Workingset, workingset, PF_HEAD)
+	TESTCLEARFLAG(Workingset, workingset, PF_HEAD)
 __PAGEFLAG(Slab, slab, PF_NO_TAIL)
 __PAGEFLAG(SlobFree, slob_free, PF_NO_TAIL)
 PAGEFLAG(Checked, checked, PF_NO_COMPOUND)	   /* Used by some filesystems */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index b1bd218..78cac3f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -252,6 +252,7 @@
 #define FGP_WRITE		0x00000008
 #define FGP_NOFS		0x00000010
 #define FGP_NOWAIT		0x00000020
+#define FGP_FOR_MMAP		0x00000040
 
 struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
 		int fgp_flags, gfp_t cache_gfp_mask);
@@ -398,8 +399,7 @@
 static inline struct page *read_mapping_page(struct address_space *mapping,
 				pgoff_t index, void *data)
 {
-	filler_t *filler = (filler_t *)mapping->a_ops->readpage;
-	return read_cache_page(mapping, index, filler, data);
+	return read_cache_page(mapping, index, NULL, data);
 }
 
 /*
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b22bc81..3b6a0c5 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1189,6 +1189,11 @@
 int perf_event_max_stack_handler(struct ctl_table *table, int write,
 				 void __user *buffer, size_t *lenp, loff_t *ppos);
 
+static inline bool perf_paranoid_any(void)
+{
+	return sysctl_perf_event_paranoid > 2;
+}
+
 static inline bool perf_paranoid_tracepoint_raw(void)
 {
 	return sysctl_perf_event_paranoid > -1;
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 099b319..11dbffc 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -301,6 +301,7 @@
 struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev);
 struct dev_pm_opp *of_dev_pm_opp_find_required_opp(struct device *dev, struct device_node *np);
 struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp);
+int of_dev_pm_opp_get_cpu_power(unsigned long *mW, unsigned long *KHz, int cpu);
 #else
 static inline int dev_pm_opp_of_add_table(struct device *dev)
 {
@@ -343,6 +344,10 @@
 {
 	return NULL;
 }
+static inline int of_dev_pm_opp_get_cpu_power(unsigned long *mW, unsigned long *KHz, int cpu)
+{
+	return -ENOTSUPP;
+}
 #endif
 
 #endif		/* __LINUX_OPP_H__ */
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index d0e1f15..a50b1c9 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -116,6 +116,12 @@
 
 #endif /* CONFIG_PROC_FS */
 
+#ifdef CONFIG_PROC_UID
+extern void proc_register_uid(kuid_t uid);
+#else
+static inline void proc_register_uid(kuid_t uid) {}
+#endif
+
 struct net;
 
 static inline struct proc_dir_entry *proc_net_mkdir(
diff --git a/include/linux/psi.h b/include/linux/psi.h
new file mode 100644
index 0000000..af892c2
--- /dev/null
+++ b/include/linux/psi.h
@@ -0,0 +1,62 @@
+#ifndef _LINUX_PSI_H
+#define _LINUX_PSI_H
+
+#include <linux/jump_label.h>
+#include <linux/psi_types.h>
+#include <linux/sched.h>
+#include <linux/poll.h>
+
+struct seq_file;
+struct css_set;
+
+#ifdef CONFIG_PSI
+
+extern struct static_key_false psi_disabled;
+
+void psi_init(void);
+
+void psi_task_change(struct task_struct *task, int clear, int set);
+
+void psi_memstall_tick(struct task_struct *task, int cpu);
+void psi_memstall_enter(unsigned long *flags);
+void psi_memstall_leave(unsigned long *flags);
+
+int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
+
+#ifdef CONFIG_CGROUPS
+int psi_cgroup_alloc(struct cgroup *cgrp);
+void psi_cgroup_free(struct cgroup *cgrp);
+void cgroup_move_task(struct task_struct *p, struct css_set *to);
+
+struct psi_trigger *psi_trigger_create(struct psi_group *group,
+			char *buf, size_t nbytes, enum psi_res res);
+void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t);
+
+__poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
+			poll_table *wait);
+#endif
+
+#else /* CONFIG_PSI */
+
+static inline void psi_init(void) {}
+
+static inline void psi_memstall_enter(unsigned long *flags) {}
+static inline void psi_memstall_leave(unsigned long *flags) {}
+
+#ifdef CONFIG_CGROUPS
+static inline int psi_cgroup_alloc(struct cgroup *cgrp)
+{
+	return 0;
+}
+static inline void psi_cgroup_free(struct cgroup *cgrp)
+{
+}
+static inline void cgroup_move_task(struct task_struct *p, struct css_set *to)
+{
+	rcu_assign_pointer(p->cgroups, to);
+}
+#endif
+
+#endif /* CONFIG_PSI */
+
+#endif /* _LINUX_PSI_H */
diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
new file mode 100644
index 0000000..07aaf9b
--- /dev/null
+++ b/include/linux/psi_types.h
@@ -0,0 +1,173 @@
+#ifndef _LINUX_PSI_TYPES_H
+#define _LINUX_PSI_TYPES_H
+
+#include <linux/kthread.h>
+#include <linux/seqlock.h>
+#include <linux/types.h>
+#include <linux/kref.h>
+#include <linux/wait.h>
+
+#ifdef CONFIG_PSI
+
+/* Tracked task states */
+enum psi_task_count {
+	NR_IOWAIT,
+	NR_MEMSTALL,
+	NR_RUNNING,
+	NR_PSI_TASK_COUNTS = 3,
+};
+
+/* Task state bitmasks */
+#define TSK_IOWAIT	(1 << NR_IOWAIT)
+#define TSK_MEMSTALL	(1 << NR_MEMSTALL)
+#define TSK_RUNNING	(1 << NR_RUNNING)
+
+/* Resources that workloads could be stalled on */
+enum psi_res {
+	PSI_IO,
+	PSI_MEM,
+	PSI_CPU,
+	NR_PSI_RESOURCES = 3,
+};
+
+/*
+ * Pressure states for each resource:
+ *
+ * SOME: Stalled tasks & working tasks
+ * FULL: Stalled tasks & no working tasks
+ */
+enum psi_states {
+	PSI_IO_SOME,
+	PSI_IO_FULL,
+	PSI_MEM_SOME,
+	PSI_MEM_FULL,
+	PSI_CPU_SOME,
+	/* Only per-CPU, to weigh the CPU in the global average: */
+	PSI_NONIDLE,
+	NR_PSI_STATES = 6,
+};
+
+enum psi_aggregators {
+	PSI_AVGS = 0,
+	PSI_POLL,
+	NR_PSI_AGGREGATORS,
+};
+
+struct psi_group_cpu {
+	/* 1st cacheline updated by the scheduler */
+
+	/* Aggregator needs to know of concurrent changes */
+	seqcount_t seq ____cacheline_aligned_in_smp;
+
+	/* States of the tasks belonging to this group */
+	unsigned int tasks[NR_PSI_TASK_COUNTS];
+
+	/* Aggregate pressure state derived from the tasks */
+	u32 state_mask;
+
+	/* Period time sampling buckets for each state of interest (ns) */
+	u32 times[NR_PSI_STATES];
+
+	/* Time of last task change in this group (rq_clock) */
+	u64 state_start;
+
+	/* 2nd cacheline updated by the aggregator */
+
+	/* Delta detection against the sampling buckets */
+	u32 times_prev[NR_PSI_AGGREGATORS][NR_PSI_STATES]
+			____cacheline_aligned_in_smp;
+};
+
+/* PSI growth tracking window */
+struct psi_window {
+	/* Window size in ns */
+	u64 size;
+
+	/* Start time of the current window in ns */
+	u64 start_time;
+
+	/* Value at the start of the window */
+	u64 start_value;
+
+	/* Value growth in the previous window */
+	u64 prev_growth;
+};
+
+struct psi_trigger {
+	/* PSI state being monitored by the trigger */
+	enum psi_states state;
+
+	/* User-specified threshold in ns */
+	u64 threshold;
+
+	/* List node inside triggers list */
+	struct list_head node;
+
+	/* Backpointer needed during trigger destruction */
+	struct psi_group *group;
+
+	/* Wait queue for polling */
+	wait_queue_head_t event_wait;
+
+	/* Pending event flag */
+	int event;
+
+	/* Tracking window */
+	struct psi_window win;
+
+	/*
+	 * Time last event was generated. Used for rate-limiting
+	 * events to one per window
+	 */
+	u64 last_event_time;
+
+	/* Refcounting to prevent premature destruction */
+	struct kref refcount;
+};
+
+struct psi_group {
+	/* Protects data used by the aggregator */
+	struct mutex avgs_lock;
+
+	/* Per-cpu task state & time tracking */
+	struct psi_group_cpu __percpu *pcpu;
+
+	/* Running pressure averages */
+	u64 avg_total[NR_PSI_STATES - 1];
+	u64 avg_last_update;
+	u64 avg_next_update;
+
+	/* Aggregator work control */
+	struct delayed_work avgs_work;
+
+	/* Total stall times and sampled pressure averages */
+	u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
+	unsigned long avg[NR_PSI_STATES - 1][3];
+
+	/* Monitor work control */
+	atomic_t poll_scheduled;
+	struct kthread_worker __rcu *poll_kworker;
+	struct kthread_delayed_work poll_work;
+
+	/* Protects data used by the monitor */
+	struct mutex trigger_lock;
+
+	/* Configured polling triggers */
+	struct list_head triggers;
+	u32 nr_triggers[NR_PSI_STATES - 1];
+	u32 poll_states;
+	u64 poll_min_period;
+
+	/* Total stall times at the start of monitor activation */
+	u64 polling_total[NR_PSI_STATES - 1];
+	u64 polling_next_update;
+	u64 polling_until;
+};
+
+#else /* CONFIG_PSI */
+
+struct psi_group { };
+
+#endif /* CONFIG_PSI */
+
+#endif /* _LINUX_PSI_TYPES_H */
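The psi_trigger fields above correspond to what userspace configures through the PSI monitor interface; the sketch below follows the upstream psi documentation (write "<some|full> <stall us> <window us>" to /proc/pressure/<resource>, then poll for POLLPRI), so the exact path and its availability on a given build are assumptions rather than something shown in this hunk.

#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	/* threshold = 150ms of "some" memory stall inside a 1s window */
	const char trig[] = "some 150000 1000000";
	struct pollfd pfd;
	int fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);

	if (fd < 0 || write(fd, trig, strlen(trig) + 1) < 0) {
		perror("psi trigger");
		return 1;
	}

	pfd.fd = fd;
	pfd.events = POLLPRI;		/* psi_trigger_poll() signals POLLPRI */
	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLPRI))
		puts("memory pressure event");

	close(fd);
	return 0;
}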
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 20f5ba2..abfaea66 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -355,12 +355,6 @@
  * For cfs_rq, it is the aggregated load_avg of all runnable and
  * blocked sched_entities.
  *
- * load_avg may also take frequency scaling into account:
- *
- *   load_avg = runnable% * scale_load_down(load) * freq%
- *
- * where freq% is the CPU frequency normalized to the highest frequency.
- *
  * [util_avg definition]
  *
  *   util_avg = running% * SCHED_CAPACITY_SCALE
@@ -369,17 +363,14 @@
  * a CPU. For cfs_rq, it is the aggregated util_avg of all runnable
  * and blocked sched_entities.
  *
- * util_avg may also factor frequency scaling and CPU capacity scaling:
+ * load_avg and util_avg don't directly factor frequency scaling and CPU
+ * capacity scaling. The scaling is done through the rq_clock_pelt that
+ * is used for computing those signals (see update_rq_clock_pelt())
  *
- *   util_avg = running% * SCHED_CAPACITY_SCALE * freq% * capacity%
- *
- * where freq% is the same as above, and capacity% is the CPU capacity
- * normalized to the greatest capacity (due to uarch differences, etc).
- *
- * N.B., the above ratios (runnable%, running%, freq%, and capacity%)
- * themselves are in the range of [0, 1]. To do fixed point arithmetics,
- * we therefore scale them to as large a range as necessary. This is for
- * example reflected by util_avg's SCHED_CAPACITY_SCALE.
+ * N.B., the above ratios (runnable% and running%) themselves are in the
+ * range of [0, 1]. To do fixed point arithmetics, we therefore scale them
+ * to as large a range as necessary. This is for example reflected by
+ * util_avg's SCHED_CAPACITY_SCALE.
  *
  * [Overflow issue]
  *
@@ -710,6 +701,10 @@
 	unsigned			sched_contributes_to_load:1;
 	unsigned			sched_migrated:1;
 	unsigned			sched_remote_wakeup:1;
+#ifdef CONFIG_PSI
+	unsigned			sched_psi_wake_requeue:1;
+#endif
+
 	/* Force alignment to the next boundary: */
 	unsigned			:0;
 
@@ -799,6 +794,10 @@
 	u64				stimescaled;
 #endif
 	u64				gtime;
+#ifdef CONFIG_CPU_FREQ_TIMES
+	u64				*time_in_state;
+	unsigned int			max_state;
+#endif
 	struct prev_cputime		prev_cputime;
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 	struct vtime			vtime;
@@ -963,6 +962,10 @@
 	siginfo_t			*last_siginfo;
 
 	struct task_io_accounting	ioac;
+#ifdef CONFIG_PSI
+	/* Pressure stall state */
+	unsigned int			psi_flags;
+#endif
 #ifdef CONFIG_TASK_XACCT
 	/* Accumulated RSS usage: */
 	u64				acct_rss_mem1;
@@ -1398,6 +1401,7 @@
 #define PF_KTHREAD		0x00200000	/* I am a kernel thread */
 #define PF_RANDOMIZE		0x00400000	/* Randomize virtual address space */
 #define PF_SWAPWRITE		0x00800000	/* Allowed to write to swap */
+#define PF_MEMSTALL		0x01000000	/* Stalled due to lack of memory */
 #define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle with cpus_allowed */
 #define PF_MCE_EARLY		0x08000000      /* Early kill for mce process policy */
 #define PF_MUTEX_TESTER		0x20000000	/* Thread belongs to the rt mutex tester */
diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h
index 5966744..afa940c 100644
--- a/include/linux/sched/cpufreq.h
+++ b/include/linux/sched/cpufreq.h
@@ -20,6 +20,12 @@
                        void (*func)(struct update_util_data *data, u64 time,
 				    unsigned int flags));
 void cpufreq_remove_update_util_hook(int cpu);
+
+static inline unsigned long map_util_freq(unsigned long util,
+					unsigned long freq, unsigned long cap)
+{
+	return (freq + (freq >> 2)) * util / cap;
+}
 #endif /* CONFIG_CPU_FREQ */
 
 #endif /* _LINUX_SCHED_CPUFREQ_H */
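The 25% headroom encoded in map_util_freq() (i.e. (freq + freq/4) * util / cap) is easiest to see with numbers; the sketch below copies the helper into a standalone program and feeds it made-up values (a 2.0 GHz policy maximum and roughly 60% utilization).

#include <stdio.h>

static unsigned long map_util_freq(unsigned long util,
				   unsigned long freq, unsigned long cap)
{
	return (freq + (freq >> 2)) * util / cap;
}

int main(void)
{
	unsigned long cap = 1024;		/* SCHED_CAPACITY_SCALE */
	unsigned long max_khz = 2000000;	/* hypothetical 2.0 GHz policy max */
	unsigned long util = 614;		/* ~60% of capacity */

	/* Prints 1499023: ~1.5 GHz, i.e. the proportional ~1.2 GHz target
	 * plus the 25% headroom baked into the helper. */
	printf("%lu\n", map_util_freq(util, max_khz, cap));
	return 0;
}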
diff --git a/include/linux/sched/loadavg.h b/include/linux/sched/loadavg.h
index 80bc84b..4859bea 100644
--- a/include/linux/sched/loadavg.h
+++ b/include/linux/sched/loadavg.h
@@ -22,10 +22,26 @@
 #define EXP_5		2014		/* 1/exp(5sec/5min) */
 #define EXP_15		2037		/* 1/exp(5sec/15min) */
 
-#define CALC_LOAD(load,exp,n) \
-	load *= exp; \
-	load += n*(FIXED_1-exp); \
-	load >>= FSHIFT;
+/*
+ * a1 = a0 * e + a * (1 - e)
+ */
+static inline unsigned long
+calc_load(unsigned long load, unsigned long exp, unsigned long active)
+{
+	unsigned long newload;
+
+	newload = load * exp + active * (FIXED_1 - exp);
+	if (active >= load)
+		newload += FIXED_1-1;
+
+	return newload / FIXED_1;
+}
+
+extern unsigned long calc_load_n(unsigned long load, unsigned long exp,
+				 unsigned long active, unsigned int n);
+
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
 
 extern void calc_global_load(unsigned long ticks);
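For reference, one tick of the exponential average above can be reproduced in isolation; the snippet below copies calc_load() and the LOAD_INT/LOAD_FRAC helpers and uses an arbitrary starting point (a 1-minute average of 1.00 and two runnable tasks).

#include <stdio.h>

#define FSHIFT	11
#define FIXED_1	(1 << FSHIFT)
#define EXP_1	1884			/* 1/exp(5sec/1min), as above */

#define LOAD_INT(x)  ((x) >> FSHIFT)
#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100)

static unsigned long calc_load(unsigned long load, unsigned long exp,
			       unsigned long active)
{
	unsigned long newload = load * exp + active * (FIXED_1 - exp);

	if (active >= load)
		newload += FIXED_1 - 1;	/* round up when the load is rising */

	return newload / FIXED_1;
}

int main(void)
{
	unsigned long avg = 1 * FIXED_1;	/* 1-minute average currently 1.00 */
	unsigned long active = 2 * FIXED_1;	/* two runnable tasks this tick */

	avg = calc_load(avg, EXP_1, active);

	/* Prints 1.08: a single 5-second tick moves the average toward 2.00 */
	printf("%lu.%02lu\n", LOAD_INT(avg), LOAD_FRAC(avg));
	return 0;
}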
 
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index a9c32da..77e9fa3c 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -22,6 +22,8 @@
 
 extern unsigned int sysctl_sched_latency;
 extern unsigned int sysctl_sched_min_granularity;
+extern unsigned int sysctl_sched_sync_hint_enable;
+extern unsigned int sysctl_sched_cstate_aware;
 extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_child_runs_first;
 
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 15f3f61f..beffd85 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -23,10 +23,10 @@
 #define SD_BALANCE_FORK		0x0008	/* Balance on fork, clone */
 #define SD_BALANCE_WAKE		0x0010  /* Balance on wakeup */
 #define SD_WAKE_AFFINE		0x0020	/* Wake task to waking CPU */
-#define SD_ASYM_CPUCAPACITY	0x0040  /* Groups have different max cpu capacities */
-#define SD_SHARE_CPUCAPACITY	0x0080	/* Domain members share cpu capacity */
+#define SD_ASYM_CPUCAPACITY	0x0040  /* Domain members have different CPU capacities */
+#define SD_SHARE_CPUCAPACITY	0x0080	/* Domain members share CPU capacity */
 #define SD_SHARE_POWERDOMAIN	0x0100	/* Domain members share power domain */
-#define SD_SHARE_PKG_RESOURCES	0x0200	/* Domain members share cpu pkg resources */
+#define SD_SHARE_PKG_RESOURCES	0x0200	/* Domain members share CPU pkg resources */
 #define SD_SERIALIZE		0x0400	/* Only a single load balancing instance */
 #define SD_ASYM_PACKING		0x0800  /* Place busy groups earlier in the domain */
 #define SD_PREFER_SIBLING	0x1000	/* Prefer to place tasks in a sibling domain */
@@ -202,6 +202,17 @@
 # define SD_INIT_NAME(type)
 #endif
 
+#ifndef arch_scale_cpu_capacity
+static __always_inline
+unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
+{
+	if (sd && (sd->flags & SD_SHARE_CPUCAPACITY) && (sd->span_weight > 1))
+		return sd->smt_gain / sd->span_weight;
+
+	return SCHED_CAPACITY_SCALE;
+}
+#endif
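As a worked example of the SMT branch above (assuming the customary smt_gain default of 1178, roughly 1.15 * SCHED_CAPACITY_SCALE, used by kernels of this vintage): on a core with two hardware threads, span_weight is 2 and each sibling reports a capacity of 1178 / 2 = 589 rather than the full 1024.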
+
 #else /* CONFIG_SMP */
 
 struct sched_domain_attr;
@@ -217,6 +228,14 @@
 	return true;
 }
 
+#ifndef arch_scale_cpu_capacity
+static __always_inline
+unsigned long arch_scale_cpu_capacity(void __always_unused *sd, int cpu)
+{
+	return SCHED_CAPACITY_SCALE;
+}
+#endif
+
 #endif	/* !CONFIG_SMP */
 
 static inline int task_node(const struct task_struct *p)
diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h
index 10b19a1..a3661e9 100644
--- a/include/linux/sched/wake_q.h
+++ b/include/linux/sched/wake_q.h
@@ -34,6 +34,7 @@
 struct wake_q_head {
 	struct wake_q_node *first;
 	struct wake_q_node **lastp;
+	int count;
 };
 
 #define WAKE_Q_TAIL ((struct wake_q_node *) 0x01)
@@ -45,6 +46,7 @@
 {
 	head->first = WAKE_Q_TAIL;
 	head->lastp = &head->first;
+	head->count = 0;
 }
 
 extern void wake_q_add(struct wake_q_head *head,
diff --git a/include/linux/sched/xacct.h b/include/linux/sched/xacct.h
index c078f0a..9544c9d 100644
--- a/include/linux/sched/xacct.h
+++ b/include/linux/sched/xacct.h
@@ -28,6 +28,11 @@
 {
 	tsk->ioac.syscw++;
 }
+
+static inline void inc_syscfs(struct task_struct *tsk)
+{
+	tsk->ioac.syscfs++;
+}
 #else
 static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
 {
@@ -44,6 +49,10 @@
 static inline void inc_syscw(struct task_struct *tsk)
 {
 }
+
+static inline void inc_syscfs(struct task_struct *tsk)
+{
+}
 #endif
 
 #endif /* _LINUX_SCHED_XACCT_H */
diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h
index f4c9fc0..3105055 100644
--- a/include/linux/scmi_protocol.h
+++ b/include/linux/scmi_protocol.h
@@ -91,6 +91,8 @@
  *	to sustained performance level mapping
  * @freq_get: gets the frequency for a given device using sustained frequency
  *	to sustained performance level mapping
+ * @est_power_get: gets the estimated power cost for a given performance domain
+ *	at a given frequency
  */
 struct scmi_perf_ops {
 	int (*limits_set)(const struct scmi_handle *handle, u32 domain,
@@ -110,6 +112,8 @@
 			unsigned long rate, bool poll);
 	int (*freq_get)(const struct scmi_handle *handle, u32 domain,
 			unsigned long *rate, bool poll);
+	int (*est_power_get)(const struct scmi_handle *handle, u32 domain,
+			     unsigned long *rate, unsigned long *power);
 };
 
 /**
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 3f529ad..71c6110 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -446,6 +446,7 @@
 extern bool pm_save_wakeup_count(unsigned int count);
 extern void pm_wakep_autosleep_enabled(bool set);
 extern void pm_print_active_wakeup_sources(void);
+extern void pm_get_active_wakeup_sources(char *pending_sources, size_t max);
 
 extern void lock_system_sleep(void);
 extern void unlock_system_sleep(void);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 7bd0a6f..7c2e664 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -305,7 +305,7 @@
 
 /* linux/mm/workingset.c */
 void *workingset_eviction(struct address_space *mapping, struct page *page);
-bool workingset_refault(void *shadow);
+void workingset_refault(struct page *page, void *shadow);
 void workingset_activation(struct page *page);
 
 /* Do not use directly, use workingset_lookup_update */
diff --git a/include/linux/task_io_accounting.h b/include/linux/task_io_accounting.h
index 6f6acce..bb26108 100644
--- a/include/linux/task_io_accounting.h
+++ b/include/linux/task_io_accounting.h
@@ -19,6 +19,8 @@
 	u64 syscr;
 	/* # of write syscalls */
 	u64 syscw;
+	/* # of fsync syscalls */
+	u64 syscfs;
 #endif /* CONFIG_TASK_XACCT */
 
 #ifdef CONFIG_TASK_IO_ACCOUNTING
diff --git a/include/linux/task_io_accounting_ops.h b/include/linux/task_io_accounting_ops.h
index bb5498b..733ab62 100644
--- a/include/linux/task_io_accounting_ops.h
+++ b/include/linux/task_io_accounting_ops.h
@@ -97,6 +97,7 @@
 	dst->wchar += src->wchar;
 	dst->syscr += src->syscr;
 	dst->syscw += src->syscw;
+	dst->syscfs += src->syscfs;
 }
 #else
 static inline void task_chr_io_accounting_add(struct task_io_accounting *dst,
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 8675e14..af4396c 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -587,6 +587,7 @@
 	struct config_group group;
 	struct list_head cfs_list;
 	struct usb_function_driver *fd;
+	struct usb_function *f;
 	int (*set_inst_name)(struct usb_function_instance *inst,
 			      const char *name);
 	void (*free_func_inst)(struct usb_function_instance *inst);
diff --git a/include/linux/usb/f_accessory.h b/include/linux/usb/f_accessory.h
new file mode 100644
index 0000000..ebe3c4d
--- /dev/null
+++ b/include/linux/usb/f_accessory.h
@@ -0,0 +1,23 @@
+/*
+ * Gadget Function Driver for Android USB accessories
+ *
+ * Copyright (C) 2011 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __LINUX_USB_F_ACCESSORY_H
+#define __LINUX_USB_F_ACCESSORY_H
+
+#include <uapi/linux/usb/f_accessory.h>
+
+#endif /* __LINUX_USB_F_ACCESSORY_H */
diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h
new file mode 100644
index 0000000..9fbe209
--- /dev/null
+++ b/include/linux/wakeup_reason.h
@@ -0,0 +1,30 @@
+/*
+ * include/linux/wakeup_reason.h
+ *
+ * Logs the reason which caused the kernel to resume
+ * from the suspend mode.
+ *
+ * Copyright (C) 2014 Google, Inc.
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _LINUX_WAKEUP_REASON_H
+#define _LINUX_WAKEUP_REASON_H
+
+#define MAX_SUSPEND_ABORT_LEN 256
+
+void log_wakeup_reason(int irq);
+#ifdef CONFIG_SUSPEND
+void log_suspend_abort_reason(const char *fmt, ...);
+#else
+static inline void log_suspend_abort_reason(const char *fmt, ...) { }
+#endif
+
+#endif /* _LINUX_WAKEUP_REASON_H */
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 6dad031..30f25e1 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -30,10 +30,13 @@
 	const char *prefix;
 	int flags;      /* fs private flags */
 	bool (*list)(struct dentry *dentry);
-	int (*get)(const struct xattr_handler *, struct dentry *dentry,
+	int (*get)(const struct xattr_handler *handler, struct dentry *dentry,
 		   struct inode *inode, const char *name, void *buffer,
 		   size_t size);
-	int (*set)(const struct xattr_handler *, struct dentry *dentry,
+	int (*__get)(const struct xattr_handler *handler, struct dentry *dentry,
+		     struct inode *inode, const char *name, void *buffer,
+		     size_t size);
+	int (*set)(const struct xattr_handler *handler, struct dentry *dentry,
 		   struct inode *inode, const char *name, const void *buffer,
 		   size_t size, int flags);
 };
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 6def035..d803aa7 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -296,6 +296,18 @@
 void addrconf_prefix_rcv(struct net_device *dev,
 			 u8 *opt, int len, bool sllao);
 
+/* Determines into what table to put autoconf PIO/RIO/default routes
+ * learned on this device.
+ *
+ * - If 0, use the same table for every device. This puts routes into
+ *   one of RT_TABLE_{PREFIX,INFO,DFLT} depending on the type of route
+ *   (but note that these three are currently all equal to
+ *   RT6_TABLE_MAIN).
+ * - If > 0, use the specified table.
+ * - If < 0, put routes into table dev->ifindex + (-rt_table).
+ */
+u32 addrconf_rt_table(const struct net_device *dev, u32 default_table);
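For example (hypothetical numbers): if the per-device accept_ra_rt_table value added elsewhere in this series is -1000 and the receiving device has ifindex 5, autoconf routes learned on it go into table 1005; with the value 0 the behaviour is unchanged and everything stays in RT6_TABLE_MAIN; with a positive value such as 100, table 100 is used for every device.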
+
 /*
  *	anycast prototypes (anycast.c)
  */
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index ddfbb59..d2a016e 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -38,6 +38,7 @@
 	ND_OPT_RDNSS = 25,		/* RFC5006 */
 	ND_OPT_DNSSL = 31,		/* RFC6106 */
 	ND_OPT_6CO = 34,		/* RFC6775 */
+	ND_OPT_CAPTIVE_PORTAL = 37,	/* RFC7710 */
 	__ND_OPT_MAX
 };
 
diff --git a/include/trace/events/android_fs.h b/include/trace/events/android_fs.h
new file mode 100644
index 0000000..4950953
--- /dev/null
+++ b/include/trace/events/android_fs.h
@@ -0,0 +1,65 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM android_fs
+
+#if !defined(_TRACE_ANDROID_FS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_ANDROID_FS_H
+
+#include <linux/tracepoint.h>
+#include <trace/events/android_fs_template.h>
+
+DEFINE_EVENT(android_fs_data_start_template, android_fs_dataread_start,
+	TP_PROTO(struct inode *inode, loff_t offset, int bytes,
+		 pid_t pid, char *pathname, char *command),
+	TP_ARGS(inode, offset, bytes, pid, pathname, command));
+
+DEFINE_EVENT(android_fs_data_end_template, android_fs_dataread_end,
+	TP_PROTO(struct inode *inode, loff_t offset, int bytes),
+	TP_ARGS(inode, offset, bytes));
+
+DEFINE_EVENT(android_fs_data_start_template, android_fs_datawrite_start,
+	TP_PROTO(struct inode *inode, loff_t offset, int bytes,
+		 pid_t pid, char *pathname, char *command),
+	TP_ARGS(inode, offset, bytes, pid, pathname, command));
+
+DEFINE_EVENT(android_fs_data_end_template, android_fs_datawrite_end,
+	TP_PROTO(struct inode *inode, loff_t offset, int bytes),
+	     TP_ARGS(inode, offset, bytes));
+
+#endif /* _TRACE_ANDROID_FS_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
+#ifndef ANDROID_FSTRACE_GET_PATHNAME
+#define ANDROID_FSTRACE_GET_PATHNAME
+
+/* Sizes an on-stack array, so be careful if sizing this up! */
+#define MAX_TRACE_PATHBUF_LEN	256
+
+static inline char *
+android_fstrace_get_pathname(char *buf, int buflen, struct inode *inode)
+{
+	char *path;
+	struct dentry *d;
+
+	/*
+	 * d_obtain_alias() will either iput() if it locates an existing
+	 * dentry or transfer the reference to the new dentry created.
+	 * So get an extra reference here.
+	 */
+	ihold(inode);
+	d = d_obtain_alias(inode);
+	if (likely(!IS_ERR(d))) {
+		path = dentry_path_raw(d, buf, buflen);
+		if (unlikely(IS_ERR(path))) {
+			strcpy(buf, "ERROR");
+			path = buf;
+		}
+		dput(d);
+	} else {
+		strcpy(buf, "ERROR");
+		path = buf;
+	}
+	return path;
+}
+#endif
diff --git a/include/trace/events/android_fs_template.h b/include/trace/events/android_fs_template.h
new file mode 100644
index 0000000..b23d17b
--- /dev/null
+++ b/include/trace/events/android_fs_template.h
@@ -0,0 +1,64 @@
+#if !defined(_TRACE_ANDROID_FS_TEMPLATE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_ANDROID_FS_TEMPLATE_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(android_fs_data_start_template,
+	TP_PROTO(struct inode *inode, loff_t offset, int bytes,
+		 pid_t pid, char *pathname, char *command),
+	TP_ARGS(inode, offset, bytes, pid, pathname, command),
+	TP_STRUCT__entry(
+		__string(pathbuf, pathname);
+		__field(loff_t,	offset);
+		__field(int,	bytes);
+		__field(loff_t,	i_size);
+		__string(cmdline, command);
+		__field(pid_t,	pid);
+		__field(ino_t,	ino);
+	),
+	TP_fast_assign(
+		{
+			/*
+			 * Replace the spaces in filenames and cmdlines
+			 * because they confuse the tooling that parses
+			 * the traces.
+			 */
+			__assign_str(pathbuf, pathname);
+			(void)strreplace(__get_str(pathbuf), ' ', '_');
+			__entry->offset		= offset;
+			__entry->bytes		= bytes;
+			__entry->i_size		= i_size_read(inode);
+			__assign_str(cmdline, command);
+			(void)strreplace(__get_str(cmdline), ' ', '_');
+			__entry->pid		= pid;
+			__entry->ino		= inode->i_ino;
+		}
+	),
+	TP_printk("entry_name %s, offset %llu, bytes %d, cmdline %s,"
+		  " pid %d, i_size %llu, ino %lu",
+		  __get_str(pathbuf), __entry->offset, __entry->bytes,
+		  __get_str(cmdline), __entry->pid, __entry->i_size,
+		  (unsigned long) __entry->ino)
+);
+
+DECLARE_EVENT_CLASS(android_fs_data_end_template,
+	TP_PROTO(struct inode *inode, loff_t offset, int bytes),
+	TP_ARGS(inode, offset, bytes),
+	TP_STRUCT__entry(
+		__field(ino_t,	ino);
+		__field(loff_t,	offset);
+		__field(int,	bytes);
+	),
+	TP_fast_assign(
+		{
+			__entry->ino		= inode->i_ino;
+			__entry->offset		= offset;
+			__entry->bytes		= bytes;
+		}
+	),
+	TP_printk("ino %lu, offset %llu, bytes %d",
+		  (unsigned long) __entry->ino,
+		  __entry->offset, __entry->bytes)
+);
+
+#endif /* _TRACE_ANDROID_FS_TEMPLATE_H */
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 7956989..8b2e347 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -150,6 +150,17 @@
 		{ CP_SPEC_LOG_NUM,	"log type is 2" },		\
 		{ CP_RECOVER_DIR,	"dir needs recovery" })
 
+#define show_shutdown_mode(type)					\
+	__print_symbolic(type,						\
+		{ F2FS_GOING_DOWN_FULLSYNC,	"full sync" },		\
+		{ F2FS_GOING_DOWN_METASYNC,	"meta sync" },		\
+		{ F2FS_GOING_DOWN_NOSYNC,	"no sync" },		\
+		{ F2FS_GOING_DOWN_METAFLUSH,	"meta flush" },		\
+		{ F2FS_GOING_DOWN_NEED_FSCK,	"need fsck" })
+
+struct f2fs_sb_info;
+struct f2fs_io_info;
+struct extent_info;
 struct victim_sel_policy;
 struct f2fs_map_blocks;
 
@@ -534,6 +545,9 @@
 		__field(block_t,	m_lblk)
 		__field(block_t,	m_pblk)
 		__field(unsigned int,	m_len)
+		__field(unsigned int,	m_flags)
+		__field(int,	m_seg_type)
+		__field(bool,	m_may_create)
 		__field(int,	ret)
 	),
 
@@ -543,15 +557,22 @@
 		__entry->m_lblk		= map->m_lblk;
 		__entry->m_pblk		= map->m_pblk;
 		__entry->m_len		= map->m_len;
+		__entry->m_flags	= map->m_flags;
+		__entry->m_seg_type	= map->m_seg_type;
+		__entry->m_may_create	= map->m_may_create;
 		__entry->ret		= ret;
 	),
 
 	TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, "
-		"start blkaddr = 0x%llx, len = 0x%llx, err = %d",
+		"start blkaddr = 0x%llx, len = 0x%llx, flags = %u, "
+		"seg_type = %d, may_create = %d, err = %d",
 		show_dev_ino(__entry),
 		(unsigned long long)__entry->m_lblk,
 		(unsigned long long)__entry->m_pblk,
 		(unsigned long long)__entry->m_len,
+		__entry->m_flags,
+		__entry->m_seg_type,
+		__entry->m_may_create,
 		__entry->ret)
 );
 
@@ -1617,6 +1638,30 @@
 	TP_ARGS(sb, type, count)
 );
 
+TRACE_EVENT(f2fs_shutdown,
+
+	TP_PROTO(struct f2fs_sb_info *sbi, unsigned int mode, int ret),
+
+	TP_ARGS(sbi, mode, ret),
+
+	TP_STRUCT__entry(
+		__field(dev_t,	dev)
+		__field(unsigned int, mode)
+		__field(int, ret)
+	),
+
+	TP_fast_assign(
+		__entry->dev = sbi->sb->s_dev;
+		__entry->mode = mode;
+		__entry->ret = ret;
+	),
+
+	TP_printk("dev = (%d,%d), mode: %s, ret:%d",
+		show_dev(__entry->dev),
+		show_shutdown_mode(__entry->mode),
+		__entry->ret)
+);
+
 #endif /* _TRACE_F2FS_H */
 
  /* This part must be outside protection */
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index a81cffb..a1675d4 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -88,6 +88,7 @@
 	{1UL << PG_dirty,		"dirty"		},		\
 	{1UL << PG_lru,			"lru"		},		\
 	{1UL << PG_active,		"active"	},		\
+	{1UL << PG_workingset,		"workingset"	},		\
 	{1UL << PG_slab,		"slab"		},		\
 	{1UL << PG_owner_priv_1,	"owner_priv_1"	},		\
 	{1UL << PG_arch_1,		"arch_1"	},		\
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 5e1a757..f50f908 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -391,6 +391,30 @@
 	     TP_ARGS(tsk, delay));
 
 /*
+ * Tracepoint for recording the cause of uninterruptible sleep.
+ */
+TRACE_EVENT(sched_blocked_reason,
+
+	TP_PROTO(struct task_struct *tsk),
+
+	TP_ARGS(tsk),
+
+	TP_STRUCT__entry(
+		__field( pid_t,	pid	)
+		__field( void*, caller	)
+		__field( bool, io_wait	)
+	),
+
+	TP_fast_assign(
+		__entry->pid	= tsk->pid;
+		__entry->caller = (void*)get_wchan(tsk);
+		__entry->io_wait = tsk->in_iowait;
+	),
+
+	TP_printk("pid=%d iowait=%d caller=%pS", __entry->pid, __entry->io_wait, __entry->caller)
+);
+
+/*
  * Tracepoint for accounting runtime (time the task is executing
  * on a CPU).
  */
@@ -582,6 +606,424 @@
 
 	TP_printk("cpu=%d", __entry->cpu)
 );
+
+#ifdef CONFIG_SMP
+#ifdef CREATE_TRACE_POINTS
+static inline
+int __trace_sched_cpu(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	struct rq *rq = cfs_rq ? cfs_rq->rq : NULL;
+#else
+	struct rq *rq = cfs_rq ? container_of(cfs_rq, struct rq, cfs) : NULL;
+#endif
+	return rq ? cpu_of(rq)
+		  : task_cpu((container_of(se, struct task_struct, se)));
+}
+
+static inline
+int __trace_sched_path(struct cfs_rq *cfs_rq, char *path, int len)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	int l = path ? len : 0;
+
+	if (cfs_rq && task_group_is_autogroup(cfs_rq->tg))
+		return autogroup_path(cfs_rq->tg, path, l) + 1;
+	else if (cfs_rq && cfs_rq->tg->css.cgroup)
+		return cgroup_path(cfs_rq->tg->css.cgroup, path, l) + 1;
+#endif
+	if (path)
+		strcpy(path, "(null)");
+
+	return strlen("(null)");
+}
+
+static inline
+struct cfs_rq *__trace_sched_group_cfs_rq(struct sched_entity *se)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	return se->my_q;
+#else
+	return NULL;
+#endif
+}
+#endif /* CREATE_TRACE_POINTS */
+
+/*
+ * Tracepoint for cfs_rq load tracking:
+ */
+TRACE_EVENT(sched_load_cfs_rq,
+
+	TP_PROTO(struct cfs_rq *cfs_rq),
+
+	TP_ARGS(cfs_rq),
+
+	TP_STRUCT__entry(
+		__field(	int,		cpu			)
+		__dynamic_array(char,		path,
+				__trace_sched_path(cfs_rq, NULL, 0)	)
+		__field(	unsigned long,	load			)
+		__field(	unsigned long,	rbl_load		)
+		__field(	unsigned long,	util			)
+	),
+
+	TP_fast_assign(
+		__entry->cpu		= __trace_sched_cpu(cfs_rq, NULL);
+		__trace_sched_path(cfs_rq, __get_dynamic_array(path),
+				   __get_dynamic_array_len(path));
+		__entry->load		= cfs_rq->avg.load_avg;
+		__entry->rbl_load 	= cfs_rq->avg.runnable_load_avg;
+		__entry->util		= cfs_rq->avg.util_avg;
+	),
+
+	TP_printk("cpu=%d path=%s load=%lu rbl_load=%lu util=%lu",
+		  __entry->cpu, __get_str(path), __entry->load,
+		  __entry->rbl_load, __entry->util)
+);
+
+/*
+ * Tracepoint for rt_rq load tracking:
+ */
+struct rq;
+TRACE_EVENT(sched_load_rt_rq,
+
+	TP_PROTO(struct rq *rq),
+
+	TP_ARGS(rq),
+
+	TP_STRUCT__entry(
+		__field(	int,		cpu			)
+		__field(	unsigned long,	util			)
+	),
+
+	TP_fast_assign(
+		__entry->cpu	= rq->cpu;
+		__entry->util	= rq->avg_rt.util_avg;
+	),
+
+	TP_printk("cpu=%d util=%lu", __entry->cpu,
+		  __entry->util)
+);
+
+/*
+ * Tracepoint for sched_entity load tracking:
+ */
+TRACE_EVENT(sched_load_se,
+
+	TP_PROTO(struct sched_entity *se),
+
+	TP_ARGS(se),
+
+	TP_STRUCT__entry(
+		__field(	int,		cpu			      )
+		__dynamic_array(char,		path,
+		  __trace_sched_path(__trace_sched_group_cfs_rq(se), NULL, 0) )
+		__array(	char,		comm,	TASK_COMM_LEN	      )
+		__field(	pid_t,		pid			      )
+		__field(	unsigned long,	load			      )
+		__field(	unsigned long,	rbl_load		      )
+		__field(	unsigned long,	util			      )
+	),
+
+	TP_fast_assign(
+		struct cfs_rq *gcfs_rq = __trace_sched_group_cfs_rq(se);
+		struct task_struct *p = gcfs_rq ? NULL
+				    : container_of(se, struct task_struct, se);
+
+		__entry->cpu		= __trace_sched_cpu(gcfs_rq, se);
+		__trace_sched_path(gcfs_rq, __get_dynamic_array(path),
+				   __get_dynamic_array_len(path));
+		memcpy(__entry->comm, p ? p->comm : "(null)",
+				      p ? TASK_COMM_LEN : sizeof("(null)"));
+		__entry->pid = p ? p->pid : -1;
+		__entry->load = se->avg.load_avg;
+		__entry->rbl_load = se->avg.runnable_load_avg;
+		__entry->util = se->avg.util_avg;
+	),
+
+	TP_printk("cpu=%d path=%s comm=%s pid=%d load=%lu rbl_load=%lu util=%lu",
+		  __entry->cpu, __get_str(path), __entry->comm, __entry->pid,
+		  __entry->load, __entry->rbl_load, __entry->util)
+);
+
+/*
+ * Tracepoint for task_group load tracking:
+ */
+#ifdef CONFIG_FAIR_GROUP_SCHED
+TRACE_EVENT(sched_load_tg,
+
+	TP_PROTO(struct cfs_rq *cfs_rq),
+
+	TP_ARGS(cfs_rq),
+
+	TP_STRUCT__entry(
+		__field(	int,	cpu				)
+		__dynamic_array(char,	path,
+				__trace_sched_path(cfs_rq, NULL, 0)	)
+		__field(	long,	load				)
+	),
+
+	TP_fast_assign(
+		__entry->cpu	= cfs_rq->rq->cpu;
+		__trace_sched_path(cfs_rq, __get_dynamic_array(path),
+				   __get_dynamic_array_len(path));
+		__entry->load	= atomic_long_read(&cfs_rq->tg->load_avg);
+	),
+
+	TP_printk("cpu=%d path=%s load=%ld", __entry->cpu, __get_str(path),
+		  __entry->load)
+);
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
+/*
+ * Tracepoint for tasks' estimated utilization.
+ */
+TRACE_EVENT(sched_util_est_task,
+
+	TP_PROTO(struct task_struct *tsk, struct sched_avg *avg),
+
+	TP_ARGS(tsk, avg),
+
+	TP_STRUCT__entry(
+		__array( char,	comm,	TASK_COMM_LEN		)
+		__field( pid_t,		pid			)
+		__field( int,		cpu			)
+		__field( unsigned int,	util_avg		)
+		__field( unsigned int,	est_enqueued		)
+		__field( unsigned int,	est_ewma		)
+
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+		__entry->pid			= tsk->pid;
+		__entry->cpu			= task_cpu(tsk);
+		__entry->util_avg		= avg->util_avg;
+		__entry->est_enqueued		= avg->util_est.enqueued;
+		__entry->est_ewma		= avg->util_est.ewma;
+	),
+
+	TP_printk("comm=%s pid=%d cpu=%d util_avg=%u util_est_ewma=%u util_est_enqueued=%u",
+		  __entry->comm,
+		  __entry->pid,
+		  __entry->cpu,
+		  __entry->util_avg,
+		  __entry->est_ewma,
+		  __entry->est_enqueued)
+);
+
+/*
+ * Tracepoint for root cfs_rq's estimated utilization.
+ */
+TRACE_EVENT(sched_util_est_cpu,
+
+	TP_PROTO(int cpu, struct cfs_rq *cfs_rq),
+
+	TP_ARGS(cpu, cfs_rq),
+
+	TP_STRUCT__entry(
+		__field( int,		cpu			)
+		__field( unsigned int,	util_avg		)
+		__field( unsigned int,	util_est_enqueued	)
+	),
+
+	TP_fast_assign(
+		__entry->cpu			= cpu;
+		__entry->util_avg		= cfs_rq->avg.util_avg;
+		__entry->util_est_enqueued	= cfs_rq->avg.util_est.enqueued;
+	),
+
+	TP_printk("cpu=%d util_avg=%u util_est_enqueued=%u",
+		  __entry->cpu,
+		  __entry->util_avg,
+		  __entry->util_est_enqueued)
+);
+
+/*
+ * Tracepoint for find_best_target
+ */
+TRACE_EVENT(sched_find_best_target,
+
+	TP_PROTO(struct task_struct *tsk, bool prefer_idle,
+		 unsigned long min_util, int best_idle, int best_active,
+		 int target, int backup),
+
+	TP_ARGS(tsk, prefer_idle, min_util, best_idle,
+		best_active, target, backup),
+
+	TP_STRUCT__entry(
+		__array( char,  comm,   TASK_COMM_LEN   )
+		__field( pid_t, pid                     )
+		__field( unsigned long, min_util        )
+		__field( bool,  prefer_idle             )
+		__field( int,   best_idle               )
+		__field( int,   best_active             )
+		__field( int,   target                  )
+		__field( int,   backup                  )
+		),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+		__entry->pid            = tsk->pid;
+		__entry->min_util       = min_util;
+		__entry->prefer_idle    = prefer_idle;
+		__entry->best_idle      = best_idle;
+		__entry->best_active    = best_active;
+		__entry->target         = target;
+		__entry->backup         = backup;
+		),
+
+	TP_printk("pid=%d comm=%s prefer_idle=%d "
+		  "best_idle=%d best_active=%d target=%d backup=%d",
+		  __entry->pid, __entry->comm, __entry->prefer_idle,
+		  __entry->best_idle, __entry->best_active,
+		  __entry->target, __entry->backup)
+);
+
+/*
+ * Tracepoint for accounting CPU boosted utilization
+ */
+TRACE_EVENT(sched_boost_cpu,
+
+	TP_PROTO(int cpu, unsigned long util, long margin),
+
+	TP_ARGS(cpu, util, margin),
+
+	TP_STRUCT__entry(
+		__field( int,           cpu	)
+		__field( unsigned long, util	)
+		__field(long,           margin	)
+	),
+
+	TP_fast_assign(
+		__entry->cpu    = cpu;
+		__entry->util   = util;
+		__entry->margin = margin;
+	),
+
+	TP_printk("cpu=%d util=%lu margin=%ld",
+		__entry->cpu,
+		__entry->util,
+		__entry->margin)
+);
+
+/*
+ * Tracepoint for schedtune_tasks_update
+ */
+TRACE_EVENT(sched_tune_tasks_update,
+
+	TP_PROTO(struct task_struct *tsk, int cpu, int tasks, int idx,
+		int boost, int max_boost, u64 group_ts),
+
+	TP_ARGS(tsk, cpu, tasks, idx, boost, max_boost, group_ts),
+
+	TP_STRUCT__entry(
+		__array( char,  comm,   TASK_COMM_LEN   )
+		__field( pid_t,         pid             )
+		__field( int,           cpu             )
+		__field( int,           tasks           )
+		__field( int,           idx             )
+		__field( int,           boost           )
+		__field( int,           max_boost       )
+		__field( u64,		group_ts	)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+		__entry->pid            = tsk->pid;
+		__entry->cpu            = cpu;
+		__entry->tasks          = tasks;
+		__entry->idx            = idx;
+		__entry->boost          = boost;
+		__entry->max_boost      = max_boost;
+		__entry->group_ts	= group_ts;
+	),
+
+	TP_printk("pid=%d comm=%s "
+		"cpu=%d tasks=%d idx=%d boost=%d max_boost=%d timeout=%llu",
+		__entry->pid, __entry->comm,
+		__entry->cpu, __entry->tasks, __entry->idx,
+		__entry->boost, __entry->max_boost,
+		__entry->group_ts)
+);
+
+/*
+ * Tracepoint for schedtune_boostgroup_update
+ */
+TRACE_EVENT(sched_tune_boostgroup_update,
+
+	TP_PROTO(int cpu, int variation, int max_boost),
+
+	TP_ARGS(cpu, variation, max_boost),
+
+	TP_STRUCT__entry(
+		__field( int,   cpu		)
+		__field( int,   variation	)
+		__field( int,   max_boost	)
+	),
+
+	TP_fast_assign(
+		__entry->cpu            = cpu;
+		__entry->variation      = variation;
+		__entry->max_boost      = max_boost;
+	),
+
+	TP_printk("cpu=%d variation=%d max_boost=%d",
+		__entry->cpu, __entry->variation, __entry->max_boost)
+);
+
+/*
+ * Tracepoint for accounting task boosted utilization
+ */
+TRACE_EVENT(sched_boost_task,
+
+	TP_PROTO(struct task_struct *tsk, unsigned long util, long margin),
+
+	TP_ARGS(tsk, util, margin),
+
+	TP_STRUCT__entry(
+		__array( char,  comm,   TASK_COMM_LEN	)
+		__field( pid_t,         pid		)
+		__field( unsigned long, util		)
+		__field( long,          margin		)
+
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+		__entry->pid    = tsk->pid;
+		__entry->util   = util;
+		__entry->margin = margin;
+	),
+
+	TP_printk("comm=%s pid=%d util=%lu margin=%ld",
+		__entry->comm, __entry->pid,
+		__entry->util,
+		__entry->margin)
+);
+
+/*
+ * Tracepoint for system overutilized flag
+ */
+TRACE_EVENT(sched_overutilized,
+
+	TP_PROTO(int overutilized),
+
+	TP_ARGS(overutilized),
+
+	TP_STRUCT__entry(
+		__field( int,  overutilized    )
+	),
+
+	TP_fast_assign(
+		__entry->overutilized   = overutilized;
+	),
+
+	TP_printk("overutilized=%d",
+		__entry->overutilized)
+);
+
+#endif /* CONFIG_SMP */
 #endif /* _TRACE_SCHED_H */
 
 /* This part must be outside protection */
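The TRACE_EVENT() definitions above only describe event formats; nothing is emitted until scheduler code calls the generated trace_*() helpers. A minimal sketch of such a caller, assuming it lives under kernel/sched/ where this header is already included; the function name and values are illustrative and not part of this patch:

/* Illustrative caller for two of the events defined above. */
static void report_eas_state(int cpu, unsigned long util, long margin,
			     bool overutilized)
{
	/* Arguments follow the TP_PROTO() of each TRACE_EVENT(). */
	trace_sched_boost_cpu(cpu, util, margin);
	trace_sched_overutilized(overutilized ? 1 : 0);
}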
diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h
index bfaec69..36a53fd 100644
--- a/include/uapi/linux/android/binder.h
+++ b/include/uapi/linux/android/binder.h
@@ -38,9 +38,64 @@
 	BINDER_TYPE_PTR		= B_PACK_CHARS('p', 't', '*', B_TYPE_LARGE),
 };
 
-enum {
+/**
+ * enum flat_binder_object_shifts - shift values for flat_binder_object_flags
+ * @FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT: shift for getting the scheduler policy.
+ */
+enum flat_binder_object_shifts {
+	FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT = 9,
+};
+
+/**
+ * enum flat_binder_object_flags - flags for use in flat_binder_object.flags
+ */
+enum flat_binder_object_flags {
+	/**
+	 * @FLAT_BINDER_FLAG_PRIORITY_MASK: bit-mask for min scheduler priority
+	 *
+	 * These bits can be used to set the minimum scheduler priority
+	 * at which transactions into this node should run. Valid values
+	 * in these bits depend on the scheduler policy encoded in
+	 * @FLAT_BINDER_FLAG_SCHED_POLICY_MASK.
+	 *
+	 * For SCHED_NORMAL/SCHED_BATCH, the valid range is [-20..19].
+	 * For SCHED_FIFO/SCHED_RR, the valid range is [1..99].
+	 */
 	FLAT_BINDER_FLAG_PRIORITY_MASK = 0xff,
+	/**
+	 * @FLAT_BINDER_FLAG_ACCEPTS_FDS: whether the node accepts fds.
+	 */
 	FLAT_BINDER_FLAG_ACCEPTS_FDS = 0x100,
+	/**
+	 * @FLAT_BINDER_FLAG_SCHED_POLICY_MASK: bit-mask for scheduling policy
+	 *
+	 * These two bits can be used to set the minimum scheduling policy at
+	 * which transactions on this node should run. These match the UAPI
+	 * scheduler policy values, e.g.:
+	 * 00b: SCHED_NORMAL
+	 * 01b: SCHED_FIFO
+	 * 10b: SCHED_RR
+	 * 11b: SCHED_BATCH
+	 */
+	FLAT_BINDER_FLAG_SCHED_POLICY_MASK =
+		3U << FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT,
+
+	/**
+	 * @FLAT_BINDER_FLAG_INHERIT_RT: whether the node inherits RT policy
+	 *
+	 * Only when set, calls into this node will inherit a real-time
+	 * scheduling policy from the caller (for synchronous transactions).
+	 */
+	FLAT_BINDER_FLAG_INHERIT_RT = 0x800,
+
+	/**
+	 * @FLAT_BINDER_FLAG_TXN_SECURITY_CTX: request security contexts
+	 *
+	 * Only when set, senders will include their security
+	 * context.
+	 */
+	FLAT_BINDER_FLAG_TXN_SECURITY_CTX = 0x1000,
 };
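For illustration, a userspace client might pack these flags before publishing a binder node. A hedged sketch, assuming the 8-bit priority value has already been encoded the way the binder driver expects for the chosen policy; make_node_flags() and its parameters are hypothetical:

#include <linux/android/binder.h>

static __u32 make_node_flags(__u32 policy, __u32 prio, int inherit_rt)
{
	__u32 flags = 0;

	/* The two policy bits live at FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT. */
	flags |= (policy << FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT) &
		 FLAT_BINDER_FLAG_SCHED_POLICY_MASK;
	/* The minimum priority occupies the low 8 bits. */
	flags |= prio & FLAT_BINDER_FLAG_PRIORITY_MASK;
	if (inherit_rt)
		flags |= FLAT_BINDER_FLAG_INHERIT_RT;

	return flags;
}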
 
 #ifdef BINDER_IPC_32BIT
@@ -200,6 +255,15 @@
 	__u32            has_weak_ref;
 };
 
+struct binder_node_info_for_ref {
+	__u32            handle;
+	__u32            strong_count;
+	__u32            weak_count;
+	__u32            reserved1;
+	__u32            reserved2;
+	__u32            reserved3;
+};
+
 #define BINDER_WRITE_READ		_IOWR('b', 1, struct binder_write_read)
 #define BINDER_SET_IDLE_TIMEOUT		_IOW('b', 3, __s64)
 #define BINDER_SET_MAX_THREADS		_IOW('b', 5, __u32)
@@ -208,6 +272,8 @@
 #define BINDER_THREAD_EXIT		_IOW('b', 8, __s32)
 #define BINDER_VERSION			_IOWR('b', 9, struct binder_version)
 #define BINDER_GET_NODE_DEBUG_INFO	_IOWR('b', 11, struct binder_node_debug_info)
+#define BINDER_GET_NODE_INFO_FOR_REF	_IOWR('b', 12, struct binder_node_info_for_ref)
+#define BINDER_SET_CONTEXT_MGR_EXT	_IOW('b', 13, struct flat_binder_object)
 
 /*
  * NOTE: Two special error codes you should check for when calling
@@ -266,6 +332,11 @@
 	} data;
 };
 
+struct binder_transaction_data_secctx {
+	struct binder_transaction_data transaction_data;
+	binder_uintptr_t secctx;
+};
+
 struct binder_transaction_data_sg {
 	struct binder_transaction_data transaction_data;
 	binder_size_t buffers_size;
@@ -301,6 +372,11 @@
 	BR_OK = _IO('r', 1),
 	/* No parameters! */
 
+	BR_TRANSACTION_SEC_CTX = _IOR('r', 2,
+				      struct binder_transaction_data_secctx),
+	/*
+	 * binder_transaction_data_secctx: the received command.
+	 */
 	BR_TRANSACTION = _IOR('r', 2, struct binder_transaction_data),
 	BR_REPLY = _IOR('r', 3, struct binder_transaction_data),
 	/*
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 6448cdd..a2f8658 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -41,6 +41,7 @@
 #define F_SEAL_SHRINK	0x0002	/* prevent file from shrinking */
 #define F_SEAL_GROW	0x0004	/* prevent file from growing */
 #define F_SEAL_WRITE	0x0008	/* prevent writes */
+#define F_SEAL_FUTURE_WRITE	0x0010  /* prevent future writes while mapped */
 /* (1U << 31) is reserved for signed error codes */
 
 /*
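F_SEAL_FUTURE_WRITE is applied with fcntl(F_ADD_SEALS) on a memfd: existing mappings keep working while later write attempts are refused. A hedged userspace sketch of that flow, assuming glibc exposes memfd_create() and the updated UAPI header is installed; the buffer name and size are illustrative:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = memfd_create("sealed-buf", MFD_ALLOW_SEALING);

	if (fd < 0 || ftruncate(fd, 4096) < 0)
		return 1;
	/* From now on, new write mappings and write() calls should fail. */
	if (fcntl(fd, F_ADD_SEALS, F_SEAL_FUTURE_WRITE) < 0)
		perror("F_ADD_SEALS");
	close(fd);
	return 0;
}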
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index a441ea1..086e7ee 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -269,7 +269,8 @@
 #define FS_POLICY_FLAGS_PAD_16		0x02
 #define FS_POLICY_FLAGS_PAD_32		0x03
 #define FS_POLICY_FLAGS_PAD_MASK	0x03
-#define FS_POLICY_FLAGS_VALID		0x03
+#define FS_POLICY_FLAG_DIRECT_KEY	0x04	/* use master key directly */
+#define FS_POLICY_FLAGS_VALID		0x07
 
 /* Encryption algorithms */
 #define FS_ENCRYPTION_MODE_INVALID		0
@@ -281,6 +282,7 @@
 #define FS_ENCRYPTION_MODE_AES_128_CTS		6
 #define FS_ENCRYPTION_MODE_SPECK128_256_XTS	7 /* Removed, do not use. */
 #define FS_ENCRYPTION_MODE_SPECK128_256_CTS	8 /* Removed, do not use. */
+#define FS_ENCRYPTION_MODE_ADIANTUM		9
 
 struct fscrypt_policy {
 	__u8 version;
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 1a6fee97..d723479 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -56,6 +56,8 @@
 #define REISER2FS_SUPER_MAGIC_STRING	"ReIsEr2Fs"
 #define REISER2FS_JR_SUPER_MAGIC_STRING	"ReIsEr3Fs"
 
+#define SDCARDFS_SUPER_MAGIC	0x5dca2df5
+
 #define SMB_SUPER_MAGIC		0x517B
 #define CGROUP_SUPER_MAGIC	0x27e0eb
 #define CGROUP2_SUPER_MAGIC	0x63677270
diff --git a/include/uapi/linux/netfilter/xt_IDLETIMER.h b/include/uapi/linux/netfilter/xt_IDLETIMER.h
index 3c586a1..c82a1c1 100644
--- a/include/uapi/linux/netfilter/xt_IDLETIMER.h
+++ b/include/uapi/linux/netfilter/xt_IDLETIMER.h
@@ -5,6 +5,7 @@
  * Header file for Xtables timer target module.
  *
  * Copyright (C) 2004, 2010 Nokia Corporation
+ *
  * Written by Timo Teras <ext-timo.teras@nokia.com>
  *
  * Converted to x_tables and forward-ported to 2.6.34
@@ -33,12 +34,19 @@
 #include <linux/types.h>
 
 #define MAX_IDLETIMER_LABEL_SIZE 28
+#define NLMSG_MAX_SIZE 64
+
+#define NL_EVENT_TYPE_INACTIVE 0
+#define NL_EVENT_TYPE_ACTIVE 1
 
 struct idletimer_tg_info {
 	__u32 timeout;
 
 	char label[MAX_IDLETIMER_LABEL_SIZE];
 
+	/* Use netlink messages for notification in addition to sysfs */
+	__u8 send_nl_msg;
+
 	/* for kernel module internal use only */
 	struct idletimer_tg *timer __attribute__((aligned(8)));
 };
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index b17201e..5cdf234 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -220,4 +220,7 @@
 # define PR_SPEC_DISABLE		(1UL << 2)
 # define PR_SPEC_FORCE_DISABLE		(1UL << 3)
 
+#define PR_SET_VMA		0x53564d41
+# define PR_SET_VMA_ANON_NAME		0
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h
index b7aa7bb..5e8ca16 100644
--- a/include/uapi/linux/taskstats.h
+++ b/include/uapi/linux/taskstats.h
@@ -34,7 +34,7 @@
  */
 
 
-#define TASKSTATS_VERSION	8
+#define TASKSTATS_VERSION	9
 #define TS_COMM_LEN		32	/* should be >= TASK_COMM_LEN
 					 * in linux/sched.h */
 
@@ -164,6 +164,10 @@
 	/* Delay waiting for memory reclaim */
 	__u64	freepages_count;
 	__u64	freepages_delay_total;
+
+	/* Delay waiting for thrashing page */
+	__u64	thrashing_count;
+	__u64	thrashing_delay_total;
 };
 
 
diff --git a/include/uapi/linux/usb/f_accessory.h b/include/uapi/linux/usb/f_accessory.h
new file mode 100644
index 0000000..0baeb7d
--- /dev/null
+++ b/include/uapi/linux/usb/f_accessory.h
@@ -0,0 +1,146 @@
+/*
+ * Gadget Function Driver for Android USB accessories
+ *
+ * Copyright (C) 2011 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _UAPI_LINUX_USB_F_ACCESSORY_H
+#define _UAPI_LINUX_USB_F_ACCESSORY_H
+
+/* Use Google Vendor ID when in accessory mode */
+#define USB_ACCESSORY_VENDOR_ID 0x18D1
+
+
+/* Product ID to use when in accessory mode */
+#define USB_ACCESSORY_PRODUCT_ID 0x2D00
+
+/* Product ID to use when in accessory mode and adb is enabled */
+#define USB_ACCESSORY_ADB_PRODUCT_ID 0x2D01
+
+/* Indexes for strings sent by the host via ACCESSORY_SEND_STRING */
+#define ACCESSORY_STRING_MANUFACTURER   0
+#define ACCESSORY_STRING_MODEL          1
+#define ACCESSORY_STRING_DESCRIPTION    2
+#define ACCESSORY_STRING_VERSION        3
+#define ACCESSORY_STRING_URI            4
+#define ACCESSORY_STRING_SERIAL         5
+
+/* Control request for retrieving device's protocol version
+ *
+ *	requestType:    USB_DIR_IN | USB_TYPE_VENDOR
+ *	request:        ACCESSORY_GET_PROTOCOL
+ *	value:          0
+ *	index:          0
+ *	data            version number (16 bits little endian)
+ *                     1 for original accessory support
+ *                     2 adds HID and device to host audio support
+ */
+#define ACCESSORY_GET_PROTOCOL  51
+
+/* Control request for host to send a string to the device
+ *
+ *	requestType:    USB_DIR_OUT | USB_TYPE_VENDOR
+ *	request:        ACCESSORY_SEND_STRING
+ *	value:          0
+ *	index:          string ID
+ *	data            zero terminated UTF8 string
+ *
+ *  The device can later retrieve these strings via the
+ *  ACCESSORY_GET_STRING_* ioctls
+ */
+#define ACCESSORY_SEND_STRING   52
+
+/* Control request for starting device in accessory mode.
+ * The host sends this after setting all its strings to the device.
+ *
+ *	requestType:    USB_DIR_OUT | USB_TYPE_VENDOR
+ *	request:        ACCESSORY_START
+ *	value:          0
+ *	index:          0
+ *	data            none
+ */
+#define ACCESSORY_START         53
+
+/* Control request for registering a HID device.
+ * Upon registering, a unique ID is sent by the accessory in the
+ * value parameter. This ID will be used for future commands for
+ * the device
+ *
+ *	requestType:    USB_DIR_OUT | USB_TYPE_VENDOR
+ *	request:        ACCESSORY_REGISTER_HID
+ *	value:          Accessory assigned ID for the HID device
+ *	index:          total length of the HID report descriptor
+ *	data            none
+ */
+#define ACCESSORY_REGISTER_HID         54
+
+/* Control request for unregistering a HID device.
+ *
+ *	requestType:    USB_DIR_OUT | USB_TYPE_VENDOR
+ *	request:        ACCESSORY_UNREGISTER_HID
+ *	value:          Accessory assigned ID for the HID device
+ *	index:          0
+ *	data            none
+ */
+#define ACCESSORY_UNREGISTER_HID         55
+
+/* Control request for sending the HID report descriptor.
+ * If the HID descriptor is longer than the endpoint zero max packet size,
+ * the descriptor will be sent in multiple ACCESSORY_SET_HID_REPORT_DESC
+ * commands. The data for the descriptor must be sent sequentially
+ * if multiple packets are needed.
+ *
+ *	requestType:    USB_DIR_OUT | USB_TYPE_VENDOR
+ *	request:        ACCESSORY_SET_HID_REPORT_DESC
+ *	value:          Accessory assigned ID for the HID device
+ *	index:          offset of data in descriptor
+ *                      (needed when HID descriptor is too big for one packet)
+ *	data            the HID report descriptor
+ */
+#define ACCESSORY_SET_HID_REPORT_DESC         56
+
+/* Control request for sending HID events.
+ *
+ *	requestType:    USB_DIR_OUT | USB_TYPE_VENDOR
+ *	request:        ACCESSORY_SEND_HID_EVENT
+ *	value:          Accessory assigned ID for the HID device
+ *	index:          0
+ *	data            the HID report for the event
+ */
+#define ACCESSORY_SEND_HID_EVENT         57
+
+/* Control request for setting the audio mode.
+ *
+ *	requestType:	USB_DIR_OUT | USB_TYPE_VENDOR
+ *	request:        ACCESSORY_SET_AUDIO_MODE
+ *	value:          0 - no audio
+ *                     1 - device to host, 44100 16-bit stereo PCM
+ *	index:          0
+ *	data            none
+ */
+#define ACCESSORY_SET_AUDIO_MODE         58
+
+/* ioctls for retrieving strings set by the host */
+#define ACCESSORY_GET_STRING_MANUFACTURER   _IOW('M', 1, char[256])
+#define ACCESSORY_GET_STRING_MODEL          _IOW('M', 2, char[256])
+#define ACCESSORY_GET_STRING_DESCRIPTION    _IOW('M', 3, char[256])
+#define ACCESSORY_GET_STRING_VERSION        _IOW('M', 4, char[256])
+#define ACCESSORY_GET_STRING_URI            _IOW('M', 5, char[256])
+#define ACCESSORY_GET_STRING_SERIAL         _IOW('M', 6, char[256])
+/* returns 1 if there is a start request pending */
+#define ACCESSORY_IS_START_REQUESTED        _IO('M', 7)
+/* returns audio mode (set via the ACCESSORY_SET_AUDIO_MODE control request) */
+#define ACCESSORY_GET_AUDIO_MODE            _IO('M', 8)
+
+#endif /* _UAPI_LINUX_USB_F_ACCESSORY_H */
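These control requests are issued by the USB host, not by the gadget this header serves. A hedged host-side sketch using libusb (an assumption; any userspace USB stack would do) that reads the protocol version and then requests accessory mode; in a real flow the ACCESSORY_SEND_STRING requests would be issued between the two calls:

#include <libusb-1.0/libusb.h>
#include <stdio.h>

/* 'h' is an already-opened device handle; error handling is trimmed. */
static int enter_accessory_mode(libusb_device_handle *h)
{
	unsigned char buf[2];
	int ver;

	/* ACCESSORY_GET_PROTOCOL (51): 16-bit little-endian version. */
	if (libusb_control_transfer(h,
			LIBUSB_ENDPOINT_IN | LIBUSB_REQUEST_TYPE_VENDOR,
			51, 0, 0, buf, sizeof(buf), 1000) != 2)
		return -1;
	ver = buf[0] | (buf[1] << 8);
	printf("accessory protocol version %d\n", ver);

	/* ACCESSORY_START (53): device re-enumerates as 0x18D1:0x2D00/01. */
	return libusb_control_transfer(h,
			LIBUSB_ENDPOINT_OUT | LIBUSB_REQUEST_TYPE_VENDOR,
			53, 0, 0, NULL, 0, 1000);
}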
diff --git a/init/Kconfig b/init/Kconfig
index 47035b5..3ced12e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -19,6 +19,9 @@
 config CC_IS_CLANG
 	def_bool $(success,$(CC) --version | head -n 1 | grep -q clang)
 
+config LD_IS_LLD
+	def_bool $(success,$(LD) -v | head -n 1 | grep -q LLD)
+
 config CLANG_VERSION
 	int
 	default $(shell,$(srctree)/scripts/clang-version.sh $(CC))
@@ -493,6 +496,45 @@
 
 	  Say N if unsure.
 
+config PSI
+	bool "Pressure stall information tracking"
+	help
+	  Collect metrics that indicate how overcommitted the CPU, memory,
+	  and IO capacity are in the system.
+
+	  If you say Y here, the kernel will create /proc/pressure/ with the
+	  pressure statistics files cpu, memory, and io. These will indicate
+	  the share of walltime in which some or all tasks in the system are
+	  delayed due to contention of the respective resource.
+
+	  In kernels with cgroup support, cgroups (cgroup2 only) will
+	  have cpu.pressure, memory.pressure, and io.pressure files,
+	  which aggregate pressure stalls for the grouped tasks only.
+
+	  For more details see Documentation/accounting/psi.txt.
+
+	  Say N if unsure.
+
+config PSI_DEFAULT_DISABLED
+	bool "Require boot parameter to enable pressure stall information tracking"
+	default n
+	depends on PSI
+	help
+	  If set, pressure stall information tracking will be disabled
+	  by default, but can be enabled by passing psi=1 on the
+	  kernel command line during boot.
+
+	  This feature adds some code to the task wakeup and sleep
+	  paths of the scheduler. The overhead is too low to affect
+	  common scheduling-intensive workloads in practice (such as
+	  web servers or memcache), but it does show up in artificial
+	  scheduler stress tests, such as hackbench.
+
+	  If you are paranoid and not sure what the kernel will be
+	  used for, say Y.
+
+	  Say N if unsure.
+
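As the help text above says, enabling CONFIG_PSI exposes /proc/pressure/{cpu,memory,io} (unless PSI_DEFAULT_DISABLED is set and psi=1 is not passed). A minimal userspace sketch that dumps the memory pressure averages; the "some ..."/"full ..." line format is the one documented in Documentation/accounting/psi.txt:

#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/pressure/memory", "r");

	if (!f) {
		perror("fopen");	/* PSI disabled or psi=0 on the cmdline */
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* "some avg10=... total=..." etc. */
	fclose(f);
	return 0;
}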
 endmenu # "CPU/Task time and stats accounting"
 
 config CPU_ISOLATION
@@ -999,6 +1041,29 @@
 	  desktop applications.  Task group autogeneration is currently based
 	  upon task session.
 
+config SCHED_TUNE
+	bool "Boosting for CFS tasks (EXPERIMENTAL)"
+	depends on SMP
+	help
+	  This option enables support for task classification using a new
+	  cgroup controller, schedtune. Schedtune allows tasks to be given
+	  a boost value and marked as latency-sensitive or not. This option
+	  provides the "schedtune" controller.
+
+	  This new controller:
+	  1. allows only a two-layer hierarchy, where the root defines the
+	     system-wide boost value and each of its direct children defines
+	     a different "class of tasks" to be boosted with a different value
+	  2. supports up to 16 different task classes, each of which can be
+	     configured with a different boost value
+
+	  Latency-sensitive tasks are not subject to energy-aware wakeup
+	  task placement. The boost value assigned to tasks is used to
+	  influence task placement and CPU frequency selection (if
+	  utilization-driven frequency selection is in use).
+
+	  If unsure, say N.
+
 config SYSFS_DEPRECATED
 	bool "Enable deprecated sysfs features to support old userspace tools"
 	depends on SYSFS
@@ -1981,7 +2046,7 @@
 
 config MODULES_TREE_LOOKUP
 	def_bool y
-	depends on PERF_EVENTS || TRACING
+	depends on PERF_EVENTS || TRACING || CFI_CLANG
 
 config INIT_ALL_POSSIBLE
 	bool
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index de4070d..8763ee7 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -721,7 +721,7 @@
 	info->notify_user_ns = NULL;
 }
 
-static int prepare_open(struct dentry *dentry, int oflag, int ro,
+static int prepare_open(struct vfsmount *mnt, struct dentry *dentry, int oflag, int ro,
 			umode_t mode, struct filename *name,
 			struct mq_attr *attr)
 {
@@ -735,7 +735,7 @@
 		if (ro)
 			return ro;
 		audit_inode_parent_hidden(name, dentry->d_parent);
-		return vfs_mkobj(dentry, mode & ~current_umask(),
+		return vfs_mkobj2(mnt, dentry, mode & ~current_umask(),
 				  mqueue_create_attr, attr);
 	}
 	/* it already existed */
@@ -745,7 +745,7 @@
 	if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
 		return -EINVAL;
 	acc = oflag2acc[oflag & O_ACCMODE];
-	return inode_permission(d_inode(dentry), acc);
+	return inode_permission2(mnt, d_inode(dentry), acc);
 }
 
 static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
@@ -769,13 +769,13 @@
 
 	ro = mnt_want_write(mnt);	/* we'll drop it in any case */
 	inode_lock(d_inode(root));
-	path.dentry = lookup_one_len(name->name, root, strlen(name->name));
+	path.dentry = lookup_one_len2(name->name, mnt, root, strlen(name->name));
 	if (IS_ERR(path.dentry)) {
 		error = PTR_ERR(path.dentry);
 		goto out_putfd;
 	}
 	path.mnt = mntget(mnt);
-	error = prepare_open(path.dentry, oflag, ro, mode, name, attr);
+	error = prepare_open(path.mnt, path.dentry, oflag, ro, mode, name, attr);
 	if (!error) {
 		struct file *file = dentry_open(&path, oflag, current_cred());
 		if (!IS_ERR(file))
@@ -825,7 +825,7 @@
 	if (err)
 		goto out_name;
 	inode_lock_nested(d_inode(mnt->mnt_root), I_MUTEX_PARENT);
-	dentry = lookup_one_len(name->name, mnt->mnt_root,
+	dentry = lookup_one_len2(name->name, mnt, mnt->mnt_root,
 				strlen(name->name));
 	if (IS_ERR(dentry)) {
 		err = PTR_ERR(dentry);
@@ -837,7 +837,7 @@
 		err = -ENOENT;
 	} else {
 		ihold(inode);
-		err = vfs_unlink(d_inode(dentry->d_parent), dentry, NULL);
+		err = vfs_unlink2(mnt, d_inode(dentry->d_parent), dentry, NULL);
 	}
 	dput(dentry);
 
diff --git a/kernel/Makefile b/kernel/Makefile
index df5e3ca..4888f54 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -35,6 +35,9 @@
 # cond_syscall is currently not LTO compatible
 CFLAGS_sys_ni.o = $(DISABLE_LTO)
 
+# Don't instrument error handlers
+CFLAGS_cfi.o = $(DISABLE_CFI_CLANG)
+
 obj-y += sched/
 obj-y += locking/
 obj-y += power/
@@ -104,6 +107,7 @@
 obj-$(CONFIG_IRQ_WORK) += irq_work.o
 obj-$(CONFIG_CPU_PM) += cpu_pm.o
 obj-$(CONFIG_BPF) += bpf/
+obj-$(CONFIG_CFI_CLANG) += cfi.o
 
 obj-$(CONFIG_PERF_EVENTS) += events/
 
diff --git a/kernel/cfi.c b/kernel/cfi.c
new file mode 100644
index 0000000..9e99922
--- /dev/null
+++ b/kernel/cfi.c
@@ -0,0 +1,306 @@
+/*
+ * CFI (Control Flow Integrity) error and slowpath handling
+ *
+ * Copyright (C) 2017 Google, Inc.
+ */
+
+#include <linux/gfp.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/ratelimit.h>
+#include <linux/rcupdate.h>
+#include <linux/spinlock.h>
+#include <asm/bug.h>
+#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
+
+/* Compiler-defined handler names */
+#ifdef CONFIG_CFI_PERMISSIVE
+#define cfi_failure_handler	__ubsan_handle_cfi_check_fail
+#define cfi_slowpath_handler	__cfi_slowpath_diag
+#else /* enforcing */
+#define cfi_failure_handler	__ubsan_handle_cfi_check_fail_abort
+#define cfi_slowpath_handler	__cfi_slowpath
+#endif /* CONFIG_CFI_PERMISSIVE */
+
+static inline void handle_cfi_failure(void *ptr)
+{
+#ifdef CONFIG_CFI_PERMISSIVE
+	WARN_RATELIMIT(1, "CFI failure (target: %pF):\n", ptr);
+#else
+	pr_err("CFI failure (target: %pF):\n", ptr);
+	BUG();
+#endif
+}
+
+#ifdef CONFIG_MODULES
+#ifdef CONFIG_CFI_CLANG_SHADOW
+struct shadow_range {
+	/* Module address range */
+	unsigned long mod_min_addr;
+	unsigned long mod_max_addr;
+	/* Module page range */
+	unsigned long min_page;
+	unsigned long max_page;
+};
+
+#define SHADOW_ORDER	1
+#define SHADOW_PAGES	(1 << SHADOW_ORDER)
+#define SHADOW_SIZE \
+	((SHADOW_PAGES * PAGE_SIZE - sizeof(struct shadow_range)) / sizeof(u16))
+#define SHADOW_INVALID	0xFFFF
+
+struct cfi_shadow {
+	/* Page range covered by the shadow */
+	struct shadow_range r;
+	/* Page offsets to __cfi_check functions in modules */
+	u16 shadow[SHADOW_SIZE];
+};
+
+static DEFINE_SPINLOCK(shadow_update_lock);
+static struct cfi_shadow __rcu *cfi_shadow __read_mostly = NULL;
+
+static inline int ptr_to_shadow(const struct cfi_shadow *s, unsigned long ptr)
+{
+	unsigned long index;
+	unsigned long page = ptr >> PAGE_SHIFT;
+
+	if (unlikely(page < s->r.min_page))
+		return -1; /* Outside of module area */
+
+	index = page - s->r.min_page;
+
+	if (index >= SHADOW_SIZE)
+		return -1; /* Cannot be addressed with shadow */
+
+	return (int)index;
+}
+
+static inline unsigned long shadow_to_ptr(const struct cfi_shadow *s,
+	int index)
+{
+	BUG_ON(index < 0 || index >= SHADOW_SIZE);
+
+	if (unlikely(s->shadow[index] == SHADOW_INVALID))
+		return 0;
+
+	return (s->r.min_page + s->shadow[index]) << PAGE_SHIFT;
+}
+
+static inline unsigned long shadow_to_page(const struct cfi_shadow *s,
+	int index)
+{
+	BUG_ON(index < 0 || index >= SHADOW_SIZE);
+
+	return (s->r.min_page + index) << PAGE_SHIFT;
+}
+
+static void prepare_next_shadow(const struct cfi_shadow __rcu *prev,
+		struct cfi_shadow *next)
+{
+	int i, index, check;
+
+	/* Mark everything invalid */
+	memset(next->shadow, 0xFF, sizeof(next->shadow));
+
+	if (!prev)
+		return; /* No previous shadow */
+
+	/* If the base address didn't change, update is not needed */
+	if (prev->r.min_page == next->r.min_page) {
+		memcpy(next->shadow, prev->shadow, sizeof(next->shadow));
+		return;
+	}
+
+	/* Convert the previous shadow to the new address range */
+	for (i = 0; i < SHADOW_SIZE; ++i) {
+		if (prev->shadow[i] == SHADOW_INVALID)
+			continue;
+
+		index = ptr_to_shadow(next, shadow_to_page(prev, i));
+		if (index < 0)
+			continue;
+
+		check = ptr_to_shadow(next,
+				shadow_to_ptr(prev, prev->shadow[i]));
+		if (check < 0)
+			continue;
+
+		next->shadow[index] = (u16)check;
+	}
+}
+
+static void add_module_to_shadow(struct cfi_shadow *s, struct module *mod)
+{
+	unsigned long ptr;
+	unsigned long min_page_addr;
+	unsigned long max_page_addr;
+	unsigned long check = (unsigned long)mod->cfi_check;
+	int check_index = ptr_to_shadow(s, check);
+
+	BUG_ON((check & PAGE_MASK) != check); /* Must be page aligned */
+
+	if (check_index < 0)
+		return; /* Module not addressable with shadow */
+
+	min_page_addr = (unsigned long)mod->core_layout.base & PAGE_MASK;
+	max_page_addr = (unsigned long)mod->core_layout.base +
+				       mod->core_layout.text_size;
+	max_page_addr &= PAGE_MASK;
+
+	/* For each page, store the check function index in the shadow */
+	for (ptr = min_page_addr; ptr <= max_page_addr; ptr += PAGE_SIZE) {
+		int index = ptr_to_shadow(s, ptr);
+		if (index >= 0) {
+			/* Assume a page only contains code for one module */
+			BUG_ON(s->shadow[index] != SHADOW_INVALID);
+			s->shadow[index] = (u16)check_index;
+		}
+	}
+}
+
+static void remove_module_from_shadow(struct cfi_shadow *s, struct module *mod)
+{
+	unsigned long ptr;
+	unsigned long min_page_addr;
+	unsigned long max_page_addr;
+
+	min_page_addr = (unsigned long)mod->core_layout.base & PAGE_MASK;
+	max_page_addr = (unsigned long)mod->core_layout.base +
+				       mod->core_layout.text_size;
+	max_page_addr &= PAGE_MASK;
+
+	for (ptr = min_page_addr; ptr <= max_page_addr; ptr += PAGE_SIZE) {
+		int index = ptr_to_shadow(s, ptr);
+		if (index >= 0)
+			s->shadow[index] = SHADOW_INVALID;
+	}
+}
+
+typedef void (*update_shadow_fn)(struct cfi_shadow *, struct module *);
+
+static void update_shadow(struct module *mod, unsigned long min_addr,
+	unsigned long max_addr, update_shadow_fn fn)
+{
+	struct cfi_shadow *prev;
+	struct cfi_shadow *next = (struct cfi_shadow *)
+		__get_free_pages(GFP_KERNEL, SHADOW_ORDER);
+
+	BUG_ON(!next);
+
+	next->r.mod_min_addr = min_addr;
+	next->r.mod_max_addr = max_addr;
+	next->r.min_page = min_addr >> PAGE_SHIFT;
+	next->r.max_page = max_addr >> PAGE_SHIFT;
+
+	spin_lock(&shadow_update_lock);
+	prev = rcu_dereference_protected(cfi_shadow, 1);
+	prepare_next_shadow(prev, next);
+
+	fn(next, mod);
+	set_memory_ro((unsigned long)next, SHADOW_PAGES);
+	rcu_assign_pointer(cfi_shadow, next);
+
+	spin_unlock(&shadow_update_lock);
+	synchronize_rcu();
+
+	if (prev) {
+		set_memory_rw((unsigned long)prev, SHADOW_PAGES);
+		free_pages((unsigned long)prev, SHADOW_ORDER);
+	}
+}
+
+void cfi_module_add(struct module *mod, unsigned long min_addr,
+	unsigned long max_addr)
+{
+	update_shadow(mod, min_addr, max_addr, add_module_to_shadow);
+}
+EXPORT_SYMBOL(cfi_module_add);
+
+void cfi_module_remove(struct module *mod, unsigned long min_addr,
+	unsigned long max_addr)
+{
+	update_shadow(mod, min_addr, max_addr, remove_module_from_shadow);
+}
+EXPORT_SYMBOL(cfi_module_remove);
+
+static inline cfi_check_fn ptr_to_check_fn(const struct cfi_shadow __rcu *s,
+	unsigned long ptr)
+{
+	int index;
+
+	if (unlikely(!s))
+		return NULL; /* No shadow available */
+
+	if (ptr < s->r.mod_min_addr || ptr > s->r.mod_max_addr)
+		return NULL; /* Not in a mapped module */
+
+	index = ptr_to_shadow(s, ptr);
+	if (index < 0)
+		return NULL; /* Cannot be addressed with shadow */
+
+	return (cfi_check_fn)shadow_to_ptr(s, index);
+}
+#endif /* CONFIG_CFI_CLANG_SHADOW */
+
+static inline cfi_check_fn find_module_cfi_check(void *ptr)
+{
+	struct module *mod;
+
+	preempt_disable();
+	mod = __module_address((unsigned long)ptr);
+	preempt_enable();
+
+	if (mod)
+		return mod->cfi_check;
+
+	return CFI_CHECK_FN;
+}
+
+static inline cfi_check_fn find_cfi_check(void *ptr)
+{
+#ifdef CONFIG_CFI_CLANG_SHADOW
+	cfi_check_fn f;
+
+	if (!rcu_access_pointer(cfi_shadow))
+		return CFI_CHECK_FN; /* No loaded modules */
+
+	/* Look up the __cfi_check function to use */
+	rcu_read_lock();
+	f = ptr_to_check_fn(rcu_dereference(cfi_shadow), (unsigned long)ptr);
+	rcu_read_unlock();
+
+	if (f)
+		return f;
+
+	/*
+	 * Fall back to find_module_cfi_check, which also works for a larger
+	 * module address space, but is slower.
+	 */
+#endif /* CONFIG_CFI_CLANG_SHADOW */
+
+	return find_module_cfi_check(ptr);
+}
+
+void cfi_slowpath_handler(uint64_t id, void *ptr, void *diag)
+{
+	cfi_check_fn check = find_cfi_check(ptr);
+
+	if (likely(check))
+		check(id, ptr, diag);
+	else /* Don't allow unchecked modules */
+		handle_cfi_failure(ptr);
+}
+EXPORT_SYMBOL(cfi_slowpath_handler);
+#endif /* CONFIG_MODULES */
+
+void cfi_failure_handler(void *data, void *ptr, void *vtable)
+{
+	handle_cfi_failure(ptr);
+}
+EXPORT_SYMBOL(cfi_failure_handler);
+
+void __cfi_check_fail(void *data, void *ptr)
+{
+	handle_cfi_failure(ptr);
+}
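The shadow is essentially a per-page table: every text page of a loaded module stores the page offset of that module's __cfi_check() relative to r.min_page, so resolving a call target is an array lookup plus a shift. A standalone userspace mock of that arithmetic (all addresses below are made up):

#include <stdint.h>
#include <stdio.h>

#define MOCK_PAGE_SHIFT		12
#define MOCK_SHADOW_SIZE	64
#define MOCK_INVALID		0xFFFF

int main(void)
{
	uint16_t shadow[MOCK_SHADOW_SIZE];
	unsigned long min_page  = 0x40000UL;			/* page number */
	unsigned long cfi_check = 0x40002UL << MOCK_PAGE_SHIFT;	/* page aligned */
	unsigned long mod_start = 0x40002UL << MOCK_PAGE_SHIFT;
	unsigned long mod_end   = 0x40006UL << MOCK_PAGE_SHIFT;
	unsigned long page, target, resolved;
	int i;

	for (i = 0; i < MOCK_SHADOW_SIZE; i++)
		shadow[i] = MOCK_INVALID;

	/* add_module_to_shadow(): each text page points at __cfi_check's page. */
	for (page = mod_start; page <= mod_end; page += 1UL << MOCK_PAGE_SHIFT)
		shadow[(page >> MOCK_PAGE_SHIFT) - min_page] =
			(uint16_t)((cfi_check >> MOCK_PAGE_SHIFT) - min_page);

	/* ptr_to_check_fn(): any pointer into the module resolves to __cfi_check. */
	target = mod_start + 0x1234;
	resolved = (min_page + shadow[(target >> MOCK_PAGE_SHIFT) - min_page])
			<< MOCK_PAGE_SHIFT;

	printf("target 0x%lx -> __cfi_check at 0x%lx\n", target, resolved);
	return 0;
}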
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 51063e7..0a39b26 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -541,7 +541,8 @@
 	tcred = get_task_cred(task);
 	if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
 	    !uid_eq(cred->euid, tcred->uid) &&
-	    !uid_eq(cred->euid, tcred->suid))
+	    !uid_eq(cred->euid, tcred->suid) &&
+	    !ns_capable(tcred->user_ns, CAP_SYS_NICE))
 		ret = -EACCES;
 	put_cred(tcred);
 	if (ret)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 78ef274..8e91dd0 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -55,6 +55,7 @@
 #include <linux/nsproxy.h>
 #include <linux/file.h>
 #include <linux/sched/cputime.h>
+#include <linux/psi.h>
 #include <net/sock.h>
 
 #define CREATE_TRACE_POINTS
@@ -836,7 +837,7 @@
 		 */
 		WARN_ON_ONCE(task->flags & PF_EXITING);
 
-		rcu_assign_pointer(task->cgroups, to_cset);
+		cgroup_move_task(task, to_cset);
 		list_add_tail(&task->cg_list, use_mg_tasks ? &to_cset->mg_tasks :
 							     &to_cset->tasks);
 	}
@@ -3424,6 +3425,79 @@
 	return ret;
 }
 
+#ifdef CONFIG_PSI
+static int cgroup_io_pressure_show(struct seq_file *seq, void *v)
+{
+	return psi_show(seq, &seq_css(seq)->cgroup->psi, PSI_IO);
+}
+static int cgroup_memory_pressure_show(struct seq_file *seq, void *v)
+{
+	return psi_show(seq, &seq_css(seq)->cgroup->psi, PSI_MEM);
+}
+static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
+{
+	return psi_show(seq, &seq_css(seq)->cgroup->psi, PSI_CPU);
+}
+
+static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
+					  size_t nbytes, enum psi_res res)
+{
+	struct psi_trigger *new;
+	struct cgroup *cgrp;
+
+	cgrp = cgroup_kn_lock_live(of->kn, false);
+	if (!cgrp)
+		return -ENODEV;
+
+	cgroup_get(cgrp);
+	cgroup_kn_unlock(of->kn);
+
+	new = psi_trigger_create(&cgrp->psi, buf, nbytes, res);
+	if (IS_ERR(new)) {
+		cgroup_put(cgrp);
+		return PTR_ERR(new);
+	}
+
+	psi_trigger_replace(&of->priv, new);
+
+	cgroup_put(cgrp);
+
+	return nbytes;
+}
+
+static ssize_t cgroup_io_pressure_write(struct kernfs_open_file *of,
+					  char *buf, size_t nbytes,
+					  loff_t off)
+{
+	return cgroup_pressure_write(of, buf, nbytes, PSI_IO);
+}
+
+static ssize_t cgroup_memory_pressure_write(struct kernfs_open_file *of,
+					  char *buf, size_t nbytes,
+					  loff_t off)
+{
+	return cgroup_pressure_write(of, buf, nbytes, PSI_MEM);
+}
+
+static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of,
+					  char *buf, size_t nbytes,
+					  loff_t off)
+{
+	return cgroup_pressure_write(of, buf, nbytes, PSI_CPU);
+}
+
+static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
+					  poll_table *pt)
+{
+	return psi_trigger_poll(&of->priv, of->file, pt);
+}
+
+static void cgroup_pressure_release(struct kernfs_open_file *of)
+{
+	psi_trigger_replace(&of->priv, NULL);
+}
+#endif /* CONFIG_PSI */
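With these handlers in place, a cgroup2 pressure file accepts a trigger written as "<some|full> <stall us> <window us>" and signals POLLPRI when the threshold is crossed, the same format psi_trigger_create() parses for /proc/pressure. A hedged userspace sketch; the cgroup path below is hypothetical:

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Ask for 150ms of memory stall time within any 1s window. */
	const char *path = "/sys/fs/cgroup/mygroup/memory.pressure";
	const char *trig = "some 150000 1000000";
	struct pollfd pfd;
	int fd = open(path, O_RDWR | O_NONBLOCK);

	if (fd < 0 || write(fd, trig, strlen(trig) + 1) < 0) {
		perror("trigger setup");
		return 1;
	}
	pfd.fd = fd;
	pfd.events = POLLPRI;		/* psi triggers signal POLLPRI */
	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLPRI))
		puts("memory pressure threshold crossed");
	close(fd);
	return 0;
}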
+
 static int cgroup_file_open(struct kernfs_open_file *of)
 {
 	struct cftype *cft = of->kn->priv;
@@ -3491,6 +3565,16 @@
 	return ret ?: nbytes;
 }
 
+static __poll_t cgroup_file_poll(struct kernfs_open_file *of, poll_table *pt)
+{
+	struct cftype *cft = of->kn->priv;
+
+	if (cft->poll)
+		return cft->poll(of, pt);
+
+	return kernfs_generic_poll(of, pt);
+}
+
 static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos)
 {
 	return seq_cft(seq)->seq_start(seq, ppos);
@@ -3529,6 +3613,7 @@
 	.open			= cgroup_file_open,
 	.release		= cgroup_file_release,
 	.write			= cgroup_file_write,
+	.poll			= cgroup_file_poll,
 	.seq_show		= cgroup_seqfile_show,
 };
 
@@ -3537,6 +3622,7 @@
 	.open			= cgroup_file_open,
 	.release		= cgroup_file_release,
 	.write			= cgroup_file_write,
+	.poll			= cgroup_file_poll,
 	.seq_start		= cgroup_seqfile_start,
 	.seq_next		= cgroup_seqfile_next,
 	.seq_stop		= cgroup_seqfile_stop,
@@ -4592,6 +4678,32 @@
 		.flags = CFTYPE_NOT_ON_ROOT,
 		.seq_show = cpu_stat_show,
 	},
+#ifdef CONFIG_PSI
+	{
+		.name = "io.pressure",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = cgroup_io_pressure_show,
+		.write = cgroup_io_pressure_write,
+		.poll = cgroup_pressure_poll,
+		.release = cgroup_pressure_release,
+	},
+	{
+		.name = "memory.pressure",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = cgroup_memory_pressure_show,
+		.write = cgroup_memory_pressure_write,
+		.poll = cgroup_pressure_poll,
+		.release = cgroup_pressure_release,
+	},
+	{
+		.name = "cpu.pressure",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = cgroup_cpu_pressure_show,
+		.write = cgroup_cpu_pressure_write,
+		.poll = cgroup_pressure_poll,
+		.release = cgroup_pressure_release,
+	},
+#endif /* CONFIG_PSI */
 	{ }	/* terminate */
 };
 
@@ -4652,6 +4764,7 @@
 			 */
 			cgroup_put(cgroup_parent(cgrp));
 			kernfs_put(cgrp->kn);
+			psi_cgroup_free(cgrp);
 			if (cgroup_on_dfl(cgrp))
 				cgroup_rstat_exit(cgrp);
 			kfree(cgrp);
@@ -4910,10 +5023,15 @@
 	cgrp->self.parent = &parent->self;
 	cgrp->root = root;
 	cgrp->level = level;
-	ret = cgroup_bpf_inherit(cgrp);
+
+	ret = psi_cgroup_alloc(cgrp);
 	if (ret)
 		goto out_idr_free;
 
+	ret = cgroup_bpf_inherit(cgrp);
+	if (ret)
+		goto out_psi_free;
+
 	spin_lock_irq(&css_set_lock);
 	for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
 		cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
@@ -4953,6 +5071,8 @@
 
 	return cgrp;
 
+out_psi_free:
+	psi_cgroup_free(cgrp);
 out_idr_free:
 	cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
 out_stat_exit:
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index ff956cc..a6561a4 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -103,6 +103,7 @@
 
 	/* user-configured CPUs and Memory Nodes allow to tasks */
 	cpumask_var_t cpus_allowed;
+	cpumask_var_t cpus_requested;
 	nodemask_t mems_allowed;
 
 	/* effective CPUs and Memory Nodes allow to tasks */
@@ -412,7 +413,7 @@
 
 static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
 {
-	return	cpumask_subset(p->cpus_allowed, q->cpus_allowed) &&
+	return	cpumask_subset(p->cpus_requested, q->cpus_requested) &&
 		nodes_subset(p->mems_allowed, q->mems_allowed) &&
 		is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
 		is_mem_exclusive(p) <= is_mem_exclusive(q);
@@ -511,7 +512,7 @@
 	cpuset_for_each_child(c, css, par) {
 		if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
 		    c != cur &&
-		    cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
+		    cpumask_intersects(trial->cpus_requested, c->cpus_requested))
 			goto out;
 		if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
 		    c != cur &&
@@ -972,17 +973,18 @@
 	if (!*buf) {
 		cpumask_clear(trialcs->cpus_allowed);
 	} else {
-		retval = cpulist_parse(buf, trialcs->cpus_allowed);
+		retval = cpulist_parse(buf, trialcs->cpus_requested);
 		if (retval < 0)
 			return retval;
 
-		if (!cpumask_subset(trialcs->cpus_allowed,
-				    top_cpuset.cpus_allowed))
+		if (!cpumask_subset(trialcs->cpus_requested, cpu_present_mask))
 			return -EINVAL;
+
+		cpumask_and(trialcs->cpus_allowed, trialcs->cpus_requested, cpu_active_mask);
 	}
 
 	/* Nothing to do if the cpus didn't change */
-	if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
+	if (cpumask_equal(cs->cpus_requested, trialcs->cpus_requested))
 		return 0;
 
 	retval = validate_change(cs, trialcs);
@@ -991,6 +993,7 @@
 
 	spin_lock_irq(&callback_lock);
 	cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
+	cpumask_copy(cs->cpus_requested, trialcs->cpus_requested);
 	spin_unlock_irq(&callback_lock);
 
 	/* use trialcs->cpus_allowed as a temp variable */
@@ -1759,7 +1762,7 @@
 
 	switch (type) {
 	case FILE_CPULIST:
-		seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed));
+		seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_requested));
 		break;
 	case FILE_MEMLIST:
 		seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed));
@@ -1949,11 +1952,14 @@
 		return ERR_PTR(-ENOMEM);
 	if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL))
 		goto free_cs;
+	if (!alloc_cpumask_var(&cs->cpus_requested, GFP_KERNEL))
+		goto free_allowed;
 	if (!alloc_cpumask_var(&cs->effective_cpus, GFP_KERNEL))
-		goto free_cpus;
+		goto free_requested;
 
 	set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
 	cpumask_clear(cs->cpus_allowed);
+	cpumask_clear(cs->cpus_requested);
 	nodes_clear(cs->mems_allowed);
 	cpumask_clear(cs->effective_cpus);
 	nodes_clear(cs->effective_mems);
@@ -1962,7 +1968,9 @@
 
 	return &cs->css;
 
-free_cpus:
+free_requested:
+	free_cpumask_var(cs->cpus_requested);
+free_allowed:
 	free_cpumask_var(cs->cpus_allowed);
 free_cs:
 	kfree(cs);
@@ -2025,6 +2033,7 @@
 	cs->mems_allowed = parent->mems_allowed;
 	cs->effective_mems = parent->mems_allowed;
 	cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
+	cpumask_copy(cs->cpus_requested, parent->cpus_requested);
 	cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
 	spin_unlock_irq(&callback_lock);
 out_unlock:
@@ -2059,6 +2068,7 @@
 
 	free_cpumask_var(cs->effective_cpus);
 	free_cpumask_var(cs->cpus_allowed);
+	free_cpumask_var(cs->cpus_requested);
 	kfree(cs);
 }
 
@@ -2121,8 +2131,10 @@
 
 	BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL));
 	BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL));
+	BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_requested, GFP_KERNEL));
 
 	cpumask_setall(top_cpuset.cpus_allowed);
+	cpumask_setall(top_cpuset.cpus_requested);
 	nodes_setall(top_cpuset.mems_allowed);
 	cpumask_setall(top_cpuset.effective_cpus);
 	nodes_setall(top_cpuset.effective_mems);
@@ -2255,7 +2267,7 @@
 		goto retry;
 	}
 
-	cpumask_and(&new_cpus, cs->cpus_allowed, parent_cs(cs)->effective_cpus);
+	cpumask_and(&new_cpus, cs->cpus_requested, parent_cs(cs)->effective_cpus);
 	nodes_and(new_mems, cs->mems_allowed, parent_cs(cs)->effective_mems);
 
 	cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index d9f855c..6ebbdf5 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1220,6 +1220,7 @@
 void enable_nonboot_cpus(void)
 {
 	int cpu, error;
+	struct device *cpu_device;
 
 	/* Allow everyone to use the CPU hotplug again */
 	cpu_maps_update_begin();
@@ -1237,6 +1238,12 @@
 		trace_suspend_resume(TPS("CPU_ON"), cpu, false);
 		if (!error) {
 			pr_info("CPU%d is up\n", cpu);
+			cpu_device = get_cpu_device(cpu);
+			if (!cpu_device)
+				pr_err("%s: failed to get cpu%d device\n",
+				       __func__, cpu);
+			else
+				kobject_uevent(&cpu_device->kobj, KOBJ_ONLINE);
 			continue;
 		}
 		pr_warn("Error taking CPU%d up: %d\n", cpu, error);
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index f338d23..9592420 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -2556,16 +2556,11 @@
 	}
 	kdb_printf("%02ld:%02ld\n", val.uptime/(60*60), (val.uptime/60)%60);
 
-	/* lifted from fs/proc/proc_misc.c::loadavg_read_proc() */
-
-#define LOAD_INT(x) ((x) >> FSHIFT)
-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
 	kdb_printf("load avg   %ld.%02ld %ld.%02ld %ld.%02ld\n",
 		LOAD_INT(val.loads[0]), LOAD_FRAC(val.loads[0]),
 		LOAD_INT(val.loads[1]), LOAD_FRAC(val.loads[1]),
 		LOAD_INT(val.loads[2]), LOAD_FRAC(val.loads[2]));
-#undef LOAD_INT
-#undef LOAD_FRAC
+
 	/* Display in kilobytes */
 #define K(x) ((x) << (PAGE_SHIFT - 10))
 	kdb_printf("\nMemTotal:       %8lu kB\nMemFree:        %8lu kB\n"
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index ca8ac28..2a12b98 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -135,9 +135,12 @@
 	d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
 	tmp = d->freepages_delay_total + tsk->delays->freepages_delay;
 	d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
+	tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay;
+	d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp;
 	d->blkio_count += tsk->delays->blkio_count;
 	d->swapin_count += tsk->delays->swapin_count;
 	d->freepages_count += tsk->delays->freepages_count;
+	d->thrashing_count += tsk->delays->thrashing_count;
 	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
 
 	return 0;
@@ -169,3 +172,15 @@
 		&current->delays->freepages_count);
 }
 
+void __delayacct_thrashing_start(void)
+{
+	current->delays->thrashing_start = ktime_get_ns();
+}
+
+void __delayacct_thrashing_end(void)
+{
+	delayacct_end(&current->delays->lock,
+		      &current->delays->thrashing_start,
+		      &current->delays->thrashing_delay,
+		      &current->delays->thrashing_count);
+}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 7ca44b8..54136fa 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -397,8 +397,13 @@
  *   0 - disallow raw tracepoint access for unpriv
  *   1 - disallow cpu events for unpriv
  *   2 - disallow kernel profiling for unpriv
+ *   3 - disallow all unpriv perf event use
  */
+#ifdef CONFIG_SECURITY_PERF_EVENTS_RESTRICT
+int sysctl_perf_event_paranoid __read_mostly = 3;
+#else
 int sysctl_perf_event_paranoid __read_mostly = 2;
+#endif
 
 /* Minimum for 512 kiB + 1 user control page */
 int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */
@@ -10485,6 +10490,9 @@
 	if (flags & ~PERF_FLAG_ALL)
 		return -EINVAL;
 
+	if (perf_paranoid_any() && !capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
 	err = perf_copy_attr(attr_uptr, &attr);
 	if (err)
 		return err;
diff --git a/kernel/fork.c b/kernel/fork.c
index e76ce81..1a72fea 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -91,6 +91,7 @@
 #include <linux/kcov.h>
 #include <linux/livepatch.h>
 #include <linux/thread_info.h>
+#include <linux/cpufreq_times.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -394,6 +395,8 @@
 
 void free_task(struct task_struct *tsk)
 {
+	cpufreq_task_times_exit(tsk);
+
 #ifndef CONFIG_THREAD_INFO_IN_TASK
 	/*
 	 * The task is finally done with both the stack and thread_info,
@@ -1733,6 +1736,8 @@
 	if (!p)
 		goto fork_out;
 
+	cpufreq_task_times_init(p);
+
 	/*
 	 * This _must_ happen before we call free_task(), i.e. before we jump
 	 * to any of the bad_fork_* labels. This is to avoid freeing
@@ -1804,6 +1809,10 @@
 
 	p->default_timer_slack_ns = current->timer_slack_ns;
 
+#ifdef CONFIG_PSI
+	p->psi_flags = 0;
+#endif
+
 	task_io_accounting_init(&p->ioac);
 	acct_clear_integrals(p);
 
@@ -2206,6 +2215,8 @@
 	if (IS_ERR(p))
 		return PTR_ERR(p);
 
+	cpufreq_task_times_alloc(p);
+
 	/*
 	 * Do this prior waking up the new thread - the thread pointer
 	 * might get invalid after that point, if the thread exits quickly.
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 02a0b01..672ed40 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -268,6 +268,24 @@
 	       !!__bpf_address_lookup(addr, symbolsize, offset, namebuf);
 }
 
+#ifdef CONFIG_CFI_CLANG
+/*
+ * LLVM appends .cfi to function names when CONFIG_CFI_CLANG is enabled,
+ * which causes confusion and potentially breaks user space tools, so we
+ * will strip the postfix from expanded symbol names.
+ */
+static inline void cleanup_symbol_name(char *s)
+{
+	char *res;
+
+	res = strrchr(s, '.');
+	if (res && !strcmp(res, ".cfi"))
+		*res = '\0';
+}
+#else
+static inline void cleanup_symbol_name(char *s) {}
+#endif
+
 /*
  * Lookup an address
  * - modname is set to NULL if it's in the kernel.
@@ -294,7 +312,9 @@
 				       namebuf, KSYM_NAME_LEN);
 		if (modname)
 			*modname = NULL;
-		return namebuf;
+
+		ret = namebuf;
+		goto found;
 	}
 
 	/* See if it's in a module or a BPF JITed image. */
@@ -307,11 +327,16 @@
 	if (!ret)
 		ret = ftrace_mod_address_lookup(addr, symbolsize,
 						offset, modname, namebuf);
+
+found:
+	cleanup_symbol_name(namebuf);
 	return ret;
 }
 
 int lookup_symbol_name(unsigned long addr, char *symname)
 {
+	int res;
+
 	symname[0] = '\0';
 	symname[KSYM_NAME_LEN - 1] = '\0';
 
@@ -322,15 +347,23 @@
 		/* Grab name */
 		kallsyms_expand_symbol(get_symbol_offset(pos),
 				       symname, KSYM_NAME_LEN);
-		return 0;
+		goto found;
 	}
 	/* See if it's in a module. */
-	return lookup_module_symbol_name(addr, symname);
+	res = lookup_module_symbol_name(addr, symname);
+	if (res)
+		return res;
+
+found:
+	cleanup_symbol_name(symname);
+	return 0;
 }
 
 int lookup_symbol_attrs(unsigned long addr, unsigned long *size,
 			unsigned long *offset, char *modname, char *name)
 {
+	int res;
+
 	name[0] = '\0';
 	name[KSYM_NAME_LEN - 1] = '\0';
 
@@ -342,10 +375,16 @@
 		kallsyms_expand_symbol(get_symbol_offset(pos),
 				       name, KSYM_NAME_LEN);
 		modname[0] = '\0';
-		return 0;
+		goto found;
 	}
 	/* See if it's in a module. */
-	return lookup_module_symbol_attrs(addr, size, offset, modname, name);
+	res = lookup_module_symbol_attrs(addr, size, offset, modname, name);
+	if (res)
+		return res;
+
+found:
+	cleanup_symbol_name(name);
+	return 0;
 }
 
 /* Look up a kernel symbol and return it in a text buffer. */
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 087d18d..e3dfad2 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -11,6 +11,7 @@
 #include <linux/kthread.h>
 #include <linux/completion.h>
 #include <linux/err.h>
+#include <linux/cgroup.h>
 #include <linux/cpuset.h>
 #include <linux/unistd.h>
 #include <linux/file.h>
diff --git a/kernel/module.c b/kernel/module.c
index 3fda10c..8644c18 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2130,6 +2130,8 @@
 {
 }
 
+static void cfi_cleanup(struct module *mod);
+
 /* Free a module, remove from lists, etc. */
 static void free_module(struct module *mod)
 {
@@ -2171,6 +2173,10 @@
 
 	/* This may be empty, but that's OK */
 	disable_ro_nx(&mod->init_layout);
+
+	/* Clean up CFI for the module. */
+	cfi_cleanup(mod);
+
 	module_arch_freeing_init(mod);
 	module_memfree(mod->init_layout.base);
 	kfree(mod->args);
@@ -3358,6 +3364,8 @@
 	return 0;
 }
 
+static void cfi_init(struct module *mod);
+
 static int post_relocation(struct module *mod, const struct load_info *info)
 {
 	/* Sort exception table now relocations are done. */
@@ -3370,6 +3378,9 @@
 	/* Setup kallsyms-specific fields. */
 	add_kallsyms(mod, info);
 
+	/* Setup CFI for the module. */
+	cfi_init(mod);
+
 	/* Arch-specific module finalizing. */
 	return module_finalize(info->hdr, info->sechdrs, mod);
 }
@@ -4137,6 +4148,22 @@
 }
 #endif /* CONFIG_KALLSYMS */
 
+static void cfi_init(struct module *mod)
+{
+#ifdef CONFIG_CFI_CLANG
+	mod->cfi_check =
+		(cfi_check_fn)mod_find_symname(mod, CFI_CHECK_FN_NAME);
+	cfi_module_add(mod, module_addr_min, module_addr_max);
+#endif
+}
+
+static void cfi_cleanup(struct module *mod)
+{
+#ifdef CONFIG_CFI_CLANG
+	cfi_module_remove(mod, module_addr_min, module_addr_max);
+#endif
+}
+
 /* Maximum number of characters written by module_flags() */
 #define MODULE_FLAGS_BUF_SIZE (TAINT_FLAGS_COUNT + 4)
 
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 3a6c2f8..f8fe57d 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -298,3 +298,18 @@
 
 config CPU_PM
 	bool
+
+config ENERGY_MODEL
+	bool "Energy Model for CPUs"
+	depends on SMP
+	depends on CPU_FREQ
+	default n
+	help
+	  Several subsystems (thermal and/or the task scheduler for example)
+	  can leverage information about the energy consumed by CPUs to make
+	  smarter decisions. This config option enables the framework from
+	  which subsystems can access the energy models.
+
+	  The exact usage of the energy model is subsystem-dependent.
+
+	  If in doubt, say N.
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index a3f79f0e..bbf0676 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -15,3 +15,6 @@
 obj-$(CONFIG_PM_WAKELOCKS)	+= wakelock.o
 
 obj-$(CONFIG_MAGIC_SYSRQ)	+= poweroff.o
+
+obj-$(CONFIG_ENERGY_MODEL)	+= energy_model.o
+obj-$(CONFIG_SUSPEND)		+= wakeup_reason.o
diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
new file mode 100644
index 0000000..7d66ee6
--- /dev/null
+++ b/kernel/power/energy_model.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Energy Model of CPUs
+ *
+ * Copyright (c) 2018, Arm ltd.
+ * Written by: Quentin Perret, Arm ltd.
+ */
+
+#define pr_fmt(fmt) "energy_model: " fmt
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/debugfs.h>
+#include <linux/energy_model.h>
+#include <linux/sched/topology.h>
+#include <linux/slab.h>
+
+/* Mapping of each CPU to the performance domain to which it belongs. */
+static DEFINE_PER_CPU(struct em_perf_domain *, em_data);
+
+/*
+ * Mutex serializing the registrations of performance domains and letting
+ * callbacks defined by drivers sleep.
+ */
+static DEFINE_MUTEX(em_pd_mutex);
+
+#ifdef CONFIG_DEBUG_FS
+static struct dentry *rootdir;
+
+static void em_debug_create_cs(struct em_cap_state *cs, struct dentry *pd)
+{
+	struct dentry *d;
+	char name[24];
+
+	snprintf(name, sizeof(name), "cs:%lu", cs->frequency);
+
+	/* Create per-cs directory */
+	d = debugfs_create_dir(name, pd);
+	debugfs_create_ulong("frequency", 0444, d, &cs->frequency);
+	debugfs_create_ulong("power", 0444, d, &cs->power);
+	debugfs_create_ulong("cost", 0444, d, &cs->cost);
+}
+
+static int em_debug_cpus_show(struct seq_file *s, void *unused)
+{
+	seq_printf(s, "%*pbl\n", cpumask_pr_args(to_cpumask(s->private)));
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);
+
+static void em_debug_create_pd(struct em_perf_domain *pd, int cpu)
+{
+	struct dentry *d;
+	char name[8];
+	int i;
+
+	snprintf(name, sizeof(name), "pd%d", cpu);
+
+	/* Create the directory of the performance domain */
+	d = debugfs_create_dir(name, rootdir);
+
+	debugfs_create_file("cpus", 0444, d, pd->cpus, &em_debug_cpus_fops);
+
+	/* Create a sub-directory for each capacity state */
+	for (i = 0; i < pd->nr_cap_states; i++)
+		em_debug_create_cs(&pd->table[i], d);
+}
+
+static int __init em_debug_init(void)
+{
+	/* Create /sys/kernel/debug/energy_model directory */
+	rootdir = debugfs_create_dir("energy_model", NULL);
+
+	return 0;
+}
+core_initcall(em_debug_init);
+#else /* CONFIG_DEBUG_FS */
+static void em_debug_create_pd(struct em_perf_domain *pd, int cpu) {}
+#endif
+static struct em_perf_domain *em_create_pd(cpumask_t *span, int nr_states,
+						struct em_data_callback *cb)
+{
+	unsigned long opp_eff, prev_opp_eff = ULONG_MAX;
+	unsigned long power, freq, prev_freq = 0;
+	int i, ret, cpu = cpumask_first(span);
+	struct em_cap_state *table;
+	struct em_perf_domain *pd;
+	u64 fmax;
+
+	if (!cb->active_power)
+		return NULL;
+
+	pd = kzalloc(sizeof(*pd) + cpumask_size(), GFP_KERNEL);
+	if (!pd)
+		return NULL;
+
+	table = kcalloc(nr_states, sizeof(*table), GFP_KERNEL);
+	if (!table)
+		goto free_pd;
+
+	/* Build the list of capacity states for this performance domain */
+	for (i = 0, freq = 0; i < nr_states; i++, freq++) {
+		/*
+		 * active_power() is a driver callback which ceils 'freq' to
+		 * the lowest capacity state of 'cpu' at or above 'freq' and
+		 * updates 'power' and 'freq' accordingly.
+		 */
+		ret = cb->active_power(&power, &freq, cpu);
+		if (ret) {
+			pr_err("pd%d: invalid cap. state: %d\n", cpu, ret);
+			goto free_cs_table;
+		}
+
+		/*
+		 * We expect the driver callback to increase the frequency for
+		 * higher capacity states.
+		 */
+		if (freq <= prev_freq) {
+			pr_err("pd%d: non-increasing freq: %lu\n", cpu, freq);
+			goto free_cs_table;
+		}
+
+		/*
+		 * The power returned by active_power() is expected to be
+		 * positive, in milli-watts, and to fit into 16 bits.
+		 */
+		if (!power || power > EM_CPU_MAX_POWER) {
+			pr_err("pd%d: invalid power: %lu\n", cpu, power);
+			goto free_cs_table;
+		}
+
+		table[i].power = power;
+		table[i].frequency = prev_freq = freq;
+
+		/*
+		 * The hertz/watts efficiency ratio should decrease as the
+		 * frequency grows on sane platforms. But this isn't always
+		 * true in practice so warn the user if a higher OPP is more
+		 * power efficient than a lower one.
+		 */
+		opp_eff = freq / power;
+		if (opp_eff >= prev_opp_eff)
+			pr_warn("pd%d: hertz/watts ratio non-monotonically decreasing: em_cap_state %d >= em_cap_state%d\n",
+					cpu, i, i - 1);
+		prev_opp_eff = opp_eff;
+	}
+
+	/* Compute the cost of each capacity_state. */
+	fmax = (u64) table[nr_states - 1].frequency;
+	for (i = 0; i < nr_states; i++) {
+		table[i].cost = div64_u64(fmax * table[i].power,
+					  table[i].frequency);
+	}
+
+	pd->table = table;
+	pd->nr_cap_states = nr_states;
+	cpumask_copy(to_cpumask(pd->cpus), span);
+
+	em_debug_create_pd(pd, cpu);
+
+	return pd;
+
+free_cs_table:
+	kfree(table);
+free_pd:
+	kfree(pd);
+
+	return NULL;
+}
+
+/**
+ * em_cpu_get() - Return the performance domain for a CPU
+ * @cpu : CPU to find the performance domain for
+ *
+ * Return: the performance domain to which 'cpu' belongs, or NULL if it doesn't
+ * exist.
+ */
+struct em_perf_domain *em_cpu_get(int cpu)
+{
+	return READ_ONCE(per_cpu(em_data, cpu));
+}
+EXPORT_SYMBOL_GPL(em_cpu_get);
+
+/**
+ * em_register_perf_domain() - Register the Energy Model of a performance domain
+ * @span	: Mask of CPUs in the performance domain
+ * @nr_states	: Number of capacity states to register
+ * @cb		: Callback functions providing the data of the Energy Model
+ *
+ * Create Energy Model tables for a performance domain using the callbacks
+ * defined in cb.
+ *
+ * If multiple clients register the same performance domain, all but the first
+ * registration will be ignored.
+ *
+ * Return: 0 on success.
+ */
+int em_register_perf_domain(cpumask_t *span, unsigned int nr_states,
+						struct em_data_callback *cb)
+{
+	unsigned long cap, prev_cap = 0;
+	struct em_perf_domain *pd;
+	int cpu, ret = 0;
+
+	if (!span || !nr_states || !cb)
+		return -EINVAL;
+
+	/*
+	 * Use a mutex to serialize the registration of performance domains and
+	 * let the driver-defined callback functions sleep.
+	 */
+	mutex_lock(&em_pd_mutex);
+
+	for_each_cpu(cpu, span) {
+		/* Make sure we don't register again an existing domain. */
+		if (READ_ONCE(per_cpu(em_data, cpu))) {
+			ret = -EEXIST;
+			goto unlock;
+		}
+
+		/*
+		 * All CPUs of a domain must have the same micro-architecture
+		 * since they all share the same table.
+		 */
+		cap = arch_scale_cpu_capacity(NULL, cpu);
+		if (prev_cap && prev_cap != cap) {
+			pr_err("CPUs of %*pbl must have the same capacity\n",
+							cpumask_pr_args(span));
+			ret = -EINVAL;
+			goto unlock;
+		}
+		prev_cap = cap;
+	}
+
+	/* Create the performance domain and add it to the Energy Model. */
+	pd = em_create_pd(span, nr_states, cb);
+	if (!pd) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	for_each_cpu(cpu, span) {
+		/*
+		 * The per-cpu array can be read concurrently from em_cpu_get().
+		 * The barrier enforces the ordering needed to make sure readers
+		 * can only access well formed em_perf_domain structs.
+		 */
+		smp_store_release(per_cpu_ptr(&em_data, cpu), pd);
+	}
+
+	pr_debug("Created perf domain %*pbl\n", cpumask_pr_args(span));
+unlock:
+	mutex_unlock(&em_pd_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(em_register_perf_domain);
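
As an illustration of how this interface is meant to be consumed, the sketch below shows a hypothetical driver registering a three-state performance domain for CPU0 (the OPP frequencies and power numbers are invented, and the EM_DATA_CB() helper from <linux/energy_model.h> is assumed). Each (freq, power) pair reported by the callback is turned by em_create_pd() above into cost = fmax * power / freq; for example the 1000000 kHz / 200 mW state with fmax = 1500000 kHz gets cost = 1500000 * 200 / 1000000 = 300.

	/*
	 * Illustrative only: a hypothetical driver-side registration.
	 * A real driver would derive the table from the OPP library or
	 * firmware instead of hard-coding it.
	 */
	#include <linux/cpumask.h>
	#include <linux/energy_model.h>
	#include <linux/kernel.h>

	static const unsigned long example_freq[] = {  500000, 1000000, 1500000 }; /* kHz */
	static const unsigned long example_mw[]   = {      80,     200,     450 }; /* mW  */

	static int example_active_power(unsigned long *power, unsigned long *freq,
					int cpu)
	{
		int i;

		/* Ceil *freq to the lowest OPP at or above it, as em_create_pd() expects. */
		for (i = 0; i < ARRAY_SIZE(example_freq); i++) {
			if (example_freq[i] >= *freq) {
				*freq = example_freq[i];
				*power = example_mw[i];
				return 0;
			}
		}
		return -EINVAL;
	}

	static int example_register_em(void)
	{
		struct em_data_callback em_cb = EM_DATA_CB(example_active_power);
		static cpumask_t span;

		cpumask_clear(&span);
		cpumask_set_cpu(0, &span);

		return em_register_perf_domain(&span, ARRAY_SIZE(example_freq), &em_cb);
	}
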
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 7381d49..c366e3d 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -22,6 +22,7 @@
 #include <linux/kmod.h>
 #include <trace/events/power.h>
 #include <linux/cpuset.h>
+#include <linux/wakeup_reason.h>
 
 /*
  * Timeout for stopping processes
@@ -38,6 +39,9 @@
 	unsigned int elapsed_msecs;
 	bool wakeup = false;
 	int sleep_usecs = USEC_PER_MSEC;
+#ifdef CONFIG_PM_SLEEP
+	char suspend_abort[MAX_SUSPEND_ABORT_LEN];
+#endif
 
 	start = ktime_get_boottime();
 
@@ -67,6 +71,11 @@
 			break;
 
 		if (pm_wakeup_pending()) {
+#ifdef CONFIG_PM_SLEEP
+			pm_get_active_wakeup_sources(suspend_abort,
+				MAX_SUSPEND_ABORT_LEN);
+			log_suspend_abort_reason(suspend_abort);
+#endif
 			wakeup = true;
 			break;
 		}
@@ -85,26 +94,27 @@
 	elapsed = ktime_sub(end, start);
 	elapsed_msecs = ktime_to_ms(elapsed);
 
-	if (todo) {
+	if (wakeup) {
 		pr_cont("\n");
-		pr_err("Freezing of tasks %s after %d.%03d seconds "
-		       "(%d tasks refusing to freeze, wq_busy=%d):\n",
-		       wakeup ? "aborted" : "failed",
+		pr_err("Freezing of tasks aborted after %d.%03d seconds",
+		       elapsed_msecs / 1000, elapsed_msecs % 1000);
+	} else if (todo) {
+		pr_cont("\n");
+		pr_err("Freezing of tasks failed after %d.%03d seconds"
+		       " (%d tasks refusing to freeze, wq_busy=%d):\n",
 		       elapsed_msecs / 1000, elapsed_msecs % 1000,
 		       todo - wq_busy, wq_busy);
 
 		if (wq_busy)
 			show_workqueue_state();
 
-		if (!wakeup) {
-			read_lock(&tasklist_lock);
-			for_each_process_thread(g, p) {
-				if (p != current && !freezer_should_skip(p)
-				    && freezing(p) && !frozen(p))
-					sched_show_task(p);
-			}
-			read_unlock(&tasklist_lock);
+		read_lock(&tasklist_lock);
+		for_each_process_thread(g, p) {
+			if (p != current && !freezer_should_skip(p)
+			    && freezing(p) && !frozen(p))
+				sched_show_task(p);
 		}
+		read_unlock(&tasklist_lock);
 	} else {
 		pr_cont("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000,
 			elapsed_msecs % 1000);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 0bd595a..36cb7ed 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -32,6 +32,7 @@
 #include <trace/events/power.h>
 #include <linux/compiler.h>
 #include <linux/moduleparam.h>
+#include <linux/wakeup_reason.h>
 
 #include "power.h"
 
@@ -396,7 +397,8 @@
  */
 static int suspend_enter(suspend_state_t state, bool *wakeup)
 {
-	int error;
+	char suspend_abort[MAX_SUSPEND_ABORT_LEN];
+	int error, last_dev;
 
 	error = platform_suspend_prepare(state);
 	if (error)
@@ -404,7 +406,11 @@
 
 	error = dpm_suspend_late(PMSG_SUSPEND);
 	if (error) {
+		last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1;
+		last_dev %= REC_FAILED_NUM;
 		pr_err("late suspend of devices failed\n");
+		log_suspend_abort_reason("%s device failed to power down",
+			suspend_stats.failed_devs[last_dev]);
 		goto Platform_finish;
 	}
 	error = platform_suspend_prepare_late(state);
@@ -418,7 +424,11 @@
 
 	error = dpm_suspend_noirq(PMSG_SUSPEND);
 	if (error) {
+		last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1;
+		last_dev %= REC_FAILED_NUM;
 		pr_err("noirq suspend of devices failed\n");
+		log_suspend_abort_reason("noirq suspend of %s device failed",
+			suspend_stats.failed_devs[last_dev]);
 		goto Platform_early_resume;
 	}
 	error = platform_suspend_prepare_noirq(state);
@@ -429,8 +439,10 @@
 		goto Platform_wake;
 
 	error = disable_nonboot_cpus();
-	if (error || suspend_test(TEST_CPUS))
+	if (error || suspend_test(TEST_CPUS)) {
+		log_suspend_abort_reason("Disabling non-boot cpus failed");
 		goto Enable_cpus;
+	}
 
 	arch_suspend_disable_irqs();
 	BUG_ON(!irqs_disabled());
@@ -447,6 +459,9 @@
 			trace_suspend_resume(TPS("machine_suspend"),
 				state, false);
 		} else if (*wakeup) {
+			pm_get_active_wakeup_sources(suspend_abort,
+				MAX_SUSPEND_ABORT_LEN);
+			log_suspend_abort_reason(suspend_abort);
 			error = -EBUSY;
 		}
 		syscore_resume();
@@ -498,6 +513,7 @@
 	error = dpm_suspend_start(PMSG_SUSPEND);
 	if (error) {
 		pr_err("Some devices failed to suspend, or early wake event detected\n");
+		log_suspend_abort_reason("Some devices failed to suspend, or early wake event detected");
 		goto Recover_platform;
 	}
 	suspend_test_finish("suspend devices");
diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c
new file mode 100644
index 0000000..b7e3e55
--- /dev/null
+++ b/kernel/power/wakeup_reason.c
@@ -0,0 +1,214 @@
+/*
+ * kernel/power/wakeup_reason.c
+ *
+ * Logs the reasons which caused the kernel to resume from
+ * the suspend mode.
+ *
+ * Copyright (C) 2014 Google, Inc.
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/wakeup_reason.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <linux/suspend.h>
+
+
+#define MAX_WAKEUP_REASON_IRQS 32
+static int irq_list[MAX_WAKEUP_REASON_IRQS];
+static int irqcount;
+static bool suspend_abort;
+static char abort_reason[MAX_SUSPEND_ABORT_LEN];
+static struct kobject *wakeup_reason;
+static spinlock_t resume_reason_lock;
+
+static ktime_t last_monotime; /* monotonic time before last suspend */
+static ktime_t curr_monotime; /* monotonic time after last suspend */
+static ktime_t last_stime; /* monotonic boottime offset before last suspend */
+static ktime_t curr_stime; /* monotonic boottime offset after last suspend */
+
+static ssize_t last_resume_reason_show(struct kobject *kobj, struct kobj_attribute *attr,
+		char *buf)
+{
+	int irq_no, buf_offset = 0;
+	struct irq_desc *desc;
+	unsigned long flags;
+	spin_lock_irqsave(&resume_reason_lock, flags);
+	if (suspend_abort) {
+		buf_offset = sprintf(buf, "Abort: %s", abort_reason);
+	} else {
+		for (irq_no = 0; irq_no < irqcount; irq_no++) {
+			desc = irq_to_desc(irq_list[irq_no]);
+			if (desc && desc->action && desc->action->name)
+				buf_offset += sprintf(buf + buf_offset, "%d %s\n",
+						irq_list[irq_no], desc->action->name);
+			else
+				buf_offset += sprintf(buf + buf_offset, "%d\n",
+						irq_list[irq_no]);
+		}
+	}
+	spin_unlock_irqrestore(&resume_reason_lock, flags);
+	return buf_offset;
+}
+
+static ssize_t last_suspend_time_show(struct kobject *kobj,
+			struct kobj_attribute *attr, char *buf)
+{
+	struct timespec sleep_time;
+	struct timespec total_time;
+	struct timespec suspend_resume_time;
+
+	/*
+	 * total_time is calculated from monotonic boot offsets because,
+	 * unlike CLOCK_MONOTONIC, it includes the time spent in the suspend state.
+	 */
+	total_time = ktime_to_timespec(ktime_sub(curr_stime, last_stime));
+
+	/*
+	 * suspend_resume_time is calculated as the monotonic (CLOCK_MONOTONIC)
+	 * time interval between entering suspend and completing resume.
+	 */
+	suspend_resume_time = ktime_to_timespec(ktime_sub(curr_monotime, last_monotime));
+
+	/* sleep_time = total_time - suspend_resume_time */
+	sleep_time = timespec_sub(total_time, suspend_resume_time);
+
+	/* Export suspend_resume_time and sleep_time in pair here. */
+	return sprintf(buf, "%lu.%09lu %lu.%09lu\n",
+				suspend_resume_time.tv_sec, suspend_resume_time.tv_nsec,
+				sleep_time.tv_sec, sleep_time.tv_nsec);
+}
+
+static struct kobj_attribute resume_reason = __ATTR_RO(last_resume_reason);
+static struct kobj_attribute suspend_time = __ATTR_RO(last_suspend_time);
+
+static struct attribute *attrs[] = {
+	&resume_reason.attr,
+	&suspend_time.attr,
+	NULL,
+};
+static struct attribute_group attr_group = {
+	.attrs = attrs,
+};
+
+/*
+ * Logs the wakeup reason to the kernel log and
+ * stores the IRQ so it can be exposed to user space via sysfs.
+ */
+void log_wakeup_reason(int irq)
+{
+	struct irq_desc *desc;
+	unsigned long flags;
+	desc = irq_to_desc(irq);
+	if (desc && desc->action && desc->action->name)
+		printk(KERN_INFO "Resume caused by IRQ %d, %s\n", irq,
+				desc->action->name);
+	else
+		printk(KERN_INFO "Resume caused by IRQ %d\n", irq);
+
+	spin_lock_irqsave(&resume_reason_lock, flags);
+	if (irqcount == MAX_WAKEUP_REASON_IRQS) {
+		spin_unlock_irqrestore(&resume_reason_lock, flags);
+		printk(KERN_WARNING "Resume caused by more than %d IRQs\n",
+				MAX_WAKEUP_REASON_IRQS);
+		return;
+	}
+
+	irq_list[irqcount++] = irq;
+	spin_unlock_irqrestore(&resume_reason_lock, flags);
+}
+
+void log_suspend_abort_reason(const char *fmt, ...)
+{
+	unsigned long flags;
+	va_list args;
+
+	spin_lock_irqsave(&resume_reason_lock, flags);
+
+	/* Suspend abort reason has already been logged. */
+	if (suspend_abort) {
+		spin_unlock_irqrestore(&resume_reason_lock, flags);
+		return;
+	}
+
+	suspend_abort = true;
+	va_start(args, fmt);
+	vsnprintf(abort_reason, MAX_SUSPEND_ABORT_LEN, fmt, args);
+	va_end(args);
+	spin_unlock_irqrestore(&resume_reason_lock, flags);
+}
+
+/* Detects a suspend and clears all the previous wakeup reasons */
+static int wakeup_reason_pm_event(struct notifier_block *notifier,
+		unsigned long pm_event, void *unused)
+{
+	unsigned long flags;
+	switch (pm_event) {
+	case PM_SUSPEND_PREPARE:
+		spin_lock_irqsave(&resume_reason_lock, flags);
+		irqcount = 0;
+		suspend_abort = false;
+		spin_unlock_irqrestore(&resume_reason_lock, flags);
+		/* monotonic time since boot */
+		last_monotime = ktime_get();
+		/* monotonic time since boot including the time spent in suspend */
+		last_stime = ktime_get_boottime();
+		break;
+	case PM_POST_SUSPEND:
+		/* monotonic time since boot */
+		curr_monotime = ktime_get();
+		/* monotonic time since boot including the time spent in suspend */
+		curr_stime = ktime_get_boottime();
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block wakeup_reason_pm_notifier_block = {
+	.notifier_call = wakeup_reason_pm_event,
+};
+
+/*
+ * Initializes the wakeup_reasons sysfs entries and
+ * registers the pm_event notifier.
+ */
+int __init wakeup_reason_init(void)
+{
+	int retval;
+	spin_lock_init(&resume_reason_lock);
+	retval = register_pm_notifier(&wakeup_reason_pm_notifier_block);
+	if (retval)
+		printk(KERN_WARNING "[%s] failed to register PM notifier %d\n",
+				__func__, retval);
+
+	wakeup_reason = kobject_create_and_add("wakeup_reasons", kernel_kobj);
+	if (!wakeup_reason) {
+		printk(KERN_WARNING "[%s] failed to create a sysfs kobject\n",
+				__func__);
+		return 1;
+	}
+	retval = sysfs_create_group(wakeup_reason, &attr_group);
+	if (retval) {
+		kobject_put(wakeup_reason);
+		printk(KERN_WARNING "[%s] failed to create a sysfs group %d\n",
+				__func__, retval);
+	}
+	return 0;
+}
+
+late_initcall(wakeup_reason_init);
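
Since the kobject above is created under kernel_kobj, the two attributes should surface as /sys/kernel/wakeup_reasons/last_resume_reason and /sys/kernel/wakeup_reasons/last_suspend_time on a typical sysfs mount. A minimal user-space reader, as a sketch:

	/* Sketch of a user-space consumer of the wakeup_reasons sysfs entries.
	 * Paths assume the default sysfs mount point. */
	#include <stdio.h>

	int main(void)
	{
		char line[256];
		FILE *f;

		f = fopen("/sys/kernel/wakeup_reasons/last_resume_reason", "r");
		if (f) {
			while (fgets(line, sizeof(line), f))
				printf("resume reason: %s", line);
			fclose(f);
		}

		f = fopen("/sys/kernel/wakeup_reasons/last_suspend_time", "r");
		if (f) {
			/* Two values: suspend/resume overhead and time asleep, in sec.nsec */
			if (fgets(line, sizeof(line), f))
				printf("last suspend: %s", line);
			fclose(f);
		}
		return 0;
	}
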
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 7fe1834..daa5bca 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -24,8 +24,10 @@
 obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
 obj-$(CONFIG_SCHEDSTATS) += stats.o
 obj-$(CONFIG_SCHED_DEBUG) += debug.o
+obj-$(CONFIG_SCHED_TUNE) += tune.o
 obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
 obj-$(CONFIG_CPU_FREQ) += cpufreq.o
 obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
 obj-$(CONFIG_MEMBARRIER) += membarrier.o
 obj-$(CONFIG_CPU_ISOLATION) += isolation.o
+obj-$(CONFIG_PSI) += psi.o
diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c
index 2d4ff53..2067080 100644
--- a/kernel/sched/autogroup.c
+++ b/kernel/sched/autogroup.c
@@ -259,7 +259,6 @@
 }
 #endif /* CONFIG_PROC_FS */
 
-#ifdef CONFIG_SCHED_DEBUG
 int autogroup_path(struct task_group *tg, char *buf, int buflen)
 {
 	if (!task_group_is_autogroup(tg))
@@ -267,4 +266,3 @@
 
 	return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
 }
-#endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 795c63c..436c1aa 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -181,6 +181,7 @@
 	if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY))
 		update_irq_load_avg(rq, irq_delta + steal);
 #endif
+	update_rq_clock_pelt(rq, delta);
 }
 
 void update_rq_clock(struct rq *rq)
@@ -413,6 +414,8 @@
 	if (cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))
 		return;
 
+	head->count++;
+
 	get_task_struct(task);
 
 	/*
@@ -422,6 +425,10 @@
 	head->lastp = &node->next;
 }
 
+static int
+try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags,
+	       int sibling_count_hint);
+
 void wake_up_q(struct wake_q_head *head)
 {
 	struct wake_q_node *node = head->first;
@@ -436,10 +443,10 @@
 		task->wake_q.next = NULL;
 
 		/*
-		 * wake_up_process() executes a full barrier, which pairs with
+		 * try_to_wake_up() executes a full barrier, which pairs with
 		 * the queueing in wake_q_add() so as not to miss wakeups.
 		 */
-		wake_up_process(task);
+		try_to_wake_up(task, TASK_NORMAL, 0, head->count);
 		put_task_struct(task);
 	}
 }
@@ -702,6 +709,7 @@
 	if (idle_policy(p->policy)) {
 		load->weight = scale_load(WEIGHT_IDLEPRIO);
 		load->inv_weight = WMULT_IDLEPRIO;
+		p->se.runnable_weight = load->weight;
 		return;
 	}
 
@@ -714,6 +722,7 @@
 	} else {
 		load->weight = scale_load(sched_prio_to_weight[prio]);
 		load->inv_weight = sched_prio_to_wmult[prio];
+		p->se.runnable_weight = load->weight;
 	}
 }
 
@@ -722,8 +731,10 @@
 	if (!(flags & ENQUEUE_NOCLOCK))
 		update_rq_clock(rq);
 
-	if (!(flags & ENQUEUE_RESTORE))
+	if (!(flags & ENQUEUE_RESTORE)) {
 		sched_info_queued(rq, p);
+		psi_enqueue(p, flags & ENQUEUE_WAKEUP);
+	}
 
 	p->sched_class->enqueue_task(rq, p, flags);
 }
@@ -733,8 +744,10 @@
 	if (!(flags & DEQUEUE_NOCLOCK))
 		update_rq_clock(rq);
 
-	if (!(flags & DEQUEUE_SAVE))
+	if (!(flags & DEQUEUE_SAVE)) {
 		sched_info_dequeued(rq, p);
+		psi_dequeue(p, flags & DEQUEUE_SLEEP);
+	}
 
 	p->sched_class->dequeue_task(rq, p, flags);
 }
@@ -1524,12 +1537,14 @@
  * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
  */
 static inline
-int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
+int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags,
+		   int sibling_count_hint)
 {
 	lockdep_assert_held(&p->pi_lock);
 
 	if (p->nr_cpus_allowed > 1)
-		cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
+		cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags,
+						     sibling_count_hint);
 	else
 		cpu = cpumask_any(&p->cpus_allowed);
 
@@ -1932,6 +1947,8 @@
  * @p: the thread to be awakened
  * @state: the mask of task states that can be woken
  * @wake_flags: wake modifier flags (WF_*)
+ * @sibling_count_hint: A hint at the number of threads that are being woken up
+ *                      in this event.
  *
  * If (@state & @p->state) @p->state = TASK_RUNNING.
  *
@@ -1947,7 +1964,8 @@
  *	   %false otherwise.
  */
 static int
-try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
+try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags,
+	       int sibling_count_hint)
 {
 	unsigned long flags;
 	int cpu, success = 0;
@@ -2034,9 +2052,11 @@
 		atomic_dec(&task_rq(p)->nr_iowait);
 	}
 
-	cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
+	cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags,
+			     sibling_count_hint);
 	if (task_cpu(p) != cpu) {
 		wake_flags |= WF_MIGRATED;
+		psi_ttwu_dequeue(p);
 		set_task_cpu(p, cpu);
 	}
 
@@ -2121,13 +2141,13 @@
  */
 int wake_up_process(struct task_struct *p)
 {
-	return try_to_wake_up(p, TASK_NORMAL, 0);
+	return try_to_wake_up(p, TASK_NORMAL, 0, 1);
 }
 EXPORT_SYMBOL(wake_up_process);
 
 int wake_up_state(struct task_struct *p, unsigned int state)
 {
-	return try_to_wake_up(p, state, 0);
+	return try_to_wake_up(p, state, 0, 1);
 }
 
 /*
@@ -2409,7 +2429,7 @@
 	 * as we're not fully set-up yet.
 	 */
 	p->recent_used_cpu = task_cpu(p);
-	__set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
+	__set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0, 1));
 #endif
 	rq = __task_rq_lock(p, &rf);
 	update_rq_clock(rq);
@@ -2948,7 +2968,7 @@
 	int dest_cpu;
 
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
-	dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0);
+	dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0, 1);
 	if (dest_cpu == smp_processor_id())
 		goto unlock;
 
@@ -3051,6 +3071,7 @@
 	curr->sched_class->task_tick(rq, curr, 0);
 	cpu_load_update_active(rq);
 	calc_global_load_tick(rq);
+	psi_task_tick(rq);
 
 	rq_unlock(rq, &rf);
 
@@ -3709,7 +3730,7 @@
 int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags,
 			  void *key)
 {
-	return try_to_wake_up(curr->private, mode, wake_flags);
+	return try_to_wake_up(curr->private, mode, wake_flags, 1);
 }
 EXPORT_SYMBOL(default_wake_function);
 
@@ -4933,9 +4954,7 @@
 	struct rq_flags rf;
 	struct rq *rq;
 
-	local_irq_disable();
-	rq = this_rq();
-	rq_lock(rq, &rf);
+	rq = this_rq_lock_irq(&rf);
 
 	schedstat_inc(rq->yld_count);
 	current->sched_class->yield_task(rq);
@@ -6075,6 +6094,8 @@
 
 	init_schedstats();
 
+	psi_init();
+
 	scheduler_running = 1;
 }
 
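
The head->count threaded through wake_q above reaches wake_wide() (in the kernel/sched/fair.c hunk further down in this merge) as sibling_count_hint, where waking at least sd_llc_size siblings at once is treated as a "wide" wakeup and spread across the LLC. A standalone model of that decision, with illustrative numbers:

	/* Standalone model of the wake_wide() heuristic with the
	 * sibling_count_hint introduced by this change. */
	#include <stdbool.h>
	#include <stdio.h>

	static bool wake_wide_model(unsigned int master, unsigned int slave,
				    unsigned int llc_size,
				    unsigned int sibling_count_hint)
	{
		if (sibling_count_hint >= llc_size)
			return true;

		if (master < slave) {
			unsigned int tmp = master;

			master = slave;
			slave = tmp;
		}
		/* Same condition as the kernel code: both flip counts must be large. */
		return !(slave < llc_size || master < slave * llc_size);
	}

	int main(void)
	{
		/* 4-CPU LLC: waking 4 siblings at once is wide regardless of flip counts. */
		printf("%d\n", wake_wide_model(1, 1, 4, 4));	/* 1 */
		printf("%d\n", wake_wide_model(1, 1, 4, 1));	/* 0 */
		return 0;
	}
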
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 64d54ac..2dc8aa8 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -13,11 +13,13 @@
 
 #include "sched.h"
 
+#include <linux/sched/cpufreq.h>
 #include <trace/events/power.h>
 
 struct sugov_tunables {
 	struct gov_attr_set	attr_set;
-	unsigned int		rate_limit_us;
+	unsigned int		up_rate_limit_us;
+	unsigned int		down_rate_limit_us;
 };
 
 struct sugov_policy {
@@ -28,7 +30,9 @@
 
 	raw_spinlock_t		update_lock;	/* For shared policies */
 	u64			last_freq_update_time;
-	s64			freq_update_delay_ns;
+	s64			min_rate_limit_ns;
+	s64			up_rate_delay_ns;
+	s64			down_rate_delay_ns;
 	unsigned int		next_freq;
 	unsigned int		cached_raw_freq;
 
@@ -97,9 +101,32 @@
 		return true;
 	}
 
+	/*
+	 * No need to recalculate the next frequency for at least
+	 * min_rate_limit_us. However, we might still rate limit further
+	 * once the direction of the frequency change is known, according
+	 * to the separate up/down rate limits.
+	 */
+
+	delta_ns = time - sg_policy->last_freq_update_time;
+	return delta_ns >= sg_policy->min_rate_limit_ns;
+}
+
+static bool sugov_up_down_rate_limit(struct sugov_policy *sg_policy, u64 time,
+				     unsigned int next_freq)
+{
+	s64 delta_ns;
+
 	delta_ns = time - sg_policy->last_freq_update_time;
 
-	return delta_ns >= sg_policy->freq_update_delay_ns;
+	if (next_freq > sg_policy->next_freq &&
+	    delta_ns < sg_policy->up_rate_delay_ns)
+		return true;
+
+	if (next_freq < sg_policy->next_freq &&
+	    delta_ns < sg_policy->down_rate_delay_ns)
+		return true;
+
+	return false;
 }
 
 static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time,
@@ -108,6 +135,9 @@
 	if (sg_policy->next_freq == next_freq)
 		return false;
 
+	if (sugov_up_down_rate_limit(sg_policy, time, next_freq))
+		return false;
+
 	sg_policy->next_freq = next_freq;
 	sg_policy->last_freq_update_time = time;
 
@@ -171,7 +201,7 @@
 	unsigned int freq = arch_scale_freq_invariant() ?
 				policy->cpuinfo.max_freq : policy->cur;
 
-	freq = (freq + (freq >> 2)) * util / max;
+	freq = map_util_freq(util, freq, max);
 
 	if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
 		return sg_policy->next_freq;
@@ -193,6 +223,9 @@
  * Where the cfs,rt and dl util numbers are tracked with the same metric and
  * synchronized windows and are thus directly comparable.
  *
+ * The @util parameter passed to this function is assumed to be the aggregation
+ * of RT and CFS util numbers. The cases of DL and IRQ are managed here.
+ *
  * The cfs,rt,dl utilization are the running times measured with rq->clock_task
  * which excludes things like IRQ and steal-time. These latter are then accrued
  * in the irq utilization.
@@ -201,15 +234,14 @@
  * based on the task model parameters and gives the minimal utilization
  * required to meet deadlines.
  */
-static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu)
+unsigned long schedutil_freq_util(int cpu, unsigned long util,
+				  unsigned long max, enum schedutil_type type)
 {
-	struct rq *rq = cpu_rq(sg_cpu->cpu);
-	unsigned long util, irq, max;
+	unsigned long dl_util, irq;
+	struct rq *rq = cpu_rq(cpu);
 
-	sg_cpu->max = max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
-	sg_cpu->bw_dl = cpu_bw_dl(rq);
-
-	if (rt_rq_is_runnable(&rq->rt))
+	if (sched_feat(SUGOV_RT_MAX_FREQ) && type == FREQUENCY_UTIL &&
+						rt_rq_is_runnable(&rq->rt))
 		return max;
 
 	/*
@@ -222,27 +254,33 @@
 		return max;
 
 	/*
-	 * Because the time spend on RT/DL tasks is visible as 'lost' time to
-	 * CFS tasks and we use the same metric to track the effective
-	 * utilization (PELT windows are synchronized) we can directly add them
-	 * to obtain the CPU's actual utilization.
+	 * The function is called with @util defined as the aggregation (the
+	 * sum) of the RT and CFS signals, leaving the special case of DL
+	 * to be dealt with here. The exact way of doing so depends on the
+	 * calling context.
 	 */
-	util = cpu_util_cfs(rq);
-	util += cpu_util_rt(rq);
+	dl_util = cpu_util_dl(rq);
 
 	/*
-	 * We do not make cpu_util_dl() a permanent part of this sum because we
-	 * want to use cpu_bw_dl() later on, but we need to check if the
-	 * CFS+RT+DL sum is saturated (ie. no idle time) such that we select
-	 * f_max when there is no idle time.
+	 * For frequency selection we do not make cpu_util_dl() a permanent part
+	 * of this sum because we want to use cpu_bw_dl() later on, but we need
+	 * to check if the CFS+RT+DL sum is saturated (ie. no idle time) such
+	 * that we select f_max when there is no idle time.
 	 *
 	 * NOTE: numerical errors or stop class might cause us to not quite hit
 	 * saturation when we should -- something for later.
 	 */
-	if ((util + cpu_util_dl(rq)) >= max)
+	if (util + dl_util >= max)
 		return max;
 
 	/*
+	 * OTOH, for energy computation we need the estimated running time, so
+	 * include util_dl and ignore dl_bw.
+	 */
+	if (type == ENERGY_UTIL)
+		util += dl_util;
+
+	/*
 	 * There is still idle time; further improve the number by using the
 	 * irq metric. Because IRQ/steal time is hidden from the task clock we
 	 * need to scale the task numbers:
@@ -264,7 +302,22 @@
 	 * bw_dl as requested freq. However, cpufreq is not yet ready for such
 	 * an interface. So, we only do the latter for now.
 	 */
-	return min(max, util + sg_cpu->bw_dl);
+	if (type == FREQUENCY_UTIL)
+		util += cpu_bw_dl(rq);
+
+	return min(max, util);
+}
+
+static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu)
+{
+	struct rq *rq = cpu_rq(sg_cpu->cpu);
+	unsigned long util = boosted_cpu_util(sg_cpu->cpu, cpu_util_rt(rq));
+	unsigned long max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
+
+	sg_cpu->max = max;
+	sg_cpu->bw_dl = cpu_bw_dl(rq);
+
+	return schedutil_freq_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL);
 }
 
 /**
@@ -556,15 +609,32 @@
 	return container_of(attr_set, struct sugov_tunables, attr_set);
 }
 
-static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
+static DEFINE_MUTEX(min_rate_lock);
+
+static void update_min_rate_limit_ns(struct sugov_policy *sg_policy)
+{
+	mutex_lock(&min_rate_lock);
+	sg_policy->min_rate_limit_ns = min(sg_policy->up_rate_delay_ns,
+					   sg_policy->down_rate_delay_ns);
+	mutex_unlock(&min_rate_lock);
+}
+
+static ssize_t up_rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
 {
 	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
 
-	return sprintf(buf, "%u\n", tunables->rate_limit_us);
+	return sprintf(buf, "%u\n", tunables->up_rate_limit_us);
 }
 
-static ssize_t
-rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
+static ssize_t down_rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
+{
+	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
+
+	return sprintf(buf, "%u\n", tunables->down_rate_limit_us);
+}
+
+static ssize_t up_rate_limit_us_store(struct gov_attr_set *attr_set,
+				      const char *buf, size_t count)
 {
 	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
 	struct sugov_policy *sg_policy;
@@ -573,18 +643,42 @@
 	if (kstrtouint(buf, 10, &rate_limit_us))
 		return -EINVAL;
 
-	tunables->rate_limit_us = rate_limit_us;
+	tunables->up_rate_limit_us = rate_limit_us;
 
-	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
-		sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;
+	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook) {
+		sg_policy->up_rate_delay_ns = rate_limit_us * NSEC_PER_USEC;
+		update_min_rate_limit_ns(sg_policy);
+	}
 
 	return count;
 }
 
-static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);
+static ssize_t down_rate_limit_us_store(struct gov_attr_set *attr_set,
+					const char *buf, size_t count)
+{
+	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
+	struct sugov_policy *sg_policy;
+	unsigned int rate_limit_us;
+
+	if (kstrtouint(buf, 10, &rate_limit_us))
+		return -EINVAL;
+
+	tunables->down_rate_limit_us = rate_limit_us;
+
+	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook) {
+		sg_policy->down_rate_delay_ns = rate_limit_us * NSEC_PER_USEC;
+		update_min_rate_limit_ns(sg_policy);
+	}
+
+	return count;
+}
+
+static struct governor_attr up_rate_limit_us = __ATTR_RW(up_rate_limit_us);
+static struct governor_attr down_rate_limit_us = __ATTR_RW(down_rate_limit_us);
 
 static struct attribute *sugov_attributes[] = {
-	&rate_limit_us.attr,
+	&up_rate_limit_us.attr,
+	&down_rate_limit_us.attr,
 	NULL
 };
 
@@ -595,7 +689,7 @@
 
 /********************** cpufreq governor interface *********************/
 
-static struct cpufreq_governor schedutil_gov;
+struct cpufreq_governor schedutil_gov;
 
 static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
 {
@@ -740,7 +834,8 @@
 		goto stop_kthread;
 	}
 
-	tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);
+	tunables->up_rate_limit_us = cpufreq_policy_transition_delay_us(policy);
+	tunables->down_rate_limit_us = cpufreq_policy_transition_delay_us(policy);
 
 	policy->governor_data = sg_policy;
 	sg_policy->tunables = tunables;
@@ -799,7 +894,11 @@
 	struct sugov_policy *sg_policy = policy->governor_data;
 	unsigned int cpu;
 
-	sg_policy->freq_update_delay_ns	= sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
+	sg_policy->up_rate_delay_ns =
+		sg_policy->tunables->up_rate_limit_us * NSEC_PER_USEC;
+	sg_policy->down_rate_delay_ns =
+		sg_policy->tunables->down_rate_limit_us * NSEC_PER_USEC;
+	update_min_rate_limit_ns(sg_policy);
 	sg_policy->last_freq_update_time	= 0;
 	sg_policy->next_freq			= 0;
 	sg_policy->work_in_progress		= false;
@@ -858,7 +957,7 @@
 	sg_policy->limits_changed = true;
 }
 
-static struct cpufreq_governor schedutil_gov = {
+struct cpufreq_governor schedutil_gov = {
 	.name			= "schedutil",
 	.owner			= THIS_MODULE,
 	.dynamic_switching	= true,
@@ -881,3 +980,36 @@
 	return cpufreq_register_governor(&schedutil_gov);
 }
 fs_initcall(sugov_register);
+
+#ifdef CONFIG_ENERGY_MODEL
+extern bool sched_energy_update;
+extern struct mutex sched_energy_mutex;
+
+static void rebuild_sd_workfn(struct work_struct *work)
+{
+	mutex_lock(&sched_energy_mutex);
+	sched_energy_update = true;
+	rebuild_sched_domains();
+	sched_energy_update = false;
+	mutex_unlock(&sched_energy_mutex);
+}
+static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn);
+
+/*
+ * EAS shouldn't be attempted without sugov, so rebuild the sched_domains
+ * on governor changes to make sure the scheduler knows about it.
+ */
+void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
+				  struct cpufreq_governor *old_gov)
+{
+	if (old_gov == &schedutil_gov || policy->governor == &schedutil_gov) {
+		/*
+		 * When called from the cpufreq_register_driver() path, the
+		 * cpu_hotplug_lock is already held, so use a work item to
+		 * avoid nested locking in rebuild_sched_domains().
+		 */
+		schedule_work(&rebuild_sd_work);
+	}
+
+}
+#endif
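
For reference, map_util_freq() used above stands in for the former open-coded 1.25x headroom expression, next_freq = 1.25 * freq * util / max, and the new up/down limits then gate how often an increase or a decrease may be applied. A standalone sketch with illustrative numbers (these helpers only model the kernel logic, they are not the kernel implementation):

	/* Standalone model of the frequency mapping and the asymmetric rate
	 * limiting added to schedutil in this change. Values are illustrative. */
	#include <stdio.h>

	#define NSEC_PER_USEC 1000ULL

	/* Same formula as map_util_freq(): 1.25 * freq * util / cap. */
	static unsigned long map_util_freq_model(unsigned long util,
						 unsigned long freq, unsigned long cap)
	{
		return (freq + (freq >> 2)) * util / cap;
	}

	/* Mirror of sugov_up_down_rate_limit(): increases and decreases
	 * are rate limited independently. */
	static int rate_limited(unsigned long long delta_ns,
				unsigned int cur_freq, unsigned int next_freq,
				unsigned long long up_delay_ns,
				unsigned long long down_delay_ns)
	{
		if (next_freq > cur_freq && delta_ns < up_delay_ns)
			return 1;
		if (next_freq < cur_freq && delta_ns < down_delay_ns)
			return 1;
		return 0;
	}

	int main(void)
	{
		unsigned long f = map_util_freq_model(512, 2000000, 1024);

		printf("requested freq: %lu kHz\n", f);	/* 1250000 */

		/* 500us since last update: an increase passes a 0us up limit,
		 * but a decrease is blocked by a 20000us down limit. */
		printf("%d %d\n",
		       rate_limited(500 * NSEC_PER_USEC, 1000000, 1250000,
				    0, 20000 * NSEC_PER_USEC),
		       rate_limited(500 * NSEC_PER_USEC, 1500000, 1250000,
				    0, 20000 * NSEC_PER_USEC));
		return 0;
	}
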
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 0796f93..ca4208d4 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -1,6 +1,7 @@
 /*
  * Simple CPU accounting cgroup controller
  */
+#include <linux/cpufreq_times.h>
 #include "sched.h"
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -128,6 +129,9 @@
 
 	/* Account for user time used */
 	acct_account_cputime(p);
+
+	/* Account power usage for user time */
+	cpufreq_acct_update_power(p, cputime);
 }
 
 /*
@@ -172,6 +176,9 @@
 
 	/* Account for system time used */
 	acct_account_cputime(p);
+
+	/* Account power usage for system time */
+	cpufreq_acct_update_power(p, cputime);
 }
 
 /*
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 72c0705..02f8259 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1566,7 +1566,8 @@
 static int find_later_rq(struct task_struct *task);
 
 static int
-select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
+select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags,
+		  int sibling_count_hint)
 {
 	struct task_struct *curr;
 	struct rq *rq;
@@ -1760,7 +1761,7 @@
 	deadline_queue_push_tasks(rq);
 
 	if (rq->curr->sched_class != &dl_sched_class)
-		update_dl_rq_load_avg(rq_clock_task(rq), rq, 0);
+		update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
 
 	return p;
 }
@@ -1769,7 +1770,7 @@
 {
 	update_curr_dl(rq);
 
-	update_dl_rq_load_avg(rq_clock_task(rq), rq, 1);
+	update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
 	if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
 		enqueue_pushable_dl_task(rq, p);
 }
@@ -1786,7 +1787,7 @@
 {
 	update_curr_dl(rq);
 
-	update_dl_rq_load_avg(rq_clock_task(rq), rq, 1);
+	update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
 	/*
 	 * Even when we have runtime, update_curr_dl() might have resulted in us
 	 * not being the leftmost task anymore. In that case NEED_RESCHED will
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 75f3226..39196d8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -41,6 +41,16 @@
 unsigned int normalized_sysctl_sched_latency		= 6000000ULL;
 
 /*
+ * Enable/disable honoring sync flag in energy-aware wakeups.
+ */
+unsigned int sysctl_sched_sync_hint_enable = 1;
+
+/*
+ * Enable/disable using cstate knowledge in idle sibling selection
+ */
+unsigned int sysctl_sched_cstate_aware = 1;
+
+/*
  * The initial- and re-scaling of tunables is configurable
  *
  * Options are:
@@ -248,13 +258,6 @@
  */
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-
-/* cpu runqueue to which this cfs_rq is attached */
-static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
-{
-	return cfs_rq->rq;
-}
-
 static inline struct task_struct *task_of(struct sched_entity *se)
 {
 	SCHED_WARN_ON(!entity_is_task(se));
@@ -410,12 +413,6 @@
 	return container_of(se, struct task_struct, se);
 }
 
-static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
-{
-	return container_of(cfs_rq, struct rq, cfs);
-}
-
-
 #define for_each_sched_entity(se) \
 		for (; se; se = NULL)
 
@@ -686,12 +683,12 @@
 	return calc_delta_fair(sched_slice(cfs_rq, se), se);
 }
 
-#ifdef CONFIG_SMP
 #include "pelt.h"
-#include "sched-pelt.h"
+#ifdef CONFIG_SMP
 
 static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
 static unsigned long task_h_load(struct task_struct *p);
+static unsigned long capacity_of(int cpu);
 
 /* Give new sched_entity start runnable values to heavy its load in infant time */
 void init_entity_runnable_average(struct sched_entity *se)
@@ -775,7 +772,7 @@
 			 * such that the next switched_to_fair() has the
 			 * expected state.
 			 */
-			se->avg.last_update_time = cfs_rq_clock_task(cfs_rq);
+			se->avg.last_update_time = cfs_rq_clock_pelt(cfs_rq);
 			return;
 		}
 	}
@@ -940,6 +937,7 @@
 			}
 
 			trace_sched_stat_blocked(tsk, delta);
+			trace_sched_blocked_reason(tsk);
 
 			/*
 			 * Blocking time is in units of nanosecs, so shift by
@@ -1486,7 +1484,6 @@
 static unsigned long weighted_cpuload(struct rq *rq);
 static unsigned long source_load(int cpu, int type);
 static unsigned long target_load(int cpu, int type);
-static unsigned long capacity_of(int cpu);
 
 /* Cached statistics for all CPUs within a node */
 struct numa_stats {
@@ -3143,6 +3140,8 @@
 	if (force || abs(delta) > cfs_rq->tg_load_avg_contrib / 64) {
 		atomic_long_add(delta, &cfs_rq->tg->load_avg);
 		cfs_rq->tg_load_avg_contrib = cfs_rq->avg.load_avg;
+
+		trace_sched_load_tg(cfs_rq);
 	}
 }
 
@@ -3191,7 +3190,7 @@
 	p_last_update_time = prev->avg.last_update_time;
 	n_last_update_time = next->avg.last_update_time;
 #endif
-	__update_load_avg_blocked_se(p_last_update_time, cpu_of(rq_of(prev)), se);
+	__update_load_avg_blocked_se(p_last_update_time, se);
 	se->avg.last_update_time = n_last_update_time;
 }
 
@@ -3326,11 +3325,11 @@
 
 	/*
 	 * runnable_sum can't be lower than running_sum
-	 * As running sum is scale with CPU capacity wehreas the runnable sum
-	 * is not we rescale running_sum 1st
+	 * Rescale running sum to be in the same range as runnable sum
+	 * running_sum is in [0 : LOAD_AVG_MAX <<  SCHED_CAPACITY_SHIFT]
+	 * runnable_sum is in [0 : LOAD_AVG_MAX]
 	 */
-	running_sum = se->avg.util_sum /
-		arch_scale_cpu_capacity(NULL, cpu_of(rq_of(cfs_rq)));
+	running_sum = se->avg.util_sum >> SCHED_CAPACITY_SHIFT;
 	runnable_sum = max(runnable_sum, running_sum);
 
 	load_sum = (s64)se_weight(se) * runnable_sum;
@@ -3385,6 +3384,9 @@
 	update_tg_cfs_util(cfs_rq, se, gcfs_rq);
 	update_tg_cfs_runnable(cfs_rq, se, gcfs_rq);
 
+	trace_sched_load_cfs_rq(cfs_rq);
+	trace_sched_load_se(se);
+
 	return 1;
 }
 
@@ -3433,7 +3435,7 @@
 
 /**
  * update_cfs_rq_load_avg - update the cfs_rq's load/util averages
- * @now: current time, as per cfs_rq_clock_task()
+ * @now: current time, as per cfs_rq_clock_pelt()
  * @cfs_rq: cfs_rq to update
  *
  * The cfs_rq avg is the direct sum of all its entities (blocked and runnable)
@@ -3478,7 +3480,7 @@
 		decayed = 1;
 	}
 
-	decayed |= __update_load_avg_cfs_rq(now, cpu_of(rq_of(cfs_rq)), cfs_rq);
+	decayed |= __update_load_avg_cfs_rq(now, cfs_rq);
 
 #ifndef CONFIG_64BIT
 	smp_wmb();
@@ -3537,6 +3539,8 @@
 	add_tg_cfs_propagate(cfs_rq, se->avg.load_sum);
 
 	cfs_rq_util_change(cfs_rq, flags);
+
+	trace_sched_load_cfs_rq(cfs_rq);
 }
 
 /**
@@ -3556,6 +3560,8 @@
 	add_tg_cfs_propagate(cfs_rq, -se->avg.load_sum);
 
 	cfs_rq_util_change(cfs_rq, 0);
+
+	trace_sched_load_cfs_rq(cfs_rq);
 }
 
 /*
@@ -3568,9 +3574,7 @@
 /* Update task and its cfs_rq load average */
 static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
-	u64 now = cfs_rq_clock_task(cfs_rq);
-	struct rq *rq = rq_of(cfs_rq);
-	int cpu = cpu_of(rq);
+	u64 now = cfs_rq_clock_pelt(cfs_rq);
 	int decayed;
 
 	/*
@@ -3578,7 +3582,7 @@
 	 * track group sched_entity load average for task_h_load calc in migration
 	 */
 	if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD))
-		__update_load_avg_se(now, cpu, cfs_rq, se);
+		__update_load_avg_se(now, cfs_rq, se);
 
 	decayed  = update_cfs_rq_load_avg(now, cfs_rq);
 	decayed |= propagate_entity_load_avg(se);
@@ -3630,7 +3634,7 @@
 	u64 last_update_time;
 
 	last_update_time = cfs_rq_last_update_time(cfs_rq);
-	__update_load_avg_blocked_se(last_update_time, cpu_of(rq_of(cfs_rq)), se);
+	__update_load_avg_blocked_se(last_update_time, se);
 }
 
 /*
@@ -3703,6 +3707,10 @@
 	enqueued  = cfs_rq->avg.util_est.enqueued;
 	enqueued += (_task_util_est(p) | UTIL_AVG_UNCHANGED);
 	WRITE_ONCE(cfs_rq->avg.util_est.enqueued, enqueued);
+
+	/* Update plots for Task and CPU estimated utilization */
+	trace_sched_util_est_task(p, &p->se.avg);
+	trace_sched_util_est_cpu(cpu_of(rq_of(cfs_rq)), cfs_rq);
 }
 
 /*
@@ -3723,6 +3731,7 @@
 {
 	long last_ewma_diff;
 	struct util_est ue;
+	int cpu;
 
 	if (!sched_feat(UTIL_EST))
 		return;
@@ -3733,6 +3742,9 @@
 			     (_task_util_est(p) | UTIL_AVG_UNCHANGED));
 	WRITE_ONCE(cfs_rq->avg.util_est.enqueued, ue.enqueued);
 
+	/* Update plots for CPU's estimated utilization */
+	trace_sched_util_est_cpu(cpu_of(rq_of(cfs_rq)), cfs_rq);
+
 	/*
 	 * Skip update of task's estimated utilization when the task has not
 	 * yet completed an activation, e.g. being migrated.
@@ -3758,6 +3770,14 @@
 		return;
 
 	/*
+	 * To avoid overestimation of actual task utilization, skip updates if
+	 * we cannot grant there is idle time in this CPU.
+	 */
+	cpu = cpu_of(rq_of(cfs_rq));
+	if (task_util(p) > capacity_orig_of(cpu))
+		return;
+
+	/*
 	 * Update Task's estimated utilization
 	 *
 	 * When *p completes an activation we can consolidate another sample
@@ -3778,6 +3798,32 @@
 	ue.ewma  += last_ewma_diff;
 	ue.ewma >>= UTIL_EST_WEIGHT_SHIFT;
 	WRITE_ONCE(p->se.avg.util_est, ue);
+
+	/* Update plots for Task's estimated utilization */
+	trace_sched_util_est_task(p, &p->se.avg);
+}
+
+static inline int task_fits_capacity(struct task_struct *p, long capacity)
+{
+	return capacity * 1024 > task_util_est(p) * capacity_margin;
+}
+
+static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
+{
+	if (!static_branch_unlikely(&sched_asym_cpucapacity))
+		return;
+
+	if (!p) {
+		rq->misfit_task_load = 0;
+		return;
+	}
+
+	if (task_fits_capacity(p, capacity_of(cpu_of(rq)))) {
+		rq->misfit_task_load = 0;
+		return;
+	}
+
+	rq->misfit_task_load = task_h_load(p);
 }
 
 #else /* CONFIG_SMP */
@@ -3809,6 +3855,7 @@
 static inline void
 util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p,
 		 bool task_sleep) {}
+static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}
 
 #endif /* CONFIG_SMP */
 
@@ -5152,6 +5199,26 @@
 }
 #endif
 
+#ifdef CONFIG_SMP
+static inline unsigned long cpu_util(int cpu);
+static unsigned long capacity_of(int cpu);
+
+static inline bool cpu_overutilized(int cpu)
+{
+	return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin);
+}
+
+static inline void update_overutilized_status(struct rq *rq)
+{
+	if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu)) {
+		WRITE_ONCE(rq->rd->overutilized, SG_OVERUTILIZED);
+		trace_sched_overutilized(1);
+	}
+}
+#else
+static inline void update_overutilized_status(struct rq *rq) { }
+#endif
+
 /*
  * The enqueue_task method is called before nr_running is
  * increased. Here we update the fair scheduling stats and
@@ -5172,6 +5239,24 @@
 	util_est_enqueue(&rq->cfs, p);
 
 	/*
+	 * The code below (indirectly) updates schedutil which looks at
+	 * the cfs_rq utilization to select a frequency.
+	 * Let's update schedtune here to ensure the boost value of the
+	 * current task is accounted for in the selection of the OPP.
+	 *
+	 * We do it also in the case where we enqueue a throttled task;
+	 * we could argue that a throttled task should not boost a CPU,
+	 * however:
+	 * a) properly implementing CPU boosting while considering throttled
+	 *    tasks would greatly increase the complexity of the solution
+	 * b) it's not easy to quantify the benefits introduced by
+	 *    such a more complex solution.
+	 * Thus, for the time being we go for the simple solution and boost
+	 * also for throttled RQs.
+	 */
+	schedtune_enqueue_task(p, cpu_of(rq));
+
+	/*
 	 * If in_iowait is set, the code below may not trigger any cpufreq
 	 * utilization updates, so do it here explicitly with the IOWAIT flag
 	 * passed.
@@ -5209,8 +5294,26 @@
 		update_cfs_group(se);
 	}
 
-	if (!se)
+	if (!se) {
 		add_nr_running(rq, 1);
+		/*
+		 * Since new tasks are assigned an initial util_avg equal to
+		 * half of the spare capacity of their CPU, tiny tasks have the
+		 * ability to cross the overutilized threshold, which will
+		 * result in the load balancer ruining all the task placement
+		 * done by EAS. As a way to mitigate that effect, do not account
+		 * for the first enqueue operation of new tasks during the
+		 * overutilized flag detection.
+		 *
+		 * A better way of solving this problem would be to wait for
+		 * the PELT signals of tasks to converge before taking them
+		 * into account, but that is not straightforward to implement,
+		 * and the following generally works well enough in practice.
+		 */
+		if (flags & ENQUEUE_WAKEUP)
+			update_overutilized_status(rq);
+
+	}
 
 	hrtick_update(rq);
 }
@@ -5228,6 +5331,14 @@
 	struct sched_entity *se = &p->se;
 	int task_sleep = flags & DEQUEUE_SLEEP;
 
+	/*
+	 * The code below (indirectly) updates schedutil which looks at
+	 * the cfs_rq utilization to select a frequency.
+	 * Let's update schedtune here to ensure the boost value of the
+	 * current task is no longer accounted for in the selection of the OPP.
+	 */
+	schedtune_dequeue_task(p, cpu_of(rq));
+
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
 		dequeue_entity(cfs_rq, se, flags);
@@ -5591,11 +5702,6 @@
 	return cpu_rq(cpu)->cpu_capacity;
 }
 
-static unsigned long capacity_orig_of(int cpu)
-{
-	return cpu_rq(cpu)->cpu_capacity_orig;
-}
-
 static unsigned long cpu_avg_load_per_task(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
@@ -5642,15 +5748,18 @@
  * whatever is irrelevant, spread criteria is apparent partner count exceeds
  * socket size.
  */
-static int wake_wide(struct task_struct *p)
+static int wake_wide(struct task_struct *p, int sibling_count_hint)
 {
 	unsigned int master = current->wakee_flips;
 	unsigned int slave = p->wakee_flips;
-	int factor = this_cpu_read(sd_llc_size);
+	int llc_size = this_cpu_read(sd_llc_size);
+
+	if (sibling_count_hint >= llc_size)
+		return 1;
 
 	if (master < slave)
 		swap(master, slave);
-	if (slave < factor || master < slave * factor)
+	if (slave < llc_size || master < slave * llc_size)
 		return 0;
 	return 1;
 }
@@ -5754,6 +5863,100 @@
 	return target;
 }
 
+#ifdef CONFIG_SCHED_TUNE
+struct reciprocal_value schedtune_spc_rdiv;
+
+static long
+schedtune_margin(unsigned long signal, long boost)
+{
+	long long margin = 0;
+
+	/*
+	 * Signal proportional compensation (SPC)
+	 *
+	 * The Boost (B) value is used to compute a Margin (M) which is
+	 * proportional to the complement of the original Signal (S):
+	 *   M = B * (SCHED_CAPACITY_SCALE - S)
+	 * The obtained M could be used by the caller to "boost" S.
+	 */
+	if (boost >= 0) {
+		margin  = SCHED_CAPACITY_SCALE - signal;
+		margin *= boost;
+	} else
+		margin = -signal * boost;
+
+	margin  = reciprocal_divide(margin, schedtune_spc_rdiv);
+
+	if (boost < 0)
+		margin *= -1;
+	return margin;
+}
+
+static inline int
+schedtune_cpu_margin(unsigned long util, int cpu)
+{
+	int boost = schedtune_cpu_boost(cpu);
+
+	if (boost == 0)
+		return 0;
+
+	return schedtune_margin(util, boost);
+}
+
+static inline long
+schedtune_task_margin(struct task_struct *task)
+{
+	int boost = schedtune_task_boost(task);
+	unsigned long util;
+	long margin;
+
+	if (boost == 0)
+		return 0;
+
+	util = task_util_est(task);
+	margin = schedtune_margin(util, boost);
+
+	return margin;
+}
+
+unsigned long
+boosted_cpu_util(int cpu, unsigned long other_util)
+{
+	unsigned long util = cpu_util_cfs(cpu_rq(cpu)) + other_util;
+	long margin = schedtune_cpu_margin(util, cpu);
+
+	trace_sched_boost_cpu(cpu, util, margin);
+
+	return util + margin;
+}
+
+#else /* CONFIG_SCHED_TUNE */
+
+static inline int
+schedtune_cpu_margin(unsigned long util, int cpu)
+{
+	return 0;
+}
+
+static inline int
+schedtune_task_margin(struct task_struct *task)
+{
+	return 0;
+}
+
+#endif /* CONFIG_SCHED_TUNE */
+
+static inline unsigned long
+boosted_task_util(struct task_struct *task)
+{
+	unsigned long util = task_util_est(task);
+	long margin = schedtune_task_margin(task);
+
+	trace_sched_boost_task(task, util, margin);
+
+	return util + margin;
+}
+
 static unsigned long cpu_util_without(int cpu, struct task_struct *p);
 
 static unsigned long capacity_spare_without(int cpu, struct task_struct *p)
@@ -6387,6 +6590,321 @@
 }
 
 /*
+ * Returns the current capacity of cpu after applying both
+ * cpu and freq scaling.
+ */
+unsigned long capacity_curr_of(int cpu)
+{
+	unsigned long max_cap = cpu_rq(cpu)->cpu_capacity_orig;
+	unsigned long scale_freq = arch_scale_freq_capacity(cpu);
+
+	return cap_scale(max_cap, scale_freq);
+}
+
+static void find_best_target(struct sched_domain *sd, cpumask_t *cpus,
+							struct task_struct *p)
+{
+	unsigned long min_util = boosted_task_util(p);
+	unsigned long target_capacity = ULONG_MAX;
+	unsigned long min_wake_util = ULONG_MAX;
+	unsigned long target_max_spare_cap = 0;
+	unsigned long target_util = ULONG_MAX;
+	bool prefer_idle = schedtune_prefer_idle(p);
+	bool boosted = schedtune_task_boost(p) > 0;
+	/* Initialise with deepest possible cstate (INT_MAX) */
+	int shallowest_idle_cstate = INT_MAX;
+	struct sched_group *sg;
+	int best_active_cpu = -1;
+	int best_idle_cpu = -1;
+	int target_cpu = -1;
+	int backup_cpu = -1;
+	int i;
+
+	/*
+	 * In most cases, target_capacity tracks capacity_orig of the most
+	 * energy efficient CPU candidate, thus requiring target_capacity to
+	 * be minimised. For these cases target_capacity is already
+	 * initialized to ULONG_MAX.
+	 * However, for prefer_idle and boosted tasks we look for a high
+	 * performance CPU, thus requiring target_capacity to be maximised.
+	 * In this case we initialise target_capacity to 0.
+	 */
+	if (prefer_idle && boosted)
+		target_capacity = 0;
+
+	/* Scan CPUs in all SDs */
+	sg = sd->groups;
+	do {
+		for_each_cpu_and(i, &p->cpus_allowed, sched_group_span(sg)) {
+			unsigned long capacity_curr = capacity_curr_of(i);
+			unsigned long capacity_orig = capacity_orig_of(i);
+			unsigned long wake_util, new_util;
+			long spare_cap;
+			int idle_idx = INT_MAX;
+
+			if (!cpu_online(i))
+				continue;
+
+			/*
+			 * p's blocked utilization is still accounted for on prev_cpu
+			 * so prev_cpu will receive a negative bias due to the double
+			 * accounting. However, the blocked utilization may be zero.
+			 */
+			wake_util = cpu_util_without(i, p);
+			new_util = wake_util + task_util_est(p);
+
+			/*
+			 * Ensure minimum capacity to grant the required boost.
+			 * The target CPU can be already at a capacity level higher
+			 * than the one required to boost the task.
+			 */
+			new_util = max(min_util, new_util);
+			if (new_util > capacity_orig)
+				continue;
+
+			/*
+			 * Pre-compute the maximum possible capacity we expect
+			 * to have available on this CPU once the task is
+			 * enqueued here.
+			 */
+			spare_cap = capacity_orig - new_util;
+
+			if (idle_cpu(i))
+				idle_idx = idle_get_state_idx(cpu_rq(i));
+
+
+			/*
+			 * Case A) Latency sensitive tasks
+			 *
+			 * Unconditionally favoring tasks that prefer idle CPU to
+			 * improve latency.
+			 *
+			 * Looking for:
+			 * - an idle CPU, whatever its idle_state is, since
+			 *   the first CPUs we explore are more likely to be
+			 *   reserved for latency sensitive tasks.
+			 * - a non idle CPU where the task fits in its current
+			 *   capacity and has the maximum spare capacity.
+			 * - a non idle CPU with lower contention from other
+			 *   tasks and running at the lowest possible OPP.
+			 *
+			 * The last two goals try to favor a non idle CPU
+			 * where the task can run as if it is "almost alone".
+			 * A maximum spare capacity CPU is favoured since
+			 * the task already fits into that CPU's capacity
+			 * without waiting for an OPP chance.
+			 *
+			 * The following code path is the only one in the CPUs
+			 * exploration loop which is always used by
+			 * prefer_idle tasks. It exits the loop with either a
+			 * best_active_cpu or a target_cpu which should
+			 * represent an optimal choice for latency sensitive
+			 * tasks.
+			 */
+			if (prefer_idle) {
+
+				/*
+				 * Case A.1: IDLE CPU
+				 * Return the best IDLE CPU we find:
+				 * - for boosted tasks: the CPU with the highest
+				 * performance (i.e. biggest capacity_orig)
+				 * - for !boosted tasks: the most energy
+				 * efficient CPU (i.e. smallest capacity_orig)
+				 */
+				if (idle_cpu(i)) {
+					if (boosted &&
+					    capacity_orig < target_capacity)
+						continue;
+					if (!boosted &&
+					    capacity_orig > target_capacity)
+						continue;
+					/*
+					 * Minimise value of idle state: skip
+					 * deeper idle states and pick the
+					 * shallowest.
+					 */
+					if (capacity_orig == target_capacity &&
+					    sysctl_sched_cstate_aware &&
+					    idle_idx >= shallowest_idle_cstate)
+						continue;
+
+					target_capacity = capacity_orig;
+					shallowest_idle_cstate = idle_idx;
+					best_idle_cpu = i;
+					continue;
+				}
+				if (best_idle_cpu != -1)
+					continue;
+
+				/*
+				 * Case A.2: Target ACTIVE CPU
+				 * Favor CPUs with max spare capacity.
+				 */
+				if (capacity_curr > new_util &&
+				    spare_cap > target_max_spare_cap) {
+					target_max_spare_cap = spare_cap;
+					target_cpu = i;
+					continue;
+				}
+				if (target_cpu != -1)
+					continue;
+
+
+				/*
+				 * Case A.3: Backup ACTIVE CPU
+				 * Favor CPUs with:
+				 * - lower utilization due to other tasks
+				 * - lower utilization with the task in
+				 */
+				if (wake_util > min_wake_util)
+					continue;
+				min_wake_util = wake_util;
+				best_active_cpu = i;
+				continue;
+			}
+
+			/*
+			 * Enforce EAS mode
+			 *
+			 * For non latency sensitive tasks, skip CPUs that
+			 * will be overutilized by moving the task there.
+			 *
+			 * The goal here is to remain in EAS mode as long as
+			 * possible at least for !prefer_idle tasks.
+			 */
+			if ((new_util * capacity_margin) >
+			    (capacity_orig * SCHED_CAPACITY_SCALE))
+				continue;
+
+			/*
+			 * Favor CPUs with smaller capacity for non latency
+			 * sensitive tasks.
+			 */
+			if (capacity_orig > target_capacity)
+				continue;
+
+			/*
+			 * Case B) Non latency sensitive tasks on IDLE CPUs.
+			 *
+			 * Find an optimal backup IDLE CPU for non latency
+			 * sensitive tasks.
+			 *
+			 * Looking for:
+			 * - minimizing the capacity_orig,
+			 *   i.e. preferring LITTLE CPUs
+			 * - favoring shallowest idle states
+			 *   i.e. avoid waking up deep-idle CPUs
+			 *
+			 * The following code path is used by non latency
+			 * sensitive tasks if IDLE CPUs are available. If at
+			 * least one such CPU is available it sets the
+			 * best_idle_cpu to the most suitable idle CPU to be
+			 * selected.
+			 *
+			 * If idle CPUs are available, favour these CPUs to
+			 * improve performance by spreading tasks.
+			 * Indeed, the energy_diff() computed by the caller
+			 * will take care of minimizing energy consumption
+			 * without affecting performance.
+			 */
+			if (idle_cpu(i)) {
+				/*
+				 * Skip CPUs in deeper idle state, but only
+				 * if they are also less energy efficient.
+				 * IOW, prefer a deep IDLE LITTLE CPU vs a
+				 * shallow idle big CPU.
+				 */
+				if (capacity_orig == target_capacity &&
+				    sysctl_sched_cstate_aware &&
+				    idle_idx >= shallowest_idle_cstate)
+					continue;
+
+				target_capacity = capacity_orig;
+				shallowest_idle_cstate = idle_idx;
+				best_idle_cpu = i;
+				continue;
+			}
+
+			/*
+			 * Case C) Non latency sensitive tasks on ACTIVE CPUs.
+			 *
+			 * Pack tasks in the most energy efficient capacities.
+			 *
+			 * This task packing strategy prefers more energy
+			 * efficient CPUs (i.e. pack on smaller maximum
+			 * capacity CPUs) while also trying to spread tasks to
+			 * run them all at the lower OPP.
+			 *
+			 * This assumes for example that it's more energy
+			 * efficient to run two tasks on two CPUs at a lower
+			 * OPP than packing both on a single CPU but running
+			 * that CPU at a higher OPP.
+			 *
+			 * Thus, this case keeps track of the CPU with the
+			 * smallest maximum capacity and highest spare maximum
+			 * capacity.
+			 */
+
+			/* Favor CPUs with maximum spare capacity */
+			if (capacity_orig == target_capacity &&
+			    spare_cap < target_max_spare_cap)
+				continue;
+
+			target_max_spare_cap = spare_cap;
+			target_capacity = capacity_orig;
+			target_util = new_util;
+			target_cpu = i;
+		}
+
+	} while (sg = sg->next, sg != sd->groups);
+
+	/*
+	 * For non latency sensitive tasks, cases B and C in the previous loop,
+	 * we pick the best IDLE CPU only if we were not able to find a target
+	 * ACTIVE CPU.
+	 *
+	 * Policies priorities:
+	 *
+	 * - prefer_idle tasks:
+	 *
+	 *   a) IDLE CPU available: best_idle_cpu
+	 *   b) ACTIVE CPU where the task fits and has the biggest maximum spare
+	 *      capacity (i.e. target_cpu)
+	 *   c) ACTIVE CPU with less contention due to other tasks
+	 *      (i.e. best_active_cpu)
+	 *
+	 * - NON prefer_idle tasks:
+	 *
+	 *   a) ACTIVE CPU: target_cpu
+	 *   b) IDLE CPU: best_idle_cpu
+	 */
+
+	if (prefer_idle && (best_idle_cpu != -1)) {
+		target_cpu = best_idle_cpu;
+		goto target;
+	}
+
+	if (target_cpu == -1)
+		target_cpu = prefer_idle
+			? best_active_cpu
+			: best_idle_cpu;
+	else
+		backup_cpu = prefer_idle
+		? best_active_cpu
+		: best_idle_cpu;
+
+	if (backup_cpu >= 0)
+		cpumask_set_cpu(backup_cpu, cpus);
+	if (target_cpu >= 0) {
+target:
+		cpumask_set_cpu(target_cpu, cpus);
+	}
+
+	trace_sched_find_best_target(p, prefer_idle, min_util, best_idle_cpu,
+			             best_active_cpu, target_cpu, backup_cpu);
+}
+
+/*
  * Disable WAKE_AFFINE in the case where task @p doesn't fit in the
  * capacity of either the waking CPU @cpu or the previous CPU @prev_cpu.
  *
@@ -6397,8 +6915,11 @@
 {
 	long min_cap, max_cap;
 
+	if (!static_branch_unlikely(&sched_asym_cpucapacity))
+		return 0;
+
 	min_cap = min(capacity_orig_of(prev_cpu), capacity_orig_of(cpu));
-	max_cap = cpu_rq(cpu)->rd->max_cpu_capacity;
+	max_cap = cpu_rq(cpu)->rd->max_cpu_capacity.val;
 
 	/* Minimum capacity is close to max, no need to abort wake_affine */
 	if (max_cap - min_cap < max_cap >> 3)
@@ -6407,7 +6928,293 @@
 	/* Bring task utilization in sync with prev_cpu */
 	sync_entity_load_avg(&p->se);
 
-	return min_cap * 1024 < task_util(p) * capacity_margin;
+	return !task_fits_capacity(p, min_cap);
+}
+
+/*
+ * Predicts what cpu_util(@cpu) would return if @p was migrated (and enqueued)
+ * to @dst_cpu.
+ */
+static unsigned long cpu_util_next(int cpu, struct task_struct *p, int dst_cpu)
+{
+	struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs;
+	unsigned long util_est, util = READ_ONCE(cfs_rq->avg.util_avg);
+
+	/*
+	 * If @p migrates from @cpu to another, remove its contribution. Or,
+	 * if @p migrates from another CPU to @cpu, add its contribution. In
+	 * the other cases, @cpu is not impacted by the migration, so the
+	 * util_avg should already be correct.
+	 */
+	if (task_cpu(p) == cpu && dst_cpu != cpu)
+		sub_positive(&util, task_util(p));
+	else if (task_cpu(p) != cpu && dst_cpu == cpu)
+		util += task_util(p);
+
+	if (sched_feat(UTIL_EST)) {
+		util_est = READ_ONCE(cfs_rq->avg.util_est.enqueued);
+
+		/*
+		 * During wake-up, the task isn't enqueued yet and doesn't
+		 * appear in the cfs_rq->avg.util_est.enqueued of any rq,
+		 * so just add it (if needed) to "simulate" what will be
+		 * cpu_util() after the task has been enqueued.
+		 */
+		if (dst_cpu == cpu)
+			util_est += _task_util_est(p);
+
+		util = max(util, util_est);
+	}
+
+	return min(util, capacity_orig_of(cpu));
+}
+
+/*
+ * compute_energy(): Estimates the energy that would be consumed if @p was
+ * migrated to @dst_cpu. compute_energy() predicts what will be the utilization
+ * landscape of the CPUs after the task migration, and uses the Energy Model
+ * to compute what would be the energy if we decided to actually migrate that
+ * task.
+ */
+static long
+compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
+{
+	long util, max_util, sum_util, energy = 0;
+	int cpu;
+
+	for (; pd; pd = pd->next) {
+		max_util = sum_util = 0;
+		/*
+		 * The capacity state of CPUs of the current rd can be driven by
+		 * CPUs of another rd if they belong to the same performance
+		 * domain. So, account for the utilization of these CPUs too
+		 * by masking pd with cpu_online_mask instead of the rd span.
+		 *
+		 * If an entire performance domain is outside of the current rd,
+		 * it will not appear in its pd list and will not be accounted
+		 * by compute_energy().
+		 */
+		for_each_cpu_and(cpu, perf_domain_span(pd), cpu_online_mask) {
+			util = cpu_util_next(cpu, p, dst_cpu);
+			util += cpu_util_rt(cpu_rq(cpu));
+			util = schedutil_energy_util(cpu, util);
+			max_util = max(util, max_util);
+			sum_util += util;
+		}
+
+		energy += em_pd_energy(pd->em_pd, max_util, sum_util);
+	}
+
+	return energy;
+}
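compute_energy() feeds em_pd_energy() with each domain's busiest-CPU utilization (max_util, which drives the frequency choice) and the domain's summed utilization (sum_util, which drives how long it stays busy). The toy model below only illustrates that idea; it is a deliberate simplification, not the exact em_pd_energy() arithmetic:

	struct toy_opp { unsigned long cap; unsigned long cost; };	/* one operating point */

	/* Pick the lowest OPP (sorted by capacity) that covers the busiest CPU,
	 * then charge its cost in proportion to the domain's total utilization. */
	static unsigned long toy_pd_energy(const struct toy_opp *opp, int nr_opp,
					   unsigned long max_util, unsigned long sum_util)
	{
		int i = 0;

		while (i < nr_opp - 1 && opp[i].cap < max_util)
			i++;
		return opp[i].cost * sum_util / opp[i].cap;
	}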
+
+static void select_cpu_candidates(struct sched_domain *sd, cpumask_t *cpus,
+		struct perf_domain *pd, struct task_struct *p, int prev_cpu)
+{
+	int highest_spare_cap_cpu = prev_cpu, best_idle_cpu = -1;
+	unsigned long spare_cap, max_spare_cap, util, cpu_cap;
+	bool prefer_idle = schedtune_prefer_idle(p);
+	bool boosted = schedtune_task_boost(p) > 0;
+	unsigned long target_cap = boosted ? 0 : ULONG_MAX;
+	unsigned long highest_spare_cap = 0;
+	unsigned int min_exit_lat = UINT_MAX;
+	int cpu, max_spare_cap_cpu;
+	struct cpuidle_state *idle;
+
+	for (; pd; pd = pd->next) {
+		max_spare_cap_cpu = -1;
+		max_spare_cap = 0;
+
+		for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) {
+			if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
+				continue;
+
+			/* Skip CPUs that will be overutilized. */
+			util = cpu_util_next(cpu, p, cpu);
+			cpu_cap = capacity_of(cpu);
+			if (cpu_cap * 1024 < util * capacity_margin)
+				continue;
+
+			/*
+			 * Find the CPU with the maximum spare capacity in
+			 * the performance domain
+			 */
+			spare_cap = cpu_cap - util;
+			if (spare_cap > max_spare_cap) {
+				max_spare_cap = spare_cap;
+				max_spare_cap_cpu = cpu;
+			}
+
+			if (!prefer_idle)
+				continue;
+
+			if (idle_cpu(cpu)) {
+				cpu_cap = capacity_orig_of(cpu);
+				if (boosted && cpu_cap < target_cap)
+					continue;
+				if (!boosted && cpu_cap > target_cap)
+					continue;
+				idle = idle_get_state(cpu_rq(cpu));
+				if (idle && idle->exit_latency > min_exit_lat &&
+						cpu_cap == target_cap)
+					continue;
+
+				if (idle)
+					min_exit_lat = idle->exit_latency;
+				target_cap = cpu_cap;
+				best_idle_cpu = cpu;
+			} else if (spare_cap > highest_spare_cap) {
+				highest_spare_cap = spare_cap;
+				highest_spare_cap_cpu = cpu;
+			}
+		}
+
+		if (!prefer_idle && max_spare_cap_cpu >= 0)
+			cpumask_set_cpu(max_spare_cap_cpu, cpus);
+	}
+
+	if (!prefer_idle)
+		return;
+
+	if (best_idle_cpu >= 0)
+		cpumask_set_cpu(best_idle_cpu, cpus);
+	else
+		cpumask_set_cpu(highest_spare_cap_cpu, cpus);
+}
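For the !prefer_idle case, the loop above keeps one candidate per performance domain: the allowed CPU with the largest spare capacity that would not end up over-utilized. Reduced to a standalone sketch (the 1280/1024 margin is again an assumption, and the arrays stand in for the rq data):

	/* Return the index of the CPU with the most spare capacity, or -1. */
	static int pick_max_spare(const unsigned long *cap, const unsigned long *util,
				  int nr_cpu)
	{
		unsigned long best_spare = 0;
		int cpu, best = -1;

		for (cpu = 0; cpu < nr_cpu; cpu++) {
			unsigned long spare;

			if (cap[cpu] * 1024 < util[cpu] * 1280)
				continue;	/* would be over-utilized, skip */
			spare = cap[cpu] - util[cpu];
			if (spare > best_spare) {
				best_spare = spare;
				best = cpu;
			}
		}
		return best;
	}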
+
+static DEFINE_PER_CPU(cpumask_t, energy_cpus);
+
+/*
+ * find_energy_efficient_cpu(): Find most energy-efficient target CPU for the
+ * waking task. find_energy_efficient_cpu() looks for the CPU with maximum
+ * spare capacity in each performance domain and uses it as a potential
+ * candidate to execute the task. Then, it uses the Energy Model to figure
+ * out which of the CPU candidates is the most energy-efficient.
+ *
+ * The rationale for this heuristic is as follows. In a performance domain,
+ * all the most energy efficient CPU candidates (according to the Energy
+ * Model) are those for which we'll request a low frequency. When there are
+ * several CPUs for which the frequency request will be the same, we don't
+ * have enough data to break the tie between them, because the Energy Model
+ * only includes active power costs. With this model, if we assume that
+ * frequency requests follow utilization (e.g. using schedutil), the CPU with
+ * the maximum spare capacity in a performance domain is guaranteed to be among
+ * the best candidates of the performance domain.
+ *
+ * In practice, it could be preferable from an energy standpoint to pack
+ * small tasks on a CPU in order to let other CPUs go into deeper idle states,
+ * but that could also hurt our chances to go cluster idle, and we have no
+ * way to tell with the current Energy Model if this is actually a good
+ * idea or not. So, find_energy_efficient_cpu() basically favors
+ * cluster-packing, and spreading inside a cluster. That should at least be
+ * a good thing for latency, and this is consistent with the idea that most
+ * of the energy savings of EAS come from the asymmetry of the system, and
+ * not so much from breaking the tie between identical CPUs. That's also the
+ * reason why EAS is enabled in the topology code only for systems where
+ * SD_ASYM_CPUCAPACITY is set.
+ *
+ * NOTE: Forkees are not accepted in the energy-aware wake-up path because
+ * they don't have any useful utilization data yet and it's not possible to
+ * forecast their impact on energy consumption. Consequently, they will be
+ * placed by find_idlest_cpu() on the least loaded CPU, which might turn out
+ * to be energy-inefficient in some use-cases. The alternative would be to
+ * bias new tasks towards specific types of CPUs first, or to try to infer
+ * their util_avg from the parent task, but those heuristics could hurt
+ * other use-cases too. So, until someone finds a better way to solve this,
+ * let's keep things simple by re-using the existing slow path.
+ */
+
+static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, int sync)
+{
+	unsigned long prev_energy = ULONG_MAX, best_energy = ULONG_MAX;
+	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+	int weight, cpu, best_energy_cpu = prev_cpu;
+	unsigned long cur_energy;
+	struct perf_domain *pd;
+	struct sched_domain *sd;
+	cpumask_t *candidates;
+
+	if (sysctl_sched_sync_hint_enable && sync) {
+		cpu = smp_processor_id();
+		if (cpumask_test_cpu(cpu, &p->cpus_allowed))
+			return cpu;
+	}
+
+	rcu_read_lock();
+	pd = rcu_dereference(rd->pd);
+	if (!pd || READ_ONCE(rd->overutilized))
+		goto fail;
+
+	/*
+	 * Energy-aware wake-up happens on the lowest sched_domain starting
+	 * from sd_asym_cpucapacity spanning over this_cpu and prev_cpu.
+	 */
+	sd = rcu_dereference(*this_cpu_ptr(&sd_asym_cpucapacity));
+	while (sd && !cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
+		sd = sd->parent;
+	if (!sd)
+		goto fail;
+
+	sync_entity_load_avg(&p->se);
+	if (!task_util_est(p))
+		goto unlock;
+
+	/* Pre-select a set of candidate CPUs. */
+	candidates = this_cpu_ptr(&energy_cpus);
+	cpumask_clear(candidates);
+
+	if (sched_feat(FIND_BEST_TARGET))
+		find_best_target(sd, candidates, p);
+	else
+		select_cpu_candidates(sd, candidates, pd, p, prev_cpu);
+
+	/* Bail out if no candidate was found. */
+	weight = cpumask_weight(candidates);
+	if (!weight)
+		goto unlock;
+
+	/* If there is only one sensible candidate, select it now. */
+	cpu = cpumask_first(candidates);
+	if (weight == 1 && ((schedtune_prefer_idle(p) && idle_cpu(cpu)) ||
+			    (cpu == prev_cpu))) {
+		best_energy_cpu = cpu;
+		goto unlock;
+	}
+
+	if (cpumask_test_cpu(prev_cpu, &p->cpus_allowed))
+		prev_energy = best_energy = compute_energy(p, prev_cpu, pd);
+	else
+		prev_energy = best_energy = ULONG_MAX;
+
+	/* Select the best candidate energy-wise. */
+	for_each_cpu(cpu, candidates) {
+		if (cpu == prev_cpu)
+			continue;
+		cur_energy = compute_energy(p, cpu, pd);
+		if (cur_energy < best_energy) {
+			best_energy = cur_energy;
+			best_energy_cpu = cpu;
+		}
+	}
+unlock:
+	rcu_read_unlock();
+
+	/*
+	 * Pick the best CPU if prev_cpu cannot be used, or if it saves at
+	 * least 6% of the energy used by prev_cpu.
+	 */
+	if (prev_energy == ULONG_MAX)
+		return best_energy_cpu;
+
+	if ((prev_energy - best_energy) > (prev_energy >> 4))
+		return best_energy_cpu;
+
+	return prev_cpu;
+
+fail:
+	rcu_read_unlock();
+
+	return -1;
 }
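Note that the final filter above only abandons prev_cpu when the best candidate saves more than prev_energy >> 4, i.e. 1/16 ≈ 6.25% (rounded down to "6%" in the comment). A tiny sketch of that hysteresis, with an illustrative name:

	/* Keep prev_cpu unless the candidate saves more than ~6.25% energy. */
	static int worth_migrating(unsigned long prev_energy, unsigned long best_energy)
	{
		return (prev_energy - best_energy) > (prev_energy >> 4);
	}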
 
 /*
@@ -6423,7 +7230,8 @@
  * preempt must be disabled.
  */
 static int
-select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags)
+select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags,
+		    int sibling_count_hint)
 {
 	struct sched_domain *tmp, *sd = NULL;
 	int cpu = smp_processor_id();
@@ -6433,10 +7241,23 @@
 
 	if (sd_flag & SD_BALANCE_WAKE) {
 		record_wakee(p);
-		want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu)
-			      && cpumask_test_cpu(cpu, &p->cpus_allowed);
+
+		if (static_branch_unlikely(&sched_energy_present)) {
+			if (schedtune_prefer_idle(p) && !sched_feat(EAS_PREFER_IDLE) && !sync)
+				goto sd_loop;
+
+			new_cpu = find_energy_efficient_cpu(p, prev_cpu, sync);
+			if (new_cpu >= 0)
+				return new_cpu;
+			new_cpu = prev_cpu;
+		}
+
+		want_affine = !wake_wide(p, sibling_count_hint) &&
+			      !wake_cap(p, cpu, prev_cpu) &&
+			      cpumask_test_cpu(cpu, &p->cpus_allowed);
 	}
 
+sd_loop:
 	rcu_read_lock();
 	for_each_domain(cpu, tmp) {
 		if (!(tmp->flags & SD_LOAD_BALANCE))
@@ -6826,9 +7647,12 @@
 	if (hrtick_enabled(rq))
 		hrtick_start_fair(rq, p);
 
+	update_misfit_status(p, rq);
+
 	return p;
 
 idle:
+	update_misfit_status(NULL, rq);
 	new_tasks = idle_balance(rq, rf);
 
 	/*
@@ -6842,6 +7666,12 @@
 	if (new_tasks > 0)
 		goto again;
 
+	/*
+	 * rq is about to be idle, check if we need to update the
+	 * lost_idle_time of clock_pelt
+	 */
+	update_idle_rq_clock_pelt(rq);
+
 	return NULL;
 }
 
@@ -7034,6 +7864,13 @@
 
 enum fbq_type { regular, remote, all };
 
+enum group_type {
+	group_other = 0,
+	group_misfit_task,
+	group_imbalanced,
+	group_overloaded,
+};
+
 #define LBF_ALL_PINNED	0x01
 #define LBF_NEED_BREAK	0x02
 #define LBF_DST_PINNED  0x04
@@ -7054,6 +7891,7 @@
 	int			new_dst_cpu;
 	enum cpu_idle_type	idle;
 	long			imbalance;
+	unsigned int		src_grp_nr_running;
 	/* The set of CPUs under consideration for load-balancing */
 	struct cpumask		*cpus;
 
@@ -7064,6 +7902,7 @@
 	unsigned int		loop_max;
 
 	enum fbq_type		fbq_type;
+	enum group_type		src_grp_type;
 	struct list_head	tasks;
 };
 
@@ -7476,7 +8315,7 @@
 		if (throttled_hierarchy(cfs_rq))
 			continue;
 
-		if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq))
+		if (update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq))
 			update_tg_load_avg(cfs_rq, 0);
 
 		/* Propagate pending load changes to the parent, if any: */
@@ -7490,8 +8329,8 @@
 	}
 
 	curr_class = rq->curr->sched_class;
-	update_rt_rq_load_avg(rq_clock_task(rq), rq, curr_class == &rt_sched_class);
-	update_dl_rq_load_avg(rq_clock_task(rq), rq, curr_class == &dl_sched_class);
+	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
+	update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
 	update_irq_load_avg(rq, 0);
 	/* Don't need periodic decay once load/util_avg are null */
 	if (others_have_blocked(rq))
@@ -7561,11 +8400,11 @@
 
 	rq_lock_irqsave(rq, &rf);
 	update_rq_clock(rq);
-	update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
+	update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq);
 
 	curr_class = rq->curr->sched_class;
-	update_rt_rq_load_avg(rq_clock_task(rq), rq, curr_class == &rt_sched_class);
-	update_dl_rq_load_avg(rq_clock_task(rq), rq, curr_class == &dl_sched_class);
+	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
+	update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
 	update_irq_load_avg(rq, 0);
 #ifdef CONFIG_NO_HZ_COMMON
 	rq->last_blocked_load_update_tick = jiffies;
@@ -7583,12 +8422,6 @@
 
 /********** Helpers for find_busiest_group ************************/
 
-enum group_type {
-	group_other = 0,
-	group_imbalanced,
-	group_overloaded,
-};
-
 /*
  * sg_lb_stats - stats of a sched_group required for load_balancing
  */
@@ -7604,6 +8437,7 @@
 	unsigned int group_weight;
 	enum group_type group_type;
 	int group_no_capacity;
+	unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */
 #ifdef CONFIG_NUMA_BALANCING
 	unsigned int nr_numa_running;
 	unsigned int nr_preferred_running;
@@ -7676,10 +8510,9 @@
 	return load_idx;
 }
 
-static unsigned long scale_rt_capacity(struct sched_domain *sd, int cpu)
+static unsigned long scale_rt_capacity(int cpu, unsigned long max)
 {
 	struct rq *rq = cpu_rq(cpu);
-	unsigned long max = arch_scale_cpu_capacity(sd, cpu);
 	unsigned long used, free;
 	unsigned long irq;
 
@@ -7699,12 +8532,46 @@
 	return scale_irq_capacity(free, irq, max);
 }
 
+void init_max_cpu_capacity(struct max_cpu_capacity *mcc)
+{
+	raw_spin_lock_init(&mcc->lock);
+	mcc->val = 0;
+	mcc->cpu = -1;
+}
+
 static void update_cpu_capacity(struct sched_domain *sd, int cpu)
 {
-	unsigned long capacity = scale_rt_capacity(sd, cpu);
+	unsigned long capacity = arch_scale_cpu_capacity(sd, cpu);
 	struct sched_group *sdg = sd->groups;
+	struct max_cpu_capacity *mcc;
+	unsigned long max_capacity;
+	int max_cap_cpu;
+	unsigned long flags;
 
-	cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(sd, cpu);
+	cpu_rq(cpu)->cpu_capacity_orig = capacity;
+
+	capacity *= arch_scale_max_freq_capacity(sd, cpu);
+	capacity >>= SCHED_CAPACITY_SHIFT;
+
+	mcc = &cpu_rq(cpu)->rd->max_cpu_capacity;
+
+	raw_spin_lock_irqsave(&mcc->lock, flags);
+	max_capacity = mcc->val;
+	max_cap_cpu = mcc->cpu;
+
+	if ((max_capacity > capacity && max_cap_cpu == cpu) ||
+	    (max_capacity < capacity)) {
+		mcc->val = capacity;
+		mcc->cpu = cpu;
+#ifdef CONFIG_SCHED_DEBUG
+		raw_spin_unlock_irqrestore(&mcc->lock, flags);
+		pr_info("CPU%d: update max cpu_capacity %lu\n", cpu, capacity);
+		goto skip_unlock;
+#endif
+	}
+	raw_spin_unlock_irqrestore(&mcc->lock, flags);
+
+skip_unlock: __attribute__ ((unused));
+	capacity = scale_rt_capacity(cpu, capacity);
 
 	if (!capacity)
 		capacity = 1;
@@ -7712,13 +8579,14 @@
 	cpu_rq(cpu)->cpu_capacity = capacity;
 	sdg->sgc->capacity = capacity;
 	sdg->sgc->min_capacity = capacity;
+	sdg->sgc->max_capacity = capacity;
 }
 
 void update_group_capacity(struct sched_domain *sd, int cpu)
 {
 	struct sched_domain *child = sd->child;
 	struct sched_group *group, *sdg = sd->groups;
-	unsigned long capacity, min_capacity;
+	unsigned long capacity, min_capacity, max_capacity;
 	unsigned long interval;
 
 	interval = msecs_to_jiffies(sd->balance_interval);
@@ -7732,6 +8600,7 @@
 
 	capacity = 0;
 	min_capacity = ULONG_MAX;
+	max_capacity = 0;
 
 	if (child->flags & SD_OVERLAP) {
 		/*
@@ -7762,6 +8631,7 @@
 			}
 
 			min_capacity = min(capacity, min_capacity);
+			max_capacity = max(capacity, max_capacity);
 		}
 	} else  {
 		/*
@@ -7775,12 +8645,14 @@
 
 			capacity += sgc->capacity;
 			min_capacity = min(sgc->min_capacity, min_capacity);
+			max_capacity = max(sgc->max_capacity, max_capacity);
 			group = group->next;
 		} while (group != child->groups);
 	}
 
 	sdg->sgc->capacity = capacity;
 	sdg->sgc->min_capacity = min_capacity;
+	sdg->sgc->max_capacity = max_capacity;
 }
 
 /*
@@ -7876,16 +8748,27 @@
 }
 
 /*
- * group_smaller_cpu_capacity: Returns true if sched_group sg has smaller
+ * group_smaller_min_cpu_capacity: Returns true if sched_group sg has smaller
  * per-CPU capacity than sched_group ref.
  */
 static inline bool
-group_smaller_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
+group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
 {
 	return sg->sgc->min_capacity * capacity_margin <
 						ref->sgc->min_capacity * 1024;
 }
 
+/*
+ * group_smaller_max_cpu_capacity: Returns true if sched_group sg has smaller
+ * per-CPU capacity_orig than sched_group ref.
+ */
+static inline bool
+group_smaller_max_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
+{
+	return sg->sgc->max_capacity * capacity_margin <
+						ref->sgc->max_capacity * 1024;
+}
+
 static inline enum
 group_type group_classify(struct sched_group *group,
 			  struct sg_lb_stats *sgs)
@@ -7896,6 +8779,9 @@
 	if (sg_imbalanced(group))
 		return group_imbalanced;
 
+	if (sgs->group_misfit_task_load)
+		return group_misfit_task;
+
 	return group_other;
 }
 
@@ -7925,16 +8811,16 @@
  * update_sg_lb_stats - Update sched_group's statistics for load balancing.
  * @env: The load balancing environment.
  * @group: sched_group whose statistics are to be updated.
- * @load_idx: Load index of sched_domain of this_cpu for load calc.
- * @local_group: Does group contain this_cpu.
  * @sgs: variable to hold the statistics for this group.
- * @overload: Indicate more than one runnable task for any CPU.
+ * @sg_status: Holds flag indicating the status of the sched_group
  */
 static inline void update_sg_lb_stats(struct lb_env *env,
-			struct sched_group *group, int load_idx,
-			int local_group, struct sg_lb_stats *sgs,
-			bool *overload)
+				      struct sched_group *group,
+				      struct sg_lb_stats *sgs,
+				      int *sg_status)
 {
+	int local_group = cpumask_test_cpu(env->dst_cpu, sched_group_span(group));
+	int load_idx = get_sd_load_idx(env->sd, env->idle);
 	unsigned long load;
 	int i, nr_running;
 
@@ -7958,7 +8844,10 @@
 
 		nr_running = rq->nr_running;
 		if (nr_running > 1)
-			*overload = true;
+			*sg_status |= SG_OVERLOAD;
+
+		if (cpu_overutilized(i))
+			*sg_status |= SG_OVERUTILIZED;
 
 #ifdef CONFIG_NUMA_BALANCING
 		sgs->nr_numa_running += rq->nr_numa_running;
@@ -7970,6 +8859,12 @@
 		 */
 		if (!nr_running && idle_cpu(i))
 			sgs->idle_cpus++;
+
+		if (env->sd->flags & SD_ASYM_CPUCAPACITY &&
+		    sgs->group_misfit_task_load < rq->misfit_task_load) {
+			sgs->group_misfit_task_load = rq->misfit_task_load;
+			*sg_status |= SG_OVERLOAD;
+		}
 	}
 
 	/* Adjust by relative CPU capacity of the group */
@@ -8005,6 +8900,17 @@
 {
 	struct sg_lb_stats *busiest = &sds->busiest_stat;
 
+	/*
+	 * Don't try to pull misfit tasks we can't help.
+	 * We can use max_capacity here since a reduction in capacity on some
+	 * CPUs in the group should either be possible to resolve
+	 * internally or be covered by avg_load imbalance (eventually).
+	 */
+	if (sgs->group_type == group_misfit_task &&
+	    (!group_smaller_max_cpu_capacity(sg, sds->local) ||
+	     !group_has_capacity(env, &sds->local_stat)))
+		return false;
+
 	if (sgs->group_type > busiest->group_type)
 		return true;
 
@@ -8024,7 +8930,14 @@
 	 * power/energy consequences are not considered.
 	 */
 	if (sgs->sum_nr_running <= sgs->group_weight &&
-	    group_smaller_cpu_capacity(sds->local, sg))
+	    group_smaller_min_cpu_capacity(sds->local, sg))
+		return false;
+
+	/*
+	 * If we have more than one misfit sg go with the biggest misfit.
+	 */
+	if (sgs->group_type == group_misfit_task &&
+	    sgs->group_misfit_task_load < busiest->group_misfit_task_load)
 		return false;
 
 asym_packing:
@@ -8095,19 +9008,14 @@
 	struct sched_group *sg = env->sd->groups;
 	struct sg_lb_stats *local = &sds->local_stat;
 	struct sg_lb_stats tmp_sgs;
-	int load_idx, prefer_sibling = 0;
-	bool overload = false;
-
-	if (child && child->flags & SD_PREFER_SIBLING)
-		prefer_sibling = 1;
+	bool prefer_sibling = child && child->flags & SD_PREFER_SIBLING;
+	int sg_status = 0;
 
 #ifdef CONFIG_NO_HZ_COMMON
 	if (env->idle == CPU_NEWLY_IDLE && READ_ONCE(nohz.has_blocked))
 		env->flags |= LBF_NOHZ_STATS;
 #endif
 
-	load_idx = get_sd_load_idx(env->sd, env->idle);
-
 	do {
 		struct sg_lb_stats *sgs = &tmp_sgs;
 		int local_group;
@@ -8122,8 +9030,7 @@
 				update_group_capacity(env->sd, env->dst_cpu);
 		}
 
-		update_sg_lb_stats(env, sg, load_idx, local_group, sgs,
-						&overload);
+		update_sg_lb_stats(env, sg, sgs, &sg_status);
 
 		if (local_group)
 			goto next_group;
@@ -8171,11 +9078,22 @@
 	if (env->sd->flags & SD_NUMA)
 		env->fbq_type = fbq_classify_group(&sds->busiest_stat);
 
+	env->src_grp_nr_running = sds->busiest_stat.sum_nr_running;
+
 	if (!env->sd->parent) {
+		struct root_domain *rd = env->dst_rq->rd;
+
 		/* update overload indicator if we are at root domain */
-		if (env->dst_rq->rd->overload != overload)
-			env->dst_rq->rd->overload = overload;
+		WRITE_ONCE(rd->overload, sg_status & SG_OVERLOAD);
+
+		/* Update over-utilization (tipping point, U >= 0) indicator */
+		WRITE_ONCE(rd->overutilized, sg_status & SG_OVERUTILIZED);
+		trace_sched_overutilized(!!(sg_status & SG_OVERUTILIZED));
+	} else if (sg_status & SG_OVERUTILIZED) {
+		WRITE_ONCE(env->dst_rq->rd->overutilized, SG_OVERUTILIZED);
+		trace_sched_overutilized(1);
 	}
 }
 
 /**
@@ -8291,7 +9209,22 @@
 	capa_move /= SCHED_CAPACITY_SCALE;
 
 	/* Move if we gain throughput */
-	if (capa_move > capa_now)
+	if (capa_move > capa_now) {
+		env->imbalance = busiest->load_per_task;
+		return;
+	}
+
+	/* We can't see throughput improvement with the load-based
+	 * method, but it is possible depending upon group size and
+	 * capacity range that there might still be an underutilized
+	 * cpu available in an asymmetric capacity system. Do one last
+	 * check just in case.
+	 */
+	if (env->sd->flags & SD_ASYM_CPUCAPACITY &&
+		busiest->group_type == group_overloaded &&
+		busiest->sum_nr_running > busiest->group_weight &&
+		local->sum_nr_running < local->group_weight &&
+		local->group_capacity < busiest->group_capacity)
 		env->imbalance = busiest->load_per_task;
 }
 
@@ -8324,8 +9257,9 @@
 	 * factors in sg capacity and sgs with smaller group_type are
 	 * skipped when updating the busiest sg:
 	 */
-	if (busiest->avg_load <= sds->avg_load ||
-	    local->avg_load >= sds->avg_load) {
+	if (busiest->group_type != group_misfit_task &&
+	    (busiest->avg_load <= sds->avg_load ||
+	     local->avg_load >= sds->avg_load)) {
 		env->imbalance = 0;
 		return fix_small_imbalance(env, sds);
 	}
@@ -8359,6 +9293,22 @@
 		(sds->avg_load - local->avg_load) * local->group_capacity
 	) / SCHED_CAPACITY_SCALE;
 
+	/* Boost imbalance to allow misfit task to be balanced.
+	 * Always do this if we are doing a NEWLY_IDLE balance
+	 * on the assumption that any tasks we have must not be
+	 * long-running (and hence we cannot rely upon load).
+	 * However if we are not idle, we should assume the tasks
+	 * we have are longer running and not override load-based
+	 * calculations above unless we are sure that the local
+	 * group is underutilized.
+	 */
+	if (busiest->group_type == group_misfit_task &&
+		(env->idle == CPU_NEWLY_IDLE ||
+		local->sum_nr_running < local->group_weight)) {
+		env->imbalance = max_t(long, env->imbalance,
+				       busiest->group_misfit_task_load);
+	}
+
 	/*
 	 * if *imbalance is less than the average load per runnable task
 	 * there is no guarantee that any tasks will be moved so we'll have
@@ -8394,6 +9344,14 @@
 	 * this level.
 	 */
 	update_sd_lb_stats(env, &sds);
+
+	if (static_branch_unlikely(&sched_energy_present)) {
+		struct root_domain *rd = env->dst_rq->rd;
+
+		if (rcu_dereference(rd->pd) && !READ_ONCE(rd->overutilized))
+			goto out_balanced;
+	}
+
 	local = &sds.local_stat;
 	busiest = &sds.busiest_stat;
 
@@ -8425,6 +9383,10 @@
 	    busiest->group_no_capacity)
 		goto force_balance;
 
+	/* Misfit tasks should be dealt with regardless of the avg load */
+	if (busiest->group_type == group_misfit_task)
+		goto force_balance;
+
 	/*
 	 * If the local group is busier than the selected busiest group
 	 * don't try and pull any tasks.
@@ -8462,6 +9424,7 @@
 
 force_balance:
 	/* Looks like there is an imbalance. Compute it */
+	env->src_grp_type = busiest->group_type;
 	calculate_imbalance(env, &sds);
 	return env->imbalance ? sds.busiest : NULL;
 
@@ -8509,8 +9472,32 @@
 		if (rt > env->fbq_type)
 			continue;
 
+		/*
+		 * For ASYM_CPUCAPACITY domains with misfit tasks we simply
+		 * seek the "biggest" misfit task.
+		 */
+		if (env->src_grp_type == group_misfit_task) {
+			if (rq->misfit_task_load > busiest_load) {
+				busiest_load = rq->misfit_task_load;
+				busiest = rq;
+			}
+
+			continue;
+		}
+
 		capacity = capacity_of(i);
 
+		/*
+		 * For ASYM_CPUCAPACITY domains, don't pick a CPU that could
+		 * eventually lead to active_balancing high->low capacity.
+		 * Higher per-CPU capacity is considered better than balancing
+		 * average load.
+		 */
+		if (env->sd->flags & SD_ASYM_CPUCAPACITY &&
+		    capacity_of(env->dst_cpu) < capacity &&
+		    rq->nr_running == 1)
+			continue;
+
 		wl = weighted_cpuload(rq);
 
 		/*
@@ -8578,6 +9565,20 @@
 			return 1;
 	}
 
+	if (env->src_grp_type == group_misfit_task)
+		return 1;
+
+	if ((capacity_of(env->src_cpu) < capacity_of(env->dst_cpu)) &&
+				env->src_rq->cfs.h_nr_running == 1 &&
+				cpu_overutilized(env->src_cpu) &&
+				!cpu_overutilized(env->dst_cpu)) {
+		return 1;
+	}
+
+	if (env->src_grp_type == group_overloaded && env->src_rq->misfit_task_load)
+		return 1;
+
 	return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
 }
 
@@ -8796,7 +9797,8 @@
 		 * excessive cache_hot migrations and active balances.
 		 */
 		if (idle != CPU_NEWLY_IDLE)
-			sd->nr_balance_failed++;
+			if (env.src_grp_nr_running > 1)
+				sd->nr_balance_failed++;
 
 		if (need_active_balance(&env)) {
 			unsigned long flags;
@@ -9224,7 +10226,7 @@
 	if (time_before(now, nohz.next_balance))
 		goto out;
 
-	if (rq->nr_running >= 2) {
+	if (rq->nr_running >= 2 || rq->misfit_task_load) {
 		flags = NOHZ_KICK_MASK;
 		goto out;
 	}
@@ -9253,7 +10255,7 @@
 		}
 	}
 
-	sd = rcu_dereference(per_cpu(sd_asym, cpu));
+	sd = rcu_dereference(per_cpu(sd_asym_packing, cpu));
 	if (sd) {
 		for_each_cpu(i, sched_domain_span(sd)) {
 			if (i == cpu ||
@@ -9593,7 +10595,7 @@
 	rq_unpin_lock(this_rq, rf);
 
 	if (this_rq->avg_idle < sysctl_sched_migration_cost ||
-	    !this_rq->rd->overload) {
+	    !READ_ONCE(this_rq->rd->overload)) {
 
 		rcu_read_lock();
 		sd = rcu_dereference_check_sched_domain(this_rq->sd);
@@ -9755,6 +10757,9 @@
 
 	if (static_branch_unlikely(&sched_numa_balancing))
 		task_tick_numa(rq, curr);
+
+	update_misfit_status(curr, rq);
+	update_overutilized_status(task_rq(curr));
 }
 
 /*
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 85ae848..6660d53 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -90,3 +90,33 @@
  * UtilEstimation. Use estimated CPU utilization.
  */
 SCHED_FEAT(UTIL_EST, true)
+
+/*
+ * Fast pre-selection of CPU candidates for EAS.
+ */
+SCHED_FEAT(FIND_BEST_TARGET, false)
+
+/*
+ * Energy aware scheduling algorithm choices:
+ * EAS_PREFER_IDLE
+ *   Direct tasks in a schedtune.prefer_idle=1 group through
+ *   the EAS path for wakeup task placement. Otherwise, put
+ *   those tasks through the mainline slow path.
+ */
+SCHED_FEAT(EAS_PREFER_IDLE, true)
+
+/*
+ * Request max frequency from schedutil whenever a RT task is running.
+ */
+SCHED_FEAT(SUGOV_RT_MAX_FREQ, false)
+
+/*
+ * Apply schedtune boost hold to tasks of all sched classes.
+ * If enabled, schedtune will hold the boost applied to a CPU
+ * for 50ms regardless of task activation - if the task is
+ * still running 50ms later, the boost hold expires and the schedtune
+ * boost is removed as soon as the task stops.
+ * If disabled, this behaviour will only apply to tasks of the
+ * RT class.
+ */
+SCHED_FEAT(SCHEDTUNE_BOOST_HOLD_ALL, false)
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 16f8414..2c8719f 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -16,9 +16,10 @@
  * sched_idle_set_state - Record idle state for the current CPU.
  * @idle_state: State to record.
  */
-void sched_idle_set_state(struct cpuidle_state *idle_state)
+void sched_idle_set_state(struct cpuidle_state *idle_state, int index)
 {
 	idle_set_state(this_rq(), idle_state);
+	idle_set_state_idx(this_rq(), index);
 }
 
 static int __read_mostly cpu_idle_force_poll;
@@ -374,7 +375,8 @@
 
 #ifdef CONFIG_SMP
 static int
-select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags)
+select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags,
+		    int sibling_count_hint)
 {
 	return task_cpu(p); /* IDLE tasks are never migrated */
 }
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index a171c12..28a5165 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -91,19 +91,73 @@
 	return delta;
 }
 
-/*
- * a1 = a0 * e + a * (1 - e)
+/**
+ * fixed_power_int - compute: x^n, in O(log n) time
+ *
+ * @x:         base of the power
+ * @frac_bits: fractional bits of @x
+ * @n:         power to raise @x to.
+ *
+ * By exploiting the relation between the definition of the natural power
+ * function: x^n := x*x*...*x (x multiplied by itself for n times), and
+ * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i,
+ * (where: n_i \elem {0, 1}, the binary vector representing n),
+ * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is
+ * of course trivially computable in O(log_2 n), the length of our binary
+ * vector.
  */
 static unsigned long
-calc_load(unsigned long load, unsigned long exp, unsigned long active)
+fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
 {
-	unsigned long newload;
+	unsigned long result = 1UL << frac_bits;
 
-	newload = load * exp + active * (FIXED_1 - exp);
-	if (active >= load)
-		newload += FIXED_1-1;
+	if (n) {
+		for (;;) {
+			if (n & 1) {
+				result *= x;
+				result += 1UL << (frac_bits - 1);
+				result >>= frac_bits;
+			}
+			n >>= 1;
+			if (!n)
+				break;
+			x *= x;
+			x += 1UL << (frac_bits - 1);
+			x >>= frac_bits;
+		}
+	}
 
-	return newload / FIXED_1;
+	return result;
+}
+
+/*
+ * a1 = a0 * e + a * (1 - e)
+ *
+ * a2 = a1 * e + a * (1 - e)
+ *    = (a0 * e + a * (1 - e)) * e + a * (1 - e)
+ *    = a0 * e^2 + a * (1 - e) * (1 + e)
+ *
+ * a3 = a2 * e + a * (1 - e)
+ *    = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e)
+ *    = a0 * e^3 + a * (1 - e) * (1 + e + e^2)
+ *
+ *  ...
+ *
+ * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1]
+ *    = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e)
+ *    = a0 * e^n + a * (1 - e^n)
+ *
+ * [1] application of the geometric series:
+ *
+ *              n         1 - x^(n+1)
+ *     S_n := \Sum x^i = -------------
+ *             i=0          1 - x
+ */
+unsigned long
+calc_load_n(unsigned long load, unsigned long exp,
+	    unsigned long active, unsigned int n)
+{
+	return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
 }
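The closed form can be sanity-checked from userspace: folding n missed periods with one calc_load_n() call lands on essentially the same value as applying the single-period update n times, differing only by fixed-point rounding. A self-contained sketch using the 1-minute decay factor (EXP_1 = 1884 ≈ 2048/e^(5s/60s)); the helpers below merely mirror the kernel ones:

	#include <stdio.h>

	#define FSHIFT	11
	#define FIXED_1	(1UL << FSHIFT)
	#define EXP_1	1884		/* 1/exp(5s/60s) in 11-bit fixed point */

	/* Single-period update, mirroring calc_load(). */
	static unsigned long calc_load(unsigned long load, unsigned long exp,
				       unsigned long active)
	{
		unsigned long newload = load * exp + active * (FIXED_1 - exp);

		if (active >= load)
			newload += FIXED_1 - 1;
		return newload / FIXED_1;
	}

	/* x^n in fixed point, same square-and-multiply idea as fixed_power_int(). */
	static unsigned long fpow(unsigned long x, unsigned int frac_bits, unsigned int n)
	{
		unsigned long result = 1UL << frac_bits;

		while (n) {
			if (n & 1)
				result = (result * x + (1UL << (frac_bits - 1))) >> frac_bits;
			n >>= 1;
			x = (x * x + (1UL << (frac_bits - 1))) >> frac_bits;
		}
		return result;
	}

	int main(void)
	{
		unsigned long load = 3 * FIXED_1, active = 0, iter = load;
		unsigned int i, n = 10;

		for (i = 0; i < n; i++)
			iter = calc_load(iter, EXP_1, active);

		printf("iterated=%lu  closed-form=%lu\n",
		       iter, calc_load(load, fpow(EXP_1, FSHIFT, n), active));
		return 0;
	}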
 
 #ifdef CONFIG_NO_HZ_COMMON
@@ -225,75 +279,6 @@
 	return delta;
 }
 
-/**
- * fixed_power_int - compute: x^n, in O(log n) time
- *
- * @x:         base of the power
- * @frac_bits: fractional bits of @x
- * @n:         power to raise @x to.
- *
- * By exploiting the relation between the definition of the natural power
- * function: x^n := x*x*...*x (x multiplied by itself for n times), and
- * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i,
- * (where: n_i \elem {0, 1}, the binary vector representing n),
- * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is
- * of course trivially computable in O(log_2 n), the length of our binary
- * vector.
- */
-static unsigned long
-fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
-{
-	unsigned long result = 1UL << frac_bits;
-
-	if (n) {
-		for (;;) {
-			if (n & 1) {
-				result *= x;
-				result += 1UL << (frac_bits - 1);
-				result >>= frac_bits;
-			}
-			n >>= 1;
-			if (!n)
-				break;
-			x *= x;
-			x += 1UL << (frac_bits - 1);
-			x >>= frac_bits;
-		}
-	}
-
-	return result;
-}
-
-/*
- * a1 = a0 * e + a * (1 - e)
- *
- * a2 = a1 * e + a * (1 - e)
- *    = (a0 * e + a * (1 - e)) * e + a * (1 - e)
- *    = a0 * e^2 + a * (1 - e) * (1 + e)
- *
- * a3 = a2 * e + a * (1 - e)
- *    = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e)
- *    = a0 * e^3 + a * (1 - e) * (1 + e + e^2)
- *
- *  ...
- *
- * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1]
- *    = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e)
- *    = a0 * e^n + a * (1 - e^n)
- *
- * [1] application of the geometric series:
- *
- *              n         1 - x^(n+1)
- *     S_n := \Sum x^i = -------------
- *             i=0          1 - x
- */
-static unsigned long
-calc_load_n(unsigned long load, unsigned long exp,
-	    unsigned long active, unsigned int n)
-{
-	return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
-}
-
 /*
  * NO_HZ can leave us missing all per-CPU ticks calling
  * calc_load_fold_active(), but since a NO_HZ CPU folds its delta into
diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
index 48a1264..19ba404 100644
--- a/kernel/sched/pelt.c
+++ b/kernel/sched/pelt.c
@@ -26,9 +26,10 @@
 
 #include <linux/sched.h>
 #include "sched.h"
-#include "sched-pelt.h"
 #include "pelt.h"
 
+#include <trace/events/sched.h>
+
 /*
  * Approximate:
  *   val * y^n,    where y^32 ~= 0.5 (~1 scheduling period)
@@ -106,16 +107,12 @@
  *                     n=1
  */
 static __always_inline u32
-accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
+accumulate_sum(u64 delta, struct sched_avg *sa,
 	       unsigned long load, unsigned long runnable, int running)
 {
-	unsigned long scale_freq, scale_cpu;
 	u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */
 	u64 periods;
 
-	scale_freq = arch_scale_freq_capacity(cpu);
-	scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
-
 	delta += sa->period_contrib;
 	periods = delta / 1024; /* A period is 1024us (~1ms) */
 
@@ -137,13 +134,12 @@
 	}
 	sa->period_contrib = delta;
 
-	contrib = cap_scale(contrib, scale_freq);
 	if (load)
 		sa->load_sum += load * contrib;
 	if (runnable)
 		sa->runnable_load_sum += runnable * contrib;
 	if (running)
-		sa->util_sum += contrib * scale_cpu;
+		sa->util_sum += contrib << SCHED_CAPACITY_SHIFT;
 
 	return periods;
 }
@@ -177,7 +173,7 @@
  *            = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
  */
 static __always_inline int
-___update_load_sum(u64 now, int cpu, struct sched_avg *sa,
+___update_load_sum(u64 now, struct sched_avg *sa,
 		  unsigned long load, unsigned long runnable, int running)
 {
 	u64 delta;
@@ -221,7 +217,7 @@
 	 * Step 1: accumulate *_sum since last_update_time. If we haven't
 	 * crossed period boundaries, finish.
 	 */
-	if (!accumulate_sum(delta, cpu, sa, load, runnable, running))
+	if (!accumulate_sum(delta, sa, load, runnable, running))
 		return 0;
 
 	return 1;
@@ -267,43 +263,46 @@
  *   runnable_load_avg = \Sum se->avg.runnable_load_avg
  */
 
-int __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se)
+int __update_load_avg_blocked_se(u64 now, struct sched_entity *se)
 {
-	if (entity_is_task(se))
-		se->runnable_weight = se->load.weight;
-
-	if (___update_load_sum(now, cpu, &se->avg, 0, 0, 0)) {
+	if (___update_load_sum(now, &se->avg, 0, 0, 0)) {
 		___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
+
+		trace_sched_load_se(se);
+
 		return 1;
 	}
 
 	return 0;
 }
 
-int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se)
+int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	if (entity_is_task(se))
-		se->runnable_weight = se->load.weight;
-
-	if (___update_load_sum(now, cpu, &se->avg, !!se->on_rq, !!se->on_rq,
+	if (___update_load_sum(now, &se->avg, !!se->on_rq, !!se->on_rq,
 				cfs_rq->curr == se)) {
 
 		___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
 		cfs_se_util_change(&se->avg);
+
+		trace_sched_load_se(se);
+
 		return 1;
 	}
 
 	return 0;
 }
 
-int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)
+int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq)
 {
-	if (___update_load_sum(now, cpu, &cfs_rq->avg,
+	if (___update_load_sum(now, &cfs_rq->avg,
 				scale_load_down(cfs_rq->load.weight),
 				scale_load_down(cfs_rq->runnable_weight),
 				cfs_rq->curr != NULL)) {
 
 		___update_load_avg(&cfs_rq->avg, 1, 1);
+
+		trace_sched_load_cfs_rq(cfs_rq);
+
 		return 1;
 	}
 
@@ -323,12 +322,15 @@
 
 int update_rt_rq_load_avg(u64 now, struct rq *rq, int running)
 {
-	if (___update_load_sum(now, rq->cpu, &rq->avg_rt,
+	if (___update_load_sum(now, &rq->avg_rt,
 				running,
 				running,
 				running)) {
 
 		___update_load_avg(&rq->avg_rt, 1, 1);
+
+		trace_sched_load_rt_rq(rq);
+
 		return 1;
 	}
 
@@ -346,7 +348,7 @@
 
 int update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
 {
-	if (___update_load_sum(now, rq->cpu, &rq->avg_dl,
+	if (___update_load_sum(now, &rq->avg_dl,
 				running,
 				running,
 				running)) {
@@ -371,22 +373,31 @@
 int update_irq_load_avg(struct rq *rq, u64 running)
 {
 	int ret = 0;
+
+	/*
+	 * We can't use clock_pelt because irq time is not accounted in
+	 * clock_task. Instead we directly scale the running time to
+	 * reflect the real amount of computation
+	 */
+	running = cap_scale(running, arch_scale_freq_capacity(cpu_of(rq)));
+	running = cap_scale(running, arch_scale_cpu_capacity(NULL, cpu_of(rq)));
+
 	/*
 	 * We know the time that has been used by interrupt since last update
 	 * but we don't know when. Let's be pessimistic and assume that interrupt
 	 * has happened just before the update. This is not so far from reality
 	 * because interrupt will most probably wake up a task and trigger an update
-	 * of rq clock during which the metric si updated.
+	 * of rq clock during which the metric is updated.
 	 * We start to decay with normal context time and then we add the
 	 * interrupt context time.
 	 * We can safely remove running from rq->clock because
 	 * rq->clock += delta with delta >= running
 	 */
-	ret = ___update_load_sum(rq->clock - running, rq->cpu, &rq->avg_irq,
+	ret = ___update_load_sum(rq->clock - running, &rq->avg_irq,
 				0,
 				0,
 				0);
-	ret += ___update_load_sum(rq->clock, rq->cpu, &rq->avg_irq,
+	ret += ___update_load_sum(rq->clock, &rq->avg_irq,
 				1,
 				1,
 				1);
diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h
index 7e56b48..7489d5f 100644
--- a/kernel/sched/pelt.h
+++ b/kernel/sched/pelt.h
@@ -1,8 +1,9 @@
 #ifdef CONFIG_SMP
+#include "sched-pelt.h"
 
-int __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se);
-int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se);
-int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq);
+int __update_load_avg_blocked_se(u64 now, struct sched_entity *se);
+int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se);
+int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq);
 int update_rt_rq_load_avg(u64 now, struct rq *rq, int running);
 int update_dl_rq_load_avg(u64 now, struct rq *rq, int running);
 
@@ -42,6 +43,101 @@
 	WRITE_ONCE(avg->util_est.enqueued, enqueued);
 }
 
+/*
+ * The clock_pelt scales the time to reflect the effective amount of
+ * computation done during the running delta time but then syncs back to
+ * clock_task when rq is idle.
+ *
+ *
+ * absolute time   | 1| 2| 3| 4| 5| 6| 7| 8| 9|10|11|12|13|14|15|16
+ * @ max capacity  ------******---------------******---------------
+ * @ half capacity ------************---------************---------
+ * clock pelt      | 1| 2|    3|    4| 7| 8| 9|   10|   11|14|15|16
+ *
+ */
+static inline void update_rq_clock_pelt(struct rq *rq, s64 delta)
+{
+	if (unlikely(is_idle_task(rq->curr))) {
+		/* The rq is idle, we can sync to clock_task */
+		rq->clock_pelt  = rq_clock_task(rq);
+		return;
+	}
+
+	/*
+	 * When a rq runs at a lower compute capacity, it will need
+	 * more time to do the same amount of work than at max
+	 * capacity. In order to be invariant, we scale the delta to
+	 * reflect how much work has been really done.
+	 * Running longer results in stealing idle time that will
+	 * disturb the load signal compared to max capacity. This
+	 * stolen idle time will be automatically reflected when the
+	 * rq will be idle and the clock will be synced with
+	 * rq_clock_task.
+	 */
+
+	/*
+	 * Scale the elapsed time to reflect the real amount of
+	 * computation
+	 */
+	delta = cap_scale(delta, arch_scale_cpu_capacity(NULL, cpu_of(rq)));
+	delta = cap_scale(delta, arch_scale_freq_capacity(cpu_of(rq)));
+
+	rq->clock_pelt += delta;
+}
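Numerically: 8ms of running time on a CPU at half compute capacity and half of its maximum frequency only advances clock_pelt by 2ms worth of work; the difference is made up when the rq goes idle and clock_pelt is synced back to clock_task. A small userspace model of the scaling, with illustrative names:

	#define SCHED_CAPACITY_SHIFT	10

	static long long cap_scale(long long delta, unsigned long scale)
	{
		return (delta * scale) >> SCHED_CAPACITY_SHIFT;
	}

	/* Scale a running delta by CPU capacity and current frequency, as
	 * update_rq_clock_pelt() does via the arch_scale_*() helpers. */
	static long long pelt_delta(long long delta, unsigned long cpu_scale,
				    unsigned long freq_scale)
	{
		delta = cap_scale(delta, cpu_scale);
		return cap_scale(delta, freq_scale);
	}

	/* pelt_delta(8000000, 512, 512) == 2000000: 8ms of wall time counts
	 * as 2ms of work at half capacity and half max frequency. */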
+
+/*
+ * When rq becomes idle, we have to check if it has lost idle time
+ * because it was fully busy. A rq is fully used when the /Sum util_sum
+ * is greater or equal to:
+ * (LOAD_AVG_MAX - 1024 + rq->cfs.avg.period_contrib) << SCHED_CAPACITY_SHIFT;
+ * For optimization and computation-rounding purposes, we don't take into account
+ * the position in the current window (period_contrib) and we use the higher
+ * bound of util_sum to decide.
+ */
+static inline void update_idle_rq_clock_pelt(struct rq *rq)
+{
+	u32 divider = ((LOAD_AVG_MAX - 1024) << SCHED_CAPACITY_SHIFT) - LOAD_AVG_MAX;
+	u32 util_sum = rq->cfs.avg.util_sum;
+	util_sum += rq->avg_rt.util_sum;
+	util_sum += rq->avg_dl.util_sum;
+
+	/*
+	 * Reflecting stolen time makes sense only if the idle
+	 * phase would be present at max capacity. As soon as the
+	 * utilization of a rq has reached the maximum value, it is
+	 * considered as an always running rq without idle time to
+	 * steal. This potential idle time is considered as lost in
+	 * this case. We keep track of this lost idle time compared to
+	 * rq's clock_task.
+	 */
+	if (util_sum >= divider)
+		rq->lost_idle_time += rq_clock_task(rq) - rq->clock_pelt;
+}
+
+static inline u64 rq_clock_pelt(struct rq *rq)
+{
+	lockdep_assert_held(&rq->lock);
+	assert_clock_updated(rq);
+
+	return rq->clock_pelt - rq->lost_idle_time;
+}
+
+#ifdef CONFIG_CFS_BANDWIDTH
+/* rq->task_clock normalized against any time this cfs_rq has spent throttled */
+static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
+{
+	if (unlikely(cfs_rq->throttle_count))
+		return cfs_rq->throttled_clock_task - cfs_rq->throttled_clock_task_time;
+
+	return rq_clock_pelt(rq_of(cfs_rq)) - cfs_rq->throttled_clock_task_time;
+}
+#else
+static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
+{
+	return rq_clock_pelt(rq_of(cfs_rq));
+}
+#endif
+
 #else
 
 static inline int
@@ -67,6 +163,18 @@
 {
 	return 0;
 }
+
+static inline u64 rq_clock_pelt(struct rq *rq)
+{
+	return rq_clock_task(rq);
+}
+
+static inline void
+update_rq_clock_pelt(struct rq *rq, s64 delta) { }
+
+static inline void
+update_idle_rq_clock_pelt(struct rq *rq) { }
+
 #endif
 
 
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
new file mode 100644
index 0000000..e88918e
--- /dev/null
+++ b/kernel/sched/psi.c
@@ -0,0 +1,1280 @@
+/*
+ * Pressure stall information for CPU, memory and IO
+ *
+ * Copyright (c) 2018 Facebook, Inc.
+ * Author: Johannes Weiner <hannes@cmpxchg.org>
+ *
+ * Polling support by Suren Baghdasaryan <surenb@google.com>
+ * Copyright (c) 2018 Google, Inc.
+ *
+ * When CPU, memory and IO are contended, tasks experience delays that
+ * reduce throughput and introduce latencies into the workload. Memory
+ * and IO contention, in addition, can cause a full loss of forward
+ * progress in which the CPU goes idle.
+ *
+ * This code aggregates individual task delays into resource pressure
+ * metrics that indicate problems with both workload health and
+ * resource utilization.
+ *
+ *			Model
+ *
+ * The time in which a task can execute on a CPU is our baseline for
+ * productivity. Pressure expresses the amount of time in which this
+ * potential cannot be realized due to resource contention.
+ *
+ * This concept of productivity has two components: the workload and
+ * the CPU. To measure the impact of pressure on both, we define two
+ * contention states for a resource: SOME and FULL.
+ *
+ * In the SOME state of a given resource, one or more tasks are
+ * delayed on that resource. This affects the workload's ability to
+ * perform work, but the CPU may still be executing other tasks.
+ *
+ * In the FULL state of a given resource, all non-idle tasks are
+ * delayed on that resource such that nobody is advancing and the CPU
+ * goes idle. This leaves both workload and CPU unproductive.
+ *
+ * (Naturally, the FULL state doesn't exist for the CPU resource.)
+ *
+ *	SOME = nr_delayed_tasks != 0
+ *	FULL = nr_delayed_tasks != 0 && nr_running_tasks == 0
+ *
+ * The percentage of wallclock time spent in those compound stall
+ * states gives pressure numbers between 0 and 100 for each resource,
+ * where the SOME percentage indicates workload slowdowns and the FULL
+ * percentage indicates reduced CPU utilization:
+ *
+ *	%SOME = time(SOME) / period
+ *	%FULL = time(FULL) / period
+ *
+ *			Multiple CPUs
+ *
+ * The more tasks and available CPUs there are, the more work can be
+ * performed concurrently. This means that the potential that can go
+ * unrealized due to resource contention *also* scales with non-idle
+ * tasks and CPUs.
+ *
+ * Consider a scenario where 257 number crunching tasks are trying to
+ * run concurrently on 256 CPUs. If we simply aggregated the task
+ * states, we would have to conclude a CPU SOME pressure number of
+ * 100%, since *somebody* is waiting on a runqueue at all
+ * times. However, that is clearly not the amount of contention the
+ * workload is experiencing: only one out of 256 possible execution
+ * threads will be contended at any given time, or about 0.4%.
+ *
+ * Conversely, consider a scenario of 4 tasks and 4 CPUs where at any
+ * given time *one* of the tasks is delayed due to a lack of memory.
+ * Again, looking purely at the task state would yield a memory FULL
+ * pressure number of 0%, since *somebody* is always making forward
+ * progress. But again this wouldn't capture the amount of execution
+ * potential lost, which is 1 out of 4 CPUs, or 25%.
+ *
+ * To calculate wasted potential (pressure) with multiple processors,
+ * we have to base our calculation on the number of non-idle tasks in
+ * conjunction with the number of available CPUs, which is the number
+ * of potential execution threads. SOME becomes then the proportion of
+ * delayed tasks to possible threads, and FULL is the share of possible
+ * threads that are unproductive due to delays:
+ *
+ *	threads = min(nr_nonidle_tasks, nr_cpus)
+ *	   SOME = min(nr_delayed_tasks / threads, 1)
+ *	   FULL = (threads - min(nr_running_tasks, threads)) / threads
+ *
+ * For the 257 number crunchers on 256 CPUs, this yields:
+ *
+ *	threads = min(257, 256)
+ *	   SOME = min(1 / 256, 1)             = 0.4%
+ *	   FULL = (256 - min(257, 256)) / 256 = 0%
+ *
+ * For the 1 out of 4 memory-delayed tasks, this yields:
+ *
+ *	threads = min(4, 4)
+ *	   SOME = min(1 / 4, 1)               = 25%
+ *	   FULL = (4 - min(3, 4)) / 4         = 25%
+ *
+ * [ Substitute nr_cpus with 1, and you can see that it's a natural
+ *   extension of the single-CPU model. ]
+ *
+ *			Implementation
+ *
+ * To assess the precise time spent in each such state, we would have
+ * to freeze the system on task changes and start/stop the state
+ * clocks accordingly. Obviously that doesn't scale in practice.
+ *
+ * Because the scheduler aims to distribute the compute load evenly
+ * among the available CPUs, we can track task state locally to each
+ * CPU and, at much lower frequency, extrapolate the global state for
+ * the cumulative stall times and the running averages.
+ *
+ * For each runqueue, we track:
+ *
+ *	   tSOME[cpu] = time(nr_delayed_tasks[cpu] != 0)
+ *	   tFULL[cpu] = time(nr_delayed_tasks[cpu] && !nr_running_tasks[cpu])
+ *	tNONIDLE[cpu] = time(nr_nonidle_tasks[cpu] != 0)
+ *
+ * and then periodically aggregate:
+ *
+ *	tNONIDLE = sum(tNONIDLE[i])
+ *
+ *	   tSOME = sum(tSOME[i] * tNONIDLE[i]) / tNONIDLE
+ *	   tFULL = sum(tFULL[i] * tNONIDLE[i]) / tNONIDLE
+ *
+ *	   %SOME = tSOME / period
+ *	   %FULL = tFULL / period
+ *
+ * This gives us an approximation of pressure that is practical
+ * cost-wise, yet way more sensitive and accurate than periodic
+ * sampling of the aggregate task states would be.
+ */
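The SOME/FULL formulas above are easy to reproduce; the sketch below recomputes the two worked examples from this comment (257 number crunchers on 256 CPUs, and one of four tasks stalled on memory):

	#include <stdio.h>

	static unsigned int min_u(unsigned int a, unsigned int b)
	{
		return a < b ? a : b;
	}

	static void pressure(unsigned int nr_delayed, unsigned int nr_running,
			     unsigned int nr_nonidle, unsigned int nr_cpus)
	{
		unsigned int threads = min_u(nr_nonidle, nr_cpus);
		double some = min_u(nr_delayed, threads) / (double)threads;
		double full = (threads - min_u(nr_running, threads)) / (double)threads;

		printf("SOME=%.1f%% FULL=%.1f%%\n", some * 100, full * 100);
	}

	int main(void)
	{
		pressure(1, 256, 257, 256);	/* SOME=0.4% FULL=0.0% */
		pressure(1, 3, 4, 4);		/* SOME=25.0% FULL=25.0% */
		return 0;
	}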
+
+#include "../workqueue_internal.h"
+#include <linux/sched/loadavg.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/seqlock.h>
+#include <linux/uaccess.h>
+#include <linux/cgroup.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/ctype.h>
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/psi.h>
+#include "sched.h"
+
+static int psi_bug __read_mostly;
+
+DEFINE_STATIC_KEY_FALSE(psi_disabled);
+
+#ifdef CONFIG_PSI_DEFAULT_DISABLED
+static bool psi_enable;
+#else
+static bool psi_enable = true;
+#endif
+static int __init setup_psi(char *str)
+{
+	return kstrtobool(str, &psi_enable) == 0;
+}
+__setup("psi=", setup_psi);
+
+/* Running averages - we need to be higher-res than loadavg */
+#define PSI_FREQ	(2*HZ+1)	/* 2 sec intervals */
+#define EXP_10s		1677		/* 1/exp(2s/10s) as fixed-point */
+#define EXP_60s		1981		/* 1/exp(2s/60s) */
+#define EXP_300s	2034		/* 1/exp(2s/300s) */
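For reference, the EXP_* values are the per-period decay factors 1/e^(dt/tau) in the same 11-bit fixed point as the load averages (FIXED_1 = 2048). A few lines of C re-derive them (link with -lm):

	#include <math.h>
	#include <stdio.h>

	int main(void)
	{
		/* prints 1677 1981 2034 */
		printf("%.0f %.0f %.0f\n", 2048 / exp(2.0 / 10),
		       2048 / exp(2.0 / 60), 2048 / exp(2.0 / 300));
		return 0;
	}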
+
+/* PSI trigger definitions */
+#define WINDOW_MIN_US 500000	/* Min window size is 500ms */
+#define WINDOW_MAX_US 10000000	/* Max window size is 10s */
+#define UPDATES_PER_WINDOW 10	/* 10 updates per window */
+
+/* Sampling frequency in nanoseconds */
+static u64 psi_period __read_mostly;
+
+/* System-level pressure and stall tracking */
+static DEFINE_PER_CPU(struct psi_group_cpu, system_group_pcpu);
+static struct psi_group psi_system = {
+	.pcpu = &system_group_pcpu,
+};
+
+static void psi_avgs_work(struct work_struct *work);
+
+static void group_init(struct psi_group *group)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		seqcount_init(&per_cpu_ptr(group->pcpu, cpu)->seq);
+	group->avg_next_update = sched_clock() + psi_period;
+	INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work);
+	mutex_init(&group->avgs_lock);
+	/* Init trigger-related members */
+	atomic_set(&group->poll_scheduled, 0);
+	mutex_init(&group->trigger_lock);
+	INIT_LIST_HEAD(&group->triggers);
+	memset(group->nr_triggers, 0, sizeof(group->nr_triggers));
+	group->poll_states = 0;
+	group->poll_min_period = U32_MAX;
+	memset(group->polling_total, 0, sizeof(group->polling_total));
+	group->polling_next_update = ULLONG_MAX;
+	group->polling_until = 0;
+	rcu_assign_pointer(group->poll_kworker, NULL);
+}
+
+void __init psi_init(void)
+{
+	if (!psi_enable) {
+		static_branch_enable(&psi_disabled);
+		return;
+	}
+
+	psi_period = jiffies_to_nsecs(PSI_FREQ);
+	group_init(&psi_system);
+}
+
+static bool test_state(unsigned int *tasks, enum psi_states state)
+{
+	switch (state) {
+	case PSI_IO_SOME:
+		return tasks[NR_IOWAIT];
+	case PSI_IO_FULL:
+		return tasks[NR_IOWAIT] && !tasks[NR_RUNNING];
+	case PSI_MEM_SOME:
+		return tasks[NR_MEMSTALL];
+	case PSI_MEM_FULL:
+		return tasks[NR_MEMSTALL] && !tasks[NR_RUNNING];
+	case PSI_CPU_SOME:
+		return tasks[NR_RUNNING] > 1;
+	case PSI_NONIDLE:
+		return tasks[NR_IOWAIT] || tasks[NR_MEMSTALL] ||
+			tasks[NR_RUNNING];
+	default:
+		return false;
+	}
+}
+
+static void get_recent_times(struct psi_group *group, int cpu,
+			     enum psi_aggregators aggregator, u32 *times,
+			     u32 *pchanged_states)
+{
+	struct psi_group_cpu *groupc = per_cpu_ptr(group->pcpu, cpu);
+	u64 now, state_start;
+	enum psi_states s;
+	unsigned int seq;
+	u32 state_mask;
+
+	*pchanged_states = 0;
+
+	/* Snapshot a coherent view of the CPU state */
+	do {
+		seq = read_seqcount_begin(&groupc->seq);
+		now = cpu_clock(cpu);
+		memcpy(times, groupc->times, sizeof(groupc->times));
+		state_mask = groupc->state_mask;
+		state_start = groupc->state_start;
+	} while (read_seqcount_retry(&groupc->seq, seq));
+
+	/* Calculate state time deltas against the previous snapshot */
+	for (s = 0; s < NR_PSI_STATES; s++) {
+		u32 delta;
+		/*
+		 * In addition to already concluded states, we also
+		 * incorporate currently active states on the CPU,
+		 * since states may last for many sampling periods.
+		 *
+		 * This way we keep our delta sampling buckets small
+		 * (u32) and our reported pressure close to what's
+		 * actually happening.
+		 */
+		if (state_mask & (1 << s))
+			times[s] += now - state_start;
+
+		delta = times[s] - groupc->times_prev[aggregator][s];
+		groupc->times_prev[aggregator][s] = times[s];
+
+		times[s] = delta;
+		if (delta)
+			*pchanged_states |= (1 << s);
+	}
+}
+
+static void calc_avgs(unsigned long avg[3], int missed_periods,
+		      u64 time, u64 period)
+{
+	unsigned long pct;
+
+	/* Fill in zeroes for periods of no activity */
+	if (missed_periods) {
+		avg[0] = calc_load_n(avg[0], EXP_10s, 0, missed_periods);
+		avg[1] = calc_load_n(avg[1], EXP_60s, 0, missed_periods);
+		avg[2] = calc_load_n(avg[2], EXP_300s, 0, missed_periods);
+	}
+
+	/* Sample the most recent active period */
+	pct = div_u64(time * 100, period);
+	pct *= FIXED_1;
+	avg[0] = calc_load(avg[0], EXP_10s, pct);
+	avg[1] = calc_load(avg[1], EXP_60s, pct);
+	avg[2] = calc_load(avg[2], EXP_300s, pct);
+}
+
+static void collect_percpu_times(struct psi_group *group,
+				 enum psi_aggregators aggregator,
+				 u32 *pchanged_states)
+{
+	u64 deltas[NR_PSI_STATES - 1] = { 0, };
+	unsigned long nonidle_total = 0;
+	u32 changed_states = 0;
+	int cpu;
+	int s;
+
+	/*
+	 * Collect the per-cpu time buckets and average them into a
+	 * single time sample that is normalized to wallclock time.
+	 *
+	 * For averaging, each CPU is weighted by its non-idle time in
+	 * the sampling period. This eliminates artifacts from uneven
+	 * loading, or even entirely idle CPUs.
+	 */
+	for_each_possible_cpu(cpu) {
+		u32 times[NR_PSI_STATES];
+		u32 nonidle;
+		u32 cpu_changed_states;
+
+		get_recent_times(group, cpu, aggregator, times,
+				&cpu_changed_states);
+		changed_states |= cpu_changed_states;
+
+		nonidle = nsecs_to_jiffies(times[PSI_NONIDLE]);
+		nonidle_total += nonidle;
+
+		for (s = 0; s < PSI_NONIDLE; s++)
+			deltas[s] += (u64)times[s] * nonidle;
+	}
+
+	/*
+	 * Integrate the sample into the running statistics that are
+	 * reported to userspace: the cumulative stall times and the
+	 * decaying averages.
+	 *
+	 * Pressure percentages are sampled at PSI_FREQ. We might be
+	 * called more often when the user polls more frequently than
+	 * that; we might be called less often when there is no task
+	 * activity, thus no data, and clock ticks are sporadic. The
+	 * below handles both.
+	 */
+
+	/* total= */
+	for (s = 0; s < NR_PSI_STATES - 1; s++)
+		group->total[aggregator][s] +=
+				div_u64(deltas[s], max(nonidle_total, 1UL));
+
+	if (pchanged_states)
+		*pchanged_states = changed_states;
+}
+
+static u64 update_averages(struct psi_group *group, u64 now)
+{
+	unsigned long missed_periods = 0;
+	u64 expires, period;
+	u64 avg_next_update;
+	int s;
+
+	/* avgX= */
+	expires = group->avg_next_update;
+	if (now - expires >= psi_period)
+		missed_periods = div_u64(now - expires, psi_period);
+
+	/*
+	 * The periodic clock tick can get delayed for various
+	 * reasons, especially on loaded systems. To avoid clock
+	 * drift, we schedule the clock in fixed psi_period intervals.
+	 * But the deltas we sample out of the per-cpu buckets above
+	 * are based on the actual time elapsing between clock ticks.
+	 */
+	avg_next_update = expires + ((1 + missed_periods) * psi_period);
+	period = now - (group->avg_last_update + (missed_periods * psi_period));
+	group->avg_last_update = now;
+
+	for (s = 0; s < NR_PSI_STATES - 1; s++) {
+		u32 sample;
+
+		sample = group->total[PSI_AVGS][s] - group->avg_total[s];
+		/*
+		 * Due to the lockless sampling of the time buckets,
+		 * recorded time deltas can slip into the next period,
+		 * which under full pressure can result in samples in
+		 * excess of the period length.
+		 *
+		 * We don't want to report non-sensical pressures in
+		 * excess of 100%, nor do we want to drop such events
+		 * on the floor. Instead we punt any overage into the
+		 * future until pressure subsides. By doing this we
+		 * don't underreport the occurring pressure curve, we
+		 * just report it delayed by one period length.
+		 *
+		 * The error isn't cumulative. As soon as another
+		 * delta slips from a period P to P+1, by definition
+		 * it frees up its time T in P.
+		 */
+		if (sample > period)
+			sample = period;
+		group->avg_total[s] += sample;
+		calc_avgs(group->avg[s], missed_periods, sample, period);
+	}
+
+	return avg_next_update;
+}
+
+static void psi_avgs_work(struct work_struct *work)
+{
+	struct delayed_work *dwork;
+	struct psi_group *group;
+	u32 changed_states;
+	bool nonidle;
+	u64 now;
+
+	dwork = to_delayed_work(work);
+	group = container_of(dwork, struct psi_group, avgs_work);
+
+	mutex_lock(&group->avgs_lock);
+
+	now = sched_clock();
+
+	collect_percpu_times(group, PSI_AVGS, &changed_states);
+	nonidle = changed_states & (1 << PSI_NONIDLE);
+	/*
+	 * If there is task activity, periodically fold the per-cpu
+	 * times and feed samples into the running averages. If things
+	 * are idle and there is no data to process, stop the clock.
+	 * Once restarted, we'll catch up the running averages in one
+	 * go - see calc_avgs() and missed_periods.
+	 */
+	if (now >= group->avg_next_update)
+		group->avg_next_update = update_averages(group, now);
+
+	if (nonidle) {
+		schedule_delayed_work(dwork, nsecs_to_jiffies(
+				group->avg_next_update - now) + 1);
+	}
+
+	mutex_unlock(&group->avgs_lock);
+}
+
+/* Trigger tracking window manipulations */
+static void window_reset(struct psi_window *win, u64 now, u64 value,
+			 u64 prev_growth)
+{
+	win->start_time = now;
+	win->start_value = value;
+	win->prev_growth = prev_growth;
+}
+
+/*
+ * PSI growth tracking window update and growth calculation routine.
+ *
+ * This approximates a sliding tracking window by interpolating
+ * partially elapsed windows using historical growth data from the
+ * previous intervals. This minimizes memory requirements (by not storing
+ * all the intermediate values in the previous window) and simplifies
+ * the calculations. It works well because PSI signal changes only in
+ * positive direction and over relatively small window sizes the growth
+ * is close to linear.
+ */
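+/*
+ * Worked example with assumed numbers: for a 1s window that is 250ms
+ * into its current cycle, with 30ms of growth measured so far and
+ * 100ms of growth in the previous window, the reported growth is
+ * 30ms + 100ms * 750ms / 1000ms = 105ms.
+ */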
+static u64 window_update(struct psi_window *win, u64 now, u64 value)
+{
+	u64 elapsed;
+	u64 growth;
+
+	elapsed = now - win->start_time;
+	growth = value - win->start_value;
+	/*
+	 * After each tracking window passes win->start_value and
+	 * win->start_time get reset and win->prev_growth stores
+	 * the average per-window growth of the previous window.
+	 * win->prev_growth is then used to interpolate additional
+	 * growth from the previous window assuming it was linear.
+	 */
+	if (elapsed > win->size)
+		window_reset(win, now, value, growth);
+	else {
+		u32 remaining;
+
+		remaining = win->size - elapsed;
+		growth += div_u64(win->prev_growth * remaining, win->size);
+	}
+
+	return growth;
+}
+
+static void init_triggers(struct psi_group *group, u64 now)
+{
+	struct psi_trigger *t;
+
+	list_for_each_entry(t, &group->triggers, node)
+		window_reset(&t->win, now,
+				group->total[PSI_POLL][t->state], 0);
+	memcpy(group->polling_total, group->total[PSI_POLL],
+		   sizeof(group->polling_total));
+	group->polling_next_update = now + group->poll_min_period;
+}
+
+static u64 update_triggers(struct psi_group *group, u64 now)
+{
+	struct psi_trigger *t;
+	bool new_stall = false;
+	u64 *total = group->total[PSI_POLL];
+
+	/*
+	 * On subsequent updates, calculate growth deltas and let
+	 * watchers know when their specified thresholds are exceeded.
+	 */
+	list_for_each_entry(t, &group->triggers, node) {
+		u64 growth;
+
+		/* Check for stall activity */
+		if (group->polling_total[t->state] == total[t->state])
+			continue;
+
+		/*
+		 * Multiple triggers might be looking at the same state,
+		 * remember to update group->polling_total[] once we've
+		 * been through all of them. Also remember to extend the
+		 * polling time if we see new stall activity.
+		 */
+		new_stall = true;
+
+		/* Calculate growth since last update */
+		growth = window_update(&t->win, now, total[t->state]);
+		if (growth < t->threshold)
+			continue;
+
+		/* Limit event signaling to once per window */
+		if (now < t->last_event_time + t->win.size)
+			continue;
+
+		/* Generate an event */
+		if (cmpxchg(&t->event, 0, 1) == 0)
+			wake_up_interruptible(&t->event_wait);
+		t->last_event_time = now;
+	}
+
+	if (new_stall)
+		memcpy(group->polling_total, total,
+				sizeof(group->polling_total));
+
+	return now + group->poll_min_period;
+}
+
+/*
+ * Schedule polling if it's not already scheduled. It's safe to call even from
+ * hotpath because even though kthread_queue_delayed_work takes worker->lock
+ * spinlock that spinlock is never contended due to poll_scheduled atomic
+ * preventing such competition.
+ */
+static void psi_schedule_poll_work(struct psi_group *group, unsigned long delay)
+{
+	struct kthread_worker *kworker;
+
+	/* Do not reschedule if already scheduled */
+	if (atomic_cmpxchg(&group->poll_scheduled, 0, 1) != 0)
+		return;
+
+	rcu_read_lock();
+
+	kworker = rcu_dereference(group->poll_kworker);
+	/*
+	 * kworker might be NULL in case psi_trigger_destroy races with
+	 * psi_task_change (hotpath) which can't use locks
+	 */
+	if (likely(kworker))
+		kthread_queue_delayed_work(kworker, &group->poll_work, delay);
+	else
+		atomic_set(&group->poll_scheduled, 0);
+
+	rcu_read_unlock();
+}
+
+static void psi_poll_work(struct kthread_work *work)
+{
+	struct kthread_delayed_work *dwork;
+	struct psi_group *group;
+	u32 changed_states;
+	u64 now;
+
+	dwork = container_of(work, struct kthread_delayed_work, work);
+	group = container_of(dwork, struct psi_group, poll_work);
+
+	atomic_set(&group->poll_scheduled, 0);
+
+	mutex_lock(&group->trigger_lock);
+
+	now = sched_clock();
+
+	collect_percpu_times(group, PSI_POLL, &changed_states);
+
+	if (changed_states & group->poll_states) {
+		/* Initialize trigger windows when entering polling mode */
+		if (now > group->polling_until)
+			init_triggers(group, now);
+
+		/*
+		 * Keep the monitor active for at least the duration of the
+		 * minimum tracking window as long as monitor states are
+		 * changing.
+		 */
+		group->polling_until = now +
+			group->poll_min_period * UPDATES_PER_WINDOW;
+	}
+
+	if (now > group->polling_until) {
+		group->polling_next_update = ULLONG_MAX;
+		goto out;
+	}
+
+	if (now >= group->polling_next_update)
+		group->polling_next_update = update_triggers(group, now);
+
+	psi_schedule_poll_work(group,
+		nsecs_to_jiffies(group->polling_next_update - now) + 1);
+
+out:
+	mutex_unlock(&group->trigger_lock);
+}
+
+static void record_times(struct psi_group_cpu *groupc, int cpu,
+			 bool memstall_tick)
+{
+	u32 delta;
+	u64 now;
+
+	now = cpu_clock(cpu);
+	delta = now - groupc->state_start;
+	groupc->state_start = now;
+
+	if (groupc->state_mask & (1 << PSI_IO_SOME)) {
+		groupc->times[PSI_IO_SOME] += delta;
+		if (groupc->state_mask & (1 << PSI_IO_FULL))
+			groupc->times[PSI_IO_FULL] += delta;
+	}
+
+	if (groupc->state_mask & (1 << PSI_MEM_SOME)) {
+		groupc->times[PSI_MEM_SOME] += delta;
+		if (groupc->state_mask & (1 << PSI_MEM_FULL))
+			groupc->times[PSI_MEM_FULL] += delta;
+		else if (memstall_tick) {
+			u32 sample;
+			/*
+			 * Since we care about lost potential, a
+			 * memstall is FULL when there are no other
+			 * working tasks, but also when the CPU is
+			 * actively reclaiming and nothing productive
+			 * could run even if it were runnable.
+			 *
+			 * When the timer tick sees a reclaiming CPU,
+			 * regardless of runnable tasks, sample a FULL
+			 * tick (or less if it hasn't been a full tick
+			 * since the last state change).
+			 */
+			sample = min(delta, (u32)jiffies_to_nsecs(1));
+			groupc->times[PSI_MEM_FULL] += sample;
+		}
+	}
+
+	if (groupc->state_mask & (1 << PSI_CPU_SOME))
+		groupc->times[PSI_CPU_SOME] += delta;
+
+	if (groupc->state_mask & (1 << PSI_NONIDLE))
+		groupc->times[PSI_NONIDLE] += delta;
+}
+
+static u32 psi_group_change(struct psi_group *group, int cpu,
+			    unsigned int clear, unsigned int set)
+{
+	struct psi_group_cpu *groupc;
+	unsigned int t, m;
+	enum psi_states s;
+	u32 state_mask = 0;
+
+	groupc = per_cpu_ptr(group->pcpu, cpu);
+
+	/*
+	 * First we assess the aggregate resource states this CPU's
+	 * tasks have been in since the last change, and account any
+	 * SOME and FULL time these may have resulted in.
+	 *
+	 * Then we update the task counts according to the state
+	 * change requested through the @clear and @set bits.
+	 */
+	write_seqcount_begin(&groupc->seq);
+
+	record_times(groupc, cpu, false);
+
+	for (t = 0, m = clear; m; m &= ~(1 << t), t++) {
+		if (!(m & (1 << t)))
+			continue;
+		if (groupc->tasks[t] == 0 && !psi_bug) {
+			printk_deferred(KERN_ERR "psi: task underflow! cpu=%d t=%d tasks=[%u %u %u] clear=%x set=%x\n",
+					cpu, t, groupc->tasks[0],
+					groupc->tasks[1], groupc->tasks[2],
+					clear, set);
+			psi_bug = 1;
+		}
+		groupc->tasks[t]--;
+	}
+
+	for (t = 0; set; set &= ~(1 << t), t++)
+		if (set & (1 << t))
+			groupc->tasks[t]++;
+
+	/* Calculate state mask representing active states */
+	for (s = 0; s < NR_PSI_STATES; s++) {
+		if (test_state(groupc->tasks, s))
+			state_mask |= (1 << s);
+	}
+	groupc->state_mask = state_mask;
+
+	write_seqcount_end(&groupc->seq);
+
+	return state_mask;
+}
+
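+/*
+ * Walk every psi_group the task contributes to: its cgroup and all of
+ * its ancestors except the root cgroup (whose state is tracked by the
+ * system-wide psi_system group), finishing with psi_system itself.
+ * With CONFIG_CGROUPS disabled, only psi_system is visited.
+ */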
+static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
+{
+#ifdef CONFIG_CGROUPS
+	struct cgroup *cgroup = NULL;
+
+	if (!*iter)
+		cgroup = task->cgroups->dfl_cgrp;
+	else if (*iter == &psi_system)
+		return NULL;
+	else
+		cgroup = cgroup_parent(*iter);
+
+	if (cgroup && cgroup_parent(cgroup)) {
+		*iter = cgroup;
+		return cgroup_psi(cgroup);
+	}
+#else
+	if (*iter)
+		return NULL;
+#endif
+	*iter = &psi_system;
+	return &psi_system;
+}
+
+void psi_task_change(struct task_struct *task, int clear, int set)
+{
+	int cpu = task_cpu(task);
+	struct psi_group *group;
+	bool wake_clock = true;
+	void *iter = NULL;
+
+	if (!task->pid)
+		return;
+
+	if (((task->psi_flags & set) ||
+	     (task->psi_flags & clear) != clear) &&
+	    !psi_bug) {
+		printk_deferred(KERN_ERR "psi: inconsistent task state! task=%d:%s cpu=%d psi_flags=%x clear=%x set=%x\n",
+				task->pid, task->comm, cpu,
+				task->psi_flags, clear, set);
+		psi_bug = 1;
+	}
+
+	task->psi_flags &= ~clear;
+	task->psi_flags |= set;
+
+	/*
+	 * Periodic aggregation shuts off if there is a period of no
+	 * task changes, so we wake it back up if necessary. However,
+	 * don't do this if the task change is the aggregation worker
+	 * itself going to sleep, or we'll ping-pong forever.
+	 */
+	if (unlikely((clear & TSK_RUNNING) &&
+		     (task->flags & PF_WQ_WORKER) &&
+		     wq_worker_last_func(task) == psi_avgs_work))
+		wake_clock = false;
+
+	while ((group = iterate_groups(task, &iter))) {
+		u32 state_mask = psi_group_change(group, cpu, clear, set);
+
+		if (state_mask & group->poll_states)
+			psi_schedule_poll_work(group, 1);
+
+		if (wake_clock && !delayed_work_pending(&group->avgs_work))
+			schedule_delayed_work(&group->avgs_work, PSI_FREQ);
+	}
+}
+
+void psi_memstall_tick(struct task_struct *task, int cpu)
+{
+	struct psi_group *group;
+	void *iter = NULL;
+
+	while ((group = iterate_groups(task, &iter))) {
+		struct psi_group_cpu *groupc;
+
+		groupc = per_cpu_ptr(group->pcpu, cpu);
+		write_seqcount_begin(&groupc->seq);
+		record_times(groupc, cpu, true);
+		write_seqcount_end(&groupc->seq);
+	}
+}
+
+/**
+ * psi_memstall_enter - mark the beginning of a memory stall section
+ * @flags: flags to handle nested sections
+ *
+ * Marks the calling task as being stalled due to a lack of memory,
+ * such as waiting for a refault or performing reclaim.
+ */
+void psi_memstall_enter(unsigned long *flags)
+{
+	struct rq_flags rf;
+	struct rq *rq;
+
+	if (static_branch_likely(&psi_disabled))
+		return;
+
+	*flags = current->flags & PF_MEMSTALL;
+	if (*flags)
+		return;
+	/*
+	 * PF_MEMSTALL setting & accounting needs to be atomic wrt
+	 * changes to the task's scheduling state, otherwise we can
+	 * race with CPU migration.
+	 */
+	rq = this_rq_lock_irq(&rf);
+
+	current->flags |= PF_MEMSTALL;
+	psi_task_change(current, 0, TSK_MEMSTALL);
+
+	rq_unlock_irq(rq, &rf);
+}
+
+/**
+ * psi_memstall_leave - mark the end of a memory stall section
+ * @flags: flags to handle nested memdelay sections
+ *
+ * Marks the calling task as no longer stalled due to lack of memory.
+ */
+void psi_memstall_leave(unsigned long *flags)
+{
+	struct rq_flags rf;
+	struct rq *rq;
+
+	if (static_branch_likely(&psi_disabled))
+		return;
+
+	if (*flags)
+		return;
+	/*
+	 * PF_MEMSTALL clearing & accounting needs to be atomic wrt
+	 * changes to the task's scheduling state, otherwise we could
+	 * race with CPU migration.
+	 */
+	rq = this_rq_lock_irq(&rf);
+
+	current->flags &= ~PF_MEMSTALL;
+	psi_task_change(current, TSK_MEMSTALL, 0);
+
+	rq_unlock_irq(rq, &rf);
+}
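+
+/*
+ * Typical usage sketch (the reclaim helper below is hypothetical; only
+ * the enter/leave pairing around a blocking memory operation matters):
+ *
+ *	unsigned long pflags;
+ *
+ *	psi_memstall_enter(&pflags);
+ *	run_direct_reclaim();
+ *	psi_memstall_leave(&pflags);
+ */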
+
+#ifdef CONFIG_CGROUPS
+int psi_cgroup_alloc(struct cgroup *cgroup)
+{
+	if (static_branch_likely(&psi_disabled))
+		return 0;
+
+	cgroup->psi.pcpu = alloc_percpu(struct psi_group_cpu);
+	if (!cgroup->psi.pcpu)
+		return -ENOMEM;
+	group_init(&cgroup->psi);
+	return 0;
+}
+
+void psi_cgroup_free(struct cgroup *cgroup)
+{
+	if (static_branch_likely(&psi_disabled))
+		return;
+
+	cancel_delayed_work_sync(&cgroup->psi.avgs_work);
+	free_percpu(cgroup->psi.pcpu);
+	/* All triggers must be removed by now */
+	WARN_ONCE(cgroup->psi.poll_states, "psi: trigger leak\n");
+}
+
+/**
+ * cgroup_move_task - move task to a different cgroup
+ * @task: the task
+ * @to: the target css_set
+ *
+ * Move task to a new cgroup and safely migrate its associated stall
+ * state between the different groups.
+ *
+ * This function acquires the task's rq lock to lock out concurrent
+ * changes to the task's scheduling state and - in case the task is
+ * running - concurrent changes to its stall state.
+ */
+void cgroup_move_task(struct task_struct *task, struct css_set *to)
+{
+	unsigned int task_flags = 0;
+	struct rq_flags rf;
+	struct rq *rq;
+
+	if (static_branch_likely(&psi_disabled)) {
+		/*
+		 * Lame to do this here, but the scheduler cannot be locked
+		 * from the outside, so we move cgroups from inside sched/.
+		 */
+		rcu_assign_pointer(task->cgroups, to);
+		return;
+	}
+
+	rq = task_rq_lock(task, &rf);
+
+	if (task_on_rq_queued(task))
+		task_flags = TSK_RUNNING;
+	else if (task->in_iowait)
+		task_flags = TSK_IOWAIT;
+
+	if (task->flags & PF_MEMSTALL)
+		task_flags |= TSK_MEMSTALL;
+
+	if (task_flags)
+		psi_task_change(task, task_flags, 0);
+
+	/* See comment above */
+	rcu_assign_pointer(task->cgroups, to);
+
+	if (task_flags)
+		psi_task_change(task, 0, task_flags);
+
+	task_rq_unlock(rq, task, &rf);
+}
+#endif /* CONFIG_CGROUPS */
+
+int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
+{
+	int full;
+	u64 now;
+
+	if (static_branch_likely(&psi_disabled))
+		return -EOPNOTSUPP;
+
+	/* Update averages before reporting them */
+	mutex_lock(&group->avgs_lock);
+	now = sched_clock();
+	collect_percpu_times(group, PSI_AVGS, NULL);
+	if (now >= group->avg_next_update)
+		group->avg_next_update = update_averages(group, now);
+	mutex_unlock(&group->avgs_lock);
+
+	for (full = 0; full < 2 - (res == PSI_CPU); full++) {
+		unsigned long avg[3];
+		u64 total;
+		int w;
+
+		for (w = 0; w < 3; w++)
+			avg[w] = group->avg[res * 2 + full][w];
+		total = div_u64(group->total[PSI_AVGS][res * 2 + full],
+				NSEC_PER_USEC);
+
+		seq_printf(m, "%s avg10=%lu.%02lu avg60=%lu.%02lu avg300=%lu.%02lu total=%llu\n",
+			   full ? "full" : "some",
+			   LOAD_INT(avg[0]), LOAD_FRAC(avg[0]),
+			   LOAD_INT(avg[1]), LOAD_FRAC(avg[1]),
+			   LOAD_INT(avg[2]), LOAD_FRAC(avg[2]),
+			   total);
+	}
+
+	return 0;
+}
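+
+/*
+ * Example output for /proc/pressure/memory (values are illustrative
+ * only; total is reported in microseconds):
+ *
+ *	some avg10=0.22 avg60=0.17 avg300=1.11 total=927587
+ *	full avg10=0.16 avg60=0.13 avg300=0.96 total=603245
+ *
+ * The "full" line is omitted for the CPU resource, per the loop above.
+ */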
+
+static int psi_io_show(struct seq_file *m, void *v)
+{
+	return psi_show(m, &psi_system, PSI_IO);
+}
+
+static int psi_memory_show(struct seq_file *m, void *v)
+{
+	return psi_show(m, &psi_system, PSI_MEM);
+}
+
+static int psi_cpu_show(struct seq_file *m, void *v)
+{
+	return psi_show(m, &psi_system, PSI_CPU);
+}
+
+static int psi_io_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, psi_io_show, NULL);
+}
+
+static int psi_memory_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, psi_memory_show, NULL);
+}
+
+static int psi_cpu_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, psi_cpu_show, NULL);
+}
+
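+/*
+ * The accepted trigger format is "some <threshold_us> <window_us>" or
+ * "full <threshold_us> <window_us>". For example (illustrative values),
+ * writing "some 150000 1000000" to /proc/pressure/memory requests a
+ * notification whenever memory stalls total at least 150ms within any
+ * 1s window.
+ */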
+struct psi_trigger *psi_trigger_create(struct psi_group *group,
+			char *buf, size_t nbytes, enum psi_res res)
+{
+	struct psi_trigger *t;
+	enum psi_states state;
+	u32 threshold_us;
+	u32 window_us;
+
+	if (static_branch_likely(&psi_disabled))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	if (sscanf(buf, "some %u %u", &threshold_us, &window_us) == 2)
+		state = PSI_IO_SOME + res * 2;
+	else if (sscanf(buf, "full %u %u", &threshold_us, &window_us) == 2)
+		state = PSI_IO_FULL + res * 2;
+	else
+		return ERR_PTR(-EINVAL);
+
+	if (state >= PSI_NONIDLE)
+		return ERR_PTR(-EINVAL);
+
+	if (window_us < WINDOW_MIN_US ||
+		window_us > WINDOW_MAX_US)
+		return ERR_PTR(-EINVAL);
+
+	/* Check threshold */
+	if (threshold_us == 0 || threshold_us > window_us)
+		return ERR_PTR(-EINVAL);
+
+	t = kmalloc(sizeof(*t), GFP_KERNEL);
+	if (!t)
+		return ERR_PTR(-ENOMEM);
+
+	t->group = group;
+	t->state = state;
+	t->threshold = threshold_us * NSEC_PER_USEC;
+	t->win.size = window_us * NSEC_PER_USEC;
+	window_reset(&t->win, 0, 0, 0);
+
+	t->event = 0;
+	t->last_event_time = 0;
+	init_waitqueue_head(&t->event_wait);
+	kref_init(&t->refcount);
+
+	mutex_lock(&group->trigger_lock);
+
+	if (!rcu_access_pointer(group->poll_kworker)) {
+		struct sched_param param = {
+			.sched_priority = MAX_RT_PRIO - 1,
+		};
+		struct kthread_worker *kworker;
+
+		kworker = kthread_create_worker(0, "psimon");
+		if (IS_ERR(kworker)) {
+			kfree(t);
+			mutex_unlock(&group->trigger_lock);
+			return ERR_CAST(kworker);
+		}
+		sched_setscheduler(kworker->task, SCHED_FIFO, &param);
+		kthread_init_delayed_work(&group->poll_work,
+				psi_poll_work);
+		rcu_assign_pointer(group->poll_kworker, kworker);
+	}
+
+	list_add(&t->node, &group->triggers);
+	group->poll_min_period = min(group->poll_min_period,
+		div_u64(t->win.size, UPDATES_PER_WINDOW));
+	group->nr_triggers[t->state]++;
+	group->poll_states |= (1 << t->state);
+
+	mutex_unlock(&group->trigger_lock);
+
+	return t;
+}
+
+static void psi_trigger_destroy(struct kref *ref)
+{
+	struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount);
+	struct psi_group *group = t->group;
+	struct kthread_worker *kworker_to_destroy = NULL;
+
+	if (static_branch_likely(&psi_disabled))
+		return;
+
+	/*
+	 * Wakeup waiters to stop polling. Can happen if cgroup is deleted
+	 * from under a polling process.
+	 */
+	wake_up_interruptible(&t->event_wait);
+
+	mutex_lock(&group->trigger_lock);
+
+	if (!list_empty(&t->node)) {
+		struct psi_trigger *tmp;
+		u64 period = ULLONG_MAX;
+
+		list_del(&t->node);
+		group->nr_triggers[t->state]--;
+		if (!group->nr_triggers[t->state])
+			group->poll_states &= ~(1 << t->state);
+		/* reset min update period for the remaining triggers */
+		list_for_each_entry(tmp, &group->triggers, node)
+			period = min(period, div_u64(tmp->win.size,
+					UPDATES_PER_WINDOW));
+		group->poll_min_period = period;
+		/* Destroy poll_kworker when the last trigger is destroyed */
+		if (group->poll_states == 0) {
+			group->polling_until = 0;
+			kworker_to_destroy = rcu_dereference_protected(
+					group->poll_kworker,
+					lockdep_is_held(&group->trigger_lock));
+			rcu_assign_pointer(group->poll_kworker, NULL);
+		}
+	}
+
+	mutex_unlock(&group->trigger_lock);
+
+	/*
+	 * Wait for both *trigger_ptr from psi_trigger_replace and
+	 * poll_kworker RCUs to complete their read-side critical sections
+	 * before destroying the trigger and optionally the poll_kworker
+	 */
+	synchronize_rcu();
+	/*
+	 * Destroy the kworker after releasing trigger_lock to prevent a
+	 * deadlock while waiting for psi_poll_work to acquire trigger_lock
+	 */
+	if (kworker_to_destroy) {
+		kthread_cancel_delayed_work_sync(&group->poll_work);
+		kthread_destroy_worker(kworker_to_destroy);
+	}
+	kfree(t);
+}
+
+void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new)
+{
+	struct psi_trigger *old = *trigger_ptr;
+
+	if (static_branch_likely(&psi_disabled))
+		return;
+
+	rcu_assign_pointer(*trigger_ptr, new);
+	if (old)
+		kref_put(&old->refcount, psi_trigger_destroy);
+}
+
+__poll_t psi_trigger_poll(void **trigger_ptr,
+				struct file *file, poll_table *wait)
+{
+	__poll_t ret = DEFAULT_POLLMASK;
+	struct psi_trigger *t;
+
+	if (static_branch_likely(&psi_disabled))
+		return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
+
+	rcu_read_lock();
+
+	t = rcu_dereference(*(void __rcu __force **)trigger_ptr);
+	if (!t) {
+		rcu_read_unlock();
+		return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
+	}
+	kref_get(&t->refcount);
+
+	rcu_read_unlock();
+
+	poll_wait(file, &t->event_wait, wait);
+
+	if (cmpxchg(&t->event, 1, 0) == 1)
+		ret |= EPOLLPRI;
+
+	kref_put(&t->refcount, psi_trigger_destroy);
+
+	return ret;
+}
+
+static ssize_t psi_write(struct file *file, const char __user *user_buf,
+			 size_t nbytes, enum psi_res res)
+{
+	char buf[32];
+	size_t buf_size;
+	struct seq_file *seq;
+	struct psi_trigger *new;
+
+	if (static_branch_likely(&psi_disabled))
+		return -EOPNOTSUPP;
+
+	buf_size = min(nbytes, (sizeof(buf) - 1));
+	if (copy_from_user(buf, user_buf, buf_size))
+		return -EFAULT;
+
+	buf[buf_size - 1] = '\0';
+
+	new = psi_trigger_create(&psi_system, buf, nbytes, res);
+	if (IS_ERR(new))
+		return PTR_ERR(new);
+
+	seq = file->private_data;
+	/* Take seq->lock to protect seq->private from concurrent writes */
+	mutex_lock(&seq->lock);
+	psi_trigger_replace(&seq->private, new);
+	mutex_unlock(&seq->lock);
+
+	return nbytes;
+}
+
+static ssize_t psi_io_write(struct file *file, const char __user *user_buf,
+			    size_t nbytes, loff_t *ppos)
+{
+	return psi_write(file, user_buf, nbytes, PSI_IO);
+}
+
+static ssize_t psi_memory_write(struct file *file, const char __user *user_buf,
+				size_t nbytes, loff_t *ppos)
+{
+	return psi_write(file, user_buf, nbytes, PSI_MEM);
+}
+
+static ssize_t psi_cpu_write(struct file *file, const char __user *user_buf,
+			     size_t nbytes, loff_t *ppos)
+{
+	return psi_write(file, user_buf, nbytes, PSI_CPU);
+}
+
+static __poll_t psi_fop_poll(struct file *file, poll_table *wait)
+{
+	struct seq_file *seq = file->private_data;
+
+	return psi_trigger_poll(&seq->private, file, wait);
+}
+
+static int psi_fop_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq = file->private_data;
+
+	psi_trigger_replace(&seq->private, NULL);
+	return single_release(inode, file);
+}
+
+static const struct file_operations psi_io_fops = {
+	.open           = psi_io_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.write          = psi_io_write,
+	.poll           = psi_fop_poll,
+	.release        = psi_fop_release,
+};
+
+static const struct file_operations psi_memory_fops = {
+	.open           = psi_memory_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.write          = psi_memory_write,
+	.poll           = psi_fop_poll,
+	.release        = psi_fop_release,
+};
+
+static const struct file_operations psi_cpu_fops = {
+	.open           = psi_cpu_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.write          = psi_cpu_write,
+	.poll           = psi_fop_poll,
+	.release        = psi_fop_release,
+};
+
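+/*
+ * Minimal userspace sketch of the interface created below (trigger
+ * values are illustrative and the handler is hypothetical): open
+ * /proc/pressure/memory, optionally write a trigger, then poll() for
+ * POLLPRI:
+ *
+ *	int fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);
+ *	struct pollfd pfd = { .fd = fd, .events = POLLPRI };
+ *
+ *	write(fd, "some 150000 1000000", 19);
+ *	while (poll(&pfd, 1, -1) > 0) {
+ *		if (pfd.revents & POLLPRI)
+ *			handle_pressure_event(); // hypothetical handler
+ *	}
+ */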
+static int __init psi_proc_init(void)
+{
+	proc_mkdir("pressure", NULL);
+	proc_create("pressure/io", 0, NULL, &psi_io_fops);
+	proc_create("pressure/memory", 0, NULL, &psi_memory_fops);
+	proc_create("pressure/cpu", 0, NULL, &psi_cpu_fops);
+	return 0;
+}
+module_init(psi_proc_init);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index b980cc9..150cde3 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1329,6 +1329,8 @@
 {
 	struct sched_rt_entity *rt_se = &p->rt;
 
+	schedtune_enqueue_task(p, cpu_of(rq));
+
 	if (flags & ENQUEUE_WAKEUP)
 		rt_se->timeout = 0;
 
@@ -1342,6 +1344,8 @@
 {
 	struct sched_rt_entity *rt_se = &p->rt;
 
+	schedtune_dequeue_task(p, cpu_of(rq));
+
 	update_curr_rt(rq);
 	dequeue_rt_entity(rt_se, flags);
 
@@ -1386,7 +1390,8 @@
 static int find_lowest_rq(struct task_struct *task);
 
 static int
-select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
+select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags,
+		  int sibling_count_hint)
 {
 	struct task_struct *curr;
 	struct rq *rq;
@@ -1584,7 +1589,7 @@
 	 * rt task
 	 */
 	if (rq->curr->sched_class != &rt_sched_class)
-		update_rt_rq_load_avg(rq_clock_task(rq), rq, 0);
+		update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
 
 	return p;
 }
@@ -1593,7 +1598,7 @@
 {
 	update_curr_rt(rq);
 
-	update_rt_rq_load_avg(rq_clock_task(rq), rq, 1);
+	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
 
 	/*
 	 * The previous task needs to be made eligible for pushing
@@ -2324,7 +2329,7 @@
 	struct sched_rt_entity *rt_se = &p->rt;
 
 	update_curr_rt(rq);
-	update_rt_rq_load_avg(rq_clock_task(rq), rq, 1);
+	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
 
 	watchdog(rq, p);
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 9a7c3d08..b795e78 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -45,6 +45,7 @@
 #include <linux/ctype.h>
 #include <linux/debugfs.h>
 #include <linux/delayacct.h>
+#include <linux/energy_model.h>
 #include <linux/init_task.h>
 #include <linux/kprobes.h>
 #include <linux/kthread.h>
@@ -55,6 +56,7 @@
 #include <linux/proc_fs.h>
 #include <linux/prefetch.h>
 #include <linux/profile.h>
+#include <linux/psi.h>
 #include <linux/rcupdate_wait.h>
 #include <linux/security.h>
 #include <linux/stackprotector.h>
@@ -80,6 +82,8 @@
 # define SCHED_WARN_ON(x)	({ (void)(x), 0; })
 #endif
 
+#include "tune.h"
+
 struct rq;
 struct cpuidle_state;
 
@@ -321,6 +325,7 @@
 #ifdef CONFIG_CGROUP_SCHED
 
 #include <linux/cgroup.h>
+#include <linux/psi.h>
 
 struct cfs_rq;
 struct rt_rq;
@@ -703,6 +708,22 @@
 	return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
 }
 
+struct perf_domain {
+	struct em_perf_domain *em_pd;
+	struct perf_domain *next;
+	struct rcu_head rcu;
+};
+
+struct max_cpu_capacity {
+	raw_spinlock_t lock;
+	unsigned long val;
+	int cpu;
+};
+
+/* Scheduling group status flags */
+#define SG_OVERLOAD		0x1 /* More than one runnable task on a CPU. */
+#define SG_OVERUTILIZED		0x2 /* One or more CPUs are over-utilized. */
+
 /*
  * We add the notion of a root-domain which will be used to define per-domain
  * variables. Each exclusive cpuset essentially defines an island domain by
@@ -718,8 +739,15 @@
 	cpumask_var_t		span;
 	cpumask_var_t		online;
 
-	/* Indicate more than one runnable task for any CPU */
-	bool			overload;
+	/*
+	 * Indicate pullable load on at least one CPU, e.g:
+	 * - More than one runnable task
+	 * - Running task is misfit
+	 */
+	int			overload;
+
+	/* Indicate one or more cpus over-utilized (tipping point) */
+	int			overutilized;
 
 	/*
 	 * The bit corresponding to a CPU gets set here if such CPU has more
@@ -750,13 +778,21 @@
 	cpumask_var_t		rto_mask;
 	struct cpupri		cpupri;
 
-	unsigned long		max_cpu_capacity;
+	/* Maximum cpu capacity in the system. */
+	struct max_cpu_capacity max_cpu_capacity;
+
+	/*
+	 * NULL-terminated list of performance domains intersecting with the
+	 * CPUs of the rd. Protected by RCU.
+	 */
+	struct perf_domain	*pd;
 };
 
 extern struct root_domain def_root_domain;
 extern struct mutex sched_domains_mutex;
 
 extern void init_defrootdomain(void);
+extern void init_max_cpu_capacity(struct max_cpu_capacity *mcc);
 extern int sched_init_domains(const struct cpumask *cpu_map);
 extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
 extern void sched_get_rd(struct root_domain *rd);
@@ -831,7 +867,10 @@
 
 	unsigned int		clock_update_flags;
 	u64			clock;
-	u64			clock_task;
+	/* Ensure that all clocks are in the same cache line */
+	u64			clock_task ____cacheline_aligned;
+	u64			clock_pelt;
+	unsigned long		lost_idle_time;
 
 	atomic_t		nr_iowait;
 
@@ -846,6 +885,8 @@
 
 	unsigned char		idle_balance;
 
+	unsigned long		misfit_task_load;
+
 	/* For active balancing */
 	int			active_balance;
 	int			push_cpu;
@@ -916,9 +957,26 @@
 #ifdef CONFIG_CPU_IDLE
 	/* Must be inspected within a rcu lock section */
 	struct cpuidle_state	*idle_state;
+	int			idle_state_idx;
 #endif
 };
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+
+/* CPU runqueue to which this cfs_rq is attached */
+static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
+{
+	return cfs_rq->rq;
+}
+
+#else
+
+static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
+{
+	return container_of(cfs_rq, struct rq, cfs);
+}
+#endif
+
 static inline int cpu_of(struct rq *rq)
 {
 #ifdef CONFIG_SMP
@@ -950,6 +1008,8 @@
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 #define raw_rq()		raw_cpu_ptr(&runqueues)
 
+extern void update_rq_clock(struct rq *rq);
+
 static inline u64 __rq_clock_broken(struct rq *rq)
 {
 	return READ_ONCE(rq->clock);
@@ -1068,6 +1128,98 @@
 #endif
 }
 
+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
+	__acquires(rq->lock);
+
+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
+	__acquires(p->pi_lock)
+	__acquires(rq->lock);
+
+static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
+	__releases(rq->lock)
+{
+	rq_unpin_lock(rq, rf);
+	raw_spin_unlock(&rq->lock);
+}
+
+static inline void
+task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+	__releases(rq->lock)
+	__releases(p->pi_lock)
+{
+	rq_unpin_lock(rq, rf);
+	raw_spin_unlock(&rq->lock);
+	raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
+}
+
+static inline void
+rq_lock_irqsave(struct rq *rq, struct rq_flags *rf)
+	__acquires(rq->lock)
+{
+	raw_spin_lock_irqsave(&rq->lock, rf->flags);
+	rq_pin_lock(rq, rf);
+}
+
+static inline void
+rq_lock_irq(struct rq *rq, struct rq_flags *rf)
+	__acquires(rq->lock)
+{
+	raw_spin_lock_irq(&rq->lock);
+	rq_pin_lock(rq, rf);
+}
+
+static inline void
+rq_lock(struct rq *rq, struct rq_flags *rf)
+	__acquires(rq->lock)
+{
+	raw_spin_lock(&rq->lock);
+	rq_pin_lock(rq, rf);
+}
+
+static inline void
+rq_relock(struct rq *rq, struct rq_flags *rf)
+	__acquires(rq->lock)
+{
+	raw_spin_lock(&rq->lock);
+	rq_repin_lock(rq, rf);
+}
+
+static inline void
+rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf)
+	__releases(rq->lock)
+{
+	rq_unpin_lock(rq, rf);
+	raw_spin_unlock_irqrestore(&rq->lock, rf->flags);
+}
+
+static inline void
+rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
+	__releases(rq->lock)
+{
+	rq_unpin_lock(rq, rf);
+	raw_spin_unlock_irq(&rq->lock);
+}
+
+static inline void
+rq_unlock(struct rq *rq, struct rq_flags *rf)
+	__releases(rq->lock)
+{
+	rq_unpin_lock(rq, rf);
+	raw_spin_unlock(&rq->lock);
+}
+
+static inline struct rq *
+this_rq_lock_irq(struct rq_flags *rf)
+	__acquires(rq->lock)
+{
+	struct rq *rq;
+
+	local_irq_disable();
+	rq = this_rq();
+	rq_lock(rq, rf);
+	return rq;
+}
+
 #ifdef CONFIG_NUMA
 enum numa_topology_type {
 	NUMA_DIRECT,
@@ -1184,7 +1336,9 @@
 DECLARE_PER_CPU(int, sd_llc_id);
 DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared);
 DECLARE_PER_CPU(struct sched_domain *, sd_numa);
-DECLARE_PER_CPU(struct sched_domain *, sd_asym);
+DECLARE_PER_CPU(struct sched_domain *, sd_asym_packing);
+DECLARE_PER_CPU(struct sched_domain *, sd_asym_cpucapacity);
+extern struct static_key_false sched_asym_cpucapacity;
 
 struct sched_group_capacity {
 	atomic_t		ref;
@@ -1194,6 +1348,7 @@
 	 */
 	unsigned long		capacity;
 	unsigned long		min_capacity;		/* Min per-CPU capacity in group */
+	unsigned long		max_capacity;		/* Max per-CPU capacity in group */
 	unsigned long		next_update;
 	int			imbalance;		/* XXX unrelated to capacity but shared group state */
 
@@ -1522,7 +1677,8 @@
 	void (*put_prev_task)(struct rq *rq, struct task_struct *p);
 
 #ifdef CONFIG_SMP
-	int  (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
+	int  (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags,
+			       int sibling_count_hint);
 	void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
 
 	void (*task_woken)(struct rq *this_rq, struct task_struct *task);
@@ -1610,6 +1766,17 @@
 
 	return rq->idle_state;
 }
+
+static inline void idle_set_state_idx(struct rq *rq, int idle_state_idx)
+{
+	rq->idle_state_idx = idle_state_idx;
+}
+
+static inline int idle_get_state_idx(struct rq *rq)
+{
+	WARN_ON(!rcu_read_lock_held());
+	return rq->idle_state_idx;
+}
 #else
 static inline void idle_set_state(struct rq *rq,
 				  struct cpuidle_state *idle_state)
@@ -1620,6 +1787,15 @@
 {
 	return NULL;
 }
+
+static inline void idle_set_state_idx(struct rq *rq, int idle_state_idx)
+{
+}
+
+static inline int idle_get_state_idx(struct rq *rq)
+{
+	return -1;
+}
 #endif
 
 extern void schedule_idle(void);
@@ -1693,8 +1869,8 @@
 
 	if (prev_nr < 2 && rq->nr_running >= 2) {
 #ifdef CONFIG_SMP
-		if (!rq->rd->overload)
-			rq->rd->overload = true;
+		if (!READ_ONCE(rq->rd->overload))
+			WRITE_ONCE(rq->rd->overload, 1);
 #endif
 	}
 
@@ -1708,8 +1884,6 @@
 	sched_update_tick_dependency(rq);
 }
 
-extern void update_rq_clock(struct rq *rq);
-
 extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
 extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
 
@@ -1753,106 +1927,14 @@
 }
 #endif
 
-#ifdef CONFIG_SMP
-#ifndef arch_scale_cpu_capacity
+#ifndef arch_scale_max_freq_capacity
+struct sched_domain;
 static __always_inline
-unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
-{
-	if (sd && (sd->flags & SD_SHARE_CPUCAPACITY) && (sd->span_weight > 1))
-		return sd->smt_gain / sd->span_weight;
-
-	return SCHED_CAPACITY_SCALE;
-}
-#endif
-#else
-#ifndef arch_scale_cpu_capacity
-static __always_inline
-unsigned long arch_scale_cpu_capacity(void __always_unused *sd, int cpu)
+unsigned long arch_scale_max_freq_capacity(struct sched_domain *sd, int cpu)
 {
 	return SCHED_CAPACITY_SCALE;
 }
 #endif
-#endif
-
-struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
-	__acquires(rq->lock);
-
-struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
-	__acquires(p->pi_lock)
-	__acquires(rq->lock);
-
-static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
-	__releases(rq->lock)
-{
-	rq_unpin_lock(rq, rf);
-	raw_spin_unlock(&rq->lock);
-}
-
-static inline void
-task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
-	__releases(rq->lock)
-	__releases(p->pi_lock)
-{
-	rq_unpin_lock(rq, rf);
-	raw_spin_unlock(&rq->lock);
-	raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
-}
-
-static inline void
-rq_lock_irqsave(struct rq *rq, struct rq_flags *rf)
-	__acquires(rq->lock)
-{
-	raw_spin_lock_irqsave(&rq->lock, rf->flags);
-	rq_pin_lock(rq, rf);
-}
-
-static inline void
-rq_lock_irq(struct rq *rq, struct rq_flags *rf)
-	__acquires(rq->lock)
-{
-	raw_spin_lock_irq(&rq->lock);
-	rq_pin_lock(rq, rf);
-}
-
-static inline void
-rq_lock(struct rq *rq, struct rq_flags *rf)
-	__acquires(rq->lock)
-{
-	raw_spin_lock(&rq->lock);
-	rq_pin_lock(rq, rf);
-}
-
-static inline void
-rq_relock(struct rq *rq, struct rq_flags *rf)
-	__acquires(rq->lock)
-{
-	raw_spin_lock(&rq->lock);
-	rq_repin_lock(rq, rf);
-}
-
-static inline void
-rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf)
-	__releases(rq->lock)
-{
-	rq_unpin_lock(rq, rf);
-	raw_spin_unlock_irqrestore(&rq->lock, rf->flags);
-}
-
-static inline void
-rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
-	__releases(rq->lock)
-{
-	rq_unpin_lock(rq, rf);
-	raw_spin_unlock_irq(&rq->lock);
-}
-
-static inline void
-rq_unlock(struct rq *rq, struct rq_flags *rf)
-	__releases(rq->lock)
-{
-	rq_unpin_lock(rq, rf);
-	raw_spin_unlock(&rq->lock);
-}
 
 #ifdef CONFIG_SMP
 #ifdef CONFIG_PREEMPT
@@ -2185,7 +2267,46 @@
 # define arch_scale_freq_invariant()	false
 #endif
 
+#ifdef CONFIG_SMP
+static inline unsigned long capacity_orig_of(int cpu)
+{
+	return cpu_rq(cpu)->cpu_capacity_orig;
+}
+#endif
+
 #ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
+/**
+ * enum schedutil_type - CPU utilization type
+ * @FREQUENCY_UTIL:	Utilization used to select frequency
+ * @ENERGY_UTIL:	Utilization used during energy calculation
+ *
+ * The utilization signals of all scheduling classes (CFS/RT/DL) and IRQ time
+ * need to be aggregated differently depending on the usage made of them. This
+ * enum is used within schedutil_freq_util() to differentiate the types of
+ * utilization expected by the callers, and adjust the aggregation accordingly.
+ */
+enum schedutil_type {
+	FREQUENCY_UTIL,
+	ENERGY_UTIL,
+};
+
+unsigned long schedutil_freq_util(int cpu, unsigned long util,
+			          unsigned long max, enum schedutil_type type);
+
+static inline unsigned long schedutil_energy_util(int cpu, unsigned long util)
+{
+	unsigned long max = arch_scale_cpu_capacity(NULL, cpu);
+
+	return schedutil_freq_util(cpu, util, max, ENERGY_UTIL);
+}
+#else /* CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
+static inline unsigned long schedutil_energy_util(int cpu, unsigned long util)
+{
+	return util;
+}
+#endif
+
+#ifdef CONFIG_SMP
 static inline unsigned long cpu_bw_dl(struct rq *rq)
 {
 	return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
@@ -2241,3 +2362,13 @@
 	return util;
 }
 #endif
+
+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
+#define perf_domain_span(pd) (to_cpumask(((pd)->em_pd->cpus)))
+#else
+#define perf_domain_span(pd) NULL
+#endif
+
+#ifdef CONFIG_SMP
+extern struct static_key_false sched_energy_present;
+#endif
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 8aea199..aa0de24 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -55,6 +55,92 @@
 # define   schedstat_val_or_zero(var)	0
 #endif /* CONFIG_SCHEDSTATS */
 
+#ifdef CONFIG_PSI
+/*
+ * PSI tracks state that persists across sleeps, such as iowaits and
+ * memory stalls. As a result, it has to distinguish between sleeps,
+ * where a task's runnable state changes, and requeues, where a task
+ * and its state are being moved between CPUs and runqueues.
+ */
+static inline void psi_enqueue(struct task_struct *p, bool wakeup)
+{
+	int clear = 0, set = TSK_RUNNING;
+
+	if (static_branch_likely(&psi_disabled))
+		return;
+
+	if (!wakeup || p->sched_psi_wake_requeue) {
+		if (p->flags & PF_MEMSTALL)
+			set |= TSK_MEMSTALL;
+		if (p->sched_psi_wake_requeue)
+			p->sched_psi_wake_requeue = 0;
+	} else {
+		if (p->in_iowait)
+			clear |= TSK_IOWAIT;
+	}
+
+	psi_task_change(p, clear, set);
+}
+
+static inline void psi_dequeue(struct task_struct *p, bool sleep)
+{
+	int clear = TSK_RUNNING, set = 0;
+
+	if (static_branch_likely(&psi_disabled))
+		return;
+
+	if (!sleep) {
+		if (p->flags & PF_MEMSTALL)
+			clear |= TSK_MEMSTALL;
+	} else {
+		if (p->in_iowait)
+			set |= TSK_IOWAIT;
+	}
+
+	psi_task_change(p, clear, set);
+}
+
+static inline void psi_ttwu_dequeue(struct task_struct *p)
+{
+	if (static_branch_likely(&psi_disabled))
+		return;
+	/*
+	 * Is the task being migrated during a wakeup? Make sure to
+	 * deregister its sleep-persistent psi states from the old
+	 * queue, and let psi_enqueue() know it has to requeue.
+	 */
+	if (unlikely(p->in_iowait || (p->flags & PF_MEMSTALL))) {
+		struct rq_flags rf;
+		struct rq *rq;
+		int clear = 0;
+
+		if (p->in_iowait)
+			clear |= TSK_IOWAIT;
+		if (p->flags & PF_MEMSTALL)
+			clear |= TSK_MEMSTALL;
+
+		rq = __task_rq_lock(p, &rf);
+		psi_task_change(p, clear, 0);
+		p->sched_psi_wake_requeue = 1;
+		__task_rq_unlock(rq, &rf);
+	}
+}
+
+static inline void psi_task_tick(struct rq *rq)
+{
+	if (static_branch_likely(&psi_disabled))
+		return;
+
+	if (unlikely(rq->curr->flags & PF_MEMSTALL))
+		psi_memstall_tick(rq->curr, cpu_of(rq));
+}
+#else /* CONFIG_PSI */
+static inline void psi_enqueue(struct task_struct *p, bool wakeup) {}
+static inline void psi_dequeue(struct task_struct *p, bool sleep) {}
+static inline void psi_ttwu_dequeue(struct task_struct *p) {}
+static inline void psi_task_tick(struct rq *rq) {}
+#endif /* CONFIG_PSI */
+
 #ifdef CONFIG_SCHED_INFO
 static inline void sched_info_reset_dequeued(struct task_struct *t)
 {
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index c183b79..6446d61 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -11,7 +11,8 @@
 
 #ifdef CONFIG_SMP
 static int
-select_task_rq_stop(struct task_struct *p, int cpu, int sd_flag, int flags)
+select_task_rq_stop(struct task_struct *p, int cpu, int sd_flag, int flags,
+		    int sibling_count_hint)
 {
 	return task_cpu(p); /* stop tasks as never migrate */
 }
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index c0a7514..30cf16f 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -201,6 +201,199 @@
 	return 1;
 }
 
+DEFINE_STATIC_KEY_FALSE(sched_energy_present);
+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
+DEFINE_MUTEX(sched_energy_mutex);
+bool sched_energy_update;
+
+static void free_pd(struct perf_domain *pd)
+{
+	struct perf_domain *tmp;
+
+	while (pd) {
+		tmp = pd->next;
+		kfree(pd);
+		pd = tmp;
+	}
+}
+
+static struct perf_domain *find_pd(struct perf_domain *pd, int cpu)
+{
+	while (pd) {
+		if (cpumask_test_cpu(cpu, perf_domain_span(pd)))
+			return pd;
+		pd = pd->next;
+	}
+
+	return NULL;
+}
+
+static struct perf_domain *pd_init(int cpu)
+{
+	struct em_perf_domain *obj = em_cpu_get(cpu);
+	struct perf_domain *pd;
+
+	if (!obj) {
+		if (sched_debug())
+			pr_info("%s: no EM found for CPU%d\n", __func__, cpu);
+		return NULL;
+	}
+
+	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd)
+		return NULL;
+	pd->em_pd = obj;
+
+	return pd;
+}
+
+static void perf_domain_debug(const struct cpumask *cpu_map,
+						struct perf_domain *pd)
+{
+	if (!sched_debug() || !pd)
+		return;
+
+	printk(KERN_DEBUG "root_domain %*pbl:", cpumask_pr_args(cpu_map));
+
+	while (pd) {
+		printk(KERN_CONT " pd%d:{ cpus=%*pbl nr_cstate=%d }",
+				cpumask_first(perf_domain_span(pd)),
+				cpumask_pr_args(perf_domain_span(pd)),
+				em_pd_nr_cap_states(pd->em_pd));
+		pd = pd->next;
+	}
+
+	printk(KERN_CONT "\n");
+}
+
+static void destroy_perf_domain_rcu(struct rcu_head *rp)
+{
+	struct perf_domain *pd;
+
+	pd = container_of(rp, struct perf_domain, rcu);
+	free_pd(pd);
+}
+
+static void sched_energy_set(bool has_eas)
+{
+	if (!has_eas && static_branch_unlikely(&sched_energy_present)) {
+		if (sched_debug())
+			pr_info("%s: stopping EAS\n", __func__);
+		static_branch_disable_cpuslocked(&sched_energy_present);
+	} else if (has_eas && !static_branch_unlikely(&sched_energy_present)) {
+		if (sched_debug())
+			pr_info("%s: starting EAS\n", __func__);
+		static_branch_enable_cpuslocked(&sched_energy_present);
+	}
+}
+
+/*
+ * EAS can be used on a root domain if it meets all the following conditions:
+ *    1. an Energy Model (EM) is available;
+ *    2. the SD_ASYM_CPUCAPACITY flag is set in the sched_domain hierarchy.
+ *    3. the EM complexity is low enough to keep scheduling overheads low;
+ *    4. schedutil is driving the frequency of all CPUs of the rd;
+ *
+ * The complexity of the Energy Model is defined as:
+ *
+ *              C = nr_pd * (nr_cpus + nr_cs)
+ *
+ * with parameters defined as:
+ *  - nr_pd:    the number of performance domains
+ *  - nr_cpus:  the number of CPUs
+ *  - nr_cs:    the sum of the number of capacity states of all performance
+ *              domains (for example, on a system with 2 performance domains,
+ *              with 10 capacity states each, nr_cs = 2 * 10 = 20).
+ *
+ * It is generally not a good idea to use such a model in the wake-up path on
+ * very complex platforms because of the associated scheduling overheads. The
+ * arbitrary constraint below prevents that. It makes EAS usable up to 16 CPUs
+ * with per-CPU DVFS and less than 8 capacity states each, for example.
+ */
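+/*
+ * Worked example with assumed numbers: 16 single-CPU performance
+ * domains with 7 capacity states each give
+ * C = 16 * (16 + 16 * 7) = 2048, exactly at the limit and still
+ * accepted by the complexity check in build_perf_domains().
+ */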
+#define EM_MAX_COMPLEXITY 2048
+
+extern struct cpufreq_governor schedutil_gov;
+static bool build_perf_domains(const struct cpumask *cpu_map)
+{
+	int i, nr_pd = 0, nr_cs = 0, nr_cpus = cpumask_weight(cpu_map);
+	struct perf_domain *pd = NULL, *tmp;
+	int cpu = cpumask_first(cpu_map);
+	struct root_domain *rd = cpu_rq(cpu)->rd;
+	struct cpufreq_policy *policy;
+	struct cpufreq_governor *gov;
+
+	/* EAS is enabled for asymmetric CPU capacity topologies. */
+	if (!per_cpu(sd_asym_cpucapacity, cpu)) {
+		if (sched_debug()) {
+			pr_info("rd %*pbl: CPUs do not have asymmetric capacities\n",
+					cpumask_pr_args(cpu_map));
+		}
+		goto free;
+	}
+
+	for_each_cpu(i, cpu_map) {
+		/* Skip already covered CPUs. */
+		if (find_pd(pd, i))
+			continue;
+
+		/* Do not attempt EAS if schedutil is not being used. */
+		policy = cpufreq_cpu_get(i);
+		if (!policy)
+			goto free;
+		gov = policy->governor;
+		cpufreq_cpu_put(policy);
+		if (gov != &schedutil_gov) {
+			if (rd->pd)
+				pr_warn("rd %*pbl: Disabling EAS, schedutil is mandatory\n",
+						cpumask_pr_args(cpu_map));
+			goto free;
+		}
+
+		/* Create the new pd and add it to the local list. */
+		tmp = pd_init(i);
+		if (!tmp)
+			goto free;
+		tmp->next = pd;
+		pd = tmp;
+
+		/*
+		 * Count performance domains and capacity states for the
+		 * complexity check.
+		 */
+		nr_pd++;
+		nr_cs += em_pd_nr_cap_states(pd->em_pd);
+	}
+
+	/* Bail out if the Energy Model complexity is too high. */
+	if (nr_pd * (nr_cs + nr_cpus) > EM_MAX_COMPLEXITY) {
+		WARN(1, "rd %*pbl: Failed to start EAS, EM complexity is too high\n",
+						cpumask_pr_args(cpu_map));
+		goto free;
+	}
+
+	perf_domain_debug(cpu_map, pd);
+
+	/* Attach the new list of performance domains to the root domain. */
+	tmp = rd->pd;
+	rcu_assign_pointer(rd->pd, pd);
+	if (tmp)
+		call_rcu(&tmp->rcu, destroy_perf_domain_rcu);
+
+	return !!pd;
+
+free:
+	free_pd(pd);
+	tmp = rd->pd;
+	rcu_assign_pointer(rd->pd, NULL);
+	if (tmp)
+		call_rcu(&tmp->rcu, destroy_perf_domain_rcu);
+
+	return false;
+}
+#else
+static void free_pd(struct perf_domain *pd) { }
+#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL*/
+
 static void free_rootdomain(struct rcu_head *rcu)
 {
 	struct root_domain *rd = container_of(rcu, struct root_domain, rcu);
@@ -211,6 +404,7 @@
 	free_cpumask_var(rd->rto_mask);
 	free_cpumask_var(rd->online);
 	free_cpumask_var(rd->span);
+	free_pd(rd->pd);
 	kfree(rd);
 }
 
@@ -287,6 +481,9 @@
 
 	if (cpupri_init(&rd->cpupri) != 0)
 		goto free_cpudl;
+
+	init_max_cpu_capacity(&rd->max_cpu_capacity);
+
 	return 0;
 
 free_cpudl:
@@ -397,7 +594,9 @@
 DEFINE_PER_CPU(int, sd_llc_id);
 DEFINE_PER_CPU(struct sched_domain_shared *, sd_llc_shared);
 DEFINE_PER_CPU(struct sched_domain *, sd_numa);
-DEFINE_PER_CPU(struct sched_domain *, sd_asym);
+DEFINE_PER_CPU(struct sched_domain *, sd_asym_packing);
+DEFINE_PER_CPU(struct sched_domain *, sd_asym_cpucapacity);
+DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity);
 
 static void update_top_cache_domain(int cpu)
 {
@@ -422,7 +621,10 @@
 	rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
 
 	sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
-	rcu_assign_pointer(per_cpu(sd_asym, cpu), sd);
+	rcu_assign_pointer(per_cpu(sd_asym_packing, cpu), sd);
+
+	sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY);
+	rcu_assign_pointer(per_cpu(sd_asym_cpucapacity, cpu), sd);
 }
 
 /*
@@ -692,6 +894,7 @@
 	sg_span = sched_group_span(sg);
 	sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
 	sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
+	sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
 }
 
 static int
@@ -851,6 +1054,7 @@
 
 	sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sched_group_span(sg));
 	sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
+	sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
 
 	return sg;
 }
@@ -1061,7 +1265,6 @@
  *   SD_SHARE_PKG_RESOURCES - describes shared caches
  *   SD_NUMA                - describes NUMA topologies
  *   SD_SHARE_POWERDOMAIN   - describes shared power domain
- *   SD_ASYM_CPUCAPACITY    - describes mixed capacity topologies
  *
  * Odd one out, which beside describing the topology has a quirk also
  * prescribes the desired behaviour that goes along with it:
@@ -1073,13 +1276,12 @@
 	 SD_SHARE_PKG_RESOURCES |	\
 	 SD_NUMA		|	\
 	 SD_ASYM_PACKING	|	\
-	 SD_ASYM_CPUCAPACITY	|	\
 	 SD_SHARE_POWERDOMAIN)
 
 static struct sched_domain *
 sd_init(struct sched_domain_topology_level *tl,
 	const struct cpumask *cpu_map,
-	struct sched_domain *child, int cpu)
+	struct sched_domain *child, int dflags, int cpu)
 {
 	struct sd_data *sdd = &tl->data;
 	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
@@ -1100,6 +1302,9 @@
 			"wrong sd_flags in topology description\n"))
 		sd_flags &= ~TOPOLOGY_SD_FLAGS;
 
+	/* Apply detected topology flags */
+	sd_flags |= dflags;
+
 	*sd = (struct sched_domain){
 		.min_interval		= sd_weight,
 		.max_interval		= 2*sd_weight,
@@ -1122,7 +1327,7 @@
 					| 0*SD_SHARE_CPUCAPACITY
 					| 0*SD_SHARE_PKG_RESOURCES
 					| 0*SD_SERIALIZE
-					| 0*SD_PREFER_SIBLING
+					| 1*SD_PREFER_SIBLING
 					| 0*SD_NUMA
 					| sd_flags
 					,
@@ -1148,17 +1353,21 @@
 	if (sd->flags & SD_ASYM_CPUCAPACITY) {
 		struct sched_domain *t = sd;
 
+		/*
+		 * Don't attempt to spread across CPUs of different capacities.
+		 */
+		if (sd->child)
+			sd->child->flags &= ~SD_PREFER_SIBLING;
+
 		for_each_lower_domain(t)
 			t->flags |= SD_BALANCE_WAKE;
 	}
 
 	if (sd->flags & SD_SHARE_CPUCAPACITY) {
-		sd->flags |= SD_PREFER_SIBLING;
 		sd->imbalance_pct = 110;
 		sd->smt_gain = 1178; /* ~15% */
 
 	} else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
-		sd->flags |= SD_PREFER_SIBLING;
 		sd->imbalance_pct = 117;
 		sd->cache_nice_tries = 1;
 		sd->busy_idx = 2;
@@ -1169,6 +1378,7 @@
 		sd->busy_idx = 3;
 		sd->idle_idx = 2;
 
+		sd->flags &= ~SD_PREFER_SIBLING;
 		sd->flags |= SD_SERIALIZE;
 		if (sched_domains_numa_distance[tl->numa_level] > RECLAIM_DISTANCE) {
 			sd->flags &= ~(SD_BALANCE_EXEC |
@@ -1178,7 +1388,6 @@
 
 #endif
 	} else {
-		sd->flags |= SD_PREFER_SIBLING;
 		sd->cache_nice_tries = 1;
 		sd->busy_idx = 2;
 		sd->idle_idx = 1;
@@ -1604,9 +1813,9 @@
 
 static struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
 		const struct cpumask *cpu_map, struct sched_domain_attr *attr,
-		struct sched_domain *child, int cpu)
+		struct sched_domain *child, int dflags, int cpu)
 {
-	struct sched_domain *sd = sd_init(tl, cpu_map, child, cpu);
+	struct sched_domain *sd = sd_init(tl, cpu_map, child, dflags, cpu);
 
 	if (child) {
 		sd->level = child->level + 1;
@@ -1633,6 +1842,65 @@
 }
 
 /*
+ * Find the sched_domain_topology_level where all CPU capacities are visible
+ * for all CPUs.
+ */
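+/*
+ * On a typical two-cluster big.LITTLE system, for example, this is the
+ * first level whose span covers both clusters, since only there is the
+ * highest-capacity CPU visible to every CPU.
+ */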
+static struct sched_domain_topology_level
+*asym_cpu_capacity_level(const struct cpumask *cpu_map)
+{
+	int i, j, asym_level = 0;
+	bool asym = false;
+	struct sched_domain_topology_level *tl, *asym_tl = NULL;
+	unsigned long cap;
+
+	/* Is there any asymmetry? */
+	cap = arch_scale_cpu_capacity(NULL, cpumask_first(cpu_map));
+
+	for_each_cpu(i, cpu_map) {
+		if (arch_scale_cpu_capacity(NULL, i) != cap) {
+			asym = true;
+			break;
+		}
+	}
+
+	if (!asym)
+		return NULL;
+
+	/*
+	 * Examine topology from all CPU's point of views to detect the lowest
+	 * sched_domain_topology_level where a highest capacity CPU is visible
+	 * to everyone.
+	 */
+	for_each_cpu(i, cpu_map) {
+		unsigned long max_capacity = arch_scale_cpu_capacity(NULL, i);
+		int tl_id = 0;
+
+		for_each_sd_topology(tl) {
+			if (tl_id < asym_level)
+				goto next_level;
+
+			for_each_cpu_and(j, tl->mask(i), cpu_map) {
+				unsigned long capacity;
+
+				capacity = arch_scale_cpu_capacity(NULL, j);
+
+				if (capacity <= max_capacity)
+					continue;
+
+				max_capacity = capacity;
+				asym_level = tl_id;
+				asym_tl = tl;
+			}
+next_level:
+			tl_id++;
+		}
+	}
+
+	return asym_tl;
+}
+
+
+/*
  * Build sched domains for a given set of CPUs and attach the sched domains
  * to the individual CPUs
  */
@@ -1642,20 +1910,31 @@
 	enum s_alloc alloc_state;
 	struct sched_domain *sd;
 	struct s_data d;
-	struct rq *rq = NULL;
 	int i, ret = -ENOMEM;
+	struct sched_domain_topology_level *tl_asym;
+	bool has_asym = false;
 
 	alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
 	if (alloc_state != sa_rootdomain)
 		goto error;
 
+	tl_asym = asym_cpu_capacity_level(cpu_map);
+
 	/* Set up domains for CPUs specified by the cpu_map: */
 	for_each_cpu(i, cpu_map) {
 		struct sched_domain_topology_level *tl;
 
 		sd = NULL;
 		for_each_sd_topology(tl) {
-			sd = build_sched_domain(tl, cpu_map, attr, sd, i);
+			int dflags = 0;
+
+			if (tl == tl_asym) {
+				dflags |= SD_ASYM_CPUCAPACITY;
+				has_asym = true;
+			}
+
+			sd = build_sched_domain(tl, cpu_map, attr, sd, dflags, i);
+
 			if (tl == sched_domain_topology)
 				*per_cpu_ptr(d.sd, i) = sd;
 			if (tl->flags & SDTL_OVERLAP)
@@ -1693,21 +1972,13 @@
 	/* Attach the domains */
 	rcu_read_lock();
 	for_each_cpu(i, cpu_map) {
-		rq = cpu_rq(i);
 		sd = *per_cpu_ptr(d.sd, i);
-
-		/* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */
-		if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity))
-			WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig);
-
 		cpu_attach_domain(sd, d.rd, i);
 	}
 	rcu_read_unlock();
 
-	if (rq && sched_debug_enabled) {
-		pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
-			cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
-	}
+	if (has_asym)
+		static_branch_enable_cpuslocked(&sched_asym_cpucapacity);
 
 	ret = 0;
 error:
@@ -1852,6 +2123,7 @@
 void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 			     struct sched_domain_attr *dattr_new)
 {
+	bool __maybe_unused has_eas = false;
 	int i, j, n;
 	int new_topology;
 
@@ -1879,8 +2151,8 @@
 	/* Destroy deleted domains: */
 	for (i = 0; i < ndoms_cur; i++) {
 		for (j = 0; j < n && !new_topology; j++) {
-			if (cpumask_equal(doms_cur[i], doms_new[j])
-			    && dattrs_equal(dattr_cur, i, dattr_new, j))
+			if (cpumask_equal(doms_cur[i], doms_new[j]) &&
+			    dattrs_equal(dattr_cur, i, dattr_new, j))
 				goto match1;
 		}
 		/* No match - a current sched domain not in new doms_new[] */
@@ -1900,8 +2172,8 @@
 	/* Build new domains: */
 	for (i = 0; i < ndoms_new; i++) {
 		for (j = 0; j < n && !new_topology; j++) {
-			if (cpumask_equal(doms_new[i], doms_cur[j])
-			    && dattrs_equal(dattr_new, i, dattr_cur, j))
+			if (cpumask_equal(doms_new[i], doms_cur[j]) &&
+			    dattrs_equal(dattr_new, i, dattr_cur, j))
 				goto match2;
 		}
 		/* No match - add a new doms_new */
@@ -1910,6 +2182,24 @@
 		;
 	}
 
+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
+	/* Build perf. domains: */
+	for (i = 0; i < ndoms_new; i++) {
+		for (j = 0; j < n && !sched_energy_update; j++) {
+			if (cpumask_equal(doms_new[i], doms_cur[j]) &&
+			    cpu_rq(cpumask_first(doms_cur[j]))->rd->pd) {
+				has_eas = true;
+				goto match3;
+			}
+		}
+		/* No match - add perf. domains for a new rd */
+		has_eas |= build_perf_domains(doms_new[i]);
+match3:
+		;
+	}
+	sched_energy_set(has_eas);
+#endif
+
 	/* Remember the new sched domains: */
 	if (doms_cur != &fallback_doms)
 		free_sched_domains(doms_cur, ndoms_cur);
diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c
new file mode 100644
index 0000000..3b231c6
--- /dev/null
+++ b/kernel/sched/tune.c
@@ -0,0 +1,692 @@
+#include <linux/cgroup.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/printk.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+
+#include <trace/events/sched.h>
+
+#include "sched.h"
+
+bool schedtune_initialized = false;
+extern struct reciprocal_value schedtune_spc_rdiv;
+
+/* We hold schedtune boost in effect for at least this long */
+#define SCHEDTUNE_BOOST_HOLD_NS 50000000ULL
+
+/*
+ * EAS scheduler tunables for task groups.
+ *
+ * When CGroup support is enabled, we have to synchronize two different
+ * paths:
+ *  - slow path: where CGroups are created/updated/removed
+ *  - fast path: where tasks in a CGroup are accounted
+ *
+ * The slow path tracks (a limited number of) CGroups and maps each onto a
+ * "boost_group" index. The fast path accounts tasks currently RUNNABLE on each
+ * "boost_group".
+ *
+ * Once a new CGroup is created, a boost group idx is assigned and the
+ * corresponding "boost_group" is marked as valid on each CPU.
+ * Once a CGroup is released, the corresponding "boost_group" is marked as
+ * invalid on each CPU. The CPU boost value (boost_max) is aggregated by
+ * considering only valid boost_groups with a non-null tasks counter.
+ *
+ * .:: Locking strategy
+ *
+ * The fast path uses a spin lock for each CPU boost_group which protects the
+ * tasks counter.
+ *
+ * The "valid" and "boost" values of each CPU boost_group are instead
+ * protected by the RCU lock provided by the CGroups callbacks. Thus, only the
+ * slow path can access and modify the boost_group attributes of each CPU.
+ * The fast path will pick up the most recently updated values at the next
+ * scheduling event (i.e. enqueue/dequeue).
+ *
+ *                                                        |
+ *                                             SLOW PATH  |   FAST PATH
+ *                              CGroup add/update/remove  |   Scheduler enqueue/dequeue events
+ *                                                        |
+ *                                                        |
+ *                                                        |     DEFINE_PER_CPU(struct boost_groups)
+ *                                                        |     +--------------+----+---+----+----+
+ *                                                        |     |  idle        |    |   |    |    |
+ *                                                        |     |  boost_max   |    |   |    |    |
+ *                                                        |  +---->lock        |    |   |    |    |
+ *  struct schedtune                  allocated_groups    |  |  |  group[    ] |    |   |    |    |
+ *  +------------------------------+         +-------+    |  |  +--+---------+-+----+---+----+----+
+ *  | idx                          |         |       |    |  |     |  valid  |
+ *  | boost / prefer_idle          |         |       |    |  |     |  boost  |
+ *  | perf_{boost/constraints}_idx | <---------+(*)  |    |  |     |  tasks  | <------------+
+ *  | css                          |         +-------+    |  |     +---------+              |
+ *  +-+----------------------------+         |       |    |  |     |         |              |
+ *    ^                                      |       |    |  |     |         |              |
+ *    |                                      +-------+    |  |     +---------+              |
+ *    |                                      |       |    |  |     |         |              |
+ *    |                                      |       |    |  |     |         |              |
+ *    |                                      +-------+    |  |     +---------+              |
+ *    | zmalloc                              |       |    |  |     |         |              |
+ *    |                                      |       |    |  |     |         |              |
+ *    |                                      +-------+    |  |     +---------+              |
+ *    +                              BOOSTGROUPS_COUNT    |  |     BOOSTGROUPS_COUNT        |
+ *  schedtune_boostgroup_init()                           |  +                              |
+ *                                                        |  schedtune_{en,de}queue_task()  |
+ *                                                        |                                 +
+ *                                                        |          schedtune_tasks_update()
+ *                                                        |
+ */
+
+/* SchedTune tunables for a group of tasks */
+struct schedtune {
+	/* SchedTune CGroup subsystem */
+	struct cgroup_subsys_state css;
+
+	/* Boost group allocated ID */
+	int idx;
+
+	/* Boost value for tasks on that SchedTune CGroup */
+	int boost;
+
+	/* Hint to bias scheduling of tasks on that SchedTune CGroup
+	 * towards idle CPUs */
+	int prefer_idle;
+};
+
+static inline struct schedtune *css_st(struct cgroup_subsys_state *css)
+{
+	return css ? container_of(css, struct schedtune, css) : NULL;
+}
+
+static inline struct schedtune *task_schedtune(struct task_struct *tsk)
+{
+	return css_st(task_css(tsk, schedtune_cgrp_id));
+}
+
+static inline struct schedtune *parent_st(struct schedtune *st)
+{
+	return css_st(st->css.parent);
+}
+
+/*
+ * SchedTune root control group
+ * The root control group is used to define a system-wide boost tuning,
+ * which is applied to all tasks in the system.
+ * Task-specific boost tuning can be specified by creating and
+ * configuring a child control group under the root one.
+ * By default, system-wide boosting is disabled, i.e. no boosting is applied
+ * to tasks which are not in a child control group.
+ */
+static struct schedtune
+root_schedtune = {
+	.boost	= 0,
+	.prefer_idle = 0,
+};
+
+/*
+ * Maximum number of boost groups to support
+ * When per-task boosting is used we still allow only a limited number of
+ * boost groups for two main reasons:
+ * 1. on a real system we usually have only a few classes of workloads which
+ *    it makes sense to boost with different values (e.g. background vs foreground
+ *    tasks, interactive vs low-priority tasks)
+ * 2. a limited number allows for a simpler and more memory/time efficient
+ *    implementation especially for the computation of the per-CPU boost
+ *    value
+ */
+#define BOOSTGROUPS_COUNT 5
+
+/* Array of configured boostgroups */
+static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = {
+	&root_schedtune,
+	NULL,
+};
+
+/* SchedTune boost groups
+ * Keep track of all the boost groups which impact a CPU, for example when a
+ * CPU has two RUNNABLE tasks belonging to two different boost groups and thus
+ * likely with different boost values.
+ * Since on each system we expect only a limited number of boost groups, here
+ * we use a simple array to keep track of the metrics required to compute the
+ * maximum per-CPU boosting value.
+ */
+struct boost_groups {
+	/* Maximum boost value for all RUNNABLE tasks on a CPU */
+	int boost_max;
+	u64 boost_ts;
+	struct {
+		/* True when this boost group maps an actual cgroup */
+		bool valid;
+		/* The boost for tasks on that boost group */
+		int boost;
+		/* Count of RUNNABLE tasks on that boost group */
+		unsigned tasks;
+		/* Timestamp of boost activation */
+		u64 ts;
+	} group[BOOSTGROUPS_COUNT];
+	/* CPU's boost group locking */
+	raw_spinlock_t lock;
+};
+
+/* Boost groups affecting each CPU in the system */
+DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups);
+
+static inline bool schedtune_boost_timeout(u64 now, u64 ts)
+{
+	return ((now - ts) > SCHEDTUNE_BOOST_HOLD_NS);
+}
+
+static inline bool
+schedtune_boost_group_active(int idx, struct boost_groups* bg, u64 now)
+{
+	if (bg->group[idx].tasks)
+		return true;
+
+	return !schedtune_boost_timeout(now, bg->group[idx].ts);
+}
+
+static void
+schedtune_cpu_update(int cpu, u64 now)
+{
+	struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu);
+	int boost_max;
+	u64 boost_ts;
+	int idx;
+
+	/* The root boost group is always active */
+	boost_max = bg->group[0].boost;
+	boost_ts = now;
+	for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx) {
+
+		/* Ignore boost groups not mapping an actual cgroup */
+		if (!bg->group[idx].valid)
+			continue;
+
+		/*
+		 * A boost group affects a CPU only if it has
+		 * RUNNABLE tasks on that CPU or a boost hold is still
+		 * in effect from a previous task.
+		 */
+		if (!schedtune_boost_group_active(idx, bg, now))
+			continue;
+
+		/* This boost group is active */
+		if (boost_max > bg->group[idx].boost)
+			continue;
+
+		boost_max = bg->group[idx].boost;
+		boost_ts =  bg->group[idx].ts;
+	}
+
+	/* Ensure boost_max is non-negative when all cgroup boost values
+	 * are negative. Avoids under-accounting of CPU capacity, which may cause
+	 * task stacking and frequency spikes. */
+	boost_max = max(boost_max, 0);
+	bg->boost_max = boost_max;
+	bg->boost_ts = boost_ts;
+}
+
+static int
+schedtune_boostgroup_update(int idx, int boost)
+{
+	struct boost_groups *bg;
+	int cur_boost_max;
+	int old_boost;
+	int cpu;
+	u64 now;
+
+	/* Update per CPU boost groups */
+	for_each_possible_cpu(cpu) {
+		bg = &per_cpu(cpu_boost_groups, cpu);
+
+		/* Updates can only come from cgroups mapped to a valid boost group */
+		BUG_ON(!bg->group[idx].valid);
+
+		/*
+		 * Keep track of current boost values to compute the per CPU
+		 * maximum only when it has been affected by the new value of
+		 * the updated boost group
+		 */
+		cur_boost_max = bg->boost_max;
+		old_boost = bg->group[idx].boost;
+
+		/* Update the boost value of this boost group */
+		bg->group[idx].boost = boost;
+
+		/* Check if this update increases the current max */
+		now = sched_clock_cpu(cpu);
+		if (boost > cur_boost_max &&
+			schedtune_boost_group_active(idx, bg, now)) {
+			bg->boost_max = boost;
+			bg->boost_ts = bg->group[idx].ts;
+
+			trace_sched_tune_boostgroup_update(cpu, 1, bg->boost_max);
+			continue;
+		}
+
+		/* Check if this update has decreased the current max */
+		if (cur_boost_max == old_boost && old_boost > boost) {
+			schedtune_cpu_update(cpu, now);
+			trace_sched_tune_boostgroup_update(cpu, -1, bg->boost_max);
+			continue;
+		}
+
+		trace_sched_tune_boostgroup_update(cpu, 0, bg->boost_max);
+	}
+
+	return 0;
+}
+
+#define ENQUEUE_TASK  1
+#define DEQUEUE_TASK -1
+
+static inline bool
+schedtune_update_timestamp(struct task_struct *p)
+{
+	if (sched_feat(SCHEDTUNE_BOOST_HOLD_ALL))
+		return true;
+
+	return task_has_rt_policy(p);
+}
+
+static inline void
+schedtune_tasks_update(struct task_struct *p, int cpu, int idx, int task_count)
+{
+	struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu);
+	int tasks = bg->group[idx].tasks + task_count;
+
+	/* Update the boosted tasks count without letting it go negative */
+	bg->group[idx].tasks = max(0, tasks);
+
+	/* Update timeout on enqueue */
+	if (task_count > 0) {
+		u64 now = sched_clock_cpu(cpu);
+
+		if (schedtune_update_timestamp(p))
+			bg->group[idx].ts = now;
+
+		/* Boost group activation or deactivation on that RQ */
+		if (bg->group[idx].tasks == 1)
+			schedtune_cpu_update(cpu, now);
+	}
+
+	trace_sched_tune_tasks_update(p, cpu, tasks, idx,
+			bg->group[idx].boost, bg->boost_max,
+			bg->group[idx].ts);
+}
+
+/*
+ * NOTE: This function must be called while holding the lock on the CPU RQ
+ */
+void schedtune_enqueue_task(struct task_struct *p, int cpu)
+{
+	struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu);
+	unsigned long irq_flags;
+	struct schedtune *st;
+	int idx;
+
+	if (unlikely(!schedtune_initialized))
+		return;
+
+	/*
+	 * Boost group accounting is protected by a per-CPU lock and requires
+	 * interrupts to be disabled to avoid race conditions, for example with
+	 * do_exit()::cgroup_exit() and task migration.
+	 */
+	raw_spin_lock_irqsave(&bg->lock, irq_flags);
+	rcu_read_lock();
+
+	st = task_schedtune(p);
+	idx = st->idx;
+
+	schedtune_tasks_update(p, cpu, idx, ENQUEUE_TASK);
+
+	rcu_read_unlock();
+	raw_spin_unlock_irqrestore(&bg->lock, irq_flags);
+}
+
+int schedtune_can_attach(struct cgroup_taskset *tset)
+{
+	struct task_struct *task;
+	struct cgroup_subsys_state *css;
+	struct boost_groups *bg;
+	struct rq_flags rq_flags;
+	unsigned int cpu;
+	struct rq *rq;
+	int src_bg; /* Source boost group index */
+	int dst_bg; /* Destination boost group index */
+	int tasks;
+	u64 now;
+
+	if (unlikely(!schedtune_initialized))
+		return 0;
+
+
+	cgroup_taskset_for_each(task, css, tset) {
+
+		/*
+		 * Lock the CPU's RQ the task is enqueued to avoid race
+		 * conditions with migration code while the task is being
+		 * accounted
+		 */
+		rq = task_rq_lock(task, &rq_flags);
+
+		if (!task->on_rq) {
+			task_rq_unlock(rq, task, &rq_flags);
+			continue;
+		}
+
+		/*
+		 * Boost group accounting is protected by a per-CPU lock and requires
+		 * interrupts to be disabled to avoid race conditions on...
+		 */
+		cpu = cpu_of(rq);
+		bg = &per_cpu(cpu_boost_groups, cpu);
+		raw_spin_lock(&bg->lock);
+
+		dst_bg = css_st(css)->idx;
+		src_bg = task_schedtune(task)->idx;
+
+		/*
+		 * Current task is not changing boostgroup, which can
+		 * happen when the new hierarchy is in use.
+		 */
+		if (unlikely(dst_bg == src_bg)) {
+			raw_spin_unlock(&bg->lock);
+			task_rq_unlock(rq, task, &rq_flags);
+			continue;
+		}
+
+		/*
+		 * This is the case of a RUNNABLE task which is switching its
+		 * current boost group.
+		 */
+
+		/* Move task from src to dst boost group */
+		tasks = bg->group[src_bg].tasks - 1;
+		bg->group[src_bg].tasks = max(0, tasks);
+		bg->group[dst_bg].tasks += 1;
+
+		/* Update boost hold start for this group */
+		now = sched_clock_cpu(cpu);
+		bg->group[dst_bg].ts = now;
+
+		/* Force boost group re-evaluation at next boost check */
+		bg->boost_ts = now - SCHEDTUNE_BOOST_HOLD_NS;
+
+		raw_spin_unlock(&bg->lock);
+		task_rq_unlock(rq, task, &rq_flags);
+	}
+
+	return 0;
+}
+
+void schedtune_cancel_attach(struct cgroup_taskset *tset)
+{
+	/* This can happen only if the SchedTune controller is mounted with
+	 * other hierarchies and one of them fails. Since SchedTune is usually
+	 * mounted on its own hierarchy, for the time being we do not implement
+	 * a proper rollback mechanism */
+	WARN(1, "SchedTune cancel attach not implemented");
+}
+
+/*
+ * NOTE: This function must be called while holding the lock on the CPU RQ
+ */
+void schedtune_dequeue_task(struct task_struct *p, int cpu)
+{
+	struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu);
+	unsigned long irq_flags;
+	struct schedtune *st;
+	int idx;
+
+	if (unlikely(!schedtune_initialized))
+		return;
+
+	/*
+	 * Boost group accounting is protected by a per-CPU lock and requires
+	 * interrupts to be disabled to avoid race conditions on...
+	 */
+	raw_spin_lock_irqsave(&bg->lock, irq_flags);
+	rcu_read_lock();
+
+	st = task_schedtune(p);
+	idx = st->idx;
+
+	schedtune_tasks_update(p, cpu, idx, DEQUEUE_TASK);
+
+	rcu_read_unlock();
+	raw_spin_unlock_irqrestore(&bg->lock, irq_flags);
+}
+
+int schedtune_cpu_boost(int cpu)
+{
+	struct boost_groups *bg;
+	u64 now;
+
+	bg = &per_cpu(cpu_boost_groups, cpu);
+	now = sched_clock_cpu(cpu);
+
+	/* Check to see if we have a hold in effect */
+	if (schedtune_boost_timeout(now, bg->boost_ts))
+		schedtune_cpu_update(cpu, now);
+
+	return bg->boost_max;
+}
+
+int schedtune_task_boost(struct task_struct *p)
+{
+	struct schedtune *st;
+	int task_boost;
+
+	if (unlikely(!schedtune_initialized))
+		return 0;
+
+	/* Get task boost value */
+	rcu_read_lock();
+	st = task_schedtune(p);
+	task_boost = st->boost;
+	rcu_read_unlock();
+
+	return task_boost;
+}
+
+int schedtune_prefer_idle(struct task_struct *p)
+{
+	struct schedtune *st;
+	int prefer_idle;
+
+	if (unlikely(!schedtune_initialized))
+		return 0;
+
+	/* Get prefer_idle value */
+	rcu_read_lock();
+	st = task_schedtune(p);
+	prefer_idle = st->prefer_idle;
+	rcu_read_unlock();
+
+	return prefer_idle;
+}
+
+static u64
+prefer_idle_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+	struct schedtune *st = css_st(css);
+
+	return st->prefer_idle;
+}
+
+static int
+prefer_idle_write(struct cgroup_subsys_state *css, struct cftype *cft,
+	    u64 prefer_idle)
+{
+	struct schedtune *st = css_st(css);
+	st->prefer_idle = !!prefer_idle;
+
+	return 0;
+}
+
+static s64
+boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+	struct schedtune *st = css_st(css);
+
+	return st->boost;
+}
+
+static int
+boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
+	    s64 boost)
+{
+	struct schedtune *st = css_st(css);
+
+	if (boost < 0 || boost > 100)
+		return -EINVAL;
+
+	st->boost = boost;
+
+	/* Update CPU boost */
+	schedtune_boostgroup_update(st->idx, st->boost);
+
+	return 0;
+}
+
+static struct cftype files[] = {
+	{
+		.name = "boost",
+		.read_s64 = boost_read,
+		.write_s64 = boost_write,
+	},
+	{
+		.name = "prefer_idle",
+		.read_u64 = prefer_idle_read,
+		.write_u64 = prefer_idle_write,
+	},
+	{ }	/* terminate */
+};
+
+static void
+schedtune_boostgroup_init(struct schedtune *st, int idx)
+{
+	struct boost_groups *bg;
+	int cpu;
+
+	/* Initialize per-CPU boost group support */
+	for_each_possible_cpu(cpu) {
+		bg = &per_cpu(cpu_boost_groups, cpu);
+		bg->group[idx].boost = 0;
+		bg->group[idx].valid = true;
+		bg->group[idx].ts = 0;
+	}
+
+	/* Keep track of allocated boost groups */
+	allocated_group[idx] = st;
+	st->idx = idx;
+}
+
+static struct cgroup_subsys_state *
+schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+	struct schedtune *st;
+	int idx;
+
+	if (!parent_css)
+		return &root_schedtune.css;
+
+	/* Allow only single-level hierarchies */
+	if (parent_css != &root_schedtune.css) {
+		pr_err("Nested SchedTune boosting groups not allowed\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* Allow only a limited number of boosting groups */
+	for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx)
+		if (!allocated_group[idx])
+			break;
+	if (idx == BOOSTGROUPS_COUNT) {
+		pr_err("Trying to create more than %d SchedTune boosting groups\n",
+		       BOOSTGROUPS_COUNT);
+		return ERR_PTR(-ENOSPC);
+	}
+
+	st = kzalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		goto out;
+
+	/* Initialize per-CPU boost group support */
+	schedtune_boostgroup_init(st, idx);
+
+	return &st->css;
+
+out:
+	return ERR_PTR(-ENOMEM);
+}
+
+static void
+schedtune_boostgroup_release(struct schedtune *st)
+{
+	struct boost_groups *bg;
+	int cpu;
+
+	/* Reset per-CPU boost group support */
+	for_each_possible_cpu(cpu) {
+		bg = &per_cpu(cpu_boost_groups, cpu);
+		bg->group[st->idx].valid = false;
+		bg->group[st->idx].boost = 0;
+	}
+
+	/* This boost group is no longer allocated */
+	allocated_group[st->idx] = NULL;
+}
+
+static void
+schedtune_css_free(struct cgroup_subsys_state *css)
+{
+	struct schedtune *st = css_st(css);
+
+	/* Release per-CPU boost group support */
+	schedtune_boostgroup_release(st);
+	kfree(st);
+}
+
+struct cgroup_subsys schedtune_cgrp_subsys = {
+	.css_alloc	= schedtune_css_alloc,
+	.css_free	= schedtune_css_free,
+	.can_attach     = schedtune_can_attach,
+	.cancel_attach  = schedtune_cancel_attach,
+	.legacy_cftypes	= files,
+	.early_init	= 1,
+};
+
+static inline void
+schedtune_init_cgroups(void)
+{
+	struct boost_groups *bg;
+	int cpu;
+
+	/* Initialize the per-CPU boost groups */
+	for_each_possible_cpu(cpu) {
+		bg = &per_cpu(cpu_boost_groups, cpu);
+		memset(bg, 0, sizeof(struct boost_groups));
+		bg->group[0].valid = true;
+		raw_spin_lock_init(&bg->lock);
+	}
+
+	pr_info("schedtune: configured to support %d boost groups\n",
+		BOOSTGROUPS_COUNT);
+
+	schedtune_initialized = true;
+}
+
+/*
+ * Initialize the cgroup structures
+ */
+static int
+schedtune_init(void)
+{
+	schedtune_spc_rdiv = reciprocal_value(100);
+	schedtune_init_cgroups();
+	return 0;
+}
+postcore_initcall(schedtune_init);
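Note: the per-CPU aggregation above boils down to a max() over the valid, currently active boost groups, where a group stays "active" for SCHEDTUNE_BOOST_HOLD_NS after its last RUNNABLE task left the CPU. The following standalone sketch models that aggregation outside the kernel; the structure and helper names are illustrative only, not part of the patch.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NGROUPS 5
#define BOOST_HOLD_NS 50000000ULL	/* mirrors SCHEDTUNE_BOOST_HOLD_NS (50ms) */

struct group {
	bool valid;		/* maps an actual cgroup */
	int boost;		/* boost value, 0..100 */
	unsigned int tasks;	/* RUNNABLE tasks of this group on this CPU */
	uint64_t ts;		/* timestamp of the last enqueue (ns) */
};

/* A group counts if it has runnable tasks or its boost hold has not expired */
static bool group_active(const struct group *g, uint64_t now)
{
	return g->tasks > 0 || (now - g->ts) <= BOOST_HOLD_NS;
}

/* Same shape as schedtune_cpu_update(): max boost over valid, active groups */
static int cpu_boost_max(const struct group *grp, uint64_t now)
{
	int i, max = grp[0].boost;	/* the root group is always active */

	for (i = 1; i < NGROUPS; i++) {
		if (!grp[i].valid || !group_active(&grp[i], now))
			continue;
		if (grp[i].boost > max)
			max = grp[i].boost;
	}
	return max > 0 ? max : 0;	/* never report a negative boost */
}

int main(void)
{
	struct group grp[NGROUPS] = {
		{ .valid = true, .boost = 0 },				/* root */
		{ .valid = true, .boost = 30, .tasks = 1, .ts = 0 },	/* "foreground" */
		{ .valid = true, .boost = 10, .tasks = 0, .ts = 0 },	/* "background" */
	};

	printf("t=0ms    boost_max=%d\n", cpu_boost_max(grp, 0));		/* 30 */
	grp[1].tasks = 0;	/* last foreground task dequeued at t=0 */
	printf("t=10ms   boost_max=%d\n", cpu_boost_max(grp, 10000000ULL));	/* 30: hold in effect */
	printf("t=100ms  boost_max=%d\n", cpu_boost_max(grp, 100000000ULL));	/* 0: hold expired */
	return 0;
}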
diff --git a/kernel/sched/tune.h b/kernel/sched/tune.h
new file mode 100644
index 0000000..821f026
--- /dev/null
+++ b/kernel/sched/tune.h
@@ -0,0 +1,37 @@
+
+#ifdef CONFIG_SCHED_TUNE
+
+#include <linux/reciprocal_div.h>
+
+/*
+ * System energy normalization constants
+ */
+struct target_nrg {
+	unsigned long min_power;
+	unsigned long max_power;
+	struct reciprocal_value rdiv;
+};
+
+int schedtune_cpu_boost(int cpu);
+int schedtune_task_boost(struct task_struct *tsk);
+
+int schedtune_prefer_idle(struct task_struct *tsk);
+
+void schedtune_enqueue_task(struct task_struct *p, int cpu);
+void schedtune_dequeue_task(struct task_struct *p, int cpu);
+
+unsigned long boosted_cpu_util(int cpu, unsigned long other_util);
+
+#else /* CONFIG_SCHED_TUNE */
+
+#define schedtune_cpu_boost(cpu)  0
+#define schedtune_task_boost(tsk) 0
+
+#define schedtune_prefer_idle(tsk) 0
+
+#define schedtune_enqueue_task(task, cpu) do { } while (0)
+#define schedtune_dequeue_task(task, cpu) do { } while (0)
+
+#define boosted_cpu_util(cpu, other_util) cpu_util_cfs(cpu_rq(cpu))
+
+#endif /* CONFIG_SCHED_TUNE */
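boosted_cpu_util() itself is defined in fair.c and is not part of this hunk; only its declaration and the !CONFIG_SCHED_TUNE fallback appear above. As a rough illustration of the spare-capacity ("SPC") boosting that schedtune_spc_rdiv = reciprocal_value(100) in tune.c hints at, a boost value in [0, 100] is typically applied as a margin proportional to the CPU's remaining headroom. The sketch below is an assumption about that usage, not the code from this patch.

#include <stdio.h>

#define SCHED_CAPACITY_SCALE 1024UL

/*
 * Illustrative spare-capacity boost: with boost in [0, 100], add boost%
 * of the remaining headroom (capacity - util) on top of util.  In-kernel
 * the division by 100 is done via reciprocal_divide(schedtune_spc_rdiv).
 */
static unsigned long boosted_util_sketch(unsigned long util, int boost)
{
	unsigned long margin;

	if (util >= SCHED_CAPACITY_SCALE)
		return SCHED_CAPACITY_SCALE;

	margin = (SCHED_CAPACITY_SCALE - util) * (unsigned long)boost / 100;
	return util + margin;
}

int main(void)
{
	/* A half-loaded CPU with a 30% boost is treated as ~65% loaded */
	printf("%lu\n", boosted_util_sketch(512, 30));	/* prints 665 */
	return 0;
}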
diff --git a/kernel/sys.c b/kernel/sys.c
index 096932a..32b78d0 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -42,6 +42,8 @@
 #include <linux/syscore_ops.h>
 #include <linux/version.h>
 #include <linux/ctype.h>
+#include <linux/mm.h>
+#include <linux/mempolicy.h>
 
 #include <linux/compat.h>
 #include <linux/syscalls.h>
@@ -2258,6 +2260,153 @@
 	return -EINVAL;
 }
 
+#ifdef CONFIG_MMU
+static int prctl_update_vma_anon_name(struct vm_area_struct *vma,
+		struct vm_area_struct **prev,
+		unsigned long start, unsigned long end,
+		const char __user *name_addr)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	int error = 0;
+	pgoff_t pgoff;
+
+	if (name_addr == vma_get_anon_name(vma)) {
+		*prev = vma;
+		goto out;
+	}
+
+	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
+	*prev = vma_merge(mm, *prev, start, end, vma->vm_flags, vma->anon_vma,
+				vma->vm_file, pgoff, vma_policy(vma),
+				vma->vm_userfaultfd_ctx, name_addr);
+	if (*prev) {
+		vma = *prev;
+		goto success;
+	}
+
+	*prev = vma;
+
+	if (start != vma->vm_start) {
+		error = split_vma(mm, vma, start, 1);
+		if (error)
+			goto out;
+	}
+
+	if (end != vma->vm_end) {
+		error = split_vma(mm, vma, end, 0);
+		if (error)
+			goto out;
+	}
+
+success:
+	if (!vma->vm_file)
+		vma->anon_name = name_addr;
+
+out:
+	if (error == -ENOMEM)
+		error = -EAGAIN;
+	return error;
+}
+
+static int prctl_set_vma_anon_name(unsigned long start, unsigned long end,
+			unsigned long arg)
+{
+	unsigned long tmp;
+	struct vm_area_struct *vma, *prev;
+	int unmapped_error = 0;
+	int error = -EINVAL;
+
+	/*
+	 * If the interval [start,end) covers some unmapped address
+	 * ranges, just ignore them, but return -ENOMEM at the end.
+	 * - this matches the handling in madvise.
+	 */
+	vma = find_vma_prev(current->mm, start, &prev);
+	if (vma && start > vma->vm_start)
+		prev = vma;
+
+	for (;;) {
+		/* Still start < end. */
+		error = -ENOMEM;
+		if (!vma)
+			return error;
+
+		/* Here start < (end|vma->vm_end). */
+		if (start < vma->vm_start) {
+			unmapped_error = -ENOMEM;
+			start = vma->vm_start;
+			if (start >= end)
+				return error;
+		}
+
+		/* Here vma->vm_start <= start < (end|vma->vm_end) */
+		tmp = vma->vm_end;
+		if (end < tmp)
+			tmp = end;
+
+		/* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
+		error = prctl_update_vma_anon_name(vma, &prev, start, tmp,
+				(const char __user *)arg);
+		if (error)
+			return error;
+		start = tmp;
+		if (prev && start < prev->vm_end)
+			start = prev->vm_end;
+		error = unmapped_error;
+		if (start >= end)
+			return error;
+		if (prev)
+			vma = prev->vm_next;
+		else	/* madvise_remove dropped mmap_sem */
+			vma = find_vma(current->mm, start);
+	}
+}
+
+static int prctl_set_vma(unsigned long opt, unsigned long start,
+		unsigned long len_in, unsigned long arg)
+{
+	struct mm_struct *mm = current->mm;
+	int error;
+	unsigned long len;
+	unsigned long end;
+
+	if (start & ~PAGE_MASK)
+		return -EINVAL;
+	len = (len_in + ~PAGE_MASK) & PAGE_MASK;
+
+	/* Check to see whether len was rounded up from small -ve to zero */
+	if (len_in && !len)
+		return -EINVAL;
+
+	end = start + len;
+	if (end < start)
+		return -EINVAL;
+
+	if (end == start)
+		return 0;
+
+	down_write(&mm->mmap_sem);
+
+	switch (opt) {
+	case PR_SET_VMA_ANON_NAME:
+		error = prctl_set_vma_anon_name(start, end, arg);
+		break;
+	default:
+		error = -EINVAL;
+	}
+
+	up_write(&mm->mmap_sem);
+
+	return error;
+}
+#else /* CONFIG_MMU */
+static int prctl_set_vma(unsigned long opt, unsigned long start,
+		unsigned long len_in, unsigned long arg)
+{
+	return -EINVAL;
+}
+#endif
+
 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		unsigned long, arg4, unsigned long, arg5)
 {
@@ -2476,6 +2625,9 @@
 			return -EINVAL;
 		error = arch_prctl_spec_ctrl_set(me, arg2, arg3);
 		break;
+	case PR_SET_VMA:
+		error = prctl_set_vma(arg2, arg3, arg4, arg5);
+		break;
 	default:
 		error = -EINVAL;
 		break;
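For reference, the new prctl is meant to be driven from userspace roughly as below. PR_SET_VMA and PR_SET_VMA_ANON_NAME come from include/uapi/linux/prctl.h, which is not part of this hunk; the numeric fallbacks are the values bionic uses and should be treated as assumptions here.

#include <stdio.h>
#include <sys/mman.h>
#include <sys/prctl.h>

#ifndef PR_SET_VMA
#define PR_SET_VMA		0x53564d41	/* assumed, see uapi prctl.h */
#define PR_SET_VMA_ANON_NAME	0
#endif

int main(void)
{
	size_t len = 4 * 4096;
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;

	/*
	 * The kernel stores the user pointer itself (vma->anon_name), so the
	 * string must stay valid for the lifetime of the mapping.
	 */
	static const char name[] = "example:scratch-buffer";

	if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
		  (unsigned long)p, len, (unsigned long)name))
		perror("prctl(PR_SET_VMA)");

	/*
	 * With the rest of this series applied, the region is reported as
	 * [anon:example:scratch-buffer] in /proc/<pid>/maps.
	 */
	munmap(p, len);
	return 0;
}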
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f857650..33c64f3 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -106,6 +106,7 @@
 extern unsigned int core_pipe_limit;
 #endif
 extern int pid_max;
+extern int extra_free_kbytes;
 extern int pid_max_min, pid_max_max;
 extern int percpu_pagelist_fraction;
 extern int latencytop_enabled;
@@ -323,6 +324,13 @@
 	},
 #ifdef CONFIG_SCHED_DEBUG
 	{
+		.procname       = "sched_cstate_aware",
+		.data           = &sysctl_sched_cstate_aware,
+		.maxlen         = sizeof(unsigned int),
+		.mode           = 0644,
+		.proc_handler   = proc_dointvec,
+	},
+	{
 		.procname	= "sched_min_granularity_ns",
 		.data		= &sysctl_sched_min_granularity,
 		.maxlen		= sizeof(unsigned int),
@@ -341,6 +349,13 @@
 		.extra2		= &max_sched_granularity_ns,
 	},
 	{
+		.procname	= "sched_sync_hint_enable",
+		.data		= &sysctl_sched_sync_hint_enable,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
 		.procname	= "sched_wakeup_granularity_ns",
 		.data		= &sysctl_sched_wakeup_granularity,
 		.maxlen		= sizeof(unsigned int),
@@ -1462,6 +1477,14 @@
 		.extra2		= &one_thousand,
 	},
 	{
+		.procname	= "extra_free_kbytes",
+		.data		= &extra_free_kbytes,
+		.maxlen		= sizeof(extra_free_kbytes),
+		.mode		= 0644,
+		.proc_handler	= min_free_kbytes_sysctl_handler,
+		.extra1		= &zero,
+	},
+	{
 		.procname	= "percpu_pagelist_fraction",
 		.data		= &percpu_pagelist_fraction,
 		.maxlen		= sizeof(percpu_pagelist_fraction),
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7e215da..18d782a 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -124,8 +124,9 @@
 				 struct ftrace_ops *op, struct pt_regs *regs);
 #else
 /* See comment below, where ftrace_ops_list_func is defined */
-static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
-#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
+static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip,
+			      struct ftrace_ops *op, struct pt_regs *regs);
+#define ftrace_ops_list_func ftrace_ops_no_ops
 #endif
 
 /*
@@ -5521,9 +5522,9 @@
 	return 0;
 }
 
-static int ftrace_process_locs(struct module *mod,
-			       unsigned long *start,
-			       unsigned long *end)
+static int __norecordmcount ftrace_process_locs(struct module *mod,
+						unsigned long *start,
+						unsigned long *end)
 {
 	struct ftrace_page *start_pg;
 	struct ftrace_page *pg;
@@ -6334,7 +6335,8 @@
 }
 NOKPROBE_SYMBOL(ftrace_ops_list_func);
 #else
-static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip)
+static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip,
+			      struct ftrace_ops *op, struct pt_regs *regs)
 {
 	__ftrace_ops_list_func(ip, parent_ip, NULL, NULL);
 }
@@ -6797,14 +6799,17 @@
 	fgraph_graph_time = enable;
 }
 
+void ftrace_graph_return_stub(struct ftrace_graph_ret *trace)
+{
+}
+
 int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
 {
 	return 0;
 }
 
 /* The callbacks that hook a function */
-trace_func_graph_ret_t ftrace_graph_return =
-			(trace_func_graph_ret_t)ftrace_stub;
+trace_func_graph_ret_t ftrace_graph_return = ftrace_graph_return_stub;
 trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
 static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub;
 
@@ -7034,7 +7039,7 @@
 		goto out;
 
 	ftrace_graph_active--;
-	ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
+	ftrace_graph_return = ftrace_graph_return_stub;
 	ftrace_graph_entry = ftrace_graph_entry_stub;
 	__ftrace_graph_entry = ftrace_graph_entry_stub;
 	ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET);
diff --git a/kernel/user.c b/kernel/user.c
index 0df9b16..7f74a8a 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -17,6 +17,7 @@
 #include <linux/interrupt.h>
 #include <linux/export.h>
 #include <linux/user_namespace.h>
+#include <linux/proc_fs.h>
 #include <linux/proc_ns.h>
 
 /*
@@ -208,6 +209,7 @@
 		}
 		spin_unlock_irq(&uidhash_lock);
 	}
+	proc_register_uid(uid);
 
 	return up;
 
@@ -229,6 +231,7 @@
 	spin_lock_irq(&uidhash_lock);
 	uid_hash_insert(&root_user, uidhashentry(GLOBAL_ROOT_UID));
 	spin_unlock_irq(&uidhash_lock);
+	proc_register_uid(GLOBAL_ROOT_UID);
 
 	return 0;
 }
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index cd8b61b..c4f6308 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -910,6 +910,26 @@
 }
 
 /**
+ * wq_worker_last_func - retrieve worker's last work function
+ * @task: Task to retrieve last work function of.
+ *
+ * Determine the last function a worker executed. This is called from
+ * the scheduler to get a worker's last known identity.
+ *
+ * CONTEXT:
+ * spin_lock_irq(rq->lock)
+ *
+ * Return:
+ * The last work function %current executed as a worker, NULL if it
+ * hasn't executed any work yet.
+ */
+work_func_t wq_worker_last_func(struct task_struct *task)
+{
+	struct worker *worker = kthread_data(task);
+
+	return worker->last_func;
+}
+
+/**
  * worker_set_flags - set worker flags and adjust nr_running accordingly
  * @worker: self
  * @flags: flags to set
@@ -2184,6 +2204,9 @@
 	if (unlikely(cpu_intensive))
 		worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
 
+	/* tag the worker for identification in schedule() */
+	worker->last_func = worker->current_func;
+
 	/* we're done with it, release */
 	hash_del(&worker->hentry);
 	worker->current_work = NULL;
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
index 66fbb5a..cb68b03 100644
--- a/kernel/workqueue_internal.h
+++ b/kernel/workqueue_internal.h
@@ -53,6 +53,9 @@
 
 	/* used only by rescuers to point to the target workqueue */
 	struct workqueue_struct	*rescue_wq;	/* I: the workqueue to rescue */
+
+	/* used by the scheduler to determine a worker's last known identity */
+	work_func_t		last_func;
 };
 
 /**
@@ -67,9 +70,10 @@
 
 /*
  * Scheduler hooks for concurrency managed workqueue.  Only to be used from
- * sched/core.c and workqueue.c.
+ * sched/ and workqueue.c.
  */
 void wq_worker_waking_up(struct task_struct *task, int cpu);
 struct task_struct *wq_worker_sleeping(struct task_struct *task);
+work_func_t wq_worker_last_func(struct task_struct *task);
 
 #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
diff --git a/lib/Makefile b/lib/Makefile
index 0ab8083..aeadf7c 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -31,7 +31,7 @@
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o timerqueue.o\
 	 idr.o int_sqrt.o extable.o \
-	 sha1.o chacha20.o irq_regs.o argv_split.o \
+	 sha1.o chacha.o irq_regs.o argv_split.o \
 	 flex_proportions.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
 	 earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
diff --git a/lib/chacha.c b/lib/chacha.c
new file mode 100644
index 0000000..a46d2832
--- /dev/null
+++ b/lib/chacha.c
@@ -0,0 +1,117 @@
+/*
+ * The "hash function" used as the core of the ChaCha stream cipher (RFC7539)
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/bitops.h>
+#include <linux/cryptohash.h>
+#include <asm/unaligned.h>
+#include <crypto/chacha.h>
+
+static void chacha_permute(u32 *x, int nrounds)
+{
+	int i;
+
+	/* whitelist the allowed round counts */
+	WARN_ON_ONCE(nrounds != 20 && nrounds != 12);
+
+	for (i = 0; i < nrounds; i += 2) {
+		x[0]  += x[4];    x[12] = rol32(x[12] ^ x[0],  16);
+		x[1]  += x[5];    x[13] = rol32(x[13] ^ x[1],  16);
+		x[2]  += x[6];    x[14] = rol32(x[14] ^ x[2],  16);
+		x[3]  += x[7];    x[15] = rol32(x[15] ^ x[3],  16);
+
+		x[8]  += x[12];   x[4]  = rol32(x[4]  ^ x[8],  12);
+		x[9]  += x[13];   x[5]  = rol32(x[5]  ^ x[9],  12);
+		x[10] += x[14];   x[6]  = rol32(x[6]  ^ x[10], 12);
+		x[11] += x[15];   x[7]  = rol32(x[7]  ^ x[11], 12);
+
+		x[0]  += x[4];    x[12] = rol32(x[12] ^ x[0],   8);
+		x[1]  += x[5];    x[13] = rol32(x[13] ^ x[1],   8);
+		x[2]  += x[6];    x[14] = rol32(x[14] ^ x[2],   8);
+		x[3]  += x[7];    x[15] = rol32(x[15] ^ x[3],   8);
+
+		x[8]  += x[12];   x[4]  = rol32(x[4]  ^ x[8],   7);
+		x[9]  += x[13];   x[5]  = rol32(x[5]  ^ x[9],   7);
+		x[10] += x[14];   x[6]  = rol32(x[6]  ^ x[10],  7);
+		x[11] += x[15];   x[7]  = rol32(x[7]  ^ x[11],  7);
+
+		x[0]  += x[5];    x[15] = rol32(x[15] ^ x[0],  16);
+		x[1]  += x[6];    x[12] = rol32(x[12] ^ x[1],  16);
+		x[2]  += x[7];    x[13] = rol32(x[13] ^ x[2],  16);
+		x[3]  += x[4];    x[14] = rol32(x[14] ^ x[3],  16);
+
+		x[10] += x[15];   x[5]  = rol32(x[5]  ^ x[10], 12);
+		x[11] += x[12];   x[6]  = rol32(x[6]  ^ x[11], 12);
+		x[8]  += x[13];   x[7]  = rol32(x[7]  ^ x[8],  12);
+		x[9]  += x[14];   x[4]  = rol32(x[4]  ^ x[9],  12);
+
+		x[0]  += x[5];    x[15] = rol32(x[15] ^ x[0],   8);
+		x[1]  += x[6];    x[12] = rol32(x[12] ^ x[1],   8);
+		x[2]  += x[7];    x[13] = rol32(x[13] ^ x[2],   8);
+		x[3]  += x[4];    x[14] = rol32(x[14] ^ x[3],   8);
+
+		x[10] += x[15];   x[5]  = rol32(x[5]  ^ x[10],  7);
+		x[11] += x[12];   x[6]  = rol32(x[6]  ^ x[11],  7);
+		x[8]  += x[13];   x[7]  = rol32(x[7]  ^ x[8],   7);
+		x[9]  += x[14];   x[4]  = rol32(x[4]  ^ x[9],   7);
+	}
+}
+
+/**
+ * chacha_block - generate one keystream block and increment block counter
+ * @state: input state matrix (16 32-bit words)
+ * @stream: output keystream block (64 bytes)
+ * @nrounds: number of rounds (20 or 12; 20 is recommended)
+ *
+ * This is the ChaCha core, a function from 64-byte strings to 64-byte strings.
+ * The caller has already converted the endianness of the input.  This function
+ * also handles incrementing the block counter in the input matrix.
+ */
+void chacha_block(u32 *state, u8 *stream, int nrounds)
+{
+	u32 x[16];
+	int i;
+
+	memcpy(x, state, 64);
+
+	chacha_permute(x, nrounds);
+
+	for (i = 0; i < ARRAY_SIZE(x); i++)
+		put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]);
+
+	state[12]++;
+}
+EXPORT_SYMBOL(chacha_block);
+
+/**
+ * hchacha_block - abbreviated ChaCha core, for XChaCha
+ * @in: input state matrix (16 32-bit words)
+ * @out: output (8 32-bit words)
+ * @nrounds: number of rounds (20 or 12; 20 is recommended)
+ *
+ * HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step
+ * towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf).  HChaCha
+ * skips the final addition of the initial state, and outputs only certain words
+ * of the state.  It should not be used for streaming directly.
+ */
+void hchacha_block(const u32 *in, u32 *out, int nrounds)
+{
+	u32 x[16];
+
+	memcpy(x, in, 64);
+
+	chacha_permute(x, nrounds);
+
+	memcpy(&out[0], &x[0], 16);
+	memcpy(&out[4], &x[12], 16);
+}
+EXPORT_SYMBOL(hchacha_block);
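chacha_block() expects the caller to have laid out the 16-word state matrix and converted the key/nonce to little-endian words. As a reference for callers, a minimal sketch of the RFC 7539 state layout follows; the helper names are illustrative and nothing here is exported by lib/chacha.c itself.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef uint32_t u32;
typedef uint8_t u8;

/* "expand 32-byte k" as little-endian 32-bit words (RFC 7539, section 2.3) */
static const u32 chacha_consts[4] = {
	0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
};

static u32 get_le32(const u8 *p)
{
	return (u32)p[0] | ((u32)p[1] << 8) | ((u32)p[2] << 16) | ((u32)p[3] << 24);
}

/*
 * Lay out the state chacha_block() consumes: 4 constants, 8 key words,
 * 1 block counter, 3 nonce words -- all already little-endian converted.
 */
static void chacha_init_state(u32 state[16], const u8 key[32],
			      const u8 nonce[12], u32 counter)
{
	int i;

	memcpy(state, chacha_consts, sizeof(chacha_consts));
	for (i = 0; i < 8; i++)
		state[4 + i] = get_le32(key + 4 * i);
	state[12] = counter;		/* incremented by chacha_block() itself */
	for (i = 0; i < 3; i++)
		state[13 + i] = get_le32(nonce + 4 * i);
}

int main(void)
{
	u8 key[32] = { 0 }, nonce[12] = { 0 };
	u32 state[16];
	int i;

	chacha_init_state(state, key, nonce, 0);
	for (i = 0; i < 16; i++)
		printf("%08x%c", state[i], (i % 4 == 3) ? '\n' : ' ');
	/* In-kernel, the caller would now call: chacha_block(state, stream, 20); */
	return 0;
}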
diff --git a/lib/chacha20.c b/lib/chacha20.c
deleted file mode 100644
index c1cc50f..0000000
--- a/lib/chacha20.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/bitops.h>
-#include <linux/cryptohash.h>
-#include <asm/unaligned.h>
-#include <crypto/chacha20.h>
-
-void chacha20_block(u32 *state, u32 *stream)
-{
-	u32 x[16], *out = stream;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(x); i++)
-		x[i] = state[i];
-
-	for (i = 0; i < 20; i += 2) {
-		x[0]  += x[4];    x[12] = rol32(x[12] ^ x[0],  16);
-		x[1]  += x[5];    x[13] = rol32(x[13] ^ x[1],  16);
-		x[2]  += x[6];    x[14] = rol32(x[14] ^ x[2],  16);
-		x[3]  += x[7];    x[15] = rol32(x[15] ^ x[3],  16);
-
-		x[8]  += x[12];   x[4]  = rol32(x[4]  ^ x[8],  12);
-		x[9]  += x[13];   x[5]  = rol32(x[5]  ^ x[9],  12);
-		x[10] += x[14];   x[6]  = rol32(x[6]  ^ x[10], 12);
-		x[11] += x[15];   x[7]  = rol32(x[7]  ^ x[11], 12);
-
-		x[0]  += x[4];    x[12] = rol32(x[12] ^ x[0],   8);
-		x[1]  += x[5];    x[13] = rol32(x[13] ^ x[1],   8);
-		x[2]  += x[6];    x[14] = rol32(x[14] ^ x[2],   8);
-		x[3]  += x[7];    x[15] = rol32(x[15] ^ x[3],   8);
-
-		x[8]  += x[12];   x[4]  = rol32(x[4]  ^ x[8],   7);
-		x[9]  += x[13];   x[5]  = rol32(x[5]  ^ x[9],   7);
-		x[10] += x[14];   x[6]  = rol32(x[6]  ^ x[10],  7);
-		x[11] += x[15];   x[7]  = rol32(x[7]  ^ x[11],  7);
-
-		x[0]  += x[5];    x[15] = rol32(x[15] ^ x[0],  16);
-		x[1]  += x[6];    x[12] = rol32(x[12] ^ x[1],  16);
-		x[2]  += x[7];    x[13] = rol32(x[13] ^ x[2],  16);
-		x[3]  += x[4];    x[14] = rol32(x[14] ^ x[3],  16);
-
-		x[10] += x[15];   x[5]  = rol32(x[5]  ^ x[10], 12);
-		x[11] += x[12];   x[6]  = rol32(x[6]  ^ x[11], 12);
-		x[8]  += x[13];   x[7]  = rol32(x[7]  ^ x[8],  12);
-		x[9]  += x[14];   x[4]  = rol32(x[4]  ^ x[9],  12);
-
-		x[0]  += x[5];    x[15] = rol32(x[15] ^ x[0],   8);
-		x[1]  += x[6];    x[12] = rol32(x[12] ^ x[1],   8);
-		x[2]  += x[7];    x[13] = rol32(x[13] ^ x[2],   8);
-		x[3]  += x[4];    x[14] = rol32(x[14] ^ x[3],   8);
-
-		x[10] += x[15];   x[5]  = rol32(x[5]  ^ x[10],  7);
-		x[11] += x[12];   x[6]  = rol32(x[6]  ^ x[11],  7);
-		x[8]  += x[13];   x[7]  = rol32(x[7]  ^ x[8],   7);
-		x[9]  += x[14];   x[4]  = rol32(x[4]  ^ x[9],   7);
-	}
-
-	for (i = 0; i < ARRAY_SIZE(x); i++)
-		out[i] = cpu_to_le32(x[i] + state[i]);
-
-	state[12]++;
-}
-EXPORT_SYMBOL(chacha20_block);
diff --git a/mm/compaction.c b/mm/compaction.c
index faca45e..7c60747 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -22,6 +22,7 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/page_owner.h>
+#include <linux/psi.h>
 #include "internal.h"
 
 #ifdef CONFIG_COMPACTION
@@ -2068,11 +2069,15 @@
 	pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1;
 
 	while (!kthread_should_stop()) {
+		unsigned long pflags;
+
 		trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
 		wait_event_freezable(pgdat->kcompactd_wait,
 				kcompactd_work_requested(pgdat));
 
+		psi_memstall_enter(&pflags);
 		kcompactd_do_work(pgdat);
+		psi_memstall_leave(&pflags);
 	}
 
 	return 0;
diff --git a/mm/filemap.c b/mm/filemap.c
index 287f3fa..2981c760 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -36,6 +36,8 @@
 #include <linux/cleancache.h>
 #include <linux/shmem_fs.h>
 #include <linux/rmap.h>
+#include <linux/delayacct.h>
+#include <linux/psi.h>
 #include "internal.h"
 
 #define CREATE_TRACE_POINTS
@@ -937,12 +939,9 @@
 		 * data from the working set, only to cache data that will
 		 * get overwritten with something else, is a waste of memory.
 		 */
-		if (!(gfp_mask & __GFP_WRITE) &&
-		    shadow && workingset_refault(shadow)) {
-			SetPageActive(page);
-			workingset_activation(page);
-		} else
-			ClearPageActive(page);
+		WARN_ON_ONCE(PageActive(page));
+		if (!(gfp_mask & __GFP_WRITE) && shadow)
+			workingset_refault(page, shadow);
 		lru_cache_add(page);
 	}
 	return ret;
@@ -1098,8 +1097,18 @@
 {
 	struct wait_page_queue wait_page;
 	wait_queue_entry_t *wait = &wait_page.wait;
+	bool thrashing = false;
+	unsigned long pflags;
 	int ret = 0;
 
+	if (bit_nr == PG_locked &&
+	    !PageUptodate(page) && PageWorkingset(page)) {
+		if (!PageSwapBacked(page))
+			delayacct_thrashing_start();
+		psi_memstall_enter(&pflags);
+		thrashing = true;
+	}
+
 	init_wait(wait);
 	wait->flags = lock ? WQ_FLAG_EXCLUSIVE : 0;
 	wait->func = wake_page_function;
@@ -1138,6 +1147,12 @@
 
 	finish_wait(q, wait);
 
+	if (thrashing) {
+		if (!PageSwapBacked(page))
+			delayacct_thrashing_end();
+		psi_memstall_leave(&pflags);
+	}
+
 	/*
 	 * A signal could leave PageWaiters set. Clearing it here if
 	 * !waitqueue_active would be possible (by open-coding finish_wait),
@@ -1547,7 +1562,10 @@
  * - FGP_CREAT: If page is not present then a new page is allocated using
  *   @gfp_mask and added to the page cache and the VM's LRU
  *   list. The page is returned locked and with an increased
- *   refcount. Otherwise, NULL is returned.
+ *   refcount.
+ * - FGP_FOR_MMAP: Similar to FGP_CREAT, only we want to allow the caller to do
+ *   its own locking dance if the page is already in cache, or unlock the page
+ *   before returning if we had to add the page to pagecache.
  *
  * If FGP_LOCK or FGP_CREAT are specified then the function may sleep even
  * if the GFP flags specified for FGP_CREAT are atomic.
@@ -1600,7 +1618,7 @@
 		if (!page)
 			return NULL;
 
-		if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK)))
+		if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
 			fgp_flags |= FGP_LOCK;
 
 		/* Init accessed so avoid atomic mark_page_accessed later */
@@ -1614,6 +1632,13 @@
 			if (err == -EEXIST)
 				goto repeat;
 		}
+
+		/*
+		 * add_to_page_cache_lru locks the page, and for mmap we expect
+		 * an unlocked page.
+		 */
+		if (page && (fgp_flags & FGP_FOR_MMAP))
+			unlock_page(page);
 	}
 
 	return page;
@@ -2388,62 +2413,98 @@
 EXPORT_SYMBOL(generic_file_read_iter);
 
 #ifdef CONFIG_MMU
-/**
- * page_cache_read - adds requested page to the page cache if not already there
- * @file:	file to read
- * @offset:	page index
- * @gfp_mask:	memory allocation flags
- *
- * This adds the requested page to the page cache if it isn't already there,
- * and schedules an I/O to read in its contents from disk.
- */
-static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask)
+#define MMAP_LOTSAMISS  (100)
+static struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
+					     struct file *fpin)
 {
-	struct address_space *mapping = file->f_mapping;
-	struct page *page;
-	int ret;
+	int flags = vmf->flags;
 
-	do {
-		page = __page_cache_alloc(gfp_mask);
-		if (!page)
-			return -ENOMEM;
+	if (fpin)
+		return fpin;
 
-		ret = add_to_page_cache_lru(page, mapping, offset, gfp_mask);
-		if (ret == 0)
-			ret = mapping->a_ops->readpage(file, page);
-		else if (ret == -EEXIST)
-			ret = 0; /* losing race to add is OK */
-
-		put_page(page);
-
-	} while (ret == AOP_TRUNCATED_PAGE);
-
-	return ret;
+	/*
+	 * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or
+	 * anything, so we only pin the file and drop the mmap_sem if only
+	 * FAULT_FLAG_ALLOW_RETRY is set.
+	 */
+	if ((flags & (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT)) ==
+	    FAULT_FLAG_ALLOW_RETRY) {
+		fpin = get_file(vmf->vma->vm_file);
+		up_read(&vmf->vma->vm_mm->mmap_sem);
+	}
+	return fpin;
 }
 
-#define MMAP_LOTSAMISS  (100)
+/*
+ * lock_page_maybe_drop_mmap - lock the page, possibly dropping the mmap_sem
+ * @vmf - the vm_fault for this fault.
+ * @page - the page to lock.
+ * @fpin - the pointer to the file we may pin (or is already pinned).
+ *
+ * This works similarly to lock_page_or_retry in that it can drop the mmap_sem.
+ * It differs in that it actually returns the page locked if it returns 1 and 0
+ * if it couldn't lock the page.  If we did have to drop the mmap_sem then fpin
+ * will point to the pinned file and needs to be fput()'ed at a later point.
+ */
+static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
+				     struct file **fpin)
+{
+	if (trylock_page(page))
+		return 1;
+
+	/*
+	 * NOTE! This will make us return with VM_FAULT_RETRY, but with
+	 * the mmap_sem still held. That's how FAULT_FLAG_RETRY_NOWAIT
+	 * is supposed to work. We have way too many special cases..
+	 */
+	if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
+		return 0;
+
+	*fpin = maybe_unlock_mmap_for_io(vmf, *fpin);
+	if (vmf->flags & FAULT_FLAG_KILLABLE) {
+		if (__lock_page_killable(page)) {
+			/*
+			 * We didn't have the right flags to drop the mmap_sem,
+			 * but all fault_handlers only check for fatal signals
+			 * if we return VM_FAULT_RETRY, so we need to drop the
+			 * mmap_sem here and return 0 if we don't have a fpin.
+			 */
+			if (*fpin == NULL)
+				up_read(&vmf->vma->vm_mm->mmap_sem);
+			return 0;
+		}
+	} else
+		__lock_page(page);
+	return 1;
+}
+
 
 /*
- * Synchronous readahead happens when we don't even find
- * a page in the page cache at all.
+ * Synchronous readahead happens when we don't even find a page in the page
+ * cache at all.  We don't want to perform IO under the mmap sem, so if we have
+ * to drop the mmap sem we return the file that was pinned in order for us to do
+ * that.  If we didn't pin a file then we return NULL.  The file that is
+ * returned needs to be fput()'ed when we're done with it.
  */
-static void do_sync_mmap_readahead(struct vm_area_struct *vma,
-				   struct file_ra_state *ra,
-				   struct file *file,
-				   pgoff_t offset)
+static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
 {
+	struct file *file = vmf->vma->vm_file;
+	struct file_ra_state *ra = &file->f_ra;
 	struct address_space *mapping = file->f_mapping;
+	struct file *fpin = NULL;
+	pgoff_t offset = vmf->pgoff;
 
 	/* If we don't want any read-ahead, don't bother */
-	if (vma->vm_flags & VM_RAND_READ)
-		return;
+	if (vmf->vma->vm_flags & VM_RAND_READ)
+		return fpin;
 	if (!ra->ra_pages)
-		return;
+		return fpin;
 
-	if (vma->vm_flags & VM_SEQ_READ) {
+	if (vmf->vma->vm_flags & VM_SEQ_READ) {
+		fpin = maybe_unlock_mmap_for_io(vmf, fpin);
 		page_cache_sync_readahead(mapping, ra, file, offset,
 					  ra->ra_pages);
-		return;
+		return fpin;
 	}
 
 	/* Avoid banging the cache line if not needed */
@@ -2455,37 +2516,44 @@
 	 * stop bothering with read-ahead. It will only hurt.
 	 */
 	if (ra->mmap_miss > MMAP_LOTSAMISS)
-		return;
+		return fpin;
 
 	/*
 	 * mmap read-around
 	 */
+	fpin = maybe_unlock_mmap_for_io(vmf, fpin);
 	ra->start = max_t(long, 0, offset - ra->ra_pages / 2);
 	ra->size = ra->ra_pages;
 	ra->async_size = ra->ra_pages / 4;
 	ra_submit(ra, mapping, file);
+	return fpin;
 }
 
 /*
  * Asynchronous readahead happens when we find the page and PG_readahead,
- * so we want to possibly extend the readahead further..
+ * so we want to possibly extend the readahead further.  We return the file that
+ * was pinned if we have to drop the mmap_sem in order to do IO.
  */
-static void do_async_mmap_readahead(struct vm_area_struct *vma,
-				    struct file_ra_state *ra,
-				    struct file *file,
-				    struct page *page,
-				    pgoff_t offset)
+static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
+					    struct page *page)
 {
+	struct file *file = vmf->vma->vm_file;
+	struct file_ra_state *ra = &file->f_ra;
 	struct address_space *mapping = file->f_mapping;
+	struct file *fpin = NULL;
+	pgoff_t offset = vmf->pgoff;
 
 	/* If we don't want any read-ahead, don't bother */
-	if (vma->vm_flags & VM_RAND_READ)
-		return;
+	if (vmf->vma->vm_flags & VM_RAND_READ)
+		return fpin;
 	if (ra->mmap_miss > 0)
 		ra->mmap_miss--;
-	if (PageReadahead(page))
+	if (PageReadahead(page)) {
+		fpin = maybe_unlock_mmap_for_io(vmf, fpin);
 		page_cache_async_readahead(mapping, ra, file,
 					   page, offset, ra->ra_pages);
+	}
+	return fpin;
 }
 
 /**
@@ -2515,6 +2583,7 @@
 {
 	int error;
 	struct file *file = vmf->vma->vm_file;
+	struct file *fpin = NULL;
 	struct address_space *mapping = file->f_mapping;
 	struct file_ra_state *ra = &file->f_ra;
 	struct inode *inode = mapping->host;
@@ -2536,23 +2605,26 @@
 		 * We found the page, so try async readahead before
 		 * waiting for the lock.
 		 */
-		do_async_mmap_readahead(vmf->vma, ra, file, page, offset);
+		fpin = do_async_mmap_readahead(vmf, page);
 	} else if (!page) {
 		/* No page in the page cache at all */
-		do_sync_mmap_readahead(vmf->vma, ra, file, offset);
 		count_vm_event(PGMAJFAULT);
 		count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
 		ret = VM_FAULT_MAJOR;
+		fpin = do_sync_mmap_readahead(vmf);
 retry_find:
-		page = find_get_page(mapping, offset);
-		if (!page)
-			goto no_cached_page;
+		page = pagecache_get_page(mapping, offset,
+					  FGP_CREAT|FGP_FOR_MMAP,
+					  vmf->gfp_mask);
+		if (!page) {
+			if (fpin)
+				goto out_retry;
+			return VM_FAULT_OOM;
+		}
 	}
 
-	if (!lock_page_or_retry(page, vmf->vma->vm_mm, vmf->flags)) {
-		put_page(page);
-		return ret | VM_FAULT_RETRY;
-	}
+	if (!lock_page_maybe_drop_mmap(vmf, page, &fpin))
+		goto out_retry;
 
 	/* Did it get truncated? */
 	if (unlikely(page->mapping != mapping)) {
@@ -2570,6 +2642,16 @@
 		goto page_not_uptodate;
 
 	/*
+	 * We've made it this far and we had to drop our mmap_sem, now is the
+	 * time to return to the upper layer and have it re-find the vma and
+	 * redo the fault.
+	 */
+	if (fpin) {
+		unlock_page(page);
+		goto out_retry;
+	}
+
+	/*
 	 * Found the page and have a reference on it.
 	 * We must recheck i_size under page lock.
 	 */
@@ -2583,30 +2665,6 @@
 	vmf->page = page;
 	return ret | VM_FAULT_LOCKED;
 
-no_cached_page:
-	/*
-	 * We're only likely to ever get here if MADV_RANDOM is in
-	 * effect.
-	 */
-	error = page_cache_read(file, offset, vmf->gfp_mask);
-
-	/*
-	 * The page we want has now been added to the page cache.
-	 * In the unlikely event that someone removed it in the
-	 * meantime, we'll just come back here and read it again.
-	 */
-	if (error >= 0)
-		goto retry_find;
-
-	/*
-	 * An error return from page_cache_read can result if the
-	 * system is low on memory, or a problem occurs while trying
-	 * to schedule I/O.
-	 */
-	if (error == -ENOMEM)
-		return VM_FAULT_OOM;
-	return VM_FAULT_SIGBUS;
-
 page_not_uptodate:
 	/*
 	 * Umm, take care of errors if the page isn't up-to-date.
@@ -2615,12 +2673,15 @@
 	 * and we need to check for errors.
 	 */
 	ClearPageError(page);
+	fpin = maybe_unlock_mmap_for_io(vmf, fpin);
 	error = mapping->a_ops->readpage(file, page);
 	if (!error) {
 		wait_on_page_locked(page);
 		if (!PageUptodate(page))
 			error = -EIO;
 	}
+	if (fpin)
+		goto out_retry;
 	put_page(page);
 
 	if (!error || error == AOP_TRUNCATED_PAGE)
@@ -2629,6 +2690,18 @@
 	/* Things didn't work out. Return zero to tell the mm layer so. */
 	shrink_readahead_size_eio(file, ra);
 	return VM_FAULT_SIGBUS;
+
+out_retry:
+	/*
+	 * We dropped the mmap_sem, we need to return to the fault handler to
+	 * re-find the vma and come back and find our hopefully still populated
+	 * page.
+	 */
+	if (page)
+		put_page(page);
+	if (fpin)
+		fput(fpin);
+	return ret | VM_FAULT_RETRY;
 }
 EXPORT_SYMBOL(filemap_fault);
 
@@ -2824,7 +2897,11 @@
 		}
 
 filler:
-		err = filler(data, page);
+		if (filler)
+			err = filler(data, page);
+		else
+			err = mapping->a_ops->readpage(data, page);
+
 		if (err < 0) {
 			put_page(page);
 			return ERR_PTR(err);
@@ -2931,9 +3008,7 @@
 				pgoff_t index,
 				gfp_t gfp)
 {
-	filler_t *filler = (filler_t *)mapping->a_ops->readpage;
-
-	return do_read_cache_page(mapping, index, filler, NULL, gfp);
+	return do_read_cache_page(mapping, index, NULL, NULL, gfp);
 }
 EXPORT_SYMBOL(read_cache_page_gfp);
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 09ce852..8012cf2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2412,6 +2412,7 @@
 			 (1L << PG_mlocked) |
 			 (1L << PG_uptodate) |
 			 (1L << PG_active) |
+			 (1L << PG_workingset) |
 			 (1L << PG_locked) |
 			 (1L << PG_unevictable) |
 			 (1L << PG_dirty)));
diff --git a/mm/madvise.c b/mm/madvise.c
index 71d21df..899b19e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -138,7 +138,7 @@
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
 	*prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
 			  vma->vm_file, pgoff, vma_policy(vma),
-			  vma->vm_userfaultfd_ctx);
+			  vma->vm_userfaultfd_ctx, vma_get_anon_name(vma));
 	if (*prev) {
 		vma = *prev;
 		goto success;
diff --git a/mm/memfd.c b/mm/memfd.c
index 2bb5e25..a9ece5f 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -150,7 +150,8 @@
 #define F_ALL_SEALS (F_SEAL_SEAL | \
 		     F_SEAL_SHRINK | \
 		     F_SEAL_GROW | \
-		     F_SEAL_WRITE)
+		     F_SEAL_WRITE | \
+		     F_SEAL_FUTURE_WRITE)
 
 static int memfd_add_seals(struct file *file, unsigned int seals)
 {
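Adding F_SEAL_FUTURE_WRITE to F_ALL_SEALS makes the new seal settable through F_ADD_SEALS. Below is a hedged userspace sketch of the intended use; the seal value 0x0010 comes from the uapi header, which is not in this hunk, and the actual enforcement lives in mm/shmem.c changes elsewhere in this series.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#ifndef F_SEAL_FUTURE_WRITE
#define F_SEAL_FUTURE_WRITE	0x0010	/* assumed value, see uapi fcntl.h */
#endif

int main(void)
{
	int fd = memfd_create("buffer", MFD_ALLOW_SEALING);
	if (fd < 0) {
		perror("memfd_create");
		return 1;
	}
	ftruncate(fd, 4096);

	/* The creator may fill the buffer first... */
	write(fd, "hello", 5);

	/* ...then forbid any future write access before sharing the fd. */
	if (fcntl(fd, F_ADD_SEALS, F_SEAL_FUTURE_WRITE) < 0)
		perror("F_ADD_SEALS");

	/* New writable shared mappings (and further writes) should now fail. */
	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		perror("mmap after F_SEAL_FUTURE_WRITE");

	close(fd);
	return 0;
}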
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 70298b6..47536fa 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -758,7 +758,8 @@
 			((vmstart - vma->vm_start) >> PAGE_SHIFT);
 		prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
 				 vma->anon_vma, vma->vm_file, pgoff,
-				 new_pol, vma->vm_userfaultfd_ctx);
+				 new_pol, vma->vm_userfaultfd_ctx,
+				 vma_get_anon_name(vma));
 		if (prev) {
 			vma = prev;
 			next = vma->vm_next;
diff --git a/mm/migrate.c b/mm/migrate.c
index b2ea7d1..bd1a769 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -683,6 +683,8 @@
 		SetPageActive(newpage);
 	} else if (TestClearPageUnevictable(page))
 		SetPageUnevictable(newpage);
+	if (PageWorkingset(page))
+		SetPageWorkingset(newpage);
 	if (PageChecked(page))
 		SetPageChecked(newpage);
 	if (PageMappedToDisk(page))
diff --git a/mm/mlock.c b/mm/mlock.c
index 0ab8250..02d8a89 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -535,7 +535,7 @@
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
 	*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
 			  vma->vm_file, pgoff, vma_policy(vma),
-			  vma->vm_userfaultfd_ctx);
+			  vma->vm_userfaultfd_ctx, vma_get_anon_name(vma));
 	if (*prev) {
 		vma = *prev;
 		goto success;
diff --git a/mm/mmap.c b/mm/mmap.c
index 1480880..3cc7a94 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -983,7 +983,8 @@
  */
 static inline int is_mergeable_vma(struct vm_area_struct *vma,
 				struct file *file, unsigned long vm_flags,
-				struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
+				struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
+				const char __user *anon_name)
 {
 	/*
 	 * VM_SOFTDIRTY should not prevent from VMA merging, if we
@@ -1001,6 +1002,8 @@
 		return 0;
 	if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
 		return 0;
+	if (vma_get_anon_name(vma) != anon_name)
+		return 0;
 	return 1;
 }
 
@@ -1033,9 +1036,10 @@
 can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
 		     struct anon_vma *anon_vma, struct file *file,
 		     pgoff_t vm_pgoff,
-		     struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
+		     struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
+		     const char __user *anon_name)
 {
-	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
+	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
 	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
 		if (vma->vm_pgoff == vm_pgoff)
 			return 1;
@@ -1054,9 +1058,10 @@
 can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
 		    struct anon_vma *anon_vma, struct file *file,
 		    pgoff_t vm_pgoff,
-		    struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
+		    struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
+		    const char __user *anon_name)
 {
-	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
+	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
 	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
 		pgoff_t vm_pglen;
 		vm_pglen = vma_pages(vma);
@@ -1067,9 +1072,9 @@
 }
 
 /*
- * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out
- * whether that can be merged with its predecessor or its successor.
- * Or both (it neatly fills a hole).
+ * Given a mapping request (addr,end,vm_flags,file,pgoff,anon_name),
+ * figure out whether that can be merged with its predecessor or its
+ * successor.  Or both (it neatly fills a hole).
  *
  * In most cases - when called for mmap, brk or mremap - [addr,end) is
  * certain not to be mapped by the time vma_merge is called; but when
@@ -1111,7 +1116,8 @@
 			unsigned long end, unsigned long vm_flags,
 			struct anon_vma *anon_vma, struct file *file,
 			pgoff_t pgoff, struct mempolicy *policy,
-			struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
+			struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
+			const char __user *anon_name)
 {
 	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
 	struct vm_area_struct *area, *next;
@@ -1144,7 +1150,8 @@
 			mpol_equal(vma_policy(prev), policy) &&
 			can_vma_merge_after(prev, vm_flags,
 					    anon_vma, file, pgoff,
-					    vm_userfaultfd_ctx)) {
+					    vm_userfaultfd_ctx,
+					    anon_name)) {
 		/*
 		 * OK, it can.  Can we now merge in the successor as well?
 		 */
@@ -1153,7 +1160,8 @@
 				can_vma_merge_before(next, vm_flags,
 						     anon_vma, file,
 						     pgoff+pglen,
-						     vm_userfaultfd_ctx) &&
+						     vm_userfaultfd_ctx,
+						     anon_name) &&
 				is_mergeable_anon_vma(prev->anon_vma,
 						      next->anon_vma, NULL)) {
 							/* cases 1, 6 */
@@ -1176,7 +1184,8 @@
 			mpol_equal(policy, vma_policy(next)) &&
 			can_vma_merge_before(next, vm_flags,
 					     anon_vma, file, pgoff+pglen,
-					     vm_userfaultfd_ctx)) {
+					     vm_userfaultfd_ctx,
+					     anon_name)) {
 		if (prev && addr < prev->vm_end)	/* case 4 */
 			err = __vma_adjust(prev, prev->vm_start,
 					 addr, prev->vm_pgoff, NULL, next);
@@ -1721,7 +1730,7 @@
 	 * Can we just expand an old mapping?
 	 */
 	vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
-			NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX);
+			NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
 	if (vma)
 		goto out;
 
@@ -2977,7 +2986,7 @@
 
 	/* Can we just expand an old private anonymous mapping? */
 	vma = vma_merge(mm, prev, addr, addr + len, flags,
-			NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX);
+			NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
 	if (vma)
 		goto out;
 
@@ -3175,7 +3184,7 @@
 		return NULL;	/* should never get here */
 	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
 			    vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
-			    vma->vm_userfaultfd_ctx);
+			    vma->vm_userfaultfd_ctx, vma_get_anon_name(vma));
 	if (new_vma) {
 		/*
 		 * Source vma may have been merged into new_vma
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 6d33162..58f591d 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -398,7 +398,7 @@
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
 	*pprev = vma_merge(mm, *pprev, start, end, newflags,
 			   vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
-			   vma->vm_userfaultfd_ctx);
+			   vma->vm_userfaultfd_ctx, vma_get_anon_name(vma));
 	if (*pprev) {
 		vma = *pprev;
 		VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2d04bd2..4e977e0e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -66,6 +66,7 @@
 #include <linux/ftrace.h>
 #include <linux/lockdep.h>
 #include <linux/nmi.h>
+#include <linux/psi.h>
 
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
@@ -261,10 +262,22 @@
 #endif
 };
 
+/*
+ * Try to keep at least this much lowmem free.  Do not allow normal
+ * allocations below this point, only high priority ones. Automatically
+ * tuned according to the amount of memory in the system.
+ */
 int min_free_kbytes = 1024;
 int user_min_free_kbytes = -1;
 int watermark_scale_factor = 10;
 
+/*
+ * Extra memory for the system to try freeing. Used to temporarily
+ * free memory, to make space for new workloads. Anyone can allocate
+ * down to the min watermarks controlled by min_free_kbytes above.
+ */
+int extra_free_kbytes = 0;
+
 static unsigned long nr_kernel_pages __meminitdata;
 static unsigned long nr_all_pages __meminitdata;
 static unsigned long dma_reserve __meminitdata;
@@ -3570,15 +3583,20 @@
 		enum compact_priority prio, enum compact_result *compact_result)
 {
 	struct page *page;
+	unsigned long pflags;
 	unsigned int noreclaim_flag;
 
 	if (!order)
 		return NULL;
 
+	psi_memstall_enter(&pflags);
 	noreclaim_flag = memalloc_noreclaim_save();
+
 	*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
 									prio);
+
 	memalloc_noreclaim_restore(noreclaim_flag);
+	psi_memstall_leave(&pflags);
 
 	if (*compact_result <= COMPACT_INACTIVE)
 		return NULL;
@@ -3777,11 +3795,13 @@
 	struct reclaim_state reclaim_state;
 	int progress;
 	unsigned int noreclaim_flag;
+	unsigned long pflags;
 
 	cond_resched();
 
 	/* We now go into synchronous reclaim */
 	cpuset_memory_pressure_bump();
+	psi_memstall_enter(&pflags);
 	fs_reclaim_acquire(gfp_mask);
 	noreclaim_flag = memalloc_noreclaim_save();
 	reclaim_state.reclaimed_slab = 0;
@@ -3793,6 +3813,7 @@
 	current->reclaim_state = NULL;
 	memalloc_noreclaim_restore(noreclaim_flag);
 	fs_reclaim_release(gfp_mask);
+	psi_memstall_leave(&pflags);
 
 	cond_resched();
 
@@ -7237,6 +7258,7 @@
 static void __setup_per_zone_wmarks(void)
 {
 	unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
+	unsigned long pages_low = extra_free_kbytes >> (PAGE_SHIFT - 10);
 	unsigned long lowmem_pages = 0;
 	struct zone *zone;
 	unsigned long flags;
@@ -7248,11 +7270,14 @@
 	}
 
 	for_each_zone(zone) {
-		u64 tmp;
+		u64 min, low;
 
 		spin_lock_irqsave(&zone->lock, flags);
-		tmp = (u64)pages_min * zone->managed_pages;
-		do_div(tmp, lowmem_pages);
+		min = (u64)pages_min * zone->managed_pages;
+		do_div(min, lowmem_pages);
+		low = (u64)pages_low * zone->managed_pages;
+		do_div(low, vm_total_pages);
+
 		if (is_highmem(zone)) {
 			/*
 			 * __GFP_HIGH and PF_MEMALLOC allocations usually don't
@@ -7273,7 +7298,7 @@
 			 * If it's a lowmem zone, reserve a number of pages
 			 * proportionate to the zone's size.
 			 */
-			zone->watermark[WMARK_MIN] = tmp;
+			zone->watermark[WMARK_MIN] = min;
 		}
 
 		/*
@@ -7281,12 +7306,14 @@
 		 * scale factor in proportion to available memory, but
 		 * ensure a minimum size on small systems.
 		 */
-		tmp = max_t(u64, tmp >> 2,
+		min = max_t(u64, min >> 2,
 			    mult_frac(zone->managed_pages,
 				      watermark_scale_factor, 10000));
 
-		zone->watermark[WMARK_LOW]  = min_wmark_pages(zone) + tmp;
-		zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + tmp * 2;
+		zone->watermark[WMARK_LOW]  = min_wmark_pages(zone) +
+					low + min;
+		zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) +
+					low + min * 2;
 
 		spin_unlock_irqrestore(&zone->lock, flags);
 	}
@@ -7369,7 +7396,7 @@
 /*
  * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so
  *	that we can call two helper functions whenever min_free_kbytes
- *	changes.
+ *	or extra_free_kbytes changes.
  */
 int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
 	void __user *buffer, size_t *length, loff_t *ppos)
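
To make the watermark arithmetic above concrete, the following is a userspace restatement of the per-zone calculation for a single lowmem zone. The numbers (4 KiB pages, one 4 GiB zone, min_free_kbytes=1024, extra_free_kbytes=16384, watermark_scale_factor=10) are assumptions chosen only for illustration.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Illustrative inputs: 4 KiB pages, a single 4 GiB zone. */
	uint64_t pages_min = 1024 >> 2;            /* min_free_kbytes = 1024 */
	uint64_t pages_low = 16384 >> 2;           /* extra_free_kbytes = 16384 */
	uint64_t managed = 1ULL << 20;             /* zone->managed_pages */
	uint64_t lowmem_pages = 1ULL << 20;
	uint64_t vm_total_pages = 1ULL << 20;
	uint64_t watermark_scale_factor = 10;

	/* Same proportions as the hunk, with plain division for do_div(). */
	uint64_t min = pages_min * managed / lowmem_pages;
	uint64_t low = pages_low * managed / vm_total_pages;
	uint64_t wmark_min = min;

	/* Scale up for WMARK_LOW/HIGH, keeping min/4 as a floor. */
	uint64_t scaled = managed * watermark_scale_factor / 10000;
	min = (min >> 2) > scaled ? (min >> 2) : scaled;

	printf("WMARK_MIN  = %llu pages\n", (unsigned long long)wmark_min);
	printf("WMARK_LOW  = %llu pages\n", (unsigned long long)(wmark_min + low + min));
	printf("WMARK_HIGH = %llu pages\n", (unsigned long long)(wmark_min + low + min * 2));
	return 0;
}
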
diff --git a/mm/shmem.c b/mm/shmem.c
index 3c87426..2311dfe 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2169,6 +2169,24 @@
 
 static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 {
+	struct shmem_inode_info *info = SHMEM_I(file_inode(file));
+
+	if (info->seals & F_SEAL_FUTURE_WRITE) {
+		/*
+		 * New PROT_WRITE and MAP_SHARED mmaps are not allowed when
+		 * "future write" seal active.
+		 */
+		if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
+			return -EPERM;
+
+		/*
+		 * Since the F_SEAL_FUTURE_WRITE seals allow for a MAP_SHARED
+		 * read-only mapping, take care to not allow mprotect to revert
+		 * protections.
+		 */
+		vma->vm_flags &= ~(VM_MAYWRITE);
+	}
+
 	file_accessed(file);
 	vma->vm_ops = &shmem_vm_ops;
 	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) &&
@@ -2422,8 +2440,9 @@
 	pgoff_t index = pos >> PAGE_SHIFT;
 
 	/* i_mutex is held by caller */
-	if (unlikely(info->seals & (F_SEAL_WRITE | F_SEAL_GROW))) {
-		if (info->seals & F_SEAL_WRITE)
+	if (unlikely(info->seals & (F_SEAL_GROW |
+				   F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))) {
+		if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))
 			return -EPERM;
 		if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
 			return -EPERM;
@@ -2686,7 +2705,7 @@
 		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
 
 		/* protected by i_mutex */
-		if (info->seals & F_SEAL_WRITE) {
+		if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
 			error = -EPERM;
 			goto out;
 		}
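
From userspace, the F_SEAL_FUTURE_WRITE behaviour added above can be exercised roughly as follows. This is a sketch: memfd_create() and F_ADD_SEALS are standard, but the F_SEAL_FUTURE_WRITE constant may need the local fallback definition shown if the uapi headers in use predate this series.

#define _GNU_SOURCE
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>

#ifndef F_SEAL_FUTURE_WRITE
#define F_SEAL_FUTURE_WRITE 0x0010	/* assumed value; check the uapi header */
#endif

int main(void)
{
	int fd = memfd_create("sealed", MFD_ALLOW_SEALING);
	if (fd < 0 || ftruncate(fd, 4096) < 0)
		return 1;

	/* Unlike F_SEAL_WRITE, this seal only blocks *future* write access. */
	if (fcntl(fd, F_ADD_SEALS, F_SEAL_FUTURE_WRITE) < 0)
		return 1;

	/* A new shared+writable mapping is refused (-EPERM in shmem_mmap above). */
	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	printf("rw mmap: %s\n", p == MAP_FAILED ? strerror(errno) : "unexpectedly ok");

	/* A read-only shared mapping still works, and VM_MAYWRITE is cleared
	 * so a later mprotect(PROT_WRITE) on it should fail as well. */
	p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);
	printf("ro mmap: %s\n", p == MAP_FAILED ? strerror(errno) : "ok");
	return 0;
}
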
diff --git a/mm/swap_state.c b/mm/swap_state.c
index ecee9c6c..0d6a7f2 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -448,6 +448,7 @@
 			/*
 			 * Initiate read into locked page and return.
 			 */
+			SetPageWorkingset(new_page);
 			lru_cache_add_anon(new_page);
 			*new_page_allocated = true;
 			return new_page;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b37610c..3e0f85a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -49,6 +49,7 @@
 #include <linux/prefetch.h>
 #include <linux/printk.h>
 #include <linux/dax.h>
+#include <linux/psi.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -2142,6 +2143,7 @@
 		}
 
 		ClearPageActive(page);	/* we are de-activating */
+		SetPageWorkingset(page);
 		list_add(&page->lru, &l_inactive);
 	}
 
@@ -3290,6 +3292,7 @@
 {
 	struct zonelist *zonelist;
 	unsigned long nr_reclaimed;
+	unsigned long pflags;
 	int nid;
 	unsigned int noreclaim_flag;
 	struct scan_control sc = {
@@ -3318,9 +3321,13 @@
 					    sc.gfp_mask,
 					    sc.reclaim_idx);
 
+	psi_memstall_enter(&pflags);
 	noreclaim_flag = memalloc_noreclaim_save();
+
 	nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+
 	memalloc_noreclaim_restore(noreclaim_flag);
+	psi_memstall_leave(&pflags);
 
 	trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
 
@@ -3485,6 +3492,7 @@
 	int i;
 	unsigned long nr_soft_reclaimed;
 	unsigned long nr_soft_scanned;
+	unsigned long pflags;
 	struct zone *zone;
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
@@ -3495,6 +3503,7 @@
 		.may_swap = 1,
 	};
 
+	psi_memstall_enter(&pflags);
 	__fs_reclaim_acquire();
 
 	count_vm_event(PAGEOUTRUN);
@@ -3596,6 +3605,7 @@
 out:
 	snapshot_refaults(NULL, pgdat);
 	__fs_reclaim_release();
+	psi_memstall_leave(&pflags);
 	/*
 	 * Return the order kswapd stopped reclaiming at as
 	 * prepare_kswapd_sleep() takes it into account. If another caller
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 4a38793..8970b4b 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1145,6 +1145,7 @@
 	"nr_isolated_file",
 	"workingset_refault",
 	"workingset_activate",
+	"workingset_restore",
 	"workingset_nodereclaim",
 	"nr_anon_pages",
 	"nr_mapped",
diff --git a/mm/workingset.c b/mm/workingset.c
index 4516dd7..8abfd26 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -121,7 +121,7 @@
  * the only thing eating into inactive list space is active pages.
  *
  *
- *		Activating refaulting pages
+ *		Refaulting inactive pages
  *
  * All that is known about the active list is that the pages have been
  * accessed more than once in the past.  This means that at any given
@@ -134,6 +134,10 @@
  * used less frequently than the refaulting page - or even not used at
  * all anymore.
  *
+ * That means if inactive cache is refaulting with a suitable refault
+ * distance, we assume the cache workingset is transitioning and put
+ * pressure on the current active list.
+ *
  * If this is wrong and demotion kicks in, the pages which are truly
  * used more frequently will be reactivated while the less frequently
  * used ones will be evicted from memory.
@@ -141,6 +145,14 @@
  * But if this is right, the stale pages will be pushed out of memory
  * and the used pages get to stay in cache.
  *
+ *		Refaulting active pages
+ *
+ * If on the other hand the refaulting pages have recently been
+ * deactivated, it means that the active list is no longer protecting
+ * actively used cache from reclaim. The cache is NOT transitioning to
+ * a different workingset; the existing workingset is thrashing in the
+ * space allocated to the page cache.
+ *
  *
  *		Implementation
  *
@@ -156,8 +168,7 @@
  */
 
 #define EVICTION_SHIFT	(RADIX_TREE_EXCEPTIONAL_ENTRY + \
-			 NODES_SHIFT +	\
-			 MEM_CGROUP_ID_SHIFT)
+			 1 + NODES_SHIFT + MEM_CGROUP_ID_SHIFT)
 #define EVICTION_MASK	(~0UL >> EVICTION_SHIFT)
 
 /*
@@ -170,23 +181,28 @@
  */
 static unsigned int bucket_order __read_mostly;
 
-static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction)
+static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction,
+			 bool workingset)
 {
 	eviction >>= bucket_order;
 	eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid;
 	eviction = (eviction << NODES_SHIFT) | pgdat->node_id;
+	eviction = (eviction << 1) | workingset;
 	eviction = (eviction << RADIX_TREE_EXCEPTIONAL_SHIFT);
 
 	return (void *)(eviction | RADIX_TREE_EXCEPTIONAL_ENTRY);
 }
 
 static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
-			  unsigned long *evictionp)
+			  unsigned long *evictionp, bool *workingsetp)
 {
 	unsigned long entry = (unsigned long)shadow;
 	int memcgid, nid;
+	bool workingset;
 
 	entry >>= RADIX_TREE_EXCEPTIONAL_SHIFT;
+	workingset = entry & 1;
+	entry >>= 1;
 	nid = entry & ((1UL << NODES_SHIFT) - 1);
 	entry >>= NODES_SHIFT;
 	memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1);
@@ -195,6 +211,7 @@
 	*memcgidp = memcgid;
 	*pgdat = NODE_DATA(nid);
 	*evictionp = entry << bucket_order;
+	*workingsetp = workingset;
 }
 
 /**
@@ -207,8 +224,8 @@
  */
 void *workingset_eviction(struct address_space *mapping, struct page *page)
 {
-	struct mem_cgroup *memcg = page_memcg(page);
 	struct pglist_data *pgdat = page_pgdat(page);
+	struct mem_cgroup *memcg = page_memcg(page);
 	int memcgid = mem_cgroup_id(memcg);
 	unsigned long eviction;
 	struct lruvec *lruvec;
@@ -220,30 +237,30 @@
 
 	lruvec = mem_cgroup_lruvec(pgdat, memcg);
 	eviction = atomic_long_inc_return(&lruvec->inactive_age);
-	return pack_shadow(memcgid, pgdat, eviction);
+	return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page));
 }
 
 /**
  * workingset_refault - evaluate the refault of a previously evicted page
+ * @page: the freshly allocated replacement page
  * @shadow: shadow entry of the evicted page
  *
  * Calculates and evaluates the refault distance of the previously
  * evicted page in the context of the node it was allocated in.
- *
- * Returns %true if the page should be activated, %false otherwise.
  */
-bool workingset_refault(void *shadow)
+void workingset_refault(struct page *page, void *shadow)
 {
 	unsigned long refault_distance;
+	struct pglist_data *pgdat;
 	unsigned long active_file;
 	struct mem_cgroup *memcg;
 	unsigned long eviction;
 	struct lruvec *lruvec;
 	unsigned long refault;
-	struct pglist_data *pgdat;
+	bool workingset;
 	int memcgid;
 
-	unpack_shadow(shadow, &memcgid, &pgdat, &eviction);
+	unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset);
 
 	rcu_read_lock();
 	/*
@@ -263,41 +280,51 @@
 	 * configurations instead.
 	 */
 	memcg = mem_cgroup_from_id(memcgid);
-	if (!mem_cgroup_disabled() && !memcg) {
-		rcu_read_unlock();
-		return false;
-	}
+	if (!mem_cgroup_disabled() && !memcg)
+		goto out;
 	lruvec = mem_cgroup_lruvec(pgdat, memcg);
 	refault = atomic_long_read(&lruvec->inactive_age);
 	active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES);
 
 	/*
-	 * The unsigned subtraction here gives an accurate distance
-	 * across inactive_age overflows in most cases.
+	 * Calculate the refault distance
 	 *
-	 * There is a special case: usually, shadow entries have a
-	 * short lifetime and are either refaulted or reclaimed along
-	 * with the inode before they get too old.  But it is not
-	 * impossible for the inactive_age to lap a shadow entry in
-	 * the field, which can then can result in a false small
-	 * refault distance, leading to a false activation should this
-	 * old entry actually refault again.  However, earlier kernels
-	 * used to deactivate unconditionally with *every* reclaim
-	 * invocation for the longest time, so the occasional
-	 * inappropriate activation leading to pressure on the active
-	 * list is not a problem.
+	 * The unsigned subtraction here gives an accurate distance
+	 * across inactive_age overflows in most cases. There is a
+	 * special case: usually, shadow entries have a short lifetime
+	 * and are either refaulted or reclaimed along with the inode
+	 * before they get too old.  But it is not impossible for the
+	 * inactive_age to lap a shadow entry in the field, which can
+	 * then result in a false small refault distance, leading to a
+	 * false activation should this old entry actually refault
+	 * again.  However, earlier kernels used to deactivate
+	 * unconditionally with *every* reclaim invocation for the
+	 * longest time, so the occasional inappropriate activation
+	 * leading to pressure on the active list is not a problem.
 	 */
 	refault_distance = (refault - eviction) & EVICTION_MASK;
 
 	inc_lruvec_state(lruvec, WORKINGSET_REFAULT);
 
-	if (refault_distance <= active_file) {
-		inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE);
-		rcu_read_unlock();
-		return true;
+	/*
+	 * Compare the distance to the existing workingset size. We
+	 * don't act on pages that couldn't stay resident even if all
+	 * the memory was available to the page cache.
+	 */
+	if (refault_distance > active_file)
+		goto out;
+
+	SetPageActive(page);
+	atomic_long_inc(&lruvec->inactive_age);
+	inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE);
+
+	/* Page was active prior to eviction */
+	if (workingset) {
+		SetPageWorkingset(page);
+		inc_lruvec_state(lruvec, WORKINGSET_RESTORE);
 	}
+out:
 	rcu_read_unlock();
-	return false;
 }
 
 /**
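
The shadow entries above now carry a PageWorkingset bit next to the memcg and node IDs. The following standalone sketch of the pack/unpack round trip assumes 64-bit longs and made-up shift widths (NODES_SHIFT and MEM_CGROUP_ID_SHIFT are config-dependent), and omits the bucket_order scaling applied in the real code.

#include <stdio.h>
#include <assert.h>

/* Assumed widths for illustration only. */
#define EXCEPTIONAL_SHIFT	2
#define EXCEPTIONAL_ENTRY	2UL
#define NODES_SHIFT		6
#define MEMCG_ID_SHIFT		16

static unsigned long pack(int memcgid, int nid, unsigned long eviction,
			  int workingset)
{
	eviction = (eviction << MEMCG_ID_SHIFT) | memcgid;
	eviction = (eviction << NODES_SHIFT) | nid;
	eviction = (eviction << 1) | workingset;
	return (eviction << EXCEPTIONAL_SHIFT) | EXCEPTIONAL_ENTRY;
}

static void unpack(unsigned long entry, int *memcgid, int *nid,
		   unsigned long *eviction, int *workingset)
{
	entry >>= EXCEPTIONAL_SHIFT;
	*workingset = entry & 1;
	entry >>= 1;
	*nid = entry & ((1UL << NODES_SHIFT) - 1);
	entry >>= NODES_SHIFT;
	*memcgid = entry & ((1UL << MEMCG_ID_SHIFT) - 1);
	*eviction = entry >> MEMCG_ID_SHIFT;
}

int main(void)
{
	int memcgid, nid, ws;
	unsigned long eviction;

	unpack(pack(42, 1, 123456, 1), &memcgid, &nid, &eviction, &ws);
	assert(memcgid == 42 && nid == 1 && eviction == 123456 && ws == 1);
	printf("round trip ok\n");
	return 0;
}
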
diff --git a/net/Kconfig b/net/Kconfig
index 228dfa3..8cc61e3 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -287,6 +287,7 @@
 	bool "enable BPF Just In Time compiler"
 	depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT
 	depends on MODULES
+	depends on !CFI
 	---help---
 	  Berkeley Packet Filter filtering capabilities are normally handled
 	  by an interpreter. This option allows kernel to generate a native
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b7ef367..3ed0789 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3899,8 +3899,8 @@
 	init_net.ipv4.sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
 
 	init_net.ipv4.sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
-	init_net.ipv4.sysctl_tcp_rmem[1] = 87380;
-	init_net.ipv4.sysctl_tcp_rmem[2] = max(87380, max_rshare);
+	init_net.ipv4.sysctl_tcp_rmem[1] = 131072;
+	init_net.ipv4.sysctl_tcp_rmem[2] = max(131072, max_rshare);
 
 	pr_info("Hash tables configured (established %u bind %u)\n",
 		tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4a8869d..5de1d76 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -426,26 +426,7 @@
 	}
 }
 
-/* 3. Tuning rcvbuf, when connection enters established state. */
-static void tcp_fixup_rcvbuf(struct sock *sk)
-{
-	u32 mss = tcp_sk(sk)->advmss;
-	int rcvmem;
-
-	rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) *
-		 tcp_default_init_rwnd(mss);
-
-	/* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency
-	 * Allow enough cushion so that sender is not limited by our window
-	 */
-	if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)
-		rcvmem <<= 2;
-
-	if (sk->sk_rcvbuf < rcvmem)
-		sk->sk_rcvbuf = min(rcvmem, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
-}
-
-/* 4. Try to fixup all. It is made immediately after connection enters
+/* 3. Try to fixup all. It is made immediately after connection enters
  *    established state.
  */
 void tcp_init_buffer_space(struct sock *sk)
@@ -454,12 +435,10 @@
 	struct tcp_sock *tp = tcp_sk(sk);
 	int maxwin;
 
-	if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
-		tcp_fixup_rcvbuf(sk);
 	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
 		tcp_sndbuf_expand(sk);
 
-	tp->rcvq_space.space = tp->rcv_wnd;
+	tp->rcvq_space.space = min_t(u32, tp->rcv_wnd, TCP_INIT_CWND * tp->advmss);
 	tcp_mstamp_refresh(tp);
 	tp->rcvq_space.time = tp->tcp_mstamp;
 	tp->rcvq_space.seq = tp->copied_seq;
@@ -485,7 +464,7 @@
 	tp->snd_cwnd_stamp = tcp_jiffies32;
 }
 
-/* 5. Recalculate window clamp after socket hit its memory bounds. */
+/* 4. Recalculate window clamp after socket hit its memory bounds. */
 static void tcp_clamp_window(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 88c7e821..8454512 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -179,21 +179,6 @@
 	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
 }
 
-
-u32 tcp_default_init_rwnd(u32 mss)
-{
-	/* Initial receive window should be twice of TCP_INIT_CWND to
-	 * enable proper sending of new unsent data during fast recovery
-	 * (RFC 3517, Section 4, NextSeg() rule (2)). Further place a
-	 * limit when mss is larger than 1460.
-	 */
-	u32 init_rwnd = TCP_INIT_CWND * 2;
-
-	if (mss > 1460)
-		init_rwnd = max((1460 * init_rwnd) / mss, 2U);
-	return init_rwnd;
-}
-
 /* Determine a window scaling and initial window to offer.
  * Based on the assumption that the given amount of space
  * will be offered. Store the results in the tp structure.
@@ -228,7 +213,10 @@
 	if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
 		(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
 	else
-		(*rcv_wnd) = space;
+		(*rcv_wnd) = min_t(u32, space, U16_MAX);
+
+	if (init_rcv_wnd)
+		*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
 
 	(*rcv_wscale) = 0;
 	if (wscale_ok) {
@@ -241,11 +229,6 @@
 			(*rcv_wscale)++;
 		}
 	}
-
-	if (!init_rcv_wnd) /* Use default unless specified otherwise */
-		init_rcv_wnd = tcp_default_init_rwnd(mss);
-	*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
-
 	/* Set the clamp no higher than max representable value */
 	(*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
 }
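
With tcp_default_init_rwnd() removed, tcp_select_initial_window() now caps the unscaled window at U16_MAX and only applies an initial-window clamp when the caller supplies init_rcv_wnd (for example via a route metric). A small worked example of the new selection, with illustrative numbers only:

#include <stdio.h>

#define U16_MAX 65535u

int main(void)
{
	unsigned int space = 1048576;	/* assumed rcvbuf-derived space, 1 MiB */
	unsigned int mss = 1460;
	unsigned int init_rcv_wnd = 0;	/* no route-supplied initrwnd */

	unsigned int rcv_wnd = space < U16_MAX ? space : U16_MAX;
	if (init_rcv_wnd && init_rcv_wnd * mss < rcv_wnd)
		rcv_wnd = init_rcv_wnd * mss;

	/* Window scaling as in the unchanged loop above (TCP_MAX_WSCALE == 14). */
	unsigned int wscale = 0, s = space;
	while (s > U16_MAX && wscale < 14) {
		s >>= 1;
		wscale++;
	}

	printf("advertised rcv_wnd=%u, wscale=%u\n", rcv_wnd, wscale);
	return 0;
}
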
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 49e2f6d..038637e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -220,6 +220,7 @@
 	.accept_ra_rt_info_max_plen = 0,
 #endif
 #endif
+	.accept_ra_rt_table	= 0,
 	.proxy_ndp		= 0,
 	.accept_source_route	= 0,	/* we do not accept RH0 by default. */
 	.disable_ipv6		= 0,
@@ -274,6 +275,7 @@
 	.accept_ra_rt_info_max_plen = 0,
 #endif
 #endif
+	.accept_ra_rt_table	= 0,
 	.proxy_ndp		= 0,
 	.accept_source_route	= 0,	/* we do not accept RH0 by default. */
 	.disable_ipv6		= 0,
@@ -2312,6 +2314,26 @@
 		ipv6_regen_rndid(idev);
 }
 
+u32 addrconf_rt_table(const struct net_device *dev, u32 default_table)
+{
+	struct inet6_dev *idev = in6_dev_get(dev);
+	int sysctl;
+	u32 table;
+
+	if (!idev)
+		return default_table;
+	sysctl = idev->cnf.accept_ra_rt_table;
+	if (sysctl == 0) {
+		table = default_table;
+	} else if (sysctl > 0) {
+		table = (u32) sysctl;
+	} else {
+		table = (unsigned) dev->ifindex + (-sysctl);
+	}
+	in6_dev_put(idev);
+	return table;
+}
+
 /*
  *	Add prefix route.
  */
@@ -2322,7 +2344,7 @@
 		      u32 flags, gfp_t gfp_flags)
 {
 	struct fib6_config cfg = {
-		.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX,
+		.fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_PREFIX),
 		.fc_metric = metric ? : IP6_RT_PRIO_ADDRCONF,
 		.fc_ifindex = dev->ifindex,
 		.fc_expires = expires,
@@ -2356,7 +2378,7 @@
 	struct fib6_node *fn;
 	struct fib6_info *rt = NULL;
 	struct fib6_table *table;
-	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX;
+	u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_PREFIX);
 
 	table = fib6_get_table(dev_net(dev), tb_id);
 	if (!table)
@@ -6349,6 +6371,13 @@
 #endif
 #endif
 	{
+		.procname	= "accept_ra_rt_table",
+		.data		= &ipv6_devconf.accept_ra_rt_table,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
 		.procname	= "proxy_ndp",
 		.data		= &ipv6_devconf.proxy_ndp,
 		.maxlen		= sizeof(int),
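
The new accept_ra_rt_table sysctl steers RA-learned routes into an alternative table: 0 keeps the default, a positive value names a fixed table, and a negative value is added to the interface index. Below is a standalone restatement of addrconf_rt_table()'s arithmetic, using an assumed ifindex of 3 and 254 (main table) standing in for the default.

#include <stdio.h>

static unsigned int ra_table(int ifindex, int sysctl, unsigned int def)
{
	if (sysctl == 0)
		return def;
	if (sysctl > 0)
		return (unsigned int)sysctl;
	return (unsigned int)ifindex + (unsigned int)(-sysctl);
}

int main(void)
{
	printf("sysctl=0     -> table %u\n", ra_table(3, 0, 254));     /* 254 */
	printf("sysctl=100   -> table %u\n", ra_table(3, 100, 254));   /* 100 */
	printf("sysctl=-1000 -> table %u\n", ra_table(3, -1000, 254)); /* 1003 */
	return 0;
}
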
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 673a4a9..45d8d08 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -197,6 +197,7 @@
 {
 	return opt->nd_opt_type == ND_OPT_RDNSS ||
 		opt->nd_opt_type == ND_OPT_DNSSL ||
+		opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL ||
 		ndisc_ops_is_useropt(dev, opt->nd_opt_type);
 }
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c885863..10c82095 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3501,7 +3501,7 @@
 					   const struct in6_addr *gwaddr,
 					   struct net_device *dev)
 {
-	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
+	u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO);
 	int ifindex = dev->ifindex;
 	struct fib6_node *fn;
 	struct fib6_info *rt = NULL;
@@ -3551,7 +3551,7 @@
 		.fc_nlinfo.nl_net = net,
 	};
 
-	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
+	cfg.fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO),
 	cfg.fc_dst = *prefix;
 	cfg.fc_gateway = *gwaddr;
 
@@ -3569,7 +3569,7 @@
 				     const struct in6_addr *addr,
 				     struct net_device *dev)
 {
-	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
+	u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_DFLT);
 	struct fib6_info *rt;
 	struct fib6_table *table;
 
@@ -3596,7 +3596,7 @@
 				     unsigned int pref)
 {
 	struct fib6_config cfg = {
-		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
+		.fc_table	= l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_DFLT),
 		.fc_metric	= IP6_RT_PRIO_USER,
 		.fc_ifindex	= dev->ifindex,
 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
@@ -3621,47 +3621,24 @@
 	return rt6_get_dflt_router(net, gwaddr, dev);
 }
 
-static void __rt6_purge_dflt_routers(struct net *net,
-				     struct fib6_table *table)
+static int rt6_addrconf_purge(struct fib6_info *rt, void *arg)
 {
-	struct fib6_info *rt;
+	struct net_device *dev = fib6_info_nh_dev(rt);
+	struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
 
-restart:
-	rcu_read_lock();
-	for_each_fib6_node_rt_rcu(&table->tb6_root) {
-		struct net_device *dev = fib6_info_nh_dev(rt);
-		struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
-
-		if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
-		    (!idev || idev->cnf.accept_ra != 2) &&
-		    fib6_info_hold_safe(rt)) {
-			rcu_read_unlock();
-			ip6_del_rt(net, rt);
-			goto restart;
-		}
+	if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
+	    (!idev || idev->cnf.accept_ra != 2)) {
+		/* Delete this route. See fib6_clean_tree() */
+		return -1;
 	}
-	rcu_read_unlock();
 
-	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
+	/* Continue walking */
+	return 0;
 }
 
 void rt6_purge_dflt_routers(struct net *net)
 {
-	struct fib6_table *table;
-	struct hlist_head *head;
-	unsigned int h;
-
-	rcu_read_lock();
-
-	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
-		head = &net->ipv6.fib_table_hash[h];
-		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
-			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
-				__rt6_purge_dflt_routers(net, table);
-		}
-	}
-
-	rcu_read_unlock();
+	fib6_clean_all(net, rt6_addrconf_purge, NULL);
 }
 
 static void rtmsg_to_fib6_config(struct net *net,
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e0fb56d..bc6983f 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -1461,6 +1461,29 @@
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
+config NETFILTER_XT_MATCH_QUOTA2
+	tristate '"quota2" match support'
+	depends on NETFILTER_ADVANCED
+	help
+	  This option adds a `quota2' match, which allows matching on a
+	  byte counter correctly rather than per CPU.
+	  It allows naming the quotas.
+	  This is based on http://xtables-addons.git.sourceforge.net
+
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_QUOTA2_LOG
+	bool '"quota2" Netfilter LOG support'
+	depends on NETFILTER_XT_MATCH_QUOTA2
+	default n
+	help
+	  This option allows `quota2' to log ONCE when a quota limit
+	  is exceeded. It logs via NETLINK using the NETLINK_NFLOG family.
+	  It logs similarly to ipt_ULOG, but without the packet payload.
+
+	  If unsure, say `N'.
+
 config NETFILTER_XT_MATCH_RATEEST
 	tristate '"rateest" match support'
 	depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 16895e0..9c87ed5 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -191,6 +191,7 @@
 obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA2) += xt_quota2.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_RATEEST) += xt_rateest.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) += xt_recent.o
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index 25453a1..f5e165e0 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -5,6 +5,7 @@
  * After timer expires a kevent will be sent.
  *
  * Copyright (C) 2004, 2010 Nokia Corporation
+ *
  * Written by Timo Teras <ext-timo.teras@nokia.com>
  *
  * Converted to x_tables and reworked for upstream inclusion
@@ -38,14 +39,17 @@
 #include <linux/netfilter/xt_IDLETIMER.h>
 #include <linux/kdev_t.h>
 #include <linux/kobject.h>
+#include <linux/skbuff.h>
 #include <linux/workqueue.h>
 #include <linux/sysfs.h>
-
-struct idletimer_tg_attr {
-	struct attribute attr;
-	ssize_t	(*show)(struct kobject *kobj,
-			struct attribute *attr, char *buf);
-};
+#include <linux/rtc.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+#include <linux/suspend.h>
+#include <linux/notifier.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <net/inet_sock.h>
 
 struct idletimer_tg {
 	struct list_head entry;
@@ -53,16 +57,112 @@
 	struct work_struct work;
 
 	struct kobject *kobj;
-	struct idletimer_tg_attr attr;
+	struct device_attribute attr;
 
+	struct timespec delayed_timer_trigger;
+	struct timespec last_modified_timer;
+	struct timespec last_suspend_time;
+	struct notifier_block pm_nb;
+
+	int timeout;
 	unsigned int refcnt;
+	bool work_pending;
+	bool send_nl_msg;
+	bool active;
+	uid_t uid;
 };
 
 static LIST_HEAD(idletimer_tg_list);
 static DEFINE_MUTEX(list_mutex);
+static DEFINE_SPINLOCK(timestamp_lock);
 
 static struct kobject *idletimer_tg_kobj;
 
+static bool check_for_delayed_trigger(struct idletimer_tg *timer,
+		struct timespec *ts)
+{
+	bool state;
+	struct timespec temp;
+	spin_lock_bh(&timestamp_lock);
+	timer->work_pending = false;
+	if ((ts->tv_sec - timer->last_modified_timer.tv_sec) > timer->timeout ||
+			timer->delayed_timer_trigger.tv_sec != 0) {
+		state = false;
+		temp.tv_sec = timer->timeout;
+		temp.tv_nsec = 0;
+		if (timer->delayed_timer_trigger.tv_sec != 0) {
+			temp = timespec_add(timer->delayed_timer_trigger, temp);
+			ts->tv_sec = temp.tv_sec;
+			ts->tv_nsec = temp.tv_nsec;
+			timer->delayed_timer_trigger.tv_sec = 0;
+			timer->work_pending = true;
+			schedule_work(&timer->work);
+		} else {
+			temp = timespec_add(timer->last_modified_timer, temp);
+			ts->tv_sec = temp.tv_sec;
+			ts->tv_nsec = temp.tv_nsec;
+		}
+	} else {
+		state = timer->active;
+	}
+	spin_unlock_bh(&timestamp_lock);
+	return state;
+}
+
+static void notify_netlink_uevent(const char *iface, struct idletimer_tg *timer)
+{
+	char iface_msg[NLMSG_MAX_SIZE];
+	char state_msg[NLMSG_MAX_SIZE];
+	char timestamp_msg[NLMSG_MAX_SIZE];
+	char uid_msg[NLMSG_MAX_SIZE];
+	char *envp[] = { iface_msg, state_msg, timestamp_msg, uid_msg, NULL };
+	int res;
+	struct timespec ts;
+	uint64_t time_ns;
+	bool state;
+
+	res = snprintf(iface_msg, NLMSG_MAX_SIZE, "INTERFACE=%s",
+		       iface);
+	if (NLMSG_MAX_SIZE <= res) {
+		pr_err("message too long (%d)", res);
+		return;
+	}
+
+	get_monotonic_boottime(&ts);
+	state = check_for_delayed_trigger(timer, &ts);
+	res = snprintf(state_msg, NLMSG_MAX_SIZE, "STATE=%s",
+			state ? "active" : "inactive");
+
+	if (NLMSG_MAX_SIZE <= res) {
+		pr_err("message too long (%d)", res);
+		return;
+	}
+
+	if (state) {
+		res = snprintf(uid_msg, NLMSG_MAX_SIZE, "UID=%u", timer->uid);
+		if (NLMSG_MAX_SIZE <= res)
+			pr_err("message too long (%d)", res);
+	} else {
+		res = snprintf(uid_msg, NLMSG_MAX_SIZE, "UID=");
+		if (NLMSG_MAX_SIZE <= res)
+			pr_err("message too long (%d)", res);
+	}
+
+	time_ns = timespec_to_ns(&ts);
+	res = snprintf(timestamp_msg, NLMSG_MAX_SIZE, "TIME_NS=%llu", time_ns);
+	if (NLMSG_MAX_SIZE <= res) {
+		timestamp_msg[0] = '\0';
+		pr_err("message too long (%d)", res);
+	}
+
+	pr_debug("putting nlmsg: <%s> <%s> <%s> <%s>\n", iface_msg, state_msg,
+		 timestamp_msg, uid_msg);
+	kobject_uevent_env(idletimer_tg_kobj, KOBJ_CHANGE, envp);
+}
+
 static
 struct idletimer_tg *__idletimer_tg_find_by_label(const char *label)
 {
@@ -78,25 +178,30 @@
 	return NULL;
 }
 
-static ssize_t idletimer_tg_show(struct kobject *kobj, struct attribute *attr,
-				 char *buf)
+static ssize_t idletimer_tg_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
 {
 	struct idletimer_tg *timer;
 	unsigned long expires = 0;
+	unsigned long now = jiffies;
 
 	mutex_lock(&list_mutex);
 
-	timer =	__idletimer_tg_find_by_label(attr->name);
+	timer =	__idletimer_tg_find_by_label(attr->attr.name);
 	if (timer)
 		expires = timer->timer.expires;
 
 	mutex_unlock(&list_mutex);
 
-	if (time_after(expires, jiffies))
+	if (time_after(expires, now))
 		return sprintf(buf, "%u\n",
-			       jiffies_to_msecs(expires - jiffies) / 1000);
+			       jiffies_to_msecs(expires - now) / 1000);
 
-	return sprintf(buf, "0\n");
+	if (timer && timer->send_nl_msg)
+		return sprintf(buf, "0 %d\n",
+			jiffies_to_msecs(now - expires) / 1000);
+	else
+		return sprintf(buf, "0\n");
 }
 
 static void idletimer_tg_work(struct work_struct *work)
@@ -105,6 +210,9 @@
 						  work);
 
 	sysfs_notify(idletimer_tg_kobj, NULL, timer->attr.attr.name);
+
+	if (timer->send_nl_msg)
+		notify_netlink_uevent(timer->attr.attr.name, timer);
 }
 
 static void idletimer_tg_expired(struct timer_list *t)
@@ -112,8 +220,55 @@
 	struct idletimer_tg *timer = from_timer(timer, t, timer);
 
 	pr_debug("timer %s expired\n", timer->attr.attr.name);
-
+	spin_lock_bh(&timestamp_lock);
+	timer->active = false;
+	timer->work_pending = true;
 	schedule_work(&timer->work);
+	spin_unlock_bh(&timestamp_lock);
+}
+
+static int idletimer_resume(struct notifier_block *notifier,
+		unsigned long pm_event, void *unused)
+{
+	struct timespec ts;
+	unsigned long time_diff, now = jiffies;
+	struct idletimer_tg *timer = container_of(notifier,
+			struct idletimer_tg, pm_nb);
+	if (!timer)
+		return NOTIFY_DONE;
+	switch (pm_event) {
+	case PM_SUSPEND_PREPARE:
+		get_monotonic_boottime(&timer->last_suspend_time);
+		break;
+	case PM_POST_SUSPEND:
+		spin_lock_bh(&timestamp_lock);
+		if (!timer->active) {
+			spin_unlock_bh(&timestamp_lock);
+			break;
+		}
+		/* Since jiffies are not updated while suspended, "now"
+		 * reflects the time at which the system suspended. */
+		if (time_after(timer->timer.expires, now)) {
+			get_monotonic_boottime(&ts);
+			ts = timespec_sub(ts, timer->last_suspend_time);
+			time_diff = timespec_to_jiffies(&ts);
+			if (timer->timer.expires > (time_diff + now)) {
+				mod_timer_pending(&timer->timer,
+						(timer->timer.expires - time_diff));
+			} else {
+				del_timer(&timer->timer);
+				timer->timer.expires = 0;
+				timer->active = false;
+				timer->work_pending = true;
+				schedule_work(&timer->work);
+			}
+		}
+		spin_unlock_bh(&timestamp_lock);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_DONE;
 }
 
 static int idletimer_check_sysfs_name(const char *name, unsigned int size)
@@ -165,6 +320,21 @@
 
 	timer_setup(&info->timer->timer, idletimer_tg_expired, 0);
 	info->timer->refcnt = 1;
+	info->timer->send_nl_msg = (info->send_nl_msg == 0) ? false : true;
+	info->timer->active = true;
+	info->timer->timeout = info->timeout;
+
+	info->timer->delayed_timer_trigger.tv_sec = 0;
+	info->timer->delayed_timer_trigger.tv_nsec = 0;
+	info->timer->work_pending = false;
+	info->timer->uid = 0;
+	get_monotonic_boottime(&info->timer->last_modified_timer);
+
+	info->timer->pm_nb.notifier_call = idletimer_resume;
+	ret = register_pm_notifier(&info->timer->pm_nb);
+	if (ret)
+		printk(KERN_WARNING "[%s] Failed to register pm notifier %d\n",
+				__func__, ret);
 
 	INIT_WORK(&info->timer->work, idletimer_tg_work);
 
@@ -181,6 +351,42 @@
 	return ret;
 }
 
+static void reset_timer(const struct idletimer_tg_info *info,
+			struct sk_buff *skb)
+{
+	unsigned long now = jiffies;
+	struct idletimer_tg *timer = info->timer;
+	bool timer_prev;
+
+	spin_lock_bh(&timestamp_lock);
+	timer_prev = timer->active;
+	timer->active = true;
+	/* timer_prev guards against jiffies wraparound in time_before() */
+	if (!timer_prev || time_before(timer->timer.expires, now)) {
+		pr_debug("Starting Checkentry timer (Expired, Jiffies): %lu, %lu\n",
+				timer->timer.expires, now);
+
+		/* Stores the uid responsible for waking up the radio */
+		if (skb && (skb->sk)) {
+			timer->uid = from_kuid_munged(current_user_ns(),
+					sock_i_uid(skb_to_full_sk(skb)));
+		}
+
+		/* Check if there is a pending inactive notification */
+		if (timer->work_pending)
+			timer->delayed_timer_trigger = timer->last_modified_timer;
+		else {
+			timer->work_pending = true;
+			schedule_work(&timer->work);
+		}
+	}
+
+	get_monotonic_boottime(&timer->last_modified_timer);
+	mod_timer(&timer->timer,
+			msecs_to_jiffies(info->timeout * 1000) + now);
+	spin_unlock_bh(&timestamp_lock);
+}
+
 /*
  * The actual xt_tables plugin.
  */
@@ -188,15 +394,23 @@
 					 const struct xt_action_param *par)
 {
 	const struct idletimer_tg_info *info = par->targinfo;
+	unsigned long now = jiffies;
 
 	pr_debug("resetting timer %s, timeout period %u\n",
 		 info->label, info->timeout);
 
 	BUG_ON(!info->timer);
 
-	mod_timer(&info->timer->timer,
-		  msecs_to_jiffies(info->timeout * 1000) + jiffies);
+	info->timer->active = true;
 
+	if (time_before(info->timer->timer.expires, now)) {
+		schedule_work(&info->timer->work);
+		pr_debug("Starting timer %s (Expired, Jiffies): %lu, %lu\n",
+			 info->label, info->timer->timer.expires, now);
+	}
+
+	/* TODO: Avoid modifying timers on each packet */
+	reset_timer(info, skb);
 	return XT_CONTINUE;
 }
 
@@ -205,7 +419,7 @@
 	struct idletimer_tg_info *info = par->targinfo;
 	int ret;
 
-	pr_debug("checkentry targinfo%s\n", info->label);
+	pr_debug("checkentry targinfo %s\n", info->label);
 
 	if (info->timeout == 0) {
 		pr_debug("timeout value is zero\n");
@@ -227,9 +441,7 @@
 	info->timer = __idletimer_tg_find_by_label(info->label);
 	if (info->timer) {
 		info->timer->refcnt++;
-		mod_timer(&info->timer->timer,
-			  msecs_to_jiffies(info->timeout * 1000) + jiffies);
-
+		reset_timer(info, NULL);
 		pr_debug("increased refcnt of timer %s to %u\n",
 			 info->label, info->timer->refcnt);
 	} else {
@@ -242,6 +454,7 @@
 	}
 
 	mutex_unlock(&list_mutex);
+
 	return 0;
 }
 
@@ -258,13 +471,14 @@
 
 		list_del(&info->timer->entry);
 		del_timer_sync(&info->timer->timer);
-		cancel_work_sync(&info->timer->work);
 		sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr);
+		unregister_pm_notifier(&info->timer->pm_nb);
+		cancel_work_sync(&info->timer->work);
 		kfree(info->timer->attr.attr.name);
 		kfree(info->timer);
 	} else {
 		pr_debug("decreased refcnt of timer %s to %u\n",
-			 info->label, info->timer->refcnt);
+		info->label, info->timer->refcnt);
 	}
 
 	mutex_unlock(&list_mutex);
@@ -272,6 +486,7 @@
 
 static struct xt_target idletimer_tg __read_mostly = {
 	.name		= "IDLETIMER",
+	.revision	= 1,
 	.family		= NFPROTO_UNSPEC,
 	.target		= idletimer_tg_target,
 	.targetsize     = sizeof(struct idletimer_tg_info),
@@ -338,3 +553,4 @@
 MODULE_LICENSE("GPL v2");
 MODULE_ALIAS("ipt_IDLETIMER");
 MODULE_ALIAS("ip6t_IDLETIMER");
+MODULE_ALIAS("arpt_IDLETIMER");
diff --git a/net/netfilter/xt_quota2.c b/net/netfilter/xt_quota2.c
new file mode 100644
index 0000000..24b7742
--- /dev/null
+++ b/net/netfilter/xt_quota2.c
@@ -0,0 +1,401 @@
+/*
+ * xt_quota2 - enhanced xt_quota that can count upwards and in packets
+ * as a minimal accounting match.
+ * by Jan Engelhardt <jengelh@medozas.de>, 2008
+ *
+ * Originally based on xt_quota.c:
+ * 	netfilter module to enforce network quotas
+ * 	Sam Johnston <samj@samj.net>
+ *
+ *	This program is free software; you can redistribute it and/or modify
+ *	it under the terms of the GNU General Public License; either
+ *	version 2 of the License, as published by the Free Software Foundation.
+ */
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_quota2.h>
+
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
+/* For compatibility, these definitions are copied from the
+ * deprecated header file <linux/netfilter_ipv4/ipt_ULOG.h> */
+#define ULOG_MAC_LEN	80
+#define ULOG_PREFIX_LEN	32
+
+/* Format of the ULOG packets passed through netlink */
+typedef struct ulog_packet_msg {
+	unsigned long mark;
+	long timestamp_sec;
+	long timestamp_usec;
+	unsigned int hook;
+	char indev_name[IFNAMSIZ];
+	char outdev_name[IFNAMSIZ];
+	size_t data_len;
+	char prefix[ULOG_PREFIX_LEN];
+	unsigned char mac_len;
+	unsigned char mac[ULOG_MAC_LEN];
+	unsigned char payload[0];
+} ulog_packet_msg_t;
+#endif
+
+/**
+ * @lock:	lock to protect quota writers from each other
+ */
+struct xt_quota_counter {
+	u_int64_t quota;
+	spinlock_t lock;
+	struct list_head list;
+	atomic_t ref;
+	char name[sizeof(((struct xt_quota_mtinfo2 *)NULL)->name)];
+	struct proc_dir_entry *procfs_entry;
+};
+
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
+/* Harald's favorite number +1 :D From ipt_ULOG.C */
+static unsigned int qlog_nl_event = 112;
+module_param_named(event_num, qlog_nl_event, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(event_num,
+		 "Event number for NETLINK_NFLOG message. 0 disables log."
+		 "111 is what ipt_ULOG uses.");
+static struct sock *nflognl;
+#endif
+
+static LIST_HEAD(counter_list);
+static DEFINE_SPINLOCK(counter_list_lock);
+
+static struct proc_dir_entry *proc_xt_quota;
+static unsigned int quota_list_perms = S_IRUGO | S_IWUSR;
+static kuid_t quota_list_uid = KUIDT_INIT(0);
+static kgid_t quota_list_gid = KGIDT_INIT(0);
+module_param_named(perms, quota_list_perms, uint, S_IRUGO | S_IWUSR);
+
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
+static void quota2_log(unsigned int hooknum,
+		       const struct sk_buff *skb,
+		       const struct net_device *in,
+		       const struct net_device *out,
+		       const char *prefix)
+{
+	ulog_packet_msg_t *pm;
+	struct sk_buff *log_skb;
+	size_t size;
+	struct nlmsghdr *nlh;
+
+	if (!qlog_nl_event)
+		return;
+
+	size = NLMSG_SPACE(sizeof(*pm));
+	size = max(size, (size_t)NLMSG_GOODSIZE);
+	log_skb = alloc_skb(size, GFP_ATOMIC);
+	if (!log_skb) {
+		pr_err("xt_quota2: cannot alloc skb for logging\n");
+		return;
+	}
+
+	nlh = nlmsg_put(log_skb, /*pid*/0, /*seq*/0, qlog_nl_event,
+			sizeof(*pm), 0);
+	if (!nlh) {
+		pr_err("xt_quota2: nlmsg_put failed\n");
+		kfree_skb(log_skb);
+		return;
+	}
+	pm = nlmsg_data(nlh);
+	if (skb->tstamp == 0)
+		__net_timestamp((struct sk_buff *)skb);
+	pm->data_len = 0;
+	pm->hook = hooknum;
+	if (prefix != NULL)
+		strlcpy(pm->prefix, prefix, sizeof(pm->prefix));
+	else
+		*(pm->prefix) = '\0';
+	if (in)
+		strlcpy(pm->indev_name, in->name, sizeof(pm->indev_name));
+	else
+		pm->indev_name[0] = '\0';
+
+	if (out)
+		strlcpy(pm->outdev_name, out->name, sizeof(pm->outdev_name));
+	else
+		pm->outdev_name[0] = '\0';
+
+	NETLINK_CB(log_skb).dst_group = 1;
+	pr_debug("throwing 1 packets to netlink group 1\n");
+	netlink_broadcast(nflognl, log_skb, 0, 1, GFP_ATOMIC);
+}
+#else
+static void quota2_log(unsigned int hooknum,
+		       const struct sk_buff *skb,
+		       const struct net_device *in,
+		       const struct net_device *out,
+		       const char *prefix)
+{
+}
+#endif  /* if+else CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG */
+
+static ssize_t quota_proc_read(struct file *file, char __user *buf,
+			   size_t size, loff_t *ppos)
+{
+	struct xt_quota_counter *e = PDE_DATA(file_inode(file));
+	char tmp[24];
+	size_t tmp_size;
+
+	spin_lock_bh(&e->lock);
+	tmp_size = scnprintf(tmp, sizeof(tmp), "%llu\n", e->quota);
+	spin_unlock_bh(&e->lock);
+	return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
+}
+
+static ssize_t quota_proc_write(struct file *file, const char __user *input,
+                            size_t size, loff_t *ppos)
+{
+	struct xt_quota_counter *e = PDE_DATA(file_inode(file));
+	char buf[sizeof("18446744073709551616")];
+
+	if (size > sizeof(buf))
+		size = sizeof(buf);
+	if (copy_from_user(buf, input, size) != 0)
+		return -EFAULT;
+	buf[sizeof(buf)-1] = '\0';
+
+	spin_lock_bh(&e->lock);
+	e->quota = simple_strtoull(buf, NULL, 0);
+	spin_unlock_bh(&e->lock);
+	return size;
+}
+
+static const struct file_operations q2_counter_fops = {
+	.read		= quota_proc_read,
+	.write		= quota_proc_write,
+	.llseek		= default_llseek,
+};
+
+static struct xt_quota_counter *
+q2_new_counter(const struct xt_quota_mtinfo2 *q, bool anon)
+{
+	struct xt_quota_counter *e;
+	unsigned int size;
+
+	/* Do not need all the procfs things for anonymous counters. */
+	size = anon ? offsetof(typeof(*e), list) : sizeof(*e);
+	e = kmalloc(size, GFP_KERNEL);
+	if (e == NULL)
+		return NULL;
+
+	e->quota = q->quota;
+	spin_lock_init(&e->lock);
+	if (!anon) {
+		INIT_LIST_HEAD(&e->list);
+		atomic_set(&e->ref, 1);
+		strlcpy(e->name, q->name, sizeof(e->name));
+	}
+	return e;
+}
+
+/**
+ * q2_get_counter - get ref to counter or create new
+ * @name:	name of counter
+ */
+static struct xt_quota_counter *
+q2_get_counter(const struct xt_quota_mtinfo2 *q)
+{
+	struct proc_dir_entry *p;
+	struct xt_quota_counter *e = NULL;
+	struct xt_quota_counter *new_e;
+
+	if (*q->name == '\0')
+		return q2_new_counter(q, true);
+
+	/* No need to hold a lock while getting a new counter */
+	new_e = q2_new_counter(q, false);
+	if (new_e == NULL)
+		goto out;
+
+	spin_lock_bh(&counter_list_lock);
+	list_for_each_entry(e, &counter_list, list)
+		if (strcmp(e->name, q->name) == 0) {
+			atomic_inc(&e->ref);
+			spin_unlock_bh(&counter_list_lock);
+			kfree(new_e);
+			pr_debug("xt_quota2: old counter name=%s", e->name);
+			return e;
+		}
+	e = new_e;
+	pr_debug("xt_quota2: new_counter name=%s", e->name);
+	list_add_tail(&e->list, &counter_list);
+	/* The new entry starts with a refcount of 1 and is not directly
+	 * destructible: this function has not yet returned it, so iptables
+	 * holds no reference it could use to destroy it.  Before another
+	 * rule could try, this function would first have to be re-invoked
+	 * and take a new reference to the same named quota.  Nobody will
+	 * access e->procfs_entry yet either, so it is safe to drop the
+	 * lock before creating the proc entry. */
+	spin_unlock_bh(&counter_list_lock);
+
+	/* proc_create_data() is not spin_lock happy */
+	p = e->procfs_entry = proc_create_data(e->name, quota_list_perms,
+	                      proc_xt_quota, &q2_counter_fops, e);
+
+	if (IS_ERR_OR_NULL(p)) {
+		spin_lock_bh(&counter_list_lock);
+		list_del(&e->list);
+		spin_unlock_bh(&counter_list_lock);
+		goto out;
+	}
+	proc_set_user(p, quota_list_uid, quota_list_gid);
+	return e;
+
+ out:
+	kfree(e);
+	return NULL;
+}
+
+static int quota_mt2_check(const struct xt_mtchk_param *par)
+{
+	struct xt_quota_mtinfo2 *q = par->matchinfo;
+
+	pr_debug("xt_quota2: check() flags=0x%04x", q->flags);
+
+	if (q->flags & ~XT_QUOTA_MASK)
+		return -EINVAL;
+
+	q->name[sizeof(q->name)-1] = '\0';
+	if (*q->name == '.' || strchr(q->name, '/') != NULL) {
+		printk(KERN_ERR "xt_quota.3: illegal name\n");
+		return -EINVAL;
+	}
+
+	q->master = q2_get_counter(q);
+	if (q->master == NULL) {
+		printk(KERN_ERR "xt_quota.3: memory alloc failure\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void quota_mt2_destroy(const struct xt_mtdtor_param *par)
+{
+	struct xt_quota_mtinfo2 *q = par->matchinfo;
+	struct xt_quota_counter *e = q->master;
+
+	if (*q->name == '\0') {
+		kfree(e);
+		return;
+	}
+
+	spin_lock_bh(&counter_list_lock);
+	if (!atomic_dec_and_test(&e->ref)) {
+		spin_unlock_bh(&counter_list_lock);
+		return;
+	}
+
+	list_del(&e->list);
+	remove_proc_entry(e->name, proc_xt_quota);
+	spin_unlock_bh(&counter_list_lock);
+	kfree(e);
+}
+
+static bool
+quota_mt2(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	struct xt_quota_mtinfo2 *q = (void *)par->matchinfo;
+	struct xt_quota_counter *e = q->master;
+	bool ret = q->flags & XT_QUOTA_INVERT;
+
+	spin_lock_bh(&e->lock);
+	if (q->flags & XT_QUOTA_GROW) {
+		/*
+		 * While no_change is pointless in "grow" mode, we will
+		 * implement it here simply to have a consistent behavior.
+		 */
+		if (!(q->flags & XT_QUOTA_NO_CHANGE)) {
+			e->quota += (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len;
+		}
+		ret = true;
+	} else {
+		if (e->quota >= skb->len) {
+			if (!(q->flags & XT_QUOTA_NO_CHANGE))
+				e->quota -= (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len;
+			ret = !ret;
+		} else {
+			/* We are transitioning, log that fact. */
+			if (e->quota) {
+				quota2_log(xt_hooknum(par),
+					   skb,
+					   xt_in(par),
+					   xt_out(par),
+					   q->name);
+			}
+			/* we do not allow even small packets from now on */
+			e->quota = 0;
+		}
+	}
+	spin_unlock_bh(&e->lock);
+	return ret;
+}
+
+static struct xt_match quota_mt2_reg[] __read_mostly = {
+	{
+		.name       = "quota2",
+		.revision   = 3,
+		.family     = NFPROTO_IPV4,
+		.checkentry = quota_mt2_check,
+		.match      = quota_mt2,
+		.destroy    = quota_mt2_destroy,
+		.matchsize  = sizeof(struct xt_quota_mtinfo2),
+		.me         = THIS_MODULE,
+	},
+	{
+		.name       = "quota2",
+		.revision   = 3,
+		.family     = NFPROTO_IPV6,
+		.checkentry = quota_mt2_check,
+		.match      = quota_mt2,
+		.destroy    = quota_mt2_destroy,
+		.matchsize  = sizeof(struct xt_quota_mtinfo2),
+		.me         = THIS_MODULE,
+	},
+};
+
+static int __init quota_mt2_init(void)
+{
+	int ret;
+	pr_debug("xt_quota2: init()");
+
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
+	nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, NULL);
+	if (!nflognl)
+		return -ENOMEM;
+#endif
+
+	proc_xt_quota = proc_mkdir("xt_quota", init_net.proc_net);
+	if (proc_xt_quota == NULL)
+		return -EACCES;
+
+	ret = xt_register_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg));
+	if (ret < 0)
+		remove_proc_entry("xt_quota", init_net.proc_net);
+	pr_debug("xt_quota2: init() %d", ret);
+	return ret;
+}
+
+static void __exit quota_mt2_exit(void)
+{
+	xt_unregister_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg));
+	remove_proc_entry("xt_quota", init_net.proc_net);
+}
+
+module_init(quota_mt2_init);
+module_exit(quota_mt2_exit);
+MODULE_DESCRIPTION("Xtables: countdown quota match; up counter");
+MODULE_AUTHOR("Sam Johnston <samj@samj.net>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_quota2");
+MODULE_ALIAS("ip6t_quota2");
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 44ac85f..d0ca0db 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -241,7 +241,7 @@
 
 	.uinfo = {
 		.auth = {
-			.icv_truncbits = 96,
+			.icv_truncbits = 128,
 			.icv_fullbits = 256,
 		}
 	},
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 11e09eb..f08ab5c 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2091,11 +2091,6 @@
 	struct xfrm_mgr *km;
 	struct xfrm_policy *pol = NULL;
 
-#ifdef CONFIG_COMPAT
-	if (in_compat_syscall())
-		return -EOPNOTSUPP;
-#endif
-
 	if (!optval && !optlen) {
 		xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
 		xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 1484bc9..1bb6992 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2633,11 +2633,6 @@
 	const struct xfrm_link *link;
 	int type, err;
 
-#ifdef CONFIG_COMPAT
-	if (in_compat_syscall())
-		return -EOPNOTSUPP;
-#endif
-
 	type = nlh->nlmsg_type;
 	if (type > XFRM_MSG_MAX)
 		return -EINVAL;
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 64fac0a..585ea17 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -185,6 +185,23 @@
 
 cmd_cc_o_c = $(CC) $(c_flags) -c -o $(@D)/.tmp_$(@F) $<
 
+ifdef CONFIG_LTO_CLANG
+# Generate .o.symversions files for each .o with exported symbols, and link these
+# to the kernel and/or modules at the end.
+cmd_modversions_c =								\
+	if $(OBJDUMP) -h $(@D)/.tmp_$(@F) >/dev/null 2>/dev/null; then		\
+		if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then	\
+			$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
+			    > $(@D)/$(@F).symversions;				\
+		fi;								\
+	else									\
+		if $(LLVM_NM) $(@D)/.tmp_$(@F) | grep -q __ksymtab; then	\
+			$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
+			    > $(@D)/$(@F).symversions;				\
+		fi;								\
+	fi;									\
+	mv -f $(@D)/.tmp_$(@F) $@;
+else
 cmd_modversions_c =								\
 	if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then		\
 		$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes))	\
@@ -197,6 +214,7 @@
 		mv -f $(@D)/.tmp_$(@F) $@;					\
 	fi;
 endif
+endif
 
 ifdef CONFIG_FTRACE_MCOUNT_RECORD
 ifndef CC_USING_RECORD_MCOUNT
@@ -205,6 +223,12 @@
 ifeq ("$(origin RECORDMCOUNT_WARN)", "command line")
   RECORDMCOUNT_FLAGS = -w
 endif
+
+ifdef CONFIG_LTO_CLANG
+# With LTO, we postpone running recordmcount until after the LTO link step, so
+# let's export the parameters for the link script.
+export RECORDMCOUNT_FLAGS
+else
 # Due to recursion, we must skip empty.o.
 # The empty.o file is created in the make process in order to determine
 # the target endianness and word size. It is made before all other C
@@ -213,22 +237,28 @@
 	if [ $(@) != "scripts/mod/empty.o" ]; then	\
 		$(objtree)/scripts/recordmcount $(RECORDMCOUNT_FLAGS) "$(@)";	\
 	fi;
+endif
+
 recordmcount_source := $(srctree)/scripts/recordmcount.c \
 		    $(srctree)/scripts/recordmcount.h
-else
+else # !BUILD_C_RECORDMCOUNT
 sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
 	"$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \
 	"$(if $(CONFIG_64BIT),64,32)" \
 	"$(OBJDUMP)" "$(OBJCOPY)" "$(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)" \
 	"$(LD) $(KBUILD_LDFLAGS)" "$(NM)" "$(RM)" "$(MV)" \
 	"$(if $(part-of-module),1,0)" "$(@)";
+
 recordmcount_source := $(srctree)/scripts/recordmcount.pl
 endif # BUILD_C_RECORDMCOUNT
+
+ifndef CONFIG_LTO_CLANG
 cmd_record_mcount =						\
 	if [ "$(findstring $(CC_FLAGS_FTRACE),$(_c_flags))" =	\
 	     "$(CC_FLAGS_FTRACE)" ]; then			\
 		$(sub_cmd_record_mcount)			\
 	fi;
+endif # CONFIG_LTO_CLANG
 endif # CC_USING_RECORD_MCOUNT
 endif # CONFIG_FTRACE_MCOUNT_RECORD
 
@@ -429,6 +459,26 @@
 # To build objects in subdirs, we need to descend into the directories
 $(sort $(subdir-obj-y)): $(subdir-ym) ;
 
+ifdef CONFIG_LTO_CLANG
+  ifdef CONFIG_MODVERSIONS
+    # combine symversions for later processing
+    update_lto_symversions =				\
+	rm -f $@.symversions; 				\
+	for i in $(filter-out FORCE,$^); do		\
+		if [ -f $$i.symversions ]; then		\
+			cat $$i.symversions 		\
+				>> $@.symversions;	\
+		fi;					\
+	done;
+  endif
+  # rebuild the symbol table with llvm-ar to include IR files
+  update_lto_symtable = ;				\
+	mv -f $@ $@.tmp;				\
+	$(LLVM_AR) rcsT$(KBUILD_ARFLAGS) $@ 		\
+		$$($(AR) t $@.tmp); 			\
+	rm -f $@.tmp
+endif
+
 #
 # Rule to compile a set of .o files into one .o file
 #
@@ -439,7 +489,8 @@
 # scripts/link-vmlinux.sh builds an aggregate built-in.a with a symbol
 # table and index.
 quiet_cmd_ar_builtin = AR      $@
-      cmd_ar_builtin = rm -f $@; \
+      cmd_ar_builtin = $(update_lto_symversions) \
+		     rm -f $@; \
                      $(AR) rcSTP$(KBUILD_ARFLAGS) $@ $(filter $(real-obj-y), $^)
 
 $(builtin-target): $(real-obj-y) FORCE
@@ -468,7 +519,11 @@
 quiet_cmd_link_l_target = AR      $@
 
 # lib target archives do get a symbol table and index
-cmd_link_l_target = rm -f $@; $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(lib-y)
+cmd_link_l_target = 					\
+	$(update_lto_symversions)			\
+	rm -f $@; 					\
+	$(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(lib-y)	\
+	$(update_lto_symtable)
 
 $(lib-target): $(lib-y) FORCE
 	$(call if_changed,link_l_target)
@@ -479,13 +534,34 @@
 ksyms-lds = $(dot-target).lds
 
 quiet_cmd_export_list = EXPORTS $@
-cmd_export_list = $(OBJDUMP) -h $< | \
-	sed -ne '/___ksymtab/s/.*+\([^ ]*\).*/EXTERN(\1)/p' >$(ksyms-lds);\
-	rm -f $(dummy-object);\
+filter_export_list = sed -ne '/___ksymtab/s/.*+\([^ ]*\).*/EXTERN(\1)/p'
+link_export_list = rm -f $(dummy-object); \
 	echo | $(CC) $(a_flags) -c -o $(dummy-object) -x assembler -;\
 	$(LD) $(ld_flags) -r -o $@ -T $(ksyms-lds) $(dummy-object);\
 	rm $(dummy-object) $(ksyms-lds)
 
+ifdef CONFIG_LTO_CLANG
+# objdump doesn't understand IR files and llvm-nm doesn't support archives,
+# so we'll walk through each file in the archive separately
+cmd_export_list = 					\
+	rm -f $(ksyms-lds);				\
+	for o in $$($(AR) t $<); do			\
+		if $(OBJDUMP) -h $$o >/dev/null 2>/dev/null; then \
+			$(OBJDUMP) -h $$o | 		\
+				$(filter_export_list) 	\
+				>>$(ksyms-lds);		\
+		else					\
+			$(LLVM_NM) $$o |		\
+				$(filter_export_list) 	\
+				>>$(ksyms-lds);		\
+		fi; 					\
+	done;						\
+	$(link_export_list)
+else
+cmd_export_list = $(OBJDUMP) -h $< | $(filter_export_list) >$(ksyms-lds); \
+	$(link_export_list)
+endif
+
 $(obj)/lib-ksyms.o: $(lib-target) FORCE
 	$(call if_changed,export_list)
 
@@ -509,13 +585,31 @@
 $($(subst $(obj)/,,$(@:.o=-y)))       \
 $($(subst $(obj)/,,$(@:.o=-m)))), $^)
 
+cmd_link_multi-link = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) $(cmd_secanalysis)
+
+quiet_cmd_link_multi-y = AR      $@
+cmd_link_multi-y = $(update_lto_symversions) \
+	rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS) $@ $(link_multi_deps) \
+	$(update_lto_symtable)
+
 quiet_cmd_link_multi-m = LD [M]  $@
-cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) $(cmd_secanalysis)
+
+ifdef CONFIG_LTO_CLANG
+  # don't compile IR until needed
+  cmd_link_multi-m = $(cmd_link_multi-y)
+else
+  cmd_link_multi-m = $(cmd_link_multi-link)
+endif
+
+$(multi-used-y): FORCE
+	$(call if_changed,link_multi-y)
 
 $(multi-used-m): FORCE
 	$(call if_changed,link_multi-m)
 	@{ echo $(@:.o=.ko); echo $(link_multi_deps); \
 	   $(cmd_undef_syms); } > $(MODVERDIR)/$(@F:.o=.mod)
+
+$(call multi_depend, $(multi-used-y), .o, -objs -y)
 $(call multi_depend, $(multi-used-m), .o, -objs -y -m)
 
 targets += $(multi-used-m)
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index 51884c7..15dd58a 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -83,12 +83,28 @@
 
 MODPOST_OPT=$(subst -i,-n,$(filter -i,$(MAKEFLAGS)))
 
+# If CONFIG_LTO_CLANG is enabled, .o files are either LLVM IR, or empty, so we
+# need to link them into actual objects before passing them to modpost
+modpost-ext = $(if $(CONFIG_LTO_CLANG),.lto,)
+
+ifdef CONFIG_LTO_CLANG
+quiet_cmd_cc_lto_link_modules = LD [M]  $@
+cmd_cc_lto_link_modules =						\
+	$(LD) $(ld_flags) -r -o $(@)					\
+		$(shell [ -s $(@:$(modpost-ext).o=.o.symversions) ] &&	\
+			echo -T $(@:$(modpost-ext).o=.o.symversions))	\
+		--whole-archive $(filter-out FORCE,$^)
+
+$(modules:.ko=$(modpost-ext).o): %$(modpost-ext).o: %.o FORCE
+	$(call if_changed,cc_lto_link_modules)
+endif
+
 # We can go over command line length here, so be careful.
 quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules
-      cmd_modpost = $(MODLISTCMD) | sed 's/\.ko$$/.o/' | $(modpost) $(MODPOST_OPT) -s -T -
+      cmd_modpost = $(MODLISTCMD) | sed 's/\.ko$$/$(modpost-ext)\.o/' | $(modpost) $(MODPOST_OPT) -s -T -
 
 PHONY += __modpost
-__modpost: $(modules:.ko=.o) FORCE
+__modpost: $(modules:.ko=$(modpost-ext).o) FORCE
 	$(call cmd,modpost) $(wildcard vmlinux)
 
 quiet_cmd_kernel-mod = MODPOST $@
@@ -100,7 +116,6 @@
 # Declare generated files as targets for modpost
 $(modules:.ko=.mod.c): __modpost ;
 
-
 # Step 5), compile all *.mod.c files
 
 # modname is set to make c_flags define KBUILD_MODNAME
@@ -119,13 +134,28 @@
 
 # Step 6), final link of the modules with optional arch pass after final link
 quiet_cmd_ld_ko_o = LD [M]  $@
+
+ifdef CONFIG_LTO_CLANG
+      cmd_ld_ko_o = 							\
+	$(LD) -r $(LDFLAGS)                                 		\
+		 $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) 		\
+		 $(shell [ -s $(@:.ko=.o.symversions) ] &&		\
+			echo -T $(@:.ko=.o.symversions))  		\
+		 -o $@ --whole-archive					\
+		 $(filter-out FORCE,$(^:$(modpost-ext).o=.o))
+
+  ifdef CONFIG_FTRACE_MCOUNT_RECORD
+      cmd_ld_ko_o += ; $(objtree)/scripts/recordmcount $(RECORDMCOUNT_FLAGS) $@
+  endif
+else
       cmd_ld_ko_o =                                                     \
 	$(LD) -r $(KBUILD_LDFLAGS)                                      \
                  $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE)             \
                  -o $@ $(filter-out FORCE,$^) ;                         \
 	$(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
+endif
 
-$(modules): %.ko :%.o %.mod.o FORCE
+$(modules): %.ko: %$(modpost-ext).o %.mod.o FORCE
 	+$(call if_changed,ld_ko_o)
 
 targets += $(modules)
diff --git a/scripts/clang-android.sh b/scripts/clang-android.sh
new file mode 100755
index 0000000..9186c4f
--- /dev/null
+++ b/scripts/clang-android.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+$* -dM -E - </dev/null 2>&1 | grep -q __ANDROID__ && echo "y"
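
The new scripts/clang-android.sh decides whether the target compiler is an Android (bionic) toolchain by dumping the preprocessor's predefined macros (-dM -E) and grepping for __ANDROID__; nothing is compiled. Purely as an illustration of the macro it relies on, a hypothetical standalone C probe would behave like this:

/*
 * Illustrative only: Android-targeting compilers predefine __ANDROID__,
 * which is exactly what scripts/clang-android.sh greps for in the output
 * of "$(CC) -dM -E - </dev/null".
 */
#include <stdio.h>

int main(void)
{
#ifdef __ANDROID__
	puts("y");	/* bionic / Android target */
#else
	puts("n");	/* any other target */
#endif
	return 0;
}
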
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index c8cf453..b6473a8 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -60,6 +60,38 @@
 	${AR} rcsTP${KBUILD_ARFLAGS} built-in.a			\
 				${KBUILD_VMLINUX_INIT}		\
 				${KBUILD_VMLINUX_MAIN}
+
+	# rebuild with llvm-ar to update the symbol table
+	if [ -n "${CONFIG_LTO_CLANG}" ]; then
+		mv -f built-in.a built-in.a.tmp
+		${LLVM_AR} rcsT${KBUILD_ARFLAGS} built-in.a $(${AR} t built-in.a.tmp)
+		rm -f built-in.a.tmp
+	fi
+}
+
+# If CONFIG_LTO_CLANG is selected, collect generated symbol versions into
+# .tmp_symversions
+modversions()
+{
+	if [ -z "${CONFIG_LTO_CLANG}" ]; then
+		return
+	fi
+
+	if [ -z "${CONFIG_MODVERSIONS}" ]; then
+		return
+	fi
+
+	rm -f .tmp_symversions
+
+	for a in built-in.a ${KBUILD_VMLINUX_LIBS}; do
+		for o in $(${AR} t $a); do
+			if [ -f ${o}.symversions ]; then
+				cat ${o}.symversions >> .tmp_symversions
+			fi
+		done
+	done
+
+	echo "-T .tmp_symversions"
 }
 
 # Link of vmlinux.o used for section mismatch analysis
@@ -75,7 +107,26 @@
 		${KBUILD_VMLINUX_LIBS}				\
 		--end-group"
 
-	${LD} ${KBUILD_LDFLAGS} -r -o ${1} ${objects}
+	if [ -n "${CONFIG_LTO_CLANG}" ]; then
+		# This might take a while, so indicate that we're doing
+		# an LTO link
+		info LTO vmlinux.o
+	fi
+
+	${LD} ${KBUILD_LDFLAGS} -r -o ${1} $(modversions) ${objects}
+}
+
+# If CONFIG_LTO_CLANG is selected, we postpone running recordmcount until
+# we have compiled LLVM IR to an object file.
+recordmcount()
+{
+	if [ -z "${CONFIG_LTO_CLANG}" ]; then
+		return
+	fi
+
+	if [ -n "${CONFIG_FTRACE_MCOUNT_RECORD}" ]; then
+		scripts/recordmcount ${RECORDMCOUNT_FLAGS} $*
+	fi
 }
 
 # Link of vmlinux
@@ -87,13 +138,20 @@
 	local objects
 
 	if [ "${SRCARCH}" != "um" ]; then
-		objects="--whole-archive			\
-			built-in.a				\
-			--no-whole-archive			\
-			--start-group				\
-			${KBUILD_VMLINUX_LIBS}			\
-			--end-group				\
-			${1}"
+		if [ -z "${CONFIG_LTO_CLANG}" ]; then
+			objects="--whole-archive		\
+				built-in.a			\
+				--no-whole-archive		\
+				--start-group			\
+				${KBUILD_VMLINUX_LIBS}		\
+				--end-group			\
+				${1}"
+		else
+			objects="--start-group			\
+				vmlinux.o			\
+				--end-group			\
+				${1}"
+		fi
 
 		${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} -o ${2}	\
 			-T ${lds} ${objects}
@@ -114,7 +172,6 @@
 	fi
 }
 
-
 # Create ${2} .o file with all symbols from the ${1} object file
 kallsyms()
 {
@@ -159,6 +216,7 @@
 {
 	rm -f .tmp_System.map
 	rm -f .tmp_kallsyms*
+	rm -f .tmp_symversions
 	rm -f .tmp_vmlinux*
 	rm -f built-in.a
 	rm -f System.map
@@ -220,12 +278,16 @@
 archive_builtin
 
 #link vmlinux.o
-info LD vmlinux.o
 modpost_link vmlinux.o
 
 # modpost vmlinux.o to check for section mismatches
 ${MAKE} -f "${srctree}/scripts/Makefile.modpost" vmlinux.o
 
+if [ -n "${CONFIG_LTO_CLANG}" ]; then
+	# Call recordmcount if needed
+	recordmcount vmlinux.o
+fi
+
 kallsymso=""
 kallsyms_vmlinux=""
 if [ -n "${CONFIG_KALLSYMS}" ]; then
diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
index 42c5d50..e014b2f 100644
--- a/scripts/mod/Makefile
+++ b/scripts/mod/Makefile
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 OBJECT_FILES_NON_STANDARD := y
+CFLAGS_empty.o += $(DISABLE_LTO)
 
 hostprogs-y	:= modpost mk_elfconfig
 always		:= $(hostprogs-y) empty.o
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 858cbe5..b16c893e 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -145,6 +145,9 @@
 		p[strlen(p) - 2] = '\0';
 		mod->is_dot_o = 1;
 	}
+	/* strip trailing .lto */
+	if (strends(p, ".lto"))
+		p[strlen(p) - 4] = '\0';
 
 	/* add to list */
 	mod->name = p;
@@ -942,6 +945,7 @@
 static const char *const linker_symbols[] =
 	{ "__init_begin", "_sinittext", "_einittext", NULL };
 static const char *const optim_symbols[] = { "*.constprop.*", NULL };
+static const char *const cfi_symbols[] = { "*.cfi", NULL };
 
 enum mismatch {
 	TEXT_TO_ANY_INIT,
@@ -1163,6 +1167,16 @@
  *   fromsec = text section
  *   refsymname = *.constprop.*
  *
+ * Pattern 6:
+ *   With CONFIG_CFI_CLANG, clang appends .cfi to all indirectly called
+ *   functions and creates a function stub with the original name. This
+ *   stub is always placed in .text, even if the actual function with the
+ *   .cfi postfix is in .init.text or .exit.text.
+ *   This pattern is identified by
+ *   tosec   = init or exit section
+ *   fromsec = text section
+ *   tosym   = *.cfi
+ *
  **/
 static int secref_whitelist(const struct sectioncheck *mismatch,
 			    const char *fromsec, const char *fromsym,
@@ -1201,6 +1215,12 @@
 	    match(fromsym, optim_symbols))
 		return 0;
 
+	/* Check for pattern 6 */
+	if (match(fromsec, text_sections) &&
+	    match(tosec, init_exit_sections) &&
+	    match(tosym, cfi_symbols))
+		return 0;
+
 	return 1;
 }
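
Pattern 6 above whitelists references from .text into init/exit sections when the referenced symbol carries clang's CFI suffix. The "*.cfi" entry in cfi_symbols[] is a trailing-wildcard pattern handled by modpost's own match() helper; purely as a sketch of the set of names it accepts (this is not modpost's code), the same test can be expressed with fnmatch(3):

/*
 * Sketch only: true for symbol names ending in ".cfi", the set the
 * "*.cfi" pattern in cfi_symbols[] is meant to cover. modpost uses its
 * internal match() helper, not fnmatch(3).
 */
#include <fnmatch.h>
#include <stdio.h>

static int is_cfi_symbol(const char *sym)
{
	return fnmatch("*.cfi", sym, 0) == 0;
}

int main(void)
{
	printf("%d\n", is_cfi_symbol("foo_driver_init.cfi"));	/* 1: whitelisted */
	printf("%d\n", is_cfi_symbol("foo_driver_init"));	/* 0: checked as usual */
	return 0;
}
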
 
@@ -1927,6 +1947,10 @@
 		size_t m = strspn(s + n + 1, "0123456789");
 		if (m && (s[n + m] == '.' || s[n + m] == 0))
 			s[n] = 0;
+
+		/* strip trailing .lto */
+		if (strends(s, ".lto"))
+			s[strlen(s) - 4] = '\0';
 	}
 	return s;
 }
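
With CONFIG_LTO_CLANG the objects handed to modpost are named *.lto.o, so both module names and mangled symbol names can end up carrying a stray ".lto"; the two hunks above drop that marker. A minimal standalone sketch of the same suffix handling, where strends() mirrors modpost's helper of the same name:

/*
 * Sketch of the suffix handling: "drivers/foo/bar.lto.o" loses ".o"
 * first (existing code) and then ".lto" (new code), so the module keeps
 * its original name "drivers/foo/bar".
 */
#include <stdio.h>
#include <string.h>

static int strends(const char *str, const char *postfix)
{
	if (strlen(str) < strlen(postfix))
		return 0;
	return strcmp(str + strlen(str) - strlen(postfix), postfix) == 0;
}

int main(void)
{
	char name[] = "drivers/foo/bar.lto.o";

	if (strends(name, ".o"))
		name[strlen(name) - 2] = '\0';	/* -> drivers/foo/bar.lto */
	if (strends(name, ".lto"))
		name[strlen(name) - 4] = '\0';	/* -> drivers/foo/bar */

	printf("%s\n", name);
	return 0;
}
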
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index 895c40e..7a9ad23 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -406,7 +406,8 @@
 		strcmp(".softirqentry.text", txtname) == 0 ||
 		strcmp(".kprobes.text", txtname) == 0 ||
 		strcmp(".cpuidle.text", txtname) == 0 ||
-		strcmp(".text.unlikely", txtname) == 0;
+		(strncmp(".text.",       txtname, 6) == 0 &&
+		 strcmp(".text..ftrace", txtname) != 0);
 }
 
 /* 32 bit and 64 bit are very similar */
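
The recordmcount change above widens the section test from the single ".text.unlikely" case to any ".text.<subsection>", excluding only ".text..ftrace"; with LTO and CFI, functions can land in many such subsections and their mcount call sites should still be recorded. A sketch of just the widened predicate, separate from the named sections (.sched.text, .kprobes.text, ...) the function also accepts:

/*
 * Sketch of the widened check only: any ".text.<something>" subsection
 * qualifies, except ".text..ftrace".
 */
#include <stdio.h>
#include <string.h>

static int is_text_subsection(const char *txtname)
{
	return strncmp(".text.", txtname, 6) == 0 &&
	       strcmp(".text..ftrace", txtname) != 0;
}

int main(void)
{
	printf("%d\n", is_text_subsection(".text.unlikely"));	/* 1 */
	printf("%d\n", is_text_subsection(".text.hot.foo"));	/* 1 (illustrative name) */
	printf("%d\n", is_text_subsection(".text..ftrace"));	/* 0 */
	return 0;
}
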
diff --git a/security/Kconfig b/security/Kconfig
index d9aa521..6c379cd 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -18,6 +18,15 @@
 
 	  If you are unsure how to answer this question, answer N.
 
+config SECURITY_PERF_EVENTS_RESTRICT
+	bool "Restrict unprivileged use of performance events"
+	depends on PERF_EVENTS
+	help
+	  If you say Y here, the kernel.perf_event_paranoid sysctl
+	  will be set to 3 by default, and no unprivileged use of the
+	  perf_event_open syscall will be permitted unless it is
+	  changed.
+
 config SECURITY
 	bool "Enable different security models"
 	depends on SYSFS
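
SECURITY_PERF_EVENTS_RESTRICT only changes the default of kernel.perf_event_paranoid to 3; the value remains visible and adjustable through the usual sysctl interface. A small userspace sketch that reports whether unprivileged perf_event_open() is expected to succeed (treating level 3 as "deny all unprivileged use" is specific to kernels carrying this option):

/*
 * Sketch: read the perf_event_paranoid sysctl and report the effective
 * policy. With SECURITY_PERF_EVENTS_RESTRICT=y the default is 3, which
 * refuses perf_event_open() to unprivileged callers entirely.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/perf_event_paranoid", "r");
	int level;

	if (!f || fscanf(f, "%d", &level) != 1) {
		perror("perf_event_paranoid");
		return 1;
	}
	fclose(f);

	printf("perf_event_paranoid=%d: %s\n", level,
	       level >= 3 ? "unprivileged perf_event_open() is denied"
			  : "some unprivileged use is permitted");
	return 0;
}
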
diff --git a/security/inode.c b/security/inode.c
index 829f156..0107dc7 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -127,7 +127,7 @@
 	dir = d_inode(parent);
 
 	inode_lock(dir);
-	dentry = lookup_one_len(name, parent, strlen(name));
+	dentry = lookup_one_len2(name, mount, parent, strlen(name));
 	if (IS_ERR(dentry))
 		goto out;
 
diff --git a/security/security.c b/security/security.c
index 5ce2448..f71c586 100644
--- a/security/security.c
+++ b/security/security.c
@@ -607,6 +607,7 @@
 		return 0;
 	return call_int_hook(path_chown, 0, path, uid, gid);
 }
+EXPORT_SYMBOL(security_path_chown);
 
 int security_path_chroot(const struct path *path)
 {
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c
index 9f420d9..8cb504d 100644
--- a/tools/accounting/getdelays.c
+++ b/tools/accounting/getdelays.c
@@ -203,6 +203,8 @@
 	       "SWAP  %15s%15s%15s\n"
 	       "      %15llu%15llu%15llums\n"
 	       "RECLAIM  %12s%15s%15s\n"
+	       "      %15llu%15llu%15llums\n"
+	       "THRASHING%12s%15s%15s\n"
 	       "      %15llu%15llu%15llums\n",
 	       "count", "real total", "virtual total",
 	       "delay total", "delay average",
@@ -222,7 +224,11 @@
 	       "count", "delay total", "delay average",
 	       (unsigned long long)t->freepages_count,
 	       (unsigned long long)t->freepages_delay_total,
-	       average_ms(t->freepages_delay_total, t->freepages_count));
+	       average_ms(t->freepages_delay_total, t->freepages_count),
+	       "count", "delay total", "delay average",
+	       (unsigned long long)t->thrashing_count,
+	       (unsigned long long)t->thrashing_delay_total,
+	       average_ms(t->thrashing_delay_total, t->thrashing_count));
 }
 
 static void task_context_switch_counts(struct taskstats *t)
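
The new THRASHING rows reuse the layout of the existing delay-accounting output: a count plus a delay total taken from struct taskstats in nanoseconds, with the average shown in milliseconds. A standalone sketch of that arithmetic (the helper below is illustrative; getdelays has its own average_ms() macro):

/*
 * Sketch of the average behind the new THRASHING row: taskstats delay
 * totals are in nanoseconds, so the per-event average in milliseconds is
 * total / 1e6 / count, guarding against a zero count.
 */
#include <stdio.h>

static unsigned long long avg_ns_to_ms(unsigned long long total_ns,
				       unsigned long long count)
{
	return total_ns / 1000000ULL / (count ? count : 1);
}

int main(void)
{
	/* example numbers, not real taskstats output */
	unsigned long long thrashing_count = 42;
	unsigned long long thrashing_delay_total = 84000000ULL;	/* ns */

	printf("THRASHING count=%llu avg=%llums\n",
	       thrashing_count,
	       avg_ns_to_ms(thrashing_delay_total, thrashing_count));
	return 0;
}
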
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 10baa16..c67d32e 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -54,6 +54,22 @@
 	return fd;
 }
 
+static int mfd_assert_reopen_fd(int fd_in)
+{
+	int r, fd;
+	char path[100];
+
+	sprintf(path, "/proc/self/fd/%d", fd_in);
+
+	fd = open(path, O_RDWR);
+	if (fd < 0) {
+		printf("re-open of existing fd %d failed\n", fd_in);
+		abort();
+	}
+
+	return fd;
+}
+
 static void mfd_fail_new(const char *name, unsigned int flags)
 {
 	int r;
@@ -255,6 +271,25 @@
 	munmap(p, mfd_def_size);
 }
 
+/* Test that PROT_READ + MAP_SHARED mappings work. */
+static void mfd_assert_read_shared(int fd)
+{
+	void *p;
+
+	/* verify PROT_READ and MAP_SHARED *is* allowed */
+	p = mmap(NULL,
+		 mfd_def_size,
+		 PROT_READ,
+		 MAP_SHARED,
+		 fd,
+		 0);
+	if (p == MAP_FAILED) {
+		printf("mmap() failed: %m\n");
+		abort();
+	}
+	munmap(p, mfd_def_size);
+}
+
 static void mfd_assert_write(int fd)
 {
 	ssize_t l;
@@ -693,6 +728,44 @@
 }
 
 /*
+ * Test SEAL_FUTURE_WRITE
+ * Test whether SEAL_FUTURE_WRITE actually prevents modifications.
+ */
+static void test_seal_future_write(void)
+{
+	int fd, fd2;
+	void *p;
+
+	printf("%s SEAL-FUTURE-WRITE\n", memfd_str);
+
+	fd = mfd_assert_new("kern_memfd_seal_future_write",
+			    mfd_def_size,
+			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+	p = mfd_assert_mmap_shared(fd);
+
+	mfd_assert_has_seals(fd, 0);
+
+	mfd_assert_add_seals(fd, F_SEAL_FUTURE_WRITE);
+	mfd_assert_has_seals(fd, F_SEAL_FUTURE_WRITE);
+
+	/* read should pass, writes should fail */
+	mfd_assert_read(fd);
+	mfd_assert_read_shared(fd);
+	mfd_fail_write(fd);
+
+	fd2 = mfd_assert_reopen_fd(fd);
+	/* read should pass, writes should still fail */
+	mfd_assert_read(fd2);
+	mfd_assert_read_shared(fd2);
+	mfd_fail_write(fd2);
+
+	munmap(p, mfd_def_size);
+	close(fd2);
+	close(fd);
+}
+
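
test_seal_future_write() above exercises the userspace contract of F_SEAL_FUTURE_WRITE: mappings the producer already holds stay writable, while new write access through any descriptor, including one re-opened via /proc/self/fd, is refused. A minimal usage sketch, assuming a libc that exposes memfd_create() and defining the seal value locally in case the installed uapi headers predate it:

/*
 * Sketch: create a memfd, fill it, then seal it against any future write
 * access before handing the fd to a less-trusted consumer.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

/* value from include/uapi/linux/fcntl.h; define it if headers are older */
#ifndef F_SEAL_FUTURE_WRITE
#define F_SEAL_FUTURE_WRITE	0x0010
#endif

int main(void)
{
	const char msg[] = "read-only from now on";
	int fd = memfd_create("sealed_buf", MFD_CLOEXEC | MFD_ALLOW_SEALING);

	if (fd < 0 || write(fd, msg, sizeof(msg)) != sizeof(msg))
		return 1;

	/* From here on there is no new write access: write(2), shared
	 * PROT_WRITE mmaps and writes through re-opened fds all fail. */
	if (fcntl(fd, F_ADD_SEALS, F_SEAL_FUTURE_WRITE) < 0)
		return 1;

	if (write(fd, msg, sizeof(msg)) < 0)
		perror("write after F_SEAL_FUTURE_WRITE");	/* expected: EPERM */

	close(fd);
	return 0;
}
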
+/*
  * Test SEAL_SHRINK
  * Test whether SEAL_SHRINK actually prevents shrinking
  */
@@ -945,6 +1018,7 @@
 	test_basic();
 
 	test_seal_write();
+	test_seal_future_write();
 	test_seal_shrink();
 	test_seal_grow();
 	test_seal_resize();