Merge 4.14.101 into android-4.14 Changes in 4.14.101 Revert "exec: load_script: don't blindly truncate shebang string" Linux 4.14.101 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
diff --git a/.gitignore b/.gitignore index f6050b8..be92dfa 100644 --- a/.gitignore +++ b/.gitignore
@@ -122,3 +122,6 @@ # Kdevelop4 *.kdev4 + +# fetched Android config fragments +kernel/configs/android-*.cfg
diff --git a/Documentation/ABI/testing/procfs-concurrent_time b/Documentation/ABI/testing/procfs-concurrent_time new file mode 100644 index 0000000..55b4142 --- /dev/null +++ b/Documentation/ABI/testing/procfs-concurrent_time
@@ -0,0 +1,16 @@ +What: /proc/uid_concurrent_active_time +Date: December 2018 +Contact: Connor O'Brien <connoro@google.com> +Description: + The /proc/uid_concurrent_active_time file displays aggregated cputime + numbers for each uid, broken down by the total number of cores that were + active while the uid's task was running. + +What: /proc/uid_concurrent_policy_time +Date: December 2018 +Contact: Connor O'Brien <connoro@google.com> +Description: + The /proc/uid_concurrent_policy_time file displays aggregated cputime + numbers for each uid, broken down based on the cpufreq policy + of the core used by the uid's task and the number of cores associated + with that policy that were active while the uid's task was running.
diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram index c1513c7..14b2bf2 100644 --- a/Documentation/ABI/testing/sysfs-block-zram +++ b/Documentation/ABI/testing/sysfs-block-zram
@@ -98,3 +98,42 @@ The backing_dev file is read-write and set up backing device for zram to write incompressible pages. For using, user should enable CONFIG_ZRAM_WRITEBACK. + +What: /sys/block/zram<id>/idle +Date: November 2018 +Contact: Minchan Kim <minchan@kernel.org> +Description: + idle file is write-only and mark zram slot as idle. + If system has mounted debugfs, user can see which slots + are idle via /sys/kernel/debug/zram/zram<id>/block_state + +What: /sys/block/zram<id>/writeback +Date: November 2018 +Contact: Minchan Kim <minchan@kernel.org> +Description: + The writeback file is write-only and trigger idle and/or + huge page writeback to backing device. + +What: /sys/block/zram<id>/bd_stat +Date: November 2018 +Contact: Minchan Kim <minchan@kernel.org> +Description: + The bd_stat file is read-only and represents backing device's + statistics (bd_count, bd_reads, bd_writes) in a format + similar to block layer statistics file format. + +What: /sys/block/zram<id>/writeback_limit_enable +Date: November 2018 +Contact: Minchan Kim <minchan@kernel.org> +Description: + The writeback_limit_enable file is read-write and specifies + eanbe of writeback_limit feature. "1" means eable the feature. + No limit "0" is the initial state. + +What: /sys/block/zram<id>/writeback_limit +Date: November 2018 +Contact: Minchan Kim <minchan@kernel.org> +Description: + The writeback_limit file is read-write and specifies the maximum + amount of writeback ZRAM can do. The limit could be changed + in run time.
diff --git a/Documentation/ABI/testing/sysfs-class-dual-role-usb b/Documentation/ABI/testing/sysfs-class-dual-role-usb new file mode 100644 index 0000000..a900fd7 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-dual-role-usb
@@ -0,0 +1,71 @@ +What: /sys/class/dual_role_usb/.../ +Date: June 2015 +Contact: Badhri Jagan Sridharan<badhri@google.com> +Description: + Provide a generic interface to monitor and change + the state of dual role usb ports. The name here + refers to the name mentioned in the + dual_role_phy_desc that is passed while registering + the dual_role_phy_intstance through + devm_dual_role_instance_register. + +What: /sys/class/dual_role_usb/.../supported_modes +Date: June 2015 +Contact: Badhri Jagan Sridharan<badhri@google.com> +Description: + This is a static node, once initialized this + is not expected to change during runtime. "dfp" + refers to "downstream facing port" i.e. port can + only act as host. "ufp" refers to "upstream + facing port" i.e. port can only act as device. + "dfp ufp" refers to "dual role port" i.e. the port + can either be a host port or a device port. + +What: /sys/class/dual_role_usb/.../mode +Date: June 2015 +Contact: Badhri Jagan Sridharan<badhri@google.com> +Description: + The mode node refers to the current mode in which the + port is operating. "dfp" for host ports. "ufp" for device + ports and "none" when cable is not connected. + + On devices where the USB mode is software-controllable, + userspace can change the mode by writing "dfp" or "ufp". + On devices where the USB mode is fixed in hardware, + this attribute is read-only. + +What: /sys/class/dual_role_usb/.../power_role +Date: June 2015 +Contact: Badhri Jagan Sridharan<badhri@google.com> +Description: + The power_role node mentions whether the port + is "sink"ing or "source"ing power. "none" if + they are not connected. + + On devices implementing USB Power Delivery, + userspace can control the power role by writing "sink" or + "source". On devices without USB-PD, this attribute is + read-only. + +What: /sys/class/dual_role_usb/.../data_role +Date: June 2015 +Contact: Badhri Jagan Sridharan<badhri@google.com> +Description: + The data_role node mentions whether the port + is acting as "host" or "device" for USB data connection. + "none" if there is no active data link. + + On devices implementing USB Power Delivery, userspace + can control the data role by writing "host" or "device". + On devices without USB-PD, this attribute is read-only + +What: /sys/class/dual_role_usb/.../powers_vconn +Date: June 2015 +Contact: Badhri Jagan Sridharan<badhri@google.com> +Description: + The powers_vconn node mentions whether the port + is supplying power for VCONN pin. + + On devices with software control of VCONN, + userspace can disable the power supply to VCONN by writing "n", + or enable the power supply by writing "y".
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 11b7f4e..381ab9f 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -51,6 +51,26 @@ Controls the dirty page count condition for the in-place-update policies. +What: /sys/fs/f2fs/<disk>/min_seq_blocks +Date: August 2018 +Contact: "Jaegeuk Kim" <jaegeuk@kernel.org> +Description: + Controls the dirty page count condition for batched sequential + writes in ->writepages. + + +What: /sys/fs/f2fs/<disk>/min_hot_blocks +Date: March 2017 +Contact: "Jaegeuk Kim" <jaegeuk@kernel.org> +Description: + Controls the dirty page count condition for redefining hot data. + +What: /sys/fs/f2fs/<disk>/min_ssr_sections +Date: October 2017 +Contact: "Chao Yu" <yuchao0@huawei.com> +Description: + Controls the fee section threshold to trigger SSR allocation. + What: /sys/fs/f2fs/<disk>/max_small_discards Date: November 2013 Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com> @@ -72,6 +92,15 @@ Description: Controls the number of trials to find a victim segment. +What: /sys/fs/f2fs/<disk>/migration_granularity +Date: October 2018 +Contact: "Chao Yu" <yuchao0@huawei.com> +Description: + Controls migration granularity of garbage collection on large + section, it can let GC move partial segment{s} of one section + in one GC cycle, so that dispersing heavy overhead GC to + multiple lightweight one. + What: /sys/fs/f2fs/<disk>/dir_level Date: March 2014 Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com> @@ -89,6 +118,7 @@ Contact: "Jaegeuk Kim" <jaegeuk@kernel.org> Description: Controls the trimming rate in batch mode. + <deprecated> What: /sys/fs/f2fs/<disk>/cp_interval Date: October 2015 @@ -100,7 +130,28 @@ Date: January 2016 Contact: "Jaegeuk Kim" <jaegeuk@kernel.org> Description: - Controls the idle timing. + Controls the idle timing for all paths other than + discard and gc path. + +What: /sys/fs/f2fs/<disk>/discard_idle_interval +Date: September 2018 +Contact: "Chao Yu" <yuchao0@huawei.com> +Contact: "Sahitya Tummala" <stummala@codeaurora.org> +Description: + Controls the idle timing for discard path. + +What: /sys/fs/f2fs/<disk>/gc_idle_interval +Date: September 2018 +Contact: "Chao Yu" <yuchao0@huawei.com> +Contact: "Sahitya Tummala" <stummala@codeaurora.org> +Description: + Controls the idle timing for gc path. + +What: /sys/fs/f2fs/<disk>/iostat_enable +Date: August 2017 +Contact: "Chao Yu" <yuchao0@huawei.com> +Description: + Controls to enable/disable IO stat. What: /sys/fs/f2fs/<disk>/ra_nid_pages Date: October 2015 @@ -122,6 +173,12 @@ Description: Shows total written kbytes issued to disk. +What: /sys/fs/f2fs/<disk>/feature +Date: July 2017 +Contact: "Jaegeuk Kim" <jaegeuk@kernel.org> +Description: + Shows all enabled features in current device. + What: /sys/fs/f2fs/<disk>/inject_rate Date: May 2016 Contact: "Sheng Yong" <shengyong1@huawei.com> @@ -138,7 +195,18 @@ Date: June 2017 Contact: "Chao Yu" <yuchao0@huawei.com> Description: - Controls current reserved blocks in system. + Controls target reserved blocks in system, the threshold + is soft, it could exceed current available user space. + +What: /sys/fs/f2fs/<disk>/current_reserved_blocks +Date: October 2017 +Contact: "Yunlong Song" <yunlong.song@huawei.com> +Contact: "Chao Yu" <yuchao0@huawei.com> +Description: + Shows current reserved blocks in system, it may be temporarily + smaller than target_reserved_blocks, but will gradually + increase to target_reserved_blocks when more free blocks are + freed by user later. What: /sys/fs/f2fs/<disk>/gc_urgent Date: August 2017 @@ -151,3 +219,20 @@ Contact: "Jaegeuk Kim" <jaegeuk@kernel.org> Description: Controls sleep time of GC urgent mode + +What: /sys/fs/f2fs/<disk>/readdir_ra +Date: November 2017 +Contact: "Sheng Yong" <shengyong1@huawei.com> +Description: + Controls readahead inode block in readdir. + +What: /sys/fs/f2fs/<disk>/extension_list +Date: Feburary 2018 +Contact: "Chao Yu" <yuchao0@huawei.com> +Description: + Used to control configure extension list: + - Query: cat /sys/fs/f2fs/<disk>/extension_list + - Add: echo '[h/c]extension' > /sys/fs/f2fs/<disk>/extension_list + - Del: echo '[h/c]!extension' > /sys/fs/f2fs/<disk>/extension_list + - [h] means add/del hot file extension + - [c] means add/del cold file extension
diff --git a/Documentation/ABI/testing/sysfs-kernel-wakeup_reasons b/Documentation/ABI/testing/sysfs-kernel-wakeup_reasons new file mode 100644 index 0000000..acb19b9 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-wakeup_reasons
@@ -0,0 +1,16 @@ +What: /sys/kernel/wakeup_reasons/last_resume_reason +Date: February 2014 +Contact: Ruchi Kandoi <kandoiruchi@google.com> +Description: + The /sys/kernel/wakeup_reasons/last_resume_reason is + used to report wakeup reasons after system exited suspend. + +What: /sys/kernel/wakeup_reasons/last_suspend_time +Date: March 2015 +Contact: jinqian <jinqian@google.com> +Description: + The /sys/kernel/wakeup_reasons/last_suspend_time is + used to report time spent in last suspend cycle. It contains + two numbers (in seconds) separated by space. First number is + the time spent in suspend and resume processes. Second number + is the time spent in sleep state. \ No newline at end of file
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 7d8b17c..98c9502 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -837,6 +837,9 @@ dis_ucode_ldr [X86] Disable the microcode loader. + dm= [DM] Allows early creation of a device-mapper device. + See Documentation/device-mapper/boot.txt. + dma_debug=off If the kernel is compiled with DMA_API_DEBUG support, this option disables the debugging code at boot.
diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt index dad411d..011ddfc1 100644 --- a/Documentation/arm64/cpu-feature-registers.txt +++ b/Documentation/arm64/cpu-feature-registers.txt
@@ -110,10 +110,20 @@ x--------------------------------------------------x | Name | bits | visible | |--------------------------------------------------| - | RES0 | [63-32] | n | + | RES0 | [63-48] | n | + |--------------------------------------------------| + | DP | [47-44] | y | + |--------------------------------------------------| + | SM4 | [43-40] | y | + |--------------------------------------------------| + | SM3 | [39-36] | y | + |--------------------------------------------------| + | SHA3 | [35-32] | y | |--------------------------------------------------| | RDM | [31-28] | y | |--------------------------------------------------| + | RES0 | [27-24] | n | + |--------------------------------------------------| | ATOMICS | [23-20] | y | |--------------------------------------------------| | CRC32 | [19-16] | y |
diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt index 257e657..6e5c2bb 100644 --- a/Documentation/blockdev/zram.txt +++ b/Documentation/blockdev/zram.txt
@@ -156,19 +156,23 @@ A brief description of exported device attributes. For more details please read Documentation/ABI/testing/sysfs-block-zram. -Name access description ----- ------ ----------- -disksize RW show and set the device's disk size -initstate RO shows the initialization state of the device -reset WO trigger device reset -mem_used_max WO reset the `mem_used_max' counter (see later) -mem_limit WO specifies the maximum amount of memory ZRAM can use - to store the compressed data -max_comp_streams RW the number of possible concurrent compress operations -comp_algorithm RW show and change the compression algorithm -compact WO trigger memory compaction -debug_stat RO this file is used for zram debugging purposes -backing_dev RW set up backend storage for zram to write out +Name access description +---- ------ ----------- +disksize RW show and set the device's disk size +initstate RO shows the initialization state of the device +reset WO trigger device reset +mem_used_max WO reset the `mem_used_max' counter (see later) +mem_limit WO specifies the maximum amount of memory ZRAM can use + to store the compressed data +writeback_limit WO specifies the maximum amount of write IO zram can + write out to backing device as 4KB unit +writeback_limit_enable RW show and set writeback_limit feature +max_comp_streams RW the number of possible concurrent compress operations +comp_algorithm RW show and change the compression algorithm +compact WO trigger memory compaction +debug_stat RO this file is used for zram debugging purposes +backing_dev RW set up backend storage for zram to write out +idle WO mark allocated slot as idle User space is advised to use the following files to read the device statistics. @@ -218,6 +222,18 @@ same_pages the number of same element filled pages written to this disk. No memory is allocated for such pages. pages_compacted the number of pages freed during compaction + huge_pages the number of incompressible pages + +File /sys/block/zram<id>/bd_stat + +The stat file represents device's backing device statistics. It consists of +a single line of text and contains the following stats separated by whitespace: + bd_count size of data written in backing device. + Unit: 4K bytes + bd_reads the number of reads from backing device + Unit: 4K bytes + bd_writes the number of writes to backing device + Unit: 4K bytes 9) Deactivate: swapoff /dev/zram0 @@ -236,11 +252,104 @@ = writeback -With incompressible pages, there is no memory saving with zram. -Instead, with CONFIG_ZRAM_WRITEBACK, zram can write incompressible page +With CONFIG_ZRAM_WRITEBACK, zram can write idle/incompressible page to backing storage rather than keeping it in memory. -User should set up backing device via /sys/block/zramX/backing_dev -before disksize setting. +To use the feature, admin should set up backing device via + + "echo /dev/sda5 > /sys/block/zramX/backing_dev" + +before disksize setting. It supports only partition at this moment. +If admin want to use incompressible page writeback, they could do via + + "echo huge > /sys/block/zramX/write" + +To use idle page writeback, first, user need to declare zram pages +as idle. + + "echo all > /sys/block/zramX/idle" + +From now on, any pages on zram are idle pages. The idle mark +will be removed until someone request access of the block. +IOW, unless there is access request, those pages are still idle pages. + +Admin can request writeback of those idle pages at right timing via + + "echo idle > /sys/block/zramX/writeback" + +With the command, zram writeback idle pages from memory to the storage. + +If there are lots of write IO with flash device, potentially, it has +flash wearout problem so that admin needs to design write limitation +to guarantee storage health for entire product life. + +To overcome the concern, zram supports "writeback_limit" feature. +The "writeback_limit_enable"'s default value is 0 so that it doesn't limit +any writeback. IOW, if admin want to apply writeback budget, he should +enable writeback_limit_enable via + + $ echo 1 > /sys/block/zramX/writeback_limit_enable + +Once writeback_limit_enable is set, zram doesn't allow any writeback +until admin set the budget via /sys/block/zramX/writeback_limit. + +(If admin doesn't enable writeback_limit_enable, writeback_limit's value +assigned via /sys/block/zramX/writeback_limit is meaninless.) + +If admin want to limit writeback as per-day 400M, he could do it +like below. + + $ MB_SHIFT=20 + $ 4K_SHIFT=12 + $ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \ + /sys/block/zram0/writeback_limit. + $ echo 1 > /sys/block/zram0/writeback_limit_enable + +If admin want to allow further write again once the bugdet is exausted, +he could do it like below + + $ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \ + /sys/block/zram0/writeback_limit + +If admin want to see remaining writeback budget since he set, + + $ cat /sys/block/zramX/writeback_limit + +If admin want to disable writeback limit, he could do + + $ echo 0 > /sys/block/zramX/writeback_limit_enable + +The writeback_limit count will reset whenever you reset zram(e.g., +system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of +writeback happened until you reset the zram to allocate extra writeback +budget in next setting is user's job. + +If admin want to measure writeback count in a certain period, he could +know it via /sys/block/zram0/bd_stat's 3rd column. + += memory tracking + +With CONFIG_ZRAM_MEMORY_TRACKING, user can know information of the +zram block. It could be useful to catch cold or incompressible +pages of the process with*pagemap. +If you enable the feature, you could see block state via +/sys/kernel/debug/zram/zram0/block_state". The output is as follows, + + 300 75.033841 .wh. + 301 63.806904 s... + 302 63.806919 ..hi + +First column is zram's block index. +Second column is access time since the system was booted +Third column is state of the block. +(s: same page +w: written page to backing store +h: huge page +i: idle page) + +First line of above example says 300th block is accessed at 75.033841sec +and the block's state is huge so it is written back to the backing +storage. It's a debugging feature so anyone shouldn't rely on it to work +properly. Nitin Gupta ngupta@vflare.org
diff --git a/Documentation/device-mapper/boot.txt b/Documentation/device-mapper/boot.txt new file mode 100644 index 0000000..adcaad5 --- /dev/null +++ b/Documentation/device-mapper/boot.txt
@@ -0,0 +1,42 @@ +Boot time creation of mapped devices +=================================== + +It is possible to configure a device mapper device to act as the root +device for your system in two ways. + +The first is to build an initial ramdisk which boots to a minimal +userspace which configures the device, then pivot_root(8) in to it. + +For simple device mapper configurations, it is possible to boot directly +using the following kernel command line: + +dm="<name> <uuid> <ro>,table line 1,...,table line n" + +name = the name to associate with the device + after boot, udev, if used, will use that name to label + the device node. +uuid = may be 'none' or the UUID desired for the device. +ro = may be "ro" or "rw". If "ro", the device and device table will be + marked read-only. + +Each table line may be as normal when using the dmsetup tool except for +two variations: +1. Any use of commas will be interpreted as a newline +2. Quotation marks cannot be escaped and cannot be used without + terminating the dm= argument. + +Unless renamed by udev, the device node created will be dm-0 as the +first minor number for the device-mapper is used during early creation. + +Example +======= + +- Booting to a linear array made up of user-mode linux block devices: + + dm="lroot none 0, 0 4096 linear 98:16 0, 4096 4096 linear 98:32 0" \ + root=/dev/dm-0 + +Will boot to a rw dm-linear target of 8192 sectors split across two +block devices identified by their major:minor numbers. After boot, udev +will rename this target to /dev/mapper/lroot (depending on the rules). +No uuid was assigned.
diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt index 89fd8f9..b3d2e4a 100644 --- a/Documentation/device-mapper/verity.txt +++ b/Documentation/device-mapper/verity.txt
@@ -109,6 +109,17 @@ This is the offset, in <data_block_size> blocks, from the start of the FEC device to the beginning of the encoding data. +check_at_most_once + Verify data blocks only the first time they are read from the data device, + rather than every time. This reduces the overhead of dm-verity so that it + can be used on systems that are memory and/or CPU constrained. However, it + provides a reduced level of security because only offline tampering of the + data device's content will be detected, not online tampering. + + Hash blocks are still verified each time they are read from the hash device, + since verification of hash blocks is less performance critical than data + blocks, and a hash block will not be verified any more after all the data + blocks it covers have been verified anyway. Theory of operation ===================
diff --git a/Documentation/devicetree/bindings/misc/memory-state-time.txt b/Documentation/devicetree/bindings/misc/memory-state-time.txt new file mode 100644 index 0000000..c99a506 --- /dev/null +++ b/Documentation/devicetree/bindings/misc/memory-state-time.txt
@@ -0,0 +1,8 @@ +Memory bandwidth and frequency state tracking + +Required properties: +- compatible : should be: + "memory-state-time" +- freq-tbl: Should contain entries with each frequency in Hz. +- bw-buckets: Should contain upper-bound limits for each bandwidth bucket in Mbps. + Must match the framework power_profile.xml for the device.
diff --git a/Documentation/devicetree/bindings/scheduler/sched-energy-costs.txt b/Documentation/devicetree/bindings/scheduler/sched-energy-costs.txt new file mode 100644 index 0000000..447cc32 --- /dev/null +++ b/Documentation/devicetree/bindings/scheduler/sched-energy-costs.txt
@@ -0,0 +1,383 @@ +=========================================================== +Energy cost bindings for Energy Aware Scheduling +=========================================================== + +=========================================================== +1 - Introduction +=========================================================== + +This note specifies bindings required for energy-aware scheduling +(EAS)[1]. Historically, the scheduler's primary objective has been +performance. EAS aims to provide an alternative objective - energy +efficiency. EAS relies on a simple platform energy cost model to +guide scheduling decisions. The model only considers the CPU +subsystem. + +This note is aligned with the definition of the layout of physical +CPUs in the system as described in the ARM topology binding +description [2]. The concept is applicable to any system so long as +the cost model data is provided for those processing elements in +that system's topology that EAS is required to service. + +Processing elements refer to hardware threads, CPUs and clusters of +related CPUs in increasing order of hierarchy. + +EAS requires two key cost metrics - busy costs and idle costs. Busy +costs comprise of a list of compute capacities for the processing +element in question and the corresponding power consumption at that +capacity. Idle costs comprise of a list of power consumption values +for each idle state [C-state] that the processing element supports. +For a detailed description of these metrics, their derivation and +their use see [3]. + +These cost metrics are required for processing elements in all +scheduling domain levels that EAS is required to service. + +=========================================================== +2 - energy-costs node +=========================================================== + +Energy costs for the processing elements in scheduling domains that +EAS is required to service are defined in the energy-costs node +which acts as a container for the actual per processing element cost +nodes. A single energy-costs node is required for a given system. + +- energy-costs node + + Usage: Required + + Description: The energy-costs node is a container node and + it's sub-nodes describe costs for each processing element at + all scheduling domain levels that EAS is required to + service. + + Node name must be "energy-costs". + + The energy-costs node's parent node must be the cpus node. + + The energy-costs node's child nodes can be: + + - one or more cost nodes. + + Any other configuration is considered invalid. + +The energy-costs node can only contain a single type of child node +whose bindings are described in paragraph 4. + +=========================================================== +3 - energy-costs node child nodes naming convention +=========================================================== + +energy-costs child nodes must follow a naming convention where the +node name must be "thread-costN", "core-costN", "cluster-costN" +depending on whether the costs in the node are for a thread, core or +cluster. N (where N = {0, 1, ...}) is the node number and has no +bearing to the OS' logical thread, core or cluster index. + +=========================================================== +4 - cost node bindings +=========================================================== + +Bindings for cost nodes are defined as follows: + +- system-cost node + + Description: Optional. Must be declared within an energy-costs + node. A system should contain no more than one system-cost node. + + Systems with no modelled system cost should not provide this + node. + + The system-cost node name must be "system-costN" as + described in 3 above. + + A system-cost node must be a leaf node with no children. + + Properties for system-cost nodes are described in paragraph + 5 below. + + Any other configuration is considered invalid. + +- cluster-cost node + + Description: must be declared within an energy-costs node. A + system can contain multiple clusters and each cluster + serviced by EAS must have a corresponding cluster-costs + node. + + The cluster-cost node name must be "cluster-costN" as + described in 3 above. + + A cluster-cost node must be a leaf node with no children. + + Properties for cluster-cost nodes are described in paragraph + 5 below. + + Any other configuration is considered invalid. + +- core-cost node + + Description: must be declared within an energy-costs node. A + system can contain multiple cores and each core serviced by + EAS must have a corresponding core-cost node. + + The core-cost node name must be "core-costN" as described in + 3 above. + + A core-cost node must be a leaf node with no children. + + Properties for core-cost nodes are described in paragraph + 5 below. + + Any other configuration is considered invalid. + +- thread-cost node + + Description: must be declared within an energy-costs node. A + system can contain cores with multiple hardware threads and + each thread serviced by EAS must have a corresponding + thread-cost node. + + The core-cost node name must be "core-costN" as described in + 3 above. + + A core-cost node must be a leaf node with no children. + + Properties for thread-cost nodes are described in paragraph + 5 below. + + Any other configuration is considered invalid. + +=========================================================== +5 - Cost node properties +========================================================== + +All cost node types must have only the following properties: + +- busy-cost-data + + Usage: required + Value type: An array of 2-item tuples. Each item is of type + u32. + Definition: The first item in the tuple is the capacity + value as described in [3]. The second item in the tuple is + the energy cost value as described in [3]. + +- idle-cost-data + + Usage: required + Value type: An array of 1-item tuples. The item is of type + u32. + Definition: The item in the tuple is the energy cost value + as described in [3]. + +=========================================================== +4 - Extensions to the cpu node +=========================================================== + +The cpu node is extended with a property that establishes the +connection between the processing element represented by the cpu +node and the cost-nodes associated with this processing element. + +The connection is expressed in line with the topological hierarchy +that this processing element belongs to starting with the level in +the hierarchy that this processing element itself belongs to through +to the highest level that EAS is required to service. The +connection cannot be sparse and must be contiguous from the +processing element's level through to the highest desired level. The +highest desired level must be the same for all processing elements. + +Example: Given that a cpu node may represent a thread that is a part +of a core, this property may contain multiple elements which +associate the thread with cost nodes describing the costs for the +thread itself, the core the thread belongs to, the cluster the core +belongs to and so on. The elements must be ordered from the lowest +level nodes to the highest desired level that EAS must service. The +highest desired level must be the same for all cpu nodes. The +elements must not be sparse: there must be elements for the current +thread, the next level of hierarchy (core) and so on without any +'holes'. + +Example: Given that a cpu node may represent a core that is a part +of a cluster of related cpus this property may contain multiple +elements which associate the core with cost nodes describing the +costs for the core itself, the cluster the core belongs to and so +on. The elements must be ordered from the lowest level nodes to the +highest desired level that EAS must service. The highest desired +level must be the same for all cpu nodes. The elements must not be +sparse: there must be elements for the current thread, the next +level of hierarchy (core) and so on without any 'holes'. + +If the system comprises of hierarchical clusters of clusters, this +property will contain multiple associations with the relevant number +of cluster elements in hierarchical order. + +Property added to the cpu node: + +- sched-energy-costs + + Usage: required + Value type: List of phandles + Definition: a list of phandles to specific cost nodes in the + energy-costs parent node that correspond to the processing + element represented by this cpu node in hierarchical order + of topology. + + The order of phandles in the list is significant. The first + phandle is to the current processing element's own cost + node. Subsequent phandles are to higher hierarchical level + cost nodes up until the maximum level that EAS is to + service. + + All cpu nodes must have the same highest level cost node. + + The phandle list must not be sparsely populated with handles + to non-contiguous hierarchical levels. See commentary above + for clarity. + + Any other configuration is invalid. + +- freq-energy-model + Description: Optional. Must be declared if the energy model + represents frequency/power values. If absent, energy model is + by default considered as capacity/power. + +=========================================================== +5 - Example dts +=========================================================== + +Example 1 (ARM 64-bit, 6-cpu system, two clusters of cpus, one +cluster of 2 Cortex-A57 cpus, one cluster of 4 Cortex-A53 cpus): + +cpus { + #address-cells = <2>; + #size-cells = <0>; + . + . + . + A57_0: cpu@0 { + compatible = "arm,cortex-a57","arm,armv8"; + reg = <0x0 0x0>; + device_type = "cpu"; + enable-method = "psci"; + next-level-cache = <&A57_L2>; + clocks = <&scpi_dvfs 0>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_0 &CLUSTER_COST_0>; + }; + + A57_1: cpu@1 { + compatible = "arm,cortex-a57","arm,armv8"; + reg = <0x0 0x1>; + device_type = "cpu"; + enable-method = "psci"; + next-level-cache = <&A57_L2>; + clocks = <&scpi_dvfs 0>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_0 &CLUSTER_COST_0>; + }; + + A53_0: cpu@100 { + compatible = "arm,cortex-a53","arm,armv8"; + reg = <0x0 0x100>; + device_type = "cpu"; + enable-method = "psci"; + next-level-cache = <&A53_L2>; + clocks = <&scpi_dvfs 1>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_1 &CLUSTER_COST_1>; + }; + + A53_1: cpu@101 { + compatible = "arm,cortex-a53","arm,armv8"; + reg = <0x0 0x101>; + device_type = "cpu"; + enable-method = "psci"; + next-level-cache = <&A53_L2>; + clocks = <&scpi_dvfs 1>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_1 &CLUSTER_COST_1>; + }; + + A53_2: cpu@102 { + compatible = "arm,cortex-a53","arm,armv8"; + reg = <0x0 0x102>; + device_type = "cpu"; + enable-method = "psci"; + next-level-cache = <&A53_L2>; + clocks = <&scpi_dvfs 1>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_1 &CLUSTER_COST_1>; + }; + + A53_3: cpu@103 { + compatible = "arm,cortex-a53","arm,armv8"; + reg = <0x0 0x103>; + device_type = "cpu"; + enable-method = "psci"; + next-level-cache = <&A53_L2>; + clocks = <&scpi_dvfs 1>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_1 &CLUSTER_COST_1>; + }; + + energy-costs { + CPU_COST_0: core-cost0 { + busy-cost-data = < + 417 168 + 579 251 + 744 359 + 883 479 + 1024 616 + >; + idle-cost-data = < + 15 + 0 + >; + }; + CPU_COST_1: core-cost1 { + busy-cost-data = < + 235 33 + 302 46 + 368 61 + 406 76 + 447 93 + >; + idle-cost-data = < + 6 + 0 + >; + }; + CLUSTER_COST_0: cluster-cost0 { + busy-cost-data = < + 417 24 + 579 32 + 744 43 + 883 49 + 1024 64 + >; + idle-cost-data = < + 65 + 24 + >; + }; + CLUSTER_COST_1: cluster-cost1 { + busy-cost-data = < + 235 26 + 303 30 + 368 39 + 406 47 + 447 57 + >; + idle-cost-data = < + 56 + 17 + >; + }; + }; +}; + +=============================================================================== +[1] https://lkml.org/lkml/2015/5/12/728 +[2] Documentation/devicetree/bindings/topology.txt +[3] Documentation/scheduler/sched-energy.txt
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 13c2ff0..fedbea8 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt
@@ -157,6 +157,25 @@ persist data of regular and symlink. fault_injection=%d Enable fault injection in all supported types with specified injection rate. +fault_type=%d Support configuring fault injection type, should be + enabled with fault_injection option, fault type value + is shown below, it supports single or combined type. + Type_Name Type_Value + FAULT_KMALLOC 0x000000001 + FAULT_KVMALLOC 0x000000002 + FAULT_PAGE_ALLOC 0x000000004 + FAULT_PAGE_GET 0x000000008 + FAULT_ALLOC_BIO 0x000000010 + FAULT_ALLOC_NID 0x000000020 + FAULT_ORPHAN 0x000000040 + FAULT_BLOCK 0x000000080 + FAULT_DIR_DEPTH 0x000000100 + FAULT_EVICT_INODE 0x000000200 + FAULT_TRUNCATE 0x000000400 + FAULT_READ_IO 0x000000800 + FAULT_CHECKPOINT 0x000001000 + FAULT_DISCARD 0x000002000 + FAULT_WRITE_IO 0x000004000 mode=%s Control block allocation mode which supports "adaptive" and "lfs". In "lfs" mode, there should be no random writes towards main area. @@ -174,6 +193,30 @@ offprjjquota Turn off project journelled quota. quota Enable plain user disk quota accounting. noquota Disable all plain disk quota option. +whint_mode=%s Control which write hints are passed down to block + layer. This supports "off", "user-based", and + "fs-based". In "off" mode (default), f2fs does not pass + down hints. In "user-based" mode, f2fs tries to pass + down hints given by users. And in "fs-based" mode, f2fs + passes down hints with its policy. +alloc_mode=%s Adjust block allocation policy, which supports "reuse" + and "default". +fsync_mode=%s Control the policy of fsync. Currently supports "posix", + "strict", and "nobarrier". In "posix" mode, which is + default, fsync will follow POSIX semantics and does a + light operation to improve the filesystem performance. + In "strict" mode, fsync will be heavy and behaves in line + with xfs, ext4 and btrfs, where xfstest generic/342 will + pass, but the performance will regress. "nobarrier" is + based on "posix", but doesn't issue flush command for + non-atomic files likewise "nobarrier" mount option. +test_dummy_encryption Enable dummy encryption, which provides a fake fscrypt + context. The fake fscrypt context is used by xfstests. +checkpoint=%s Set to "disable" to turn off checkpointing. Set to "enable" + to reenable checkpointing. Is enabled by default. While + disabled, any unmounting or unexpected shutdowns will cause + the filesystem contents to appear as they did when the + filesystem was mounted with that option. ================================================================================ DEBUGFS ENTRIES
diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst new file mode 100644 index 0000000..3a7b605 --- /dev/null +++ b/Documentation/filesystems/fscrypt.rst
@@ -0,0 +1,641 @@ +===================================== +Filesystem-level encryption (fscrypt) +===================================== + +Introduction +============ + +fscrypt is a library which filesystems can hook into to support +transparent encryption of files and directories. + +Note: "fscrypt" in this document refers to the kernel-level portion, +implemented in ``fs/crypto/``, as opposed to the userspace tool +`fscrypt <https://github.com/google/fscrypt>`_. This document only +covers the kernel-level portion. For command-line examples of how to +use encryption, see the documentation for the userspace tool `fscrypt +<https://github.com/google/fscrypt>`_. Also, it is recommended to use +the fscrypt userspace tool, or other existing userspace tools such as +`fscryptctl <https://github.com/google/fscryptctl>`_ or `Android's key +management system +<https://source.android.com/security/encryption/file-based>`_, over +using the kernel's API directly. Using existing tools reduces the +chance of introducing your own security bugs. (Nevertheless, for +completeness this documentation covers the kernel's API anyway.) + +Unlike dm-crypt, fscrypt operates at the filesystem level rather than +at the block device level. This allows it to encrypt different files +with different keys and to have unencrypted files on the same +filesystem. This is useful for multi-user systems where each user's +data-at-rest needs to be cryptographically isolated from the others. +However, except for filenames, fscrypt does not encrypt filesystem +metadata. + +Unlike eCryptfs, which is a stacked filesystem, fscrypt is integrated +directly into supported filesystems --- currently ext4, F2FS, and +UBIFS. This allows encrypted files to be read and written without +caching both the decrypted and encrypted pages in the pagecache, +thereby nearly halving the memory used and bringing it in line with +unencrypted files. Similarly, half as many dentries and inodes are +needed. eCryptfs also limits encrypted filenames to 143 bytes, +causing application compatibility issues; fscrypt allows the full 255 +bytes (NAME_MAX). Finally, unlike eCryptfs, the fscrypt API can be +used by unprivileged users, with no need to mount anything. + +fscrypt does not support encrypting files in-place. Instead, it +supports marking an empty directory as encrypted. Then, after +userspace provides the key, all regular files, directories, and +symbolic links created in that directory tree are transparently +encrypted. + +Threat model +============ + +Offline attacks +--------------- + +Provided that userspace chooses a strong encryption key, fscrypt +protects the confidentiality of file contents and filenames in the +event of a single point-in-time permanent offline compromise of the +block device content. fscrypt does not protect the confidentiality of +non-filename metadata, e.g. file sizes, file permissions, file +timestamps, and extended attributes. Also, the existence and location +of holes (unallocated blocks which logically contain all zeroes) in +files is not protected. + +fscrypt is not guaranteed to protect confidentiality or authenticity +if an attacker is able to manipulate the filesystem offline prior to +an authorized user later accessing the filesystem. + +Online attacks +-------------- + +fscrypt (and storage encryption in general) can only provide limited +protection, if any at all, against online attacks. In detail: + +fscrypt is only resistant to side-channel attacks, such as timing or +electromagnetic attacks, to the extent that the underlying Linux +Cryptographic API algorithms are. If a vulnerable algorithm is used, +such as a table-based implementation of AES, it may be possible for an +attacker to mount a side channel attack against the online system. +Side channel attacks may also be mounted against applications +consuming decrypted data. + +After an encryption key has been provided, fscrypt is not designed to +hide the plaintext file contents or filenames from other users on the +same system, regardless of the visibility of the keyring key. +Instead, existing access control mechanisms such as file mode bits, +POSIX ACLs, LSMs, or mount namespaces should be used for this purpose. +Also note that as long as the encryption keys are *anywhere* in +memory, an online attacker can necessarily compromise them by mounting +a physical attack or by exploiting any kernel security vulnerability +which provides an arbitrary memory read primitive. + +While it is ostensibly possible to "evict" keys from the system, +recently accessed encrypted files will remain accessible at least +until the filesystem is unmounted or the VFS caches are dropped, e.g. +using ``echo 2 > /proc/sys/vm/drop_caches``. Even after that, if the +RAM is compromised before being powered off, it will likely still be +possible to recover portions of the plaintext file contents, if not +some of the encryption keys as well. (Since Linux v4.12, all +in-kernel keys related to fscrypt are sanitized before being freed. +However, userspace would need to do its part as well.) + +Currently, fscrypt does not prevent a user from maliciously providing +an incorrect key for another user's existing encrypted files. A +protection against this is planned. + +Key hierarchy +============= + +Master Keys +----------- + +Each encrypted directory tree is protected by a *master key*. Master +keys can be up to 64 bytes long, and must be at least as long as the +greater of the key length needed by the contents and filenames +encryption modes being used. For example, if AES-256-XTS is used for +contents encryption, the master key must be 64 bytes (512 bits). Note +that the XTS mode is defined to require a key twice as long as that +required by the underlying block cipher. + +To "unlock" an encrypted directory tree, userspace must provide the +appropriate master key. There can be any number of master keys, each +of which protects any number of directory trees on any number of +filesystems. + +Userspace should generate master keys either using a cryptographically +secure random number generator, or by using a KDF (Key Derivation +Function). Note that whenever a KDF is used to "stretch" a +lower-entropy secret such as a passphrase, it is critical that a KDF +designed for this purpose be used, such as scrypt, PBKDF2, or Argon2. + +Per-file keys +------------- + +Since each master key can protect many files, it is necessary to +"tweak" the encryption of each file so that the same plaintext in two +files doesn't map to the same ciphertext, or vice versa. In most +cases, fscrypt does this by deriving per-file keys. When a new +encrypted inode (regular file, directory, or symlink) is created, +fscrypt randomly generates a 16-byte nonce and stores it in the +inode's encryption xattr. Then, it uses a KDF (Key Derivation +Function) to derive the file's key from the master key and nonce. + +The Adiantum encryption mode (see `Encryption modes and usage`_) is +special, since it accepts longer IVs and is suitable for both contents +and filenames encryption. For it, a "direct key" option is offered +where the file's nonce is included in the IVs and the master key is +used for encryption directly. This improves performance; however, +users must not use the same master key for any other encryption mode. + +Below, the KDF and design considerations are described in more detail. + +The current KDF works by encrypting the master key with AES-128-ECB, +using the file's nonce as the AES key. The output is used as the +derived key. If the output is longer than needed, then it is +truncated to the needed length. + +Note: this KDF meets the primary security requirement, which is to +produce unique derived keys that preserve the entropy of the master +key, assuming that the master key is already a good pseudorandom key. +However, it is nonstandard and has some problems such as being +reversible, so it is generally considered to be a mistake! It may be +replaced with HKDF or another more standard KDF in the future. + +Key derivation was chosen over key wrapping because wrapped keys would +require larger xattrs which would be less likely to fit in-line in the +filesystem's inode table, and there didn't appear to be any +significant advantages to key wrapping. In particular, currently +there is no requirement to support unlocking a file with multiple +alternative master keys or to support rotating master keys. Instead, +the master keys may be wrapped in userspace, e.g. as is done by the +`fscrypt <https://github.com/google/fscrypt>`_ tool. + +Including the inode number in the IVs was considered. However, it was +rejected as it would have prevented ext4 filesystems from being +resized, and by itself still wouldn't have been sufficient to prevent +the same key from being directly reused for both XTS and CTS-CBC. + +Encryption modes and usage +========================== + +fscrypt allows one encryption mode to be specified for file contents +and one encryption mode to be specified for filenames. Different +directory trees are permitted to use different encryption modes. +Currently, the following pairs of encryption modes are supported: + +- AES-256-XTS for contents and AES-256-CTS-CBC for filenames +- AES-128-CBC for contents and AES-128-CTS-CBC for filenames +- Adiantum for both contents and filenames + +If unsure, you should use the (AES-256-XTS, AES-256-CTS-CBC) pair. + +AES-128-CBC was added only for low-powered embedded devices with +crypto accelerators such as CAAM or CESA that do not support XTS. + +Adiantum is a (primarily) stream cipher-based mode that is fast even +on CPUs without dedicated crypto instructions. It's also a true +wide-block mode, unlike XTS. It can also eliminate the need to derive +per-file keys. However, it depends on the security of two primitives, +XChaCha12 and AES-256, rather than just one. See the paper +"Adiantum: length-preserving encryption for entry-level processors" +(https://eprint.iacr.org/2018/720.pdf) for more details. To use +Adiantum, CONFIG_CRYPTO_ADIANTUM must be enabled. Also, fast +implementations of ChaCha and NHPoly1305 should be enabled, e.g. +CONFIG_CRYPTO_CHACHA20_NEON and CONFIG_CRYPTO_NHPOLY1305_NEON for ARM. + +New encryption modes can be added relatively easily, without changes +to individual filesystems. However, authenticated encryption (AE) +modes are not currently supported because of the difficulty of dealing +with ciphertext expansion. + +Contents encryption +------------------- + +For file contents, each filesystem block is encrypted independently. +Currently, only the case where the filesystem block size is equal to +the system's page size (usually 4096 bytes) is supported. + +Each block's IV is set to the logical block number within the file as +a little endian number, except that: + +- With CBC mode encryption, ESSIV is also used. Specifically, each IV + is encrypted with AES-256 where the AES-256 key is the SHA-256 hash + of the file's data encryption key. + +- In the "direct key" configuration (FS_POLICY_FLAG_DIRECT_KEY set in + the fscrypt_policy), the file's nonce is also appended to the IV. + Currently this is only allowed with the Adiantum encryption mode. + +Filenames encryption +-------------------- + +For filenames, each full filename is encrypted at once. Because of +the requirements to retain support for efficient directory lookups and +filenames of up to 255 bytes, the same IV is used for every filename +in a directory. + +However, each encrypted directory still uses a unique key; or +alternatively (for the "direct key" configuration) has the file's +nonce included in the IVs. Thus, IV reuse is limited to within a +single directory. + +With CTS-CBC, the IV reuse means that when the plaintext filenames +share a common prefix at least as long as the cipher block size (16 +bytes for AES), the corresponding encrypted filenames will also share +a common prefix. This is undesirable. Adiantum does not have this +weakness, as it is a wide-block encryption mode. + +All supported filenames encryption modes accept any plaintext length +>= 16 bytes; cipher block alignment is not required. However, +filenames shorter than 16 bytes are NUL-padded to 16 bytes before +being encrypted. In addition, to reduce leakage of filename lengths +via their ciphertexts, all filenames are NUL-padded to the next 4, 8, +16, or 32-byte boundary (configurable). 32 is recommended since this +provides the best confidentiality, at the cost of making directory +entries consume slightly more space. Note that since NUL (``\0``) is +not otherwise a valid character in filenames, the padding will never +produce duplicate plaintexts. + +Symbolic link targets are considered a type of filename and are +encrypted in the same way as filenames in directory entries, except +that IV reuse is not a problem as each symlink has its own inode. + +User API +======== + +Setting an encryption policy +---------------------------- + +The FS_IOC_SET_ENCRYPTION_POLICY ioctl sets an encryption policy on an +empty directory or verifies that a directory or regular file already +has the specified encryption policy. It takes in a pointer to a +:c:type:`struct fscrypt_policy`, defined as follows:: + + #define FS_KEY_DESCRIPTOR_SIZE 8 + + struct fscrypt_policy { + __u8 version; + __u8 contents_encryption_mode; + __u8 filenames_encryption_mode; + __u8 flags; + __u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE]; + }; + +This structure must be initialized as follows: + +- ``version`` must be 0. + +- ``contents_encryption_mode`` and ``filenames_encryption_mode`` must + be set to constants from ``<linux/fs.h>`` which identify the + encryption modes to use. If unsure, use + FS_ENCRYPTION_MODE_AES_256_XTS (1) for ``contents_encryption_mode`` + and FS_ENCRYPTION_MODE_AES_256_CTS (4) for + ``filenames_encryption_mode``. + +- ``flags`` must contain a value from ``<linux/fs.h>`` which + identifies the amount of NUL-padding to use when encrypting + filenames. If unsure, use FS_POLICY_FLAGS_PAD_32 (0x3). + In addition, if the chosen encryption modes are both + FS_ENCRYPTION_MODE_ADIANTUM, this can contain + FS_POLICY_FLAG_DIRECT_KEY to specify that the master key should be + used directly, without key derivation. + +- ``master_key_descriptor`` specifies how to find the master key in + the keyring; see `Adding keys`_. It is up to userspace to choose a + unique ``master_key_descriptor`` for each master key. The e4crypt + and fscrypt tools use the first 8 bytes of + ``SHA-512(SHA-512(master_key))``, but this particular scheme is not + required. Also, the master key need not be in the keyring yet when + FS_IOC_SET_ENCRYPTION_POLICY is executed. However, it must be added + before any files can be created in the encrypted directory. + +If the file is not yet encrypted, then FS_IOC_SET_ENCRYPTION_POLICY +verifies that the file is an empty directory. If so, the specified +encryption policy is assigned to the directory, turning it into an +encrypted directory. After that, and after providing the +corresponding master key as described in `Adding keys`_, all regular +files, directories (recursively), and symlinks created in the +directory will be encrypted, inheriting the same encryption policy. +The filenames in the directory's entries will be encrypted as well. + +Alternatively, if the file is already encrypted, then +FS_IOC_SET_ENCRYPTION_POLICY validates that the specified encryption +policy exactly matches the actual one. If they match, then the ioctl +returns 0. Otherwise, it fails with EEXIST. This works on both +regular files and directories, including nonempty directories. + +Note that the ext4 filesystem does not allow the root directory to be +encrypted, even if it is empty. Users who want to encrypt an entire +filesystem with one key should consider using dm-crypt instead. + +FS_IOC_SET_ENCRYPTION_POLICY can fail with the following errors: + +- ``EACCES``: the file is not owned by the process's uid, nor does the + process have the CAP_FOWNER capability in a namespace with the file + owner's uid mapped +- ``EEXIST``: the file is already encrypted with an encryption policy + different from the one specified +- ``EINVAL``: an invalid encryption policy was specified (invalid + version, mode(s), or flags) +- ``ENOTDIR``: the file is unencrypted and is a regular file, not a + directory +- ``ENOTEMPTY``: the file is unencrypted and is a nonempty directory +- ``ENOTTY``: this type of filesystem does not implement encryption +- ``EOPNOTSUPP``: the kernel was not configured with encryption + support for this filesystem, or the filesystem superblock has not + had encryption enabled on it. (For example, to use encryption on an + ext4 filesystem, CONFIG_EXT4_ENCRYPTION must be enabled in the + kernel config, and the superblock must have had the "encrypt" + feature flag enabled using ``tune2fs -O encrypt`` or ``mkfs.ext4 -O + encrypt``.) +- ``EPERM``: this directory may not be encrypted, e.g. because it is + the root directory of an ext4 filesystem +- ``EROFS``: the filesystem is readonly + +Getting an encryption policy +---------------------------- + +The FS_IOC_GET_ENCRYPTION_POLICY ioctl retrieves the :c:type:`struct +fscrypt_policy`, if any, for a directory or regular file. See above +for the struct definition. No additional permissions are required +beyond the ability to open the file. + +FS_IOC_GET_ENCRYPTION_POLICY can fail with the following errors: + +- ``EINVAL``: the file is encrypted, but it uses an unrecognized + encryption context format +- ``ENODATA``: the file is not encrypted +- ``ENOTTY``: this type of filesystem does not implement encryption +- ``EOPNOTSUPP``: the kernel was not configured with encryption + support for this filesystem + +Note: if you only need to know whether a file is encrypted or not, on +most filesystems it is also possible to use the FS_IOC_GETFLAGS ioctl +and check for FS_ENCRYPT_FL, or to use the statx() system call and +check for STATX_ATTR_ENCRYPTED in stx_attributes. + +Getting the per-filesystem salt +------------------------------- + +Some filesystems, such as ext4 and F2FS, also support the deprecated +ioctl FS_IOC_GET_ENCRYPTION_PWSALT. This ioctl retrieves a randomly +generated 16-byte value stored in the filesystem superblock. This +value is intended to used as a salt when deriving an encryption key +from a passphrase or other low-entropy user credential. + +FS_IOC_GET_ENCRYPTION_PWSALT is deprecated. Instead, prefer to +generate and manage any needed salt(s) in userspace. + +Adding keys +----------- + +To provide a master key, userspace must add it to an appropriate +keyring using the add_key() system call (see: +``Documentation/security/keys/core.rst``). The key type must be +"logon"; keys of this type are kept in kernel memory and cannot be +read back by userspace. The key description must be "fscrypt:" +followed by the 16-character lower case hex representation of the +``master_key_descriptor`` that was set in the encryption policy. The +key payload must conform to the following structure:: + + #define FS_MAX_KEY_SIZE 64 + + struct fscrypt_key { + u32 mode; + u8 raw[FS_MAX_KEY_SIZE]; + u32 size; + }; + +``mode`` is ignored; just set it to 0. The actual key is provided in +``raw`` with ``size`` indicating its size in bytes. That is, the +bytes ``raw[0..size-1]`` (inclusive) are the actual key. + +The key description prefix "fscrypt:" may alternatively be replaced +with a filesystem-specific prefix such as "ext4:". However, the +filesystem-specific prefixes are deprecated and should not be used in +new programs. + +There are several different types of keyrings in which encryption keys +may be placed, such as a session keyring, a user session keyring, or a +user keyring. Each key must be placed in a keyring that is "attached" +to all processes that might need to access files encrypted with it, in +the sense that request_key() will find the key. Generally, if only +processes belonging to a specific user need to access a given +encrypted directory and no session keyring has been installed, then +that directory's key should be placed in that user's user session +keyring or user keyring. Otherwise, a session keyring should be +installed if needed, and the key should be linked into that session +keyring, or in a keyring linked into that session keyring. + +Note: introducing the complex visibility semantics of keyrings here +was arguably a mistake --- especially given that by design, after any +process successfully opens an encrypted file (thereby setting up the +per-file key), possessing the keyring key is not actually required for +any process to read/write the file until its in-memory inode is +evicted. In the future there probably should be a way to provide keys +directly to the filesystem instead, which would make the intended +semantics clearer. + +Access semantics +================ + +With the key +------------ + +With the encryption key, encrypted regular files, directories, and +symlinks behave very similarly to their unencrypted counterparts --- +after all, the encryption is intended to be transparent. However, +astute users may notice some differences in behavior: + +- Unencrypted files, or files encrypted with a different encryption + policy (i.e. different key, modes, or flags), cannot be renamed or + linked into an encrypted directory; see `Encryption policy + enforcement`_. Attempts to do so will fail with EPERM. However, + encrypted files can be renamed within an encrypted directory, or + into an unencrypted directory. + +- Direct I/O is not supported on encrypted files. Attempts to use + direct I/O on such files will fall back to buffered I/O. + +- The fallocate operations FALLOC_FL_COLLAPSE_RANGE, + FALLOC_FL_INSERT_RANGE, and FALLOC_FL_ZERO_RANGE are not supported + on encrypted files and will fail with EOPNOTSUPP. + +- Online defragmentation of encrypted files is not supported. The + EXT4_IOC_MOVE_EXT and F2FS_IOC_MOVE_RANGE ioctls will fail with + EOPNOTSUPP. + +- The ext4 filesystem does not support data journaling with encrypted + regular files. It will fall back to ordered data mode instead. + +- DAX (Direct Access) is not supported on encrypted files. + +- The st_size of an encrypted symlink will not necessarily give the + length of the symlink target as required by POSIX. It will actually + give the length of the ciphertext, which will be slightly longer + than the plaintext due to NUL-padding and an extra 2-byte overhead. + +- The maximum length of an encrypted symlink is 2 bytes shorter than + the maximum length of an unencrypted symlink. For example, on an + EXT4 filesystem with a 4K block size, unencrypted symlinks can be up + to 4095 bytes long, while encrypted symlinks can only be up to 4093 + bytes long (both lengths excluding the terminating null). + +Note that mmap *is* supported. This is possible because the pagecache +for an encrypted file contains the plaintext, not the ciphertext. + +Without the key +--------------- + +Some filesystem operations may be performed on encrypted regular +files, directories, and symlinks even before their encryption key has +been provided: + +- File metadata may be read, e.g. using stat(). + +- Directories may be listed, in which case the filenames will be + listed in an encoded form derived from their ciphertext. The + current encoding algorithm is described in `Filename hashing and + encoding`_. The algorithm is subject to change, but it is + guaranteed that the presented filenames will be no longer than + NAME_MAX bytes, will not contain the ``/`` or ``\0`` characters, and + will uniquely identify directory entries. + + The ``.`` and ``..`` directory entries are special. They are always + present and are not encrypted or encoded. + +- Files may be deleted. That is, nondirectory files may be deleted + with unlink() as usual, and empty directories may be deleted with + rmdir() as usual. Therefore, ``rm`` and ``rm -r`` will work as + expected. + +- Symlink targets may be read and followed, but they will be presented + in encrypted form, similar to filenames in directories. Hence, they + are unlikely to point to anywhere useful. + +Without the key, regular files cannot be opened or truncated. +Attempts to do so will fail with ENOKEY. This implies that any +regular file operations that require a file descriptor, such as +read(), write(), mmap(), fallocate(), and ioctl(), are also forbidden. + +Also without the key, files of any type (including directories) cannot +be created or linked into an encrypted directory, nor can a name in an +encrypted directory be the source or target of a rename, nor can an +O_TMPFILE temporary file be created in an encrypted directory. All +such operations will fail with ENOKEY. + +It is not currently possible to backup and restore encrypted files +without the encryption key. This would require special APIs which +have not yet been implemented. + +Encryption policy enforcement +============================= + +After an encryption policy has been set on a directory, all regular +files, directories, and symbolic links created in that directory +(recursively) will inherit that encryption policy. Special files --- +that is, named pipes, device nodes, and UNIX domain sockets --- will +not be encrypted. + +Except for those special files, it is forbidden to have unencrypted +files, or files encrypted with a different encryption policy, in an +encrypted directory tree. Attempts to link or rename such a file into +an encrypted directory will fail with EPERM. This is also enforced +during ->lookup() to provide limited protection against offline +attacks that try to disable or downgrade encryption in known locations +where applications may later write sensitive data. It is recommended +that systems implementing a form of "verified boot" take advantage of +this by validating all top-level encryption policies prior to access. + +Implementation details +====================== + +Encryption context +------------------ + +An encryption policy is represented on-disk by a :c:type:`struct +fscrypt_context`. It is up to individual filesystems to decide where +to store it, but normally it would be stored in a hidden extended +attribute. It should *not* be exposed by the xattr-related system +calls such as getxattr() and setxattr() because of the special +semantics of the encryption xattr. (In particular, there would be +much confusion if an encryption policy were to be added to or removed +from anything other than an empty directory.) The struct is defined +as follows:: + + #define FS_KEY_DESCRIPTOR_SIZE 8 + #define FS_KEY_DERIVATION_NONCE_SIZE 16 + + struct fscrypt_context { + u8 format; + u8 contents_encryption_mode; + u8 filenames_encryption_mode; + u8 flags; + u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE]; + u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE]; + }; + +Note that :c:type:`struct fscrypt_context` contains the same +information as :c:type:`struct fscrypt_policy` (see `Setting an +encryption policy`_), except that :c:type:`struct fscrypt_context` +also contains a nonce. The nonce is randomly generated by the kernel +and is used to derive the inode's encryption key as described in +`Per-file keys`_. + +Data path changes +----------------- + +For the read path (->readpage()) of regular files, filesystems can +read the ciphertext into the page cache and decrypt it in-place. The +page lock must be held until decryption has finished, to prevent the +page from becoming visible to userspace prematurely. + +For the write path (->writepage()) of regular files, filesystems +cannot encrypt data in-place in the page cache, since the cached +plaintext must be preserved. Instead, filesystems must encrypt into a +temporary buffer or "bounce page", then write out the temporary +buffer. Some filesystems, such as UBIFS, already use temporary +buffers regardless of encryption. Other filesystems, such as ext4 and +F2FS, have to allocate bounce pages specially for encryption. + +Filename hashing and encoding +----------------------------- + +Modern filesystems accelerate directory lookups by using indexed +directories. An indexed directory is organized as a tree keyed by +filename hashes. When a ->lookup() is requested, the filesystem +normally hashes the filename being looked up so that it can quickly +find the corresponding directory entry, if any. + +With encryption, lookups must be supported and efficient both with and +without the encryption key. Clearly, it would not work to hash the +plaintext filenames, since the plaintext filenames are unavailable +without the key. (Hashing the plaintext filenames would also make it +impossible for the filesystem's fsck tool to optimize encrypted +directories.) Instead, filesystems hash the ciphertext filenames, +i.e. the bytes actually stored on-disk in the directory entries. When +asked to do a ->lookup() with the key, the filesystem just encrypts +the user-supplied name to get the ciphertext. + +Lookups without the key are more complicated. The raw ciphertext may +contain the ``\0`` and ``/`` characters, which are illegal in +filenames. Therefore, readdir() must base64-encode the ciphertext for +presentation. For most filenames, this works fine; on ->lookup(), the +filesystem just base64-decodes the user-supplied name to get back to +the raw ciphertext. + +However, for very long filenames, base64 encoding would cause the +filename length to exceed NAME_MAX. To prevent this, readdir() +actually presents long filenames in an abbreviated form which encodes +a strong "hash" of the ciphertext filename, along with the optional +filesystem-specific hash(es) needed for directory lookups. This +allows the filesystem to still, with a high degree of confidence, map +the filename given in ->lookup() back to a particular directory entry +that was previously listed by readdir(). See :c:type:`struct +fscrypt_digested_name` in the source for more details. + +Note that the precise way that filenames are presented to userspace +without the key is subject to change in the future. It is only meant +as a way to temporarily present valid filenames so that commands like +``rm -r`` work as expected on encrypted directories.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 4cee34c..09bc35fb 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt
@@ -396,6 +396,8 @@ [stack] = the stack of the main process [vdso] = the "virtual dynamic shared object", the kernel system call handler + [anon:<name>] = an anonymous mapping that has been + named by userspace or if empty, the mapping is anonymous. @@ -424,6 +426,7 @@ MMUPageSize: 4 kB Locked: 0 kB VmFlags: rd ex mr mw me dw +Name: name from userspace the first of these lines shows the same information as is displayed for the mapping in /proc/PID/maps. The remaining lines show the size of the mapping @@ -498,6 +501,9 @@ might change in future as well. So each consumer of these flags has to follow each specific kernel version for the exact semantic. +The "Name" field will only be present on a mapping that has been named by +userspace, and will show the name passed in by userspace. + This file is only present if the CONFIG_MMU kernel configuration option is enabled.
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index a054b5a..6a98ad3 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt
@@ -605,6 +605,16 @@ initial value when the blackhole issue goes away. By default, it is set to 1hr. +tcp_fwmark_accept - BOOLEAN + If set, incoming connections to listening sockets that do not have a + socket mark will set the mark of the accepting socket to the fwmark of + the incoming SYN packet. This will cause all packets on that connection + (starting from the first SYNACK) to be sent with that fwmark. The + listening socket's mark is unchanged. Listening sockets that already + have a fwmark set via setsockopt(SOL_SOCKET, SO_MARK, ...) are + unaffected. + Default: 0 + tcp_syn_retries - INTEGER Number of times initial SYNs for an active TCP connection attempt will be retransmitted. Should not be higher than 127. Default value
diff --git a/Documentation/scheduler/sched-energy.txt b/Documentation/scheduler/sched-energy.txt new file mode 100644 index 0000000..dab2f90 --- /dev/null +++ b/Documentation/scheduler/sched-energy.txt
@@ -0,0 +1,362 @@ +Energy cost model for energy-aware scheduling (EXPERIMENTAL) + +Introduction +============= + +The basic energy model uses platform energy data stored in sched_group_energy +data structures attached to the sched_groups in the sched_domain hierarchy. The +energy cost model offers two functions that can be used to guide scheduling +decisions: + +1. static unsigned int sched_group_energy(struct energy_env *eenv) +2. static int energy_diff(struct energy_env *eenv) + +sched_group_energy() estimates the energy consumed by all cpus in a specific +sched_group including any shared resources owned exclusively by this group of +cpus. Resources shared with other cpus are excluded (e.g. later level caches). + +energy_diff() estimates the total energy impact of a utilization change. That +is, adding, removing, or migrating utilization (tasks). + +Both functions use a struct energy_env to specify the scenario to be evaluated: + + struct energy_env { + struct sched_group *sg_top; + struct sched_group *sg_cap; + int cap_idx; + int util_delta; + int src_cpu; + int dst_cpu; + int energy; + }; + +sg_top: sched_group to be evaluated. Not used by energy_diff(). + +sg_cap: sched_group covering the cpus in the same frequency domain. Set by +sched_group_energy(). + +cap_idx: Capacity state to be used for energy calculations. Set by +find_new_capacity(). + +util_delta: Amount of utilization to be added, removed, or migrated. + +src_cpu: Source cpu from where 'util_delta' utilization is removed. Should be +-1 if no source (e.g. task wake-up). + +dst_cpu: Destination cpu where 'util_delta' utilization is added. Should be -1 +if utilization is removed (e.g. terminating tasks). + +energy: Result of sched_group_energy(). + +The metric used to represent utilization is the actual per-entity running time +averaged over time using a geometric series. Very similar to the existing +per-entity load-tracking, but _not_ scaled by task priority and capped by the +capacity of the cpu. The latter property does mean that utilization may +underestimate the compute requirements for task on fully/over utilized cpus. +The greatest potential for energy savings without affecting performance too much +is scenarios where the system isn't fully utilized. If the system is deemed +fully utilized load-balancing should be done with task load (includes task +priority) instead in the interest of fairness and performance. + + +Background and Terminology +=========================== + +To make it clear from the start: + +energy = [joule] (resource like a battery on powered devices) +power = energy/time = [joule/second] = [watt] + +The goal of energy-aware scheduling is to minimize energy, while still getting +the job done. That is, we want to maximize: + + performance [inst/s] + -------------------- + power [W] + +which is equivalent to minimizing: + + energy [J] + ----------- + instruction + +while still getting 'good' performance. It is essentially an alternative +optimization objective to the current performance-only objective for the +scheduler. This alternative considers two objectives: energy-efficiency and +performance. Hence, there needs to be a user controllable knob to switch the +objective. Since it is early days, this is currently a sched_feature +(ENERGY_AWARE). + +The idea behind introducing an energy cost model is to allow the scheduler to +evaluate the implications of its decisions rather than applying energy-saving +techniques blindly that may only have positive effects on some platforms. At +the same time, the energy cost model must be as simple as possible to minimize +the scheduler latency impact. + +Platform topology +------------------ + +The system topology (cpus, caches, and NUMA information, not peripherals) is +represented in the scheduler by the sched_domain hierarchy which has +sched_groups attached at each level that covers one or more cpus (see +sched-domains.txt for more details). To add energy awareness to the scheduler +we need to consider power and frequency domains. + +Power domain: + +A power domain is a part of the system that can be powered on/off +independently. Power domains are typically organized in a hierarchy where you +may be able to power down just a cpu or a group of cpus along with any +associated resources (e.g. shared caches). Powering up a cpu means that all +power domains it is a part of in the hierarchy must be powered up. Hence, it is +more expensive to power up the first cpu that belongs to a higher level power +domain than powering up additional cpus in the same high level domain. Two +level power domain hierarchy example: + + Power source + +-------------------------------+----... +per group PD G G + | +----------+ | + +--------+-------| Shared | (other groups) +per-cpu PD G G | resource | + | | +----------+ + +-------+ +-------+ + | CPU 0 | | CPU 1 | + +-------+ +-------+ + +Frequency domain: + +Frequency domains (P-states) typically cover the same group of cpus as one of +the power domain levels. That is, there might be several smaller power domains +sharing the same frequency (P-state) or there might be a power domain spanning +multiple frequency domains. + +From a scheduling point of view there is no need to know the actual frequencies +[Hz]. All the scheduler cares about is the compute capacity available at the +current state (P-state) the cpu is in and any other available states. For that +reason, and to also factor in any cpu micro-architecture differences, compute +capacity scaling states are called 'capacity states' in this document. For SMP +systems this is equivalent to P-states. For mixed micro-architecture systems +(like ARM big.LITTLE) it is P-states scaled according to the micro-architecture +performance relative to the other cpus in the system. + +Energy modelling: +------------------ + +Due to the hierarchical nature of the power domains, the most obvious way to +model energy costs is therefore to associate power and energy costs with +domains (groups of cpus). Energy costs of shared resources are associated with +the group of cpus that share the resources, only the cost of powering the +cpu itself and any private resources (e.g. private L1 caches) is associated +with the per-cpu groups (lowest level). + +For example, for an SMP system with per-cpu power domains and a cluster level +(group of cpus) power domain we get the overall energy costs to be: + + energy = energy_cluster + n * energy_cpu + +where 'n' is the number of cpus powered up and energy_cluster is the cost paid +as soon as any cpu in the cluster is powered up. + +The power and frequency domains can naturally be mapped onto the existing +sched_domain hierarchy and sched_groups by adding the necessary data to the +existing data structures. + +The energy model considers energy consumption from two contributors (shown in +the illustration below): + +1. Busy energy: Energy consumed while a cpu and the higher level groups that it +belongs to are busy running tasks. Busy energy is associated with the state of +the cpu, not an event. The time the cpu spends in this state varies. Thus, the +most obvious platform parameter for this contribution is busy power +(energy/time). + +2. Idle energy: Energy consumed while a cpu and higher level groups that it +belongs to are idle (in a C-state). Like busy energy, idle energy is associated +with the state of the cpu. Thus, the platform parameter for this contribution +is idle power (energy/time). + +Energy consumed during transitions from an idle-state (C-state) to a busy state +(P-state) or going the other way is ignored by the model to simplify the energy +model calculations. + + + Power + ^ + | busy->idle idle->busy + | transition transition + | + | _ __ + | / \ / \__________________ + |______________/ \ / + | \ / + | Busy \ Idle / Busy + | low P-state \____________/ high P-state + | + +------------------------------------------------------------> time + +Busy |--------------| |-----------------| + +Wakeup |------| |------| + +Idle |------------| + + +The basic algorithm +==================== + +The basic idea is to determine the total energy impact when utilization is +added or removed by estimating the impact at each level in the sched_domain +hierarchy starting from the bottom (sched_group contains just a single cpu). +The energy cost comes from busy time (sched_group is awake because one or more +cpus are busy) and idle time (in an idle-state). Energy model numbers account +for energy costs associated with all cpus in the sched_group as a group. + + for_each_domain(cpu, sd) { + sg = sched_group_of(cpu) + energy_before = curr_util(sg) * busy_power(sg) + + (1-curr_util(sg)) * idle_power(sg) + energy_after = new_util(sg) * busy_power(sg) + + (1-new_util(sg)) * idle_power(sg) + energy_diff += energy_before - energy_after + + } + + return energy_diff + +{curr, new}_util: The cpu utilization at the lowest level and the overall +non-idle time for the entire group for higher levels. Utilization is in the +range 0.0 to 1.0 in the pseudo-code. + +busy_power: The power consumption of the sched_group. + +idle_power: The power consumption of the sched_group when idle. + +Note: It is a fundamental assumption that the utilization is (roughly) scale +invariant. Task utilization tracking factors in any frequency scaling and +performance scaling differences due to difference cpu microarchitectures such +that task utilization can be used across the entire system. + + +Platform energy data +===================== + +struct sched_group_energy can be attached to sched_groups in the sched_domain +hierarchy and has the following members: + +cap_states: + List of struct capacity_state representing the supported capacity states + (P-states). struct capacity_state has two members: cap and power, which + represents the compute capacity and the busy_power of the state. The + list must be ordered by capacity low->high. + +nr_cap_states: + Number of capacity states in cap_states list. + +idle_states: + List of struct idle_state containing idle_state power cost for each + idle-state supported by the system orderd by shallowest state first. + All states must be included at all level in the hierarchy, i.e. a + sched_group spanning just a single cpu must also include coupled + idle-states (cluster states). In addition to the cpuidle idle-states, + the list must also contain an entry for the idling using the arch + default idle (arch_idle_cpu()). Despite this state may not be a true + hardware idle-state it is considered the shallowest idle-state in the + energy model and must be the first entry. cpus may enter this state + (possibly 'active idling') if cpuidle decides not enter a cpuidle + idle-state. Default idle may not be used when cpuidle is enabled. + In this case, it should just be a copy of the first cpuidle idle-state. + +nr_idle_states: + Number of idle states in idle_states list. + +There are no unit requirements for the energy cost data. Data can be normalized +with any reference, however, the normalization must be consistent across all +energy cost data. That is, one bogo-joule/watt must be the same quantity for +data, but we don't care what it is. + +A recipe for platform characterization +======================================= + +Obtaining the actual model data for a particular platform requires some way of +measuring power/energy. There isn't a tool to help with this (yet). This +section provides a recipe for use as reference. It covers the steps used to +characterize the ARM TC2 development platform. This sort of measurements is +expected to be done anyway when tuning cpuidle and cpufreq for a given +platform. + +The energy model needs two types of data (struct sched_group_energy holds +these) for each sched_group where energy costs should be taken into account: + +1. Capacity state information + +A list containing the compute capacity and power consumption when fully +utilized attributed to the group as a whole for each available capacity state. +At the lowest level (group contains just a single cpu) this is the power of the +cpu alone without including power consumed by resources shared with other cpus. +It basically needs to fit the basic modelling approach described in "Background +and Terminology" section: + + energy_system = energy_shared + n * energy_cpu + +for a system containing 'n' busy cpus. Only 'energy_cpu' should be included at +the lowest level. 'energy_shared' is included at the next level which +represents the group of cpus among which the resources are shared. + +This model is, of course, a simplification of reality. Thus, power/energy +attributions might not always exactly represent how the hardware is designed. +Also, busy power is likely to depend on the workload. It is therefore +recommended to use a representative mix of workloads when characterizing the +capacity states. + +If the group has no capacity scaling support, the list will contain a single +state where power is the busy power attributed to the group. The capacity +should be set to a default value (1024). + +When frequency domains include multiple power domains, the group representing +the frequency domain and all child groups share capacity states. This must be +indicated by setting the SD_SHARE_CAP_STATES sched_domain flag. All groups at +all levels that share the capacity state must have the list of capacity states +with the power set to the contribution of the individual group. + +2. Idle power information + +Stored in the idle_states list. The power number is the group idle power +consumption in each idle state as well when the group is idle but has not +entered an idle-state ('active idle' as mentioned earlier). Due to the way the +energy model is defined, the idle power of the deepest group idle state can +alternatively be accounted for in the parent group busy power. In that case the +group idle state power values are offset such that the idle power of the +deepest state is zero. It is less intuitive, but it is easier to measure as +idle power consumed by the group and the busy/idle power of the parent group +cannot be distinguished without per group measurement points. + +Measuring capacity states and idle power: + +The capacity states' capacity and power can be estimated by running a benchmark +workload at each available capacity state. By restricting the benchmark to run +on subsets of cpus it is possible to extrapolate the power consumption of +shared resources. + +ARM TC2 has two clusters of two and three cpus respectively. Each cluster has a +shared L2 cache. TC2 has on-chip energy counters per cluster. Running a +benchmark workload on just one cpu in a cluster means that power is consumed in +the cluster (higher level group) and a single cpu (lowest level group). Adding +another benchmark task to another cpu increases the power consumption by the +amount consumed by the additional cpu. Hence, it is possible to extrapolate the +cluster busy power. + +For platforms that don't have energy counters or equivalent instrumentation +built-in, it may be possible to use an external DAQ to acquire similar data. + +If the benchmark includes some performance score (for example sysbench cpu +benchmark), this can be used to record the compute capacity. + +Measuring idle power requires insight into the idle state implementation on the +particular platform. Specifically, if the platform has coupled idle-states (or +package states). To measure non-coupled per-cpu idle-states it is necessary to +keep one cpu busy to keep any shared resources alive to isolate the idle power +of the cpu from idle/busy power of the shared resources. The cpu can be tricked +into different per-cpu idle states by disabling the other states. Based on +various combinations of measurements with specific cpus busy and disabling +idle-states it is possible to extrapolate the idle-state power.
diff --git a/Documentation/scheduler/sched-pelt.c b/Documentation/scheduler/sched-pelt.c index e421913..20ebf04 100644 --- a/Documentation/scheduler/sched-pelt.c +++ b/Documentation/scheduler/sched-pelt.c
@@ -10,34 +10,34 @@ #include <math.h> #include <stdio.h> -#define HALFLIFE 32 +#define HALFLIFE { 32, 16, 8 } #define SHIFT 32 double y; -void calc_runnable_avg_yN_inv(void) +void calc_runnable_avg_yN_inv(const int halflife) { int i; unsigned int x; printf("static const u32 runnable_avg_yN_inv[] = {"); - for (i = 0; i < HALFLIFE; i++) { + for (i = 0; i < halflife; i++) { x = ((1UL<<32)-1)*pow(y, i); - if (i % 6 == 0) printf("\n\t"); - printf("0x%8x, ", x); + if (i % 4 == 0) printf("\n\t"); + printf("0x%8x,", x); } printf("\n};\n\n"); } -int sum = 1024; +int sum; -void calc_runnable_avg_yN_sum(void) +void calc_runnable_avg_yN_sum(const int halflife) { int i; printf("static const u32 runnable_avg_yN_sum[] = {\n\t 0,"); - for (i = 1; i <= HALFLIFE; i++) { + for (i = 1; i <= halflife; i++) { if (i == 1) sum *= y; else @@ -51,11 +51,10 @@ void calc_runnable_avg_yN_sum(void) printf("\n};\n\n"); } -int n = -1; -/* first period */ -long max = 1024; +int n; +long max; -void calc_converged_max(void) +void calc_converged_max(const int halflife) { long last = 0, y_inv = ((1UL<<32)-1)*y; @@ -73,17 +72,17 @@ void calc_converged_max(void) last = max; } n--; - printf("#define LOAD_AVG_PERIOD %d\n", HALFLIFE); + printf("#define LOAD_AVG_PERIOD %d\n", halflife); printf("#define LOAD_AVG_MAX %ld\n", max); -// printf("#define LOAD_AVG_MAX_N %d\n\n", n); + printf("#define LOAD_AVG_MAX_N %d\n\n", n); } -void calc_accumulated_sum_32(void) +void calc_accumulated_sum_32(const int halflife) { int i, x = sum; printf("static const u32 __accumulated_sum_N32[] = {\n\t 0,"); - for (i = 1; i <= n/HALFLIFE+1; i++) { + for (i = 1; i <= n/halflife+1; i++) { if (i > 1) x = x/2 + sum; @@ -97,12 +96,30 @@ void calc_accumulated_sum_32(void) void main(void) { - printf("/* Generated by Documentation/scheduler/sched-pelt; do not modify. */\n\n"); + int hl_value[] = HALFLIFE; + int hl_count = sizeof(hl_value) / sizeof(int); + int hl_idx, halflife; - y = pow(0.5, 1/(double)HALFLIFE); + printf("/* SPDX-License-Identifier: GPL-2.0 */\n"); + printf("/* Generated by Documentation/scheduler/sched-pelt; do not modify. */\n"); - calc_runnable_avg_yN_inv(); -// calc_runnable_avg_yN_sum(); - calc_converged_max(); -// calc_accumulated_sum_32(); + for (hl_idx = 0; hl_idx < hl_count; ++hl_idx) { + halflife = hl_value[hl_idx]; + + y = pow(0.5, 1/(double)halflife); + sum = 1024; + /* first period */ + max = 1024; + n = -1; + + printf("\n#ifdef CONFIG_PELT_UTIL_HALFLIFE_%d\n", halflife); + calc_runnable_avg_yN_inv(halflife); + calc_runnable_avg_yN_sum(halflife); + calc_converged_max(halflife); + /* + * calc_accumulated_sum_32(halflife) precomputed load sum table of half-life, + * not used yet. + */ + printf("#endif\n"); + } }
diff --git a/Documentation/scheduler/sched-tune.txt b/Documentation/scheduler/sched-tune.txt new file mode 100644 index 0000000..1a10371 --- /dev/null +++ b/Documentation/scheduler/sched-tune.txt
@@ -0,0 +1,388 @@ + Central, scheduler-driven, power-performance control + (EXPERIMENTAL) + +Abstract +======== + +The topic of a single simple power-performance tunable, that is wholly +scheduler centric, and has well defined and predictable properties has come up +on several occasions in the past [1,2]. With techniques such as a scheduler +driven DVFS [3], we now have a good framework for implementing such a tunable. +This document describes the overall ideas behind its design and implementation. + + +Table of Contents +================= + +1. Motivation +2. Introduction +3. Signal Boosting Strategy +4. OPP selection using boosted CPU utilization +5. Per task group boosting +6. Per-task wakeup-placement-strategy Selection +7. Question and Answers + - What about "auto" mode? + - What about boosting on a congested system? + - How CPUs are boosted when we have tasks with multiple boost values? +8. References + + +1. Motivation +============= + +Schedutil [3] is a utilization-driven cpufreq governor which allows the +scheduler to select the optimal DVFS operating point (OPP) for running a task +allocated to a CPU. + +However, sometimes it may be desired to intentionally boost the performance of +a workload even if that could imply a reasonable increase in energy +consumption. For example, in order to reduce the response time of a task, we +may want to run the task at a higher OPP than the one that is actually required +by it's CPU bandwidth demand. + +This last requirement is especially important if we consider that one of the +main goals of the utilization-driven governor component is to replace all +currently available CPUFreq policies. Since schedutil is event-based, as +opposed to the sampling driven governors we currently have, they are already +more responsive at selecting the optimal OPP to run tasks allocated to a CPU. +However, just tracking the actual task utilization may not be enough from a +performance standpoint. For example, it is not possible to get behaviors +similar to those provided by the "performance" and "interactive" CPUFreq +governors. + +This document describes an implementation of a tunable, stacked on top of the +utilization-driven governor which extends its functionality to support task +performance boosting. + +By "performance boosting" we mean the reduction of the time required to +complete a task activation, i.e. the time elapsed from a task wakeup to its +next deactivation (e.g. because it goes back to sleep or it terminates). For +example, if we consider a simple periodic task which executes the same workload +for 5[s] every 20[s] while running at a certain OPP, a boosted execution of +that task must complete each of its activations in less than 5[s]. + +The rest of this document introduces in more details the proposed solution +which has been named SchedTune. + + +2. Introduction +=============== + +SchedTune exposes a simple user-space interface provided through a new +CGroup controller 'stune' which provides two power-performance tunables +per group: + + /<stune cgroup mount point>/schedtune.prefer_idle + /<stune cgroup mount point>/schedtune.boost + +The CGroup implementation permits arbitrary user-space defined task +classification to tune the scheduler for different goals depending on the +specific nature of the task, e.g. background vs interactive vs low-priority. + +More details are given in section 5. + +2.1 Boosting +============ + +The boost value is expressed as an integer in the range [0..100]. + +A value of 0 (default) configures the CFS scheduler for maximum energy +efficiency. This means that schedutil runs the tasks at the minimum OPP +required to satisfy their workload demand. + +A value of 100 configures scheduler for maximum performance, which translates +to the selection of the maximum OPP on that CPU. + +The range between 0 and 100 can be set to satisfy other scenarios suitably. For +example to satisfy interactive response or depending on other system events +(battery level etc). + +The overall design of the SchedTune module is built on top of "Per-Entity Load +Tracking" (PELT) signals and schedutil by introducing a bias on the OPP +selection. + +Each time a task is allocated on a CPU, cpufreq is given the opportunity to tune +the operating frequency of that CPU to better match the workload demand. The +selection of the actual OPP being activated is influenced by the boost value +for the task CGroup. + +This simple biasing approach leverages existing frameworks, which means minimal +modifications to the scheduler, and yet it allows to achieve a range of +different behaviours all from a single simple tunable knob. + +In EAS schedulers, we use boosted task and CPU utilization for energy +calculation and energy-aware task placement. + +2.2 prefer_idle +=============== + +This is a flag which indicates to the scheduler that userspace would like +the scheduler to focus on energy or to focus on performance. + +A value of 0 (default) signals to the CFS scheduler that tasks in this group +can be placed according to the energy-aware wakeup strategy. + +A value of 1 signals to the CFS scheduler that tasks in this group should be +placed to minimise wakeup latency. + +Android platforms typically use this flag for application tasks which the +user is currently interacting with. + + +3. Signal Boosting Strategy +=========================== + +The whole PELT machinery works based on the value of a few load tracking signals +which basically track the CPU bandwidth requirements for tasks and the capacity +of CPUs. The basic idea behind the SchedTune knob is to artificially inflate +some of these load tracking signals to make a task or RQ appears more demanding +that it actually is. + +Which signals have to be inflated depends on the specific "consumer". However, +independently from the specific (signal, consumer) pair, it is important to +define a simple and possibly consistent strategy for the concept of boosting a +signal. + +A boosting strategy defines how the "abstract" user-space defined +sched_cfs_boost value is translated into an internal "margin" value to be added +to a signal to get its inflated value: + + margin := boosting_strategy(sched_cfs_boost, signal) + boosted_signal := signal + margin + +The boosting strategy currently implemented in SchedTune is called 'Signal +Proportional Compensation' (SPC). With SPC, the sched_cfs_boost value is used to +compute a margin which is proportional to the complement of the original signal. +When a signal has a maximum possible value, its complement is defined as +the delta from the actual value and its possible maximum. + +Since the tunable implementation uses signals which have SCHED_CAPACITY_SCALE as +the maximum possible value, the margin becomes: + + margin := sched_cfs_boost * (SCHED_CAPACITY_SCALE - signal) + +Using this boosting strategy: +- a 100% sched_cfs_boost means that the signal is scaled to the maximum value +- each value in the range of sched_cfs_boost effectively inflates the signal in + question by a quantity which is proportional to the maximum value. + +For example, by applying the SPC boosting strategy to the selection of the OPP +to run a task it is possible to achieve these behaviors: + +- 0% boosting: run the task at the minimum OPP required by its workload +- 100% boosting: run the task at the maximum OPP available for the CPU +- 50% boosting: run at the half-way OPP between minimum and maximum + +Which means that, at 50% boosting, a task will be scheduled to run at half of +the maximum theoretically achievable performance on the specific target +platform. + +A graphical representation of an SPC boosted signal is represented in the +following figure where: + a) "-" represents the original signal + b) "b" represents a 50% boosted signal + c) "p" represents a 100% boosted signal + + + ^ + | SCHED_CAPACITY_SCALE + +-----------------------------------------------------------------+ + |pppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppp + | + | boosted_signal + | bbbbbbbbbbbbbbbbbbbbbbbb + | + | original signal + | bbbbbbbbbbbbbbbbbbbbbbbb+----------------------+ + | | + |bbbbbbbbbbbbbbbbbb | + | | + | | + | | + | +-----------------------+ + | | + | | + | | + |------------------+ + | + | + +-----------------------------------------------------------------------> + +The plot above shows a ramped load signal (titled 'original_signal') and it's +boosted equivalent. For each step of the original signal the boosted signal +corresponding to a 50% boost is midway from the original signal and the upper +bound. Boosting by 100% generates a boosted signal which is always saturated to +the upper bound. + + +4. OPP selection using boosted CPU utilization +============================================== + +It is worth calling out that the implementation does not introduce any new load +signals. Instead, it provides an API to tune existing signals. This tuning is +done on demand and only in scheduler code paths where it is sensible to do so. +The new API calls are defined to return either the default signal or a boosted +one, depending on the value of sched_cfs_boost. This is a clean an non invasive +modification of the existing existing code paths. + +The signal representing a CPU's utilization is boosted according to the +previously described SPC boosting strategy. To schedutil, this allows a CPU +(ie CFS run-queue) to appear more used then it actually is. + +Thus, with the sched_cfs_boost enabled we have the following main functions to +get the current utilization of a CPU: + + cpu_util() + boosted_cpu_util() + +The new boosted_cpu_util() is similar to the first but returns a boosted +utilization signal which is a function of the sched_cfs_boost value. + +This function is used in the CFS scheduler code paths where schedutil needs to +decide the OPP to run a CPU at. For example, this allows selecting the highest +OPP for a CPU which has the boost value set to 100%. + + +5. Per task group boosting +========================== + +On battery powered devices there usually are many background services which are +long running and need energy efficient scheduling. On the other hand, some +applications are more performance sensitive and require an interactive +response and/or maximum performance, regardless of the energy cost. + +To better service such scenarios, the SchedTune implementation has an extension +that provides a more fine grained boosting interface. + +A new CGroup controller, namely "schedtune", can be enabled which allows to +defined and configure task groups with different boosting values. +Tasks that require special performance can be put into separate CGroups. +The value of the boost associated with the tasks in this group can be specified +using a single knob exposed by the CGroup controller: + + schedtune.boost + +This knob allows the definition of a boost value that is to be used for +SPC boosting of all tasks attached to this group. + +The current schedtune controller implementation is really simple and has these +main characteristics: + + 1) It is only possible to create 1 level depth hierarchies + + The root control groups define the system-wide boost value to be applied + by default to all tasks. Its direct subgroups are named "boost groups" and + they define the boost value for specific set of tasks. + Further nested subgroups are not allowed since they do not have a sensible + meaning from a user-space standpoint. + + 2) It is possible to define only a limited number of "boost groups" + + This number is defined at compile time and by default configured to 16. + This is a design decision motivated by two main reasons: + a) In a real system we do not expect utilization scenarios with more than + a few boost groups. For example, a reasonable collection of groups could + be just "background", "interactive" and "performance". + b) It simplifies the implementation considerably, especially for the code + which has to compute the per CPU boosting once there are multiple + RUNNABLE tasks with different boost values. + +Such a simple design should allow servicing the main utilization scenarios +identified so far. It provides a simple interface which can be used to manage +the power-performance of all tasks or only selected tasks. +Moreover, this interface can be easily integrated by user-space run-times (e.g. +Android, ChromeOS) to implement a QoS solution for task boosting based on tasks +classification, which has been a long standing requirement. + +Setup and usage +--------------- + +0. Use a kernel with CONFIG_SCHED_TUNE support enabled + +1. Check that the "schedtune" CGroup controller is available: + + root@linaro-nano:~# cat /proc/cgroups + #subsys_name hierarchy num_cgroups enabled + cpuset 0 1 1 + cpu 0 1 1 + schedtune 0 1 1 + +2. Mount a tmpfs to create the CGroups mount point (Optional) + + root@linaro-nano:~# sudo mount -t tmpfs cgroups /sys/fs/cgroup + +3. Mount the "schedtune" controller + + root@linaro-nano:~# mkdir /sys/fs/cgroup/stune + root@linaro-nano:~# sudo mount -t cgroup -o schedtune stune /sys/fs/cgroup/stune + +4. Create task groups and configure their specific boost value (Optional) + + For example here we create a "performance" boost group configure to boost + all its tasks to 100% + + root@linaro-nano:~# mkdir /sys/fs/cgroup/stune/performance + root@linaro-nano:~# echo 100 > /sys/fs/cgroup/stune/performance/schedtune.boost + +5. Move tasks into the boost group + + For example, the following moves the tasks with PID $TASKPID (and all its + threads) into the "performance" boost group. + + root@linaro-nano:~# echo "TASKPID > /sys/fs/cgroup/stune/performance/cgroup.procs + +This simple configuration allows only the threads of the $TASKPID task to run, +when needed, at the highest OPP in the most capable CPU of the system. + + +6. Per-task wakeup-placement-strategy Selection +=============================================== + +Many devices have a number of CFS tasks in use which require an absolute +minimum wakeup latency, and many tasks for which wakeup latency is not +important. + +For touch-driven environments, removing additional wakeup latency can be +critical. + +When you use the Schedtume CGroup controller, you have access to a second +parameter which allows a group to be marked such that energy_aware task +placement is bypassed for tasks belonging to that group. + +prefer_idle=0 (default - use energy-aware task placement if available) +prefer_idle=1 (never use energy-aware task placement for these tasks) + +Since the regular wakeup task placement algorithm in CFS is biased for +performance, this has the effect of restoring minimum wakeup latency +for the desired tasks whilst still allowing energy-aware wakeup placement +to save energy for other tasks. + + +7. Question and Answers +======================= + +What about "auto" mode? +----------------------- + +The 'auto' mode as described in [5] can be implemented by interfacing SchedTune +with some suitable user-space element. This element could use the exposed +system-wide or cgroup based interface. + +How are multiple groups of tasks with different boost values managed? +--------------------------------------------------------------------- + +The current SchedTune implementation keeps track of the boosted RUNNABLE tasks +on a CPU. The CPU utilization seen by schedutil (and used to select an +appropriate OPP) is boosted with a value which is the maximum of the boost +values of the currently RUNNABLE tasks in its RQ. + +This allows cpufreq to boost a CPU only while there are boosted tasks ready +to run and switch back to the energy efficient mode as soon as the last boosted +task is dequeued. + + +8. References +============= +[1] http://lwn.net/Articles/552889 +[2] http://lkml.org/lkml/2012/5/18/91 +[3] https://lkml.org/lkml/2016/3/29/1041
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 694968c..b757d6e 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt
@@ -653,7 +653,8 @@ perf_event_paranoid: Controls use of the performance events system by unprivileged -users (without CAP_SYS_ADMIN). The default value is 2. +users (without CAP_SYS_ADMIN). The default value is 3 if +CONFIG_SECURITY_PERF_EVENTS_RESTRICT is set, or 2 otherwise. -1: Allow use of (almost) all events by all users Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK @@ -661,6 +662,7 @@ Disallow raw tracepoint access by users without CAP_SYS_ADMIN >=1: Disallow CPU event access by users without CAP_SYS_ADMIN >=2: Disallow kernel profiling by users without CAP_SYS_ADMIN +>=3: Disallow all event access by users without CAP_SYS_ADMIN ==============================================================
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 9baf66a..1d1f2cb 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt
@@ -30,6 +30,7 @@ - dirty_writeback_centisecs - drop_caches - extfrag_threshold +- extra_free_kbytes - hugepages_treat_as_movable - hugetlb_shm_group - laptop_mode @@ -260,6 +261,21 @@ ============================================================== +extra_free_kbytes + +This parameter tells the VM to keep extra free memory between the threshold +where background reclaim (kswapd) kicks in, and the threshold where direct +reclaim (by allocating processes) kicks in. + +This is useful for workloads that require low latency memory allocations +and have a bounded burstiness in memory allocations, for example a +realtime application that receives and transmits network traffic +(causing in-kernel memory allocations) with a maximum total message burst +size of 200MB may need 200MB of extra free memory to avoid direct reclaim +related latencies. + +============================================================== + hugepages_treat_as_movable This parameter controls whether we can allocate hugepages from ZONE_MOVABLE
diff --git a/Documentation/trace/events-power.txt b/Documentation/trace/events-power.txt index 21d514ce..4d817d5 100644 --- a/Documentation/trace/events-power.txt +++ b/Documentation/trace/events-power.txt
@@ -25,6 +25,7 @@ cpu_idle "state=%lu cpu_id=%lu" cpu_frequency "state=%lu cpu_id=%lu" +cpu_frequency_limits "min=%lu max=%lu cpu_id=%lu" A suspend event is used to indicate the system going in and out of the suspend mode:
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index d4601df..f2fcbb7 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt
@@ -2407,6 +2407,35 @@ 1) 1.449 us | } +You can disable the hierarchical function call formatting and instead print a +flat list of function entry and return events. This uses the format described +in the Output Formatting section and respects all the trace options that +control that formatting. Hierarchical formatting is the default. + + hierachical: echo nofuncgraph-flat > trace_options + flat: echo funcgraph-flat > trace_options + + ie: + + # tracer: function_graph + # + # entries-in-buffer/entries-written: 68355/68355 #P:2 + # + # _-----=> irqs-off + # / _----=> need-resched + # | / _---=> hardirq/softirq + # || / _--=> preempt-depth + # ||| / delay + # TASK-PID CPU# |||| TIMESTAMP FUNCTION + # | | | |||| | | + sh-1806 [001] d... 198.843443: graph_ent: func=_raw_spin_lock + sh-1806 [001] d... 198.843445: graph_ent: func=__raw_spin_lock + sh-1806 [001] d..1 198.843447: graph_ret: func=__raw_spin_lock + sh-1806 [001] d..1 198.843449: graph_ret: func=_raw_spin_lock + sh-1806 [001] d..1 198.843451: graph_ent: func=_raw_spin_unlock_irqrestore + sh-1806 [001] d... 198.843453: graph_ret: func=_raw_spin_unlock_irqrestore + + You might find other useful features for this tracer in the following "dynamic ftrace" section such as tracing only specific functions or tasks.
diff --git a/Makefile b/Makefile index d5b20b6..eddd513 100644 --- a/Makefile +++ b/Makefile
@@ -372,6 +372,7 @@ # Make variables (CC, etc...) AS = $(CROSS_COMPILE)as LD = $(CROSS_COMPILE)ld +LDGOLD = $(CROSS_COMPILE)ld.gold CC = $(CROSS_COMPILE)gcc CPP = $(CC) -E AR = $(CROSS_COMPILE)ar @@ -479,7 +480,11 @@ ifeq ($(cc-name),clang) ifneq ($(CROSS_COMPILE),) -CLANG_FLAGS := --target=$(notdir $(CROSS_COMPILE:%-=%)) +CLANG_TRIPLE ?= $(CROSS_COMPILE) +CLANG_FLAGS := --target=$(notdir $(CLANG_TRIPLE:%-=%)) +ifeq ($(shell $(srctree)/scripts/clang-android.sh $(CC) $(CLANG_FLAGS)), y) +$(error "Clang with Android --target detected. Did you specify CLANG_TRIPLE?") +endif GCC_TOOLCHAIN_DIR := $(dir $(shell which $(LD))) CLANG_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR) GCC_TOOLCHAIN := $(realpath $(GCC_TOOLCHAIN_DIR)/..) @@ -638,6 +643,20 @@ CFLAGS_KCOV := $(call cc-option,-fsanitize-coverage=trace-pc,) export CFLAGS_GCOV CFLAGS_KCOV +# Make toolchain changes before including arch/$(SRCARCH)/Makefile to ensure +# ar/cc/ld-* macros return correct values. +ifdef CONFIG_LTO_CLANG +# use GNU gold with LLVMgold for LTO linking, and LD for vmlinux_link +LDFINAL_vmlinux := $(LD) +LD := $(LDGOLD) +LDFLAGS += -plugin LLVMgold.so +# use llvm-ar for building symbol tables from IR files, and llvm-dis instead +# of objdump for processing symbol versions and exports +LLVM_AR := llvm-ar +LLVM_DIS := llvm-dis +export LLVM_AR LLVM_DIS +endif + # The arch Makefile can set ARCH_{CPP,A,C}FLAGS to override the default # values of the respective KBUILD_* variables ARCH_CPPFLAGS := @@ -718,6 +737,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier) KBUILD_CFLAGS += $(call cc-disable-warning, gnu) KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) +KBUILD_CFLAGS += $(call cc-disable-warning, duplicate-decl-specifier) # Quiet clang warning: comparison of unsigned expression < 0 is always false KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare) # CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the @@ -793,6 +813,53 @@ KBUILD_CFLAGS += $(call cc-option,-fdata-sections,) endif +ifdef CONFIG_LTO_CLANG +lto-clang-flags := -flto -fvisibility=hidden + +# allow disabling only clang LTO where needed +DISABLE_LTO_CLANG := -fno-lto -fvisibility=default +export DISABLE_LTO_CLANG +endif + +ifdef CONFIG_LTO +lto-flags := $(lto-clang-flags) +KBUILD_CFLAGS += $(lto-flags) + +DISABLE_LTO := $(DISABLE_LTO_CLANG) +export DISABLE_LTO + +# LDFINAL_vmlinux and LDFLAGS_FINAL_vmlinux can be set to override +# the linker and flags for vmlinux_link. +export LDFINAL_vmlinux LDFLAGS_FINAL_vmlinux +endif + +ifdef CONFIG_CFI_CLANG +cfi-clang-flags += -fsanitize=cfi +DISABLE_CFI_CLANG := -fno-sanitize=cfi +ifdef CONFIG_MODULES +cfi-clang-flags += -fsanitize-cfi-cross-dso +DISABLE_CFI_CLANG += -fno-sanitize-cfi-cross-dso +endif +ifdef CONFIG_CFI_PERMISSIVE +cfi-clang-flags += -fsanitize-recover=cfi -fno-sanitize-trap=cfi +endif + +# also disable CFI when LTO is disabled +DISABLE_LTO_CLANG += $(DISABLE_CFI_CLANG) +# allow disabling only clang CFI where needed +export DISABLE_CFI_CLANG +endif + +ifdef CONFIG_CFI +# cfi-flags are re-tested in prepare-compiler-check +cfi-flags := $(cfi-clang-flags) +KBUILD_CFLAGS += $(cfi-flags) + +DISABLE_CFI := $(DISABLE_CFI_CLANG) +DISABLE_LTO += $(DISABLE_CFI) +export DISABLE_CFI +endif + # arch Makefile may override CC so keep this after arch Makefile is included NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include) CHECKFLAGS += $(NOSTDINC_FLAGS) @@ -1116,6 +1183,22 @@ # CC_STACKPROTECTOR_STRONG! Why did it build with _REGULAR?!") PHONY += prepare-compiler-check prepare-compiler-check: FORCE +# Make sure we're using a supported toolchain with LTO_CLANG +ifdef CONFIG_LTO_CLANG + ifneq ($(call clang-ifversion, -ge, 0500, y), y) + @echo Cannot use CONFIG_LTO_CLANG: requires clang 5.0 or later >&2 && exit 1 + endif + ifneq ($(call gold-ifversion, -ge, 112000000, y), y) + @echo Cannot use CONFIG_LTO_CLANG: requires GNU gold 1.12 or later >&2 && exit 1 + endif +endif +# Make sure compiler supports LTO flags +ifdef lto-flags + ifeq ($(call cc-option, $(lto-flags)),) + @echo Cannot use CONFIG_LTO: $(lto-flags) not supported by compiler \ + >&2 && exit 1 + endif +endif # Make sure compiler supports requested stack protector flag. ifdef stackp-name ifeq ($(call cc-option, $(stackp-flag)),) @@ -1130,6 +1213,11 @@ $(stackp-flag) available but compiler is broken >&2 && exit 1 endif endif +ifdef cfi-flags + ifeq ($(call cc-option, $(cfi-flags)),) + @echo Cannot use CONFIG_CFI: $(cfi-flags) not supported by compiler >&2 && exit 1 + endif +endif @: # Generate some files @@ -1589,7 +1677,8 @@ -o -name modules.builtin -o -name '.tmp_*.o.*' \ -o -name '*.c.[012]*.*' \ -o -name '*.ll' \ - -o -name '*.gcno' \) -type f -print | xargs rm -f + -o -name '*.gcno' \ + -o -name '*.*.symversions' \) -type f -print | xargs rm -f # Generate tags for editors # ---------------------------------------------------------------------------
diff --git a/arch/Kconfig b/arch/Kconfig index 77b3e21..0f23636 100644 --- a/arch/Kconfig +++ b/arch/Kconfig
@@ -611,6 +611,75 @@ sections (e.g., '.text.init'). Typically '.' in section names is used to distinguish them from label names / C identifiers. +config LTO + def_bool n + +config ARCH_SUPPORTS_LTO_CLANG + bool + help + An architecture should select this option it supports: + - compiling with clang, + - compiling inline assembly with clang's integrated assembler, + - and linking with either lld or GNU gold w/ LLVMgold. + +choice + prompt "Link-Time Optimization (LTO) (EXPERIMENTAL)" + default LTO_NONE + help + This option turns on Link-Time Optimization (LTO). + +config LTO_NONE + bool "None" + +config LTO_CLANG + bool "Use clang Link Time Optimization (LTO) (EXPERIMENTAL)" + depends on ARCH_SUPPORTS_LTO_CLANG + depends on !FTRACE_MCOUNT_RECORD || HAVE_C_RECORDMCOUNT + depends on !KASAN + select LTO + select THIN_ARCHIVES + select LD_DEAD_CODE_DATA_ELIMINATION + help + This option enables clang's Link Time Optimization (LTO), which allows + the compiler to optimize the kernel globally at link time. If you + enable this option, the compiler generates LLVM IR instead of object + files, and the actual compilation from IR occurs at the LTO link step, + which may take several minutes. + + If you select this option, you must compile the kernel with clang >= + 5.0 (make CC=clang) and GNU gold from binutils >= 2.27, and have the + LLVMgold plug-in in LD_LIBRARY_PATH. + +endchoice + +config CFI + bool + +config CFI_PERMISSIVE + bool "Use CFI in permissive mode" + depends on CFI + help + When selected, Control Flow Integrity (CFI) violations result in a + warning instead of a kernel panic. This option is useful for finding + CFI violations in drivers during development. + +config CFI_CLANG + bool "Use clang Control Flow Integrity (CFI) (EXPERIMENTAL)" + depends on LTO_CLANG + depends on KALLSYMS + select CFI + help + This option enables clang Control Flow Integrity (CFI), which adds + runtime checking for indirect function calls. + +config CFI_CLANG_SHADOW + bool "Use CFI shadow to speed up cross-module checks" + default y + depends on CFI_CLANG + help + If you select this option, the kernel builds a fast look-up table of + CFI check functions in loaded modules to reduce overhead. + config HAVE_ARCH_WITHIN_STACK_FRAMES bool help
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index d1346a1..8586381 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig
@@ -1826,6 +1826,15 @@ help Say Y if you want to run Linux in a Virtual Machine on Xen on ARM. +config ARM_FLUSH_CONSOLE_ON_RESTART + bool "Force flush the console on restart" + help + If the console is locked while the system is rebooted, the messages + in the temporary logbuffer would not have propogated to all the + console drivers. This option forces the console lock to be + released if it failed to be acquired, which will cause all the + pending messages to be flushed. + endmenu menu "Boot options" @@ -1854,6 +1863,21 @@ This was deprecated in 2001 and announced to live on for 5 years. Some old boot loaders still use this way. +config BUILD_ARM_APPENDED_DTB_IMAGE + bool "Build a concatenated zImage/dtb by default" + depends on OF + help + Enabling this option will cause a concatenated zImage and list of + DTBs to be built by default (instead of a standalone zImage.) + The image will built in arch/arm/boot/zImage-dtb + +config BUILD_ARM_APPENDED_DTB_IMAGE_NAMES + string "Default dtb names" + depends on BUILD_ARM_APPENDED_DTB_IMAGE + help + Space separated list of names of dtbs to append when + building a concatenated zImage-dtb. + # Compressed boot loader in ROM. Yes, we really want to ask about # TEXT and BSS so we preserve their values in the config files. config ZBOOT_ROM_TEXT
diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 17e80f4..826dff0 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile
@@ -303,6 +303,8 @@ boot := arch/arm/boot ifeq ($(CONFIG_XIP_KERNEL),y) KBUILD_IMAGE := $(boot)/xipImage +else ifeq ($(CONFIG_BUILD_ARM_APPENDED_DTB_IMAGE),y) +KBUILD_IMAGE := $(boot)/zImage-dtb else KBUILD_IMAGE := $(boot)/zImage endif @@ -356,6 +358,9 @@ $(Q)$(MAKE) $(build)=arch/arm/vdso $@ endif +zImage-dtb: vmlinux scripts dtbs + $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@ + # We use MRPROPER_FILES and CLEAN_FILES now archclean: $(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile index 50f8d1be..da75630 100644 --- a/arch/arm/boot/Makefile +++ b/arch/arm/boot/Makefile
@@ -16,6 +16,7 @@ ifneq ($(MACHINE),) include $(MACHINE)/Makefile.boot endif +include $(srctree)/arch/arm/boot/dts/Makefile # Note: the following conditions must always be true: # ZRELADDR == virt_to_phys(PAGE_OFFSET + TEXT_OFFSET) @@ -29,6 +30,14 @@ targets := Image zImage xipImage bootpImage uImage +DTB_NAMES := $(subst $\",,$(CONFIG_BUILD_ARM_APPENDED_DTB_IMAGE_NAMES)) +ifneq ($(DTB_NAMES),) +DTB_LIST := $(addsuffix .dtb,$(DTB_NAMES)) +else +DTB_LIST := $(dtb-y) +endif +DTB_OBJS := $(addprefix $(obj)/dts/,$(DTB_LIST)) + ifeq ($(CONFIG_XIP_KERNEL),y) $(obj)/xipImage: vmlinux FORCE @@ -55,6 +64,10 @@ $(obj)/zImage: $(obj)/compressed/vmlinux FORCE $(call if_changed,objcopy) +$(obj)/zImage-dtb: $(obj)/zImage $(DTB_OBJS) FORCE + $(call if_changed,cat) + @echo ' Kernel: $@ is ready' + endif ifneq ($(LOADADDR),)
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 5f687ba..03c1227 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S
@@ -792,6 +792,8 @@ bic r6, r6, #1 << 31 @ 32-bit translation system bic r6, r6, #(7 << 0) | (1 << 4) @ use only ttbr0 mcrne p15, 0, r3, c2, c0, 0 @ load page table pointer + mcrne p15, 0, r0, c8, c7, 0 @ flush I,D TLBs + mcr p15, 0, r0, c7, c5, 4 @ ISB mcrne p15, 0, r1, c3, c0, 0 @ load domain access control mcrne p15, 0, r6, c2, c0, 2 @ load ttb control #endif
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index eff87a3..86e591c 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile
@@ -1074,5 +1074,15 @@ dtstree := $(srctree)/$(src) dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts)) -always := $(dtb-y) +DTB_NAMES := $(subst $\",,$(CONFIG_BUILD_ARM_APPENDED_DTB_IMAGE_NAMES)) +ifneq ($(DTB_NAMES),) +DTB_LIST := $(addsuffix .dtb,$(DTB_NAMES)) +else +DTB_LIST := $(dtb-y) +endif + +targets += dtbs dtbs_install +targets += $(DTB_LIST) + +always := $(DTB_LIST) clean-files := *.dtb
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts index a4c7713..c5c365f 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
@@ -41,6 +41,7 @@ cci-control-port = <&cci_control1>; cpu-idle-states = <&CLUSTER_SLEEP_BIG>; capacity-dmips-mhz = <1024>; + sched-energy-costs = <&CPU_COST_A15 &CLUSTER_COST_A15>; }; cpu1: cpu@1 { @@ -50,6 +51,7 @@ cci-control-port = <&cci_control1>; cpu-idle-states = <&CLUSTER_SLEEP_BIG>; capacity-dmips-mhz = <1024>; + sched-energy-costs = <&CPU_COST_A15 &CLUSTER_COST_A15>; }; cpu2: cpu@2 { @@ -59,6 +61,7 @@ cci-control-port = <&cci_control2>; cpu-idle-states = <&CLUSTER_SLEEP_LITTLE>; capacity-dmips-mhz = <516>; + sched-energy-costs = <&CPU_COST_A7 &CLUSTER_COST_A7>; }; cpu3: cpu@3 { @@ -68,6 +71,7 @@ cci-control-port = <&cci_control2>; cpu-idle-states = <&CLUSTER_SLEEP_LITTLE>; capacity-dmips-mhz = <516>; + sched-energy-costs = <&CPU_COST_A7 &CLUSTER_COST_A7>; }; cpu4: cpu@4 { @@ -77,6 +81,7 @@ cci-control-port = <&cci_control2>; cpu-idle-states = <&CLUSTER_SLEEP_LITTLE>; capacity-dmips-mhz = <516>; + sched-energy-costs = <&CPU_COST_A7 &CLUSTER_COST_A7>; }; idle-states { @@ -96,6 +101,77 @@ min-residency-us = <2500>; }; }; + + energy-costs { + CPU_COST_A15: core-cost0 { + busy-cost-data = < + 426 2021 + 512 2312 + 597 2756 + 682 3125 + 768 3524 + 853 3846 + 938 5177 + 1024 6997 + >; + idle-cost-data = < + 0 + 0 + 0 + >; + }; + CPU_COST_A7: core-cost1 { + busy-cost-data = < + 150 187 + 172 275 + 215 334 + 258 407 + 301 447 + 344 549 + 387 761 + 430 1024 + >; + idle-cost-data = < + 0 + 0 + 0 + >; + }; + CLUSTER_COST_A15: cluster-cost0 { + busy-cost-data = < + 426 7920 + 512 8165 + 597 8172 + 682 8195 + 768 8265 + 853 8446 + 938 11426 + 1024 15200 + >; + idle-cost-data = < + 70 + 70 + 25 + >; + }; + CLUSTER_COST_A7: cluster-cost1 { + busy-cost-data = < + 150 2967 + 172 2792 + 215 2810 + 258 2815 + 301 2919 + 344 2847 + 387 3917 + 430 4905 + >; + idle-cost-data = < + 25 + 25 + 10 + >; + }; + }; }; memory@80000000 {
diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig index e5ad070..f07f479 100644 --- a/arch/arm/common/Kconfig +++ b/arch/arm/common/Kconfig
@@ -15,3 +15,7 @@ config SHARP_SCOOP bool + +config FIQ_GLUE + bool + select FIQ
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 70b4a14..10b5064 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile
@@ -5,6 +5,7 @@ obj-y += firmware.o +obj-$(CONFIG_FIQ_GLUE) += fiq_glue.o fiq_glue_setup.o obj-$(CONFIG_SA1111) += sa1111.o obj-$(CONFIG_DMABOUNCE) += dmabounce.o obj-$(CONFIG_SHARP_LOCOMO) += locomo.o
diff --git a/arch/arm/common/fiq_glue.S b/arch/arm/common/fiq_glue.S new file mode 100644 index 0000000..24b42ce --- /dev/null +++ b/arch/arm/common/fiq_glue.S
@@ -0,0 +1,118 @@ +/* + * Copyright (C) 2008 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + + .global fiq_glue_end + + /* fiq stack: r0-r15,cpsr,spsr of interrupted mode */ + +ENTRY(fiq_glue) + /* store pc, cpsr from previous mode, reserve space for spsr */ + mrs r12, spsr + sub lr, lr, #4 + subs r10, #1 + bne nested_fiq + + str r12, [sp, #-8]! + str lr, [sp, #-4]! + + /* store r8-r14 from previous mode */ + sub sp, sp, #(7 * 4) + stmia sp, {r8-r14}^ + nop + + /* store r0-r7 from previous mode */ + stmfd sp!, {r0-r7} + + /* setup func(data,regs) arguments */ + mov r0, r9 + mov r1, sp + mov r3, r8 + + mov r7, sp + + /* Get sp and lr from non-user modes */ + and r4, r12, #MODE_MASK + cmp r4, #USR_MODE + beq fiq_from_usr_mode + + mov r7, sp + orr r4, r4, #(PSR_I_BIT | PSR_F_BIT) + msr cpsr_c, r4 + str sp, [r7, #(4 * 13)] + str lr, [r7, #(4 * 14)] + mrs r5, spsr + str r5, [r7, #(4 * 17)] + + cmp r4, #(SVC_MODE | PSR_I_BIT | PSR_F_BIT) + /* use fiq stack if we reenter this mode */ + subne sp, r7, #(4 * 3) + +fiq_from_usr_mode: + msr cpsr_c, #(SVC_MODE | PSR_I_BIT | PSR_F_BIT) + mov r2, sp + sub sp, r7, #12 + stmfd sp!, {r2, ip, lr} + /* call func(data,regs) */ + blx r3 + ldmfd sp, {r2, ip, lr} + mov sp, r2 + + /* restore/discard saved state */ + cmp r4, #USR_MODE + beq fiq_from_usr_mode_exit + + msr cpsr_c, r4 + ldr sp, [r7, #(4 * 13)] + ldr lr, [r7, #(4 * 14)] + msr spsr_cxsf, r5 + +fiq_from_usr_mode_exit: + msr cpsr_c, #(FIQ_MODE | PSR_I_BIT | PSR_F_BIT) + + ldmfd sp!, {r0-r7} + ldr lr, [sp, #(4 * 7)] + ldr r12, [sp, #(4 * 8)] + add sp, sp, #(10 * 4) +exit_fiq: + msr spsr_cxsf, r12 + add r10, #1 + cmp r11, #0 + moveqs pc, lr + bx r11 /* jump to custom fiq return function */ + +nested_fiq: + orr r12, r12, #(PSR_F_BIT) + b exit_fiq + +fiq_glue_end: + +ENTRY(fiq_glue_setup) /* func, data, sp, smc call number */ + stmfd sp!, {r4} + mrs r4, cpsr + msr cpsr_c, #(FIQ_MODE | PSR_I_BIT | PSR_F_BIT) + movs r8, r0 + mov r9, r1 + mov sp, r2 + mov r11, r3 + moveq r10, #0 + movne r10, #1 + msr cpsr_c, r4 + ldmfd sp!, {r4} + bx lr +
diff --git a/arch/arm/common/fiq_glue_setup.c b/arch/arm/common/fiq_glue_setup.c new file mode 100644 index 0000000..8cb1b61 --- /dev/null +++ b/arch/arm/common/fiq_glue_setup.c
@@ -0,0 +1,147 @@ +/* + * Copyright (C) 2010 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/kernel.h> +#include <linux/percpu.h> +#include <linux/slab.h> +#include <asm/fiq.h> +#include <asm/fiq_glue.h> + +extern unsigned char fiq_glue, fiq_glue_end; +extern void fiq_glue_setup(void *func, void *data, void *sp, + fiq_return_handler_t fiq_return_handler); + +static struct fiq_handler fiq_debbuger_fiq_handler = { + .name = "fiq_glue", +}; +DEFINE_PER_CPU(void *, fiq_stack); +static struct fiq_glue_handler *current_handler; +static fiq_return_handler_t fiq_return_handler; +static DEFINE_MUTEX(fiq_glue_lock); + +static void fiq_glue_setup_helper(void *info) +{ + struct fiq_glue_handler *handler = info; + fiq_glue_setup(handler->fiq, handler, + __get_cpu_var(fiq_stack) + THREAD_START_SP, + fiq_return_handler); +} + +int fiq_glue_register_handler(struct fiq_glue_handler *handler) +{ + int ret; + int cpu; + + if (!handler || !handler->fiq) + return -EINVAL; + + mutex_lock(&fiq_glue_lock); + if (fiq_stack) { + ret = -EBUSY; + goto err_busy; + } + + for_each_possible_cpu(cpu) { + void *stack; + stack = (void *)__get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); + if (WARN_ON(!stack)) { + ret = -ENOMEM; + goto err_alloc_fiq_stack; + } + per_cpu(fiq_stack, cpu) = stack; + } + + ret = claim_fiq(&fiq_debbuger_fiq_handler); + if (WARN_ON(ret)) + goto err_claim_fiq; + + current_handler = handler; + on_each_cpu(fiq_glue_setup_helper, handler, true); + set_fiq_handler(&fiq_glue, &fiq_glue_end - &fiq_glue); + + mutex_unlock(&fiq_glue_lock); + return 0; + +err_claim_fiq: +err_alloc_fiq_stack: + for_each_possible_cpu(cpu) { + __free_pages(per_cpu(fiq_stack, cpu), THREAD_SIZE_ORDER); + per_cpu(fiq_stack, cpu) = NULL; + } +err_busy: + mutex_unlock(&fiq_glue_lock); + return ret; +} + +static void fiq_glue_update_return_handler(void (*fiq_return)(void)) +{ + fiq_return_handler = fiq_return; + if (current_handler) + on_each_cpu(fiq_glue_setup_helper, current_handler, true); +} + +int fiq_glue_set_return_handler(void (*fiq_return)(void)) +{ + int ret; + + mutex_lock(&fiq_glue_lock); + if (fiq_return_handler) { + ret = -EBUSY; + goto err_busy; + } + fiq_glue_update_return_handler(fiq_return); + ret = 0; +err_busy: + mutex_unlock(&fiq_glue_lock); + + return ret; +} +EXPORT_SYMBOL(fiq_glue_set_return_handler); + +int fiq_glue_clear_return_handler(void (*fiq_return)(void)) +{ + int ret; + + mutex_lock(&fiq_glue_lock); + if (WARN_ON(fiq_return_handler != fiq_return)) { + ret = -EINVAL; + goto err_inval; + } + fiq_glue_update_return_handler(NULL); + ret = 0; +err_inval: + mutex_unlock(&fiq_glue_lock); + + return ret; +} +EXPORT_SYMBOL(fiq_glue_clear_return_handler); + +/** + * fiq_glue_resume - Restore fiqs after suspend or low power idle states + * + * This must be called before calling local_fiq_enable after returning from a + * power state where the fiq mode registers were lost. If a driver provided + * a resume hook when it registered the handler it will be called. + */ + +void fiq_glue_resume(void) +{ + if (!current_handler) + return; + fiq_glue_setup(current_handler->fiq, current_handler, + __get_cpu_var(fiq_stack) + THREAD_START_SP, + fiq_return_handler); + if (current_handler->resume) + current_handler->resume(current_handler); +} +
diff --git a/arch/arm/configs/ranchu_defconfig b/arch/arm/configs/ranchu_defconfig new file mode 100644 index 0000000..83a3c0d --- /dev/null +++ b/arch/arm/configs/ranchu_defconfig
@@ -0,0 +1,313 @@ +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_CGROUPS=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +CONFIG_PROFILING=y +CONFIG_OPROFILE=y +CONFIG_ARCH_MMAP_RND_BITS=16 +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +CONFIG_ARCH_VIRT=y +CONFIG_ARM_KERNMEM_PERMS=y +CONFIG_SMP=y +CONFIG_PREEMPT=y +CONFIG_AEABI=y +CONFIG_HIGHMEM=y +CONFIG_KSM=y +CONFIG_SECCOMP=y +CONFIG_CMDLINE="console=ttyAMA0" +CONFIG_VFP=y +CONFIG_NEON=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_PM_AUTOSLEEP=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set +CONFIG_PM_DEBUG=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_INET_DIAG_DESTROY=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_INET_ESP=y +# CONFIG_INET_LRO is not set +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_OWNER=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_BRIDGE=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_CLS_ACT=y +# CONFIG_WIRELESS is not set +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_MTD=y +CONFIG_MTD_CMDLINE_PARTS=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_CFI=y +CONFIG_MTD_CFI_INTELEXT=y +CONFIG_MTD_CFI_AMDSTD=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_VIRTIO_BLK=y +CONFIG_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_CRYPT=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_NETDEVICES=y +CONFIG_TUN=y +CONFIG_VIRTIO_NET=y +CONFIG_SMSC911X=y +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_USB_USBNET=y +# CONFIG_WLAN is not set +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_KEYRESET=y +CONFIG_KEYBOARD_GOLDFISH_EVENTS=y +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_INPUT_TABLET=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_SERIO_SERPORT is not set +CONFIG_SERIO_AMBAKMI=y +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVMEM is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_CONSOLE=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +CONFIG_MEDIA_SUPPORT=y +CONFIG_FB=y +CONFIG_FB_GOLDFISH=y +CONFIG_FB_SIMPLE=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_HIDRAW=y +CONFIG_UHID=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_HOLTEK=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_ROCCAT=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_WACOM=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +CONFIG_USB_HIDDEV=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_OTG_WAKELOCK=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_PL031=y +CONFIG_VIRTIO_MMIO=y +CONFIG_STAGING=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y +CONFIG_SW_SYNC_USER=y +CONFIG_ION=y +CONFIG_GOLDFISH_AUDIO=y +CONFIG_GOLDFISH=y +CONFIG_GOLDFISH_PIPE=y +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_QUOTA=y +CONFIG_FUSE_FS=y +CONFIG_CUSE=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_PSTORE=y +CONFIG_PSTORE_CONSOLE=y +CONFIG_PSTORE_RAM=y +CONFIG_NFS_FS=y +CONFIG_ROOT_NFS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +CONFIG_DEBUG_INFO=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_PANIC_TIMEOUT=5 +# CONFIG_SCHED_DEBUG is not set +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +CONFIG_ENABLE_DEFAULT_TRACERS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_VIRTUALIZATION=y
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index b8e69fe..5c278a8 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig
@@ -69,6 +69,15 @@ help Use optimized AES assembler routines for ARM platforms. + On ARM processors without the Crypto Extensions, this is the + fastest AES implementation for single blocks. For multiple + blocks, the NEON bit-sliced implementation is usually faster. + + This implementation may be vulnerable to cache timing attacks, + since it uses lookup tables. However, as countermeasures it + disables IRQs and preloads the tables; it is hoped this makes + such attacks very difficult. + config CRYPTO_AES_ARM_BS tristate "Bit sliced AES using NEON instructions" depends on KERNEL_MODE_NEON @@ -116,9 +125,14 @@ select CRYPTO_HASH config CRYPTO_CHACHA20_NEON - tristate "NEON accelerated ChaCha20 symmetric cipher" + tristate "NEON accelerated ChaCha stream cipher algorithms" depends on KERNEL_MODE_NEON select CRYPTO_BLKCIPHER select CRYPTO_CHACHA20 +config CRYPTO_NHPOLY1305_NEON + tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)" + depends on KERNEL_MODE_NEON + select CRYPTO_NHPOLY1305 + endif
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index c9919c2..5668d91 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile
@@ -9,7 +9,8 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o -obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o +obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o +obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o @@ -52,7 +53,8 @@ ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o -chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o +chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o +nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o ifdef REGENERATE_ARM_CRYPTO quiet_cmd_perl = PERL $@
diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S index 54b3840..f2d67c0 100644 --- a/arch/arm/crypto/aes-cipher-core.S +++ b/arch/arm/crypto/aes-cipher-core.S
@@ -10,6 +10,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #include <asm/cache.h> .text @@ -41,7 +42,7 @@ .endif .endm - .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op + .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr __select \out0, \in0, 0 __select t0, \in1, 1 __load \out0, \out0, 0, \sz, \op @@ -73,6 +74,14 @@ __load t0, t0, 3, \sz, \op __load \t4, \t4, 3, \sz, \op + .ifnb \oldcpsr + /* + * This is the final round and we're done with all data-dependent table + * lookups, so we can safely re-enable interrupts. + */ + restore_irqs \oldcpsr + .endif + eor \out1, \out1, t1, ror #24 eor \out0, \out0, t2, ror #16 ldm rk!, {t1, t2} @@ -83,14 +92,14 @@ eor \out1, \out1, t2 .endm - .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op + .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op - __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op + __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr .endm - .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op + .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op - __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op + __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr .endm .macro __rev, out, in @@ -118,13 +127,14 @@ .macro do_crypt, round, ttab, ltab, bsz push {r3-r11, lr} + // Load keys first, to reduce latency in case they're not cached yet. + ldm rk!, {r8-r11} + ldr r4, [in] ldr r5, [in, #4] ldr r6, [in, #8] ldr r7, [in, #12] - ldm rk!, {r8-r11} - #ifdef CONFIG_CPU_BIG_ENDIAN __rev r4, r4 __rev r5, r5 @@ -138,6 +148,25 @@ eor r7, r7, r11 __adrl ttab, \ttab + /* + * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into + * L1 cache, assuming cacheline size >= 32. This is a hardening measure + * intended to make cache-timing attacks more difficult. They may not + * be fully prevented, however; see the paper + * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf + * ("Cache-timing attacks on AES") for a discussion of the many + * difficulties involved in writing truly constant-time AES software. + */ + save_and_disable_irqs t0 + .set i, 0 + .rept 1024 / 128 + ldr r8, [ttab, #i + 0] + ldr r9, [ttab, #i + 32] + ldr r10, [ttab, #i + 64] + ldr r11, [ttab, #i + 96] + .set i, i + 128 + .endr + push {t0} // oldcpsr tst rounds, #2 bne 1f @@ -151,8 +180,21 @@ \round r4, r5, r6, r7, r8, r9, r10, r11 b 0b -2: __adrl ttab, \ltab - \round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b +2: .ifb \ltab + add ttab, ttab, #1 + .else + __adrl ttab, \ltab + // Prefetch inverse S-box for final round; see explanation above + .set i, 0 + .rept 256 / 64 + ldr t0, [ttab, #i + 0] + ldr t1, [ttab, #i + 32] + .set i, i + 64 + .endr + .endif + + pop {rounds} // oldcpsr + \round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds #ifdef CONFIG_CPU_BIG_ENDIAN __rev r4, r4 @@ -174,6 +216,16 @@ .ltorg .endm +ENTRY(__aes_arm_encrypt) + do_crypt fround, crypto_ft_tab,, 2 +ENDPROC(__aes_arm_encrypt) + + .align 5 +ENTRY(__aes_arm_decrypt) + do_crypt iround, crypto_it_tab, __aes_arm_inverse_sbox, 0 +ENDPROC(__aes_arm_decrypt) + + .section ".rodata", "a" .align L1_CACHE_SHIFT .type __aes_arm_inverse_sbox, %object __aes_arm_inverse_sbox: @@ -210,12 +262,3 @@ .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d .size __aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox - -ENTRY(__aes_arm_encrypt) - do_crypt fround, crypto_ft_tab, crypto_ft_tab + 1, 2 -ENDPROC(__aes_arm_encrypt) - - .align 5 -ENTRY(__aes_arm_decrypt) - do_crypt iround, crypto_it_tab, __aes_arm_inverse_sbox, 0 -ENDPROC(__aes_arm_decrypt)
diff --git a/arch/arm/crypto/chacha-neon-core.S b/arch/arm/crypto/chacha-neon-core.S new file mode 100644 index 0000000..eb22926 --- /dev/null +++ b/arch/arm/crypto/chacha-neon-core.S
@@ -0,0 +1,560 @@ +/* + * ChaCha/XChaCha NEON helper functions + * + * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on: + * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSE3 functions + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + + /* + * NEON doesn't have a rotate instruction. The alternatives are, more or less: + * + * (a) vshl.u32 + vsri.u32 (needs temporary register) + * (b) vshl.u32 + vshr.u32 + vorr (needs temporary register) + * (c) vrev32.16 (16-bit rotations only) + * (d) vtbl.8 + vtbl.8 (multiple of 8 bits rotations only, + * needs index vector) + * + * ChaCha has 16, 12, 8, and 7-bit rotations. For the 12 and 7-bit rotations, + * the only choices are (a) and (b). We use (a) since it takes two-thirds the + * cycles of (b) on both Cortex-A7 and Cortex-A53. + * + * For the 16-bit rotation, we use vrev32.16 since it's consistently fastest + * and doesn't need a temporary register. + * + * For the 8-bit rotation, we use vtbl.8 + vtbl.8. On Cortex-A7, this sequence + * is twice as fast as (a), even when doing (a) on multiple registers + * simultaneously to eliminate the stall between vshl and vsri. Also, it + * parallelizes better when temporary registers are scarce. + * + * A disadvantage is that on Cortex-A53, the vtbl sequence is the same speed as + * (a), so the need to load the rotation table actually makes the vtbl method + * slightly slower overall on that CPU (~1.3% slower ChaCha20). Still, it + * seems to be a good compromise to get a more significant speed boost on some + * CPUs, e.g. ~4.8% faster ChaCha20 on Cortex-A7. + */ + +#include <linux/linkage.h> + + .text + .fpu neon + .align 5 + +/* + * chacha_permute - permute one block + * + * Permute one 64-byte block where the state matrix is stored in the four NEON + * registers q0-q3. It performs matrix operations on four words in parallel, + * but requires shuffling to rearrange the words after each round. + * + * The round count is given in r3. + * + * Clobbers: r3, ip, q4-q5 + */ +chacha_permute: + + adr ip, .Lrol8_table + vld1.8 {d10}, [ip, :64] + +.Ldoubleround: + // x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vadd.i32 q0, q0, q1 + veor q3, q3, q0 + vrev32.16 q3, q3 + + // x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vadd.i32 q2, q2, q3 + veor q4, q1, q2 + vshl.u32 q1, q4, #12 + vsri.u32 q1, q4, #20 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vadd.i32 q0, q0, q1 + veor q3, q3, q0 + vtbl.8 d6, {d6}, d10 + vtbl.8 d7, {d7}, d10 + + // x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vadd.i32 q2, q2, q3 + veor q4, q1, q2 + vshl.u32 q1, q4, #7 + vsri.u32 q1, q4, #25 + + // x1 = shuffle32(x1, MASK(0, 3, 2, 1)) + vext.8 q1, q1, q1, #4 + // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vext.8 q2, q2, q2, #8 + // x3 = shuffle32(x3, MASK(2, 1, 0, 3)) + vext.8 q3, q3, q3, #12 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vadd.i32 q0, q0, q1 + veor q3, q3, q0 + vrev32.16 q3, q3 + + // x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vadd.i32 q2, q2, q3 + veor q4, q1, q2 + vshl.u32 q1, q4, #12 + vsri.u32 q1, q4, #20 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vadd.i32 q0, q0, q1 + veor q3, q3, q0 + vtbl.8 d6, {d6}, d10 + vtbl.8 d7, {d7}, d10 + + // x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vadd.i32 q2, q2, q3 + veor q4, q1, q2 + vshl.u32 q1, q4, #7 + vsri.u32 q1, q4, #25 + + // x1 = shuffle32(x1, MASK(2, 1, 0, 3)) + vext.8 q1, q1, q1, #12 + // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vext.8 q2, q2, q2, #8 + // x3 = shuffle32(x3, MASK(0, 3, 2, 1)) + vext.8 q3, q3, q3, #4 + + subs r3, r3, #2 + bne .Ldoubleround + + bx lr +ENDPROC(chacha_permute) + +ENTRY(chacha_block_xor_neon) + // r0: Input state matrix, s + // r1: 1 data block output, o + // r2: 1 data block input, i + // r3: nrounds + push {lr} + + // x0..3 = s0..3 + add ip, r0, #0x20 + vld1.32 {q0-q1}, [r0] + vld1.32 {q2-q3}, [ip] + + vmov q8, q0 + vmov q9, q1 + vmov q10, q2 + vmov q11, q3 + + bl chacha_permute + + add ip, r2, #0x20 + vld1.8 {q4-q5}, [r2] + vld1.8 {q6-q7}, [ip] + + // o0 = i0 ^ (x0 + s0) + vadd.i32 q0, q0, q8 + veor q0, q0, q4 + + // o1 = i1 ^ (x1 + s1) + vadd.i32 q1, q1, q9 + veor q1, q1, q5 + + // o2 = i2 ^ (x2 + s2) + vadd.i32 q2, q2, q10 + veor q2, q2, q6 + + // o3 = i3 ^ (x3 + s3) + vadd.i32 q3, q3, q11 + veor q3, q3, q7 + + add ip, r1, #0x20 + vst1.8 {q0-q1}, [r1] + vst1.8 {q2-q3}, [ip] + + pop {pc} +ENDPROC(chacha_block_xor_neon) + +ENTRY(hchacha_block_neon) + // r0: Input state matrix, s + // r1: output (8 32-bit words) + // r2: nrounds + push {lr} + + vld1.32 {q0-q1}, [r0]! + vld1.32 {q2-q3}, [r0] + + mov r3, r2 + bl chacha_permute + + vst1.32 {q0}, [r1]! + vst1.32 {q3}, [r1] + + pop {pc} +ENDPROC(hchacha_block_neon) + + .align 4 +.Lctrinc: .word 0, 1, 2, 3 +.Lrol8_table: .byte 3, 0, 1, 2, 7, 4, 5, 6 + + .align 5 +ENTRY(chacha_4block_xor_neon) + push {r4-r5} + mov r4, sp // preserve the stack pointer + sub ip, sp, #0x20 // allocate a 32 byte buffer + bic ip, ip, #0x1f // aligned to 32 bytes + mov sp, ip + + // r0: Input state matrix, s + // r1: 4 data blocks output, o + // r2: 4 data blocks input, i + // r3: nrounds + + // + // This function encrypts four consecutive ChaCha blocks by loading + // the state matrix in NEON registers four times. The algorithm performs + // each operation on the corresponding word of each state matrix, hence + // requires no word shuffling. The words are re-interleaved before the + // final addition of the original state and the XORing step. + // + + // x0..15[0-3] = s0..15[0-3] + add ip, r0, #0x20 + vld1.32 {q0-q1}, [r0] + vld1.32 {q2-q3}, [ip] + + adr r5, .Lctrinc + vdup.32 q15, d7[1] + vdup.32 q14, d7[0] + vld1.32 {q4}, [r5, :128] + vdup.32 q13, d6[1] + vdup.32 q12, d6[0] + vdup.32 q11, d5[1] + vdup.32 q10, d5[0] + vadd.u32 q12, q12, q4 // x12 += counter values 0-3 + vdup.32 q9, d4[1] + vdup.32 q8, d4[0] + vdup.32 q7, d3[1] + vdup.32 q6, d3[0] + vdup.32 q5, d2[1] + vdup.32 q4, d2[0] + vdup.32 q3, d1[1] + vdup.32 q2, d1[0] + vdup.32 q1, d0[1] + vdup.32 q0, d0[0] + + adr ip, .Lrol8_table + b 1f + +.Ldoubleround4: + vld1.32 {q8-q9}, [sp, :256] +1: + // x0 += x4, x12 = rotl32(x12 ^ x0, 16) + // x1 += x5, x13 = rotl32(x13 ^ x1, 16) + // x2 += x6, x14 = rotl32(x14 ^ x2, 16) + // x3 += x7, x15 = rotl32(x15 ^ x3, 16) + vadd.i32 q0, q0, q4 + vadd.i32 q1, q1, q5 + vadd.i32 q2, q2, q6 + vadd.i32 q3, q3, q7 + + veor q12, q12, q0 + veor q13, q13, q1 + veor q14, q14, q2 + veor q15, q15, q3 + + vrev32.16 q12, q12 + vrev32.16 q13, q13 + vrev32.16 q14, q14 + vrev32.16 q15, q15 + + // x8 += x12, x4 = rotl32(x4 ^ x8, 12) + // x9 += x13, x5 = rotl32(x5 ^ x9, 12) + // x10 += x14, x6 = rotl32(x6 ^ x10, 12) + // x11 += x15, x7 = rotl32(x7 ^ x11, 12) + vadd.i32 q8, q8, q12 + vadd.i32 q9, q9, q13 + vadd.i32 q10, q10, q14 + vadd.i32 q11, q11, q15 + + vst1.32 {q8-q9}, [sp, :256] + + veor q8, q4, q8 + veor q9, q5, q9 + vshl.u32 q4, q8, #12 + vshl.u32 q5, q9, #12 + vsri.u32 q4, q8, #20 + vsri.u32 q5, q9, #20 + + veor q8, q6, q10 + veor q9, q7, q11 + vshl.u32 q6, q8, #12 + vshl.u32 q7, q9, #12 + vsri.u32 q6, q8, #20 + vsri.u32 q7, q9, #20 + + // x0 += x4, x12 = rotl32(x12 ^ x0, 8) + // x1 += x5, x13 = rotl32(x13 ^ x1, 8) + // x2 += x6, x14 = rotl32(x14 ^ x2, 8) + // x3 += x7, x15 = rotl32(x15 ^ x3, 8) + vld1.8 {d16}, [ip, :64] + vadd.i32 q0, q0, q4 + vadd.i32 q1, q1, q5 + vadd.i32 q2, q2, q6 + vadd.i32 q3, q3, q7 + + veor q12, q12, q0 + veor q13, q13, q1 + veor q14, q14, q2 + veor q15, q15, q3 + + vtbl.8 d24, {d24}, d16 + vtbl.8 d25, {d25}, d16 + vtbl.8 d26, {d26}, d16 + vtbl.8 d27, {d27}, d16 + vtbl.8 d28, {d28}, d16 + vtbl.8 d29, {d29}, d16 + vtbl.8 d30, {d30}, d16 + vtbl.8 d31, {d31}, d16 + + vld1.32 {q8-q9}, [sp, :256] + + // x8 += x12, x4 = rotl32(x4 ^ x8, 7) + // x9 += x13, x5 = rotl32(x5 ^ x9, 7) + // x10 += x14, x6 = rotl32(x6 ^ x10, 7) + // x11 += x15, x7 = rotl32(x7 ^ x11, 7) + vadd.i32 q8, q8, q12 + vadd.i32 q9, q9, q13 + vadd.i32 q10, q10, q14 + vadd.i32 q11, q11, q15 + + vst1.32 {q8-q9}, [sp, :256] + + veor q8, q4, q8 + veor q9, q5, q9 + vshl.u32 q4, q8, #7 + vshl.u32 q5, q9, #7 + vsri.u32 q4, q8, #25 + vsri.u32 q5, q9, #25 + + veor q8, q6, q10 + veor q9, q7, q11 + vshl.u32 q6, q8, #7 + vshl.u32 q7, q9, #7 + vsri.u32 q6, q8, #25 + vsri.u32 q7, q9, #25 + + vld1.32 {q8-q9}, [sp, :256] + + // x0 += x5, x15 = rotl32(x15 ^ x0, 16) + // x1 += x6, x12 = rotl32(x12 ^ x1, 16) + // x2 += x7, x13 = rotl32(x13 ^ x2, 16) + // x3 += x4, x14 = rotl32(x14 ^ x3, 16) + vadd.i32 q0, q0, q5 + vadd.i32 q1, q1, q6 + vadd.i32 q2, q2, q7 + vadd.i32 q3, q3, q4 + + veor q15, q15, q0 + veor q12, q12, q1 + veor q13, q13, q2 + veor q14, q14, q3 + + vrev32.16 q15, q15 + vrev32.16 q12, q12 + vrev32.16 q13, q13 + vrev32.16 q14, q14 + + // x10 += x15, x5 = rotl32(x5 ^ x10, 12) + // x11 += x12, x6 = rotl32(x6 ^ x11, 12) + // x8 += x13, x7 = rotl32(x7 ^ x8, 12) + // x9 += x14, x4 = rotl32(x4 ^ x9, 12) + vadd.i32 q10, q10, q15 + vadd.i32 q11, q11, q12 + vadd.i32 q8, q8, q13 + vadd.i32 q9, q9, q14 + + vst1.32 {q8-q9}, [sp, :256] + + veor q8, q7, q8 + veor q9, q4, q9 + vshl.u32 q7, q8, #12 + vshl.u32 q4, q9, #12 + vsri.u32 q7, q8, #20 + vsri.u32 q4, q9, #20 + + veor q8, q5, q10 + veor q9, q6, q11 + vshl.u32 q5, q8, #12 + vshl.u32 q6, q9, #12 + vsri.u32 q5, q8, #20 + vsri.u32 q6, q9, #20 + + // x0 += x5, x15 = rotl32(x15 ^ x0, 8) + // x1 += x6, x12 = rotl32(x12 ^ x1, 8) + // x2 += x7, x13 = rotl32(x13 ^ x2, 8) + // x3 += x4, x14 = rotl32(x14 ^ x3, 8) + vld1.8 {d16}, [ip, :64] + vadd.i32 q0, q0, q5 + vadd.i32 q1, q1, q6 + vadd.i32 q2, q2, q7 + vadd.i32 q3, q3, q4 + + veor q15, q15, q0 + veor q12, q12, q1 + veor q13, q13, q2 + veor q14, q14, q3 + + vtbl.8 d30, {d30}, d16 + vtbl.8 d31, {d31}, d16 + vtbl.8 d24, {d24}, d16 + vtbl.8 d25, {d25}, d16 + vtbl.8 d26, {d26}, d16 + vtbl.8 d27, {d27}, d16 + vtbl.8 d28, {d28}, d16 + vtbl.8 d29, {d29}, d16 + + vld1.32 {q8-q9}, [sp, :256] + + // x10 += x15, x5 = rotl32(x5 ^ x10, 7) + // x11 += x12, x6 = rotl32(x6 ^ x11, 7) + // x8 += x13, x7 = rotl32(x7 ^ x8, 7) + // x9 += x14, x4 = rotl32(x4 ^ x9, 7) + vadd.i32 q10, q10, q15 + vadd.i32 q11, q11, q12 + vadd.i32 q8, q8, q13 + vadd.i32 q9, q9, q14 + + vst1.32 {q8-q9}, [sp, :256] + + veor q8, q7, q8 + veor q9, q4, q9 + vshl.u32 q7, q8, #7 + vshl.u32 q4, q9, #7 + vsri.u32 q7, q8, #25 + vsri.u32 q4, q9, #25 + + veor q8, q5, q10 + veor q9, q6, q11 + vshl.u32 q5, q8, #7 + vshl.u32 q6, q9, #7 + vsri.u32 q5, q8, #25 + vsri.u32 q6, q9, #25 + + subs r3, r3, #2 + bne .Ldoubleround4 + + // x0..7[0-3] are in q0-q7, x10..15[0-3] are in q10-q15. + // x8..9[0-3] are on the stack. + + // Re-interleave the words in the first two rows of each block (x0..7). + // Also add the counter values 0-3 to x12[0-3]. + vld1.32 {q8}, [r5, :128] // load counter values 0-3 + vzip.32 q0, q1 // => (0 1 0 1) (0 1 0 1) + vzip.32 q2, q3 // => (2 3 2 3) (2 3 2 3) + vzip.32 q4, q5 // => (4 5 4 5) (4 5 4 5) + vzip.32 q6, q7 // => (6 7 6 7) (6 7 6 7) + vadd.u32 q12, q8 // x12 += counter values 0-3 + vswp d1, d4 + vswp d3, d6 + vld1.32 {q8-q9}, [r0]! // load s0..7 + vswp d9, d12 + vswp d11, d14 + + // Swap q1 and q4 so that we'll free up consecutive registers (q0-q1) + // after XORing the first 32 bytes. + vswp q1, q4 + + // First two rows of each block are (q0 q1) (q2 q6) (q4 q5) (q3 q7) + + // x0..3[0-3] += s0..3[0-3] (add orig state to 1st row of each block) + vadd.u32 q0, q0, q8 + vadd.u32 q2, q2, q8 + vadd.u32 q4, q4, q8 + vadd.u32 q3, q3, q8 + + // x4..7[0-3] += s4..7[0-3] (add orig state to 2nd row of each block) + vadd.u32 q1, q1, q9 + vadd.u32 q6, q6, q9 + vadd.u32 q5, q5, q9 + vadd.u32 q7, q7, q9 + + // XOR first 32 bytes using keystream from first two rows of first block + vld1.8 {q8-q9}, [r2]! + veor q8, q8, q0 + veor q9, q9, q1 + vst1.8 {q8-q9}, [r1]! + + // Re-interleave the words in the last two rows of each block (x8..15). + vld1.32 {q8-q9}, [sp, :256] + vzip.32 q12, q13 // => (12 13 12 13) (12 13 12 13) + vzip.32 q14, q15 // => (14 15 14 15) (14 15 14 15) + vzip.32 q8, q9 // => (8 9 8 9) (8 9 8 9) + vzip.32 q10, q11 // => (10 11 10 11) (10 11 10 11) + vld1.32 {q0-q1}, [r0] // load s8..15 + vswp d25, d28 + vswp d27, d30 + vswp d17, d20 + vswp d19, d22 + + // Last two rows of each block are (q8 q12) (q10 q14) (q9 q13) (q11 q15) + + // x8..11[0-3] += s8..11[0-3] (add orig state to 3rd row of each block) + vadd.u32 q8, q8, q0 + vadd.u32 q10, q10, q0 + vadd.u32 q9, q9, q0 + vadd.u32 q11, q11, q0 + + // x12..15[0-3] += s12..15[0-3] (add orig state to 4th row of each block) + vadd.u32 q12, q12, q1 + vadd.u32 q14, q14, q1 + vadd.u32 q13, q13, q1 + vadd.u32 q15, q15, q1 + + // XOR the rest of the data with the keystream + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q8 + veor q1, q1, q12 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q2 + veor q1, q1, q6 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q10 + veor q1, q1, q14 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q4 + veor q1, q1, q5 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q9 + veor q1, q1, q13 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q3 + veor q1, q1, q7 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2] + mov sp, r4 // restore original stack pointer + veor q0, q0, q11 + veor q1, q1, q15 + vst1.8 {q0-q1}, [r1] + + pop {r4-r5} + bx lr +ENDPROC(chacha_4block_xor_neon)
diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c new file mode 100644 index 0000000..9d6fda8 --- /dev/null +++ b/arch/arm/crypto/chacha-neon-glue.c
@@ -0,0 +1,201 @@ +/* + * ARM NEON accelerated ChaCha and XChaCha stream ciphers, + * including ChaCha20 (RFC7539) + * + * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on: + * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <crypto/algapi.h> +#include <crypto/chacha.h> +#include <crypto/internal/skcipher.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include <asm/hwcap.h> +#include <asm/neon.h> +#include <asm/simd.h> + +asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, + int nrounds); +asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, + int nrounds); +asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); + +static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + u8 buf[CHACHA_BLOCK_SIZE]; + + while (bytes >= CHACHA_BLOCK_SIZE * 4) { + chacha_4block_xor_neon(state, dst, src, nrounds); + bytes -= CHACHA_BLOCK_SIZE * 4; + src += CHACHA_BLOCK_SIZE * 4; + dst += CHACHA_BLOCK_SIZE * 4; + state[12] += 4; + } + while (bytes >= CHACHA_BLOCK_SIZE) { + chacha_block_xor_neon(state, dst, src, nrounds); + bytes -= CHACHA_BLOCK_SIZE; + src += CHACHA_BLOCK_SIZE; + dst += CHACHA_BLOCK_SIZE; + state[12]++; + } + if (bytes) { + memcpy(buf, src, bytes); + chacha_block_xor_neon(state, buf, buf, nrounds); + memcpy(dst, buf, bytes); + } +} + +static int chacha_neon_stream_xor(struct skcipher_request *req, + struct chacha_ctx *ctx, u8 *iv) +{ + struct skcipher_walk walk; + u32 state[16]; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + crypto_chacha_init(state, ctx, iv); + + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes = round_down(nbytes, walk.stride); + + kernel_neon_begin(); + chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, + nbytes, ctx->nrounds); + kernel_neon_end(); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } + + return err; +} + +static int chacha_neon(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + + if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd()) + return crypto_chacha_crypt(req); + + return chacha_neon_stream_xor(req, ctx, req->iv); +} + +static int xchacha_neon(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + struct chacha_ctx subctx; + u32 state[16]; + u8 real_iv[16]; + + if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd()) + return crypto_xchacha_crypt(req); + + crypto_chacha_init(state, ctx, req->iv); + + kernel_neon_begin(); + hchacha_block_neon(state, subctx.key, ctx->nrounds); + kernel_neon_end(); + subctx.nrounds = ctx->nrounds; + + memcpy(&real_iv[0], req->iv + 24, 8); + memcpy(&real_iv[8], req->iv + 16, 8); + return chacha_neon_stream_xor(req, &subctx, real_iv); +} + +static struct skcipher_alg algs[] = { + { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 4 * CHACHA_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = chacha_neon, + .decrypt = chacha_neon, + }, { + .base.cra_name = "xchacha20", + .base.cra_driver_name = "xchacha20-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 4 * CHACHA_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = xchacha_neon, + .decrypt = xchacha_neon, + }, { + .base.cra_name = "xchacha12", + .base.cra_driver_name = "xchacha12-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 4 * CHACHA_BLOCK_SIZE, + .setkey = crypto_chacha12_setkey, + .encrypt = xchacha_neon, + .decrypt = xchacha_neon, + } +}; + +static int __init chacha_simd_mod_init(void) +{ + if (!(elf_hwcap & HWCAP_NEON)) + return -ENODEV; + + return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); +} + +static void __exit chacha_simd_mod_fini(void) +{ + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); +} + +module_init(chacha_simd_mod_init); +module_exit(chacha_simd_mod_fini); + +MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("chacha20"); +MODULE_ALIAS_CRYPTO("chacha20-neon"); +MODULE_ALIAS_CRYPTO("xchacha20"); +MODULE_ALIAS_CRYPTO("xchacha20-neon"); +MODULE_ALIAS_CRYPTO("xchacha12"); +MODULE_ALIAS_CRYPTO("xchacha12-neon");
diff --git a/arch/arm/crypto/chacha20-neon-core.S b/arch/arm/crypto/chacha20-neon-core.S deleted file mode 100644 index 3fecb21..0000000 --- a/arch/arm/crypto/chacha20-neon-core.S +++ /dev/null
@@ -1,523 +0,0 @@ -/* - * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions - * - * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on: - * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSE3 functions - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include <linux/linkage.h> - - .text - .fpu neon - .align 5 - -ENTRY(chacha20_block_xor_neon) - // r0: Input state matrix, s - // r1: 1 data block output, o - // r2: 1 data block input, i - - // - // This function encrypts one ChaCha20 block by loading the state matrix - // in four NEON registers. It performs matrix operation on four words in - // parallel, but requireds shuffling to rearrange the words after each - // round. - // - - // x0..3 = s0..3 - add ip, r0, #0x20 - vld1.32 {q0-q1}, [r0] - vld1.32 {q2-q3}, [ip] - - vmov q8, q0 - vmov q9, q1 - vmov q10, q2 - vmov q11, q3 - - mov r3, #10 - -.Ldoubleround: - // x0 += x1, x3 = rotl32(x3 ^ x0, 16) - vadd.i32 q0, q0, q1 - veor q4, q3, q0 - vshl.u32 q3, q4, #16 - vsri.u32 q3, q4, #16 - - // x2 += x3, x1 = rotl32(x1 ^ x2, 12) - vadd.i32 q2, q2, q3 - veor q4, q1, q2 - vshl.u32 q1, q4, #12 - vsri.u32 q1, q4, #20 - - // x0 += x1, x3 = rotl32(x3 ^ x0, 8) - vadd.i32 q0, q0, q1 - veor q4, q3, q0 - vshl.u32 q3, q4, #8 - vsri.u32 q3, q4, #24 - - // x2 += x3, x1 = rotl32(x1 ^ x2, 7) - vadd.i32 q2, q2, q3 - veor q4, q1, q2 - vshl.u32 q1, q4, #7 - vsri.u32 q1, q4, #25 - - // x1 = shuffle32(x1, MASK(0, 3, 2, 1)) - vext.8 q1, q1, q1, #4 - // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) - vext.8 q2, q2, q2, #8 - // x3 = shuffle32(x3, MASK(2, 1, 0, 3)) - vext.8 q3, q3, q3, #12 - - // x0 += x1, x3 = rotl32(x3 ^ x0, 16) - vadd.i32 q0, q0, q1 - veor q4, q3, q0 - vshl.u32 q3, q4, #16 - vsri.u32 q3, q4, #16 - - // x2 += x3, x1 = rotl32(x1 ^ x2, 12) - vadd.i32 q2, q2, q3 - veor q4, q1, q2 - vshl.u32 q1, q4, #12 - vsri.u32 q1, q4, #20 - - // x0 += x1, x3 = rotl32(x3 ^ x0, 8) - vadd.i32 q0, q0, q1 - veor q4, q3, q0 - vshl.u32 q3, q4, #8 - vsri.u32 q3, q4, #24 - - // x2 += x3, x1 = rotl32(x1 ^ x2, 7) - vadd.i32 q2, q2, q3 - veor q4, q1, q2 - vshl.u32 q1, q4, #7 - vsri.u32 q1, q4, #25 - - // x1 = shuffle32(x1, MASK(2, 1, 0, 3)) - vext.8 q1, q1, q1, #12 - // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) - vext.8 q2, q2, q2, #8 - // x3 = shuffle32(x3, MASK(0, 3, 2, 1)) - vext.8 q3, q3, q3, #4 - - subs r3, r3, #1 - bne .Ldoubleround - - add ip, r2, #0x20 - vld1.8 {q4-q5}, [r2] - vld1.8 {q6-q7}, [ip] - - // o0 = i0 ^ (x0 + s0) - vadd.i32 q0, q0, q8 - veor q0, q0, q4 - - // o1 = i1 ^ (x1 + s1) - vadd.i32 q1, q1, q9 - veor q1, q1, q5 - - // o2 = i2 ^ (x2 + s2) - vadd.i32 q2, q2, q10 - veor q2, q2, q6 - - // o3 = i3 ^ (x3 + s3) - vadd.i32 q3, q3, q11 - veor q3, q3, q7 - - add ip, r1, #0x20 - vst1.8 {q0-q1}, [r1] - vst1.8 {q2-q3}, [ip] - - bx lr -ENDPROC(chacha20_block_xor_neon) - - .align 5 -ENTRY(chacha20_4block_xor_neon) - push {r4-r6, lr} - mov ip, sp // preserve the stack pointer - sub r3, sp, #0x20 // allocate a 32 byte buffer - bic r3, r3, #0x1f // aligned to 32 bytes - mov sp, r3 - - // r0: Input state matrix, s - // r1: 4 data blocks output, o - // r2: 4 data blocks input, i - - // - // This function encrypts four consecutive ChaCha20 blocks by loading - // the state matrix in NEON registers four times. The algorithm performs - // each operation on the corresponding word of each state matrix, hence - // requires no word shuffling. For final XORing step we transpose the - // matrix by interleaving 32- and then 64-bit words, which allows us to - // do XOR in NEON registers. - // - - // x0..15[0-3] = s0..3[0..3] - add r3, r0, #0x20 - vld1.32 {q0-q1}, [r0] - vld1.32 {q2-q3}, [r3] - - adr r3, CTRINC - vdup.32 q15, d7[1] - vdup.32 q14, d7[0] - vld1.32 {q11}, [r3, :128] - vdup.32 q13, d6[1] - vdup.32 q12, d6[0] - vadd.i32 q12, q12, q11 // x12 += counter values 0-3 - vdup.32 q11, d5[1] - vdup.32 q10, d5[0] - vdup.32 q9, d4[1] - vdup.32 q8, d4[0] - vdup.32 q7, d3[1] - vdup.32 q6, d3[0] - vdup.32 q5, d2[1] - vdup.32 q4, d2[0] - vdup.32 q3, d1[1] - vdup.32 q2, d1[0] - vdup.32 q1, d0[1] - vdup.32 q0, d0[0] - - mov r3, #10 - -.Ldoubleround4: - // x0 += x4, x12 = rotl32(x12 ^ x0, 16) - // x1 += x5, x13 = rotl32(x13 ^ x1, 16) - // x2 += x6, x14 = rotl32(x14 ^ x2, 16) - // x3 += x7, x15 = rotl32(x15 ^ x3, 16) - vadd.i32 q0, q0, q4 - vadd.i32 q1, q1, q5 - vadd.i32 q2, q2, q6 - vadd.i32 q3, q3, q7 - - veor q12, q12, q0 - veor q13, q13, q1 - veor q14, q14, q2 - veor q15, q15, q3 - - vrev32.16 q12, q12 - vrev32.16 q13, q13 - vrev32.16 q14, q14 - vrev32.16 q15, q15 - - // x8 += x12, x4 = rotl32(x4 ^ x8, 12) - // x9 += x13, x5 = rotl32(x5 ^ x9, 12) - // x10 += x14, x6 = rotl32(x6 ^ x10, 12) - // x11 += x15, x7 = rotl32(x7 ^ x11, 12) - vadd.i32 q8, q8, q12 - vadd.i32 q9, q9, q13 - vadd.i32 q10, q10, q14 - vadd.i32 q11, q11, q15 - - vst1.32 {q8-q9}, [sp, :256] - - veor q8, q4, q8 - veor q9, q5, q9 - vshl.u32 q4, q8, #12 - vshl.u32 q5, q9, #12 - vsri.u32 q4, q8, #20 - vsri.u32 q5, q9, #20 - - veor q8, q6, q10 - veor q9, q7, q11 - vshl.u32 q6, q8, #12 - vshl.u32 q7, q9, #12 - vsri.u32 q6, q8, #20 - vsri.u32 q7, q9, #20 - - // x0 += x4, x12 = rotl32(x12 ^ x0, 8) - // x1 += x5, x13 = rotl32(x13 ^ x1, 8) - // x2 += x6, x14 = rotl32(x14 ^ x2, 8) - // x3 += x7, x15 = rotl32(x15 ^ x3, 8) - vadd.i32 q0, q0, q4 - vadd.i32 q1, q1, q5 - vadd.i32 q2, q2, q6 - vadd.i32 q3, q3, q7 - - veor q8, q12, q0 - veor q9, q13, q1 - vshl.u32 q12, q8, #8 - vshl.u32 q13, q9, #8 - vsri.u32 q12, q8, #24 - vsri.u32 q13, q9, #24 - - veor q8, q14, q2 - veor q9, q15, q3 - vshl.u32 q14, q8, #8 - vshl.u32 q15, q9, #8 - vsri.u32 q14, q8, #24 - vsri.u32 q15, q9, #24 - - vld1.32 {q8-q9}, [sp, :256] - - // x8 += x12, x4 = rotl32(x4 ^ x8, 7) - // x9 += x13, x5 = rotl32(x5 ^ x9, 7) - // x10 += x14, x6 = rotl32(x6 ^ x10, 7) - // x11 += x15, x7 = rotl32(x7 ^ x11, 7) - vadd.i32 q8, q8, q12 - vadd.i32 q9, q9, q13 - vadd.i32 q10, q10, q14 - vadd.i32 q11, q11, q15 - - vst1.32 {q8-q9}, [sp, :256] - - veor q8, q4, q8 - veor q9, q5, q9 - vshl.u32 q4, q8, #7 - vshl.u32 q5, q9, #7 - vsri.u32 q4, q8, #25 - vsri.u32 q5, q9, #25 - - veor q8, q6, q10 - veor q9, q7, q11 - vshl.u32 q6, q8, #7 - vshl.u32 q7, q9, #7 - vsri.u32 q6, q8, #25 - vsri.u32 q7, q9, #25 - - vld1.32 {q8-q9}, [sp, :256] - - // x0 += x5, x15 = rotl32(x15 ^ x0, 16) - // x1 += x6, x12 = rotl32(x12 ^ x1, 16) - // x2 += x7, x13 = rotl32(x13 ^ x2, 16) - // x3 += x4, x14 = rotl32(x14 ^ x3, 16) - vadd.i32 q0, q0, q5 - vadd.i32 q1, q1, q6 - vadd.i32 q2, q2, q7 - vadd.i32 q3, q3, q4 - - veor q15, q15, q0 - veor q12, q12, q1 - veor q13, q13, q2 - veor q14, q14, q3 - - vrev32.16 q15, q15 - vrev32.16 q12, q12 - vrev32.16 q13, q13 - vrev32.16 q14, q14 - - // x10 += x15, x5 = rotl32(x5 ^ x10, 12) - // x11 += x12, x6 = rotl32(x6 ^ x11, 12) - // x8 += x13, x7 = rotl32(x7 ^ x8, 12) - // x9 += x14, x4 = rotl32(x4 ^ x9, 12) - vadd.i32 q10, q10, q15 - vadd.i32 q11, q11, q12 - vadd.i32 q8, q8, q13 - vadd.i32 q9, q9, q14 - - vst1.32 {q8-q9}, [sp, :256] - - veor q8, q7, q8 - veor q9, q4, q9 - vshl.u32 q7, q8, #12 - vshl.u32 q4, q9, #12 - vsri.u32 q7, q8, #20 - vsri.u32 q4, q9, #20 - - veor q8, q5, q10 - veor q9, q6, q11 - vshl.u32 q5, q8, #12 - vshl.u32 q6, q9, #12 - vsri.u32 q5, q8, #20 - vsri.u32 q6, q9, #20 - - // x0 += x5, x15 = rotl32(x15 ^ x0, 8) - // x1 += x6, x12 = rotl32(x12 ^ x1, 8) - // x2 += x7, x13 = rotl32(x13 ^ x2, 8) - // x3 += x4, x14 = rotl32(x14 ^ x3, 8) - vadd.i32 q0, q0, q5 - vadd.i32 q1, q1, q6 - vadd.i32 q2, q2, q7 - vadd.i32 q3, q3, q4 - - veor q8, q15, q0 - veor q9, q12, q1 - vshl.u32 q15, q8, #8 - vshl.u32 q12, q9, #8 - vsri.u32 q15, q8, #24 - vsri.u32 q12, q9, #24 - - veor q8, q13, q2 - veor q9, q14, q3 - vshl.u32 q13, q8, #8 - vshl.u32 q14, q9, #8 - vsri.u32 q13, q8, #24 - vsri.u32 q14, q9, #24 - - vld1.32 {q8-q9}, [sp, :256] - - // x10 += x15, x5 = rotl32(x5 ^ x10, 7) - // x11 += x12, x6 = rotl32(x6 ^ x11, 7) - // x8 += x13, x7 = rotl32(x7 ^ x8, 7) - // x9 += x14, x4 = rotl32(x4 ^ x9, 7) - vadd.i32 q10, q10, q15 - vadd.i32 q11, q11, q12 - vadd.i32 q8, q8, q13 - vadd.i32 q9, q9, q14 - - vst1.32 {q8-q9}, [sp, :256] - - veor q8, q7, q8 - veor q9, q4, q9 - vshl.u32 q7, q8, #7 - vshl.u32 q4, q9, #7 - vsri.u32 q7, q8, #25 - vsri.u32 q4, q9, #25 - - veor q8, q5, q10 - veor q9, q6, q11 - vshl.u32 q5, q8, #7 - vshl.u32 q6, q9, #7 - vsri.u32 q5, q8, #25 - vsri.u32 q6, q9, #25 - - subs r3, r3, #1 - beq 0f - - vld1.32 {q8-q9}, [sp, :256] - b .Ldoubleround4 - - // x0[0-3] += s0[0] - // x1[0-3] += s0[1] - // x2[0-3] += s0[2] - // x3[0-3] += s0[3] -0: ldmia r0!, {r3-r6} - vdup.32 q8, r3 - vdup.32 q9, r4 - vadd.i32 q0, q0, q8 - vadd.i32 q1, q1, q9 - vdup.32 q8, r5 - vdup.32 q9, r6 - vadd.i32 q2, q2, q8 - vadd.i32 q3, q3, q9 - - // x4[0-3] += s1[0] - // x5[0-3] += s1[1] - // x6[0-3] += s1[2] - // x7[0-3] += s1[3] - ldmia r0!, {r3-r6} - vdup.32 q8, r3 - vdup.32 q9, r4 - vadd.i32 q4, q4, q8 - vadd.i32 q5, q5, q9 - vdup.32 q8, r5 - vdup.32 q9, r6 - vadd.i32 q6, q6, q8 - vadd.i32 q7, q7, q9 - - // interleave 32-bit words in state n, n+1 - vzip.32 q0, q1 - vzip.32 q2, q3 - vzip.32 q4, q5 - vzip.32 q6, q7 - - // interleave 64-bit words in state n, n+2 - vswp d1, d4 - vswp d3, d6 - vswp d9, d12 - vswp d11, d14 - - // xor with corresponding input, write to output - vld1.8 {q8-q9}, [r2]! - veor q8, q8, q0 - veor q9, q9, q4 - vst1.8 {q8-q9}, [r1]! - - vld1.32 {q8-q9}, [sp, :256] - - // x8[0-3] += s2[0] - // x9[0-3] += s2[1] - // x10[0-3] += s2[2] - // x11[0-3] += s2[3] - ldmia r0!, {r3-r6} - vdup.32 q0, r3 - vdup.32 q4, r4 - vadd.i32 q8, q8, q0 - vadd.i32 q9, q9, q4 - vdup.32 q0, r5 - vdup.32 q4, r6 - vadd.i32 q10, q10, q0 - vadd.i32 q11, q11, q4 - - // x12[0-3] += s3[0] - // x13[0-3] += s3[1] - // x14[0-3] += s3[2] - // x15[0-3] += s3[3] - ldmia r0!, {r3-r6} - vdup.32 q0, r3 - vdup.32 q4, r4 - adr r3, CTRINC - vadd.i32 q12, q12, q0 - vld1.32 {q0}, [r3, :128] - vadd.i32 q13, q13, q4 - vadd.i32 q12, q12, q0 // x12 += counter values 0-3 - - vdup.32 q0, r5 - vdup.32 q4, r6 - vadd.i32 q14, q14, q0 - vadd.i32 q15, q15, q4 - - // interleave 32-bit words in state n, n+1 - vzip.32 q8, q9 - vzip.32 q10, q11 - vzip.32 q12, q13 - vzip.32 q14, q15 - - // interleave 64-bit words in state n, n+2 - vswp d17, d20 - vswp d19, d22 - vswp d25, d28 - vswp d27, d30 - - vmov q4, q1 - - vld1.8 {q0-q1}, [r2]! - veor q0, q0, q8 - veor q1, q1, q12 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! - veor q0, q0, q2 - veor q1, q1, q6 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! - veor q0, q0, q10 - veor q1, q1, q14 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! - veor q0, q0, q4 - veor q1, q1, q5 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! - veor q0, q0, q9 - veor q1, q1, q13 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! - veor q0, q0, q3 - veor q1, q1, q7 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2] - veor q0, q0, q11 - veor q1, q1, q15 - vst1.8 {q0-q1}, [r1] - - mov sp, ip - pop {r4-r6, pc} -ENDPROC(chacha20_4block_xor_neon) - - .align 4 -CTRINC: .word 0, 1, 2, 3
diff --git a/arch/arm/crypto/chacha20-neon-glue.c b/arch/arm/crypto/chacha20-neon-glue.c deleted file mode 100644 index 59a7be0..0000000 --- a/arch/arm/crypto/chacha20-neon-glue.c +++ /dev/null
@@ -1,127 +0,0 @@ -/* - * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions - * - * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on: - * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include <crypto/algapi.h> -#include <crypto/chacha20.h> -#include <crypto/internal/skcipher.h> -#include <linux/kernel.h> -#include <linux/module.h> - -#include <asm/hwcap.h> -#include <asm/neon.h> -#include <asm/simd.h> - -asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src); -asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src); - -static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, - unsigned int bytes) -{ - u8 buf[CHACHA20_BLOCK_SIZE]; - - while (bytes >= CHACHA20_BLOCK_SIZE * 4) { - chacha20_4block_xor_neon(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE * 4; - src += CHACHA20_BLOCK_SIZE * 4; - dst += CHACHA20_BLOCK_SIZE * 4; - state[12] += 4; - } - while (bytes >= CHACHA20_BLOCK_SIZE) { - chacha20_block_xor_neon(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE; - src += CHACHA20_BLOCK_SIZE; - dst += CHACHA20_BLOCK_SIZE; - state[12]++; - } - if (bytes) { - memcpy(buf, src, bytes); - chacha20_block_xor_neon(state, buf, buf); - memcpy(dst, buf, bytes); - } -} - -static int chacha20_neon(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - u32 state[16]; - int err; - - if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd()) - return crypto_chacha20_crypt(req); - - err = skcipher_walk_virt(&walk, req, true); - - crypto_chacha20_init(state, ctx, walk.iv); - - kernel_neon_begin(); - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - - if (nbytes < walk.total) - nbytes = round_down(nbytes, walk.stride); - - chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, - nbytes); - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); - } - kernel_neon_end(); - - return err; -} - -static struct skcipher_alg alg = { - .base.cra_name = "chacha20", - .base.cra_driver_name = "chacha20-neon", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha20_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA20_KEY_SIZE, - .max_keysize = CHACHA20_KEY_SIZE, - .ivsize = CHACHA20_IV_SIZE, - .chunksize = CHACHA20_BLOCK_SIZE, - .walksize = 4 * CHACHA20_BLOCK_SIZE, - .setkey = crypto_chacha20_setkey, - .encrypt = chacha20_neon, - .decrypt = chacha20_neon, -}; - -static int __init chacha20_simd_mod_init(void) -{ - if (!(elf_hwcap & HWCAP_NEON)) - return -ENODEV; - - return crypto_register_skcipher(&alg); -} - -static void __exit chacha20_simd_mod_fini(void) -{ - crypto_unregister_skcipher(&alg); -} - -module_init(chacha20_simd_mod_init); -module_exit(chacha20_simd_mod_fini); - -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("chacha20");
diff --git a/arch/arm/crypto/nh-neon-core.S b/arch/arm/crypto/nh-neon-core.S new file mode 100644 index 0000000..434d80a --- /dev/null +++ b/arch/arm/crypto/nh-neon-core.S
@@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * NH - ε-almost-universal hash function, NEON accelerated version + * + * Copyright 2018 Google LLC + * + * Author: Eric Biggers <ebiggers@google.com> + */ + +#include <linux/linkage.h> + + .text + .fpu neon + + KEY .req r0 + MESSAGE .req r1 + MESSAGE_LEN .req r2 + HASH .req r3 + + PASS0_SUMS .req q0 + PASS0_SUM_A .req d0 + PASS0_SUM_B .req d1 + PASS1_SUMS .req q1 + PASS1_SUM_A .req d2 + PASS1_SUM_B .req d3 + PASS2_SUMS .req q2 + PASS2_SUM_A .req d4 + PASS2_SUM_B .req d5 + PASS3_SUMS .req q3 + PASS3_SUM_A .req d6 + PASS3_SUM_B .req d7 + K0 .req q4 + K1 .req q5 + K2 .req q6 + K3 .req q7 + T0 .req q8 + T0_L .req d16 + T0_H .req d17 + T1 .req q9 + T1_L .req d18 + T1_H .req d19 + T2 .req q10 + T2_L .req d20 + T2_H .req d21 + T3 .req q11 + T3_L .req d22 + T3_H .req d23 + +.macro _nh_stride k0, k1, k2, k3 + + // Load next message stride + vld1.8 {T3}, [MESSAGE]! + + // Load next key stride + vld1.32 {\k3}, [KEY]! + + // Add message words to key words + vadd.u32 T0, T3, \k0 + vadd.u32 T1, T3, \k1 + vadd.u32 T2, T3, \k2 + vadd.u32 T3, T3, \k3 + + // Multiply 32x32 => 64 and accumulate + vmlal.u32 PASS0_SUMS, T0_L, T0_H + vmlal.u32 PASS1_SUMS, T1_L, T1_H + vmlal.u32 PASS2_SUMS, T2_L, T2_H + vmlal.u32 PASS3_SUMS, T3_L, T3_H +.endm + +/* + * void nh_neon(const u32 *key, const u8 *message, size_t message_len, + * u8 hash[NH_HASH_BYTES]) + * + * It's guaranteed that message_len % 16 == 0. + */ +ENTRY(nh_neon) + + vld1.32 {K0,K1}, [KEY]! + vmov.u64 PASS0_SUMS, #0 + vmov.u64 PASS1_SUMS, #0 + vld1.32 {K2}, [KEY]! + vmov.u64 PASS2_SUMS, #0 + vmov.u64 PASS3_SUMS, #0 + + subs MESSAGE_LEN, MESSAGE_LEN, #64 + blt .Lloop4_done +.Lloop4: + _nh_stride K0, K1, K2, K3 + _nh_stride K1, K2, K3, K0 + _nh_stride K2, K3, K0, K1 + _nh_stride K3, K0, K1, K2 + subs MESSAGE_LEN, MESSAGE_LEN, #64 + bge .Lloop4 + +.Lloop4_done: + ands MESSAGE_LEN, MESSAGE_LEN, #63 + beq .Ldone + _nh_stride K0, K1, K2, K3 + + subs MESSAGE_LEN, MESSAGE_LEN, #16 + beq .Ldone + _nh_stride K1, K2, K3, K0 + + subs MESSAGE_LEN, MESSAGE_LEN, #16 + beq .Ldone + _nh_stride K2, K3, K0, K1 + +.Ldone: + // Sum the accumulators for each pass, then store the sums to 'hash' + vadd.u64 T0_L, PASS0_SUM_A, PASS0_SUM_B + vadd.u64 T0_H, PASS1_SUM_A, PASS1_SUM_B + vadd.u64 T1_L, PASS2_SUM_A, PASS2_SUM_B + vadd.u64 T1_H, PASS3_SUM_A, PASS3_SUM_B + vst1.8 {T0-T1}, [HASH] + bx lr +ENDPROC(nh_neon)
diff --git a/arch/arm/crypto/nhpoly1305-neon-glue.c b/arch/arm/crypto/nhpoly1305-neon-glue.c new file mode 100644 index 0000000..49aae87 --- /dev/null +++ b/arch/arm/crypto/nhpoly1305-neon-glue.c
@@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum + * (NEON accelerated version) + * + * Copyright 2018 Google LLC + */ + +#include <asm/neon.h> +#include <asm/simd.h> +#include <crypto/internal/hash.h> +#include <crypto/nhpoly1305.h> +#include <linux/module.h> + +asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len, + u8 hash[NH_HASH_BYTES]); + +/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */ +static void _nh_neon(const u32 *key, const u8 *message, size_t message_len, + __le64 hash[NH_NUM_PASSES]) +{ + nh_neon(key, message, message_len, (u8 *)hash); +} + +static int nhpoly1305_neon_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + if (srclen < 64 || !may_use_simd()) + return crypto_nhpoly1305_update(desc, src, srclen); + + do { + unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); + + kernel_neon_begin(); + crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon); + kernel_neon_end(); + src += n; + srclen -= n; + } while (srclen); + return 0; +} + +static struct shash_alg nhpoly1305_alg = { + .base.cra_name = "nhpoly1305", + .base.cra_driver_name = "nhpoly1305-neon", + .base.cra_priority = 200, + .base.cra_ctxsize = sizeof(struct nhpoly1305_key), + .base.cra_module = THIS_MODULE, + .digestsize = POLY1305_DIGEST_SIZE, + .init = crypto_nhpoly1305_init, + .update = nhpoly1305_neon_update, + .final = crypto_nhpoly1305_final, + .setkey = crypto_nhpoly1305_setkey, + .descsize = sizeof(struct nhpoly1305_state), +}; + +static int __init nhpoly1305_mod_init(void) +{ + if (!(elf_hwcap & HWCAP_NEON)) + return -ENODEV; + + return crypto_register_shash(&nhpoly1305_alg); +} + +static void __exit nhpoly1305_mod_exit(void) +{ + crypto_unregister_shash(&nhpoly1305_alg); +} + +module_init(nhpoly1305_mod_init); +module_exit(nhpoly1305_mod_exit); + +MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function (NEON-accelerated)"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>"); +MODULE_ALIAS_CRYPTO("nhpoly1305"); +MODULE_ALIAS_CRYPTO("nhpoly1305-neon");
diff --git a/arch/arm/include/asm/fiq_glue.h b/arch/arm/include/asm/fiq_glue.h new file mode 100644 index 0000000..a9e244f9 --- /dev/null +++ b/arch/arm/include/asm/fiq_glue.h
@@ -0,0 +1,33 @@ +/* + * Copyright (C) 2010 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __ASM_FIQ_GLUE_H +#define __ASM_FIQ_GLUE_H + +struct fiq_glue_handler { + void (*fiq)(struct fiq_glue_handler *h, void *regs, void *svc_sp); + void (*resume)(struct fiq_glue_handler *h); +}; +typedef void (*fiq_return_handler_t)(void); + +int fiq_glue_register_handler(struct fiq_glue_handler *handler); +int fiq_glue_set_return_handler(fiq_return_handler_t fiq_return); +int fiq_glue_clear_return_handler(fiq_return_handler_t fiq_return); + +#ifdef CONFIG_FIQ_GLUE +void fiq_glue_resume(void); +#else +static inline void fiq_glue_resume(void) {} +#endif + +#endif
diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h index f59ab9b..201dc20 100644 --- a/arch/arm/include/asm/topology.h +++ b/arch/arm/include/asm/topology.h
@@ -25,6 +25,20 @@ void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); const struct cpumask *cpu_coregroup_mask(int cpu); +#include <linux/arch_topology.h> + +/* Replace task scheduler's default frequency-invariant accounting */ +#define arch_scale_freq_capacity topology_get_freq_scale + +/* Replace task scheduler's default max-frequency-invariant accounting */ +#define arch_scale_max_freq_capacity topology_get_max_freq_scale + +/* Replace task scheduler's default cpu-invariant accounting */ +#define arch_scale_cpu_capacity topology_get_cpu_scale + +/* Enable topology flag updates */ +#define arch_update_cpu_topology topology_update_cpu_topology + #else static inline void init_cpu_topology(void) { }
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index caa0dbe3..923a725 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c
@@ -141,6 +141,8 @@ int kgdb_arch_handle_exception(int exception_vector, int signo, static int kgdb_brk_fn(struct pt_regs *regs, unsigned int instr) { + if (user_mode(regs)) + return -1; kgdb_handle_exception(1, SIGTRAP, 0, regs); return 0; @@ -148,6 +150,8 @@ static int kgdb_brk_fn(struct pt_regs *regs, unsigned int instr) static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr) { + if (user_mode(regs)) + return -1; compiled_break = 1; kgdb_handle_exception(1, SIGTRAP, 0, regs);
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index d96714e..4b675a8 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c
@@ -94,6 +94,77 @@ void arch_cpu_idle_exit(void) ledtrig_cpu(CPU_LED_IDLE_END); } +/* + * dump a block of kernel memory from around the given address + */ +static void show_data(unsigned long addr, int nbytes, const char *name) +{ + int i, j; + int nlines; + u32 *p; + + /* + * don't attempt to dump non-kernel addresses or + * values that are probably just small negative numbers + */ + if (addr < PAGE_OFFSET || addr > -256UL) + return; + + printk("\n%s: %#lx:\n", name, addr); + + /* + * round address down to a 32 bit boundary + * and always dump a multiple of 32 bytes + */ + p = (u32 *)(addr & ~(sizeof(u32) - 1)); + nbytes += (addr & (sizeof(u32) - 1)); + nlines = (nbytes + 31) / 32; + + + for (i = 0; i < nlines; i++) { + /* + * just display low 16 bits of address to keep + * each line of the dump < 80 characters + */ + printk("%04lx ", (unsigned long)p & 0xffff); + for (j = 0; j < 8; j++) { + u32 data; + if (probe_kernel_address(p, data)) { + printk(" ********"); + } else { + printk(" %08x", data); + } + ++p; + } + printk("\n"); + } +} + +static void show_extra_register_data(struct pt_regs *regs, int nbytes) +{ + mm_segment_t fs; + + fs = get_fs(); + set_fs(KERNEL_DS); + show_data(regs->ARM_pc - nbytes, nbytes * 2, "PC"); + show_data(regs->ARM_lr - nbytes, nbytes * 2, "LR"); + show_data(regs->ARM_sp - nbytes, nbytes * 2, "SP"); + show_data(regs->ARM_ip - nbytes, nbytes * 2, "IP"); + show_data(regs->ARM_fp - nbytes, nbytes * 2, "FP"); + show_data(regs->ARM_r0 - nbytes, nbytes * 2, "R0"); + show_data(regs->ARM_r1 - nbytes, nbytes * 2, "R1"); + show_data(regs->ARM_r2 - nbytes, nbytes * 2, "R2"); + show_data(regs->ARM_r3 - nbytes, nbytes * 2, "R3"); + show_data(regs->ARM_r4 - nbytes, nbytes * 2, "R4"); + show_data(regs->ARM_r5 - nbytes, nbytes * 2, "R5"); + show_data(regs->ARM_r6 - nbytes, nbytes * 2, "R6"); + show_data(regs->ARM_r7 - nbytes, nbytes * 2, "R7"); + show_data(regs->ARM_r8 - nbytes, nbytes * 2, "R8"); + show_data(regs->ARM_r9 - nbytes, nbytes * 2, "R9"); + show_data(regs->ARM_r10 - nbytes, nbytes * 2, "R10"); + set_fs(fs); +} + void __show_regs(struct pt_regs *regs) { unsigned long flags; @@ -185,6 +256,8 @@ void __show_regs(struct pt_regs *regs) printk("Control: %08x%s\n", ctrl, buf); } #endif + + show_extra_register_data(regs, 128); } void show_regs(struct pt_regs * regs)
diff --git a/arch/arm/kernel/reboot.c b/arch/arm/kernel/reboot.c index 3b2aa9a..c742491 100644 --- a/arch/arm/kernel/reboot.c +++ b/arch/arm/kernel/reboot.c
@@ -6,6 +6,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include <linux/console.h> #include <linux/cpu.h> #include <linux/delay.h> #include <linux/reboot.h> @@ -125,6 +126,31 @@ void machine_power_off(void) pm_power_off(); } +#ifdef CONFIG_ARM_FLUSH_CONSOLE_ON_RESTART +void arm_machine_flush_console(void) +{ + printk("\n"); + pr_emerg("Restarting %s\n", linux_banner); + if (console_trylock()) { + console_unlock(); + return; + } + + mdelay(50); + + local_irq_disable(); + if (!console_trylock()) + pr_emerg("arm_restart: Console was locked! Busting\n"); + else + pr_emerg("arm_restart: Console was locked!\n"); + console_unlock(); +} +#else +void arm_machine_flush_console(void) +{ +} +#endif + /* * Restart requires that the secondary CPUs stop performing any activity * while the primary CPU resets the system. Systems with a single CPU can @@ -141,6 +167,10 @@ void machine_restart(char *cmd) local_irq_disable(); smp_send_stop(); + /* Flush the console to make sure all the relevant messages make it + * out to the console drivers */ + arm_machine_flush_console(); + if (arm_pm_restart) arm_pm_restart(reboot_mode, cmd); else
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 24ac3ca..28ca164 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c
@@ -23,6 +23,7 @@ #include <linux/of.h> #include <linux/sched.h> #include <linux/sched/topology.h> +#include <linux/sched/energy.h> #include <linux/slab.h> #include <linux/string.h> @@ -30,6 +31,18 @@ #include <asm/cputype.h> #include <asm/topology.h> +static inline +const struct sched_group_energy * const cpu_core_energy(int cpu) +{ + return sge_array[cpu][SD_LEVEL0]; +} + +static inline +const struct sched_group_energy * const cpu_cluster_energy(int cpu) +{ + return sge_array[cpu][SD_LEVEL1]; +} + /* * cpu capacity scale management */ @@ -278,23 +291,37 @@ void store_cpu_topology(unsigned int cpuid) update_cpu_capacity(cpuid); + topology_detect_flags(); + pr_info("CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n", cpuid, cpu_topology[cpuid].thread_id, cpu_topology[cpuid].core_id, cpu_topology[cpuid].socket_id, mpidr); } +#ifdef CONFIG_SCHED_MC +static int core_flags(void) +{ + return cpu_core_flags() | topology_core_flags(); +} + static inline int cpu_corepower_flags(void) { - return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN; + return topology_core_flags() + | SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN; +} +#endif + +static int cpu_flags(void) +{ + return topology_cpu_flags(); } static struct sched_domain_topology_level arm_topology[] = { #ifdef CONFIG_SCHED_MC - { cpu_corepower_mask, cpu_corepower_flags, SD_INIT_NAME(GMC) }, - { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, + { cpu_coregroup_mask, core_flags, cpu_core_energy, SD_INIT_NAME(MC) }, #endif - { cpu_cpu_mask, SD_INIT_NAME(DIE) }, + { cpu_cpu_mask, cpu_flags, cpu_cluster_energy, SD_INIT_NAME(DIE) }, { NULL, }, };
diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S index 2465995..11da0f5 100644 --- a/arch/arm/mm/cache-v6.S +++ b/arch/arm/mm/cache-v6.S
@@ -270,6 +270,11 @@ * - end - virtual end address of region */ ENTRY(v6_dma_flush_range) +#ifdef CONFIG_CACHE_FLUSH_RANGE_LIMIT + sub r2, r1, r0 + cmp r2, #CONFIG_CACHE_FLUSH_RANGE_LIMIT + bhi v6_dma_flush_dcache_all +#endif #ifdef CONFIG_DMA_CACHE_RWFO ldrb r2, [r0] @ read for ownership strb r2, [r0] @ write for ownership @@ -292,6 +297,18 @@ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer ret lr +#ifdef CONFIG_CACHE_FLUSH_RANGE_LIMIT +v6_dma_flush_dcache_all: + mov r0, #0 +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c14, 0 @ D cache clean+invalidate +#else + mcr p15, 0, r0, c7, c15, 0 @ Cache clean+invalidate +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr +#endif + /* * dma_map_area(start, size, dir) * - start - kernel virtual start address
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 49b1b80..e8fd320 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c
@@ -277,10 +277,10 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) local_irq_enable(); /* - * If we're in an interrupt or have no user + * If we're in an interrupt, or have no irqs, or have no user * context, we must not take the fault.. */ - if (faulthandler_disabled() || !mm) + if (faulthandler_disabled() || irqs_disabled() || !mm) goto no_context; if (user_mode(regs))
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c30cd78..71c5559 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig
@@ -24,6 +24,7 @@ select ARCH_HAVE_NMI_SAFE_CMPXCHG if ACPI_APEI_SEA select ARCH_USE_CMPXCHG_LOCKREF select ARCH_SUPPORTS_MEMORY_FAILURE + select ARCH_SUPPORTS_LTO_CLANG select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_WANT_COMPAT_IPC_PARSE_VERSION @@ -433,7 +434,7 @@ config ARM64_ERRATUM_843419 bool "Cortex-A53: 843419: A load or store might access an incorrect address" - default y + default y if !LTO_CLANG select ARM64_MODULE_CMODEL_LARGE if MODULES help This option links the kernel with '--fix-cortex-a53-843419' and @@ -1069,7 +1070,7 @@ config RANDOMIZE_MODULE_REGION_FULL bool "Randomize the module region independently from the core kernel" - depends on RANDOMIZE_BASE + depends on RANDOMIZE_BASE && !LTO_CLANG default y help Randomizes the location of the module region without considering the @@ -1103,6 +1104,23 @@ entering them here. As a minimum, you should specify the the root device (e.g. root=/dev/nfs). +choice + prompt "Kernel command line type" if CMDLINE != "" + default CMDLINE_FROM_BOOTLOADER + +config CMDLINE_FROM_BOOTLOADER + bool "Use bootloader kernel arguments if available" + help + Uses the command-line options passed by the boot loader. If + the boot loader doesn't provide any, the default kernel command + string provided in CMDLINE will be used. + +config CMDLINE_EXTEND + bool "Extend bootloader kernel arguments" + help + The command-line arguments provided by the boot loader will be + appended to the default kernel command string. + config CMDLINE_FORCE bool "Always use the default kernel command string" help @@ -1110,6 +1128,7 @@ loader passes other arguments to the kernel. This is useful if you cannot or don't want to change the command-line options your boot loader passes to the kernel. +endchoice config EFI_STUB bool @@ -1142,6 +1161,41 @@ However, even with this option, the resultant kernel should continue to boot on existing non-UEFI platforms. +config BUILD_ARM64_APPENDED_DTB_IMAGE + bool "Build a concatenated Image.gz/dtb by default" + depends on OF + help + Enabling this option will cause a concatenated Image.gz and list of + DTBs to be built by default (instead of a standalone Image.gz.) + The image will built in arch/arm64/boot/Image.gz-dtb + +choice + prompt "Appended DTB Kernel Image name" + depends on BUILD_ARM64_APPENDED_DTB_IMAGE + help + Enabling this option will cause a specific kernel image Image or + Image.gz to be used for final image creation. + The image will built in arch/arm64/boot/IMAGE-NAME-dtb + + config IMG_GZ_DTB + bool "Image.gz-dtb" + config IMG_DTB + bool "Image-dtb" +endchoice + +config BUILD_ARM64_APPENDED_KERNEL_IMAGE_NAME + string + depends on BUILD_ARM64_APPENDED_DTB_IMAGE + default "Image.gz-dtb" if IMG_GZ_DTB + default "Image-dtb" if IMG_DTB + +config BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES + string "Default dtb names" + depends on BUILD_ARM64_APPENDED_DTB_IMAGE + help + Space separated list of names of dtbs to append when + building a concatenated Image.gz-dtb. + endmenu menu "Userspace binary formats"
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 0c5f70e..36b9c3e 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile
@@ -26,8 +26,17 @@ ifeq ($(call ld-option, --fix-cortex-a53-843419),) $(warning ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum) else + ifeq ($(call gold-ifversion, -lt, 114000000, y), y) +$(warning This version of GNU gold may generate incorrect code with --fix-cortex-a53-843419;\ + see https://sourceware.org/bugzilla/show_bug.cgi?id=21491) + endif LDFLAGS_vmlinux += --fix-cortex-a53-843419 endif +else + ifeq ($(ld-name),gold) +# Pass --no-fix-cortex-a53-843419 to ensure the erratum fix is disabled +LDFLAGS += --no-fix-cortex-a53-843419 + endif endif KBUILD_DEFCONFIG := defconfig @@ -49,9 +58,17 @@ endif endif -KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) +ifeq ($(cc-name),clang) +# This is a workaround for https://bugs.llvm.org/show_bug.cgi?id=30792. +# TODO: revert when this is fixed in LLVM. +KBUILD_CFLAGS += -mno-implicit-float +else +KBUILD_CFLAGS += -mgeneral-regs-only +endif +KBUILD_CFLAGS += $(lseinstr) $(brokengasinst) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables KBUILD_CFLAGS += $(call cc-option, -mpc-relative-literal-loads) +KBUILD_CFLAGS += -fno-pic KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) KBUILD_CFLAGS += $(call cc-option,-mabi=lp64) @@ -62,14 +79,22 @@ CHECKFLAGS += -D__AARCH64EB__ AS += -EB LD += -EB +ifeq ($(ld-name),gold) +LDFLAGS += -maarch64_elf64_be_vec +else LDFLAGS += -maarch64linuxb +endif UTS_MACHINE := aarch64_be else KBUILD_CPPFLAGS += -mlittle-endian CHECKFLAGS += -D__AARCH64EL__ AS += -EL LD += -EL +ifeq ($(ld-name),gold) +LDFLAGS += -maarch64_elf64_le_vec +else LDFLAGS += -maarch64linux +endif UTS_MACHINE := aarch64 endif @@ -77,6 +102,10 @@ ifeq ($(CONFIG_ARM64_MODULE_CMODEL_LARGE), y) KBUILD_CFLAGS_MODULE += -mcmodel=large +ifeq ($(CONFIG_LTO_CLANG), y) +# Code model is not stored in LLVM IR, so we need to pass it also to LLVMgold +KBUILD_LDFLAGS_MODULE += -plugin-opt=-code-model=large +endif endif ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) @@ -95,6 +124,10 @@ TEXT_OFFSET := 0x00080000 endif +ifeq ($(cc-name),clang) +KBUILD_CFLAGS += $(call cc-disable-warning, asm-operand-widths) +endif + # KASAN_SHADOW_OFFSET = VA_START + (1 << (VA_BITS - 3)) - (1 << 61) # in 32-bit arithmetic KASAN_SHADOW_OFFSET := $(shell printf "0x%08x00000000\n" $$(( \ @@ -114,10 +147,15 @@ # Default target when executing plain make boot := arch/arm64/boot +ifeq ($(CONFIG_BUILD_ARM64_APPENDED_DTB_IMAGE),y) +KBUILD_IMAGE := $(boot)/$(subst $\",,$(CONFIG_BUILD_ARM64_APPENDED_KERNEL_IMAGE_NAME)) +else KBUILD_IMAGE := $(boot)/Image.gz +endif + KBUILD_DTBS := dtbs -all: Image.gz $(KBUILD_DTBS) +all: Image.gz $(KBUILD_DTBS) $(subst $\",,$(CONFIG_BUILD_ARM64_APPENDED_KERNEL_IMAGE_NAME)) Image: vmlinux @@ -140,6 +178,12 @@ dtbs_install: $(Q)$(MAKE) $(dtbinst)=$(boot)/dts +Image-dtb: vmlinux scripts dtbs + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + +Image.gz-dtb: vmlinux scripts dtbs Image.gz + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@
diff --git a/arch/arm64/boot/.gitignore b/arch/arm64/boot/.gitignore index 8dab0bb..34e3520 100644 --- a/arch/arm64/boot/.gitignore +++ b/arch/arm64/boot/.gitignore
@@ -1,2 +1,4 @@ Image +Image-dtb Image.gz +Image.gz-dtb
diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index 1f012c5..2c8cb86 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile
@@ -14,16 +14,29 @@ # Based on the ia64 boot/Makefile. # +include $(srctree)/arch/arm64/boot/dts/Makefile + OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S targets := Image Image.gz +DTB_NAMES := $(subst $\",,$(CONFIG_BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES)) +ifneq ($(DTB_NAMES),) +DTB_LIST := $(addsuffix .dtb,$(DTB_NAMES)) +else +DTB_LIST := $(dtb-y) +endif +DTB_OBJS := $(addprefix $(obj)/dts/,$(DTB_LIST)) + $(obj)/Image: vmlinux FORCE $(call if_changed,objcopy) $(obj)/Image.bz2: $(obj)/Image FORCE $(call if_changed,bzip2) +$(obj)/Image-dtb: $(obj)/Image $(DTB_OBJS) FORCE + $(call if_changed,cat) + $(obj)/Image.gz: $(obj)/Image FORCE $(call if_changed,gzip) @@ -36,6 +49,9 @@ $(obj)/Image.lzo: $(obj)/Image FORCE $(call if_changed,lzo) +$(obj)/Image.gz-dtb: $(obj)/Image.gz $(DTB_OBJS) FORCE + $(call if_changed,cat) + install: $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ $(obj)/Image System.map "$(INSTALL_PATH)"
diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile index c6684ab8..db5a708 100644 --- a/arch/arm64/boot/dts/Makefile +++ b/arch/arm64/boot/dts/Makefile
@@ -32,3 +32,17 @@ dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(foreach d,$(dts-dirs), $(wildcard $(dtstree)/$(d)/*.dts))) always := $(dtb-y) + +targets += dtbs + +DTB_NAMES := $(subst $\",,$(CONFIG_BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES)) +ifneq ($(DTB_NAMES),) +DTB_LIST := $(addsuffix .dtb,$(DTB_NAMES)) +else +DTB_LIST := $(dtb-y) +endif +targets += $(DTB_LIST) + +dtbs: $(addprefix $(obj)/, $(DTB_LIST)) + +clean-files := dts/*.dtb *.dtb
diff --git a/arch/arm64/boot/dts/arm/juno-r2.dts b/arch/arm64/boot/dts/arm/juno-r2.dts index b39b6d6..d2467e4 100644 --- a/arch/arm64/boot/dts/arm/juno-r2.dts +++ b/arch/arm64/boot/dts/arm/juno-r2.dts
@@ -98,6 +98,7 @@ next-level-cache = <&A72_L2>; clocks = <&scpi_dvfs 0>; cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_A72 &CLUSTER_COST_A72>; capacity-dmips-mhz = <1024>; }; @@ -115,6 +116,7 @@ next-level-cache = <&A72_L2>; clocks = <&scpi_dvfs 0>; cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_A72 &CLUSTER_COST_A72>; capacity-dmips-mhz = <1024>; }; @@ -132,6 +134,7 @@ next-level-cache = <&A53_L2>; clocks = <&scpi_dvfs 1>; cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_A53R2 &CLUSTER_COST_A53R2>; capacity-dmips-mhz = <485>; }; @@ -149,6 +152,7 @@ next-level-cache = <&A53_L2>; clocks = <&scpi_dvfs 1>; cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_A53R2 &CLUSTER_COST_A53R2>; capacity-dmips-mhz = <485>; }; @@ -166,6 +170,7 @@ next-level-cache = <&A53_L2>; clocks = <&scpi_dvfs 1>; cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_A53R2 &CLUSTER_COST_A53R2>; capacity-dmips-mhz = <485>; }; @@ -183,6 +188,7 @@ next-level-cache = <&A53_L2>; clocks = <&scpi_dvfs 1>; cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_A53R2 &CLUSTER_COST_A53R2>; capacity-dmips-mhz = <485>; }; @@ -199,6 +205,7 @@ cache-line-size = <64>; cache-sets = <1024>; }; + /include/ "juno-sched-energy.dtsi" }; pmu_a72 {
diff --git a/arch/arm64/boot/dts/arm/juno-sched-energy.dtsi b/arch/arm64/boot/dts/arm/juno-sched-energy.dtsi new file mode 100644 index 0000000..221196e --- /dev/null +++ b/arch/arm64/boot/dts/arm/juno-sched-energy.dtsi
@@ -0,0 +1,123 @@ +/* + * ARM JUNO specific energy cost model data. There are no unit requirements for + * the data. Data can be normalized to any reference point, but the + * normalization must be consistent. That is, one bogo-joule/watt must be the + * same quantity for all data, but we don't care what it is. + */ + +energy-costs { + /* Juno r0 Energy */ + CPU_COST_A57: core-cost0 { + busy-cost-data = < + 417 168 + 579 251 + 744 359 + 883 479 + 1024 616 + >; + idle-cost-data = < + 15 + 15 + 0 + 0 + >; + }; + CPU_COST_A53: core-cost1 { + busy-cost-data = < + 235 33 + 302 46 + 368 61 + 406 76 + 446 93 + >; + idle-cost-data = < + 6 + 6 + 0 + 0 + >; + }; + CLUSTER_COST_A57: cluster-cost0 { + busy-cost-data = < + 417 24 + 579 32 + 744 43 + 883 49 + 1024 64 + >; + idle-cost-data = < + 65 + 65 + 65 + 24 + >; + }; + CLUSTER_COST_A53: cluster-cost1 { + busy-cost-data = < + 235 26 + 302 30 + 368 39 + 406 47 + 446 57 + >; + idle-cost-data = < + 56 + 56 + 56 + 17 + >; + }; + /* Juno r2 Energy */ + CPU_COST_A72: core-cost2 { + busy-cost-data = < + 501 174 + 849 344 + 1024 526 + >; + idle-cost-data = < + 48 + 48 + 0 + 0 + >; + }; + CPU_COST_A53R2: core-cost3 { + busy-cost-data = < + 276 37 + 501 59 + 593 117 + >; + idle-cost-data = < + 33 + 33 + 0 + 0 + >; + }; + CLUSTER_COST_A72: cluster-cost2 { + busy-cost-data = < + 501 48 + 849 73 + 1024 107 + >; + idle-cost-data = < + 48 + 48 + 48 + 18 + >; + }; + CLUSTER_COST_A53R2: cluster-cost3 { + busy-cost-data = < + 276 41 + 501 86 + 593 107 + >; + idle-cost-data = < + 41 + 41 + 41 + 14 + >; + }; +};
diff --git a/arch/arm64/boot/dts/arm/juno.dts b/arch/arm64/boot/dts/arm/juno.dts index c9236c4..ae5306a 100644 --- a/arch/arm64/boot/dts/arm/juno.dts +++ b/arch/arm64/boot/dts/arm/juno.dts
@@ -97,6 +97,7 @@ next-level-cache = <&A57_L2>; clocks = <&scpi_dvfs 0>; cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_A57 &CLUSTER_COST_A57>; capacity-dmips-mhz = <1024>; }; @@ -114,6 +115,7 @@ next-level-cache = <&A57_L2>; clocks = <&scpi_dvfs 0>; cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_A57 &CLUSTER_COST_A57>; capacity-dmips-mhz = <1024>; }; @@ -131,6 +133,7 @@ next-level-cache = <&A53_L2>; clocks = <&scpi_dvfs 1>; cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_A53 &CLUSTER_COST_A53>; capacity-dmips-mhz = <578>; }; @@ -148,6 +151,7 @@ next-level-cache = <&A53_L2>; clocks = <&scpi_dvfs 1>; cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_A53 &CLUSTER_COST_A53>; capacity-dmips-mhz = <578>; }; @@ -165,6 +169,7 @@ next-level-cache = <&A53_L2>; clocks = <&scpi_dvfs 1>; cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_A53 &CLUSTER_COST_A53>; capacity-dmips-mhz = <578>; }; @@ -182,6 +187,7 @@ next-level-cache = <&A53_L2>; clocks = <&scpi_dvfs 1>; cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_A53 &CLUSTER_COST_A53>; capacity-dmips-mhz = <578>; }; @@ -198,6 +204,7 @@ cache-line-size = <64>; cache-sets = <1024>; }; + /include/ "juno-sched-energy.dtsi" }; pmu_a57 {
diff --git a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi index ff1dc89..66d48e3 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
@@ -92,7 +92,9 @@ cooling-max-level = <0>; #cooling-cells = <2>; /* min followed by max */ cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>; + sched-energy-costs = <&CPU_COST &CLUSTER_COST &SYSTEM_COST>; dynamic-power-coefficient = <311>; + capacity-dmips-mhz = <1024>; }; cpu1: cpu@1 { @@ -103,6 +105,8 @@ next-level-cache = <&CLUSTER0_L2>; operating-points-v2 = <&cpu_opp_table>; cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>; + sched-energy-costs = <&CPU_COST &CLUSTER_COST &SYSTEM_COST>; + capacity-dmips-mhz = <1024>; }; cpu2: cpu@2 { @@ -113,6 +117,7 @@ next-level-cache = <&CLUSTER0_L2>; operating-points-v2 = <&cpu_opp_table>; cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>; + capacity-dmips-mhz = <1024>; }; cpu3: cpu@3 { @@ -123,6 +128,8 @@ next-level-cache = <&CLUSTER0_L2>; operating-points-v2 = <&cpu_opp_table>; cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>; + sched-energy-costs = <&CPU_COST &CLUSTER_COST &SYSTEM_COST>; + capacity-dmips-mhz = <1024>; }; cpu4: cpu@100 { @@ -133,6 +140,7 @@ next-level-cache = <&CLUSTER1_L2>; operating-points-v2 = <&cpu_opp_table>; cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>; + capacity-dmips-mhz = <1024>; }; cpu5: cpu@101 { @@ -143,6 +151,8 @@ next-level-cache = <&CLUSTER1_L2>; operating-points-v2 = <&cpu_opp_table>; cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>; + sched-energy-costs = <&CPU_COST &CLUSTER_COST &SYSTEM_COST>; + capacity-dmips-mhz = <1024>; }; cpu6: cpu@102 { @@ -153,6 +163,8 @@ next-level-cache = <&CLUSTER1_L2>; operating-points-v2 = <&cpu_opp_table>; cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>; + sched-energy-costs = <&CPU_COST &CLUSTER_COST &SYSTEM_COST>; + capacity-dmips-mhz = <1024>; }; cpu7: cpu@103 { @@ -163,6 +175,8 @@ next-level-cache = <&CLUSTER1_L2>; operating-points-v2 = <&cpu_opp_table>; cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>; + sched-energy-costs = <&CPU_COST &CLUSTER_COST &SYSTEM_COST>; + capacity-dmips-mhz = <1024>; }; CLUSTER0_L2: l2-cache0 { @@ -172,6 +186,50 @@ CLUSTER1_L2: l2-cache1 { compatible = "cache"; }; + + energy-costs { + SYSTEM_COST: system-cost0 { + busy-cost-data = < + 1024 0 + >; + idle-cost-data = < + 0 + 0 + 0 + 0 + >; + }; + CLUSTER_COST: cluster-cost0 { + busy-cost-data = < + 178 16 + 369 29 + 622 47 + 819 75 + 1024 112 + >; + idle-cost-data = < + 107 + 107 + 47 + 0 + >; + }; + CPU_COST: core-cost0 { + busy-cost-data = < + 178 69 + 369 125 + 622 224 + 819 367 + 1024 670 + >; + idle-cost-data = < + 15 + 15 + 0 + 0 + >; + }; + }; }; cpu_opp_table: cpu_opp_table {
diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig new file mode 100644 index 0000000..5dcc3d5 --- /dev/null +++ b/arch/arm64/configs/cuttlefish_defconfig
@@ -0,0 +1,439 @@ +# CONFIG_FHANDLE is not set +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_MEMCG=y +CONFIG_MEMCG_SWAP=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CPUSETS=y +# CONFIG_PROC_PID_CPUSET is not set +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_BPF=y +CONFIG_SCHED_AUTOGROUP=y +CONFIG_SCHED_TUNE=y +CONFIG_DEFAULT_USE_ENERGY_AWARE=y +CONFIG_BLK_DEV_INITRD=y +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set +# CONFIG_RD_XZ is not set +# CONFIG_RD_LZO is not set +# CONFIG_RD_LZ4 is not set +CONFIG_SGETMASK_SYSCALL=y +# CONFIG_SYSFS_SYSCALL is not set +CONFIG_KALLSYMS_ALL=y +CONFIG_BPF_SYSCALL=y +CONFIG_EMBEDDED=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_COMPAT_BRK is not set +# CONFIG_SLAB_MERGE_DEFAULT is not set +CONFIG_PROFILING=y +CONFIG_KPROBES=y +CONFIG_JUMP_LABEL=y +CONFIG_CC_STACKPROTECTOR_STRONG=y +CONFIG_REFCOUNT_FULL=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_PCI=y +CONFIG_PCI_HOST_GENERIC=y +CONFIG_PREEMPT=y +CONFIG_HZ_100=y +# CONFIG_SPARSEMEM_VMEMMAP is not set +CONFIG_KSM=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_ZSMALLOC=y +CONFIG_SECCOMP=y +CONFIG_PARAVIRT=y +CONFIG_ARMV8_DEPRECATED=y +CONFIG_SWP_EMULATION=y +CONFIG_CP15_BARRIER_EMULATION=y +CONFIG_SETEND_EMULATION=y +CONFIG_ARM64_SW_TTBR0_PAN=y +CONFIG_ARM64_LSE_ATOMICS=y +CONFIG_RANDOMIZE_BASE=y +CONFIG_CMDLINE="console=ttyAMA0" +CONFIG_CMDLINE_EXTEND=y +# CONFIG_EFI is not set +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_COMPAT=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set +CONFIG_PM_DEBUG=y +CONFIG_CPU_IDLE=y +CONFIG_ARM_CPUIDLE=y +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +CONFIG_CPUFREQ_DT=y +CONFIG_ARM_BIG_LITTLE_CPUFREQ=y +CONFIG_ARM_DT_BL_CPUFREQ=y +CONFIG_ARM_SCPI_CPUFREQ=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_XFRM_INTERFACE=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_NET_IPGRE_DEMUX=y +CONFIG_NET_IPVTI=y +CONFIG_INET_ESP=y +# CONFIG_INET_XFRM_MODE_BEET is not set +CONFIG_INET_UDP_DIAG=y +CONFIG_INET_DIAG_DESTROY=y +CONFIG_TCP_CONG_ADVANCED=y +# CONFIG_TCP_CONG_BIC is not set +# CONFIG_TCP_CONG_WESTWOOD is not set +# CONFIG_TCP_CONG_HTCP is not set +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_VTI=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_BPF=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_OWNER=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_SOCKET_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_NAT=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_IP_NF_TARGET_NETMAP=y +CONFIG_IP_NF_TARGET_REDIRECT=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_NF_SOCKET_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MATCH_RPFILTER=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_L2TP=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NET_SCH_NETEM=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_CLS_BPF=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_CLS_ACT=y +CONFIG_VSOCKETS=y +CONFIG_VIRTIO_VSOCKETS=y +CONFIG_CFG80211=y +# CONFIG_CFG80211_DEFAULT_PS is not set +CONFIG_MAC80211=y +# CONFIG_MAC80211_RC_MINSTREL is not set +CONFIG_RFKILL=y +# CONFIG_UEVENT_HELPER is not set +CONFIG_DEVTMPFS=y +# CONFIG_ALLOW_DEV_COREDUMP is not set +CONFIG_DEBUG_DEVRES=y +CONFIG_OF_UNITTEST=y +CONFIG_ZRAM=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_VIRTIO_BLK=y +CONFIG_UID_SYS_STATS=y +CONFIG_SCSI=y +# CONFIG_SCSI_PROC_FS is not set +CONFIG_BLK_DEV_SD=y +CONFIG_SCSI_VIRTIO=y +CONFIG_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_CRYPT=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_DM_VERITY_AVB=y +CONFIG_NETDEVICES=y +CONFIG_NETCONSOLE=y +CONFIG_NETCONSOLE_DYNAMIC=y +CONFIG_TUN=y +CONFIG_VIRTIO_NET=y +# CONFIG_ETHERNET is not set +CONFIG_PHYLIB=y +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_PPTP=y +CONFIG_PPPOL2TP=y +CONFIG_USB_USBNET=y +# CONFIG_USB_NET_AX8817X is not set +# CONFIG_USB_NET_AX88179_178A is not set +# CONFIG_USB_NET_CDCETHER is not set +# CONFIG_USB_NET_CDC_NCM is not set +# CONFIG_USB_NET_NET1080 is not set +# CONFIG_USB_NET_CDC_SUBSET is not set +# CONFIG_USB_NET_ZAURUS is not set +# CONFIG_WLAN_VENDOR_ADMTEK is not set +# CONFIG_WLAN_VENDOR_ATH is not set +# CONFIG_WLAN_VENDOR_ATMEL is not set +# CONFIG_WLAN_VENDOR_BROADCOM is not set +# CONFIG_WLAN_VENDOR_CISCO is not set +# CONFIG_WLAN_VENDOR_INTEL is not set +# CONFIG_WLAN_VENDOR_INTERSIL is not set +# CONFIG_WLAN_VENDOR_MARVELL is not set +# CONFIG_WLAN_VENDOR_MEDIATEK is not set +# CONFIG_WLAN_VENDOR_RALINK is not set +# CONFIG_WLAN_VENDOR_REALTEK is not set +# CONFIG_WLAN_VENDOR_RSI is not set +# CONFIG_WLAN_VENDOR_ST is not set +# CONFIG_WLAN_VENDOR_TI is not set +# CONFIG_WLAN_VENDOR_ZYDAS is not set +# CONFIG_WLAN_VENDOR_QUANTENNA is not set +CONFIG_VIRT_WIFI=y +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_KEYRESET=y +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_INPUT_TABLET=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVMEM is not set +CONFIG_SERIAL_8250=y +# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set +CONFIG_SERIAL_8250_CONSOLE=y +# CONFIG_SERIAL_8250_EXAR is not set +CONFIG_SERIAL_8250_NR_UARTS=48 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_HW_RANDOM=y +CONFIG_HW_RANDOM_VIRTIO=y +# CONFIG_HW_RANDOM_CAVIUM is not set +# CONFIG_DEVPORT is not set +# CONFIG_I2C_COMPAT is not set +# CONFIG_I2C_HELPER_AUTO is not set +# CONFIG_HWMON is not set +CONFIG_THERMAL=y +CONFIG_CPU_THERMAL=y +CONFIG_MEDIA_SUPPORT=y +# CONFIG_VGA_ARB is not set +CONFIG_DRM=y +# CONFIG_DRM_FBDEV_EMULATION is not set +CONFIG_DRM_VIRTIO_GPU=y +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_HIDRAW=y +CONFIG_UHID=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_HOLTEK=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_ROCCAT=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_WACOM=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +CONFIG_USB_HIDDEV=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_GADGET=y +CONFIG_USB_CONFIGFS=y +CONFIG_USB_CONFIGFS_F_FS=y +CONFIG_USB_CONFIGFS_F_ACC=y +CONFIG_USB_CONFIGFS_F_AUDIO_SRC=y +CONFIG_USB_CONFIGFS_UEVENT=y +CONFIG_USB_CONFIGFS_F_MIDI=y +CONFIG_MMC=y +# CONFIG_PWRSEQ_EMMC is not set +# CONFIG_PWRSEQ_SIMPLE is not set +# CONFIG_MMC_BLOCK is not set +CONFIG_RTC_CLASS=y +# CONFIG_RTC_SYSTOHC is not set +CONFIG_RTC_DRV_PL031=y +CONFIG_VIRTIO_PCI=y +# CONFIG_VIRTIO_PCI_LEGACY is not set +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_INPUT=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y +CONFIG_STAGING=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_VSOC=y +CONFIG_ION=y +CONFIG_COMMON_CLK_SCPI=y +# CONFIG_COMMON_CLK_XGENE is not set +CONFIG_MAILBOX=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ARM_SCPI_PROTOCOL=y +# CONFIG_ARM_SCPI_POWER_DOMAIN is not set +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_EXT4_ENCRYPTION=y +CONFIG_F2FS_FS=y +CONFIG_F2FS_FS_SECURITY=y +CONFIG_F2FS_FS_ENCRYPTION=y +# CONFIG_DNOTIFY is not set +CONFIG_QUOTA=y +CONFIG_QFMT_V2=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_SDCARD_FS=y +CONFIG_PSTORE=y +CONFIG_PSTORE_CONSOLE=y +CONFIG_PSTORE_RAM=y +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=1024 +# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_STACK_USAGE=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_SOFTLOCKUP_DETECTOR=y +# CONFIG_DETECT_HUNG_TASK is not set +CONFIG_PANIC_TIMEOUT=5 +CONFIG_SCHEDSTATS=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_ENABLE_DEFAULT_TRACERS=y +CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_LSM_MMAP_MIN_ADDR=65536 +CONFIG_HARDENED_USERCOPY=y +CONFIG_SECURITY_SELINUX=y +CONFIG_CRYPTO_ADIANTUM=y +CONFIG_CRYPTO_SHA512=y +CONFIG_CRYPTO_LZ4=y +CONFIG_CRYPTO_ZSTD=y +CONFIG_CRYPTO_ANSI_CPRNG=y +CONFIG_CRYPTO_DEV_VIRTIO=y +CONFIG_XZ_DEC=y
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index b057965..14f170f 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig
@@ -4,6 +4,7 @@ CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_IRQ_TIME_ACCOUNTING=y +CONFIG_SCHED_WALT=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y @@ -24,8 +25,9 @@ CONFIG_CGROUP_PERF=y CONFIG_USER_NS=y CONFIG_SCHED_AUTOGROUP=y +CONFIG_SCHED_TUNE=y +CONFIG_DEFAULT_USE_ENERGY_AWARE=y CONFIG_BLK_DEV_INITRD=y -CONFIG_KALLSYMS_ALL=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_JUMP_LABEL=y @@ -69,13 +71,13 @@ CONFIG_PCI_LAYERSCAPE=y CONFIG_PCI_HISI=y CONFIG_PCIE_QCOM=y -CONFIG_PCIE_KIRIN=y CONFIG_PCIE_ARMADA_8K=y +CONFIG_PCIE_KIRIN=y CONFIG_PCI_AARDVARK=y CONFIG_PCIE_RCAR=y -CONFIG_PCIE_ROCKCHIP=m CONFIG_PCI_HOST_GENERIC=y CONFIG_PCI_XGENE=y +CONFIG_PCIE_ROCKCHIP=m CONFIG_ARM64_VA_BITS_48=y CONFIG_SCHED_MC=y CONFIG_NUMA=y @@ -93,6 +95,12 @@ CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y CONFIG_ARM_CPUIDLE=y CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_STAT=y +CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y CONFIG_CPUFREQ_DT=y CONFIG_ARM_BIG_LITTLE_CPUFREQ=y CONFIG_ARM_SCPI_CPUFREQ=y @@ -140,11 +148,10 @@ CONFIG_BT_LEDS=y # CONFIG_BT_DEBUGFS is not set CONFIG_BT_HCIUART=m -CONFIG_BT_HCIUART_LL=y -CONFIG_CFG80211=m -CONFIG_MAC80211=m +CONFIG_CFG80211=y +CONFIG_MAC80211=y CONFIG_MAC80211_LEDS=y -CONFIG_RFKILL=m +CONFIG_RFKILL=y CONFIG_NET_9P=y CONFIG_NET_9P_VIRTIO=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" @@ -210,21 +217,16 @@ CONFIG_ROCKCHIP_PHY=y CONFIG_USB_PEGASUS=m CONFIG_USB_RTL8150=m -CONFIG_USB_RTL8152=m -CONFIG_USB_USBNET=m -CONFIG_USB_NET_DM9601=m -CONFIG_USB_NET_SR9800=m -CONFIG_USB_NET_SMSC75XX=m -CONFIG_USB_NET_SMSC95XX=m -CONFIG_USB_NET_PLUSB=m -CONFIG_USB_NET_MCS7830=m +CONFIG_USB_RTL8152=y CONFIG_BRCMFMAC=m +CONFIG_RTL_CARDS=m CONFIG_WL18XX=m CONFIG_WLCORE_SDIO=m +CONFIG_USB_NET_RNDIS_WLAN=y CONFIG_INPUT_EVDEV=y CONFIG_KEYBOARD_ADC=m -CONFIG_KEYBOARD_CROS_EC=y CONFIG_KEYBOARD_GPIO=y +CONFIG_KEYBOARD_CROS_EC=y CONFIG_INPUT_MISC=y CONFIG_INPUT_PM8941_PWRKEY=y CONFIG_INPUT_HISI_POWERKEY=y @@ -275,20 +277,20 @@ CONFIG_I2C_RCAR=y CONFIG_I2C_CROS_EC_TUNNEL=y CONFIG_SPI=y -CONFIG_SPI_MESON_SPICC=m -CONFIG_SPI_MESON_SPIFC=m CONFIG_SPI_BCM2835=m CONFIG_SPI_BCM2835AUX=m +CONFIG_SPI_MESON_SPICC=m +CONFIG_SPI_MESON_SPIFC=m CONFIG_SPI_ORION=y CONFIG_SPI_PL022=y -CONFIG_SPI_QUP=y CONFIG_SPI_ROCKCHIP=y +CONFIG_SPI_QUP=y CONFIG_SPI_S3C64XX=y CONFIG_SPI_SPIDEV=m CONFIG_SPMI=y -CONFIG_PINCTRL_IPQ8074=y CONFIG_PINCTRL_SINGLE=y CONFIG_PINCTRL_MAX77620=y +CONFIG_PINCTRL_IPQ8074=y CONFIG_PINCTRL_MSM8916=y CONFIG_PINCTRL_MSM8994=y CONFIG_PINCTRL_MSM8996=y @@ -315,9 +317,8 @@ CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y CONFIG_CPU_THERMAL=y CONFIG_THERMAL_EMULATION=y -CONFIG_BRCMSTB_THERMAL=m -CONFIG_EXYNOS_THERMAL=y CONFIG_ROCKCHIP_THERMAL=m +CONFIG_EXYNOS_THERMAL=y CONFIG_WATCHDOG=y CONFIG_S3C2410_WATCHDOG=y CONFIG_MESON_GXBB_WATCHDOG=m @@ -336,9 +337,9 @@ CONFIG_MFD_SPMI_PMIC=y CONFIG_MFD_RK808=y CONFIG_MFD_SEC_CORE=y +CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_REGULATOR_AXP20X=y CONFIG_REGULATOR_FAN53555=y -CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_REGULATOR_GPIO=y CONFIG_REGULATOR_HI6421V530=y CONFIG_REGULATOR_HI655X=y @@ -348,16 +349,13 @@ CONFIG_REGULATOR_QCOM_SPMI=y CONFIG_REGULATOR_RK808=y CONFIG_REGULATOR_S2MPS11=y +CONFIG_RC_DEVICES=y +CONFIG_IR_MESON=m CONFIG_MEDIA_SUPPORT=m CONFIG_MEDIA_CAMERA_SUPPORT=y CONFIG_MEDIA_ANALOG_TV_SUPPORT=y CONFIG_MEDIA_DIGITAL_TV_SUPPORT=y CONFIG_MEDIA_CONTROLLER=y -CONFIG_MEDIA_RC_SUPPORT=y -CONFIG_RC_CORE=m -CONFIG_RC_DEVICES=y -CONFIG_RC_DECODERS=y -CONFIG_IR_MESON=m CONFIG_VIDEO_V4L2_SUBDEV_API=y # CONFIG_DVB_NET is not set CONFIG_V4L_MEM2MEM_DRIVERS=y @@ -395,7 +393,6 @@ CONFIG_BACKLIGHT_GENERIC=m CONFIG_BACKLIGHT_PWM=m CONFIG_BACKLIGHT_LP855X=m -CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set @@ -494,7 +491,6 @@ CONFIG_COMMON_CLK_RK808=y CONFIG_COMMON_CLK_SCPI=y CONFIG_COMMON_CLK_CS2000_CP=y -CONFIG_COMMON_CLK_S2MPS11=y CONFIG_CLK_QORIQ=y CONFIG_COMMON_CLK_PWM=y CONFIG_COMMON_CLK_QCOM=y @@ -533,13 +529,13 @@ CONFIG_PWM_ROCKCHIP=y CONFIG_PWM_SAMSUNG=y CONFIG_PWM_TEGRA=m -CONFIG_PHY_RCAR_GEN3_USB2=y -CONFIG_PHY_HI6220_USB=y -CONFIG_PHY_SUN4I_USB=y -CONFIG_PHY_ROCKCHIP_INNO_USB2=y -CONFIG_PHY_ROCKCHIP_EMMC=y -CONFIG_PHY_ROCKCHIP_PCIE=m CONFIG_PHY_XGENE=y +CONFIG_PHY_SUN4I_USB=y +CONFIG_PHY_HI6220_USB=y +CONFIG_PHY_RCAR_GEN3_USB2=y +CONFIG_PHY_ROCKCHIP_EMMC=y +CONFIG_PHY_ROCKCHIP_INNO_USB2=y +CONFIG_PHY_ROCKCHIP_PCIE=m CONFIG_PHY_TEGRA_XUSB=y CONFIG_QCOM_L2_PMU=y CONFIG_QCOM_L3_PMU=y @@ -581,29 +577,27 @@ CONFIG_KVM=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y -CONFIG_LOCKUP_DETECTOR=y -# CONFIG_SCHED_DEBUG is not set +CONFIG_SCHEDSTATS=y # CONFIG_DEBUG_PREEMPT is not set -# CONFIG_FTRACE is not set +CONFIG_PROVE_LOCKING=y +CONFIG_FUNCTION_TRACER=y +CONFIG_IRQSOFF_TRACER=y +CONFIG_PREEMPT_TRACER=y +CONFIG_SCHED_TRACER=y CONFIG_MEMTEST=y CONFIG_SECURITY=y CONFIG_CRYPTO_ECHAINIV=y CONFIG_CRYPTO_ANSI_CPRNG=y CONFIG_ARM64_CRYPTO=y -CONFIG_CRYPTO_SHA256_ARM64=m CONFIG_CRYPTO_SHA512_ARM64=m CONFIG_CRYPTO_SHA1_ARM64_CE=y CONFIG_CRYPTO_SHA2_ARM64_CE=y CONFIG_CRYPTO_GHASH_ARM64_CE=y CONFIG_CRYPTO_CRCT10DIF_ARM64_CE=m CONFIG_CRYPTO_CRC32_ARM64_CE=m -CONFIG_CRYPTO_AES_ARM64=m -CONFIG_CRYPTO_AES_ARM64_CE=m CONFIG_CRYPTO_AES_ARM64_CE_CCM=y CONFIG_CRYPTO_AES_ARM64_CE_BLK=y -CONFIG_CRYPTO_AES_ARM64_NEON_BLK=m CONFIG_CRYPTO_CHACHA20_NEON=m CONFIG_CRYPTO_AES_ARM64_BS=m
diff --git a/arch/arm64/configs/ranchu64_defconfig b/arch/arm64/configs/ranchu64_defconfig new file mode 100644 index 0000000..71e8102 --- /dev/null +++ b/arch/arm64/configs/ranchu64_defconfig
@@ -0,0 +1,309 @@ +# CONFIG_LOCALVERSION_AUTO is not set +# CONFIG_SWAP is not set +CONFIG_POSIX_MQUEUE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_SCHED_AUTOGROUP=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_ARCH_MMAP_RND_BITS=24 +CONFIG_ARCH_MMAP_RND_COMPAT_BITS=16 +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_DEADLINE is not set +CONFIG_ARCH_VEXPRESS=y +CONFIG_NR_CPUS=4 +CONFIG_PREEMPT=y +CONFIG_KSM=y +CONFIG_SECCOMP=y +CONFIG_ARMV8_DEPRECATED=y +CONFIG_SWP_EMULATION=y +CONFIG_CP15_BARRIER_EMULATION=y +CONFIG_SETEND_EMULATION=y +CONFIG_CMDLINE="console=ttyAMA0" +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_COMPAT=y +CONFIG_PM_AUTOSLEEP=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set +CONFIG_PM_DEBUG=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_INET_DIAG_DESTROY=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_INET_ESP=y +# CONFIG_INET_LRO is not set +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_OWNER=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_RPFILTER=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_TARGET_ECN=y +CONFIG_IP_NF_TARGET_TTL=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MATCH_AH=y +CONFIG_IP6_NF_MATCH_EUI64=y +CONFIG_IP6_NF_MATCH_FRAG=y +CONFIG_IP6_NF_MATCH_OPTS=y +CONFIG_IP6_NF_MATCH_HL=y +CONFIG_IP6_NF_MATCH_IPV6HEADER=y +CONFIG_IP6_NF_MATCH_MH=y +CONFIG_IP6_NF_MATCH_RT=y +CONFIG_IP6_NF_TARGET_HL=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_BRIDGE=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_CLS_ACT=y +# CONFIG_WIRELESS is not set +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_VIRTIO_BLK=y +CONFIG_SCSI=y +# CONFIG_SCSI_PROC_FS is not set +CONFIG_BLK_DEV_SD=y +# CONFIG_SCSI_LOWLEVEL is not set +CONFIG_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_CRYPT=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_NETDEVICES=y +CONFIG_TUN=y +CONFIG_VIRTIO_NET=y +CONFIG_SMC91X=y +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +# CONFIG_WLAN is not set +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_KEYRESET=y +CONFIG_KEYBOARD_GOLDFISH_EVENTS=y +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_INPUT_TABLET=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_SERIO_SERPORT is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVMEM is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_CONSOLE=y +# CONFIG_HW_RANDOM is not set +CONFIG_BATTERY_GOLDFISH=y +# CONFIG_HWMON is not set +CONFIG_MEDIA_SUPPORT=y +CONFIG_FB=y +CONFIG_FB_GOLDFISH=y +CONFIG_FB_SIMPLE=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_HIDRAW=y +CONFIG_UHID=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_WACOM=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +# CONFIG_USB_SUPPORT is not set +CONFIG_RTC_CLASS=y +CONFIG_VIRTIO_MMIO=y +CONFIG_STAGING=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_TIMED_GPIO=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y +CONFIG_SW_SYNC_USER=y +CONFIG_ION=y +CONFIG_GOLDFISH_AUDIO=y +CONFIG_GOLDFISH=y +CONFIG_GOLDFISH_PIPE=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_EXT2_FS=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_QUOTA=y +CONFIG_FUSE_FS=y +CONFIG_CUSE=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_NFS_FS=y +CONFIG_ROOT_NFS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_FS=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_PANIC_TIMEOUT=5 +# CONFIG_SCHED_DEBUG is not set +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +# CONFIG_FTRACE is not set +CONFIG_ATOMIC64_SELFTEST=y +CONFIG_DEBUG_RODATA=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y
diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha20-neon-glue.c index cbdb75d1..f6cfab1 100644 --- a/arch/arm64/crypto/chacha20-neon-glue.c +++ b/arch/arm64/crypto/chacha20-neon-glue.c
@@ -19,7 +19,7 @@ */ #include <crypto/algapi.h> -#include <crypto/chacha20.h> +#include <crypto/chacha.h> #include <crypto/internal/skcipher.h> #include <linux/kernel.h> #include <linux/module.h> @@ -34,20 +34,20 @@ asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src); static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, unsigned int bytes) { - u8 buf[CHACHA20_BLOCK_SIZE]; + u8 buf[CHACHA_BLOCK_SIZE]; - while (bytes >= CHACHA20_BLOCK_SIZE * 4) { + while (bytes >= CHACHA_BLOCK_SIZE * 4) { chacha20_4block_xor_neon(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE * 4; - src += CHACHA20_BLOCK_SIZE * 4; - dst += CHACHA20_BLOCK_SIZE * 4; + bytes -= CHACHA_BLOCK_SIZE * 4; + src += CHACHA_BLOCK_SIZE * 4; + dst += CHACHA_BLOCK_SIZE * 4; state[12] += 4; } - while (bytes >= CHACHA20_BLOCK_SIZE) { + while (bytes >= CHACHA_BLOCK_SIZE) { chacha20_block_xor_neon(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE; - src += CHACHA20_BLOCK_SIZE; - dst += CHACHA20_BLOCK_SIZE; + bytes -= CHACHA_BLOCK_SIZE; + src += CHACHA_BLOCK_SIZE; + dst += CHACHA_BLOCK_SIZE; state[12]++; } if (bytes) { @@ -60,17 +60,17 @@ static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, static int chacha20_neon(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_walk walk; u32 state[16]; int err; - if (!may_use_simd() || req->cryptlen <= CHACHA20_BLOCK_SIZE) - return crypto_chacha20_crypt(req); + if (!may_use_simd() || req->cryptlen <= CHACHA_BLOCK_SIZE) + return crypto_chacha_crypt(req); err = skcipher_walk_virt(&walk, req, true); - crypto_chacha20_init(state, ctx, walk.iv); + crypto_chacha_init(state, ctx, walk.iv); kernel_neon_begin(); while (walk.nbytes > 0) { @@ -93,14 +93,14 @@ static struct skcipher_alg alg = { .base.cra_driver_name = "chacha20-neon", .base.cra_priority = 300, .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha20_ctx), + .base.cra_ctxsize = sizeof(struct chacha_ctx), .base.cra_module = THIS_MODULE, - .min_keysize = CHACHA20_KEY_SIZE, - .max_keysize = CHACHA20_KEY_SIZE, - .ivsize = CHACHA20_IV_SIZE, - .chunksize = CHACHA20_BLOCK_SIZE, - .walksize = 4 * CHACHA20_BLOCK_SIZE, + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 4 * CHACHA_BLOCK_SIZE, .setkey = crypto_chacha20_setkey, .encrypt = chacha20_neon, .decrypt = chacha20_neon,
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index efbeb3e..656b959 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -29,6 +29,14 @@ struct sha1_ce_state { asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, int blocks); +#ifdef CONFIG_CFI_CLANG +static inline void __cfi_sha1_ce_transform(struct sha1_state *sst, + u8 const *src, int blocks) +{ + sha1_ce_transform((struct sha1_ce_state *)sst, src, blocks); +} +#define sha1_ce_transform __cfi_sha1_ce_transform +#endif const u32 sha1_ce_offsetof_count = offsetof(struct sha1_ce_state, sst.count); const u32 sha1_ce_offsetof_finalize = offsetof(struct sha1_ce_state, finalize);
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index fd1ff2b..ddda748 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -29,6 +29,14 @@ struct sha256_ce_state { asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src, int blocks); +#ifdef CONFIG_CFI_CLANG +static inline void __cfi_sha2_ce_transform(struct sha256_state *sst, + u8 const *src, int blocks) +{ + sha2_ce_transform((struct sha256_ce_state *)sst, src, blocks); +} +#define sha2_ce_transform __cfi_sha2_ce_transform +#endif const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state, sst.count);
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 4572a9b5..20bfb8e 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -29,7 +29,9 @@ ({ \ u64 reg; \ asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##nvh),\ - "mrs_s %0, " __stringify(r##vh),\ + DEFINE_MRS_S \ + "mrs_s %0, " __stringify(r##vh) "\n"\ + UNDEFINE_MRS_S, \ ARM64_HAS_VIRT_HOST_EXTN) \ : "=r" (reg)); \ reg; \ @@ -39,7 +41,9 @@ do { \ u64 __val = (u64)(v); \ asm volatile(ALTERNATIVE("msr " __stringify(r##nvh) ", %x0",\ - "msr_s " __stringify(r##vh) ", %x0",\ + DEFINE_MSR_S \ + "msr_s " __stringify(r##vh) ", %x0\n"\ + UNDEFINE_MSR_S, \ ARM64_HAS_VIRT_HOST_EXTN) \ : : "rZ" (__val)); \ } while (0)
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index eec9576..7bf7837 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h
@@ -17,7 +17,12 @@ #else /* __ASSEMBLER__ */ +#ifdef CONFIG_LTO_CLANG +#define __LSE_PREAMBLE ".arch armv8-a+lse\n" +#else __asm__(".arch_extension lse"); +#define __LSE_PREAMBLE +#endif /* Move the ll/sc atomics out-of-line */ #define __LL_SC_INLINE notrace @@ -30,7 +35,7 @@ __asm__(".arch_extension lse"); /* In-line patching at runtime */ #define ARM64_LSE_ATOMIC_INSN(llsc, lse) \ - ALTERNATIVE(llsc, lse, ARM64_HAS_LSE_ATOMICS) + ALTERNATIVE(llsc, __LSE_PREAMBLE lse, ARM64_HAS_LSE_ATOMICS) #endif /* __ASSEMBLER__ */ #else /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 779d7a2..f7ff065 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h
@@ -132,7 +132,7 @@ static inline void cpu_install_idmap(void) * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD, * avoiding the possibility of conflicting TLB entries being allocated. */ -static inline void cpu_replace_ttbr1(pgd_t *pgd) +static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgd) { typedef void (ttbr_replace_func)(phys_addr_t); extern ttbr_replace_func idmap_cpu_replace_ttbr1;
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index ede80d4..7bbbba5 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h
@@ -318,6 +318,10 @@ #define SCTLR_EL1_CP15BEN (1 << 5) /* id_aa64isar0 */ +#define ID_AA64ISAR0_DP_SHIFT 44 +#define ID_AA64ISAR0_SM4_SHIFT 40 +#define ID_AA64ISAR0_SM3_SHIFT 36 +#define ID_AA64ISAR0_SHA3_SHIFT 32 #define ID_AA64ISAR0_RDM_SHIFT 28 #define ID_AA64ISAR0_ATOMICS_SHIFT 20 #define ID_AA64ISAR0_CRC32_SHIFT 16 @@ -465,20 +469,39 @@ #include <linux/types.h> -asm( -" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" -" .equ .L__reg_num_x\\num, \\num\n" -" .endr\n" +#define __DEFINE_MRS_MSR_S_REGNUM \ +" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ +" .equ .L__reg_num_x\\num, \\num\n" \ +" .endr\n" \ " .equ .L__reg_num_xzr, 31\n" -"\n" -" .macro mrs_s, rt, sreg\n" - __emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt)) + +#define DEFINE_MRS_S \ + __DEFINE_MRS_MSR_S_REGNUM \ +" .macro mrs_s, rt, sreg\n" \ +" .inst 0xd5200000|(\\sreg)|(.L__reg_num_\\rt)\n" \ " .endm\n" -"\n" -" .macro msr_s, sreg, rt\n" - __emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt)) + +#define DEFINE_MSR_S \ + __DEFINE_MRS_MSR_S_REGNUM \ +" .macro msr_s, sreg, rt\n" \ +" .inst 0xd5000000|(\\sreg)|(.L__reg_num_\\rt)\n" \ " .endm\n" -); + +#define UNDEFINE_MRS_S \ +" .purgem mrs_s\n" + +#define UNDEFINE_MSR_S \ +" .purgem msr_s\n" + +#define __mrs_s(r, v) \ + DEFINE_MRS_S \ +" mrs_s %0, " __stringify(r) "\n" \ + UNDEFINE_MRS_S : "=r" (v) + +#define __msr_s(r, v) \ + DEFINE_MSR_S \ +" msr_s " __stringify(r) ", %x0\n" \ + UNDEFINE_MSR_S : : "rZ" (v) /* * Unlike read_cpuid, calls to read_sysreg are never expected to be @@ -504,15 +527,15 @@ asm( * For registers without architectural names, or simply unsupported by * GAS. */ -#define read_sysreg_s(r) ({ \ - u64 __val; \ - asm volatile("mrs_s %0, " __stringify(r) : "=r" (__val)); \ - __val; \ +#define read_sysreg_s(r) ({ \ + u64 __val; \ + asm volatile(__mrs_s(r, __val)); \ + __val; \ }) -#define write_sysreg_s(v, r) do { \ - u64 __val = (u64)(v); \ - asm volatile("msr_s " __stringify(r) ", %x0" : : "rZ" (__val)); \ +#define write_sysreg_s(v, r) do { \ + u64 __val = (u64)(v); \ + asm volatile(__msr_s(r, __val)); \ } while (0) static inline void config_sctlr_el1(u32 clear, u32 set)
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index b320228..b7bd705 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h
@@ -33,6 +33,20 @@ int pcibus_to_node(struct pci_bus *bus); #endif /* CONFIG_NUMA */ +#include <linux/arch_topology.h> + +/* Replace task scheduler's default frequency-invariant accounting */ +#define arch_scale_freq_capacity topology_get_freq_scale + +/* Replace task scheduler's default max-frequency-invariant accounting */ +#define arch_scale_max_freq_capacity topology_get_max_freq_scale + +/* Replace task scheduler's default cpu-invariant accounting */ +#define arch_scale_cpu_capacity topology_get_cpu_scale + +/* Enable topology flag updates */ +#define arch_update_cpu_topology topology_update_cpu_topology + #include <asm-generic/topology.h> #endif /* _ASM_ARM_TOPOLOGY_H */
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index b3fdeee..f243c57d 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -37,5 +37,10 @@ #define HWCAP_FCMA (1 << 14) #define HWCAP_LRCPC (1 << 15) #define HWCAP_DCPOP (1 << 16) +#define HWCAP_SHA3 (1 << 17) +#define HWCAP_SM3 (1 << 18) +#define HWCAP_SM4 (1 << 19) +#define HWCAP_ASIMDDP (1 << 20) +#define HWCAP_SHA512 (1 << 21) #endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 003dd39..12941cf 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c
@@ -107,6 +107,10 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) * sync with the documentation of the CPU feature register ABI. */ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_DP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_SM4_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_SM3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_SHA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0), @@ -842,7 +846,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, ID_AA64PFR0_CSV3_SHIFT); } -static int kpti_install_ng_mappings(void *__unused) +static int __nocfi kpti_install_ng_mappings(void *__unused) { typedef void (kpti_remap_fn)(int, int, phys_addr_t); extern kpti_remap_fn idmap_kpti_install_ng_mappings; @@ -1031,9 +1035,14 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_SHA512), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDRDM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA3), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM3), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 3118859..1ff1c5a 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c
@@ -69,6 +69,11 @@ static const char *const hwcap_str[] = { "fcma", "lrcpc", "dcpop", + "sha3", + "sm3", + "sm4", + "asimddp", + "sha512", NULL };
diff --git a/arch/arm64/kernel/io.c b/arch/arm64/kernel/io.c index 354be2a..79b1738 100644 --- a/arch/arm64/kernel/io.c +++ b/arch/arm64/kernel/io.c
@@ -25,8 +25,7 @@ */ void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count) { - while (count && (!IS_ALIGNED((unsigned long)from, 8) || - !IS_ALIGNED((unsigned long)to, 8))) { + while (count && !IS_ALIGNED((unsigned long)from, 8)) { *(u8 *)to = __raw_readb(from); from++; to++; @@ -54,23 +53,22 @@ EXPORT_SYMBOL(__memcpy_fromio); */ void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count) { - while (count && (!IS_ALIGNED((unsigned long)to, 8) || - !IS_ALIGNED((unsigned long)from, 8))) { - __raw_writeb(*(volatile u8 *)from, to); + while (count && !IS_ALIGNED((unsigned long)to, 8)) { + __raw_writeb(*(u8 *)from, to); from++; to++; count--; } while (count >= 8) { - __raw_writeq(*(volatile u64 *)from, to); + __raw_writeq(*(u64 *)from, to); from += 8; to += 8; count -= 8; } while (count) { - __raw_writeb(*(volatile u8 *)from, to); + __raw_writeb(*(u8 *)from, to); from++; to++; count--;
diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds index 22e36a2..99eb7c2 100644 --- a/arch/arm64/kernel/module.lds +++ b/arch/arm64/kernel/module.lds
@@ -1,5 +1,5 @@ SECTIONS { - .plt (NOLOAD) : { BYTE(0) } - .init.plt (NOLOAD) : { BYTE(0) } - .text.ftrace_trampoline (NOLOAD) : { BYTE(0) } + .plt : { BYTE(0) } + .init.plt : { BYTE(0) } + .text.ftrace_trampoline : { BYTE(0) } }
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 9e77373..e5d670a 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c
@@ -170,6 +170,70 @@ void machine_restart(char *cmd) while (1); } +/* + * dump a block of kernel memory from around the given address + */ +static void show_data(unsigned long addr, int nbytes, const char *name) +{ + int i, j; + int nlines; + u32 *p; + + /* + * don't attempt to dump non-kernel addresses or + * values that are probably just small negative numbers + */ + if (addr < PAGE_OFFSET || addr > -256UL) + return; + + printk("\n%s: %#lx:\n", name, addr); + + /* + * round address down to a 32 bit boundary + * and always dump a multiple of 32 bytes + */ + p = (u32 *)(addr & ~(sizeof(u32) - 1)); + nbytes += (addr & (sizeof(u32) - 1)); + nlines = (nbytes + 31) / 32; + + + for (i = 0; i < nlines; i++) { + /* + * just display low 16 bits of address to keep + * each line of the dump < 80 characters + */ + printk("%04lx ", (unsigned long)p & 0xffff); + for (j = 0; j < 8; j++) { + u32 data; + if (probe_kernel_address(p, data)) { + pr_cont(" ********"); + } else { + pr_cont(" %08x", data); + } + ++p; + } + pr_cont("\n"); + } +} + +static void show_extra_register_data(struct pt_regs *regs, int nbytes) +{ + mm_segment_t fs; + unsigned int i; + + fs = get_fs(); + set_fs(KERNEL_DS); + show_data(regs->pc - nbytes, nbytes * 2, "PC"); + show_data(regs->regs[30] - nbytes, nbytes * 2, "LR"); + show_data(regs->sp - nbytes, nbytes * 2, "SP"); + for (i = 0; i < 30; i++) { + char name[4]; + snprintf(name, sizeof(name), "X%u", i); + show_data(regs->regs[i] - nbytes, nbytes * 2, name); + } + set_fs(fs); +} + void __show_regs(struct pt_regs *regs) { int i, top_reg; @@ -205,6 +269,9 @@ void __show_regs(struct pt_regs *regs) pr_cont("\n"); } + if (!user_mode(regs)) + show_extra_register_data(regs, 128); + printk("\n"); } void show_regs(struct pt_regs * regs)
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 8d48b23..6eb9350 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c
@@ -21,6 +21,7 @@ #include <linux/of.h> #include <linux/sched.h> #include <linux/sched/topology.h> +#include <linux/sched/energy.h> #include <linux/slab.h> #include <linux/string.h> @@ -280,8 +281,58 @@ void store_cpu_topology(unsigned int cpuid) topology_populated: update_siblings_masks(cpuid); + topology_detect_flags(); } +#ifdef CONFIG_SCHED_SMT +static int smt_flags(void) +{ + return cpu_smt_flags() | topology_smt_flags(); +} +#endif + +#ifdef CONFIG_SCHED_MC +static int core_flags(void) +{ + return cpu_core_flags() | topology_core_flags(); +} +#endif + +static int cpu_flags(void) +{ + return topology_cpu_flags(); +} + +static inline +const struct sched_group_energy * const cpu_core_energy(int cpu) +{ + return sge_array[cpu][SD_LEVEL0]; +} + +static inline +const struct sched_group_energy * const cpu_cluster_energy(int cpu) +{ + return sge_array[cpu][SD_LEVEL1]; +} + +static inline +const struct sched_group_energy * const cpu_system_energy(int cpu) +{ + return sge_array[cpu][SD_LEVEL2]; +} + +static struct sched_domain_topology_level arm64_topology[] = { +#ifdef CONFIG_SCHED_SMT + { cpu_smt_mask, smt_flags, SD_INIT_NAME(SMT) }, +#endif +#ifdef CONFIG_SCHED_MC + { cpu_coregroup_mask, core_flags, cpu_core_energy, SD_INIT_NAME(MC) }, +#endif + { cpu_cpu_mask, cpu_flags, cpu_cluster_energy, SD_INIT_NAME(DIE) }, + { cpu_cpu_mask, NULL, cpu_system_energy, SD_INIT_NAME(SYS) }, + { NULL, } +}; + static void __init reset_cpu_topology(void) { unsigned int cpu; @@ -310,4 +361,6 @@ void __init init_cpu_topology(void) */ if (of_have_populated_dt() && parse_dt_topology()) reset_cpu_topology(); + else + set_sched_topology(arm64_topology); }
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index b215c71..ef3f9d9 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile
@@ -15,6 +15,7 @@ ccflags-y := -shared -fno-common -fno-builtin ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +ccflags-y += $(DISABLE_LTO) # Disable gcov profiling for VDSO code GCOV_PROFILE := n
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index 76320e9..c39872a 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -309,7 +309,7 @@ b.ne 4f ldr x2, 6f 2: - cbz w1, 3f + cbz x1, 3f stp xzr, x2, [x1] 3: /* res == NULL. */
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 6edfdf5..a2e6d19 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -61,7 +61,7 @@ #define TRAMP_TEXT \ . = ALIGN(PAGE_SIZE); \ VMLINUX_SYMBOL(__entry_tramp_text_start) = .; \ - *(.entry.tramp.text) \ + KEEP(*(.entry.tramp.text)) \ . = ALIGN(PAGE_SIZE); \ VMLINUX_SYMBOL(__entry_tramp_text_end) = .; #else @@ -151,11 +151,11 @@ . = ALIGN(4); .altinstructions : { __alt_instructions = .; - *(.altinstructions) + KEEP(*(.altinstructions)) __alt_instructions_end = .; } .altinstr_replacement : { - *(.altinstr_replacement) + KEEP(*(.altinstr_replacement)) } . = ALIGN(PAGE_SIZE);
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index f04400d..bfa00a9 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile
@@ -3,7 +3,11 @@ # Makefile for Kernel-based Virtual Machine module, HYP part # -ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING +ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING $(DISABLE_CFI) + +ifeq ($(cc-name),clang) +ccflags-y += -fno-jump-tables +endif KVM=../../../../virt/kvm
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index ba88b5b..d2cbb1c 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c
@@ -166,7 +166,7 @@ static void *__dma_alloc(struct device *dev, size_t size, /* create a coherent mapping */ page = virt_to_page(ptr); coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP, - prot, NULL); + prot, __builtin_return_address(0)); if (!coherent_ptr) goto no_map;
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 4ac419c..560aefd 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -742,7 +742,7 @@ static void cmm_exit(void) * Return value: * 0 on success / other on failure **/ -static int cmm_set_disable(const char *val, struct kernel_param *kp) +static int cmm_set_disable(const char *val, const struct kernel_param *kp) { int disable = simple_strtoul(val, NULL, 10);
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index c5290ae..9bfdee9e 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile
@@ -111,6 +111,8 @@ KBUILD_CFLAGS += $(call cc-option,-mno-80387) KBUILD_CFLAGS += $(call cc-option,-mno-fp-ret-in-387) + KBUILD_CFLAGS += -fno-pic + # By default gcc and clang use a stack alignment of 16 bytes for x86. # However the standard kernel entry on x86-64 leaves the stack on an # 8-byte boundary. If the compiler isn't informed about the actual
diff --git a/arch/x86/configs/i386_ranchu_defconfig b/arch/x86/configs/i386_ranchu_defconfig new file mode 100644 index 0000000..8648412 --- /dev/null +++ b/arch/x86/configs/i386_ranchu_defconfig
@@ -0,0 +1,421 @@ +# CONFIG_64BIT is not set +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_POSIX_MQUEUE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_CGROUPS=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +# CONFIG_COMPAT_BRK is not set +CONFIG_ARCH_MMAP_RND_BITS=16 +CONFIG_PARTITION_ADVANCED=y +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +CONFIG_MAC_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_SGI_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_SMP=y +CONFIG_X86_BIGSMP=y +CONFIG_MCORE2=y +CONFIG_X86_GENERIC=y +CONFIG_HPET_TIMER=y +CONFIG_NR_CPUS=512 +CONFIG_PREEMPT=y +# CONFIG_X86_MCE is not set +CONFIG_X86_REBOOTFIXUPS=y +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y +CONFIG_KSM=y +CONFIG_CMA=y +# CONFIG_MTRR_SANITIZER is not set +CONFIG_EFI=y +CONFIG_EFI_STUB=y +CONFIG_HZ_100=y +CONFIG_PHYSICAL_START=0x100000 +CONFIG_PM_AUTOSLEEP=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set +CONFIG_PM_DEBUG=y +CONFIG_CPU_FREQ=y +# CONFIG_CPU_FREQ_STAT is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_PCIEPORTBUS=y +# CONFIG_PCIEASPM is not set +CONFIG_PCCARD=y +CONFIG_YENTA=y +CONFIG_HOTPLUG_PCI=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_INET_ESP=y +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +CONFIG_INET_DIAG_DESTROY=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NETLABEL=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_OWNER=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_CLS_ACT=y +CONFIG_CFG80211=y +CONFIG_MAC80211=y +CONFIG_MAC80211_LEDS=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DMA_CMA=y +CONFIG_CMA_SIZE_MBYTES=16 +CONFIG_CONNECTOR=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_VIRTIO_BLK=y +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_ISCSI_ATTRS=y +# CONFIG_SCSI_LOWLEVEL is not set +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_ATA_PIIX=y +CONFIG_PATA_AMD=y +CONFIG_PATA_OLDPIIX=y +CONFIG_PATA_SCH=y +CONFIG_PATA_MPIIX=y +CONFIG_ATA_GENERIC=y +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_DEBUG=y +CONFIG_DM_CRYPT=y +CONFIG_DM_MIRROR=y +CONFIG_DM_ZERO=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_NETDEVICES=y +CONFIG_NETCONSOLE=y +CONFIG_TUN=y +CONFIG_VIRTIO_NET=y +CONFIG_BNX2=y +CONFIG_TIGON3=y +CONFIG_NET_TULIP=y +CONFIG_E100=y +CONFIG_E1000=y +CONFIG_E1000E=y +CONFIG_SKY2=y +CONFIG_NE2K_PCI=y +CONFIG_FORCEDETH=y +CONFIG_8139TOO=y +# CONFIG_8139TOO_PIO is not set +CONFIG_R8169=y +CONFIG_FDDI=y +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_USB_USBNET=y +CONFIG_INPUT_POLLDEV=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_KEYRESET=y +# CONFIG_KEYBOARD_ATKBD is not set +CONFIG_KEYBOARD_GOLDFISH_EVENTS=y +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_INPUT_TABLET=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_NONSTANDARD=y +# CONFIG_DEVMEM is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_NVRAM=y +CONFIG_I2C_I801=y +CONFIG_BATTERY_GOLDFISH=y +CONFIG_WATCHDOG=y +CONFIG_MEDIA_SUPPORT=y +CONFIG_AGP=y +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +CONFIG_DRM=y +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y +CONFIG_FB_EFI=y +CONFIG_FB_GOLDFISH=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +# CONFIG_LCD_CLASS_DEVICE is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_HIDRAW=y +CONFIG_UHID=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_HOLTEK=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_ROCCAT=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_WACOM=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_MON=y +CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_TT_NEWSCHED is not set +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_UHCI_HCD=y +CONFIG_USB_PRINTER=y +CONFIG_USB_STORAGE=y +CONFIG_USB_OTG_WAKELOCK=y +CONFIG_EDAC=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_HCTOSYS is not set +CONFIG_DMADEVICES=y +CONFIG_VIRTIO_PCI=y +CONFIG_STAGING=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y +CONFIG_SYNC_FILE=y +CONFIG_ION=y +CONFIG_GOLDFISH_AUDIO=y +CONFIG_SND_HDA_INTEL=y +CONFIG_GOLDFISH=y +CONFIG_GOLDFISH_PIPE=y +CONFIG_GOLDFISH_SYNC=y +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ISCSI_IBFT_FIND=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_FUSE_FS=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_PSTORE=y +CONFIG_PSTORE_CONSOLE=y +CONFIG_PSTORE_RAM=y +# CONFIG_NETWORK_FILESYSTEMS is not set +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=y +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=2048 +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_PANIC_TIMEOUT=5 +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +CONFIG_SCHED_TRACER=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +CONFIG_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_CRYPTO_AES_586=y +CONFIG_CRYPTO_TWOFISH=y +CONFIG_ASYMMETRIC_KEY_TYPE=y +CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y +CONFIG_X509_CERTIFICATE_PARSER=y +CONFIG_PKCS7_MESSAGE_PARSER=y +CONFIG_PKCS7_TEST_KEY=y +# CONFIG_VIRTUALIZATION is not set +CONFIG_CRC_T10DIF=y
diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig new file mode 100644 index 0000000..0b06775 --- /dev/null +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig
@@ -0,0 +1,469 @@ +CONFIG_POSIX_MQUEUE=y +# CONFIG_FHANDLE is not set +# CONFIG_USELIB is not set +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_CGROUPS=y +CONFIG_MEMCG=y +CONFIG_MEMCG_SWAP=y +CONFIG_CGROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_BPF=y +CONFIG_NAMESPACES=y +CONFIG_BLK_DEV_INITRD=y +# CONFIG_RD_LZ4 is not set +CONFIG_KALLSYMS_ALL=y +# CONFIG_PCSPKR_PLATFORM is not set +CONFIG_BPF_SYSCALL=y +CONFIG_EMBEDDED=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_OPROFILE=y +CONFIG_KPROBES=y +CONFIG_JUMP_LABEL=y +CONFIG_CC_STACKPROTECTOR_STRONG=y +CONFIG_REFCOUNT_FULL=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_SMP=y +CONFIG_HYPERVISOR_GUEST=y +CONFIG_PARAVIRT=y +CONFIG_PARAVIRT_SPINLOCKS=y +CONFIG_MCORE2=y +CONFIG_PROCESSOR_SELECT=y +# CONFIG_CPU_SUP_CENTAUR is not set +CONFIG_NR_CPUS=8 +CONFIG_PREEMPT=y +# CONFIG_MICROCODE is not set +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y +CONFIG_KSM=y +CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_ZSMALLOC=y +# CONFIG_MTRR is not set +CONFIG_HZ_100=y +CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y +CONFIG_PHYSICAL_START=0x200000 +CONFIG_PHYSICAL_ALIGN=0x1000000 +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 reboot=p" +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set +CONFIG_PM_DEBUG=y +CONFIG_ACPI_PROCFS_POWER=y +# CONFIG_ACPI_FAN is not set +# CONFIG_ACPI_THERMAL is not set +# CONFIG_X86_PM_TIMER is not set +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_X86_ACPI_CPUFREQ=y +CONFIG_PCI_MMCONFIG=y +CONFIG_PCI_MSI=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=y +CONFIG_IA32_EMULATION=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_XFRM_INTERFACE=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_NET_IPVTI=y +CONFIG_INET_ESP=y +# CONFIG_INET_XFRM_MODE_BEET is not set +CONFIG_INET_DIAG_DESTROY=y +CONFIG_TCP_CONG_ADVANCED=y +# CONFIG_TCP_CONG_BIC is not set +# CONFIG_TCP_CONG_WESTWOOD is not set +# CONFIG_TCP_CONG_HTCP is not set +CONFIG_TCP_MD5SIG=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_VTI=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NETLABEL=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_BPF=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_OWNER=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_SOCKET_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_NAT=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_IP_NF_TARGET_NETMAP=y +CONFIG_IP_NF_TARGET_REDIRECT=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_NF_SOCKET_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MATCH_IPV6HEADER=y +CONFIG_IP6_NF_MATCH_RPFILTER=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NET_SCH_NETEM=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_CLS_BPF=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_CLS_ACT=y +CONFIG_VSOCKETS=y +CONFIG_VIRTIO_VSOCKETS=y +CONFIG_CFG80211=y +CONFIG_MAC80211=y +CONFIG_RFKILL=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +CONFIG_DEBUG_DEVRES=y +CONFIG_OF=y +CONFIG_OF_UNITTEST=y +# CONFIG_PNP_DEBUG_MESSAGES is not set +CONFIG_ZRAM=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_VIRTIO_BLK=y +CONFIG_UID_SYS_STATS=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_VIRTIO=y +CONFIG_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_CRYPT=y +CONFIG_DM_MIRROR=y +CONFIG_DM_ZERO=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_DM_ANDROID_VERITY=y +CONFIG_NETDEVICES=y +CONFIG_NETCONSOLE=y +CONFIG_NETCONSOLE_DYNAMIC=y +CONFIG_TUN=y +CONFIG_VIRTIO_NET=y +# CONFIG_ETHERNET is not set +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_USB_USBNET=y +# CONFIG_USB_NET_AX8817X is not set +# CONFIG_USB_NET_AX88179_178A is not set +# CONFIG_USB_NET_CDCETHER is not set +# CONFIG_USB_NET_CDC_NCM is not set +# CONFIG_USB_NET_NET1080 is not set +# CONFIG_USB_NET_CDC_SUBSET is not set +# CONFIG_USB_NET_ZAURUS is not set +# CONFIG_WLAN_VENDOR_ADMTEK is not set +# CONFIG_WLAN_VENDOR_ATH is not set +# CONFIG_WLAN_VENDOR_ATMEL is not set +# CONFIG_WLAN_VENDOR_BROADCOM is not set +# CONFIG_WLAN_VENDOR_CISCO is not set +# CONFIG_WLAN_VENDOR_INTEL is not set +# CONFIG_WLAN_VENDOR_INTERSIL is not set +# CONFIG_WLAN_VENDOR_MARVELL is not set +# CONFIG_WLAN_VENDOR_MEDIATEK is not set +# CONFIG_WLAN_VENDOR_RALINK is not set +# CONFIG_WLAN_VENDOR_REALTEK is not set +# CONFIG_WLAN_VENDOR_RSI is not set +# CONFIG_WLAN_VENDOR_ST is not set +# CONFIG_WLAN_VENDOR_TI is not set +# CONFIG_WLAN_VENDOR_ZYDAS is not set +# CONFIG_WLAN_VENDOR_QUANTENNA is not set +CONFIG_MAC80211_HWSIM=y +CONFIG_VIRT_WIFI=y +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_KEYRESET=y +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_INPUT_TABLET=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_KEYCHORD=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_SERIO_I8042 is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVMEM is not set +CONFIG_SERIAL_8250=y +# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set +CONFIG_SERIAL_8250_CONSOLE=y +# CONFIG_SERIAL_8250_EXAR is not set +CONFIG_SERIAL_8250_NR_UARTS=48 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_HW_RANDOM=y +# CONFIG_HW_RANDOM_INTEL is not set +# CONFIG_HW_RANDOM_AMD is not set +# CONFIG_HW_RANDOM_VIA is not set +CONFIG_HW_RANDOM_VIRTIO=y +CONFIG_HPET=y +# CONFIG_HPET_MMAP_DEFAULT is not set +# CONFIG_DEVPORT is not set +# CONFIG_ACPI_I2C_OPREGION is not set +# CONFIG_I2C_COMPAT is not set +# CONFIG_I2C_HELPER_AUTO is not set +CONFIG_PTP_1588_CLOCK=y +# CONFIG_HWMON is not set +# CONFIG_X86_PKG_TEMP_THERMAL is not set +CONFIG_WATCHDOG=y +CONFIG_SOFT_WATCHDOG=y +CONFIG_MEDIA_SUPPORT=y +# CONFIG_VGA_ARB is not set +CONFIG_DRM=y +# CONFIG_DRM_FBDEV_EMULATION is not set +CONFIG_DRM_VIRTIO_GPU=y +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_HIDRAW=y +CONFIG_UHID=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_HOLTEK=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_ROCCAT=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_WACOM=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +CONFIG_USB_HIDDEV=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_GADGET=y +CONFIG_USB_DUMMY_HCD=y +CONFIG_USB_CONFIGFS=y +CONFIG_USB_CONFIGFS_F_FS=y +CONFIG_USB_CONFIGFS_F_ACC=y +CONFIG_USB_CONFIGFS_F_AUDIO_SRC=y +CONFIG_USB_CONFIGFS_UEVENT=y +CONFIG_USB_CONFIGFS_F_MIDI=y +CONFIG_RTC_CLASS=y +CONFIG_SW_SYNC=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_INPUT=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y +CONFIG_STAGING=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_VSOC=y +CONFIG_ION=y +# CONFIG_X86_PLATFORM_DEVICES is not set +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +# CONFIG_FIRMWARE_MEMMAP is not set +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_EXT4_ENCRYPTION=y +CONFIG_F2FS_FS=y +CONFIG_F2FS_FS_SECURITY=y +CONFIG_F2FS_FS_ENCRYPTION=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_QFMT_V2=y +CONFIG_AUTOFS4_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_SDCARD_FS=y +CONFIG_PSTORE=y +CONFIG_PSTORE_CONSOLE=y +CONFIG_PSTORE_RAM=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=y +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=1024 +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_STACK_USAGE=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_DEBUG_STACKOVERFLOW=y +CONFIG_HARDLOCKUP_DETECTOR=y +CONFIG_PANIC_TIMEOUT=5 +CONFIG_SCHEDSTATS=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_ENABLE_DEFAULT_TRACERS=y +CONFIG_IO_DELAY_NONE=y +CONFIG_DEBUG_BOOT_PARAMS=y +CONFIG_OPTIMIZE_INLINING=y +CONFIG_UNWINDER_FRAME_POINTER=y +CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_PATH=y +CONFIG_HARDENED_USERCOPY=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 +CONFIG_CRYPTO_RSA=y +# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_ADIANTUM=y +CONFIG_CRYPTO_SHA512=y +CONFIG_CRYPTO_LZ4=y +CONFIG_CRYPTO_ZSTD=y +CONFIG_CRYPTO_DEV_VIRTIO=y +CONFIG_ASYMMETRIC_KEY_TYPE=y +CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y +CONFIG_X509_CERTIFICATE_PARSER=y +CONFIG_SYSTEM_TRUSTED_KEYRING=y +CONFIG_SYSTEM_TRUSTED_KEYS="verity_dev_keys.x509"
diff --git a/arch/x86/configs/x86_64_ranchu_defconfig b/arch/x86/configs/x86_64_ranchu_defconfig new file mode 100644 index 0000000..964b93d --- /dev/null +++ b/arch/x86/configs/x86_64_ranchu_defconfig
@@ -0,0 +1,416 @@ +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_POSIX_MQUEUE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_CGROUPS=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +# CONFIG_COMPAT_BRK is not set +CONFIG_ARCH_MMAP_RND_BITS=32 +CONFIG_ARCH_MMAP_RND_COMPAT_BITS=16 +CONFIG_PARTITION_ADVANCED=y +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +CONFIG_MAC_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_SGI_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_SMP=y +CONFIG_MCORE2=y +CONFIG_MAXSMP=y +CONFIG_PREEMPT=y +# CONFIG_X86_MCE is not set +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y +CONFIG_KSM=y +CONFIG_CMA=y +# CONFIG_MTRR_SANITIZER is not set +CONFIG_EFI=y +CONFIG_EFI_STUB=y +CONFIG_HZ_100=y +CONFIG_PHYSICAL_START=0x100000 +CONFIG_PM_AUTOSLEEP=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set +CONFIG_PM_DEBUG=y +CONFIG_CPU_FREQ=y +# CONFIG_CPU_FREQ_STAT is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_PCI_MMCONFIG=y +CONFIG_PCIEPORTBUS=y +# CONFIG_PCIEASPM is not set +CONFIG_PCCARD=y +CONFIG_YENTA=y +CONFIG_HOTPLUG_PCI=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=y +CONFIG_IA32_EMULATION=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_INET_ESP=y +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +CONFIG_INET_DIAG_DESTROY=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NETLABEL=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_OWNER=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_CLS_ACT=y +CONFIG_CFG80211=y +CONFIG_MAC80211=y +CONFIG_MAC80211_LEDS=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DMA_CMA=y +CONFIG_CONNECTOR=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_VIRTIO_BLK=y +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_ISCSI_ATTRS=y +# CONFIG_SCSI_LOWLEVEL is not set +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_ATA_PIIX=y +CONFIG_PATA_AMD=y +CONFIG_PATA_OLDPIIX=y +CONFIG_PATA_SCH=y +CONFIG_PATA_MPIIX=y +CONFIG_ATA_GENERIC=y +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_DEBUG=y +CONFIG_DM_CRYPT=y +CONFIG_DM_MIRROR=y +CONFIG_DM_ZERO=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_NETDEVICES=y +CONFIG_NETCONSOLE=y +CONFIG_TUN=y +CONFIG_VIRTIO_NET=y +CONFIG_BNX2=y +CONFIG_TIGON3=y +CONFIG_NET_TULIP=y +CONFIG_E100=y +CONFIG_E1000=y +CONFIG_E1000E=y +CONFIG_SKY2=y +CONFIG_NE2K_PCI=y +CONFIG_FORCEDETH=y +CONFIG_8139TOO=y +# CONFIG_8139TOO_PIO is not set +CONFIG_R8169=y +CONFIG_FDDI=y +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_USB_USBNET=y +CONFIG_INPUT_POLLDEV=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_KEYRESET=y +# CONFIG_KEYBOARD_ATKBD is not set +CONFIG_KEYBOARD_GOLDFISH_EVENTS=y +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_INPUT_TABLET=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_NONSTANDARD=y +# CONFIG_DEVMEM is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_NVRAM=y +CONFIG_I2C_I801=y +CONFIG_BATTERY_GOLDFISH=y +CONFIG_WATCHDOG=y +CONFIG_MEDIA_SUPPORT=y +CONFIG_AGP=y +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +CONFIG_DRM=y +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y +CONFIG_FB_EFI=y +CONFIG_FB_GOLDFISH=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +# CONFIG_LCD_CLASS_DEVICE is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_HIDRAW=y +CONFIG_UHID=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_HOLTEK=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_ROCCAT=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_WACOM=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_MON=y +CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_TT_NEWSCHED is not set +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_UHCI_HCD=y +CONFIG_USB_PRINTER=y +CONFIG_USB_STORAGE=y +CONFIG_USB_OTG_WAKELOCK=y +CONFIG_EDAC=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_HCTOSYS is not set +CONFIG_DMADEVICES=y +CONFIG_VIRTIO_PCI=y +CONFIG_STAGING=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y +CONFIG_SYNC_FILE=y +CONFIG_ION=y +CONFIG_GOLDFISH_AUDIO=y +CONFIG_SND_HDA_INTEL=y +CONFIG_GOLDFISH=y +CONFIG_GOLDFISH_PIPE=y +CONFIG_GOLDFISH_SYNC=y +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ISCSI_IBFT_FIND=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_FUSE_FS=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_PSTORE=y +CONFIG_PSTORE_CONSOLE=y +CONFIG_PSTORE_RAM=y +# CONFIG_NETWORK_FILESYSTEMS is not set +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=y +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_PANIC_TIMEOUT=5 +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +CONFIG_SCHED_TRACER=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +CONFIG_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_CRYPTO_TWOFISH=y +CONFIG_ASYMMETRIC_KEY_TYPE=y +CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y +CONFIG_X509_CERTIFICATE_PARSER=y +CONFIG_PKCS7_MESSAGE_PARSER=y +CONFIG_PKCS7_TEST_KEY=y +# CONFIG_VIRTUALIZATION is not set +CONFIG_CRC_T10DIF=y
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c index 1e6af1b..dfc91b9 100644 --- a/arch/x86/crypto/chacha20_glue.c +++ b/arch/x86/crypto/chacha20_glue.c
@@ -10,7 +10,7 @@ */ #include <crypto/algapi.h> -#include <crypto/chacha20.h> +#include <crypto/chacha.h> #include <crypto/internal/skcipher.h> #include <linux/kernel.h> #include <linux/module.h> @@ -29,31 +29,31 @@ static bool chacha20_use_avx2; static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src, unsigned int bytes) { - u8 buf[CHACHA20_BLOCK_SIZE]; + u8 buf[CHACHA_BLOCK_SIZE]; #ifdef CONFIG_AS_AVX2 if (chacha20_use_avx2) { - while (bytes >= CHACHA20_BLOCK_SIZE * 8) { + while (bytes >= CHACHA_BLOCK_SIZE * 8) { chacha20_8block_xor_avx2(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE * 8; - src += CHACHA20_BLOCK_SIZE * 8; - dst += CHACHA20_BLOCK_SIZE * 8; + bytes -= CHACHA_BLOCK_SIZE * 8; + src += CHACHA_BLOCK_SIZE * 8; + dst += CHACHA_BLOCK_SIZE * 8; state[12] += 8; } } #endif - while (bytes >= CHACHA20_BLOCK_SIZE * 4) { + while (bytes >= CHACHA_BLOCK_SIZE * 4) { chacha20_4block_xor_ssse3(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE * 4; - src += CHACHA20_BLOCK_SIZE * 4; - dst += CHACHA20_BLOCK_SIZE * 4; + bytes -= CHACHA_BLOCK_SIZE * 4; + src += CHACHA_BLOCK_SIZE * 4; + dst += CHACHA_BLOCK_SIZE * 4; state[12] += 4; } - while (bytes >= CHACHA20_BLOCK_SIZE) { + while (bytes >= CHACHA_BLOCK_SIZE) { chacha20_block_xor_ssse3(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE; - src += CHACHA20_BLOCK_SIZE; - dst += CHACHA20_BLOCK_SIZE; + bytes -= CHACHA_BLOCK_SIZE; + src += CHACHA_BLOCK_SIZE; + dst += CHACHA_BLOCK_SIZE; state[12]++; } if (bytes) { @@ -66,7 +66,7 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src, static int chacha20_simd(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); u32 *state, state_buf[16 + 2] __aligned(8); struct skcipher_walk walk; int err; @@ -74,20 +74,20 @@ static int chacha20_simd(struct skcipher_request *req) BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16); state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN); - if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd()) - return crypto_chacha20_crypt(req); + if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd()) + return crypto_chacha_crypt(req); err = skcipher_walk_virt(&walk, req, true); - crypto_chacha20_init(state, ctx, walk.iv); + crypto_chacha_init(state, ctx, walk.iv); kernel_fpu_begin(); - while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { + while (walk.nbytes >= CHACHA_BLOCK_SIZE) { chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, - rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE)); + rounddown(walk.nbytes, CHACHA_BLOCK_SIZE)); err = skcipher_walk_done(&walk, - walk.nbytes % CHACHA20_BLOCK_SIZE); + walk.nbytes % CHACHA_BLOCK_SIZE); } if (walk.nbytes) { @@ -106,14 +106,14 @@ static struct skcipher_alg alg = { .base.cra_driver_name = "chacha20-simd", .base.cra_priority = 300, .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha20_ctx), + .base.cra_ctxsize = sizeof(struct chacha_ctx), .base.cra_alignmask = sizeof(u32) - 1, .base.cra_module = THIS_MODULE, - .min_keysize = CHACHA20_KEY_SIZE, - .max_keysize = CHACHA20_KEY_SIZE, - .ivsize = CHACHA20_IV_SIZE, - .chunksize = CHACHA20_BLOCK_SIZE, + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, .setkey = crypto_chacha20_setkey, .encrypt = chacha20_simd, .decrypt = chacha20_simd,
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index 28c3720..a69670b 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c
@@ -83,35 +83,37 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) { if (unlikely(!sctx->wset)) { if (!sctx->uset) { - memcpy(sctx->u, dctx->r, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u, dctx->r); + memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); + poly1305_simd_mult(sctx->u, dctx->r.r); sctx->uset = true; } memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u + 5, dctx->r); + poly1305_simd_mult(sctx->u + 5, dctx->r.r); memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u + 10, dctx->r); + poly1305_simd_mult(sctx->u + 10, dctx->r.r); sctx->wset = true; } blocks = srclen / (POLY1305_BLOCK_SIZE * 4); - poly1305_4block_avx2(dctx->h, src, dctx->r, blocks, sctx->u); + poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks, + sctx->u); src += POLY1305_BLOCK_SIZE * 4 * blocks; srclen -= POLY1305_BLOCK_SIZE * 4 * blocks; } #endif if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) { if (unlikely(!sctx->uset)) { - memcpy(sctx->u, dctx->r, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u, dctx->r); + memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); + poly1305_simd_mult(sctx->u, dctx->r.r); sctx->uset = true; } blocks = srclen / (POLY1305_BLOCK_SIZE * 2); - poly1305_2block_sse2(dctx->h, src, dctx->r, blocks, sctx->u); + poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks, + sctx->u); src += POLY1305_BLOCK_SIZE * 2 * blocks; srclen -= POLY1305_BLOCK_SIZE * 2 * blocks; } if (srclen >= POLY1305_BLOCK_SIZE) { - poly1305_block_sse2(dctx->h, src, dctx->r, 1); + poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1); srclen -= POLY1305_BLOCK_SIZE; } return srclen;
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 0a550dc..4505ffe 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile
@@ -186,7 +186,8 @@ sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=both) \ - $(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic $(LTO_CFLAGS) + $(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic $(LTO_CFLAGS) \ + $(filter --target=% --gcc-toolchain=%,$(KBUILD_CFLAGS)) GCOV_PROFILE := n #
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index abff76b..a7a7677 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c
@@ -592,7 +592,7 @@ enum __force_cpu_type { static int force_cpu_type; -static int set_cpu_type(const char *str, struct kernel_param *kp) +static int set_cpu_type(const char *str, const struct kernel_param *kp) { if (!strcmp(str, "timer")) { force_cpu_type = timer;
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index db6d90e..e3b18ad 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c
@@ -430,6 +430,7 @@ static void xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */ * data back is to call: */ tick_nohz_idle_enter(); + tick_nohz_idle_stop_tick_protected(); cpuhp_online_idle(CPUHP_AP_ONLINE_IDLE); }
diff --git a/build.config.cuttlefish.aarch64 b/build.config.cuttlefish.aarch64 new file mode 100644 index 0000000..90d28e1 --- /dev/null +++ b/build.config.cuttlefish.aarch64
@@ -0,0 +1,16 @@ +ARCH=arm64 +BRANCH=android-4.14 +CLANG_TRIPLE=aarch64-linux-gnu- +CROSS_COMPILE=aarch64-linux-androidkernel- +DEFCONFIG=cuttlefish_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +POST_DEFCONFIG_CMDS="check_defconfig" +CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r349610/bin +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9/bin +FILES=" +arch/arm64/boot/Image.gz +vmlinux +System.map +" +STOP_SHIP_TRACEPRINTK=1
diff --git a/build.config.cuttlefish.x86_64 b/build.config.cuttlefish.x86_64 new file mode 100644 index 0000000..daff8d7 --- /dev/null +++ b/build.config.cuttlefish.x86_64
@@ -0,0 +1,16 @@ +ARCH=x86_64 +BRANCH=android-4.14 +CLANG_TRIPLE=x86_64-linux-gnu- +CROSS_COMPILE=x86_64-linux-androidkernel- +DEFCONFIG=x86_64_cuttlefish_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +POST_DEFCONFIG_CMDS="check_defconfig" +CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r349610/bin +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin +FILES=" +arch/x86/boot/bzImage +vmlinux +System.map +" +STOP_SHIP_TRACEPRINTK=1
diff --git a/build.config.goldfish.arm b/build.config.goldfish.arm new file mode 100644 index 0000000..ff5646a --- /dev/null +++ b/build.config.goldfish.arm
@@ -0,0 +1,13 @@ +ARCH=arm +BRANCH=android-4.4 +CROSS_COMPILE=arm-linux-androidkernel- +DEFCONFIG=ranchu_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/arm/arm-linux-androideabi-4.9/bin +FILES=" +arch/arm/boot/zImage +vmlinux +System.map +" +STOP_SHIP_TRACEPRINTK=1
diff --git a/build.config.goldfish.arm64 b/build.config.goldfish.arm64 new file mode 100644 index 0000000..4c896a67 --- /dev/null +++ b/build.config.goldfish.arm64
@@ -0,0 +1,13 @@ +ARCH=arm64 +BRANCH=android-4.4 +CROSS_COMPILE=aarch64-linux-android- +DEFCONFIG=ranchu64_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9/bin +FILES=" +arch/arm64/boot/Image +vmlinux +System.map +" +STOP_SHIP_TRACEPRINTK=1
diff --git a/build.config.goldfish.mips b/build.config.goldfish.mips new file mode 100644 index 0000000..9a14a44 --- /dev/null +++ b/build.config.goldfish.mips
@@ -0,0 +1,12 @@ +ARCH=mips +BRANCH=android-4.4 +CROSS_COMPILE=mips64el-linux-android- +DEFCONFIG=ranchu_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/mips/mips64el-linux-android-4.9/bin +FILES=" +vmlinux +System.map +" +STOP_SHIP_TRACEPRINTK=1
diff --git a/build.config.goldfish.mips64 b/build.config.goldfish.mips64 new file mode 100644 index 0000000..6ad9759 --- /dev/null +++ b/build.config.goldfish.mips64
@@ -0,0 +1,12 @@ +ARCH=mips +BRANCH=android-4.4 +CROSS_COMPILE=mips64el-linux-android- +DEFCONFIG=ranchu64_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/mips/mips64el-linux-android-4.9/bin +FILES=" +vmlinux +System.map +" +STOP_SHIP_TRACEPRINTK=1
diff --git a/build.config.goldfish.x86 b/build.config.goldfish.x86 new file mode 100644 index 0000000..2266c62 --- /dev/null +++ b/build.config.goldfish.x86
@@ -0,0 +1,13 @@ +ARCH=x86 +BRANCH=android-4.4 +CROSS_COMPILE=x86_64-linux-android- +DEFCONFIG=i386_ranchu_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin +FILES=" +arch/x86/boot/bzImage +vmlinux +System.map +" +STOP_SHIP_TRACEPRINTK=1
diff --git a/build.config.goldfish.x86_64 b/build.config.goldfish.x86_64 new file mode 100644 index 0000000..08c42c2 --- /dev/null +++ b/build.config.goldfish.x86_64
@@ -0,0 +1,13 @@ +ARCH=x86_64 +BRANCH=android-4.4 +CROSS_COMPILE=x86_64-linux-android- +DEFCONFIG=x86_64_ranchu_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin +FILES=" +arch/x86/boot/bzImage +vmlinux +System.map +" +STOP_SHIP_TRACEPRINTK=1
diff --git a/certs/system_keyring.c b/certs/system_keyring.c index 8172871..4ba922f 100644 --- a/certs/system_keyring.c +++ b/certs/system_keyring.c
@@ -264,5 +264,46 @@ int verify_pkcs7_signature(const void *data, size_t len, return ret; } EXPORT_SYMBOL_GPL(verify_pkcs7_signature); - #endif /* CONFIG_SYSTEM_DATA_VERIFICATION */ + +/** + * verify_signature_one - Verify a signature with keys from given keyring + * @sig: The signature to be verified + * @trusted_keys: Trusted keys to use (NULL for builtin trusted keys only, + * (void *)1UL for all trusted keys). + * @keyid: key description (not partial) + */ +int verify_signature_one(const struct public_key_signature *sig, + struct key *trusted_keys, const char *keyid) +{ + key_ref_t ref; + struct key *key; + int ret; + + if (!sig) + return -EBADMSG; + if (!trusted_keys) { + trusted_keys = builtin_trusted_keys; + } else if (trusted_keys == (void *)1UL) { +#ifdef CONFIG_SECONDARY_TRUSTED_KEYRING + trusted_keys = secondary_trusted_keys; +#else + trusted_keys = builtin_trusted_keys; +#endif + } + + ref = keyring_search(make_key_ref(trusted_keys, 1), + &key_type_asymmetric, keyid); + if (IS_ERR(ref)) { + pr_err("Asymmetric key (%s) not found in keyring(%s)\n", + keyid, trusted_keys->description); + return -ENOKEY; + } + + key = key_ref_to_ptr(ref); + ret = verify_signature(key, sig); + key_put(key); + return ret; +} +EXPORT_SYMBOL_GPL(verify_signature_one); +
diff --git a/crypto/Kconfig b/crypto/Kconfig index 84f99f8..edee908 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig
@@ -389,6 +389,34 @@ Support for key wrapping (NIST SP800-38F / RFC3394) without padding. +config CRYPTO_NHPOLY1305 + tristate + select CRYPTO_HASH + select CRYPTO_POLY1305 + +config CRYPTO_ADIANTUM + tristate "Adiantum support" + select CRYPTO_CHACHA20 + select CRYPTO_POLY1305 + select CRYPTO_NHPOLY1305 + help + Adiantum is a tweakable, length-preserving encryption mode + designed for fast and secure disk encryption, especially on + CPUs without dedicated crypto instructions. It encrypts + each sector using the XChaCha12 stream cipher, two passes of + an ε-almost-∆-universal hash function, and an invocation of + the AES-256 block cipher on a single 16-byte block. On CPUs + without AES instructions, Adiantum is much faster than + AES-XTS. + + Adiantum's security is provably reducible to that of its + underlying stream and block ciphers, subject to a security + bound. Unlike XTS, Adiantum is a true wide-block encryption + mode, so it actually provides an even stronger notion of + security than XTS, subject to the security bound. + + If unsure, say N. + comment "Hash modes" config CRYPTO_CMAC @@ -1326,18 +1354,26 @@ Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html> config CRYPTO_CHACHA20 - tristate "ChaCha20 cipher algorithm" + tristate "ChaCha stream cipher algorithms" select CRYPTO_BLKCIPHER help - ChaCha20 cipher algorithm, RFC7539. + The ChaCha20, XChaCha20, and XChaCha12 stream cipher algorithms. ChaCha20 is a 256-bit high-speed stream cipher designed by Daniel J. Bernstein and further specified in RFC7539 for use in IETF protocols. - This is the portable C implementation of ChaCha20. - - See also: + This is the portable C implementation of ChaCha20. See also: <http://cr.yp.to/chacha/chacha-20080128.pdf> + XChaCha20 is the application of the XSalsa20 construction to ChaCha20 + rather than to Salsa20. XChaCha20 extends ChaCha20's nonce length + from 64 bits (or 96 bits using the RFC7539 convention) to 192 bits, + while provably retaining ChaCha20's security. See also: + <https://cr.yp.to/snuffle/xsalsa-20081128.pdf> + + XChaCha12 is XChaCha20 reduced to 12 rounds, with correspondingly + reduced security margin but increased performance. It can be needed + in some performance-sensitive scenarios. + config CRYPTO_CHACHA20_X86_64 tristate "ChaCha20 cipher algorithm (x86_64/SSSE3/AVX2)" depends on X86 && 64BIT @@ -1638,6 +1674,15 @@ help This is the LZ4 high compression mode algorithm. +config CRYPTO_ZSTD + tristate "Zstd compression algorithm" + select CRYPTO_ALGAPI + select CRYPTO_ACOMP2 + select ZSTD_COMPRESS + select ZSTD_DECOMPRESS + help + This is the zstd algorithm. + comment "Random Number Generation" config CRYPTO_ANSI_CPRNG
diff --git a/crypto/Makefile b/crypto/Makefile index 56282e2d..3bbd410 100644 --- a/crypto/Makefile +++ b/crypto/Makefile
@@ -83,6 +83,8 @@ obj-$(CONFIG_CRYPTO_XTS) += xts.o obj-$(CONFIG_CRYPTO_CTR) += ctr.o obj-$(CONFIG_CRYPTO_KEYWRAP) += keywrap.o +obj-$(CONFIG_CRYPTO_ADIANTUM) += adiantum.o +obj-$(CONFIG_CRYPTO_NHPOLY1305) += nhpoly1305.o obj-$(CONFIG_CRYPTO_GCM) += gcm.o obj-$(CONFIG_CRYPTO_CCM) += ccm.o obj-$(CONFIG_CRYPTO_CHACHA20POLY1305) += chacha20poly1305.o @@ -110,7 +112,7 @@ obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o obj-$(CONFIG_CRYPTO_SEED) += seed.o obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o -obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o +obj-$(CONFIG_CRYPTO_CHACHA20) += chacha_generic.o obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o @@ -135,6 +137,7 @@ obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o obj-$(CONFIG_CRYPTO_USER_API_RNG) += algif_rng.o obj-$(CONFIG_CRYPTO_USER_API_AEAD) += algif_aead.o +obj-$(CONFIG_CRYPTO_ZSTD) += zstd.o ecdh_generic-y := ecc.o ecdh_generic-y += ecdh.o
diff --git a/crypto/adiantum.c b/crypto/adiantum.c new file mode 100644 index 0000000..5564e73 --- /dev/null +++ b/crypto/adiantum.c
@@ -0,0 +1,668 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Adiantum length-preserving encryption mode + * + * Copyright 2018 Google LLC + */ + +/* + * Adiantum is a tweakable, length-preserving encryption mode designed for fast + * and secure disk encryption, especially on CPUs without dedicated crypto + * instructions. Adiantum encrypts each sector using the XChaCha12 stream + * cipher, two passes of an ε-almost-∆-universal (ε-∆U) hash function based on + * NH and Poly1305, and an invocation of the AES-256 block cipher on a single + * 16-byte block. See the paper for details: + * + * Adiantum: length-preserving encryption for entry-level processors + * (https://eprint.iacr.org/2018/720.pdf) + * + * For flexibility, this implementation also allows other ciphers: + * + * - Stream cipher: XChaCha12 or XChaCha20 + * - Block cipher: any with a 128-bit block size and 256-bit key + * + * This implementation doesn't currently allow other ε-∆U hash functions, i.e. + * HPolyC is not supported. This is because Adiantum is ~20% faster than HPolyC + * but still provably as secure, and also the ε-∆U hash function of HBSH is + * formally defined to take two inputs (tweak, message) which makes it difficult + * to wrap with the crypto_shash API. Rather, some details need to be handled + * here. Nevertheless, if needed in the future, support for other ε-∆U hash + * functions could be added here. + */ + +#include <crypto/b128ops.h> +#include <crypto/chacha.h> +#include <crypto/internal/hash.h> +#include <crypto/internal/skcipher.h> +#include <crypto/nhpoly1305.h> +#include <crypto/scatterwalk.h> +#include <linux/module.h> + +#include "internal.h" + +/* + * Size of right-hand part of input data, in bytes; also the size of the block + * cipher's block size and the hash function's output. + */ +#define BLOCKCIPHER_BLOCK_SIZE 16 + +/* Size of the block cipher key (K_E) in bytes */ +#define BLOCKCIPHER_KEY_SIZE 32 + +/* Size of the hash key (K_H) in bytes */ +#define HASH_KEY_SIZE (POLY1305_BLOCK_SIZE + NHPOLY1305_KEY_SIZE) + +/* + * The specification allows variable-length tweaks, but Linux's crypto API + * currently only allows algorithms to support a single length. The "natural" + * tweak length for Adiantum is 16, since that fits into one Poly1305 block for + * the best performance. But longer tweaks are useful for fscrypt, to avoid + * needing to derive per-file keys. So instead we use two blocks, or 32 bytes. + */ +#define TWEAK_SIZE 32 + +struct adiantum_instance_ctx { + struct crypto_skcipher_spawn streamcipher_spawn; + struct crypto_spawn blockcipher_spawn; + struct crypto_shash_spawn hash_spawn; +}; + +struct adiantum_tfm_ctx { + struct crypto_skcipher *streamcipher; + struct crypto_cipher *blockcipher; + struct crypto_shash *hash; + struct poly1305_key header_hash_key; +}; + +struct adiantum_request_ctx { + + /* + * Buffer for right-hand part of data, i.e. + * + * P_L => P_M => C_M => C_R when encrypting, or + * C_R => C_M => P_M => P_L when decrypting. + * + * Also used to build the IV for the stream cipher. + */ + union { + u8 bytes[XCHACHA_IV_SIZE]; + __le32 words[XCHACHA_IV_SIZE / sizeof(__le32)]; + le128 bignum; /* interpret as element of Z/(2^{128}Z) */ + } rbuf; + + bool enc; /* true if encrypting, false if decrypting */ + + /* + * The result of the Poly1305 ε-∆U hash function applied to + * (bulk length, tweak) + */ + le128 header_hash; + + /* Sub-requests, must be last */ + union { + struct shash_desc hash_desc; + struct skcipher_request streamcipher_req; + } u; +}; + +/* + * Given the XChaCha stream key K_S, derive the block cipher key K_E and the + * hash key K_H as follows: + * + * K_E || K_H || ... = XChaCha(key=K_S, nonce=1||0^191) + * + * Note that this denotes using bits from the XChaCha keystream, which here we + * get indirectly by encrypting a buffer containing all 0's. + */ +static int adiantum_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); + struct { + u8 iv[XCHACHA_IV_SIZE]; + u8 derived_keys[BLOCKCIPHER_KEY_SIZE + HASH_KEY_SIZE]; + struct scatterlist sg; + struct crypto_wait wait; + struct skcipher_request req; /* must be last */ + } *data; + u8 *keyp; + int err; + + /* Set the stream cipher key (K_S) */ + crypto_skcipher_clear_flags(tctx->streamcipher, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(tctx->streamcipher, + crypto_skcipher_get_flags(tfm) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(tctx->streamcipher, key, keylen); + crypto_skcipher_set_flags(tfm, + crypto_skcipher_get_flags(tctx->streamcipher) & + CRYPTO_TFM_RES_MASK); + if (err) + return err; + + /* Derive the subkeys */ + data = kzalloc(sizeof(*data) + + crypto_skcipher_reqsize(tctx->streamcipher), GFP_KERNEL); + if (!data) + return -ENOMEM; + data->iv[0] = 1; + sg_init_one(&data->sg, data->derived_keys, sizeof(data->derived_keys)); + crypto_init_wait(&data->wait); + skcipher_request_set_tfm(&data->req, tctx->streamcipher); + skcipher_request_set_callback(&data->req, CRYPTO_TFM_REQ_MAY_SLEEP | + CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &data->wait); + skcipher_request_set_crypt(&data->req, &data->sg, &data->sg, + sizeof(data->derived_keys), data->iv); + err = crypto_wait_req(crypto_skcipher_encrypt(&data->req), &data->wait); + if (err) + goto out; + keyp = data->derived_keys; + + /* Set the block cipher key (K_E) */ + crypto_cipher_clear_flags(tctx->blockcipher, CRYPTO_TFM_REQ_MASK); + crypto_cipher_set_flags(tctx->blockcipher, + crypto_skcipher_get_flags(tfm) & + CRYPTO_TFM_REQ_MASK); + err = crypto_cipher_setkey(tctx->blockcipher, keyp, + BLOCKCIPHER_KEY_SIZE); + crypto_skcipher_set_flags(tfm, + crypto_cipher_get_flags(tctx->blockcipher) & + CRYPTO_TFM_RES_MASK); + if (err) + goto out; + keyp += BLOCKCIPHER_KEY_SIZE; + + /* Set the hash key (K_H) */ + poly1305_core_setkey(&tctx->header_hash_key, keyp); + keyp += POLY1305_BLOCK_SIZE; + + crypto_shash_clear_flags(tctx->hash, CRYPTO_TFM_REQ_MASK); + crypto_shash_set_flags(tctx->hash, crypto_skcipher_get_flags(tfm) & + CRYPTO_TFM_REQ_MASK); + err = crypto_shash_setkey(tctx->hash, keyp, NHPOLY1305_KEY_SIZE); + crypto_skcipher_set_flags(tfm, crypto_shash_get_flags(tctx->hash) & + CRYPTO_TFM_RES_MASK); + keyp += NHPOLY1305_KEY_SIZE; + WARN_ON(keyp != &data->derived_keys[ARRAY_SIZE(data->derived_keys)]); +out: + kzfree(data); + return err; +} + +/* Addition in Z/(2^{128}Z) */ +static inline void le128_add(le128 *r, const le128 *v1, const le128 *v2) +{ + u64 x = le64_to_cpu(v1->b); + u64 y = le64_to_cpu(v2->b); + + r->b = cpu_to_le64(x + y); + r->a = cpu_to_le64(le64_to_cpu(v1->a) + le64_to_cpu(v2->a) + + (x + y < x)); +} + +/* Subtraction in Z/(2^{128}Z) */ +static inline void le128_sub(le128 *r, const le128 *v1, const le128 *v2) +{ + u64 x = le64_to_cpu(v1->b); + u64 y = le64_to_cpu(v2->b); + + r->b = cpu_to_le64(x - y); + r->a = cpu_to_le64(le64_to_cpu(v1->a) - le64_to_cpu(v2->a) - + (x - y > x)); +} + +/* + * Apply the Poly1305 ε-∆U hash function to (bulk length, tweak) and save the + * result to rctx->header_hash. This is the calculation + * + * H_T ← Poly1305_{K_T}(bin_{128}(|L|) || T) + * + * from the procedure in section 6.4 of the Adiantum paper. The resulting value + * is reused in both the first and second hash steps. Specifically, it's added + * to the result of an independently keyed ε-∆U hash function (for equal length + * inputs only) taken over the left-hand part (the "bulk") of the message, to + * give the overall Adiantum hash of the (tweak, left-hand part) pair. + */ +static void adiantum_hash_header(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); + struct adiantum_request_ctx *rctx = skcipher_request_ctx(req); + const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE; + struct { + __le64 message_bits; + __le64 padding; + } header = { + .message_bits = cpu_to_le64((u64)bulk_len * 8) + }; + struct poly1305_state state; + + poly1305_core_init(&state); + + BUILD_BUG_ON(sizeof(header) % POLY1305_BLOCK_SIZE != 0); + poly1305_core_blocks(&state, &tctx->header_hash_key, + &header, sizeof(header) / POLY1305_BLOCK_SIZE); + + BUILD_BUG_ON(TWEAK_SIZE % POLY1305_BLOCK_SIZE != 0); + poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv, + TWEAK_SIZE / POLY1305_BLOCK_SIZE); + + poly1305_core_emit(&state, &rctx->header_hash); +} + +/* Hash the left-hand part (the "bulk") of the message using NHPoly1305 */ +static int adiantum_hash_message(struct skcipher_request *req, + struct scatterlist *sgl, le128 *digest) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); + struct adiantum_request_ctx *rctx = skcipher_request_ctx(req); + const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE; + struct shash_desc *hash_desc = &rctx->u.hash_desc; + struct sg_mapping_iter miter; + unsigned int i, n; + int err; + + hash_desc->tfm = tctx->hash; + hash_desc->flags = 0; + + err = crypto_shash_init(hash_desc); + if (err) + return err; + + sg_miter_start(&miter, sgl, sg_nents(sgl), + SG_MITER_FROM_SG | SG_MITER_ATOMIC); + for (i = 0; i < bulk_len; i += n) { + sg_miter_next(&miter); + n = min_t(unsigned int, miter.length, bulk_len - i); + err = crypto_shash_update(hash_desc, miter.addr, n); + if (err) + break; + } + sg_miter_stop(&miter); + if (err) + return err; + + return crypto_shash_final(hash_desc, (u8 *)digest); +} + +/* Continue Adiantum encryption/decryption after the stream cipher step */ +static int adiantum_finish(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); + struct adiantum_request_ctx *rctx = skcipher_request_ctx(req); + const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE; + le128 digest; + int err; + + /* If decrypting, decrypt C_M with the block cipher to get P_M */ + if (!rctx->enc) + crypto_cipher_decrypt_one(tctx->blockcipher, rctx->rbuf.bytes, + rctx->rbuf.bytes); + + /* + * Second hash step + * enc: C_R = C_M - H_{K_H}(T, C_L) + * dec: P_R = P_M - H_{K_H}(T, P_L) + */ + err = adiantum_hash_message(req, req->dst, &digest); + if (err) + return err; + le128_add(&digest, &digest, &rctx->header_hash); + le128_sub(&rctx->rbuf.bignum, &rctx->rbuf.bignum, &digest); + scatterwalk_map_and_copy(&rctx->rbuf.bignum, req->dst, + bulk_len, BLOCKCIPHER_BLOCK_SIZE, 1); + return 0; +} + +static void adiantum_streamcipher_done(struct crypto_async_request *areq, + int err) +{ + struct skcipher_request *req = areq->data; + + if (!err) + err = adiantum_finish(req); + + skcipher_request_complete(req, err); +} + +static int adiantum_crypt(struct skcipher_request *req, bool enc) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); + struct adiantum_request_ctx *rctx = skcipher_request_ctx(req); + const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE; + unsigned int stream_len; + le128 digest; + int err; + + if (req->cryptlen < BLOCKCIPHER_BLOCK_SIZE) + return -EINVAL; + + rctx->enc = enc; + + /* + * First hash step + * enc: P_M = P_R + H_{K_H}(T, P_L) + * dec: C_M = C_R + H_{K_H}(T, C_L) + */ + adiantum_hash_header(req); + err = adiantum_hash_message(req, req->src, &digest); + if (err) + return err; + le128_add(&digest, &digest, &rctx->header_hash); + scatterwalk_map_and_copy(&rctx->rbuf.bignum, req->src, + bulk_len, BLOCKCIPHER_BLOCK_SIZE, 0); + le128_add(&rctx->rbuf.bignum, &rctx->rbuf.bignum, &digest); + + /* If encrypting, encrypt P_M with the block cipher to get C_M */ + if (enc) + crypto_cipher_encrypt_one(tctx->blockcipher, rctx->rbuf.bytes, + rctx->rbuf.bytes); + + /* Initialize the rest of the XChaCha IV (first part is C_M) */ + BUILD_BUG_ON(BLOCKCIPHER_BLOCK_SIZE != 16); + BUILD_BUG_ON(XCHACHA_IV_SIZE != 32); /* nonce || stream position */ + rctx->rbuf.words[4] = cpu_to_le32(1); + rctx->rbuf.words[5] = 0; + rctx->rbuf.words[6] = 0; + rctx->rbuf.words[7] = 0; + + /* + * XChaCha needs to be done on all the data except the last 16 bytes; + * for disk encryption that usually means 4080 or 496 bytes. But ChaCha + * implementations tend to be most efficient when passed a whole number + * of 64-byte ChaCha blocks, or sometimes even a multiple of 256 bytes. + * And here it doesn't matter whether the last 16 bytes are written to, + * as the second hash step will overwrite them. Thus, round the XChaCha + * length up to the next 64-byte boundary if possible. + */ + stream_len = bulk_len; + if (round_up(stream_len, CHACHA_BLOCK_SIZE) <= req->cryptlen) + stream_len = round_up(stream_len, CHACHA_BLOCK_SIZE); + + skcipher_request_set_tfm(&rctx->u.streamcipher_req, tctx->streamcipher); + skcipher_request_set_crypt(&rctx->u.streamcipher_req, req->src, + req->dst, stream_len, &rctx->rbuf); + skcipher_request_set_callback(&rctx->u.streamcipher_req, + req->base.flags, + adiantum_streamcipher_done, req); + return crypto_skcipher_encrypt(&rctx->u.streamcipher_req) ?: + adiantum_finish(req); +} + +static int adiantum_encrypt(struct skcipher_request *req) +{ + return adiantum_crypt(req, true); +} + +static int adiantum_decrypt(struct skcipher_request *req) +{ + return adiantum_crypt(req, false); +} + +static int adiantum_init_tfm(struct crypto_skcipher *tfm) +{ + struct skcipher_instance *inst = skcipher_alg_instance(tfm); + struct adiantum_instance_ctx *ictx = skcipher_instance_ctx(inst); + struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *streamcipher; + struct crypto_cipher *blockcipher; + struct crypto_shash *hash; + unsigned int subreq_size; + int err; + + streamcipher = crypto_spawn_skcipher(&ictx->streamcipher_spawn); + if (IS_ERR(streamcipher)) + return PTR_ERR(streamcipher); + + blockcipher = crypto_spawn_cipher(&ictx->blockcipher_spawn); + if (IS_ERR(blockcipher)) { + err = PTR_ERR(blockcipher); + goto err_free_streamcipher; + } + + hash = crypto_spawn_shash(&ictx->hash_spawn); + if (IS_ERR(hash)) { + err = PTR_ERR(hash); + goto err_free_blockcipher; + } + + tctx->streamcipher = streamcipher; + tctx->blockcipher = blockcipher; + tctx->hash = hash; + + BUILD_BUG_ON(offsetofend(struct adiantum_request_ctx, u) != + sizeof(struct adiantum_request_ctx)); + subreq_size = max(FIELD_SIZEOF(struct adiantum_request_ctx, + u.hash_desc) + + crypto_shash_descsize(hash), + FIELD_SIZEOF(struct adiantum_request_ctx, + u.streamcipher_req) + + crypto_skcipher_reqsize(streamcipher)); + + crypto_skcipher_set_reqsize(tfm, + offsetof(struct adiantum_request_ctx, u) + + subreq_size); + return 0; + +err_free_blockcipher: + crypto_free_cipher(blockcipher); +err_free_streamcipher: + crypto_free_skcipher(streamcipher); + return err; +} + +static void adiantum_exit_tfm(struct crypto_skcipher *tfm) +{ + struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); + + crypto_free_skcipher(tctx->streamcipher); + crypto_free_cipher(tctx->blockcipher); + crypto_free_shash(tctx->hash); +} + +static void adiantum_free_instance(struct skcipher_instance *inst) +{ + struct adiantum_instance_ctx *ictx = skcipher_instance_ctx(inst); + + crypto_drop_skcipher(&ictx->streamcipher_spawn); + crypto_drop_spawn(&ictx->blockcipher_spawn); + crypto_drop_shash(&ictx->hash_spawn); + kfree(inst); +} + +/* + * Check for a supported set of inner algorithms. + * See the comment at the beginning of this file. + */ +static bool adiantum_supported_algorithms(struct skcipher_alg *streamcipher_alg, + struct crypto_alg *blockcipher_alg, + struct shash_alg *hash_alg) +{ + if (strcmp(streamcipher_alg->base.cra_name, "xchacha12") != 0 && + strcmp(streamcipher_alg->base.cra_name, "xchacha20") != 0) + return false; + + if (blockcipher_alg->cra_cipher.cia_min_keysize > BLOCKCIPHER_KEY_SIZE || + blockcipher_alg->cra_cipher.cia_max_keysize < BLOCKCIPHER_KEY_SIZE) + return false; + if (blockcipher_alg->cra_blocksize != BLOCKCIPHER_BLOCK_SIZE) + return false; + + if (strcmp(hash_alg->base.cra_name, "nhpoly1305") != 0) + return false; + + return true; +} + +static int adiantum_create(struct crypto_template *tmpl, struct rtattr **tb) +{ + struct crypto_attr_type *algt; + const char *streamcipher_name; + const char *blockcipher_name; + const char *nhpoly1305_name; + struct skcipher_instance *inst; + struct adiantum_instance_ctx *ictx; + struct skcipher_alg *streamcipher_alg; + struct crypto_alg *blockcipher_alg; + struct crypto_alg *_hash_alg; + struct shash_alg *hash_alg; + int err; + + algt = crypto_get_attr_type(tb); + if (IS_ERR(algt)) + return PTR_ERR(algt); + + if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask) + return -EINVAL; + + streamcipher_name = crypto_attr_alg_name(tb[1]); + if (IS_ERR(streamcipher_name)) + return PTR_ERR(streamcipher_name); + + blockcipher_name = crypto_attr_alg_name(tb[2]); + if (IS_ERR(blockcipher_name)) + return PTR_ERR(blockcipher_name); + + nhpoly1305_name = crypto_attr_alg_name(tb[3]); + if (nhpoly1305_name == ERR_PTR(-ENOENT)) + nhpoly1305_name = "nhpoly1305"; + if (IS_ERR(nhpoly1305_name)) + return PTR_ERR(nhpoly1305_name); + + inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL); + if (!inst) + return -ENOMEM; + ictx = skcipher_instance_ctx(inst); + + /* Stream cipher, e.g. "xchacha12" */ + crypto_set_skcipher_spawn(&ictx->streamcipher_spawn, + skcipher_crypto_instance(inst)); + err = crypto_grab_skcipher(&ictx->streamcipher_spawn, streamcipher_name, + 0, crypto_requires_sync(algt->type, + algt->mask)); + if (err) + goto out_free_inst; + streamcipher_alg = crypto_spawn_skcipher_alg(&ictx->streamcipher_spawn); + + /* Block cipher, e.g. "aes" */ + crypto_set_spawn(&ictx->blockcipher_spawn, + skcipher_crypto_instance(inst)); + err = crypto_grab_spawn(&ictx->blockcipher_spawn, blockcipher_name, + CRYPTO_ALG_TYPE_CIPHER, CRYPTO_ALG_TYPE_MASK); + if (err) + goto out_drop_streamcipher; + blockcipher_alg = ictx->blockcipher_spawn.alg; + + /* NHPoly1305 ε-∆U hash function */ + _hash_alg = crypto_alg_mod_lookup(nhpoly1305_name, + CRYPTO_ALG_TYPE_SHASH, + CRYPTO_ALG_TYPE_MASK); + if (IS_ERR(_hash_alg)) { + err = PTR_ERR(_hash_alg); + goto out_drop_blockcipher; + } + hash_alg = __crypto_shash_alg(_hash_alg); + err = crypto_init_shash_spawn(&ictx->hash_spawn, hash_alg, + skcipher_crypto_instance(inst)); + if (err) + goto out_put_hash; + + /* Check the set of algorithms */ + if (!adiantum_supported_algorithms(streamcipher_alg, blockcipher_alg, + hash_alg)) { + pr_warn("Unsupported Adiantum instantiation: (%s,%s,%s)\n", + streamcipher_alg->base.cra_name, + blockcipher_alg->cra_name, hash_alg->base.cra_name); + err = -EINVAL; + goto out_drop_hash; + } + + /* Instance fields */ + + err = -ENAMETOOLONG; + if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, + "adiantum(%s,%s)", streamcipher_alg->base.cra_name, + blockcipher_alg->cra_name) >= CRYPTO_MAX_ALG_NAME) + goto out_drop_hash; + if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, + "adiantum(%s,%s,%s)", + streamcipher_alg->base.cra_driver_name, + blockcipher_alg->cra_driver_name, + hash_alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) + goto out_drop_hash; + + inst->alg.base.cra_flags = streamcipher_alg->base.cra_flags & + CRYPTO_ALG_ASYNC; + inst->alg.base.cra_blocksize = BLOCKCIPHER_BLOCK_SIZE; + inst->alg.base.cra_ctxsize = sizeof(struct adiantum_tfm_ctx); + inst->alg.base.cra_alignmask = streamcipher_alg->base.cra_alignmask | + hash_alg->base.cra_alignmask; + /* + * The block cipher is only invoked once per message, so for long + * messages (e.g. sectors for disk encryption) its performance doesn't + * matter as much as that of the stream cipher and hash function. Thus, + * weigh the block cipher's ->cra_priority less. + */ + inst->alg.base.cra_priority = (4 * streamcipher_alg->base.cra_priority + + 2 * hash_alg->base.cra_priority + + blockcipher_alg->cra_priority) / 7; + + inst->alg.setkey = adiantum_setkey; + inst->alg.encrypt = adiantum_encrypt; + inst->alg.decrypt = adiantum_decrypt; + inst->alg.init = adiantum_init_tfm; + inst->alg.exit = adiantum_exit_tfm; + inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(streamcipher_alg); + inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(streamcipher_alg); + inst->alg.ivsize = TWEAK_SIZE; + + inst->free = adiantum_free_instance; + + err = skcipher_register_instance(tmpl, inst); + if (err) + goto out_drop_hash; + + crypto_mod_put(_hash_alg); + return 0; + +out_drop_hash: + crypto_drop_shash(&ictx->hash_spawn); +out_put_hash: + crypto_mod_put(_hash_alg); +out_drop_blockcipher: + crypto_drop_spawn(&ictx->blockcipher_spawn); +out_drop_streamcipher: + crypto_drop_skcipher(&ictx->streamcipher_spawn); +out_free_inst: + kfree(inst); + return err; +} + +/* adiantum(streamcipher_name, blockcipher_name [, nhpoly1305_name]) */ +static struct crypto_template adiantum_tmpl = { + .name = "adiantum", + .create = adiantum_create, + .module = THIS_MODULE, +}; + +static int __init adiantum_module_init(void) +{ + return crypto_register_template(&adiantum_tmpl); +} + +static void __exit adiantum_module_exit(void) +{ + crypto_unregister_template(&adiantum_tmpl); +} + +module_init(adiantum_module_init); +module_exit(adiantum_module_exit); + +MODULE_DESCRIPTION("Adiantum length-preserving encryption mode"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>"); +MODULE_ALIAS_CRYPTO("adiantum");
diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c index ca554d5..13df33a 100644 --- a/crypto/aes_generic.c +++ b/crypto/aes_generic.c
@@ -63,7 +63,8 @@ static inline u8 byte(const u32 x, const unsigned n) static const u32 rco_tab[10] = { 1, 2, 4, 8, 16, 32, 64, 128, 27, 54 }; -__visible const u32 crypto_ft_tab[4][256] = { +/* cacheline-aligned to facilitate prefetching into cache */ +__visible const u32 crypto_ft_tab[4][256] __cacheline_aligned = { { 0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, 0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591, @@ -327,7 +328,7 @@ __visible const u32 crypto_ft_tab[4][256] = { } }; -__visible const u32 crypto_fl_tab[4][256] = { +__visible const u32 crypto_fl_tab[4][256] __cacheline_aligned = { { 0x00000063, 0x0000007c, 0x00000077, 0x0000007b, 0x000000f2, 0x0000006b, 0x0000006f, 0x000000c5, @@ -591,7 +592,7 @@ __visible const u32 crypto_fl_tab[4][256] = { } }; -__visible const u32 crypto_it_tab[4][256] = { +__visible const u32 crypto_it_tab[4][256] __cacheline_aligned = { { 0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a, 0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b, @@ -855,7 +856,7 @@ __visible const u32 crypto_it_tab[4][256] = { } }; -__visible const u32 crypto_il_tab[4][256] = { +__visible const u32 crypto_il_tab[4][256] __cacheline_aligned = { { 0x00000052, 0x00000009, 0x0000006a, 0x000000d5, 0x00000030, 0x00000036, 0x000000a5, 0x00000038,
diff --git a/crypto/api.c b/crypto/api.c index e485aed..ff7a785 100644 --- a/crypto/api.c +++ b/crypto/api.c
@@ -24,6 +24,7 @@ #include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/string.h> +#include <linux/completion.h> #include "internal.h" LIST_HEAD(crypto_alg_list); @@ -595,5 +596,17 @@ int crypto_has_alg(const char *name, u32 type, u32 mask) } EXPORT_SYMBOL_GPL(crypto_has_alg); +void crypto_req_done(struct crypto_async_request *req, int err) +{ + struct crypto_wait *wait = req->data; + + if (err == -EINPROGRESS) + return; + + wait->err = err; + complete(&wait->completion); +} +EXPORT_SYMBOL_GPL(crypto_req_done); + MODULE_DESCRIPTION("Cryptographic core API"); MODULE_LICENSE("GPL");
diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c deleted file mode 100644 index 4a45fa4..0000000 --- a/crypto/chacha20_generic.c +++ /dev/null
@@ -1,143 +0,0 @@ -/* - * ChaCha20 256-bit cipher algorithm, RFC7539 - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include <crypto/algapi.h> -#include <crypto/chacha20.h> -#include <crypto/internal/skcipher.h> -#include <linux/module.h> - -static inline u32 le32_to_cpuvp(const void *p) -{ - return le32_to_cpup(p); -} - -static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src, - unsigned int bytes) -{ - u8 stream[CHACHA20_BLOCK_SIZE]; - - if (dst != src) - memcpy(dst, src, bytes); - - while (bytes >= CHACHA20_BLOCK_SIZE) { - chacha20_block(state, stream); - crypto_xor(dst, stream, CHACHA20_BLOCK_SIZE); - bytes -= CHACHA20_BLOCK_SIZE; - dst += CHACHA20_BLOCK_SIZE; - } - if (bytes) { - chacha20_block(state, stream); - crypto_xor(dst, stream, bytes); - } -} - -void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv) -{ - static const char constant[16] = "expand 32-byte k"; - - state[0] = le32_to_cpuvp(constant + 0); - state[1] = le32_to_cpuvp(constant + 4); - state[2] = le32_to_cpuvp(constant + 8); - state[3] = le32_to_cpuvp(constant + 12); - state[4] = ctx->key[0]; - state[5] = ctx->key[1]; - state[6] = ctx->key[2]; - state[7] = ctx->key[3]; - state[8] = ctx->key[4]; - state[9] = ctx->key[5]; - state[10] = ctx->key[6]; - state[11] = ctx->key[7]; - state[12] = le32_to_cpuvp(iv + 0); - state[13] = le32_to_cpuvp(iv + 4); - state[14] = le32_to_cpuvp(iv + 8); - state[15] = le32_to_cpuvp(iv + 12); -} -EXPORT_SYMBOL_GPL(crypto_chacha20_init); - -int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keysize) -{ - struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); - int i; - - if (keysize != CHACHA20_KEY_SIZE) - return -EINVAL; - - for (i = 0; i < ARRAY_SIZE(ctx->key); i++) - ctx->key[i] = le32_to_cpuvp(key + i * sizeof(u32)); - - return 0; -} -EXPORT_SYMBOL_GPL(crypto_chacha20_setkey); - -int crypto_chacha20_crypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - u32 state[16]; - int err; - - err = skcipher_walk_virt(&walk, req, true); - - crypto_chacha20_init(state, ctx, walk.iv); - - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - - if (nbytes < walk.total) - nbytes = round_down(nbytes, walk.stride); - - chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr, - nbytes); - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); - } - - return err; -} -EXPORT_SYMBOL_GPL(crypto_chacha20_crypt); - -static struct skcipher_alg alg = { - .base.cra_name = "chacha20", - .base.cra_driver_name = "chacha20-generic", - .base.cra_priority = 100, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha20_ctx), - .base.cra_alignmask = sizeof(u32) - 1, - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA20_KEY_SIZE, - .max_keysize = CHACHA20_KEY_SIZE, - .ivsize = CHACHA20_IV_SIZE, - .chunksize = CHACHA20_BLOCK_SIZE, - .setkey = crypto_chacha20_setkey, - .encrypt = crypto_chacha20_crypt, - .decrypt = crypto_chacha20_crypt, -}; - -static int __init chacha20_generic_mod_init(void) -{ - return crypto_register_skcipher(&alg); -} - -static void __exit chacha20_generic_mod_fini(void) -{ - crypto_unregister_skcipher(&alg); -} - -module_init(chacha20_generic_mod_init); -module_exit(chacha20_generic_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Martin Willi <martin@strongswan.org>"); -MODULE_DESCRIPTION("chacha20 cipher algorithm"); -MODULE_ALIAS_CRYPTO("chacha20"); -MODULE_ALIAS_CRYPTO("chacha20-generic");
diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c index 600afa9..573c07e 100644 --- a/crypto/chacha20poly1305.c +++ b/crypto/chacha20poly1305.c
@@ -13,7 +13,7 @@ #include <crypto/internal/hash.h> #include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> -#include <crypto/chacha20.h> +#include <crypto/chacha.h> #include <crypto/poly1305.h> #include <linux/err.h> #include <linux/init.h> @@ -51,7 +51,7 @@ struct poly_req { }; struct chacha_req { - u8 iv[CHACHA20_IV_SIZE]; + u8 iv[CHACHA_IV_SIZE]; struct scatterlist src[1]; struct skcipher_request req; /* must be last member */ }; @@ -91,7 +91,7 @@ static void chacha_iv(u8 *iv, struct aead_request *req, u32 icb) memcpy(iv, &leicb, sizeof(leicb)); memcpy(iv + sizeof(leicb), ctx->salt, ctx->saltlen); memcpy(iv + sizeof(leicb) + ctx->saltlen, req->iv, - CHACHA20_IV_SIZE - sizeof(leicb) - ctx->saltlen); + CHACHA_IV_SIZE - sizeof(leicb) - ctx->saltlen); } static int poly_verify_tag(struct aead_request *req) @@ -494,7 +494,7 @@ static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key, struct chachapoly_ctx *ctx = crypto_aead_ctx(aead); int err; - if (keylen != ctx->saltlen + CHACHA20_KEY_SIZE) + if (keylen != ctx->saltlen + CHACHA_KEY_SIZE) return -EINVAL; keylen -= ctx->saltlen; @@ -639,7 +639,7 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, err = -EINVAL; /* Need 16-byte IV size, including Initial Block Counter value */ - if (crypto_skcipher_alg_ivsize(chacha) != CHACHA20_IV_SIZE) + if (crypto_skcipher_alg_ivsize(chacha) != CHACHA_IV_SIZE) goto out_drop_chacha; /* Not a stream cipher? */ if (chacha->base.cra_blocksize != 1)
diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c new file mode 100644 index 0000000..35b5831 --- /dev/null +++ b/crypto/chacha_generic.c
@@ -0,0 +1,217 @@ +/* + * ChaCha and XChaCha stream ciphers, including ChaCha20 (RFC7539) + * + * Copyright (C) 2015 Martin Willi + * Copyright (C) 2018 Google LLC + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <asm/unaligned.h> +#include <crypto/algapi.h> +#include <crypto/chacha.h> +#include <crypto/internal/skcipher.h> +#include <linux/module.h> + +static void chacha_docrypt(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + /* aligned to potentially speed up crypto_xor() */ + u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long)); + + if (dst != src) + memcpy(dst, src, bytes); + + while (bytes >= CHACHA_BLOCK_SIZE) { + chacha_block(state, stream, nrounds); + crypto_xor(dst, stream, CHACHA_BLOCK_SIZE); + bytes -= CHACHA_BLOCK_SIZE; + dst += CHACHA_BLOCK_SIZE; + } + if (bytes) { + chacha_block(state, stream, nrounds); + crypto_xor(dst, stream, bytes); + } +} + +static int chacha_stream_xor(struct skcipher_request *req, + struct chacha_ctx *ctx, u8 *iv) +{ + struct skcipher_walk walk; + u32 state[16]; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + crypto_chacha_init(state, ctx, iv); + + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes = round_down(nbytes, walk.stride); + + chacha_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr, + nbytes, ctx->nrounds); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } + + return err; +} + +void crypto_chacha_init(u32 *state, struct chacha_ctx *ctx, u8 *iv) +{ + state[0] = 0x61707865; /* "expa" */ + state[1] = 0x3320646e; /* "nd 3" */ + state[2] = 0x79622d32; /* "2-by" */ + state[3] = 0x6b206574; /* "te k" */ + state[4] = ctx->key[0]; + state[5] = ctx->key[1]; + state[6] = ctx->key[2]; + state[7] = ctx->key[3]; + state[8] = ctx->key[4]; + state[9] = ctx->key[5]; + state[10] = ctx->key[6]; + state[11] = ctx->key[7]; + state[12] = get_unaligned_le32(iv + 0); + state[13] = get_unaligned_le32(iv + 4); + state[14] = get_unaligned_le32(iv + 8); + state[15] = get_unaligned_le32(iv + 12); +} +EXPORT_SYMBOL_GPL(crypto_chacha_init); + +static int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keysize, int nrounds) +{ + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + int i; + + if (keysize != CHACHA_KEY_SIZE) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(ctx->key); i++) + ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32)); + + ctx->nrounds = nrounds; + return 0; +} + +int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keysize) +{ + return chacha_setkey(tfm, key, keysize, 20); +} +EXPORT_SYMBOL_GPL(crypto_chacha20_setkey); + +int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keysize) +{ + return chacha_setkey(tfm, key, keysize, 12); +} +EXPORT_SYMBOL_GPL(crypto_chacha12_setkey); + +int crypto_chacha_crypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + + return chacha_stream_xor(req, ctx, req->iv); +} +EXPORT_SYMBOL_GPL(crypto_chacha_crypt); + +int crypto_xchacha_crypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + struct chacha_ctx subctx; + u32 state[16]; + u8 real_iv[16]; + + /* Compute the subkey given the original key and first 128 nonce bits */ + crypto_chacha_init(state, ctx, req->iv); + hchacha_block(state, subctx.key, ctx->nrounds); + subctx.nrounds = ctx->nrounds; + + /* Build the real IV */ + memcpy(&real_iv[0], req->iv + 24, 8); /* stream position */ + memcpy(&real_iv[8], req->iv + 16, 8); /* remaining 64 nonce bits */ + + /* Generate the stream and XOR it with the data */ + return chacha_stream_xor(req, &subctx, real_iv); +} +EXPORT_SYMBOL_GPL(crypto_xchacha_crypt); + +static struct skcipher_alg algs[] = { + { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-generic", + .base.cra_priority = 100, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = crypto_chacha_crypt, + .decrypt = crypto_chacha_crypt, + }, { + .base.cra_name = "xchacha20", + .base.cra_driver_name = "xchacha20-generic", + .base.cra_priority = 100, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = crypto_xchacha_crypt, + .decrypt = crypto_xchacha_crypt, + }, { + .base.cra_name = "xchacha12", + .base.cra_driver_name = "xchacha12-generic", + .base.cra_priority = 100, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = crypto_chacha12_setkey, + .encrypt = crypto_xchacha_crypt, + .decrypt = crypto_xchacha_crypt, + } +}; + +static int __init chacha_generic_mod_init(void) +{ + return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); +} + +static void __exit chacha_generic_mod_fini(void) +{ + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); +} + +module_init(chacha_generic_mod_init); +module_exit(chacha_generic_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Martin Willi <martin@strongswan.org>"); +MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (generic)"); +MODULE_ALIAS_CRYPTO("chacha20"); +MODULE_ALIAS_CRYPTO("chacha20-generic"); +MODULE_ALIAS_CRYPTO("xchacha20"); +MODULE_ALIAS_CRYPTO("xchacha20-generic"); +MODULE_ALIAS_CRYPTO("xchacha12"); +MODULE_ALIAS_CRYPTO("xchacha12-generic");
diff --git a/crypto/nhpoly1305.c b/crypto/nhpoly1305.c new file mode 100644 index 0000000..ec831a5 --- /dev/null +++ b/crypto/nhpoly1305.c
@@ -0,0 +1,254 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum + * + * Copyright 2018 Google LLC + */ + +/* + * "NHPoly1305" is the main component of Adiantum hashing. + * Specifically, it is the calculation + * + * H_L ← Poly1305_{K_L}(NH_{K_N}(pad_{128}(L))) + * + * from the procedure in section 6.4 of the Adiantum paper [1]. It is an + * ε-almost-∆-universal (ε-∆U) hash function for equal-length inputs over + * Z/(2^{128}Z), where the "∆" operation is addition. It hashes 1024-byte + * chunks of the input with the NH hash function [2], reducing the input length + * by 32x. The resulting NH digests are evaluated as a polynomial in + * GF(2^{130}-5), like in the Poly1305 MAC [3]. Note that the polynomial + * evaluation by itself would suffice to achieve the ε-∆U property; NH is used + * for performance since it's over twice as fast as Poly1305. + * + * This is *not* a cryptographic hash function; do not use it as such! + * + * [1] Adiantum: length-preserving encryption for entry-level processors + * (https://eprint.iacr.org/2018/720.pdf) + * [2] UMAC: Fast and Secure Message Authentication + * (https://fastcrypto.org/umac/umac_proc.pdf) + * [3] The Poly1305-AES message-authentication code + * (https://cr.yp.to/mac/poly1305-20050329.pdf) + */ + +#include <asm/unaligned.h> +#include <crypto/algapi.h> +#include <crypto/internal/hash.h> +#include <crypto/nhpoly1305.h> +#include <linux/crypto.h> +#include <linux/kernel.h> +#include <linux/module.h> + +static void nh_generic(const u32 *key, const u8 *message, size_t message_len, + __le64 hash[NH_NUM_PASSES]) +{ + u64 sums[4] = { 0, 0, 0, 0 }; + + BUILD_BUG_ON(NH_PAIR_STRIDE != 2); + BUILD_BUG_ON(NH_NUM_PASSES != 4); + + while (message_len) { + u32 m0 = get_unaligned_le32(message + 0); + u32 m1 = get_unaligned_le32(message + 4); + u32 m2 = get_unaligned_le32(message + 8); + u32 m3 = get_unaligned_le32(message + 12); + + sums[0] += (u64)(u32)(m0 + key[ 0]) * (u32)(m2 + key[ 2]); + sums[1] += (u64)(u32)(m0 + key[ 4]) * (u32)(m2 + key[ 6]); + sums[2] += (u64)(u32)(m0 + key[ 8]) * (u32)(m2 + key[10]); + sums[3] += (u64)(u32)(m0 + key[12]) * (u32)(m2 + key[14]); + sums[0] += (u64)(u32)(m1 + key[ 1]) * (u32)(m3 + key[ 3]); + sums[1] += (u64)(u32)(m1 + key[ 5]) * (u32)(m3 + key[ 7]); + sums[2] += (u64)(u32)(m1 + key[ 9]) * (u32)(m3 + key[11]); + sums[3] += (u64)(u32)(m1 + key[13]) * (u32)(m3 + key[15]); + key += NH_MESSAGE_UNIT / sizeof(key[0]); + message += NH_MESSAGE_UNIT; + message_len -= NH_MESSAGE_UNIT; + } + + hash[0] = cpu_to_le64(sums[0]); + hash[1] = cpu_to_le64(sums[1]); + hash[2] = cpu_to_le64(sums[2]); + hash[3] = cpu_to_le64(sums[3]); +} + +/* Pass the next NH hash value through Poly1305 */ +static void process_nh_hash_value(struct nhpoly1305_state *state, + const struct nhpoly1305_key *key) +{ + BUILD_BUG_ON(NH_HASH_BYTES % POLY1305_BLOCK_SIZE != 0); + + poly1305_core_blocks(&state->poly_state, &key->poly_key, state->nh_hash, + NH_HASH_BYTES / POLY1305_BLOCK_SIZE); +} + +/* + * Feed the next portion of the source data, as a whole number of 16-byte + * "NH message units", through NH and Poly1305. Each NH hash is taken over + * 1024 bytes, except possibly the final one which is taken over a multiple of + * 16 bytes up to 1024. Also, in the case where data is passed in misaligned + * chunks, we combine partial hashes; the end result is the same either way. + */ +static void nhpoly1305_units(struct nhpoly1305_state *state, + const struct nhpoly1305_key *key, + const u8 *src, unsigned int srclen, nh_t nh_fn) +{ + do { + unsigned int bytes; + + if (state->nh_remaining == 0) { + /* Starting a new NH message */ + bytes = min_t(unsigned int, srclen, NH_MESSAGE_BYTES); + nh_fn(key->nh_key, src, bytes, state->nh_hash); + state->nh_remaining = NH_MESSAGE_BYTES - bytes; + } else { + /* Continuing a previous NH message */ + __le64 tmp_hash[NH_NUM_PASSES]; + unsigned int pos; + int i; + + pos = NH_MESSAGE_BYTES - state->nh_remaining; + bytes = min(srclen, state->nh_remaining); + nh_fn(&key->nh_key[pos / 4], src, bytes, tmp_hash); + for (i = 0; i < NH_NUM_PASSES; i++) + le64_add_cpu(&state->nh_hash[i], + le64_to_cpu(tmp_hash[i])); + state->nh_remaining -= bytes; + } + if (state->nh_remaining == 0) + process_nh_hash_value(state, key); + src += bytes; + srclen -= bytes; + } while (srclen); +} + +int crypto_nhpoly1305_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen) +{ + struct nhpoly1305_key *ctx = crypto_shash_ctx(tfm); + int i; + + if (keylen != NHPOLY1305_KEY_SIZE) + return -EINVAL; + + poly1305_core_setkey(&ctx->poly_key, key); + key += POLY1305_BLOCK_SIZE; + + for (i = 0; i < NH_KEY_WORDS; i++) + ctx->nh_key[i] = get_unaligned_le32(key + i * sizeof(u32)); + + return 0; +} +EXPORT_SYMBOL(crypto_nhpoly1305_setkey); + +int crypto_nhpoly1305_init(struct shash_desc *desc) +{ + struct nhpoly1305_state *state = shash_desc_ctx(desc); + + poly1305_core_init(&state->poly_state); + state->buflen = 0; + state->nh_remaining = 0; + return 0; +} +EXPORT_SYMBOL(crypto_nhpoly1305_init); + +int crypto_nhpoly1305_update_helper(struct shash_desc *desc, + const u8 *src, unsigned int srclen, + nh_t nh_fn) +{ + struct nhpoly1305_state *state = shash_desc_ctx(desc); + const struct nhpoly1305_key *key = crypto_shash_ctx(desc->tfm); + unsigned int bytes; + + if (state->buflen) { + bytes = min(srclen, (int)NH_MESSAGE_UNIT - state->buflen); + memcpy(&state->buffer[state->buflen], src, bytes); + state->buflen += bytes; + if (state->buflen < NH_MESSAGE_UNIT) + return 0; + nhpoly1305_units(state, key, state->buffer, NH_MESSAGE_UNIT, + nh_fn); + state->buflen = 0; + src += bytes; + srclen -= bytes; + } + + if (srclen >= NH_MESSAGE_UNIT) { + bytes = round_down(srclen, NH_MESSAGE_UNIT); + nhpoly1305_units(state, key, src, bytes, nh_fn); + src += bytes; + srclen -= bytes; + } + + if (srclen) { + memcpy(state->buffer, src, srclen); + state->buflen = srclen; + } + return 0; +} +EXPORT_SYMBOL(crypto_nhpoly1305_update_helper); + +int crypto_nhpoly1305_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + return crypto_nhpoly1305_update_helper(desc, src, srclen, nh_generic); +} +EXPORT_SYMBOL(crypto_nhpoly1305_update); + +int crypto_nhpoly1305_final_helper(struct shash_desc *desc, u8 *dst, nh_t nh_fn) +{ + struct nhpoly1305_state *state = shash_desc_ctx(desc); + const struct nhpoly1305_key *key = crypto_shash_ctx(desc->tfm); + + if (state->buflen) { + memset(&state->buffer[state->buflen], 0, + NH_MESSAGE_UNIT - state->buflen); + nhpoly1305_units(state, key, state->buffer, NH_MESSAGE_UNIT, + nh_fn); + } + + if (state->nh_remaining) + process_nh_hash_value(state, key); + + poly1305_core_emit(&state->poly_state, dst); + return 0; +} +EXPORT_SYMBOL(crypto_nhpoly1305_final_helper); + +int crypto_nhpoly1305_final(struct shash_desc *desc, u8 *dst) +{ + return crypto_nhpoly1305_final_helper(desc, dst, nh_generic); +} +EXPORT_SYMBOL(crypto_nhpoly1305_final); + +static struct shash_alg nhpoly1305_alg = { + .base.cra_name = "nhpoly1305", + .base.cra_driver_name = "nhpoly1305-generic", + .base.cra_priority = 100, + .base.cra_ctxsize = sizeof(struct nhpoly1305_key), + .base.cra_module = THIS_MODULE, + .digestsize = POLY1305_DIGEST_SIZE, + .init = crypto_nhpoly1305_init, + .update = crypto_nhpoly1305_update, + .final = crypto_nhpoly1305_final, + .setkey = crypto_nhpoly1305_setkey, + .descsize = sizeof(struct nhpoly1305_state), +}; + +static int __init nhpoly1305_mod_init(void) +{ + return crypto_register_shash(&nhpoly1305_alg); +} + +static void __exit nhpoly1305_mod_exit(void) +{ + crypto_unregister_shash(&nhpoly1305_alg); +} + +module_init(nhpoly1305_mod_init); +module_exit(nhpoly1305_mod_exit); + +MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>"); +MODULE_ALIAS_CRYPTO("nhpoly1305"); +MODULE_ALIAS_CRYPTO("nhpoly1305-generic");
diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c index ba39eb3..b60c1ee 100644 --- a/crypto/poly1305_generic.c +++ b/crypto/poly1305_generic.c
@@ -38,7 +38,7 @@ int crypto_poly1305_init(struct shash_desc *desc) { struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - memset(dctx->h, 0, sizeof(dctx->h)); + poly1305_core_init(&dctx->h); dctx->buflen = 0; dctx->rset = false; dctx->sset = false; @@ -47,23 +47,16 @@ int crypto_poly1305_init(struct shash_desc *desc) } EXPORT_SYMBOL_GPL(crypto_poly1305_init); -static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key) +void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key) { /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ - dctx->r[0] = (get_unaligned_le32(key + 0) >> 0) & 0x3ffffff; - dctx->r[1] = (get_unaligned_le32(key + 3) >> 2) & 0x3ffff03; - dctx->r[2] = (get_unaligned_le32(key + 6) >> 4) & 0x3ffc0ff; - dctx->r[3] = (get_unaligned_le32(key + 9) >> 6) & 0x3f03fff; - dctx->r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff; + key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff; + key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03; + key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff; + key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff; + key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff; } - -static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key) -{ - dctx->s[0] = get_unaligned_le32(key + 0); - dctx->s[1] = get_unaligned_le32(key + 4); - dctx->s[2] = get_unaligned_le32(key + 8); - dctx->s[3] = get_unaligned_le32(key + 12); -} +EXPORT_SYMBOL_GPL(poly1305_core_setkey); /* * Poly1305 requires a unique key for each tag, which implies that we can't set @@ -75,13 +68,16 @@ unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, { if (!dctx->sset) { if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { - poly1305_setrkey(dctx, src); + poly1305_core_setkey(&dctx->r, src); src += POLY1305_BLOCK_SIZE; srclen -= POLY1305_BLOCK_SIZE; dctx->rset = true; } if (srclen >= POLY1305_BLOCK_SIZE) { - poly1305_setskey(dctx, src); + dctx->s[0] = get_unaligned_le32(src + 0); + dctx->s[1] = get_unaligned_le32(src + 4); + dctx->s[2] = get_unaligned_le32(src + 8); + dctx->s[3] = get_unaligned_le32(src + 12); src += POLY1305_BLOCK_SIZE; srclen -= POLY1305_BLOCK_SIZE; dctx->sset = true; @@ -91,41 +87,37 @@ unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, } EXPORT_SYMBOL_GPL(crypto_poly1305_setdesckey); -static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx, - const u8 *src, unsigned int srclen, - u32 hibit) +static void poly1305_blocks_internal(struct poly1305_state *state, + const struct poly1305_key *key, + const void *src, unsigned int nblocks, + u32 hibit) { u32 r0, r1, r2, r3, r4; u32 s1, s2, s3, s4; u32 h0, h1, h2, h3, h4; u64 d0, d1, d2, d3, d4; - unsigned int datalen; - if (unlikely(!dctx->sset)) { - datalen = crypto_poly1305_setdesckey(dctx, src, srclen); - src += srclen - datalen; - srclen = datalen; - } + if (!nblocks) + return; - r0 = dctx->r[0]; - r1 = dctx->r[1]; - r2 = dctx->r[2]; - r3 = dctx->r[3]; - r4 = dctx->r[4]; + r0 = key->r[0]; + r1 = key->r[1]; + r2 = key->r[2]; + r3 = key->r[3]; + r4 = key->r[4]; s1 = r1 * 5; s2 = r2 * 5; s3 = r3 * 5; s4 = r4 * 5; - h0 = dctx->h[0]; - h1 = dctx->h[1]; - h2 = dctx->h[2]; - h3 = dctx->h[3]; - h4 = dctx->h[4]; + h0 = state->h[0]; + h1 = state->h[1]; + h2 = state->h[2]; + h3 = state->h[3]; + h4 = state->h[4]; - while (likely(srclen >= POLY1305_BLOCK_SIZE)) { - + do { /* h += m[i] */ h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff; h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff; @@ -154,16 +146,36 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx, h1 += h0 >> 26; h0 = h0 & 0x3ffffff; src += POLY1305_BLOCK_SIZE; - srclen -= POLY1305_BLOCK_SIZE; + } while (--nblocks); + + state->h[0] = h0; + state->h[1] = h1; + state->h[2] = h2; + state->h[3] = h3; + state->h[4] = h4; +} + +void poly1305_core_blocks(struct poly1305_state *state, + const struct poly1305_key *key, + const void *src, unsigned int nblocks) +{ + poly1305_blocks_internal(state, key, src, nblocks, 1 << 24); +} +EXPORT_SYMBOL_GPL(poly1305_core_blocks); + +static void poly1305_blocks(struct poly1305_desc_ctx *dctx, + const u8 *src, unsigned int srclen, u32 hibit) +{ + unsigned int datalen; + + if (unlikely(!dctx->sset)) { + datalen = crypto_poly1305_setdesckey(dctx, src, srclen); + src += srclen - datalen; + srclen = datalen; } - dctx->h[0] = h0; - dctx->h[1] = h1; - dctx->h[2] = h2; - dctx->h[3] = h3; - dctx->h[4] = h4; - - return srclen; + poly1305_blocks_internal(&dctx->h, &dctx->r, + src, srclen / POLY1305_BLOCK_SIZE, hibit); } int crypto_poly1305_update(struct shash_desc *desc, @@ -187,9 +199,9 @@ int crypto_poly1305_update(struct shash_desc *desc, } if (likely(srclen >= POLY1305_BLOCK_SIZE)) { - bytes = poly1305_blocks(dctx, src, srclen, 1 << 24); - src += srclen - bytes; - srclen = bytes; + poly1305_blocks(dctx, src, srclen, 1 << 24); + src += srclen - (srclen % POLY1305_BLOCK_SIZE); + srclen %= POLY1305_BLOCK_SIZE; } if (unlikely(srclen)) { @@ -201,31 +213,18 @@ int crypto_poly1305_update(struct shash_desc *desc, } EXPORT_SYMBOL_GPL(crypto_poly1305_update); -int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) +void poly1305_core_emit(const struct poly1305_state *state, void *dst) { - struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - __le32 *mac = (__le32 *)dst; u32 h0, h1, h2, h3, h4; u32 g0, g1, g2, g3, g4; u32 mask; - u64 f = 0; - - if (unlikely(!dctx->sset)) - return -ENOKEY; - - if (unlikely(dctx->buflen)) { - dctx->buf[dctx->buflen++] = 1; - memset(dctx->buf + dctx->buflen, 0, - POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0); - } /* fully carry h */ - h0 = dctx->h[0]; - h1 = dctx->h[1]; - h2 = dctx->h[2]; - h3 = dctx->h[3]; - h4 = dctx->h[4]; + h0 = state->h[0]; + h1 = state->h[1]; + h2 = state->h[2]; + h3 = state->h[3]; + h4 = state->h[4]; h2 += (h1 >> 26); h1 = h1 & 0x3ffffff; h3 += (h2 >> 26); h2 = h2 & 0x3ffffff; @@ -255,16 +254,40 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) h4 = (h4 & mask) | g4; /* h = h % (2^128) */ - h0 = (h0 >> 0) | (h1 << 26); - h1 = (h1 >> 6) | (h2 << 20); - h2 = (h2 >> 12) | (h3 << 14); - h3 = (h3 >> 18) | (h4 << 8); + put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0); + put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4); + put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8); + put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12); +} +EXPORT_SYMBOL_GPL(poly1305_core_emit); + +int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + __le32 digest[4]; + u64 f = 0; + + if (unlikely(!dctx->sset)) + return -ENOKEY; + + if (unlikely(dctx->buflen)) { + dctx->buf[dctx->buflen++] = 1; + memset(dctx->buf + dctx->buflen, 0, + POLY1305_BLOCK_SIZE - dctx->buflen); + poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0); + } + + poly1305_core_emit(&dctx->h, digest); /* mac = (h + s) % (2^128) */ - f = (f >> 32) + h0 + dctx->s[0]; mac[0] = cpu_to_le32(f); - f = (f >> 32) + h1 + dctx->s[1]; mac[1] = cpu_to_le32(f); - f = (f >> 32) + h2 + dctx->s[2]; mac[2] = cpu_to_le32(f); - f = (f >> 32) + h3 + dctx->s[3]; mac[3] = cpu_to_le32(f); + f = (f >> 32) + le32_to_cpu(digest[0]) + dctx->s[0]; + put_unaligned_le32(f, dst + 0); + f = (f >> 32) + le32_to_cpu(digest[1]) + dctx->s[1]; + put_unaligned_le32(f, dst + 4); + f = (f >> 32) + le32_to_cpu(digest[2]) + dctx->s[2]; + put_unaligned_le32(f, dst + 8); + f = (f >> 32) + le32_to_cpu(digest[3]) + dctx->s[3]; + put_unaligned_le32(f, dst + 12); return 0; }
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index f7affe7..99f2432 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c
@@ -1610,6 +1610,16 @@ static int do_test(const char *alg, u32 type, u32 mask, int m) speed_template_32); break; + case 219: + test_cipher_speed("adiantum(xchacha12,aes)", ENCRYPT, sec, NULL, + 0, speed_template_32); + test_cipher_speed("adiantum(xchacha12,aes)", DECRYPT, sec, NULL, + 0, speed_template_32); + test_cipher_speed("adiantum(xchacha20,aes)", ENCRYPT, sec, NULL, + 0, speed_template_32); + test_cipher_speed("adiantum(xchacha20,aes)", DECRYPT, sec, NULL, + 0, speed_template_32); + break; case 300: if (alg) {
diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 7125ba38..f352209 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c
@@ -2349,6 +2349,24 @@ static int alg_test_null(const struct alg_test_desc *desc, /* Please keep this list sorted by algorithm name. */ static const struct alg_test_desc alg_test_descs[] = { { + .alg = "adiantum(xchacha12,aes)", + .test = alg_test_skcipher, + .suite = { + .cipher = { + .enc = __VECS(adiantum_xchacha12_aes_enc_tv_template), + .dec = __VECS(adiantum_xchacha12_aes_dec_tv_template) + } + }, + }, { + .alg = "adiantum(xchacha20,aes)", + .test = alg_test_skcipher, + .suite = { + .cipher = { + .enc = __VECS(adiantum_xchacha20_aes_enc_tv_template), + .dec = __VECS(adiantum_xchacha20_aes_dec_tv_template) + } + }, + }, { .alg = "ansi_cprng", .test = alg_test_cprng, .suite = { @@ -3297,6 +3315,12 @@ static const struct alg_test_desc alg_test_descs[] = { .hash = __VECS(michael_mic_tv_template) } }, { + .alg = "nhpoly1305", + .test = alg_test_hash, + .suite = { + .hash = __VECS(nhpoly1305_tv_template) + } + }, { .alg = "ofb(aes)", .test = alg_test_skcipher, .fips_allowed = 1, @@ -3548,6 +3572,24 @@ static const struct alg_test_desc alg_test_descs[] = { .hash = __VECS(aes_xcbc128_tv_template) } }, { + .alg = "xchacha12", + .test = alg_test_skcipher, + .suite = { + .cipher = { + .enc = __VECS(xchacha12_tv_template), + .dec = __VECS(xchacha12_tv_template) + } + }, + }, { + .alg = "xchacha20", + .test = alg_test_skcipher, + .suite = { + .cipher = { + .enc = __VECS(xchacha20_tv_template), + .dec = __VECS(xchacha20_tv_template) + } + }, + }, { .alg = "xts(aes)", .test = alg_test_skcipher, .fips_allowed = 1, @@ -3603,6 +3645,16 @@ static const struct alg_test_desc alg_test_descs[] = { .decomp = __VECS(zlib_deflate_decomp_tv_template) } } + }, { + .alg = "zstd", + .test = alg_test_comp, + .fips_allowed = 1, + .suite = { + .comp = { + .comp = __VECS(zstd_comp_tv_template), + .decomp = __VECS(zstd_decomp_tv_template) + } + } } };
diff --git a/crypto/testmgr.h b/crypto/testmgr.h index fbc0fab..50ed007 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h
@@ -29,7 +29,7 @@ #define MAX_DIGEST_SIZE 64 #define MAX_TAP 8 -#define MAX_KEYLEN 160 +#define MAX_KEYLEN 1088 #define MAX_IVLEN 32 struct hash_testvec { @@ -37,10 +37,10 @@ struct hash_testvec { const char *key; const char *plaintext; const char *digest; - unsigned char tap[MAX_TAP]; + unsigned short tap[MAX_TAP]; + unsigned short np; unsigned short psize; - unsigned char np; - unsigned char ksize; + unsigned short ksize; }; /* @@ -4663,6 +4663,1237 @@ static const struct hash_testvec poly1305_tv_template[] = { }, }; +/* NHPoly1305 test vectors from https://github.com/google/adiantum */ +static const struct hash_testvec nhpoly1305_tv_template[] = { + { + .key = "\xd2\x5d\x4c\xdd\x8d\x2b\x7f\x7a" + "\xd9\xbe\x71\xec\xd1\x83\x52\xe3" + "\xe1\xad\xd7\x5c\x0a\x75\x9d\xec" + "\x1d\x13\x7e\x5d\x71\x07\xc9\xe4" + "\x57\x2d\x44\x68\xcf\xd8\xd6\xc5" + "\x39\x69\x7d\x32\x75\x51\x4f\x7e" + "\xb2\x4c\xc6\x90\x51\x6e\xd9\xd6" + "\xa5\x8b\x2d\xf1\x94\xf9\xf7\x5e" + "\x2c\x84\x7b\x41\x0f\x88\x50\x89" + "\x30\xd9\xa1\x38\x46\x6c\xc0\x4f" + "\xe8\xdf\xdc\x66\xab\x24\x43\x41" + "\x91\x55\x29\x65\x86\x28\x5e\x45" + "\xd5\x2d\xb7\x80\x08\x9a\xc3\xd4" + "\x9a\x77\x0a\xd4\xef\x3e\xe6\x3f" + "\x6f\x2f\x9b\x3a\x7d\x12\x1e\x80" + "\x6c\x44\xa2\x25\xe1\xf6\x60\xe9" + "\x0d\xaf\xc5\x3c\xa5\x79\xae\x64" + "\xbc\xa0\x39\xa3\x4d\x10\xe5\x4d" + "\xd5\xe7\x89\x7a\x13\xee\x06\x78" + "\xdc\xa4\xdc\x14\x27\xe6\x49\x38" + "\xd0\xe0\x45\x25\x36\xc5\xf4\x79" + "\x2e\x9a\x98\x04\xe4\x2b\x46\x52" + "\x7c\x33\xca\xe2\x56\x51\x50\xe2" + "\xa5\x9a\xae\x18\x6a\x13\xf8\xd2" + "\x21\x31\x66\x02\xe2\xda\x8d\x7e" + "\x41\x19\xb2\x61\xee\x48\x8f\xf1" + "\x65\x24\x2e\x1e\x68\xce\x05\xd9" + "\x2a\xcf\xa5\x3a\x57\xdd\x35\x91" + "\x93\x01\xca\x95\xfc\x2b\x36\x04" + "\xe6\x96\x97\x28\xf6\x31\xfe\xa3" + "\x9d\xf6\x6a\x1e\x80\x8d\xdc\xec" + "\xaf\x66\x11\x13\x02\x88\xd5\x27" + "\x33\xb4\x1a\xcd\xa3\xf6\xde\x31" + "\x8e\xc0\x0e\x6c\xd8\x5a\x97\x5e" + "\xdd\xfd\x60\x69\x38\x46\x3f\x90" + "\x5e\x97\xd3\x32\x76\xc7\x82\x49" + "\xfe\xba\x06\x5f\x2f\xa2\xfd\xff" + "\x80\x05\x40\xe4\x33\x03\xfb\x10" + "\xc0\xde\x65\x8c\xc9\x8d\x3a\x9d" + "\xb5\x7b\x36\x4b\xb5\x0c\xcf\x00" + "\x9c\x87\xe4\x49\xad\x90\xda\x4a" + "\xdd\xbd\xff\xe2\x32\x57\xd6\x78" + "\x36\x39\x6c\xd3\x5b\x9b\x88\x59" + "\x2d\xf0\x46\xe4\x13\x0e\x2b\x35" + "\x0d\x0f\x73\x8a\x4f\x26\x84\x75" + "\x88\x3c\xc5\x58\x66\x18\x1a\xb4" + "\x64\x51\x34\x27\x1b\xa4\x11\xc9" + "\x6d\x91\x8a\xfa\x32\x60\x9d\xd7" + "\x87\xe5\xaa\x43\x72\xf8\xda\xd1" + "\x48\x44\x13\x61\xdc\x8c\x76\x17" + "\x0c\x85\x4e\xf3\xdd\xa2\x42\xd2" + "\x74\xc1\x30\x1b\xeb\x35\x31\x29" + "\x5b\xd7\x4c\x94\x46\x35\xa1\x23" + "\x50\xf2\xa2\x8e\x7e\x4f\x23\x4f" + "\x51\xff\xe2\xc9\xa3\x7d\x56\x8b" + "\x41\xf2\xd0\xc5\x57\x7e\x59\xac" + "\xbb\x65\xf3\xfe\xf7\x17\xef\x63" + "\x7c\x6f\x23\xdd\x22\x8e\xed\x84" + "\x0e\x3b\x09\xb3\xf3\xf4\x8f\xcd" + "\x37\xa8\xe1\xa7\x30\xdb\xb1\xa2" + "\x9c\xa2\xdf\x34\x17\x3e\x68\x44" + "\xd0\xde\x03\x50\xd1\x48\x6b\x20" + "\xe2\x63\x45\xa5\xea\x87\xc2\x42" + "\x95\x03\x49\x05\xed\xe0\x90\x29" + "\x1a\xb8\xcf\x9b\x43\xcf\x29\x7a" + "\x63\x17\x41\x9f\xe0\xc9\x10\xfd" + "\x2c\x56\x8c\x08\x55\xb4\xa9\x27" + "\x0f\x23\xb1\x05\x6a\x12\x46\xc7" + "\xe1\xfe\x28\x93\x93\xd7\x2f\xdc" + "\x98\x30\xdb\x75\x8a\xbe\x97\x7a" + "\x02\xfb\x8c\xba\xbe\x25\x09\xbe" + "\xce\xcb\xa2\xef\x79\x4d\x0e\x9d" + "\x1b\x9d\xb6\x39\x34\x38\xfa\x07" + "\xec\xe8\xfc\x32\x85\x1d\xf7\x85" + "\x63\xc3\x3c\xc0\x02\x75\xd7\x3f" + "\xb2\x68\x60\x66\x65\x81\xc6\xb1" + "\x42\x65\x4b\x4b\x28\xd7\xc7\xaa" + "\x9b\xd2\xdc\x1b\x01\xe0\x26\x39" + "\x01\xc1\x52\x14\xd1\x3f\xb7\xe6" + "\x61\x41\xc7\x93\xd2\xa2\x67\xc6" + "\xf7\x11\xb5\xf5\xea\xdd\x19\xfb" + "\x4d\x21\x12\xd6\x7d\xf1\x10\xb0" + "\x89\x07\xc7\x5a\x52\x73\x70\x2f" + "\x32\xef\x65\x2b\x12\xb2\xf0\xf5" + "\x20\xe0\x90\x59\x7e\x64\xf1\x4c" + "\x41\xb3\xa5\x91\x08\xe6\x5e\x5f" + "\x05\x56\x76\xb4\xb0\xcd\x70\x53" + "\x10\x48\x9c\xff\xc2\x69\x55\x24" + "\x87\xef\x84\xea\xfb\xa7\xbf\xa0" + "\x91\x04\xad\x4f\x8b\x57\x54\x4b" + "\xb6\xe9\xd1\xac\x37\x2f\x1d\x2e" + "\xab\xa5\xa4\xe8\xff\xfb\xd9\x39" + "\x2f\xb7\xac\xd1\xfe\x0b\x9a\x80" + "\x0f\xb6\xf4\x36\x39\x90\x51\xe3" + "\x0a\x2f\xb6\x45\x76\x89\xcd\x61" + "\xfe\x48\x5f\x75\x1d\x13\x00\x62" + "\x80\x24\x47\xe7\xbc\x37\xd7\xe3" + "\x15\xe8\x68\x22\xaf\x80\x6f\x4b" + "\xa8\x9f\x01\x10\x48\x14\xc3\x02" + "\x52\xd2\xc7\x75\x9b\x52\x6d\x30" + "\xac\x13\x85\xc8\xf7\xa3\x58\x4b" + "\x49\xf7\x1c\x45\x55\x8c\x39\x9a" + "\x99\x6d\x97\x27\x27\xe6\xab\xdd" + "\x2c\x42\x1b\x35\xdd\x9d\x73\xbb" + "\x6c\xf3\x64\xf1\xfb\xb9\xf7\xe6" + "\x4a\x3c\xc0\x92\xc0\x2e\xb7\x1a" + "\xbe\xab\xb3\x5a\xe5\xea\xb1\x48" + "\x58\x13\x53\x90\xfd\xc3\x8e\x54" + "\xf9\x18\x16\x73\xe8\xcb\x6d\x39" + "\x0e\xd7\xe0\xfe\xb6\x9f\x43\x97" + "\xe8\xd0\x85\x56\x83\x3e\x98\x68" + "\x7f\xbd\x95\xa8\x9a\x61\x21\x8f" + "\x06\x98\x34\xa6\xc8\xd6\x1d\xf3" + "\x3d\x43\xa4\x9a\x8c\xe5\xd3\x5a" + "\x32\xa2\x04\x22\xa4\x19\x1a\x46" + "\x42\x7e\x4d\xe5\xe0\xe6\x0e\xca" + "\xd5\x58\x9d\x2c\xaf\xda\x33\x5c" + "\xb0\x79\x9e\xc9\xfc\xca\xf0\x2f" + "\xa8\xb2\x77\xeb\x7a\xa2\xdd\x37" + "\x35\x83\x07\xd6\x02\x1a\xb6\x6c" + "\x24\xe2\x59\x08\x0e\xfd\x3e\x46" + "\xec\x40\x93\xf4\x00\x26\x4f\x2a" + "\xff\x47\x2f\xeb\x02\x92\x26\x5b" + "\x53\x17\xc2\x8d\x2a\xc7\xa3\x1b" + "\xcd\xbc\xa7\xe8\xd1\x76\xe3\x80" + "\x21\xca\x5d\x3b\xe4\x9c\x8f\xa9" + "\x5b\x7f\x29\x7f\x7c\xd8\xed\x6d" + "\x8c\xb2\x86\x85\xe7\x77\xf2\x85" + "\xab\x38\xa9\x9d\xc1\x4e\xc5\x64" + "\x33\x73\x8b\x59\x03\xad\x05\xdf" + "\x25\x98\x31\xde\xef\x13\xf1\x9b" + "\x3c\x91\x9d\x7b\xb1\xfa\xe6\xbf" + "\x5b\xed\xa5\x55\xe6\xea\x6c\x74" + "\xf4\xb9\xe4\x45\x64\x72\x81\xc2" + "\x4c\x28\xd4\xcd\xac\xe2\xde\xf9" + "\xeb\x5c\xeb\x61\x60\x5a\xe5\x28", + .ksize = 1088, + .plaintext = "", + .psize = 0, + .digest = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + }, { + .key = "\x29\x21\x43\xcb\xcb\x13\x07\xde" + "\xbf\x48\xdf\x8a\x7f\xa2\x84\xde" + "\x72\x23\x9d\xf5\xf0\x07\xf2\x4c" + "\x20\x3a\x93\xb9\xcd\x5d\xfe\xcb" + "\x99\x2c\x2b\x58\xc6\x50\x5f\x94" + "\x56\xc3\x7c\x0d\x02\x3f\xb8\x5e" + "\x7b\xc0\x6c\x51\x34\x76\xc0\x0e" + "\xc6\x22\xc8\x9e\x92\xa0\x21\xc9" + "\x85\x5c\x7c\xf8\xe2\x64\x47\xc9" + "\xe4\xa2\x57\x93\xf8\xa2\x69\xcd" + "\x62\x98\x99\xf4\xd7\x7b\x14\xb1" + "\xd8\x05\xff\x04\x15\xc9\xe1\x6e" + "\x9b\xe6\x50\x6b\x0b\x3f\x22\x1f" + "\x08\xde\x0c\x5b\x08\x7e\xc6\x2f" + "\x6c\xed\xd6\xb2\x15\xa4\xb3\xf9" + "\xa7\x46\x38\x2a\xea\x69\xa5\xde" + "\x02\xc3\x96\x89\x4d\x55\x3b\xed" + "\x3d\x3a\x85\x77\xbf\x97\x45\x5c" + "\x9e\x02\x69\xe2\x1b\x68\xbe\x96" + "\xfb\x64\x6f\x0f\xf6\x06\x40\x67" + "\xfa\x04\xe3\x55\xfa\xbe\xa4\x60" + "\xef\x21\x66\x97\xe6\x9d\x5c\x1f" + "\x62\x37\xaa\x31\xde\xe4\x9c\x28" + "\x95\xe0\x22\x86\xf4\x4d\xf3\x07" + "\xfd\x5f\x3a\x54\x2c\x51\x80\x71" + "\xba\x78\x69\x5b\x65\xab\x1f\x81" + "\xed\x3b\xff\x34\xa3\xfb\xbc\x73" + "\x66\x7d\x13\x7f\xdf\x6e\xe2\xe2" + "\xeb\x4f\x6c\xda\x7d\x33\x57\xd0" + "\xd3\x7c\x95\x4f\x33\x58\x21\xc7" + "\xc0\xe5\x6f\x42\x26\xc6\x1f\x5e" + "\x85\x1b\x98\x9a\xa2\x1e\x55\x77" + "\x23\xdf\x81\x5e\x79\x55\x05\xfc" + "\xfb\xda\xee\xba\x5a\xba\xf7\x77" + "\x7f\x0e\xd3\xe1\x37\xfe\x8d\x2b" + "\xd5\x3f\xfb\xd0\xc0\x3c\x0b\x3f" + "\xcf\x3c\x14\xcf\xfb\x46\x72\x4c" + "\x1f\x39\xe2\xda\x03\x71\x6d\x23" + "\xef\x93\xcd\x39\xd9\x37\x80\x4d" + "\x65\x61\xd1\x2c\x03\xa9\x47\x72" + "\x4d\x1e\x0e\x16\x33\x0f\x21\x17" + "\xec\x92\xea\x6f\x37\x22\xa4\xd8" + "\x03\x33\x9e\xd8\x03\x69\x9a\xe8" + "\xb2\x57\xaf\x78\x99\x05\x12\xab" + "\x48\x90\x80\xf0\x12\x9b\x20\x64" + "\x7a\x1d\x47\x5f\xba\x3c\xf9\xc3" + "\x0a\x0d\x8d\xa1\xf9\x1b\x82\x13" + "\x3e\x0d\xec\x0a\x83\xc0\x65\xe1" + "\xe9\x95\xff\x97\xd6\xf2\xe4\xd5" + "\x86\xc0\x1f\x29\x27\x63\xd7\xde" + "\xb7\x0a\x07\x99\x04\x2d\xa3\x89" + "\xa2\x43\xcf\xf3\xe1\x43\xac\x4a" + "\x06\x97\xd0\x05\x4f\x87\xfa\xf9" + "\x9b\xbf\x52\x70\xbd\xbc\x6c\xf3" + "\x03\x13\x60\x41\x28\x09\xec\xcc" + "\xb1\x1a\xec\xd6\xfb\x6f\x2a\x89" + "\x5d\x0b\x53\x9c\x59\xc1\x84\x21" + "\x33\x51\x47\x19\x31\x9c\xd4\x0a" + "\x4d\x04\xec\x50\x90\x61\xbd\xbc" + "\x7e\xc8\xd9\x6c\x98\x1d\x45\x41" + "\x17\x5e\x97\x1c\xc5\xa8\xe8\xea" + "\x46\x58\x53\xf7\x17\xd5\xad\x11" + "\xc8\x54\xf5\x7a\x33\x90\xf5\x19" + "\xba\x36\xb4\xfc\x52\xa5\x72\x3d" + "\x14\xbb\x55\xa7\xe9\xe3\x12\xf7" + "\x1c\x30\xa2\x82\x03\xbf\x53\x91" + "\x2e\x60\x41\x9f\x5b\x69\x39\xf6" + "\x4d\xc8\xf8\x46\x7a\x7f\xa4\x98" + "\x36\xff\x06\xcb\xca\xe7\x33\xf2" + "\xc0\x4a\xf4\x3c\x14\x44\x5f\x6b" + "\x75\xef\x02\x36\x75\x08\x14\xfd" + "\x10\x8e\xa5\x58\xd0\x30\x46\x49" + "\xaf\x3a\xf8\x40\x3d\x35\xdb\x84" + "\x11\x2e\x97\x6a\xb7\x87\x7f\xad" + "\xf1\xfa\xa5\x63\x60\xd8\x5e\xbf" + "\x41\x78\x49\xcf\x77\xbb\x56\xbb" + "\x7d\x01\x67\x05\x22\xc8\x8f\x41" + "\xba\x81\xd2\xca\x2c\x38\xac\x76" + "\x06\xc1\x1a\xc2\xce\xac\x90\x67" + "\x57\x3e\x20\x12\x5b\xd9\x97\x58" + "\x65\x05\xb7\x04\x61\x7e\xd8\x3a" + "\xbf\x55\x3b\x13\xe9\x34\x5a\x37" + "\x36\xcb\x94\x45\xc5\x32\xb3\xa0" + "\x0c\x3e\x49\xc5\xd3\xed\xa7\xf0" + "\x1c\x69\xcc\xea\xcc\x83\xc9\x16" + "\x95\x72\x4b\xf4\x89\xd5\xb9\x10" + "\xf6\x2d\x60\x15\xea\x3c\x06\x66" + "\x9f\x82\xad\x17\xce\xd2\xa4\x48" + "\x7c\x65\xd9\xf8\x02\x4d\x9b\x4c" + "\x89\x06\x3a\x34\x85\x48\x89\x86" + "\xf9\x24\xa9\x54\x72\xdb\x44\x95" + "\xc7\x44\x1c\x19\x11\x4c\x04\xdc" + "\x13\xb9\x67\xc8\xc3\x3a\x6a\x50" + "\xfa\xd1\xfb\xe1\x88\xb6\xf1\xa3" + "\xc5\x3b\xdc\x38\x45\x16\x26\x02" + "\x3b\xb8\x8f\x8b\x58\x7d\x23\x04" + "\x50\x6b\x81\x9f\xae\x66\xac\x6f" + "\xcf\x2a\x9d\xf1\xfd\x1d\x57\x07" + "\xbe\x58\xeb\x77\x0c\xe3\xc2\x19" + "\x14\x74\x1b\x51\x1c\x4f\x41\xf3" + "\x32\x89\xb3\xe7\xde\x62\xf6\x5f" + "\xc7\x6a\x4a\x2a\x5b\x0f\x5f\x87" + "\x9c\x08\xb9\x02\x88\xc8\x29\xb7" + "\x94\x52\xfa\x52\xfe\xaa\x50\x10" + "\xba\x48\x75\x5e\x11\x1b\xe6\x39" + "\xd7\x82\x2c\x87\xf1\x1e\xa4\x38" + "\x72\x3e\x51\xe7\xd8\x3e\x5b\x7b" + "\x31\x16\x89\xba\xd6\xad\x18\x5e" + "\xba\xf8\x12\xb3\xf4\x6c\x47\x30" + "\xc0\x38\x58\xb3\x10\x8d\x58\x5d" + "\xb4\xfb\x19\x7e\x41\xc3\x66\xb8" + "\xd6\x72\x84\xe1\x1a\xc2\x71\x4c" + "\x0d\x4a\x21\x7a\xab\xa2\xc0\x36" + "\x15\xc5\xe9\x46\xd7\x29\x17\x76" + "\x5e\x47\x36\x7f\x72\x05\xa7\xcc" + "\x36\x63\xf9\x47\x7d\xe6\x07\x3c" + "\x8b\x79\x1d\x96\x61\x8d\x90\x65" + "\x7c\xf5\xeb\x4e\x6e\x09\x59\x6d" + "\x62\x50\x1b\x0f\xe0\xdc\x78\xf2" + "\x5b\x83\x1a\xa1\x11\x75\xfd\x18" + "\xd7\xe2\x8d\x65\x14\x21\xce\xbe" + "\xb5\x87\xe3\x0a\xda\x24\x0a\x64" + "\xa9\x9f\x03\x8d\x46\x5d\x24\x1a" + "\x8a\x0c\x42\x01\xca\xb1\x5f\x7c" + "\xa5\xac\x32\x4a\xb8\x07\x91\x18" + "\x6f\xb0\x71\x3c\xc9\xb1\xa8\xf8" + "\x5f\x69\xa5\xa1\xca\x9e\x7a\xaa" + "\xac\xe9\xc7\x47\x41\x75\x25\xc3" + "\x73\xe2\x0b\xdd\x6d\x52\x71\xbe" + "\xc5\xdc\xb4\xe7\x01\x26\x53\x77" + "\x86\x90\x85\x68\x6b\x7b\x03\x53" + "\xda\x52\x52\x51\x68\xc8\xf3\xec" + "\x6c\xd5\x03\x7a\xa3\x0e\xb4\x02" + "\x5f\x1a\xab\xee\xca\x67\x29\x7b" + "\xbd\x96\x59\xb3\x8b\x32\x7a\x92" + "\x9f\xd8\x25\x2b\xdf\xc0\x4c\xda", + .ksize = 1088, + .plaintext = "\xbc\xda\x81\xa8\x78\x79\x1c\xbf" + "\x77\x53\xba\x4c\x30\x5b\xb8\x33", + .psize = 16, + .digest = "\x04\xbf\x7f\x6a\xce\x72\xea\x6a" + "\x79\xdb\xb0\xc9\x60\xf6\x12\xcc", + .np = 6, + .tap = { 4, 4, 1, 1, 1, 5 }, + }, { + .key = "\x65\x4d\xe3\xf8\xd2\x4c\xac\x28" + "\x68\xf5\xb3\x81\x71\x4b\xa1\xfa" + "\x04\x0e\xd3\x81\x36\xbe\x0c\x81" + "\x5e\xaf\xbc\x3a\xa4\xc0\x8e\x8b" + "\x55\x63\xd3\x52\x97\x88\xd6\x19" + "\xbc\x96\xdf\x49\xff\x04\x63\xf5" + "\x0c\x11\x13\xaa\x9e\x1f\x5a\xf7" + "\xdd\xbd\x37\x80\xc3\xd0\xbe\xa7" + "\x05\xc8\x3c\x98\x1e\x05\x3c\x84" + "\x39\x61\xc4\xed\xed\x71\x1b\xc4" + "\x74\x45\x2c\xa1\x56\x70\x97\xfd" + "\x44\x18\x07\x7d\xca\x60\x1f\x73" + "\x3b\x6d\x21\xcb\x61\x87\x70\x25" + "\x46\x21\xf1\x1f\x21\x91\x31\x2d" + "\x5d\xcc\xb7\xd1\x84\x3e\x3d\xdb" + "\x03\x53\x2a\x82\xa6\x9a\x95\xbc" + "\x1a\x1e\x0a\x5e\x07\x43\xab\x43" + "\xaf\x92\x82\x06\x91\x04\x09\xf4" + "\x17\x0a\x9a\x2c\x54\xdb\xb8\xf4" + "\xd0\xf0\x10\x66\x24\x8d\xcd\xda" + "\xfe\x0e\x45\x9d\x6f\xc4\x4e\xf4" + "\x96\xaf\x13\xdc\xa9\xd4\x8c\xc4" + "\xc8\x57\x39\x3c\xc2\xd3\x0a\x76" + "\x4a\x1f\x75\x83\x44\xc7\xd1\x39" + "\xd8\xb5\x41\xba\x73\x87\xfa\x96" + "\xc7\x18\x53\xfb\x9b\xda\xa0\x97" + "\x1d\xee\x60\x85\x9e\x14\xc3\xce" + "\xc4\x05\x29\x3b\x95\x30\xa3\xd1" + "\x9f\x82\x6a\x04\xf5\xa7\x75\x57" + "\x82\x04\xfe\x71\x51\x71\xb1\x49" + "\x50\xf8\xe0\x96\xf1\xfa\xa8\x88" + "\x3f\xa0\x86\x20\xd4\x60\x79\x59" + "\x17\x2d\xd1\x09\xf4\xec\x05\x57" + "\xcf\x62\x7e\x0e\x7e\x60\x78\xe6" + "\x08\x60\x29\xd8\xd5\x08\x1a\x24" + "\xc4\x6c\x24\xe7\x92\x08\x3d\x8a" + "\x98\x7a\xcf\x99\x0a\x65\x0e\xdc" + "\x8c\x8a\xbe\x92\x82\x91\xcc\x62" + "\x30\xb6\xf4\x3f\xc6\x8a\x7f\x12" + "\x4a\x8a\x49\xfa\x3f\x5c\xd4\x5a" + "\xa6\x82\xa3\xe6\xaa\x34\x76\xb2" + "\xab\x0a\x30\xef\x6c\x77\x58\x3f" + "\x05\x6b\xcc\x5c\xae\xdc\xd7\xb9" + "\x51\x7e\x8d\x32\x5b\x24\x25\xbe" + "\x2b\x24\x01\xcf\x80\xda\x16\xd8" + "\x90\x72\x2c\xad\x34\x8d\x0c\x74" + "\x02\xcb\xfd\xcf\x6e\xef\x97\xb5" + "\x4c\xf2\x68\xca\xde\x43\x9e\x8a" + "\xc5\x5f\x31\x7f\x14\x71\x38\xec" + "\xbd\x98\xe5\x71\xc4\xb5\xdb\xef" + "\x59\xd2\xca\xc0\xc1\x86\x75\x01" + "\xd4\x15\x0d\x6f\xa4\xf7\x7b\x37" + "\x47\xda\x18\x93\x63\xda\xbe\x9e" + "\x07\xfb\xb2\x83\xd5\xc4\x34\x55" + "\xee\x73\xa1\x42\x96\xf9\x66\x41" + "\xa4\xcc\xd2\x93\x6e\xe1\x0a\xbb" + "\xd2\xdd\x18\x23\xe6\x6b\x98\x0b" + "\x8a\x83\x59\x2c\xc3\xa6\x59\x5b" + "\x01\x22\x59\xf7\xdc\xb0\x87\x7e" + "\xdb\x7d\xf4\x71\x41\xab\xbd\xee" + "\x79\xbe\x3c\x01\x76\x0b\x2d\x0a" + "\x42\xc9\x77\x8c\xbb\x54\x95\x60" + "\x43\x2e\xe0\x17\x52\xbd\x90\xc9" + "\xc2\x2c\xdd\x90\x24\x22\x76\x40" + "\x5c\xb9\x41\xc9\xa1\xd5\xbd\xe3" + "\x44\xe0\xa4\xab\xcc\xb8\xe2\x32" + "\x02\x15\x04\x1f\x8c\xec\x5d\x14" + "\xac\x18\xaa\xef\x6e\x33\x19\x6e" + "\xde\xfe\x19\xdb\xeb\x61\xca\x18" + "\xad\xd8\x3d\xbf\x09\x11\xc7\xa5" + "\x86\x0b\x0f\xe5\x3e\xde\xe8\xd9" + "\x0a\x69\x9e\x4c\x20\xff\xf9\xc5" + "\xfa\xf8\xf3\x7f\xa5\x01\x4b\x5e" + "\x0f\xf0\x3b\x68\xf0\x46\x8c\x2a" + "\x7a\xc1\x8f\xa0\xfe\x6a\x5b\x44" + "\x70\x5c\xcc\x92\x2c\x6f\x0f\xbd" + "\x25\x3e\xb7\x8e\x73\x58\xda\xc9" + "\xa5\xaa\x9e\xf3\x9b\xfd\x37\x3e" + "\xe2\x88\xa4\x7b\xc8\x5c\xa8\x93" + "\x0e\xe7\x9a\x9c\x2e\x95\x18\x9f" + "\xc8\x45\x0c\x88\x9e\x53\x4f\x3a" + "\x76\xc1\x35\xfa\x17\xd8\xac\xa0" + "\x0c\x2d\x47\x2e\x4f\x69\x9b\xf7" + "\xd0\xb6\x96\x0c\x19\xb3\x08\x01" + "\x65\x7a\x1f\xc7\x31\x86\xdb\xc8" + "\xc1\x99\x8f\xf8\x08\x4a\x9d\x23" + "\x22\xa8\xcf\x27\x01\x01\x88\x93" + "\x9c\x86\x45\xbd\xe0\x51\xca\x52" + "\x84\xba\xfe\x03\xf7\xda\xc5\xce" + "\x3e\x77\x75\x86\xaf\x84\xc8\x05" + "\x44\x01\x0f\x02\xf3\x58\xb0\x06" + "\x5a\xd7\x12\x30\x8d\xdf\x1f\x1f" + "\x0a\xe6\xd2\xea\xf6\x3a\x7a\x99" + "\x63\xe8\xd2\xc1\x4a\x45\x8b\x40" + "\x4d\x0a\xa9\x76\x92\xb3\xda\x87" + "\x36\x33\xf0\x78\xc3\x2f\x5f\x02" + "\x1a\x6a\x2c\x32\xcd\x76\xbf\xbd" + "\x5a\x26\x20\x28\x8c\x8c\xbc\x52" + "\x3d\x0a\xc9\xcb\xab\xa4\x21\xb0" + "\x54\x40\x81\x44\xc7\xd6\x1c\x11" + "\x44\xc6\x02\x92\x14\x5a\xbf\x1a" + "\x09\x8a\x18\xad\xcd\x64\x3d\x53" + "\x4a\xb6\xa5\x1b\x57\x0e\xef\xe0" + "\x8c\x44\x5f\x7d\xbd\x6c\xfd\x60" + "\xae\x02\x24\xb6\x99\xdd\x8c\xaf" + "\x59\x39\x75\x3c\xd1\x54\x7b\x86" + "\xcc\x99\xd9\x28\x0c\xb0\x94\x62" + "\xf9\x51\xd1\x19\x96\x2d\x66\xf5" + "\x55\xcf\x9e\x59\xe2\x6b\x2c\x08" + "\xc0\x54\x48\x24\x45\xc3\x8c\x73" + "\xea\x27\x6e\x66\x7d\x1d\x0e\x6e" + "\x13\xe8\x56\x65\x3a\xb0\x81\x5c" + "\xf0\xe8\xd8\x00\x6b\xcd\x8f\xad" + "\xdd\x53\xf3\xa4\x6c\x43\xd6\x31" + "\xaf\xd2\x76\x1e\x91\x12\xdb\x3c" + "\x8c\xc2\x81\xf0\x49\xdb\xe2\x6b" + "\x76\x62\x0a\x04\xe4\xaa\x8a\x7c" + "\x08\x0b\x5d\xd0\xee\x1d\xfb\xc4" + "\x02\x75\x42\xd6\xba\xa7\x22\xa8" + "\x47\x29\xb7\x85\x6d\x93\x3a\xdb" + "\x00\x53\x0b\xa2\xeb\xf8\xfe\x01" + "\x6f\x8a\x31\xd6\x17\x05\x6f\x67" + "\x88\x95\x32\xfe\x4f\xa6\x4b\xf8" + "\x03\xe4\xcd\x9a\x18\xe8\x4e\x2d" + "\xf7\x97\x9a\x0c\x7d\x9f\x7e\x44" + "\x69\x51\xe0\x32\x6b\x62\x86\x8f" + "\xa6\x8e\x0b\x21\x96\xe5\xaf\x77" + "\xc0\x83\xdf\xa5\x0e\xd0\xa1\x04" + "\xaf\xc1\x10\xcb\x5a\x40\xe4\xe3" + "\x38\x7e\x07\xe8\x4d\xfa\xed\xc5" + "\xf0\x37\xdf\xbb\x8a\xcf\x3d\xdc" + "\x61\xd2\xc6\x2b\xff\x07\xc9\x2f" + "\x0c\x2d\x5c\x07\xa8\x35\x6a\xfc" + "\xae\x09\x03\x45\x74\x51\x4d\xc4" + "\xb8\x23\x87\x4a\x99\x27\x20\x87" + "\x62\x44\x0a\x4a\xce\x78\x47\x22", + .ksize = 1088, + .plaintext = "\x8e\xb0\x4c\xde\x9c\x4a\x04\x5a" + "\xf6\xa9\x7f\x45\x25\xa5\x7b\x3a" + "\xbc\x4d\x73\x39\x81\xb5\xbd\x3d" + "\x21\x6f\xd7\x37\x50\x3c\x7b\x28" + "\xd1\x03\x3a\x17\xed\x7b\x7c\x2a" + "\x16\xbc\xdf\x19\x89\x52\x71\x31" + "\xb6\xc0\xfd\xb5\xd3\xba\x96\x99" + "\xb6\x34\x0b\xd0\x99\x93\xfc\x1a" + "\x01\x3c\x85\xc6\x9b\x78\x5c\x8b" + "\xfe\xae\xd2\xbf\xb2\x6f\xf9\xed" + "\xc8\x25\x17\xfe\x10\x3b\x7d\xda" + "\xf4\x8d\x35\x4b\x7c\x7b\x82\xe7" + "\xc2\xb3\xee\x60\x4a\x03\x86\xc9" + "\x4e\xb5\xc4\xbe\xd2\xbd\x66\xf1" + "\x13\xf1\x09\xab\x5d\xca\x63\x1f" + "\xfc\xfb\x57\x2a\xfc\xca\x66\xd8" + "\x77\x84\x38\x23\x1d\xac\xd3\xb3" + "\x7a\xad\x4c\x70\xfa\x9c\xc9\x61" + "\xa6\x1b\xba\x33\x4b\x4e\x33\xec" + "\xa0\xa1\x64\x39\x40\x05\x1c\xc2" + "\x3f\x49\x9d\xae\xf2\xc5\xf2\xc5" + "\xfe\xe8\xf4\xc2\xf9\x96\x2d\x28" + "\x92\x30\x44\xbc\xd2\x7f\xe1\x6e" + "\x62\x02\x8f\x3d\x1c\x80\xda\x0e" + "\x6a\x90\x7e\x75\xff\xec\x3e\xc4" + "\xcd\x16\x34\x3b\x05\x6d\x4d\x20" + "\x1c\x7b\xf5\x57\x4f\xfa\x3d\xac" + "\xd0\x13\x55\xe8\xb3\xe1\x1b\x78" + "\x30\xe6\x9f\x84\xd4\x69\xd1\x08" + "\x12\x77\xa7\x4a\xbd\xc0\xf2\xd2" + "\x78\xdd\xa3\x81\x12\xcb\x6c\x14" + "\x90\x61\xe2\x84\xc6\x2b\x16\xcc" + "\x40\x99\x50\x88\x01\x09\x64\x4f" + "\x0a\x80\xbe\x61\xae\x46\xc9\x0a" + "\x5d\xe0\xfb\x72\x7a\x1a\xdd\x61" + "\x63\x20\x05\xa0\x4a\xf0\x60\x69" + "\x7f\x92\xbc\xbf\x4e\x39\x4d\xdd" + "\x74\xd1\xb7\xc0\x5a\x34\xb7\xae" + "\x76\x65\x2e\xbc\x36\xb9\x04\x95" + "\x42\xe9\x6f\xca\x78\xb3\x72\x07" + "\xa3\xba\x02\x94\x67\x4c\xb1\xd7" + "\xe9\x30\x0d\xf0\x3b\xb8\x10\x6d" + "\xea\x2b\x21\xbf\x74\x59\x82\x97" + "\x85\xaa\xf1\xd7\x54\x39\xeb\x05" + "\xbd\xf3\x40\xa0\x97\xe6\x74\xfe" + "\xb4\x82\x5b\xb1\x36\xcb\xe8\x0d" + "\xce\x14\xd9\xdf\xf1\x94\x22\xcd" + "\xd6\x00\xba\x04\x4c\x05\x0c\xc0" + "\xd1\x5a\xeb\x52\xd5\xa8\x8e\xc8" + "\x97\xa1\xaa\xc1\xea\xc1\xbe\x7c" + "\x36\xb3\x36\xa0\xc6\x76\x66\xc5" + "\xe2\xaf\xd6\x5c\xe2\xdb\x2c\xb3" + "\x6c\xb9\x99\x7f\xff\x9f\x03\x24" + "\xe1\x51\x44\x66\xd8\x0c\x5d\x7f" + "\x5c\x85\x22\x2a\xcf\x6d\x79\x28" + "\xab\x98\x01\x72\xfe\x80\x87\x5f" + "\x46\xba\xef\x81\x24\xee\xbf\xb0" + "\x24\x74\xa3\x65\x97\x12\xc4\xaf" + "\x8b\xa0\x39\xda\x8a\x7e\x74\x6e" + "\x1b\x42\xb4\x44\x37\xfc\x59\xfd" + "\x86\xed\xfb\x8c\x66\x33\xda\x63" + "\x75\xeb\xe1\xa4\x85\x4f\x50\x8f" + "\x83\x66\x0d\xd3\x37\xfa\xe6\x9c" + "\x4f\x30\x87\x35\x18\xe3\x0b\xb7" + "\x6e\x64\x54\xcd\x70\xb3\xde\x54" + "\xb7\x1d\xe6\x4c\x4d\x55\x12\x12" + "\xaf\x5f\x7f\x5e\xee\x9d\xe8\x8e" + "\x32\x9d\x4e\x75\xeb\xc6\xdd\xaa" + "\x48\x82\xa4\x3f\x3c\xd7\xd3\xa8" + "\x63\x9e\x64\xfe\xe3\x97\x00\x62" + "\xe5\x40\x5d\xc3\xad\x72\xe1\x28" + "\x18\x50\xb7\x75\xef\xcd\x23\xbf" + "\x3f\xc0\x51\x36\xf8\x41\xc3\x08" + "\xcb\xf1\x8d\x38\x34\xbd\x48\x45" + "\x75\xed\xbc\x65\x7b\xb5\x0c\x9b" + "\xd7\x67\x7d\x27\xb4\xc4\x80\xd7" + "\xa9\xb9\xc7\x4a\x97\xaa\xda\xc8" + "\x3c\x74\xcf\x36\x8f\xe4\x41\xe3" + "\xd4\xd3\x26\xa7\xf3\x23\x9d\x8f" + "\x6c\x20\x05\x32\x3e\xe0\xc3\xc8" + "\x56\x3f\xa7\x09\xb7\xfb\xc7\xf7" + "\xbe\x2a\xdd\x0f\x06\x7b\x0d\xdd" + "\xb0\xb4\x86\x17\xfd\xb9\x04\xe5" + "\xc0\x64\x5d\xad\x2a\x36\x38\xdb" + "\x24\xaf\x5b\xff\xca\xf9\x41\xe8" + "\xf9\x2f\x1e\x5e\xf9\xf5\xd5\xf2" + "\xb2\x88\xca\xc9\xa1\x31\xe2\xe8" + "\x10\x95\x65\xbf\xf1\x11\x61\x7a" + "\x30\x1a\x54\x90\xea\xd2\x30\xf6" + "\xa5\xad\x60\xf9\x4d\x84\x21\x1b" + "\xe4\x42\x22\xc8\x12\x4b\xb0\x58" + "\x3e\x9c\x2d\x32\x95\x0a\x8e\xb0" + "\x0a\x7e\x77\x2f\xe8\x97\x31\x6a" + "\xf5\x59\xb4\x26\xe6\x37\x12\xc9" + "\xcb\xa0\x58\x33\x6f\xd5\x55\x55" + "\x3c\xa1\x33\xb1\x0b\x7e\x2e\xb4" + "\x43\x2a\x84\x39\xf0\x9c\xf4\x69" + "\x4f\x1e\x79\xa6\x15\x1b\x87\xbb" + "\xdb\x9b\xe0\xf1\x0b\xba\xe3\x6e" + "\xcc\x2f\x49\x19\x22\x29\xfc\x71" + "\xbb\x77\x38\x18\x61\xaf\x85\x76" + "\xeb\xd1\x09\xcc\x86\x04\x20\x9a" + "\x66\x53\x2f\x44\x8b\xc6\xa3\xd2" + "\x5f\xc7\x79\x82\x66\xa8\x6e\x75" + "\x7d\x94\xd1\x86\x75\x0f\xa5\x4f" + "\x3c\x7a\x33\xce\xd1\x6e\x9d\x7b" + "\x1f\x91\x37\xb8\x37\x80\xfb\xe0" + "\x52\x26\xd0\x9a\xd4\x48\x02\x41" + "\x05\xe3\x5a\x94\xf1\x65\x61\x19" + "\xb8\x88\x4e\x2b\xea\xba\x8b\x58" + "\x8b\x42\x01\x00\xa8\xfe\x00\x5c" + "\xfe\x1c\xee\x31\x15\x69\xfa\xb3" + "\x9b\x5f\x22\x8e\x0d\x2c\xe3\xa5" + "\x21\xb9\x99\x8a\x8e\x94\x5a\xef" + "\x13\x3e\x99\x96\x79\x6e\xd5\x42" + "\x36\x03\xa9\xe2\xca\x65\x4e\x8a" + "\x8a\x30\xd2\x7d\x74\xe7\xf0\xaa" + "\x23\x26\xdd\xcb\x82\x39\xfc\x9d" + "\x51\x76\x21\x80\xa2\xbe\x93\x03" + "\x47\xb0\xc1\xb6\xdc\x63\xfd\x9f" + "\xca\x9d\xa5\xca\x27\x85\xe2\xd8" + "\x15\x5b\x7e\x14\x7a\xc4\x89\xcc" + "\x74\x14\x4b\x46\xd2\xce\xac\x39" + "\x6b\x6a\x5a\xa4\x0e\xe3\x7b\x15" + "\x94\x4b\x0f\x74\xcb\x0c\x7f\xa9" + "\xbe\x09\x39\xa3\xdd\x56\x5c\xc7" + "\x99\x56\x65\x39\xf4\x0b\x7d\x87" + "\xec\xaa\xe3\x4d\x22\x65\x39\x4e", + .psize = 1024, + .digest = "\x64\x3a\xbc\xc3\x3f\x74\x40\x51" + "\x6e\x56\x01\x1a\x51\xec\x36\xde", + .np = 8, + .tap = { 64, 203, 267, 28, 263, 62, 54, 83 }, + }, { + .key = "\x1b\x82\x2e\x1b\x17\x23\xb9\x6d" + "\xdc\x9c\xda\x99\x07\xe3\x5f\xd8" + "\xd2\xf8\x43\x80\x8d\x86\x7d\x80" + "\x1a\xd0\xcc\x13\xb9\x11\x05\x3f" + "\x7e\xcf\x7e\x80\x0e\xd8\x25\x48" + "\x8b\xaa\x63\x83\x92\xd0\x72\xf5" + "\x4f\x67\x7e\x50\x18\x25\xa4\xd1" + "\xe0\x7e\x1e\xba\xd8\xa7\x6e\xdb" + "\x1a\xcc\x0d\xfe\x9f\x6d\x22\x35" + "\xe1\xe6\xe0\xa8\x7b\x9c\xb1\x66" + "\xa3\xf8\xff\x4d\x90\x84\x28\xbc" + "\xdc\x19\xc7\x91\x49\xfc\xf6\x33" + "\xc9\x6e\x65\x7f\x28\x6f\x68\x2e" + "\xdf\x1a\x75\xe9\xc2\x0c\x96\xb9" + "\x31\x22\xc4\x07\xc6\x0a\x2f\xfd" + "\x36\x06\x5f\x5c\xc5\xb1\x3a\xf4" + "\x5e\x48\xa4\x45\x2b\x88\xa7\xee" + "\xa9\x8b\x52\xcc\x99\xd9\x2f\xb8" + "\xa4\x58\x0a\x13\xeb\x71\x5a\xfa" + "\xe5\x5e\xbe\xf2\x64\xad\x75\xbc" + "\x0b\x5b\x34\x13\x3b\x23\x13\x9a" + "\x69\x30\x1e\x9a\xb8\x03\xb8\x8b" + "\x3e\x46\x18\x6d\x38\xd9\xb3\xd8" + "\xbf\xf1\xd0\x28\xe6\x51\x57\x80" + "\x5e\x99\xfb\xd0\xce\x1e\x83\xf7" + "\xe9\x07\x5a\x63\xa9\xef\xce\xa5" + "\xfb\x3f\x37\x17\xfc\x0b\x37\x0e" + "\xbb\x4b\x21\x62\xb7\x83\x0e\xa9" + "\x9e\xb0\xc4\xad\x47\xbe\x35\xe7" + "\x51\xb2\xf2\xac\x2b\x65\x7b\x48" + "\xe3\x3f\x5f\xb6\x09\x04\x0c\x58" + "\xce\x99\xa9\x15\x2f\x4e\xc1\xf2" + "\x24\x48\xc0\xd8\x6c\xd3\x76\x17" + "\x83\x5d\xe6\xe3\xfd\x01\x8e\xf7" + "\x42\xa5\x04\x29\x30\xdf\xf9\x00" + "\x4a\xdc\x71\x22\x1a\x33\x15\xb6" + "\xd7\x72\xfb\x9a\xb8\xeb\x2b\x38" + "\xea\xa8\x61\xa8\x90\x11\x9d\x73" + "\x2e\x6c\xce\x81\x54\x5a\x9f\xcd" + "\xcf\xd5\xbd\x26\x5d\x66\xdb\xfb" + "\xdc\x1e\x7c\x10\xfe\x58\x82\x10" + "\x16\x24\x01\xce\x67\x55\x51\xd1" + "\xdd\x6b\x44\xa3\x20\x8e\xa9\xa6" + "\x06\xa8\x29\x77\x6e\x00\x38\x5b" + "\xde\x4d\x58\xd8\x1f\x34\xdf\xf9" + "\x2c\xac\x3e\xad\xfb\x92\x0d\x72" + "\x39\xa4\xac\x44\x10\xc0\x43\xc4" + "\xa4\x77\x3b\xfc\xc4\x0d\x37\xd3" + "\x05\x84\xda\x53\x71\xf8\x80\xd3" + "\x34\x44\xdb\x09\xb4\x2b\x8e\xe3" + "\x00\x75\x50\x9e\x43\x22\x00\x0b" + "\x7c\x70\xab\xd4\x41\xf1\x93\xcd" + "\x25\x2d\x84\x74\xb5\xf2\x92\xcd" + "\x0a\x28\xea\x9a\x49\x02\x96\xcb" + "\x85\x9e\x2f\x33\x03\x86\x1d\xdc" + "\x1d\x31\xd5\xfc\x9d\xaa\xc5\xe9" + "\x9a\xc4\x57\xf5\x35\xed\xf4\x4b" + "\x3d\x34\xc2\x29\x13\x86\x36\x42" + "\x5d\xbf\x90\x86\x13\x77\xe5\xc3" + "\x62\xb4\xfe\x0b\x70\x39\x35\x65" + "\x02\xea\xf6\xce\x57\x0c\xbb\x74" + "\x29\xe3\xfd\x60\x90\xfd\x10\x38" + "\xd5\x4e\x86\xbd\x37\x70\xf0\x97" + "\xa6\xab\x3b\x83\x64\x52\xca\x66" + "\x2f\xf9\xa4\xca\x3a\x55\x6b\xb0" + "\xe8\x3a\x34\xdb\x9e\x48\x50\x2f" + "\x3b\xef\xfd\x08\x2d\x5f\xc1\x37" + "\x5d\xbe\x73\xe4\xd8\xe9\xac\xca" + "\x8a\xaa\x48\x7c\x5c\xf4\xa6\x96" + "\x5f\xfa\x70\xa6\xb7\x8b\x50\xcb" + "\xa6\xf5\xa9\xbd\x7b\x75\x4c\x22" + "\x0b\x19\x40\x2e\xc9\x39\x39\x32" + "\x83\x03\xa8\xa4\x98\xe6\x8e\x16" + "\xb9\xde\x08\xc5\xfc\xbf\xad\x39" + "\xa8\xc7\x93\x6c\x6f\x23\xaf\xc1" + "\xab\xe1\xdf\xbb\x39\xae\x93\x29" + "\x0e\x7d\x80\x8d\x3e\x65\xf3\xfd" + "\x96\x06\x65\x90\xa1\x28\x64\x4b" + "\x69\xf9\xa8\x84\x27\x50\xfc\x87" + "\xf7\xbf\x55\x8e\x56\x13\x58\x7b" + "\x85\xb4\x6a\x72\x0f\x40\xf1\x4f" + "\x83\x81\x1f\x76\xde\x15\x64\x7a" + "\x7a\x80\xe4\xc7\x5e\x63\x01\x91" + "\xd7\x6b\xea\x0b\x9b\xa2\x99\x3b" + "\x6c\x88\xd8\xfd\x59\x3c\x8d\x22" + "\x86\x56\xbe\xab\xa1\x37\x08\x01" + "\x50\x85\x69\x29\xee\x9f\xdf\x21" + "\x3e\x20\x20\xf5\xb0\xbb\x6b\xd0" + "\x9c\x41\x38\xec\x54\x6f\x2d\xbd" + "\x0f\xe1\xbd\xf1\x2b\x6e\x60\x56" + "\x29\xe5\x7a\x70\x1c\xe2\xfc\x97" + "\x82\x68\x67\xd9\x3d\x1f\xfb\xd8" + "\x07\x9f\xbf\x96\x74\xba\x6a\x0e" + "\x10\x48\x20\xd8\x13\x1e\xb5\x44" + "\xf2\xcc\xb1\x8b\xfb\xbb\xec\xd7" + "\x37\x70\x1f\x7c\x55\xd2\x4b\xb9" + "\xfd\x70\x5e\xa3\x91\x73\x63\x52" + "\x13\x47\x5a\x06\xfb\x01\x67\xa5" + "\xc0\xd0\x49\x19\x56\x66\x9a\x77" + "\x64\xaf\x8c\x25\x91\x52\x87\x0e" + "\x18\xf3\x5f\x97\xfd\x71\x13\xf8" + "\x05\xa5\x39\xcc\x65\xd3\xcc\x63" + "\x5b\xdb\x5f\x7e\x5f\x6e\xad\xc4" + "\xf4\xa0\xc5\xc2\x2b\x4d\x97\x38" + "\x4f\xbc\xfa\x33\x17\xb4\x47\xb9" + "\x43\x24\x15\x8d\xd2\xed\x80\x68" + "\x84\xdb\x04\x80\xca\x5e\x6a\x35" + "\x2c\x2c\xe7\xc5\x03\x5f\x54\xb0" + "\x5e\x4f\x1d\x40\x54\x3d\x78\x9a" + "\xac\xda\x80\x27\x4d\x15\x4c\x1a" + "\x6e\x80\xc9\xc4\x3b\x84\x0e\xd9" + "\x2e\x93\x01\x8c\xc3\xc8\x91\x4b" + "\xb3\xaa\x07\x04\x68\x5b\x93\xa5" + "\xe7\xc4\x9d\xe7\x07\xee\xf5\x3b" + "\x40\x89\xcc\x60\x34\x9d\xb4\x06" + "\x1b\xef\x92\xe6\xc1\x2a\x7d\x0f" + "\x81\xaa\x56\xe3\xd7\xed\xa7\xd4" + "\xa7\x3a\x49\xc4\xad\x81\x5c\x83" + "\x55\x8e\x91\x54\xb7\x7d\x65\xa5" + "\x06\x16\xd5\x9a\x16\xc1\xb0\xa2" + "\x06\xd8\x98\x47\x73\x7e\x73\xa0" + "\xb8\x23\xb1\x52\xbf\x68\x74\x5d" + "\x0b\xcb\xfa\x8c\x46\xe3\x24\xe6" + "\xab\xd4\x69\x8d\x8c\xf2\x8a\x59" + "\xbe\x48\x46\x50\x8c\x9a\xe8\xe3" + "\x31\x55\x0a\x06\xed\x4f\xf8\xb7" + "\x4f\xe3\x85\x17\x30\xbd\xd5\x20" + "\xe7\x5b\xb2\x32\xcf\x6b\x16\x44" + "\xd2\xf5\x7e\xd7\xd1\x2f\xee\x64" + "\x3e\x9d\x10\xef\x27\x35\x43\x64" + "\x67\xfb\x7a\x7b\xe0\x62\x31\x9a" + "\x4d\xdf\xa5\xab\xc0\x20\xbb\x01" + "\xe9\x7b\x54\xf1\xde\xb2\x79\x50" + "\x6c\x4b\x91\xdb\x7f\xbb\x50\xc1" + "\x55\x44\x38\x9a\xe0\x9f\xe8\x29" + "\x6f\x15\xf8\x4e\xa6\xec\xa0\x60", + .ksize = 1088, + .plaintext = "\x15\x68\x9e\x2f\xad\x15\x52\xdf" + "\xf0\x42\x62\x24\x2a\x2d\xea\xbf" + "\xc7\xf3\xb4\x1a\xf5\xed\xb2\x08" + "\x15\x60\x1c\x00\x77\xbf\x0b\x0e" + "\xb7\x2c\xcf\x32\x3a\xc7\x01\x77" + "\xef\xa6\x75\xd0\x29\xc7\x68\x20" + "\xb2\x92\x25\xbf\x12\x34\xe9\xa4" + "\xfd\x32\x7b\x3f\x7c\xbd\xa5\x02" + "\x38\x41\xde\xc9\xc1\x09\xd9\xfc" + "\x6e\x78\x22\x83\x18\xf7\x50\x8d" + "\x8f\x9c\x2d\x02\xa5\x30\xac\xff" + "\xea\x63\x2e\x80\x37\x83\xb0\x58" + "\xda\x2f\xef\x21\x55\xba\x7b\xb1" + "\xb6\xed\xf5\xd2\x4d\xaa\x8c\xa9" + "\xdd\xdb\x0f\xb4\xce\xc1\x9a\xb1" + "\xc1\xdc\xbd\xab\x86\xc2\xdf\x0b" + "\xe1\x2c\xf9\xbe\xf6\xd8\xda\x62" + "\x72\xdd\x98\x09\x52\xc0\xc4\xb6" + "\x7b\x17\x5c\xf5\xd8\x4b\x88\xd6" + "\x6b\xbf\x84\x4a\x3f\xf5\x4d\xd2" + "\x94\xe2\x9c\xff\xc7\x3c\xd9\xc8" + "\x37\x38\xbc\x8c\xf3\xe7\xb7\xd0" + "\x1d\x78\xc4\x39\x07\xc8\x5e\x79" + "\xb6\x5a\x90\x5b\x6e\x97\xc9\xd4" + "\x82\x9c\xf3\x83\x7a\xe7\x97\xfc" + "\x1d\xbb\xef\xdb\xce\xe0\x82\xad" + "\xca\x07\x6c\x54\x62\x6f\x81\xe6" + "\x7a\x5a\x96\x6e\x80\x3a\xa2\x37" + "\x6f\xc6\xa4\x29\xc3\x9e\x19\x94" + "\x9f\xb0\x3e\x38\xfb\x3c\x2b\x7d" + "\xaa\xb8\x74\xda\x54\x23\x51\x12" + "\x4b\x96\x36\x8f\x91\x4f\x19\x37" + "\x83\xc9\xdd\xc7\x1a\x32\x2d\xab" + "\xc7\x89\xe2\x07\x47\x6c\xe8\xa6" + "\x70\x6b\x8e\x0c\xda\x5c\x6a\x59" + "\x27\x33\x0e\xe1\xe1\x20\xe8\xc8" + "\xae\xdc\xd0\xe3\x6d\xa8\xa6\x06" + "\x41\xb4\xd4\xd4\xcf\x91\x3e\x06" + "\xb0\x9a\xf7\xf1\xaa\xa6\x23\x92" + "\x10\x86\xf0\x94\xd1\x7c\x2e\x07" + "\x30\xfb\xc5\xd8\xf3\x12\xa9\xe8" + "\x22\x1c\x97\x1a\xad\x96\xb0\xa1" + "\x72\x6a\x6b\xb4\xfd\xf7\xe8\xfa" + "\xe2\x74\xd8\x65\x8d\x35\x17\x4b" + "\x00\x23\x5c\x8c\x70\xad\x71\xa2" + "\xca\xc5\x6c\x59\xbf\xb4\xc0\x6d" + "\x86\x98\x3e\x19\x5a\x90\x92\xb1" + "\x66\x57\x6a\x91\x68\x7c\xbc\xf3" + "\xf1\xdb\x94\xf8\x48\xf1\x36\xd8" + "\x78\xac\x1c\xa9\xcc\xd6\x27\xba" + "\x91\x54\x22\xf5\xe6\x05\x3f\xcc" + "\xc2\x8f\x2c\x3b\x2b\xc3\x2b\x2b" + "\x3b\xb8\xb6\x29\xb7\x2f\x94\xb6" + "\x7b\xfc\x94\x3e\xd0\x7a\x41\x59" + "\x7b\x1f\x9a\x09\xa6\xed\x4a\x82" + "\x9d\x34\x1c\xbd\x4e\x1c\x3a\x66" + "\x80\x74\x0e\x9a\x4f\x55\x54\x47" + "\x16\xba\x2a\x0a\x03\x35\x99\xa3" + "\x5c\x63\x8d\xa2\x72\x8b\x17\x15" + "\x68\x39\x73\xeb\xec\xf2\xe8\xf5" + "\x95\x32\x27\xd6\xc4\xfe\xb0\x51" + "\xd5\x0c\x50\xc5\xcd\x6d\x16\xb3" + "\xa3\x1e\x95\x69\xad\x78\x95\x06" + "\xb9\x46\xf2\x6d\x24\x5a\x99\x76" + "\x73\x6a\x91\xa6\xac\x12\xe1\x28" + "\x79\xbc\x08\x4e\x97\x00\x98\x63" + "\x07\x1c\x4e\xd1\x68\xf3\xb3\x81" + "\xa8\xa6\x5f\xf1\x01\xc9\xc1\xaf" + "\x3a\x96\xf9\x9d\xb5\x5a\x5f\x8f" + "\x7e\xc1\x7e\x77\x0a\x40\xc8\x8e" + "\xfc\x0e\xed\xe1\x0d\xb0\xe5\x5e" + "\x5e\x6f\xf5\x7f\xab\x33\x7d\xcd" + "\xf0\x09\x4b\xb2\x11\x37\xdc\x65" + "\x97\x32\x62\x71\x3a\x29\x54\xb9" + "\xc7\xa4\xbf\x75\x0f\xf9\x40\xa9" + "\x8d\xd7\x8b\xa7\xe0\x9a\xbe\x15" + "\xc6\xda\xd8\x00\x14\x69\x1a\xaf" + "\x5f\x79\xc3\xf5\xbb\x6c\x2a\x9d" + "\xdd\x3c\x5f\x97\x21\xe1\x3a\x03" + "\x84\x6a\xe9\x76\x11\x1f\xd3\xd5" + "\xf0\x54\x20\x4d\xc2\x91\xc3\xa4" + "\x36\x25\xbe\x1b\x2a\x06\xb7\xf3" + "\xd1\xd0\x55\x29\x81\x4c\x83\xa3" + "\xa6\x84\x1e\x5c\xd1\xd0\x6c\x90" + "\xa4\x11\xf0\xd7\x63\x6a\x48\x05" + "\xbc\x48\x18\x53\xcd\xb0\x8d\xdb" + "\xdc\xfe\x55\x11\x5c\x51\xb3\xab" + "\xab\x63\x3e\x31\x5a\x8b\x93\x63" + "\x34\xa9\xba\x2b\x69\x1a\xc0\xe3" + "\xcb\x41\xbc\xd7\xf5\x7f\x82\x3e" + "\x01\xa3\x3c\x72\xf4\xfe\xdf\xbe" + "\xb1\x67\x17\x2b\x37\x60\x0d\xca" + "\x6f\xc3\x94\x2c\xd2\x92\x6d\x9d" + "\x75\x18\x77\xaa\x29\x38\x96\xed" + "\x0e\x20\x70\x92\xd5\xd0\xb4\x00" + "\xc0\x31\xf2\xc9\x43\x0e\x75\x1d" + "\x4b\x64\xf2\x1f\xf2\x29\x6c\x7b" + "\x7f\xec\x59\x7d\x8c\x0d\xd4\xd3" + "\xac\x53\x4c\xa3\xde\x42\x92\x95" + "\x6d\xa3\x4f\xd0\xe6\x3d\xe7\xec" + "\x7a\x4d\x68\xf1\xfe\x67\x66\x09" + "\x83\x22\xb1\x98\x43\x8c\xab\xb8" + "\x45\xe6\x6d\xdf\x5e\x50\x71\xce" + "\xf5\x4e\x40\x93\x2b\xfa\x86\x0e" + "\xe8\x30\xbd\x82\xcc\x1c\x9c\x5f" + "\xad\xfd\x08\x31\xbe\x52\xe7\xe6" + "\xf2\x06\x01\x62\x25\x15\x99\x74" + "\x33\x51\x52\x57\x3f\x57\x87\x61" + "\xb9\x7f\x29\x3d\xcd\x92\x5e\xa6" + "\x5c\x3b\xf1\xed\x5f\xeb\x82\xed" + "\x56\x7b\x61\xe7\xfd\x02\x47\x0e" + "\x2a\x15\xa4\xce\x43\x86\x9b\xe1" + "\x2b\x4c\x2a\xd9\x42\x97\xf7\x9a" + "\xe5\x47\x46\x48\xd3\x55\x6f\x4d" + "\xd9\xeb\x4b\xdd\x7b\x21\x2f\xb3" + "\xa8\x36\x28\xdf\xca\xf1\xf6\xd9" + "\x10\xf6\x1c\xfd\x2e\x0c\x27\xe0" + "\x01\xb3\xff\x6d\x47\x08\x4d\xd4" + "\x00\x25\xee\x55\x4a\xe9\xe8\x5b" + "\xd8\xf7\x56\x12\xd4\x50\xb2\xe5" + "\x51\x6f\x34\x63\x69\xd2\x4e\x96" + "\x4e\xbc\x79\xbf\x18\xae\xc6\x13" + "\x80\x92\x77\xb0\xb4\x0f\x29\x94" + "\x6f\x4c\xbb\x53\x11\x36\xc3\x9f" + "\x42\x8e\x96\x8a\x91\xc8\xe9\xfc" + "\xfe\xbf\x7c\x2d\x6f\xf9\xb8\x44" + "\x89\x1b\x09\x53\x0a\x2a\x92\xc3" + "\x54\x7a\x3a\xf9\xe2\xe4\x75\x87" + "\xa0\x5e\x4b\x03\x7a\x0d\x8a\xf4" + "\x55\x59\x94\x2b\x63\x96\x0e\xf5", + .psize = 1040, + .digest = "\xb5\xb9\x08\xb3\x24\x3e\x03\xf0" + "\xd6\x0b\x57\xbc\x0a\x6d\x89\x59", + }, { + .key = "\xf6\x34\x42\x71\x35\x52\x8b\x58" + "\x02\x3a\x8e\x4a\x8d\x41\x13\xe9" + "\x7f\xba\xb9\x55\x9d\x73\x4d\xf8" + "\x3f\x5d\x73\x15\xff\xd3\x9e\x7f" + "\x20\x2a\x6a\xa8\xd1\xf0\x8f\x12" + "\x6b\x02\xd8\x6c\xde\xba\x80\x22" + "\x19\x37\xc8\xd0\x4e\x89\x17\x7c" + "\x7c\xdd\x88\xfd\x41\xc0\x04\xb7" + "\x1d\xac\x19\xe3\x20\xc7\x16\xcf" + "\x58\xee\x1d\x7a\x61\x69\xa9\x12" + "\x4b\xef\x4f\xb6\x38\xdd\x78\xf8" + "\x28\xee\x70\x08\xc7\x7c\xcc\xc8" + "\x1e\x41\xf5\x80\x86\x70\xd0\xf0" + "\xa3\x87\x6b\x0a\x00\xd2\x41\x28" + "\x74\x26\xf1\x24\xf3\xd0\x28\x77" + "\xd7\xcd\xf6\x2d\x61\xf4\xa2\x13" + "\x77\xb4\x6f\xa0\xf4\xfb\xd6\xb5" + "\x38\x9d\x5a\x0c\x51\xaf\xad\x63" + "\x27\x67\x8c\x01\xea\x42\x1a\x66" + "\xda\x16\x7c\x3c\x30\x0c\x66\x53" + "\x1c\x88\xa4\x5c\xb2\xe3\x78\x0a" + "\x13\x05\x6d\xe2\xaf\xb3\xe4\x75" + "\x00\x99\x58\xee\x76\x09\x64\xaa" + "\xbb\x2e\xb1\x81\xec\xd8\x0e\xd3" + "\x0c\x33\x5d\xb7\x98\xef\x36\xb6" + "\xd2\x65\x69\x41\x70\x12\xdc\x25" + "\x41\x03\x99\x81\x41\x19\x62\x13" + "\xd1\x0a\x29\xc5\x8c\xe0\x4c\xf3" + "\xd6\xef\x4c\xf4\x1d\x83\x2e\x6d" + "\x8e\x14\x87\xed\x80\xe0\xaa\xd3" + "\x08\x04\x73\x1a\x84\x40\xf5\x64" + "\xbd\x61\x32\x65\x40\x42\xfb\xb0" + "\x40\xf6\x40\x8d\xc7\x7f\x14\xd0" + "\x83\x99\xaa\x36\x7e\x60\xc6\xbf" + "\x13\x8a\xf9\x21\xe4\x7e\x68\x87" + "\xf3\x33\x86\xb4\xe0\x23\x7e\x0a" + "\x21\xb1\xf5\xad\x67\x3c\x9c\x9d" + "\x09\xab\xaf\x5f\xba\xe0\xd0\x82" + "\x48\x22\x70\xb5\x6d\x53\xd6\x0e" + "\xde\x64\x92\x41\xb0\xd3\xfb\xda" + "\x21\xfe\xab\xea\x20\xc4\x03\x58" + "\x18\x2e\x7d\x2f\x03\xa9\x47\x66" + "\xdf\x7b\xa4\x6b\x34\x6b\x55\x9c" + "\x4f\xd7\x9c\x47\xfb\xa9\x42\xec" + "\x5a\x12\xfd\xfe\x76\xa0\x92\x9d" + "\xfe\x1e\x16\xdd\x24\x2a\xe4\x27" + "\xd5\xa9\xf2\x05\x4f\x83\xa2\xaf" + "\xfe\xee\x83\x7a\xad\xde\xdf\x9a" + "\x80\xd5\x81\x14\x93\x16\x7e\x46" + "\x47\xc2\x14\xef\x49\x6e\xb9\xdb" + "\x40\xe8\x06\x6f\x9c\x2a\xfd\x62" + "\x06\x46\xfd\x15\x1d\x36\x61\x6f" + "\x77\x77\x5e\x64\xce\x78\x1b\x85" + "\xbf\x50\x9a\xfd\x67\xa6\x1a\x65" + "\xad\x5b\x33\x30\xf1\x71\xaa\xd9" + "\x23\x0d\x92\x24\x5f\xae\x57\xb0" + "\x24\x37\x0a\x94\x12\xfb\xb5\xb1" + "\xd3\xb8\x1d\x12\x29\xb0\x80\x24" + "\x2d\x47\x9f\x96\x1f\x95\xf1\xb1" + "\xda\x35\xf6\x29\xe0\xe1\x23\x96" + "\xc7\xe8\x22\x9b\x7c\xac\xf9\x41" + "\x39\x01\xe5\x73\x15\x5e\x99\xec" + "\xb4\xc1\xf4\xe7\xa7\x97\x6a\xd5" + "\x90\x9a\xa0\x1d\xf3\x5a\x8b\x5f" + "\xdf\x01\x52\xa4\x93\x31\x97\xb0" + "\x93\x24\xb5\xbc\xb2\x14\x24\x98" + "\x4a\x8f\x19\x85\xc3\x2d\x0f\x74" + "\x9d\x16\x13\x80\x5e\x59\x62\x62" + "\x25\xe0\xd1\x2f\x64\xef\xba\xac" + "\xcd\x09\x07\x15\x8a\xcf\x73\xb5" + "\x8b\xc9\xd8\x24\xb0\x53\xd5\x6f" + "\xe1\x2b\x77\xb1\xc5\xe4\xa7\x0e" + "\x18\x45\xab\x36\x03\x59\xa8\xbd" + "\x43\xf0\xd8\x2c\x1a\x69\x96\xbb" + "\x13\xdf\x6c\x33\x77\xdf\x25\x34" + "\x5b\xa5\x5b\x8c\xf9\x51\x05\xd4" + "\x8b\x8b\x44\x87\x49\xfc\xa0\x8f" + "\x45\x15\x5b\x40\x42\xc4\x09\x92" + "\x98\x0c\x4d\xf4\x26\x37\x1b\x13" + "\x76\x01\x93\x8d\x4f\xe6\xed\x18" + "\xd0\x79\x7b\x3f\x44\x50\xcb\xee" + "\xf7\x4a\xc9\x9e\xe0\x96\x74\xa7" + "\xe6\x93\xb2\x53\xca\x55\xa8\xdc" + "\x1e\x68\x07\x87\xb7\x2e\xc1\x08" + "\xb2\xa4\x5b\xaf\xc6\xdb\x5c\x66" + "\x41\x1c\x51\xd9\xb0\x07\x00\x0d" + "\xf0\x4c\xdc\x93\xde\xa9\x1e\x8e" + "\xd3\x22\x62\xd8\x8b\x88\x2c\xea" + "\x5e\xf1\x6e\x14\x40\xc7\xbe\xaa" + "\x42\x28\xd0\x26\x30\x78\x01\x9b" + "\x83\x07\xbc\x94\xc7\x57\xa2\x9f" + "\x03\x07\xff\x16\xff\x3c\x6e\x48" + "\x0a\xd0\xdd\x4c\xf6\x64\x9a\xf1" + "\xcd\x30\x12\x82\x2c\x38\xd3\x26" + "\x83\xdb\xab\x3e\xc6\xf8\xe6\xfa" + "\x77\x0a\x78\x82\x75\xf8\x63\x51" + "\x59\xd0\x8d\x24\x9f\x25\xe6\xa3" + "\x4c\xbc\x34\xfc\xe3\x10\xc7\x62" + "\xd4\x23\xc8\x3d\xa7\xc6\xa6\x0a" + "\x4f\x7e\x29\x9d\x6d\xbe\xb5\xf1" + "\xdf\xa4\x53\xfa\xc0\x23\x0f\x37" + "\x84\x68\xd0\xb5\xc8\xc6\xae\xf8" + "\xb7\x8d\xb3\x16\xfe\x8f\x87\xad" + "\xd0\xc1\x08\xee\x12\x1c\x9b\x1d" + "\x90\xf8\xd1\x63\xa4\x92\x3c\xf0" + "\xc7\x34\xd8\xf1\x14\xed\xa3\xbc" + "\x17\x7e\xd4\x62\x42\x54\x57\x2c" + "\x3e\x7a\x35\x35\x17\x0f\x0b\x7f" + "\x81\xa1\x3f\xd0\xcd\xc8\x3b\x96" + "\xe9\xe0\x4a\x04\xe1\xb6\x3c\xa1" + "\xd6\xca\xc4\xbd\xb6\xb5\x95\x34" + "\x12\x9d\xc5\x96\xf2\xdf\xba\x54" + "\x76\xd1\xb2\x6b\x3b\x39\xe0\xb9" + "\x18\x62\xfb\xf7\xfc\x12\xf1\x5f" + "\x7e\xc7\xe3\x59\x4c\xa6\xc2\x3d" + "\x40\x15\xf9\xa3\x95\x64\x4c\x74" + "\x8b\x73\x77\x33\x07\xa7\x04\x1d" + "\x33\x5a\x7e\x8f\xbd\x86\x01\x4f" + "\x3e\xb9\x27\x6f\xe2\x41\xf7\x09" + "\x67\xfd\x29\x28\xc5\xe4\xf6\x18" + "\x4c\x1b\x49\xb2\x9c\x5b\xf6\x81" + "\x4f\xbb\x5c\xcc\x0b\xdf\x84\x23" + "\x58\xd6\x28\x34\x93\x3a\x25\x97" + "\xdf\xb2\xc3\x9e\x97\x38\x0b\x7d" + "\x10\xb3\x54\x35\x23\x8c\x64\xee" + "\xf0\xd8\x66\xff\x8b\x22\xd2\x5b" + "\x05\x16\x3c\x89\xf7\xb1\x75\xaf" + "\xc0\xae\x6a\x4f\x3f\xaf\x9a\xf4" + "\xf4\x9a\x24\xd9\x80\x82\xc0\x12" + "\xde\x96\xd1\xbe\x15\x0b\x8d\x6a" + "\xd7\x12\xe4\x85\x9f\x83\xc9\xc3" + "\xff\x0b\xb5\xaf\x3b\xd8\x6d\x67" + "\x81\x45\xe6\xac\xec\xc1\x7b\x16" + "\x18\x0a\xce\x4b\xc0\x2e\x76\xbc" + "\x1b\xfa\xb4\x34\xb8\xfc\x3e\xc8" + "\x5d\x90\x71\x6d\x7a\x79\xef\x06", + .ksize = 1088, + .plaintext = "\xaa\x5d\x54\xcb\xea\x1e\x46\x0f" + "\x45\x87\x70\x51\x8a\x66\x7a\x33" + "\xb4\x18\xff\xa9\x82\xf9\x45\x4b" + "\x93\xae\x2e\x7f\xab\x98\xfe\xbf" + "\x01\xee\xe5\xa0\x37\x8f\x57\xa6" + "\xb0\x76\x0d\xa4\xd6\x28\x2b\x5d" + "\xe1\x03\xd6\x1c\x6f\x34\x0d\xe7" + "\x61\x2d\x2e\xe5\xae\x5d\x47\xc7" + "\x80\x4b\x18\x8f\xa8\x99\xbc\x28" + "\xed\x1d\x9d\x86\x7d\xd7\x41\xd1" + "\xe0\x2b\xe1\x8c\x93\x2a\xa7\x80" + "\xe1\x07\xa0\xa9\x9f\x8c\x8d\x1a" + "\x55\xfc\x6b\x24\x7a\xbd\x3e\x51" + "\x68\x4b\x26\x59\xc8\xa7\x16\xd9" + "\xb9\x61\x13\xde\x8b\x63\x1c\xf6" + "\x60\x01\xfb\x08\xb3\x5b\x0a\xbf" + "\x34\x73\xda\x87\x87\x3d\x6f\x97" + "\x4a\x0c\xa3\x58\x20\xa2\xc0\x81" + "\x5b\x8c\xef\xa9\xc2\x01\x1e\x64" + "\x83\x8c\xbc\x03\xb6\xd0\x29\x9f" + "\x54\xe2\xce\x8b\xc2\x07\x85\x78" + "\x25\x38\x96\x4c\xb4\xbe\x17\x4a" + "\x65\xa6\xfa\x52\x9d\x66\x9d\x65" + "\x4a\xd1\x01\x01\xf0\xcb\x13\xcc" + "\xa5\x82\xf3\xf2\x66\xcd\x3f\x9d" + "\xd1\xaa\xe4\x67\xea\xf2\xad\x88" + "\x56\x76\xa7\x9b\x59\x3c\xb1\x5d" + "\x78\xfd\x69\x79\x74\x78\x43\x26" + "\x7b\xde\x3f\xf1\xf5\x4e\x14\xd9" + "\x15\xf5\x75\xb5\x2e\x19\xf3\x0c" + "\x48\x72\xd6\x71\x6d\x03\x6e\xaa" + "\xa7\x08\xf9\xaa\x70\xa3\x0f\x4d" + "\x12\x8a\xdd\xe3\x39\x73\x7e\xa7" + "\xea\x1f\x6d\x06\x26\x2a\xf2\xc5" + "\x52\xb4\xbf\xfd\x52\x0c\x06\x60" + "\x90\xd1\xb2\x7b\x56\xae\xac\x58" + "\x5a\x6b\x50\x2a\xf5\xe0\x30\x3c" + "\x2a\x98\x0f\x1b\x5b\x0a\x84\x6c" + "\x31\xae\x92\xe2\xd4\xbb\x7f\x59" + "\x26\x10\xb9\x89\x37\x68\x26\xbf" + "\x41\xc8\x49\xc4\x70\x35\x7d\xff" + "\x2d\x7f\xf6\x8a\x93\x68\x8c\x78" + "\x0d\x53\xce\x7d\xff\x7d\xfb\xae" + "\x13\x1b\x75\xc4\x78\xd7\x71\xd8" + "\xea\xd3\xf4\x9d\x95\x64\x8e\xb4" + "\xde\xb8\xe4\xa6\x68\xc8\xae\x73" + "\x58\xaf\xa8\xb0\x5a\x20\xde\x87" + "\x43\xb9\x0f\xe3\xad\x41\x4b\xd5" + "\xb7\xad\x16\x00\xa6\xff\xf6\x74" + "\xbf\x8c\x9f\xb3\x58\x1b\xb6\x55" + "\xa9\x90\x56\x28\xf0\xb5\x13\x4e" + "\x9e\xf7\x25\x86\xe0\x07\x7b\x98" + "\xd8\x60\x5d\x38\x95\x3c\xe4\x22" + "\x16\x2f\xb2\xa2\xaf\xe8\x90\x17" + "\xec\x11\x83\x1a\xf4\xa9\x26\xda" + "\x39\x72\xf5\x94\x61\x05\x51\xec" + "\xa8\x30\x8b\x2c\x13\xd0\x72\xac" + "\xb9\xd2\xa0\x4c\x4b\x78\xe8\x6e" + "\x04\x85\xe9\x04\x49\x82\x91\xff" + "\x89\xe5\xab\x4c\xaa\x37\x03\x12" + "\xca\x8b\x74\x10\xfd\x9e\xd9\x7b" + "\xcb\xdb\x82\x6e\xce\x2e\x33\x39" + "\xce\xd2\x84\x6e\x34\x71\x51\x6e" + "\x0d\xd6\x01\x87\xc7\xfa\x0a\xd3" + "\xad\x36\xf3\x4c\x9f\x96\x5e\x62" + "\x62\x54\xc3\x03\x78\xd6\xab\xdd" + "\x89\x73\x55\x25\x30\xf8\xa7\xe6" + "\x4f\x11\x0c\x7c\x0a\xa1\x2b\x7b" + "\x3d\x0d\xde\x81\xd4\x9d\x0b\xae" + "\xdf\x00\xf9\x4c\xb6\x90\x8e\x16" + "\xcb\x11\xc8\xd1\x2e\x73\x13\x75" + "\x75\x3e\xaa\xf5\xee\x02\xb3\x18" + "\xa6\x2d\xf5\x3b\x51\xd1\x1f\x47" + "\x6b\x2c\xdb\xc4\x10\xe0\xc8\xba" + "\x9d\xac\xb1\x9d\x75\xd5\x41\x0e" + "\x7e\xbe\x18\x5b\xa4\x1f\xf8\x22" + "\x4c\xc1\x68\xda\x6d\x51\x34\x6c" + "\x19\x59\xec\xb5\xb1\xec\xa7\x03" + "\xca\x54\x99\x63\x05\x6c\xb1\xac" + "\x9c\x31\xd6\xdb\xba\x7b\x14\x12" + "\x7a\xc3\x2f\xbf\x8d\xdc\x37\x46" + "\xdb\xd2\xbc\xd4\x2f\xab\x30\xd5" + "\xed\x34\x99\x8e\x83\x3e\xbe\x4c" + "\x86\x79\x58\xe0\x33\x8d\x9a\xb8" + "\xa9\xa6\x90\x46\xa2\x02\xb8\xdd" + "\xf5\xf9\x1a\x5c\x8c\x01\xaa\x6e" + "\xb4\x22\x12\xf5\x0c\x1b\x9b\x7a" + "\xc3\x80\xf3\x06\x00\x5f\x30\xd5" + "\x06\xdb\x7d\x82\xc2\xd4\x0b\x4c" + "\x5f\xe9\xc5\xf5\xdf\x97\x12\xbf" + "\x56\xaf\x9b\x69\xcd\xee\x30\xb4" + "\xa8\x71\xff\x3e\x7d\x73\x7a\xb4" + "\x0d\xa5\x46\x7a\xf3\xf4\x15\x87" + "\x5d\x93\x2b\x8c\x37\x64\xb5\xdd" + "\x48\xd1\xe5\x8c\xae\xd4\xf1\x76" + "\xda\xf4\xba\x9e\x25\x0e\xad\xa3" + "\x0d\x08\x7c\xa8\x82\x16\x8d\x90" + "\x56\x40\x16\x84\xe7\x22\x53\x3a" + "\x58\xbc\xb9\x8f\x33\xc8\xc2\x84" + "\x22\xe6\x0d\xe7\xb3\xdc\x5d\xdf" + "\xd7\x2a\x36\xe4\x16\x06\x07\xd2" + "\x97\x60\xb2\xf5\x5e\x14\xc9\xfd" + "\x8b\x05\xd1\xce\xee\x9a\x65\x99" + "\xb7\xae\x19\xb7\xc8\xbc\xd5\xa2" + "\x7b\x95\xe1\xcc\xba\x0d\xdc\x8a" + "\x1d\x59\x52\x50\xaa\x16\x02\x82" + "\xdf\x61\x33\x2e\x44\xce\x49\xc7" + "\xe5\xc6\x2e\x76\xcf\x80\x52\xf0" + "\x3d\x17\x34\x47\x3f\xd3\x80\x48" + "\xa2\xba\xd5\xc7\x7b\x02\x28\xdb" + "\xac\x44\xc7\x6e\x05\x5c\xc2\x79" + "\xb3\x7d\x6a\x47\x77\x66\xf1\x38" + "\xf0\xf5\x4f\x27\x1a\x31\xca\x6c" + "\x72\x95\x92\x8e\x3f\xb0\xec\x1d" + "\xc7\x2a\xff\x73\xee\xdf\x55\x80" + "\x93\xd2\xbd\x34\xd3\x9f\x00\x51" + "\xfb\x2e\x41\xba\x6c\x5a\x7c\x17" + "\x7f\xe6\x70\xac\x8d\x39\x3f\x77" + "\xe2\x23\xac\x8f\x72\x4e\xe4\x53" + "\xcc\xf1\x1b\xf1\x35\xfe\x52\xa4" + "\xd6\xb8\x40\x6b\xc1\xfd\xa0\xa1" + "\xf5\x46\x65\xc2\x50\xbb\x43\xe2" + "\xd1\x43\x28\x34\x74\xf5\x87\xa0" + "\xf2\x5e\x27\x3b\x59\x2b\x3e\x49" + "\xdf\x46\xee\xaf\x71\xd7\x32\x36" + "\xc7\x14\x0b\x58\x6e\x3e\x2d\x41" + "\xfa\x75\x66\x3a\x54\xe0\xb2\xb9" + "\xaf\xdd\x04\x80\x15\x19\x3f\x6f" + "\xce\x12\xb4\xd8\xe8\x89\x3c\x05" + "\x30\xeb\xf3\x3d\xcd\x27\xec\xdc" + "\x56\x70\x12\xcf\x78\x2b\x77\xbf" + "\x22\xf0\x1b\x17\x9c\xcc\xd6\x1b" + "\x2d\x3d\xa0\x3b\xd8\xc9\x70\xa4" + "\x7a\x3e\x07\xb9\x06\xc3\xfa\xb0" + "\x33\xee\xc1\xd8\xf6\xe0\xf0\xb2" + "\x61\x12\x69\xb0\x5f\x28\x99\xda" + "\xc3\x61\x48\xfa\x07\x16\x03\xc4" + "\xa8\xe1\x3c\xe8\x0e\x64\x15\x30" + "\xc1\x9d\x84\x2f\x73\x98\x0e\x3a" + "\xf2\x86\x21\xa4\x9e\x1d\xb5\x86" + "\x16\xdb\x2b\x9a\x06\x64\x8e\x79" + "\x8d\x76\x3e\xc3\xc2\x64\x44\xe3" + "\xda\xbc\x1a\x52\xd7\x61\x03\x65" + "\x54\x32\x77\x01\xed\x9d\x8a\x43" + "\x25\x24\xe3\xc1\xbe\xb8\x2f\xcb" + "\x89\x14\x64\xab\xf6\xa0\x6e\x02" + "\x57\xe4\x7d\xa9\x4e\x9a\x03\x36" + "\xad\xf1\xb1\xfc\x0b\xe6\x79\x51" + "\x9f\x81\x77\xc4\x14\x78\x9d\xbf" + "\xb6\xd6\xa3\x8c\xba\x0b\x26\xe7" + "\xc8\xb9\x5c\xcc\xe1\x5f\xd5\xc6" + "\xc4\xca\xc2\xa3\x45\xba\x94\x13" + "\xb2\x8f\xc3\x54\x01\x09\xe7\x8b" + "\xda\x2a\x0a\x11\x02\x43\xcb\x57" + "\xc9\xcc\xb5\x5c\xab\xc4\xec\x54" + "\x00\x06\x34\xe1\x6e\x03\x89\x7c" + "\xc6\xfb\x6a\xc7\x60\x43\xd6\xc5" + "\xb5\x68\x72\x89\x8f\x42\xc3\x74" + "\xbd\x25\xaa\x9f\x67\xb5\xdf\x26" + "\x20\xe8\xb7\x01\x3c\xe4\x77\xce" + "\xc4\x65\xa7\x23\x79\xea\x33\xc7" + "\x82\x14\x5c\x82\xf2\x4e\x3d\xf6" + "\xc6\x4a\x0e\x29\xbb\xec\x44\xcd" + "\x2f\xd1\x4f\x21\x71\xa9\xce\x0f" + "\x5c\xf2\x72\x5c\x08\x2e\x21\xd2" + "\xc3\x29\x13\xd8\xac\xc3\xda\x13" + "\x1a\x9d\xa7\x71\x1d\x27\x1d\x27" + "\x1d\xea\xab\x44\x79\xad\xe5\xeb" + "\xef\x1f\x22\x0a\x44\x4f\xcb\x87" + "\xa7\x58\x71\x0e\x66\xf8\x60\xbf" + "\x60\x74\x4a\xb4\xec\x2e\xfe\xd3" + "\xf5\xb8\xfe\x46\x08\x50\x99\x6c" + "\x66\xa5\xa8\x34\x44\xb5\xe5\xf0" + "\xdd\x2c\x67\x4e\x35\x96\x8e\x67" + "\x48\x3f\x5f\x37\x44\x60\x51\x2e" + "\x14\x91\x5e\x57\xc3\x0e\x79\x77" + "\x2f\x03\xf4\xe2\x1c\x72\xbf\x85" + "\x5d\xd3\x17\xdf\x6c\xc5\x70\x24" + "\x42\xdf\x51\x4e\x2a\xb2\xd2\x5b" + "\x9e\x69\x83\x41\x11\xfe\x73\x22" + "\xde\x8a\x9e\xd8\x8a\xfb\x20\x38" + "\xd8\x47\x6f\xd5\xed\x8f\x41\xfd" + "\x13\x7a\x18\x03\x7d\x0f\xcd\x7d" + "\xa6\x7d\x31\x9e\xf1\x8f\x30\xa3" + "\x8b\x4c\x24\xb7\xf5\x48\xd7\xd9" + "\x12\xe7\x84\x97\x5c\x31\x6d\xfb" + "\xdf\xf3\xd3\xd1\xd5\x0c\x30\x06" + "\x01\x6a\xbc\x6c\x78\x7b\xa6\x50" + "\xfa\x0f\x3c\x42\x2d\xa5\xa3\x3b" + "\xcf\x62\x50\xff\x71\x6d\xe7\xda" + "\x27\xab\xc6\x67\x16\x65\x68\x64" + "\xc7\xd5\x5f\x81\xa9\xf6\x65\xb3" + "\x5e\x43\x91\x16\xcd\x3d\x55\x37" + "\x55\xb3\xf0\x28\xc5\x54\x19\xc0" + "\xe0\xd6\x2a\x61\xd4\xc8\x72\x51" + "\xe9\xa1\x7b\x48\x21\xad\x44\x09" + "\xe4\x01\x61\x3c\x8a\x5b\xf9\xa1" + "\x6e\x1b\xdf\xc0\x04\xa8\x8b\xf2" + "\x21\xbe\x34\x7b\xfc\xa1\xcd\xc9" + "\xa9\x96\xf4\xa4\x4c\xf7\x4e\x8f" + "\x84\xcc\xd3\xa8\x92\x77\x8f\x36" + "\xe2\x2e\x8c\x33\xe8\x84\xa6\x0c" + "\x6c\x8a\xda\x14\x32\xc2\x96\xff" + "\xc6\x4a\xc2\x9b\x30\x7f\xd1\x29" + "\xc0\xd5\x78\x41\x00\x80\x80\x03" + "\x2a\xb1\xde\x26\x03\x48\x49\xee" + "\x57\x14\x76\x51\x3c\x36\x5d\x0a" + "\x5c\x9f\xe8\xd8\x53\xdb\x4f\xd4" + "\x38\xbf\x66\xc9\x75\x12\x18\x75" + "\x34\x2d\x93\x22\x96\x51\x24\x6e" + "\x4e\xd9\x30\xea\x67\xff\x92\x1c" + "\x16\x26\xe9\xb5\x33\xab\x8c\x22" + "\x47\xdb\xa0\x2c\x08\xf0\x12\x69" + "\x7e\x93\x52\xda\xa5\xe5\xca\xc1" + "\x0f\x55\x2a\xbd\x09\x30\x88\x1b" + "\x9c\xc6\x9f\xe6\xdb\xa6\x92\xeb" + "\xf4\xbd\x5c\xc4\xdb\xc6\x71\x09" + "\xab\x5e\x48\x0c\xed\x6f\xda\x8e" + "\x8d\x0c\x98\x71\x7d\x10\xd0\x9c" + "\x20\x9b\x79\x53\x26\x5d\xb9\x85" + "\x8a\x31\xb8\xc5\x1c\x97\xde\x88" + "\x61\x55\x7f\x7c\x21\x06\xea\xc4" + "\x5f\xaf\xf2\xf0\xd5\x5e\x7d\xb4" + "\x6e\xcf\xe9\xae\x1b\x0e\x11\x80" + "\xc1\x9a\x74\x7e\x52\x6f\xa0\xb7" + "\x24\xcd\x8d\x0a\x11\x40\x63\x72" + "\xfa\xe2\xc5\xb3\x94\xef\x29\xa2" + "\x1a\x23\x43\x04\x37\x55\x0d\xe9" + "\x83\xb2\x29\x51\x49\x64\xa0\xbd" + "\xde\x73\xfd\xa5\x7c\x95\x70\x62" + "\x58\xdc\xe2\xd0\xbf\x98\xf5\x8a" + "\x6a\xfd\xce\xa8\x0e\x42\x2a\xeb" + "\xd2\xff\x83\x27\x53\x5c\xa0\x6e" + "\x93\xef\xe2\xb9\x5d\x35\xd6\x98" + "\xf6\x71\x19\x7a\x54\xa1\xa7\xe8" + "\x09\xfe\xf6\x9e\xc7\xbd\x3e\x29" + "\xbd\x6b\x17\xf4\xe7\x3e\x10\x5c" + "\xc1\xd2\x59\x4f\x4b\x12\x1a\x5b" + "\x50\x80\x59\xb9\xec\x13\x66\xa8" + "\xd2\x31\x7b\x6a\x61\x22\xdd\x7d" + "\x61\xee\x87\x16\x46\x9f\xf9\xc7" + "\x41\xee\x74\xf8\xd0\x96\x2c\x76" + "\x2a\xac\x7d\x6e\x9f\x0e\x7f\x95" + "\xfe\x50\x16\xb2\x23\xca\x62\xd5" + "\x68\xcf\x07\x3f\x3f\x97\x85\x2a" + "\x0c\x25\x45\xba\xdb\x32\xcb\x83" + "\x8c\x4f\xe0\x6d\x9a\x99\xf9\xc9" + "\xda\xd4\x19\x31\xc1\x7c\x6d\xd9" + "\x9c\x56\xd3\xec\xc1\x81\x4c\xed" + "\x28\x9d\x87\xeb\x19\xd7\x1a\x4f" + "\x04\x6a\xcb\x1f\xcf\x1f\xa2\x16" + "\xfc\x2a\x0d\xa1\x14\x2d\xfa\xc5" + "\x5a\xd2\xc5\xf9\x19\x7c\x20\x1f" + "\x2d\x10\xc0\x66\x7c\xd9\x2d\xe5" + "\x88\x70\x59\xa7\x85\xd5\x2e\x7c" + "\x5c\xe3\xb7\x12\xd6\x97\x3f\x29", + .psize = 2048, + .digest = "\x37\x90\x92\xc2\xeb\x01\x87\xd9" + "\x95\xc7\x91\xc3\x17\x8b\x38\x52", + } +}; + /* * DES test vectors. */ @@ -33059,6 +34290,2108 @@ static const struct cipher_testvec chacha20_enc_tv_template[] = { }, }; +static const struct cipher_testvec xchacha20_tv_template[] = { + { /* from libsodium test/default/xchacha20.c */ + .key = "\x79\xc9\x97\x98\xac\x67\x30\x0b" + "\xbb\x27\x04\xc9\x5c\x34\x1e\x32" + "\x45\xf3\xdc\xb2\x17\x61\xb9\x8e" + "\x52\xff\x45\xb2\x4f\x30\x4f\xc4", + .klen = 32, + .iv = "\xb3\x3f\xfd\x30\x96\x47\x9b\xcf" + "\xbc\x9a\xee\x49\x41\x76\x88\xa0" + "\xa2\x55\x4f\x8d\x95\x38\x94\x19" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .input = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00", + .result = "\xc6\xe9\x75\x81\x60\x08\x3a\xc6" + "\x04\xef\x90\xe7\x12\xce\x6e\x75" + "\xd7\x79\x75\x90\x74\x4e\x0c\xf0" + "\x60\xf0\x13\x73\x9c", + .ilen = 29, + .rlen = 29, + }, { /* from libsodium test/default/xchacha20.c */ + .key = "\x9d\x23\xbd\x41\x49\xcb\x97\x9c" + "\xcf\x3c\x5c\x94\xdd\x21\x7e\x98" + "\x08\xcb\x0e\x50\xcd\x0f\x67\x81" + "\x22\x35\xea\xaf\x60\x1d\x62\x32", + .klen = 32, + .iv = "\xc0\x47\x54\x82\x66\xb7\xc3\x70" + "\xd3\x35\x66\xa2\x42\x5c\xbf\x30" + "\xd8\x2d\x1e\xaf\x52\x94\x10\x9e" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .input = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00", + .result = "\xa2\x12\x09\x09\x65\x94\xde\x8c" + "\x56\x67\xb1\xd1\x3a\xd9\x3f\x74" + "\x41\x06\xd0\x54\xdf\x21\x0e\x47" + "\x82\xcd\x39\x6f\xec\x69\x2d\x35" + "\x15\xa2\x0b\xf3\x51\xee\xc0\x11" + "\xa9\x2c\x36\x78\x88\xbc\x46\x4c" + "\x32\xf0\x80\x7a\xcd\x6c\x20\x3a" + "\x24\x7e\x0d\xb8\x54\x14\x84\x68" + "\xe9\xf9\x6b\xee\x4c\xf7\x18\xd6" + "\x8d\x5f\x63\x7c\xbd\x5a\x37\x64" + "\x57\x78\x8e\x6f\xae\x90\xfc\x31" + "\x09\x7c\xfc", + .ilen = 91, + .rlen = 91, + }, { /* Taken from the ChaCha20 test vectors, appended 16 random bytes + to nonce, and recomputed the ciphertext with libsodium */ + .key = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x67\xc6\x69\x73" + "\x51\xff\x4a\xec\x29\xcd\xba\xab" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .input = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .result = "\x9c\x49\x2a\xe7\x8a\x2f\x93\xc7" + "\xb3\x33\x6f\x82\x17\xd8\xc4\x1e" + "\xad\x80\x11\x11\x1d\x4c\x16\x18" + "\x07\x73\x9b\x4f\xdb\x7c\xcb\x47" + "\xfd\xef\x59\x74\xfa\x3f\xe5\x4c" + "\x9b\xd0\xea\xbc\xba\x56\xad\x32" + "\x03\xdc\xf8\x2b\xc1\xe1\x75\x67" + "\x23\x7b\xe6\xfc\xd4\x03\x86\x54", + .ilen = 64, + .rlen = 64, + }, { /* Taken from the ChaCha20 test vectors, appended 16 random bytes + to nonce, and recomputed the ciphertext with libsodium */ + .key = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x01", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x02\xf2\xfb\xe3\x46" + "\x7c\xc2\x54\xf8\x1b\xe8\xe7\x8d" + "\x01\x00\x00\x00\x00\x00\x00\x00", + .input = "\x41\x6e\x79\x20\x73\x75\x62\x6d" + "\x69\x73\x73\x69\x6f\x6e\x20\x74" + "\x6f\x20\x74\x68\x65\x20\x49\x45" + "\x54\x46\x20\x69\x6e\x74\x65\x6e" + "\x64\x65\x64\x20\x62\x79\x20\x74" + "\x68\x65\x20\x43\x6f\x6e\x74\x72" + "\x69\x62\x75\x74\x6f\x72\x20\x66" + "\x6f\x72\x20\x70\x75\x62\x6c\x69" + "\x63\x61\x74\x69\x6f\x6e\x20\x61" + "\x73\x20\x61\x6c\x6c\x20\x6f\x72" + "\x20\x70\x61\x72\x74\x20\x6f\x66" + "\x20\x61\x6e\x20\x49\x45\x54\x46" + "\x20\x49\x6e\x74\x65\x72\x6e\x65" + "\x74\x2d\x44\x72\x61\x66\x74\x20" + "\x6f\x72\x20\x52\x46\x43\x20\x61" + "\x6e\x64\x20\x61\x6e\x79\x20\x73" + "\x74\x61\x74\x65\x6d\x65\x6e\x74" + "\x20\x6d\x61\x64\x65\x20\x77\x69" + "\x74\x68\x69\x6e\x20\x74\x68\x65" + "\x20\x63\x6f\x6e\x74\x65\x78\x74" + "\x20\x6f\x66\x20\x61\x6e\x20\x49" + "\x45\x54\x46\x20\x61\x63\x74\x69" + "\x76\x69\x74\x79\x20\x69\x73\x20" + "\x63\x6f\x6e\x73\x69\x64\x65\x72" + "\x65\x64\x20\x61\x6e\x20\x22\x49" + "\x45\x54\x46\x20\x43\x6f\x6e\x74" + "\x72\x69\x62\x75\x74\x69\x6f\x6e" + "\x22\x2e\x20\x53\x75\x63\x68\x20" + "\x73\x74\x61\x74\x65\x6d\x65\x6e" + "\x74\x73\x20\x69\x6e\x63\x6c\x75" + "\x64\x65\x20\x6f\x72\x61\x6c\x20" + "\x73\x74\x61\x74\x65\x6d\x65\x6e" + "\x74\x73\x20\x69\x6e\x20\x49\x45" + "\x54\x46\x20\x73\x65\x73\x73\x69" + "\x6f\x6e\x73\x2c\x20\x61\x73\x20" + "\x77\x65\x6c\x6c\x20\x61\x73\x20" + "\x77\x72\x69\x74\x74\x65\x6e\x20" + "\x61\x6e\x64\x20\x65\x6c\x65\x63" + "\x74\x72\x6f\x6e\x69\x63\x20\x63" + "\x6f\x6d\x6d\x75\x6e\x69\x63\x61" + "\x74\x69\x6f\x6e\x73\x20\x6d\x61" + "\x64\x65\x20\x61\x74\x20\x61\x6e" + "\x79\x20\x74\x69\x6d\x65\x20\x6f" + "\x72\x20\x70\x6c\x61\x63\x65\x2c" + "\x20\x77\x68\x69\x63\x68\x20\x61" + "\x72\x65\x20\x61\x64\x64\x72\x65" + "\x73\x73\x65\x64\x20\x74\x6f", + .result = "\xf9\xab\x7a\x4a\x60\xb8\x5f\xa0" + "\x50\xbb\x57\xce\xef\x8c\xc1\xd9" + "\x24\x15\xb3\x67\x5e\x7f\x01\xf6" + "\x1c\x22\xf6\xe5\x71\xb1\x43\x64" + "\x63\x05\xd5\xfc\x5c\x3d\xc0\x0e" + "\x23\xef\xd3\x3b\xd9\xdc\x7f\xa8" + "\x58\x26\xb3\xd0\xc2\xd5\x04\x3f" + "\x0a\x0e\x8f\x17\xe4\xcd\xf7\x2a" + "\xb4\x2c\x09\xe4\x47\xec\x8b\xfb" + "\x59\x37\x7a\xa1\xd0\x04\x7e\xaa" + "\xf1\x98\x5f\x24\x3d\x72\x9a\x43" + "\xa4\x36\x51\x92\x22\x87\xff\x26" + "\xce\x9d\xeb\x59\x78\x84\x5e\x74" + "\x97\x2e\x63\xc0\xef\x29\xf7\x8a" + "\xb9\xee\x35\x08\x77\x6a\x35\x9a" + "\x3e\xe6\x4f\x06\x03\x74\x1b\xc1" + "\x5b\xb3\x0b\x89\x11\x07\xd3\xb7" + "\x53\xd6\x25\x04\xd9\x35\xb4\x5d" + "\x4c\x33\x5a\xc2\x42\x4c\xe6\xa4" + "\x97\x6e\x0e\xd2\xb2\x8b\x2f\x7f" + "\x28\xe5\x9f\xac\x4b\x2e\x02\xab" + "\x85\xfa\xa9\x0d\x7c\x2d\x10\xe6" + "\x91\xab\x55\x63\xf0\xde\x3a\x94" + "\x25\x08\x10\x03\xc2\x68\xd1\xf4" + "\xaf\x7d\x9c\x99\xf7\x86\x96\x30" + "\x60\xfc\x0b\xe6\xa8\x80\x15\xb0" + "\x81\xb1\x0c\xbe\xb9\x12\x18\x25" + "\xe9\x0e\xb1\xe7\x23\xb2\xef\x4a" + "\x22\x8f\xc5\x61\x89\xd4\xe7\x0c" + "\x64\x36\x35\x61\xb6\x34\x60\xf7" + "\x7b\x61\x37\x37\x12\x10\xa2\xf6" + "\x7e\xdb\x7f\x39\x3f\xb6\x8e\x89" + "\x9e\xf3\xfe\x13\x98\xbb\x66\x5a" + "\xec\xea\xab\x3f\x9c\x87\xc4\x8c" + "\x8a\x04\x18\x49\xfc\x77\x11\x50" + "\x16\xe6\x71\x2b\xee\xc0\x9c\xb6" + "\x87\xfd\x80\xff\x0b\x1d\x73\x38" + "\xa4\x1d\x6f\xae\xe4\x12\xd7\x93" + "\x9d\xcd\x38\x26\x09\x40\x52\xcd" + "\x67\x01\x67\x26\xe0\x3e\x98\xa8" + "\xe8\x1a\x13\x41\xbb\x90\x4d\x87" + "\xbb\x42\x82\x39\xce\x3a\xd0\x18" + "\x6d\x7b\x71\x8f\xbb\x2c\x6a\xd1" + "\xbd\xf5\xc7\x8a\x7e\xe1\x1e\x0f" + "\x0d\x0d\x13\x7c\xd9\xd8\x3c\x91" + "\xab\xff\x1f\x12\xc3\xee\xe5\x65" + "\x12\x8d\x7b\x61\xe5\x1f\x98", + .ilen = 375, + .rlen = 375, + .also_non_np = 1, + .np = 3, + .tap = { 375 - 20, 4, 16 }, + + }, { /* Taken from the ChaCha20 test vectors, appended 16 random bytes + to nonce, and recomputed the ciphertext with libsodium */ + .key = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a" + "\xf3\x33\x88\x86\x04\xf6\xb5\xf0" + "\x47\x39\x17\xc1\x40\x2b\x80\x09" + "\x9d\xca\x5c\xbc\x20\x70\x75\xc0", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x02\x76\x5a\x2e\x63" + "\x33\x9f\xc9\x9a\x66\x32\x0d\xb7" + "\x2a\x00\x00\x00\x00\x00\x00\x00", + .input = "\x27\x54\x77\x61\x73\x20\x62\x72" + "\x69\x6c\x6c\x69\x67\x2c\x20\x61" + "\x6e\x64\x20\x74\x68\x65\x20\x73" + "\x6c\x69\x74\x68\x79\x20\x74\x6f" + "\x76\x65\x73\x0a\x44\x69\x64\x20" + "\x67\x79\x72\x65\x20\x61\x6e\x64" + "\x20\x67\x69\x6d\x62\x6c\x65\x20" + "\x69\x6e\x20\x74\x68\x65\x20\x77" + "\x61\x62\x65\x3a\x0a\x41\x6c\x6c" + "\x20\x6d\x69\x6d\x73\x79\x20\x77" + "\x65\x72\x65\x20\x74\x68\x65\x20" + "\x62\x6f\x72\x6f\x67\x6f\x76\x65" + "\x73\x2c\x0a\x41\x6e\x64\x20\x74" + "\x68\x65\x20\x6d\x6f\x6d\x65\x20" + "\x72\x61\x74\x68\x73\x20\x6f\x75" + "\x74\x67\x72\x61\x62\x65\x2e", + .result = "\x95\xb9\x51\xe7\x8f\xb4\xa4\x03" + "\xca\x37\xcc\xde\x60\x1d\x8c\xe2" + "\xf1\xbb\x8a\x13\x7f\x61\x85\xcc" + "\xad\xf4\xf0\xdc\x86\xa6\x1e\x10" + "\xbc\x8e\xcb\x38\x2b\xa5\xc8\x8f" + "\xaa\x03\x3d\x53\x4a\x42\xb1\x33" + "\xfc\xd3\xef\xf0\x8e\x7e\x10\x9c" + "\x6f\x12\x5e\xd4\x96\xfe\x5b\x08" + "\xb6\x48\xf0\x14\x74\x51\x18\x7c" + "\x07\x92\xfc\xac\x9d\xf1\x94\xc0" + "\xc1\x9d\xc5\x19\x43\x1f\x1d\xbb" + "\x07\xf0\x1b\x14\x25\x45\xbb\xcb" + "\x5c\xe2\x8b\x28\xf3\xcf\x47\x29" + "\x27\x79\x67\x24\xa6\x87\xc2\x11" + "\x65\x03\xfa\x45\xf7\x9e\x53\x7a" + "\x99\xf1\x82\x25\x4f\x8d\x07", + .ilen = 127, + .rlen = 127, + }, { /* Taken from the ChaCha20 test vectors, appended 16 random bytes + to nonce, and recomputed the ciphertext with libsodium */ + .key = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a" + "\xf3\x33\x88\x86\x04\xf6\xb5\xf0" + "\x47\x39\x17\xc1\x40\x2b\x80\x09" + "\x9d\xca\x5c\xbc\x20\x70\x75\xc0", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x01\x31\x58\xa3\x5a" + "\x25\x5d\x05\x17\x58\xe9\x5e\xd4" + "\x1c\x00\x00\x00\x00\x00\x00\x00", + .input = "\x49\xee\xe0\xdc\x24\x90\x40\xcd" + "\xc5\x40\x8f\x47\x05\xbc\xdd\x81" + "\x47\xc6\x8d\xe6\xb1\x8f\xd7\xcb" + "\x09\x0e\x6e\x22\x48\x1f\xbf\xb8" + "\x5c\xf7\x1e\x8a\xc1\x23\xf2\xd4" + "\x19\x4b\x01\x0f\x4e\xa4\x43\xce" + "\x01\xc6\x67\xda\x03\x91\x18\x90" + "\xa5\xa4\x8e\x45\x03\xb3\x2d\xac" + "\x74\x92\xd3\x53\x47\xc8\xdd\x25" + "\x53\x6c\x02\x03\x87\x0d\x11\x0c" + "\x58\xe3\x12\x18\xfd\x2a\x5b\x40" + "\x0c\x30\xf0\xb8\x3f\x43\xce\xae" + "\x65\x3a\x7d\x7c\xf4\x54\xaa\xcc" + "\x33\x97\xc3\x77\xba\xc5\x70\xde" + "\xd7\xd5\x13\xa5\x65\xc4\x5f\x0f" + "\x46\x1a\x0d\x97\xb5\xf3\xbb\x3c" + "\x84\x0f\x2b\xc5\xaa\xea\xf2\x6c" + "\xc9\xb5\x0c\xee\x15\xf3\x7d\xbe" + "\x9f\x7b\x5a\xa6\xae\x4f\x83\xb6" + "\x79\x49\x41\xf4\x58\x18\xcb\x86" + "\x7f\x30\x0e\xf8\x7d\x44\x36\xea" + "\x75\xeb\x88\x84\x40\x3c\xad\x4f" + "\x6f\x31\x6b\xaa\x5d\xe5\xa5\xc5" + "\x21\x66\xe9\xa7\xe3\xb2\x15\x88" + "\x78\xf6\x79\xa1\x59\x47\x12\x4e" + "\x9f\x9f\x64\x1a\xa0\x22\x5b\x08" + "\xbe\x7c\x36\xc2\x2b\x66\x33\x1b" + "\xdd\x60\x71\xf7\x47\x8c\x61\xc3" + "\xda\x8a\x78\x1e\x16\xfa\x1e\x86" + "\x81\xa6\x17\x2a\xa7\xb5\xc2\xe7" + "\xa4\xc7\x42\xf1\xcf\x6a\xca\xb4" + "\x45\xcf\xf3\x93\xf0\xe7\xea\xf6" + "\xf4\xe6\x33\x43\x84\x93\xa5\x67" + "\x9b\x16\x58\x58\x80\x0f\x2b\x5c" + "\x24\x74\x75\x7f\x95\x81\xb7\x30" + "\x7a\x33\xa7\xf7\x94\x87\x32\x27" + "\x10\x5d\x14\x4c\x43\x29\xdd\x26" + "\xbd\x3e\x3c\x0e\xfe\x0e\xa5\x10" + "\xea\x6b\x64\xfd\x73\xc6\xed\xec" + "\xa8\xc9\xbf\xb3\xba\x0b\x4d\x07" + "\x70\xfc\x16\xfd\x79\x1e\xd7\xc5" + "\x49\x4e\x1c\x8b\x8d\x79\x1b\xb1" + "\xec\xca\x60\x09\x4c\x6a\xd5\x09" + "\x49\x46\x00\x88\x22\x8d\xce\xea" + "\xb1\x17\x11\xde\x42\xd2\x23\xc1" + "\x72\x11\xf5\x50\x73\x04\x40\x47" + "\xf9\x5d\xe7\xa7\x26\xb1\x7e\xb0" + "\x3f\x58\xc1\x52\xab\x12\x67\x9d" + "\x3f\x43\x4b\x68\xd4\x9c\x68\x38" + "\x07\x8a\x2d\x3e\xf3\xaf\x6a\x4b" + "\xf9\xe5\x31\x69\x22\xf9\xa6\x69" + "\xc6\x9c\x96\x9a\x12\x35\x95\x1d" + "\x95\xd5\xdd\xbe\xbf\x93\x53\x24" + "\xfd\xeb\xc2\x0a\x64\xb0\x77\x00" + "\x6f\x88\xc4\x37\x18\x69\x7c\xd7" + "\x41\x92\x55\x4c\x03\xa1\x9a\x4b" + "\x15\xe5\xdf\x7f\x37\x33\x72\xc1" + "\x8b\x10\x67\xa3\x01\x57\x94\x25" + "\x7b\x38\x71\x7e\xdd\x1e\xcc\x73" + "\x55\xd2\x8e\xeb\x07\xdd\xf1\xda" + "\x58\xb1\x47\x90\xfe\x42\x21\x72" + "\xa3\x54\x7a\xa0\x40\xec\x9f\xdd" + "\xc6\x84\x6e\xca\xae\xe3\x68\xb4" + "\x9d\xe4\x78\xff\x57\xf2\xf8\x1b" + "\x03\xa1\x31\xd9\xde\x8d\xf5\x22" + "\x9c\xdd\x20\xa4\x1e\x27\xb1\x76" + "\x4f\x44\x55\xe2\x9b\xa1\x9c\xfe" + "\x54\xf7\x27\x1b\xf4\xde\x02\xf5" + "\x1b\x55\x48\x5c\xdc\x21\x4b\x9e" + "\x4b\x6e\xed\x46\x23\xdc\x65\xb2" + "\xcf\x79\x5f\x28\xe0\x9e\x8b\xe7" + "\x4c\x9d\x8a\xff\xc1\xa6\x28\xb8" + "\x65\x69\x8a\x45\x29\xef\x74\x85" + "\xde\x79\xc7\x08\xae\x30\xb0\xf4" + "\xa3\x1d\x51\x41\xab\xce\xcb\xf6" + "\xb5\xd8\x6d\xe0\x85\xe1\x98\xb3" + "\x43\xbb\x86\x83\x0a\xa0\xf5\xb7" + "\x04\x0b\xfa\x71\x1f\xb0\xf6\xd9" + "\x13\x00\x15\xf0\xc7\xeb\x0d\x5a" + "\x9f\xd7\xb9\x6c\x65\x14\x22\x45" + "\x6e\x45\x32\x3e\x7e\x60\x1a\x12" + "\x97\x82\x14\xfb\xaa\x04\x22\xfa" + "\xa0\xe5\x7e\x8c\x78\x02\x48\x5d" + "\x78\x33\x5a\x7c\xad\xdb\x29\xce" + "\xbb\x8b\x61\xa4\xb7\x42\xe2\xac" + "\x8b\x1a\xd9\x2f\x0b\x8b\x62\x21" + "\x83\x35\x7e\xad\x73\xc2\xb5\x6c" + "\x10\x26\x38\x07\xe5\xc7\x36\x80" + "\xe2\x23\x12\x61\xf5\x48\x4b\x2b" + "\xc5\xdf\x15\xd9\x87\x01\xaa\xac" + "\x1e\x7c\xad\x73\x78\x18\x63\xe0" + "\x8b\x9f\x81\xd8\x12\x6a\x28\x10" + "\xbe\x04\x68\x8a\x09\x7c\x1b\x1c" + "\x83\x66\x80\x47\x80\xe8\xfd\x35" + "\x1c\x97\x6f\xae\x49\x10\x66\xcc" + "\xc6\xd8\xcc\x3a\x84\x91\x20\x77" + "\x72\xe4\x24\xd2\x37\x9f\xc5\xc9" + "\x25\x94\x10\x5f\x40\x00\x64\x99" + "\xdc\xae\xd7\x21\x09\x78\x50\x15" + "\xac\x5f\xc6\x2c\xa2\x0b\xa9\x39" + "\x87\x6e\x6d\xab\xde\x08\x51\x16" + "\xc7\x13\xe9\xea\xed\x06\x8e\x2c" + "\xf8\x37\x8c\xf0\xa6\x96\x8d\x43" + "\xb6\x98\x37\xb2\x43\xed\xde\xdf" + "\x89\x1a\xe7\xeb\x9d\xa1\x7b\x0b" + "\x77\xb0\xe2\x75\xc0\xf1\x98\xd9" + "\x80\x55\xc9\x34\x91\xd1\x59\xe8" + "\x4b\x0f\xc1\xa9\x4b\x7a\x84\x06" + "\x20\xa8\x5d\xfa\xd1\xde\x70\x56" + "\x2f\x9e\x91\x9c\x20\xb3\x24\xd8" + "\x84\x3d\xe1\x8c\x7e\x62\x52\xe5" + "\x44\x4b\x9f\xc2\x93\x03\xea\x2b" + "\x59\xc5\xfa\x3f\x91\x2b\xbb\x23" + "\xf5\xb2\x7b\xf5\x38\xaf\xb3\xee" + "\x63\xdc\x7b\xd1\xff\xaa\x8b\xab" + "\x82\x6b\x37\x04\xeb\x74\xbe\x79" + "\xb9\x83\x90\xef\x20\x59\x46\xff" + "\xe9\x97\x3e\x2f\xee\xb6\x64\x18" + "\x38\x4c\x7a\x4a\xf9\x61\xe8\x9a" + "\xa1\xb5\x01\xa6\x47\xd3\x11\xd4" + "\xce\xd3\x91\x49\x88\xc7\xb8\x4d" + "\xb1\xb9\x07\x6d\x16\x72\xae\x46" + "\x5e\x03\xa1\x4b\xb6\x02\x30\xa8" + "\x3d\xa9\x07\x2a\x7c\x19\xe7\x62" + "\x87\xe3\x82\x2f\x6f\xe1\x09\xd9" + "\x94\x97\xea\xdd\x58\x9e\xae\x76" + "\x7e\x35\xe5\xb4\xda\x7e\xf4\xde" + "\xf7\x32\x87\xcd\x93\xbf\x11\x56" + "\x11\xbe\x08\x74\xe1\x69\xad\xe2" + "\xd7\xf8\x86\x75\x8a\x3c\xa4\xbe" + "\x70\xa7\x1b\xfc\x0b\x44\x2a\x76" + "\x35\xea\x5d\x85\x81\xaf\x85\xeb" + "\xa0\x1c\x61\xc2\xf7\x4f\xa5\xdc" + "\x02\x7f\xf6\x95\x40\x6e\x8a\x9a" + "\xf3\x5d\x25\x6e\x14\x3a\x22\xc9" + "\x37\x1c\xeb\x46\x54\x3f\xa5\x91" + "\xc2\xb5\x8c\xfe\x53\x08\x97\x32" + "\x1b\xb2\x30\x27\xfe\x25\x5d\xdc" + "\x08\x87\xd0\xe5\x94\x1a\xd4\xf1" + "\xfe\xd6\xb4\xa3\xe6\x74\x81\x3c" + "\x1b\xb7\x31\xa7\x22\xfd\xd4\xdd" + "\x20\x4e\x7c\x51\xb0\x60\x73\xb8" + "\x9c\xac\x91\x90\x7e\x01\xb0\xe1" + "\x8a\x2f\x75\x1c\x53\x2a\x98\x2a" + "\x06\x52\x95\x52\xb2\xe9\x25\x2e" + "\x4c\xe2\x5a\x00\xb2\x13\x81\x03" + "\x77\x66\x0d\xa5\x99\xda\x4e\x8c" + "\xac\xf3\x13\x53\x27\x45\xaf\x64" + "\x46\xdc\xea\x23\xda\x97\xd1\xab" + "\x7d\x6c\x30\x96\x1f\xbc\x06\x34" + "\x18\x0b\x5e\x21\x35\x11\x8d\x4c" + "\xe0\x2d\xe9\x50\x16\x74\x81\xa8" + "\xb4\x34\xb9\x72\x42\xa6\xcc\xbc" + "\xca\x34\x83\x27\x10\x5b\x68\x45" + "\x8f\x52\x22\x0c\x55\x3d\x29\x7c" + "\xe3\xc0\x66\x05\x42\x91\x5f\x58" + "\xfe\x4a\x62\xd9\x8c\xa9\x04\x19" + "\x04\xa9\x08\x4b\x57\xfc\x67\x53" + "\x08\x7c\xbc\x66\x8a\xb0\xb6\x9f" + "\x92\xd6\x41\x7c\x5b\x2a\x00\x79" + "\x72", + .result = "\x3a\x92\xee\x53\x31\xaf\x2b\x60" + "\x5f\x55\x8d\x00\x5d\xfc\x74\x97" + "\x28\x54\xf4\xa5\x75\xf1\x9b\x25" + "\x62\x1c\xc0\xe0\x13\xc8\x87\x53" + "\xd0\xf3\xa7\x97\x1f\x3b\x1e\xea" + "\xe0\xe5\x2a\xd1\xdd\xa4\x3b\x50" + "\x45\xa3\x0d\x7e\x1b\xc9\xa0\xad" + "\xb9\x2c\x54\xa6\xc7\x55\x16\xd0" + "\xc5\x2e\x02\x44\x35\xd0\x7e\x67" + "\xf2\xc4\x9b\xcd\x95\x10\xcc\x29" + "\x4b\xfa\x86\x87\xbe\x40\x36\xbe" + "\xe1\xa3\x52\x89\x55\x20\x9b\xc2" + "\xab\xf2\x31\x34\x16\xad\xc8\x17" + "\x65\x24\xc0\xff\x12\x37\xfe\x5a" + "\x62\x3b\x59\x47\x6c\x5f\x3a\x8e" + "\x3b\xd9\x30\xc8\x7f\x2f\x88\xda" + "\x80\xfd\x02\xda\x7f\x9a\x7a\x73" + "\x59\xc5\x34\x09\x9a\x11\xcb\xa7" + "\xfc\xf6\xa1\xa0\x60\xfb\x43\xbb" + "\xf1\xe9\xd7\xc6\x79\x27\x4e\xff" + "\x22\xb4\x24\xbf\x76\xee\x47\xb9" + "\x6d\x3f\x8b\xb0\x9c\x3c\x43\xdd" + "\xff\x25\x2e\x6d\xa4\x2b\xfb\x5d" + "\x1b\x97\x6c\x55\x0a\x82\x7a\x7b" + "\x94\x34\xc2\xdb\x2f\x1f\xc1\xea" + "\xd4\x4d\x17\x46\x3b\x51\x69\x09" + "\xe4\x99\x32\x25\xfd\x94\xaf\xfb" + "\x10\xf7\x4f\xdd\x0b\x3c\x8b\x41" + "\xb3\x6a\xb7\xd1\x33\xa8\x0c\x2f" + "\x62\x4c\x72\x11\xd7\x74\xe1\x3b" + "\x38\x43\x66\x7b\x6c\x36\x48\xe7" + "\xe3\xe7\x9d\xb9\x42\x73\x7a\x2a" + "\x89\x20\x1a\x41\x80\x03\xf7\x8f" + "\x61\x78\x13\xbf\xfe\x50\xf5\x04" + "\x52\xf9\xac\x47\xf8\x62\x4b\xb2" + "\x24\xa9\xbf\x64\xb0\x18\x69\xd2" + "\xf5\xe4\xce\xc8\xb1\x87\x75\xd6" + "\x2c\x24\x79\x00\x7d\x26\xfb\x44" + "\xe7\x45\x7a\xee\x58\xa5\x83\xc1" + "\xb4\x24\xab\x23\x2f\x4d\xd7\x4f" + "\x1c\xc7\xaa\xa9\x50\xf4\xa3\x07" + "\x12\x13\x89\x74\xdc\x31\x6a\xb2" + "\xf5\x0f\x13\x8b\xb9\xdb\x85\x1f" + "\xf5\xbc\x88\xd9\x95\xea\x31\x6c" + "\x36\x60\xb6\x49\xdc\xc4\xf7\x55" + "\x3f\x21\xc1\xb5\x92\x18\x5e\xbc" + "\x9f\x87\x7f\xe7\x79\x25\x40\x33" + "\xd6\xb9\x33\xd5\x50\xb3\xc7\x89" + "\x1b\x12\xa0\x46\xdd\xa7\xd8\x3e" + "\x71\xeb\x6f\x66\xa1\x26\x0c\x67" + "\xab\xb2\x38\x58\x17\xd8\x44\x3b" + "\x16\xf0\x8e\x62\x8d\x16\x10\x00" + "\x32\x8b\xef\xb9\x28\xd3\xc5\xad" + "\x0a\x19\xa2\xe4\x03\x27\x7d\x94" + "\x06\x18\xcd\xd6\x27\x00\xf9\x1f" + "\xb6\xb3\xfe\x96\x35\x5f\xc4\x1c" + "\x07\x62\x10\x79\x68\x50\xf1\x7e" + "\x29\xe7\xc4\xc4\xe7\xee\x54\xd6" + "\x58\x76\x84\x6d\x8d\xe4\x59\x31" + "\xe9\xf4\xdc\xa1\x1f\xe5\x1a\xd6" + "\xe6\x64\x46\xf5\x77\x9c\x60\x7a" + "\x5e\x62\xe3\x0a\xd4\x9f\x7a\x2d" + "\x7a\xa5\x0a\x7b\x29\x86\x7a\x74" + "\x74\x71\x6b\xca\x7d\x1d\xaa\xba" + "\x39\x84\x43\x76\x35\xfe\x4f\x9b" + "\xbb\xbb\xb5\x6a\x32\xb5\x5d\x41" + "\x51\xf0\x5b\x68\x03\x47\x4b\x8a" + "\xca\x88\xf6\x37\xbd\x73\x51\x70" + "\x66\xfe\x9e\x5f\x21\x9c\xf3\xdd" + "\xc3\xea\x27\xf9\x64\x94\xe1\x19" + "\xa0\xa9\xab\x60\xe0\x0e\xf7\x78" + "\x70\x86\xeb\xe0\xd1\x5c\x05\xd3" + "\xd7\xca\xe0\xc0\x47\x47\x34\xee" + "\x11\xa3\xa3\x54\x98\xb7\x49\x8e" + "\x84\x28\x70\x2c\x9e\xfb\x55\x54" + "\x4d\xf8\x86\xf7\x85\x7c\xbd\xf3" + "\x17\xd8\x47\xcb\xac\xf4\x20\x85" + "\x34\x66\xad\x37\x2d\x5e\x52\xda" + "\x8a\xfe\x98\x55\x30\xe7\x2d\x2b" + "\x19\x10\x8e\x7b\x66\x5e\xdc\xe0" + "\x45\x1f\x7b\xb4\x08\xfb\x8f\xf6" + "\x8c\x89\x21\x34\x55\x27\xb2\x76" + "\xb2\x07\xd9\xd6\x68\x9b\xea\x6b" + "\x2d\xb4\xc4\x35\xdd\xd2\x79\xae" + "\xc7\xd6\x26\x7f\x12\x01\x8c\xa7" + "\xe3\xdb\xa8\xf4\xf7\x2b\xec\x99" + "\x11\x00\xf1\x35\x8c\xcf\xd5\xc9" + "\xbd\x91\x36\x39\x70\xcf\x7d\x70" + "\x47\x1a\xfc\x6b\x56\xe0\x3f\x9c" + "\x60\x49\x01\x72\xa9\xaf\x2c\x9c" + "\xe8\xab\xda\x8c\x14\x19\xf3\x75" + "\x07\x17\x9d\x44\x67\x7a\x2e\xef" + "\xb7\x83\x35\x4a\xd1\x3d\x1c\x84" + "\x32\xdd\xaa\xea\xca\x1d\xdc\x72" + "\x2c\xcc\x43\xcd\x5d\xe3\x21\xa4" + "\xd0\x8a\x4b\x20\x12\xa3\xd5\x86" + "\x76\x96\xff\x5f\x04\x57\x0f\xe6" + "\xba\xe8\x76\x50\x0c\x64\x1d\x83" + "\x9c\x9b\x9a\x9a\x58\x97\x9c\x5c" + "\xb4\xa4\xa6\x3e\x19\xeb\x8f\x5a" + "\x61\xb2\x03\x7b\x35\x19\xbe\xa7" + "\x63\x0c\xfd\xdd\xf9\x90\x6c\x08" + "\x19\x11\xd3\x65\x4a\xf5\x96\x92" + "\x59\xaa\x9c\x61\x0c\x29\xa7\xf8" + "\x14\x39\x37\xbf\x3c\xf2\x16\x72" + "\x02\xfa\xa2\xf3\x18\x67\x5d\xcb" + "\xdc\x4d\xbb\x96\xff\x70\x08\x2d" + "\xc2\xa8\x52\xe1\x34\x5f\x72\xfe" + "\x64\xbf\xca\xa7\x74\x38\xfb\x74" + "\x55\x9c\xfa\x8a\xed\xfb\x98\xeb" + "\x58\x2e\x6c\xe1\x52\x76\x86\xd7" + "\xcf\xa1\xa4\xfc\xb2\x47\x41\x28" + "\xa3\xc1\xe5\xfd\x53\x19\x28\x2b" + "\x37\x04\x65\x96\x99\x7a\x28\x0f" + "\x07\x68\x4b\xc7\x52\x0a\x55\x35" + "\x40\x19\x95\x61\xe8\x59\x40\x1f" + "\x9d\xbf\x78\x7d\x8f\x84\xff\x6f" + "\xd0\xd5\x63\xd2\x22\xbd\xc8\x4e" + "\xfb\xe7\x9f\x06\xe6\xe7\x39\x6d" + "\x6a\x96\x9f\xf0\x74\x7e\xc9\x35" + "\xb7\x26\xb8\x1c\x0a\xa6\x27\x2c" + "\xa2\x2b\xfe\xbe\x0f\x07\x73\xae" + "\x7f\x7f\x54\xf5\x7c\x6a\x0a\x56" + "\x49\xd4\x81\xe5\x85\x53\x99\x1f" + "\x95\x05\x13\x58\x8d\x0e\x1b\x90" + "\xc3\x75\x48\x64\x58\x98\x67\x84" + "\xae\xe2\x21\xa2\x8a\x04\x0a\x0b" + "\x61\xaa\xb0\xd4\x28\x60\x7a\xf8" + "\xbc\x52\xfb\x24\x7f\xed\x0d\x2a" + "\x0a\xb2\xf9\xc6\x95\xb5\x11\xc9" + "\xf4\x0f\x26\x11\xcf\x2a\x57\x87" + "\x7a\xf3\xe7\x94\x65\xc2\xb5\xb3" + "\xab\x98\xe3\xc1\x2b\x59\x19\x7c" + "\xd6\xf3\xf9\xbf\xff\x6d\xc6\x82" + "\x13\x2f\x4a\x2e\xcd\x26\xfe\x2d" + "\x01\x70\xf4\xc2\x7f\x1f\x4c\xcb" + "\x47\x77\x0c\xa0\xa3\x03\xec\xda" + "\xa9\xbf\x0d\x2d\xae\xe4\xb8\x7b" + "\xa9\xbc\x08\xb4\x68\x2e\xc5\x60" + "\x8d\x87\x41\x2b\x0f\x69\xf0\xaf" + "\x5f\xba\x72\x20\x0f\x33\xcd\x6d" + "\x36\x7d\x7b\xd5\x05\xf1\x4b\x05" + "\xc4\xfc\x7f\x80\xb9\x4d\xbd\xf7" + "\x7c\x84\x07\x01\xc2\x40\x66\x5b" + "\x98\xc7\x2c\xe3\x97\xfa\xdf\x87" + "\xa0\x1f\xe9\x21\x42\x0f\x3b\xeb" + "\x89\x1c\x3b\xca\x83\x61\x77\x68" + "\x84\xbb\x60\x87\x38\x2e\x25\xd5" + "\x9e\x04\x41\x70\xac\xda\xc0\x9c" + "\x9c\x69\xea\x8d\x4e\x55\x2a\x29" + "\xed\x05\x4b\x7b\x73\x71\x90\x59" + "\x4d\xc8\xd8\x44\xf0\x4c\xe1\x5e" + "\x84\x47\x55\xcc\x32\x3f\xe7\x97" + "\x42\xc6\x32\xac\x40\xe5\xa5\xc7" + "\x8b\xed\xdb\xf7\x83\xd6\xb1\xc2" + "\x52\x5e\x34\xb7\xeb\x6e\xd9\xfc" + "\xe5\x93\x9a\x97\x3e\xb0\xdc\xd9" + "\xd7\x06\x10\xb6\x1d\x80\x59\xdd" + "\x0d\xfe\x64\x35\xcd\x5d\xec\xf0" + "\xba\xd0\x34\xc9\x2d\x91\xc5\x17" + "\x11", + .ilen = 1281, + .rlen = 1281, + .also_non_np = 1, + .np = 3, + .tap = { 1200, 1, 80 }, + }, +}; + +/* + * Same as XChaCha20 test vectors above, but recomputed the ciphertext with + * XChaCha12, using a modified libsodium. + */ +static const struct cipher_testvec xchacha12_tv_template[] = { + { + .key = "\x79\xc9\x97\x98\xac\x67\x30\x0b" + "\xbb\x27\x04\xc9\x5c\x34\x1e\x32" + "\x45\xf3\xdc\xb2\x17\x61\xb9\x8e" + "\x52\xff\x45\xb2\x4f\x30\x4f\xc4", + .klen = 32, + .iv = "\xb3\x3f\xfd\x30\x96\x47\x9b\xcf" + "\xbc\x9a\xee\x49\x41\x76\x88\xa0" + "\xa2\x55\x4f\x8d\x95\x38\x94\x19" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .input = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00", + .result = "\x1b\x78\x7f\xd7\xa1\x41\x68\xab" + "\x3d\x3f\xd1\x7b\x69\x56\xb2\xd5" + "\x43\xce\xeb\xaf\x36\xf0\x29\x9d" + "\x3a\xfb\x18\xae\x1b", + .ilen = 29, + .rlen = 29, + }, { + .key = "\x9d\x23\xbd\x41\x49\xcb\x97\x9c" + "\xcf\x3c\x5c\x94\xdd\x21\x7e\x98" + "\x08\xcb\x0e\x50\xcd\x0f\x67\x81" + "\x22\x35\xea\xaf\x60\x1d\x62\x32", + .klen = 32, + .iv = "\xc0\x47\x54\x82\x66\xb7\xc3\x70" + "\xd3\x35\x66\xa2\x42\x5c\xbf\x30" + "\xd8\x2d\x1e\xaf\x52\x94\x10\x9e" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .input = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00", + .result = "\xfb\x32\x09\x1d\x83\x05\xae\x4c" + "\x13\x1f\x12\x71\xf2\xca\xb2\xeb" + "\x5b\x83\x14\x7d\x83\xf6\x57\x77" + "\x2e\x40\x1f\x92\x2c\xf9\xec\x35" + "\x34\x1f\x93\xdf\xfb\x30\xd7\x35" + "\x03\x05\x78\xc1\x20\x3b\x7a\xe3" + "\x62\xa3\x89\xdc\x11\x11\x45\xa8" + "\x82\x89\xa0\xf1\x4e\xc7\x0f\x11" + "\x69\xdd\x0c\x84\x2b\x89\x5c\xdc" + "\xf0\xde\x01\xef\xc5\x65\x79\x23" + "\x87\x67\xd6\x50\xd9\x8d\xd9\x92" + "\x54\x5b\x0e", + .ilen = 91, + .rlen = 91, + }, { + .key = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x67\xc6\x69\x73" + "\x51\xff\x4a\xec\x29\xcd\xba\xab" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .input = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .result = "\xdf\x2d\xc6\x21\x2a\x9d\xa1\xbb" + "\xc2\x77\x66\x0c\x5c\x46\xef\xa7" + "\x79\x1b\xb9\xdf\x55\xe2\xf9\x61" + "\x4c\x7b\xa4\x52\x24\xaf\xa2\xda" + "\xd1\x8f\x8f\xa2\x9e\x53\x4d\xc4" + "\xb8\x55\x98\x08\x7c\x08\xd4\x18" + "\x67\x8f\xef\x50\xb1\x5f\xa5\x77" + "\x4c\x25\xe7\x86\x26\x42\xca\x44", + .ilen = 64, + .rlen = 64, + }, { + .key = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x01", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x02\xf2\xfb\xe3\x46" + "\x7c\xc2\x54\xf8\x1b\xe8\xe7\x8d" + "\x01\x00\x00\x00\x00\x00\x00\x00", + .input = "\x41\x6e\x79\x20\x73\x75\x62\x6d" + "\x69\x73\x73\x69\x6f\x6e\x20\x74" + "\x6f\x20\x74\x68\x65\x20\x49\x45" + "\x54\x46\x20\x69\x6e\x74\x65\x6e" + "\x64\x65\x64\x20\x62\x79\x20\x74" + "\x68\x65\x20\x43\x6f\x6e\x74\x72" + "\x69\x62\x75\x74\x6f\x72\x20\x66" + "\x6f\x72\x20\x70\x75\x62\x6c\x69" + "\x63\x61\x74\x69\x6f\x6e\x20\x61" + "\x73\x20\x61\x6c\x6c\x20\x6f\x72" + "\x20\x70\x61\x72\x74\x20\x6f\x66" + "\x20\x61\x6e\x20\x49\x45\x54\x46" + "\x20\x49\x6e\x74\x65\x72\x6e\x65" + "\x74\x2d\x44\x72\x61\x66\x74\x20" + "\x6f\x72\x20\x52\x46\x43\x20\x61" + "\x6e\x64\x20\x61\x6e\x79\x20\x73" + "\x74\x61\x74\x65\x6d\x65\x6e\x74" + "\x20\x6d\x61\x64\x65\x20\x77\x69" + "\x74\x68\x69\x6e\x20\x74\x68\x65" + "\x20\x63\x6f\x6e\x74\x65\x78\x74" + "\x20\x6f\x66\x20\x61\x6e\x20\x49" + "\x45\x54\x46\x20\x61\x63\x74\x69" + "\x76\x69\x74\x79\x20\x69\x73\x20" + "\x63\x6f\x6e\x73\x69\x64\x65\x72" + "\x65\x64\x20\x61\x6e\x20\x22\x49" + "\x45\x54\x46\x20\x43\x6f\x6e\x74" + "\x72\x69\x62\x75\x74\x69\x6f\x6e" + "\x22\x2e\x20\x53\x75\x63\x68\x20" + "\x73\x74\x61\x74\x65\x6d\x65\x6e" + "\x74\x73\x20\x69\x6e\x63\x6c\x75" + "\x64\x65\x20\x6f\x72\x61\x6c\x20" + "\x73\x74\x61\x74\x65\x6d\x65\x6e" + "\x74\x73\x20\x69\x6e\x20\x49\x45" + "\x54\x46\x20\x73\x65\x73\x73\x69" + "\x6f\x6e\x73\x2c\x20\x61\x73\x20" + "\x77\x65\x6c\x6c\x20\x61\x73\x20" + "\x77\x72\x69\x74\x74\x65\x6e\x20" + "\x61\x6e\x64\x20\x65\x6c\x65\x63" + "\x74\x72\x6f\x6e\x69\x63\x20\x63" + "\x6f\x6d\x6d\x75\x6e\x69\x63\x61" + "\x74\x69\x6f\x6e\x73\x20\x6d\x61" + "\x64\x65\x20\x61\x74\x20\x61\x6e" + "\x79\x20\x74\x69\x6d\x65\x20\x6f" + "\x72\x20\x70\x6c\x61\x63\x65\x2c" + "\x20\x77\x68\x69\x63\x68\x20\x61" + "\x72\x65\x20\x61\x64\x64\x72\x65" + "\x73\x73\x65\x64\x20\x74\x6f", + .result = "\xe4\xa6\xc8\x30\xc4\x23\x13\xd6" + "\x08\x4d\xc9\xb7\xa5\x64\x7c\xb9" + "\x71\xe2\xab\x3e\xa8\x30\x8a\x1c" + "\x4a\x94\x6d\x9b\xe0\xb3\x6f\xf1" + "\xdc\xe3\x1b\xb3\xa9\x6d\x0d\xd6" + "\xd0\xca\x12\xef\xe7\x5f\xd8\x61" + "\x3c\x82\xd3\x99\x86\x3c\x6f\x66" + "\x02\x06\xdc\x55\xf9\xed\xdf\x38" + "\xb4\xa6\x17\x00\x7f\xef\xbf\x4f" + "\xf8\x36\xf1\x60\x7e\x47\xaf\xdb" + "\x55\x9b\x12\xcb\x56\x44\xa7\x1f" + "\xd3\x1a\x07\x3b\x00\xec\xe6\x4c" + "\xa2\x43\x27\xdf\x86\x19\x4f\x16" + "\xed\xf9\x4a\xf3\x63\x6f\xfa\x7f" + "\x78\x11\xf6\x7d\x97\x6f\xec\x6f" + "\x85\x0f\x5c\x36\x13\x8d\x87\xe0" + "\x80\xb1\x69\x0b\x98\x89\x9c\x4e" + "\xf8\xdd\xee\x5c\x0a\x85\xce\xd4" + "\xea\x1b\x48\xbe\x08\xf8\xe2\xa8" + "\xa5\xb0\x3c\x79\xb1\x15\xb4\xb9" + "\x75\x10\x95\x35\x81\x7e\x26\xe6" + "\x78\xa4\x88\xcf\xdb\x91\x34\x18" + "\xad\xd7\x8e\x07\x7d\xab\x39\xf9" + "\xa3\x9e\xa5\x1d\xbb\xed\x61\xfd" + "\xdc\xb7\x5a\x27\xfc\xb5\xc9\x10" + "\xa8\xcc\x52\x7f\x14\x76\x90\xe7" + "\x1b\x29\x60\x74\xc0\x98\x77\xbb" + "\xe0\x54\xbb\x27\x49\x59\x1e\x62" + "\x3d\xaf\x74\x06\xa4\x42\x6f\xc6" + "\x52\x97\xc4\x1d\xc4\x9f\xe2\xe5" + "\x38\x57\x91\xd1\xa2\x28\xcc\x40" + "\xcc\x70\x59\x37\xfc\x9f\x4b\xda" + "\xa0\xeb\x97\x9a\x7d\xed\x14\x5c" + "\x9c\xb7\x93\x26\x41\xa8\x66\xdd" + "\x87\x6a\xc0\xd3\xc2\xa9\x3e\xae" + "\xe9\x72\xfe\xd1\xb3\xac\x38\xea" + "\x4d\x15\xa9\xd5\x36\x61\xe9\x96" + "\x6c\x23\xf8\x43\xe4\x92\x29\xd9" + "\x8b\x78\xf7\x0a\x52\xe0\x19\x5b" + "\x59\x69\x5b\x5d\xa1\x53\xc4\x68" + "\xe1\xbb\xac\x89\x14\xe2\xe2\x85" + "\x41\x18\xf5\xb3\xd1\xfa\x68\x19" + "\x44\x78\xdc\xcf\xe7\x88\x2d\x52" + "\x5f\x40\xb5\x7e\xf8\x88\xa2\xae" + "\x4a\xb2\x07\x35\x9d\x9b\x07\x88" + "\xb7\x00\xd0\x0c\xb6\xa0\x47\x59" + "\xda\x4e\xc9\xab\x9b\x8a\x7b", + + .ilen = 375, + .rlen = 375, + .also_non_np = 1, + .np = 3, + .tap = { 375 - 20, 4, 16 }, + + }, { + .key = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a" + "\xf3\x33\x88\x86\x04\xf6\xb5\xf0" + "\x47\x39\x17\xc1\x40\x2b\x80\x09" + "\x9d\xca\x5c\xbc\x20\x70\x75\xc0", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x02\x76\x5a\x2e\x63" + "\x33\x9f\xc9\x9a\x66\x32\x0d\xb7" + "\x2a\x00\x00\x00\x00\x00\x00\x00", + .input = "\x27\x54\x77\x61\x73\x20\x62\x72" + "\x69\x6c\x6c\x69\x67\x2c\x20\x61" + "\x6e\x64\x20\x74\x68\x65\x20\x73" + "\x6c\x69\x74\x68\x79\x20\x74\x6f" + "\x76\x65\x73\x0a\x44\x69\x64\x20" + "\x67\x79\x72\x65\x20\x61\x6e\x64" + "\x20\x67\x69\x6d\x62\x6c\x65\x20" + "\x69\x6e\x20\x74\x68\x65\x20\x77" + "\x61\x62\x65\x3a\x0a\x41\x6c\x6c" + "\x20\x6d\x69\x6d\x73\x79\x20\x77" + "\x65\x72\x65\x20\x74\x68\x65\x20" + "\x62\x6f\x72\x6f\x67\x6f\x76\x65" + "\x73\x2c\x0a\x41\x6e\x64\x20\x74" + "\x68\x65\x20\x6d\x6f\x6d\x65\x20" + "\x72\x61\x74\x68\x73\x20\x6f\x75" + "\x74\x67\x72\x61\x62\x65\x2e", + .result = "\xb9\x68\xbc\x6a\x24\xbc\xcc\xd8" + "\x9b\x2a\x8d\x5b\x96\xaf\x56\xe3" + "\x11\x61\xe7\xa7\x9b\xce\x4e\x7d" + "\x60\x02\x48\xac\xeb\xd5\x3a\x26" + "\x9d\x77\x3b\xb5\x32\x13\x86\x8e" + "\x20\x82\x26\x72\xae\x64\x1b\x7e" + "\x2e\x01\x68\xb4\x87\x45\xa1\x24" + "\xe4\x48\x40\xf0\xaa\xac\xee\xa9" + "\xfc\x31\xad\x9d\x89\xa3\xbb\xd2" + "\xe4\x25\x13\xad\x0f\x5e\xdf\x3c" + "\x27\xab\xb8\x62\x46\x22\x30\x48" + "\x55\x2c\x4e\x84\x78\x1d\x0d\x34" + "\x8d\x3c\x91\x0a\x7f\x5b\x19\x9f" + "\x97\x05\x4c\xa7\x62\x47\x8b\xc5" + "\x44\x2e\x20\x33\xdd\xa0\x82\xa9" + "\x25\x76\x37\xe6\x3c\x67\x5b", + .ilen = 127, + .rlen = 127, + }, { + .key = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a" + "\xf3\x33\x88\x86\x04\xf6\xb5\xf0" + "\x47\x39\x17\xc1\x40\x2b\x80\x09" + "\x9d\xca\x5c\xbc\x20\x70\x75\xc0", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x01\x31\x58\xa3\x5a" + "\x25\x5d\x05\x17\x58\xe9\x5e\xd4" + "\x1c\x00\x00\x00\x00\x00\x00\x00", + .input = "\x49\xee\xe0\xdc\x24\x90\x40\xcd" + "\xc5\x40\x8f\x47\x05\xbc\xdd\x81" + "\x47\xc6\x8d\xe6\xb1\x8f\xd7\xcb" + "\x09\x0e\x6e\x22\x48\x1f\xbf\xb8" + "\x5c\xf7\x1e\x8a\xc1\x23\xf2\xd4" + "\x19\x4b\x01\x0f\x4e\xa4\x43\xce" + "\x01\xc6\x67\xda\x03\x91\x18\x90" + "\xa5\xa4\x8e\x45\x03\xb3\x2d\xac" + "\x74\x92\xd3\x53\x47\xc8\xdd\x25" + "\x53\x6c\x02\x03\x87\x0d\x11\x0c" + "\x58\xe3\x12\x18\xfd\x2a\x5b\x40" + "\x0c\x30\xf0\xb8\x3f\x43\xce\xae" + "\x65\x3a\x7d\x7c\xf4\x54\xaa\xcc" + "\x33\x97\xc3\x77\xba\xc5\x70\xde" + "\xd7\xd5\x13\xa5\x65\xc4\x5f\x0f" + "\x46\x1a\x0d\x97\xb5\xf3\xbb\x3c" + "\x84\x0f\x2b\xc5\xaa\xea\xf2\x6c" + "\xc9\xb5\x0c\xee\x15\xf3\x7d\xbe" + "\x9f\x7b\x5a\xa6\xae\x4f\x83\xb6" + "\x79\x49\x41\xf4\x58\x18\xcb\x86" + "\x7f\x30\x0e\xf8\x7d\x44\x36\xea" + "\x75\xeb\x88\x84\x40\x3c\xad\x4f" + "\x6f\x31\x6b\xaa\x5d\xe5\xa5\xc5" + "\x21\x66\xe9\xa7\xe3\xb2\x15\x88" + "\x78\xf6\x79\xa1\x59\x47\x12\x4e" + "\x9f\x9f\x64\x1a\xa0\x22\x5b\x08" + "\xbe\x7c\x36\xc2\x2b\x66\x33\x1b" + "\xdd\x60\x71\xf7\x47\x8c\x61\xc3" + "\xda\x8a\x78\x1e\x16\xfa\x1e\x86" + "\x81\xa6\x17\x2a\xa7\xb5\xc2\xe7" + "\xa4\xc7\x42\xf1\xcf\x6a\xca\xb4" + "\x45\xcf\xf3\x93\xf0\xe7\xea\xf6" + "\xf4\xe6\x33\x43\x84\x93\xa5\x67" + "\x9b\x16\x58\x58\x80\x0f\x2b\x5c" + "\x24\x74\x75\x7f\x95\x81\xb7\x30" + "\x7a\x33\xa7\xf7\x94\x87\x32\x27" + "\x10\x5d\x14\x4c\x43\x29\xdd\x26" + "\xbd\x3e\x3c\x0e\xfe\x0e\xa5\x10" + "\xea\x6b\x64\xfd\x73\xc6\xed\xec" + "\xa8\xc9\xbf\xb3\xba\x0b\x4d\x07" + "\x70\xfc\x16\xfd\x79\x1e\xd7\xc5" + "\x49\x4e\x1c\x8b\x8d\x79\x1b\xb1" + "\xec\xca\x60\x09\x4c\x6a\xd5\x09" + "\x49\x46\x00\x88\x22\x8d\xce\xea" + "\xb1\x17\x11\xde\x42\xd2\x23\xc1" + "\x72\x11\xf5\x50\x73\x04\x40\x47" + "\xf9\x5d\xe7\xa7\x26\xb1\x7e\xb0" + "\x3f\x58\xc1\x52\xab\x12\x67\x9d" + "\x3f\x43\x4b\x68\xd4\x9c\x68\x38" + "\x07\x8a\x2d\x3e\xf3\xaf\x6a\x4b" + "\xf9\xe5\x31\x69\x22\xf9\xa6\x69" + "\xc6\x9c\x96\x9a\x12\x35\x95\x1d" + "\x95\xd5\xdd\xbe\xbf\x93\x53\x24" + "\xfd\xeb\xc2\x0a\x64\xb0\x77\x00" + "\x6f\x88\xc4\x37\x18\x69\x7c\xd7" + "\x41\x92\x55\x4c\x03\xa1\x9a\x4b" + "\x15\xe5\xdf\x7f\x37\x33\x72\xc1" + "\x8b\x10\x67\xa3\x01\x57\x94\x25" + "\x7b\x38\x71\x7e\xdd\x1e\xcc\x73" + "\x55\xd2\x8e\xeb\x07\xdd\xf1\xda" + "\x58\xb1\x47\x90\xfe\x42\x21\x72" + "\xa3\x54\x7a\xa0\x40\xec\x9f\xdd" + "\xc6\x84\x6e\xca\xae\xe3\x68\xb4" + "\x9d\xe4\x78\xff\x57\xf2\xf8\x1b" + "\x03\xa1\x31\xd9\xde\x8d\xf5\x22" + "\x9c\xdd\x20\xa4\x1e\x27\xb1\x76" + "\x4f\x44\x55\xe2\x9b\xa1\x9c\xfe" + "\x54\xf7\x27\x1b\xf4\xde\x02\xf5" + "\x1b\x55\x48\x5c\xdc\x21\x4b\x9e" + "\x4b\x6e\xed\x46\x23\xdc\x65\xb2" + "\xcf\x79\x5f\x28\xe0\x9e\x8b\xe7" + "\x4c\x9d\x8a\xff\xc1\xa6\x28\xb8" + "\x65\x69\x8a\x45\x29\xef\x74\x85" + "\xde\x79\xc7\x08\xae\x30\xb0\xf4" + "\xa3\x1d\x51\x41\xab\xce\xcb\xf6" + "\xb5\xd8\x6d\xe0\x85\xe1\x98\xb3" + "\x43\xbb\x86\x83\x0a\xa0\xf5\xb7" + "\x04\x0b\xfa\x71\x1f\xb0\xf6\xd9" + "\x13\x00\x15\xf0\xc7\xeb\x0d\x5a" + "\x9f\xd7\xb9\x6c\x65\x14\x22\x45" + "\x6e\x45\x32\x3e\x7e\x60\x1a\x12" + "\x97\x82\x14\xfb\xaa\x04\x22\xfa" + "\xa0\xe5\x7e\x8c\x78\x02\x48\x5d" + "\x78\x33\x5a\x7c\xad\xdb\x29\xce" + "\xbb\x8b\x61\xa4\xb7\x42\xe2\xac" + "\x8b\x1a\xd9\x2f\x0b\x8b\x62\x21" + "\x83\x35\x7e\xad\x73\xc2\xb5\x6c" + "\x10\x26\x38\x07\xe5\xc7\x36\x80" + "\xe2\x23\x12\x61\xf5\x48\x4b\x2b" + "\xc5\xdf\x15\xd9\x87\x01\xaa\xac" + "\x1e\x7c\xad\x73\x78\x18\x63\xe0" + "\x8b\x9f\x81\xd8\x12\x6a\x28\x10" + "\xbe\x04\x68\x8a\x09\x7c\x1b\x1c" + "\x83\x66\x80\x47\x80\xe8\xfd\x35" + "\x1c\x97\x6f\xae\x49\x10\x66\xcc" + "\xc6\xd8\xcc\x3a\x84\x91\x20\x77" + "\x72\xe4\x24\xd2\x37\x9f\xc5\xc9" + "\x25\x94\x10\x5f\x40\x00\x64\x99" + "\xdc\xae\xd7\x21\x09\x78\x50\x15" + "\xac\x5f\xc6\x2c\xa2\x0b\xa9\x39" + "\x87\x6e\x6d\xab\xde\x08\x51\x16" + "\xc7\x13\xe9\xea\xed\x06\x8e\x2c" + "\xf8\x37\x8c\xf0\xa6\x96\x8d\x43" + "\xb6\x98\x37\xb2\x43\xed\xde\xdf" + "\x89\x1a\xe7\xeb\x9d\xa1\x7b\x0b" + "\x77\xb0\xe2\x75\xc0\xf1\x98\xd9" + "\x80\x55\xc9\x34\x91\xd1\x59\xe8" + "\x4b\x0f\xc1\xa9\x4b\x7a\x84\x06" + "\x20\xa8\x5d\xfa\xd1\xde\x70\x56" + "\x2f\x9e\x91\x9c\x20\xb3\x24\xd8" + "\x84\x3d\xe1\x8c\x7e\x62\x52\xe5" + "\x44\x4b\x9f\xc2\x93\x03\xea\x2b" + "\x59\xc5\xfa\x3f\x91\x2b\xbb\x23" + "\xf5\xb2\x7b\xf5\x38\xaf\xb3\xee" + "\x63\xdc\x7b\xd1\xff\xaa\x8b\xab" + "\x82\x6b\x37\x04\xeb\x74\xbe\x79" + "\xb9\x83\x90\xef\x20\x59\x46\xff" + "\xe9\x97\x3e\x2f\xee\xb6\x64\x18" + "\x38\x4c\x7a\x4a\xf9\x61\xe8\x9a" + "\xa1\xb5\x01\xa6\x47\xd3\x11\xd4" + "\xce\xd3\x91\x49\x88\xc7\xb8\x4d" + "\xb1\xb9\x07\x6d\x16\x72\xae\x46" + "\x5e\x03\xa1\x4b\xb6\x02\x30\xa8" + "\x3d\xa9\x07\x2a\x7c\x19\xe7\x62" + "\x87\xe3\x82\x2f\x6f\xe1\x09\xd9" + "\x94\x97\xea\xdd\x58\x9e\xae\x76" + "\x7e\x35\xe5\xb4\xda\x7e\xf4\xde" + "\xf7\x32\x87\xcd\x93\xbf\x11\x56" + "\x11\xbe\x08\x74\xe1\x69\xad\xe2" + "\xd7\xf8\x86\x75\x8a\x3c\xa4\xbe" + "\x70\xa7\x1b\xfc\x0b\x44\x2a\x76" + "\x35\xea\x5d\x85\x81\xaf\x85\xeb" + "\xa0\x1c\x61\xc2\xf7\x4f\xa5\xdc" + "\x02\x7f\xf6\x95\x40\x6e\x8a\x9a" + "\xf3\x5d\x25\x6e\x14\x3a\x22\xc9" + "\x37\x1c\xeb\x46\x54\x3f\xa5\x91" + "\xc2\xb5\x8c\xfe\x53\x08\x97\x32" + "\x1b\xb2\x30\x27\xfe\x25\x5d\xdc" + "\x08\x87\xd0\xe5\x94\x1a\xd4\xf1" + "\xfe\xd6\xb4\xa3\xe6\x74\x81\x3c" + "\x1b\xb7\x31\xa7\x22\xfd\xd4\xdd" + "\x20\x4e\x7c\x51\xb0\x60\x73\xb8" + "\x9c\xac\x91\x90\x7e\x01\xb0\xe1" + "\x8a\x2f\x75\x1c\x53\x2a\x98\x2a" + "\x06\x52\x95\x52\xb2\xe9\x25\x2e" + "\x4c\xe2\x5a\x00\xb2\x13\x81\x03" + "\x77\x66\x0d\xa5\x99\xda\x4e\x8c" + "\xac\xf3\x13\x53\x27\x45\xaf\x64" + "\x46\xdc\xea\x23\xda\x97\xd1\xab" + "\x7d\x6c\x30\x96\x1f\xbc\x06\x34" + "\x18\x0b\x5e\x21\x35\x11\x8d\x4c" + "\xe0\x2d\xe9\x50\x16\x74\x81\xa8" + "\xb4\x34\xb9\x72\x42\xa6\xcc\xbc" + "\xca\x34\x83\x27\x10\x5b\x68\x45" + "\x8f\x52\x22\x0c\x55\x3d\x29\x7c" + "\xe3\xc0\x66\x05\x42\x91\x5f\x58" + "\xfe\x4a\x62\xd9\x8c\xa9\x04\x19" + "\x04\xa9\x08\x4b\x57\xfc\x67\x53" + "\x08\x7c\xbc\x66\x8a\xb0\xb6\x9f" + "\x92\xd6\x41\x7c\x5b\x2a\x00\x79" + "\x72", + .result = "\xe1\xb6\x8b\x5c\x80\xb8\xcc\x08" + "\x1b\x84\xb2\xd1\xad\xa4\x70\xac" + "\x67\xa9\x39\x27\xac\xb4\x5b\xb7" + "\x4c\x26\x77\x23\x1d\xce\x0a\xbe" + "\x18\x9e\x42\x8b\xbd\x7f\xd6\xf1" + "\xf1\x6b\xe2\x6d\x7f\x92\x0e\xcb" + "\xb8\x79\xba\xb4\xac\x7e\x2d\xc0" + "\x9e\x83\x81\x91\xd5\xea\xc3\x12" + "\x8d\xa4\x26\x70\xa4\xf9\x71\x0b" + "\xbd\x2e\xe1\xb3\x80\x42\x25\xb3" + "\x0b\x31\x99\xe1\x0d\xde\xa6\x90" + "\xf2\xa3\x10\xf7\xe5\xf3\x83\x1e" + "\x2c\xfb\x4d\xf0\x45\x3d\x28\x3c" + "\xb8\xf1\xcb\xbf\x67\xd8\x43\x5a" + "\x9d\x7b\x73\x29\x88\x0f\x13\x06" + "\x37\x50\x0d\x7c\xe6\x9b\x07\xdd" + "\x7e\x01\x1f\x81\x90\x10\x69\xdb" + "\xa4\xad\x8a\x5e\xac\x30\x72\xf2" + "\x36\xcd\xe3\x23\x49\x02\x93\xfa" + "\x3d\xbb\xe2\x98\x83\xeb\xe9\x8d" + "\xb3\x8f\x11\xaa\x53\xdb\xaf\x2e" + "\x95\x13\x99\x3d\x71\xbd\x32\x92" + "\xdd\xfc\x9d\x5e\x6f\x63\x2c\xee" + "\x91\x1f\x4c\x64\x3d\x87\x55\x0f" + "\xcc\x3d\x89\x61\x53\x02\x57\x8f" + "\xe4\x77\x29\x32\xaf\xa6\x2f\x0a" + "\xae\x3c\x3f\x3f\xf4\xfb\x65\x52" + "\xc5\xc1\x78\x78\x53\x28\xad\xed" + "\xd1\x67\x37\xc7\x59\x70\xcd\x0a" + "\xb8\x0f\x80\x51\x9f\xc0\x12\x5e" + "\x06\x0a\x7e\xec\x24\x5f\x73\x00" + "\xb1\x0b\x31\x47\x4f\x73\x8d\xb4" + "\xce\xf3\x55\x45\x6c\x84\x27\xba" + "\xb9\x6f\x03\x4a\xeb\x98\x88\x6e" + "\x53\xed\x25\x19\x0d\x8f\xfe\xca" + "\x60\xe5\x00\x93\x6e\x3c\xff\x19" + "\xae\x08\x3b\x8a\xa6\x84\x05\xfe" + "\x9b\x59\xa0\x8c\xc8\x05\x45\xf5" + "\x05\x37\xdc\x45\x6f\x8b\x95\x8c" + "\x4e\x11\x45\x7a\xce\x21\xa5\xf7" + "\x71\x67\xb9\xce\xd7\xf9\xe9\x5e" + "\x60\xf5\x53\x7a\xa8\x85\x14\x03" + "\xa0\x92\xec\xf3\x51\x80\x84\xc4" + "\xdc\x11\x9e\x57\xce\x4b\x45\xcf" + "\x90\x95\x85\x0b\x96\xe9\xee\x35" + "\x10\xb8\x9b\xf2\x59\x4a\xc6\x7e" + "\x85\xe5\x6f\x38\x51\x93\x40\x0c" + "\x99\xd7\x7f\x32\xa8\x06\x27\xd1" + "\x2b\xd5\xb5\x3a\x1a\xe1\x5e\xda" + "\xcd\x5a\x50\x30\x3c\xc7\xe7\x65" + "\xa6\x07\x0b\x98\x91\xc6\x20\x27" + "\x2a\x03\x63\x1b\x1e\x3d\xaf\xc8" + "\x71\x48\x46\x6a\x64\x28\xf9\x3d" + "\xd1\x1d\xab\xc8\x40\x76\xc2\x39" + "\x4e\x00\x75\xd2\x0e\x82\x58\x8c" + "\xd3\x73\x5a\xea\x46\x89\xbe\xfd" + "\x4e\x2c\x0d\x94\xaa\x9b\x68\xac" + "\x86\x87\x30\x7e\xa9\x16\xcd\x59" + "\xd2\xa6\xbe\x0a\xd8\xf5\xfd\x2d" + "\x49\x69\xd2\x1a\x90\xd2\x1b\xed" + "\xff\x71\x04\x87\x87\x21\xc4\xb8" + "\x1f\x5b\x51\x33\xd0\xd6\x59\x9a" + "\x03\x0e\xd3\x8b\xfb\x57\x73\xfd" + "\x5a\x52\x63\x82\xc8\x85\x2f\xcb" + "\x74\x6d\x4e\xd9\x68\x37\x85\x6a" + "\xd4\xfb\x94\xed\x8d\xd1\x1a\xaf" + "\x76\xa7\xb7\x88\xd0\x2b\x4e\xda" + "\xec\x99\x94\x27\x6f\x87\x8c\xdf" + "\x4b\x5e\xa6\x66\xdd\xcb\x33\x7b" + "\x64\x94\x31\xa8\x37\xa6\x1d\xdb" + "\x0d\x5c\x93\xa4\x40\xf9\x30\x53" + "\x4b\x74\x8d\xdd\xf6\xde\x3c\xac" + "\x5c\x80\x01\x3a\xef\xb1\x9a\x02" + "\x0c\x22\x8e\xe7\x44\x09\x74\x4c" + "\xf2\x9a\x27\x69\x7f\x12\x32\x36" + "\xde\x92\xdf\xde\x8f\x5b\x31\xab" + "\x4a\x01\x26\xe0\xb1\xda\xe8\x37" + "\x21\x64\xe8\xff\x69\xfc\x9e\x41" + "\xd2\x96\x2d\x18\x64\x98\x33\x78" + "\x24\x61\x73\x9b\x47\x29\xf1\xa7" + "\xcb\x27\x0f\xf0\x85\x6d\x8c\x9d" + "\x2c\x95\x9e\xe5\xb2\x8e\x30\x29" + "\x78\x8a\x9d\x65\xb4\x8e\xde\x7b" + "\xd9\x00\x50\xf5\x7f\x81\xc3\x1b" + "\x25\x85\xeb\xc2\x8c\x33\x22\x1e" + "\x68\x38\x22\x30\xd8\x2e\x00\x98" + "\x85\x16\x06\x56\xb4\x81\x74\x20" + "\x95\xdb\x1c\x05\x19\xe8\x23\x4d" + "\x65\x5d\xcc\xd8\x7f\xc4\x2d\x0f" + "\x57\x26\x71\x07\xad\xaa\x71\x9f" + "\x19\x76\x2f\x25\x51\x88\xe4\xc0" + "\x82\x6e\x08\x05\x37\x04\xee\x25" + "\x23\x90\xe9\x4e\xce\x9b\x16\xc1" + "\x31\xe7\x6e\x2c\x1b\xe1\x85\x9a" + "\x0c\x8c\xbb\x12\x1e\x68\x7b\x93" + "\xa9\x3c\x39\x56\x23\x3e\x6e\xc7" + "\x77\x84\xd3\xe0\x86\x59\xaa\xb9" + "\xd5\x53\x58\xc9\x0a\x83\x5f\x85" + "\xd8\x47\x14\x67\x8a\x3c\x17\xe0" + "\xab\x02\x51\xea\xf1\xf0\x4f\x30" + "\x7d\xe0\x92\xc2\x5f\xfb\x19\x5a" + "\x3f\xbd\xf4\x39\xa4\x31\x0c\x39" + "\xd1\xae\x4e\xf7\x65\x7f\x1f\xce" + "\xc2\x39\xd1\x84\xd4\xe5\x02\xe0" + "\x58\xaa\xf1\x5e\x81\xaf\x7f\x72" + "\x0f\x08\x99\x43\xb9\xd8\xac\x41" + "\x35\x55\xf2\xb2\xd4\x98\xb8\x3b" + "\x2b\x3c\x3e\x16\x06\x31\xfc\x79" + "\x47\x38\x63\x51\xc5\xd0\x26\xd7" + "\x43\xb4\x2b\xd9\xc5\x05\xf2\x9d" + "\x18\xc9\x26\x82\x56\xd2\x11\x05" + "\xb6\x89\xb4\x43\x9c\xb5\x9d\x11" + "\x6c\x83\x37\x71\x27\x1c\xae\xbf" + "\xcd\x57\xd2\xee\x0d\x5a\x15\x26" + "\x67\x88\x80\x80\x1b\xdc\xc1\x62" + "\xdd\x4c\xff\x92\x5c\x6c\xe1\xa0" + "\xe3\x79\xa9\x65\x8c\x8c\x14\x42" + "\xe5\x11\xd2\x1a\xad\xa9\x56\x6f" + "\x98\xfc\x8a\x7b\x56\x1f\xc6\xc1" + "\x52\x12\x92\x9b\x41\x0f\x4b\xae" + "\x1b\x4a\xbc\xfe\x23\xb6\x94\x70" + "\x04\x30\x9e\x69\x47\xbe\xb8\x8f" + "\xca\x45\xd7\x8a\xf4\x78\x3e\xaa" + "\x71\x17\xd8\x1e\xb8\x11\x8f\xbc" + "\xc8\x1a\x65\x7b\x41\x89\x72\xc7" + "\x5f\xbe\xc5\x2a\xdb\x5c\x54\xf9" + "\x25\xa3\x7a\x80\x56\x9c\x8c\xab" + "\x26\x19\x10\x36\xa6\xf3\x14\x79" + "\x40\x98\x70\x68\xb7\x35\xd9\xb9" + "\x27\xd4\xe7\x74\x5b\x3d\x97\xb4" + "\xd9\xaa\xd9\xf2\xb5\x14\x84\x1f" + "\xa9\xde\x12\x44\x5b\x00\xc0\xbc" + "\xc8\x11\x25\x1b\x67\x7a\x15\x72" + "\xa6\x31\x6f\xf4\x68\x7a\x86\x9d" + "\x43\x1c\x5f\x16\xd3\xad\x2e\x52" + "\xf3\xb4\xc3\xfa\x27\x2e\x68\x6c" + "\x06\xe7\x4c\x4f\xa2\xe0\xe4\x21" + "\x5d\x9e\x33\x58\x8d\xbf\xd5\x70" + "\xf8\x80\xa5\xdd\xe7\x18\x79\xfa" + "\x7b\xfd\x09\x69\x2c\x37\x32\xa8" + "\x65\xfa\x8d\x8b\x5c\xcc\xe8\xf3" + "\x37\xf6\xa6\xc6\x5c\xa2\x66\x79" + "\xfa\x8a\xa7\xd1\x0b\x2e\x1b\x5e" + "\x95\x35\x00\x76\xae\x42\xf7\x50" + "\x51\x78\xfb\xb4\x28\x24\xde\x1a" + "\x70\x8b\xed\xca\x3c\x5e\xe4\xbd" + "\x28\xb5\xf3\x76\x4f\x67\x5d\x81" + "\xb2\x60\x87\xd9\x7b\x19\x1a\xa7" + "\x79\xa2\xfa\x3f\x9e\xa9\xd7\x25" + "\x61\xe1\x74\x31\xa2\x77\xa0\x1b" + "\xf6\xf7\xcb\xc5\xaa\x9e\xce\xf9" + "\x9b\x96\xef\x51\xc3\x1a\x44\x96" + "\xae\x17\x50\xab\x29\x08\xda\xcc" + "\x1a\xb3\x12\xd0\x24\xe4\xe2\xe0" + "\xc6\xe3\xcc\x82\xd0\xba\x47\x4c" + "\x3f\x49\xd7\xe8\xb6\x61\xaa\x65" + "\x25\x18\x40\x2d\x62\x25\x02\x71" + "\x61\xa2\xc1\xb2\x13\xd2\x71\x3f" + "\x43\x1a\xc9\x09\x92\xff\xd5\x57" + "\xf0\xfc\x5e\x1c\xf1\xf5\xf9\xf3" + "\x5b", + .ilen = 1281, + .rlen = 1281, + .also_non_np = 1, + .np = 3, + .tap = { 1200, 1, 80 }, + }, +}; + +/* Adiantum test vectors from https://github.com/google/adiantum */ +static const struct cipher_testvec adiantum_xchacha12_aes_enc_tv_template[] = { + { + .key = "\x9e\xeb\xb2\x49\x3c\x1c\xf5\xf4" + "\x6a\x99\xc2\xc4\xdf\xb1\xf4\xdd" + "\x75\x20\x57\xea\x2c\x4f\xcd\xb2" + "\xa5\x3d\x7b\x49\x1e\xab\xfd\x0f", + .klen = 32, + .iv = "\xdf\x63\xd4\xab\xd2\x49\xf3\xd8" + "\x33\x81\x37\x60\x7d\xfa\x73\x08" + "\xd8\x49\x6d\x80\xe8\x2f\x62\x54" + "\xeb\x0e\xa9\x39\x5b\x45\x7f\x8a", + .input = "\x67\xc9\xf2\x30\x84\x41\x8e\x43" + "\xfb\xf3\xb3\x3e\x79\x36\x7f\xe8", + .result = "\x6d\x32\x86\x18\x67\x86\x0f\x3f" + "\x96\x7c\x9d\x28\x0d\x53\xec\x9f", + .ilen = 16, + .rlen = 16, + .also_non_np = 1, + .np = 2, + .tap = { 14, 2 }, + }, { + .key = "\x36\x2b\x57\x97\xf8\x5d\xcd\x99" + "\x5f\x1a\x5a\x44\x1d\x92\x0f\x27" + "\xcc\x16\xd7\x2b\x85\x63\x99\xd3" + "\xba\x96\xa1\xdb\xd2\x60\x68\xda", + .klen = 32, + .iv = "\xef\x58\x69\xb1\x2c\x5e\x9a\x47" + "\x24\xc1\xb1\x69\xe1\x12\x93\x8f" + "\x43\x3d\x6d\x00\xdb\x5e\xd8\xd9" + "\x12\x9a\xfe\xd9\xff\x2d\xaa\xc4", + .input = "\x5e\xa8\x68\x19\x85\x98\x12\x23" + "\x26\x0a\xcc\xdb\x0a\x04\xb9\xdf" + "\x4d\xb3\x48\x7b\xb0\xe3\xc8\x19" + "\x43\x5a\x46\x06\x94\x2d\xf2", + .result = "\xc7\xc6\xf1\x73\x8f\xc4\xff\x4a" + "\x39\xbe\x78\xbe\x8d\x28\xc8\x89" + "\x46\x63\xe7\x0c\x7d\x87\xe8\x4e" + "\xc9\x18\x7b\xbe\x18\x60\x50", + .ilen = 31, + .rlen = 31, + }, { + .key = "\xa5\x28\x24\x34\x1a\x3c\xd8\xf7" + "\x05\x91\x8f\xee\x85\x1f\x35\x7f" + "\x80\x3d\xfc\x9b\x94\xf6\xfc\x9e" + "\x19\x09\x00\xa9\x04\x31\x4f\x11", + .klen = 32, + .iv = "\xa1\xba\x49\x95\xff\x34\x6d\xb8" + "\xcd\x87\x5d\x5e\xfd\xea\x85\xdb" + "\x8a\x7b\x5e\xb2\x5d\x57\xdd\x62" + "\xac\xa9\x8c\x41\x42\x94\x75\xb7", + .input = "\x69\xb4\xe8\x8c\x37\xe8\x67\x82" + "\xf1\xec\x5d\x04\xe5\x14\x91\x13" + "\xdf\xf2\x87\x1b\x69\x81\x1d\x71" + "\x70\x9e\x9c\x3b\xde\x49\x70\x11" + "\xa0\xa3\xdb\x0d\x54\x4f\x66\x69" + "\xd7\xdb\x80\xa7\x70\x92\x68\xce" + "\x81\x04\x2c\xc6\xab\xae\xe5\x60" + "\x15\xe9\x6f\xef\xaa\x8f\xa7\xa7" + "\x63\x8f\xf2\xf0\x77\xf1\xa8\xea" + "\xe1\xb7\x1f\x9e\xab\x9e\x4b\x3f" + "\x07\x87\x5b\x6f\xcd\xa8\xaf\xb9" + "\xfa\x70\x0b\x52\xb8\xa8\xa7\x9e" + "\x07\x5f\xa6\x0e\xb3\x9b\x79\x13" + "\x79\xc3\x3e\x8d\x1c\x2c\x68\xc8" + "\x51\x1d\x3c\x7b\x7d\x79\x77\x2a" + "\x56\x65\xc5\x54\x23\x28\xb0\x03", + .result = "\x9e\x16\xab\xed\x4b\xa7\x42\x5a" + "\xc6\xfb\x4e\x76\xff\xbe\x03\xa0" + "\x0f\xe3\xad\xba\xe4\x98\x2b\x0e" + "\x21\x48\xa0\xb8\x65\x48\x27\x48" + "\x84\x54\x54\xb2\x9a\x94\x7b\xe6" + "\x4b\x29\xe9\xcf\x05\x91\x80\x1a" + "\x3a\xf3\x41\x96\x85\x1d\x9f\x74" + "\x51\x56\x63\xfa\x7c\x28\x85\x49" + "\xf7\x2f\xf9\xf2\x18\x46\xf5\x33" + "\x80\xa3\x3c\xce\xb2\x57\x93\xf5" + "\xae\xbd\xa9\xf5\x7b\x30\xc4\x93" + "\x66\xe0\x30\x77\x16\xe4\xa0\x31" + "\xba\x70\xbc\x68\x13\xf5\xb0\x9a" + "\xc1\xfc\x7e\xfe\x55\x80\x5c\x48" + "\x74\xa6\xaa\xa3\xac\xdc\xc2\xf5" + "\x8d\xde\x34\x86\x78\x60\x75\x8d", + .ilen = 128, + .rlen = 128, + .also_non_np = 1, + .np = 4, + .tap = { 104, 16, 4, 4 }, + }, { + .key = "\xd3\x81\x72\x18\x23\xff\x6f\x4a" + "\x25\x74\x29\x0d\x51\x8a\x0e\x13" + "\xc1\x53\x5d\x30\x8d\xee\x75\x0d" + "\x14\xd6\x69\xc9\x15\xa9\x0c\x60", + .klen = 32, + .iv = "\x65\x9b\xd4\xa8\x7d\x29\x1d\xf4" + "\xc4\xd6\x9b\x6a\x28\xab\x64\xe2" + "\x62\x81\x97\xc5\x81\xaa\xf9\x44" + "\xc1\x72\x59\x82\xaf\x16\xc8\x2c", + .input = "\xc7\x6b\x52\x6a\x10\xf0\xcc\x09" + "\xc1\x12\x1d\x6d\x21\xa6\x78\xf5" + "\x05\xa3\x69\x60\x91\x36\x98\x57" + "\xba\x0c\x14\xcc\xf3\x2d\x73\x03" + "\xc6\xb2\x5f\xc8\x16\x27\x37\x5d" + "\xd0\x0b\x87\xb2\x50\x94\x7b\x58" + "\x04\xf4\xe0\x7f\x6e\x57\x8e\xc9" + "\x41\x84\xc1\xb1\x7e\x4b\x91\x12" + "\x3a\x8b\x5d\x50\x82\x7b\xcb\xd9" + "\x9a\xd9\x4e\x18\x06\x23\x9e\xd4" + "\xa5\x20\x98\xef\xb5\xda\xe5\xc0" + "\x8a\x6a\x83\x77\x15\x84\x1e\xae" + "\x78\x94\x9d\xdf\xb7\xd1\xea\x67" + "\xaa\xb0\x14\x15\xfa\x67\x21\x84" + "\xd3\x41\x2a\xce\xba\x4b\x4a\xe8" + "\x95\x62\xa9\x55\xf0\x80\xad\xbd" + "\xab\xaf\xdd\x4f\xa5\x7c\x13\x36" + "\xed\x5e\x4f\x72\xad\x4b\xf1\xd0" + "\x88\x4e\xec\x2c\x88\x10\x5e\xea" + "\x12\xc0\x16\x01\x29\xa3\xa0\x55" + "\xaa\x68\xf3\xe9\x9d\x3b\x0d\x3b" + "\x6d\xec\xf8\xa0\x2d\xf0\x90\x8d" + "\x1c\xe2\x88\xd4\x24\x71\xf9\xb3" + "\xc1\x9f\xc5\xd6\x76\x70\xc5\x2e" + "\x9c\xac\xdb\x90\xbd\x83\x72\xba" + "\x6e\xb5\xa5\x53\x83\xa9\xa5\xbf" + "\x7d\x06\x0e\x3c\x2a\xd2\x04\xb5" + "\x1e\x19\x38\x09\x16\xd2\x82\x1f" + "\x75\x18\x56\xb8\x96\x0b\xa6\xf9" + "\xcf\x62\xd9\x32\x5d\xa9\xd7\x1d" + "\xec\xe4\xdf\x1b\xbe\xf1\x36\xee" + "\xe3\x7b\xb5\x2f\xee\xf8\x53\x3d" + "\x6a\xb7\x70\xa9\xfc\x9c\x57\x25" + "\xf2\x89\x10\xd3\xb8\xa8\x8c\x30" + "\xae\x23\x4f\x0e\x13\x66\x4f\xe1" + "\xb6\xc0\xe4\xf8\xef\x93\xbd\x6e" + "\x15\x85\x6b\xe3\x60\x81\x1d\x68" + "\xd7\x31\x87\x89\x09\xab\xd5\x96" + "\x1d\xf3\x6d\x67\x80\xca\x07\x31" + "\x5d\xa7\xe4\xfb\x3e\xf2\x9b\x33" + "\x52\x18\xc8\x30\xfe\x2d\xca\x1e" + "\x79\x92\x7a\x60\x5c\xb6\x58\x87" + "\xa4\x36\xa2\x67\x92\x8b\xa4\xb7" + "\xf1\x86\xdf\xdc\xc0\x7e\x8f\x63" + "\xd2\xa2\xdc\x78\xeb\x4f\xd8\x96" + "\x47\xca\xb8\x91\xf9\xf7\x94\x21" + "\x5f\x9a\x9f\x5b\xb8\x40\x41\x4b" + "\x66\x69\x6a\x72\xd0\xcb\x70\xb7" + "\x93\xb5\x37\x96\x05\x37\x4f\xe5" + "\x8c\xa7\x5a\x4e\x8b\xb7\x84\xea" + "\xc7\xfc\x19\x6e\x1f\x5a\xa1\xac" + "\x18\x7d\x52\x3b\xb3\x34\x62\x99" + "\xe4\x9e\x31\x04\x3f\xc0\x8d\x84" + "\x17\x7c\x25\x48\x52\x67\x11\x27" + "\x67\xbb\x5a\x85\xca\x56\xb2\x5c" + "\xe6\xec\xd5\x96\x3d\x15\xfc\xfb" + "\x22\x25\xf4\x13\xe5\x93\x4b\x9a" + "\x77\xf1\x52\x18\xfa\x16\x5e\x49" + "\x03\x45\xa8\x08\xfa\xb3\x41\x92" + "\x79\x50\x33\xca\xd0\xd7\x42\x55" + "\xc3\x9a\x0c\x4e\xd9\xa4\x3c\x86" + "\x80\x9f\x53\xd1\xa4\x2e\xd1\xbc" + "\xf1\x54\x6e\x93\xa4\x65\x99\x8e" + "\xdf\x29\xc0\x64\x63\x07\xbb\xea", + .result = "\x15\x97\xd0\x86\x18\x03\x9c\x51" + "\xc5\x11\x36\x62\x13\x92\xe6\x73" + "\x29\x79\xde\xa1\x00\x3e\x08\x64" + "\x17\x1a\xbc\xd5\xfe\x33\x0e\x0c" + "\x7c\x94\xa7\xc6\x3c\xbe\xac\xa2" + "\x89\xe6\xbc\xdf\x0c\x33\x27\x42" + "\x46\x73\x2f\xba\x4e\xa6\x46\x8f" + "\xe4\xee\x39\x63\x42\x65\xa3\x88" + "\x7a\xad\x33\x23\xa9\xa7\x20\x7f" + "\x0b\xe6\x6a\xc3\x60\xda\x9e\xb4" + "\xd6\x07\x8a\x77\x26\xd1\xab\x44" + "\x99\x55\x03\x5e\xed\x8d\x7b\xbd" + "\xc8\x21\xb7\x21\x30\x3f\xc0\xb5" + "\xc8\xec\x6c\x23\xa6\xa3\x6d\xf1" + "\x30\x0a\xd0\xa6\xa9\x28\x69\xae" + "\x2a\xe6\x54\xac\x82\x9d\x6a\x95" + "\x6f\x06\x44\xc5\x5a\x77\x6e\xec" + "\xf8\xf8\x63\xb2\xe6\xaa\xbd\x8e" + "\x0e\x8a\x62\x00\x03\xc8\x84\xdd" + "\x47\x4a\xc3\x55\xba\xb7\xe7\xdf" + "\x08\xbf\x62\xf5\xe8\xbc\xb6\x11" + "\xe4\xcb\xd0\x66\x74\x32\xcf\xd4" + "\xf8\x51\x80\x39\x14\x05\x12\xdb" + "\x87\x93\xe2\x26\x30\x9c\x3a\x21" + "\xe5\xd0\x38\x57\x80\x15\xe4\x08" + "\x58\x05\x49\x7d\xe6\x92\x77\x70" + "\xfb\x1e\x2d\x6a\x84\x00\xc8\x68" + "\xf7\x1a\xdd\xf0\x7b\x38\x1e\xd8" + "\x2c\x78\x78\x61\xcf\xe3\xde\x69" + "\x1f\xd5\x03\xd5\x1a\xb4\xcf\x03" + "\xc8\x7a\x70\x68\x35\xb4\xf6\xbe" + "\x90\x62\xb2\x28\x99\x86\xf5\x44" + "\x99\xeb\x31\xcf\xca\xdf\xd0\x21" + "\xd6\x60\xf7\x0f\x40\xb4\x80\xb7" + "\xab\xe1\x9b\x45\xba\x66\xda\xee" + "\xdd\x04\x12\x40\x98\xe1\x69\xe5" + "\x2b\x9c\x59\x80\xe7\x7b\xcc\x63" + "\xa6\xc0\x3a\xa9\xfe\x8a\xf9\x62" + "\x11\x34\x61\x94\x35\xfe\xf2\x99" + "\xfd\xee\x19\xea\x95\xb6\x12\xbf" + "\x1b\xdf\x02\x1a\xcc\x3e\x7e\x65" + "\x78\x74\x10\x50\x29\x63\x28\xea" + "\x6b\xab\xd4\x06\x4d\x15\x24\x31" + "\xc7\x0a\xc9\x16\xb6\x48\xf0\xbf" + "\x49\xdb\x68\x71\x31\x8f\x87\xe2" + "\x13\x05\x64\xd6\x22\x0c\xf8\x36" + "\x84\x24\x3e\x69\x5e\xb8\x9e\x16" + "\x73\x6c\x83\x1e\xe0\x9f\x9e\xba" + "\xe5\x59\x21\x33\x1b\xa9\x26\xc2" + "\xc7\xd9\x30\x73\xb6\xa6\x73\x82" + "\x19\xfa\x44\x4d\x40\x8b\x69\x04" + "\x94\x74\xea\x6e\xb3\x09\x47\x01" + "\x2a\xb9\x78\x34\x43\x11\xed\xd6" + "\x8c\x95\x65\x1b\x85\x67\xa5\x40" + "\xac\x9c\x05\x4b\x57\x4a\xa9\x96" + "\x0f\xdd\x4f\xa1\xe0\xcf\x6e\xc7" + "\x1b\xed\xa2\xb4\x56\x8c\x09\x6e" + "\xa6\x65\xd7\x55\x81\xb7\xed\x11" + "\x9b\x40\x75\xa8\x6b\x56\xaf\x16" + "\x8b\x3d\xf4\xcb\xfe\xd5\x1d\x3d" + "\x85\xc2\xc0\xde\x43\x39\x4a\x96" + "\xba\x88\x97\xc0\xd6\x00\x0e\x27" + "\x21\xb0\x21\x52\xba\xa7\x37\xaa" + "\xcc\xbf\x95\xa8\xf4\xd0\x91\xf6", + .ilen = 512, + .rlen = 512, + .also_non_np = 1, + .np = 2, + .tap = { 144, 368 }, + } +}; + +static const struct cipher_testvec adiantum_xchacha12_aes_dec_tv_template[] = { + { + .key = "\x9e\xeb\xb2\x49\x3c\x1c\xf5\xf4" + "\x6a\x99\xc2\xc4\xdf\xb1\xf4\xdd" + "\x75\x20\x57\xea\x2c\x4f\xcd\xb2" + "\xa5\x3d\x7b\x49\x1e\xab\xfd\x0f", + .klen = 32, + .iv = "\xdf\x63\xd4\xab\xd2\x49\xf3\xd8" + "\x33\x81\x37\x60\x7d\xfa\x73\x08" + "\xd8\x49\x6d\x80\xe8\x2f\x62\x54" + "\xeb\x0e\xa9\x39\x5b\x45\x7f\x8a", + .result = "\x67\xc9\xf2\x30\x84\x41\x8e\x43" + "\xfb\xf3\xb3\x3e\x79\x36\x7f\xe8", + .input = "\x6d\x32\x86\x18\x67\x86\x0f\x3f" + "\x96\x7c\x9d\x28\x0d\x53\xec\x9f", + .ilen = 16, + .rlen = 16, + .also_non_np = 1, + .np = 2, + .tap = { 14, 2 }, + }, { + .key = "\x36\x2b\x57\x97\xf8\x5d\xcd\x99" + "\x5f\x1a\x5a\x44\x1d\x92\x0f\x27" + "\xcc\x16\xd7\x2b\x85\x63\x99\xd3" + "\xba\x96\xa1\xdb\xd2\x60\x68\xda", + .klen = 32, + .iv = "\xef\x58\x69\xb1\x2c\x5e\x9a\x47" + "\x24\xc1\xb1\x69\xe1\x12\x93\x8f" + "\x43\x3d\x6d\x00\xdb\x5e\xd8\xd9" + "\x12\x9a\xfe\xd9\xff\x2d\xaa\xc4", + .result = "\x5e\xa8\x68\x19\x85\x98\x12\x23" + "\x26\x0a\xcc\xdb\x0a\x04\xb9\xdf" + "\x4d\xb3\x48\x7b\xb0\xe3\xc8\x19" + "\x43\x5a\x46\x06\x94\x2d\xf2", + .input = "\xc7\xc6\xf1\x73\x8f\xc4\xff\x4a" + "\x39\xbe\x78\xbe\x8d\x28\xc8\x89" + "\x46\x63\xe7\x0c\x7d\x87\xe8\x4e" + "\xc9\x18\x7b\xbe\x18\x60\x50", + .ilen = 31, + .rlen = 31, + }, { + .key = "\xa5\x28\x24\x34\x1a\x3c\xd8\xf7" + "\x05\x91\x8f\xee\x85\x1f\x35\x7f" + "\x80\x3d\xfc\x9b\x94\xf6\xfc\x9e" + "\x19\x09\x00\xa9\x04\x31\x4f\x11", + .klen = 32, + .iv = "\xa1\xba\x49\x95\xff\x34\x6d\xb8" + "\xcd\x87\x5d\x5e\xfd\xea\x85\xdb" + "\x8a\x7b\x5e\xb2\x5d\x57\xdd\x62" + "\xac\xa9\x8c\x41\x42\x94\x75\xb7", + .result = "\x69\xb4\xe8\x8c\x37\xe8\x67\x82" + "\xf1\xec\x5d\x04\xe5\x14\x91\x13" + "\xdf\xf2\x87\x1b\x69\x81\x1d\x71" + "\x70\x9e\x9c\x3b\xde\x49\x70\x11" + "\xa0\xa3\xdb\x0d\x54\x4f\x66\x69" + "\xd7\xdb\x80\xa7\x70\x92\x68\xce" + "\x81\x04\x2c\xc6\xab\xae\xe5\x60" + "\x15\xe9\x6f\xef\xaa\x8f\xa7\xa7" + "\x63\x8f\xf2\xf0\x77\xf1\xa8\xea" + "\xe1\xb7\x1f\x9e\xab\x9e\x4b\x3f" + "\x07\x87\x5b\x6f\xcd\xa8\xaf\xb9" + "\xfa\x70\x0b\x52\xb8\xa8\xa7\x9e" + "\x07\x5f\xa6\x0e\xb3\x9b\x79\x13" + "\x79\xc3\x3e\x8d\x1c\x2c\x68\xc8" + "\x51\x1d\x3c\x7b\x7d\x79\x77\x2a" + "\x56\x65\xc5\x54\x23\x28\xb0\x03", + .input = "\x9e\x16\xab\xed\x4b\xa7\x42\x5a" + "\xc6\xfb\x4e\x76\xff\xbe\x03\xa0" + "\x0f\xe3\xad\xba\xe4\x98\x2b\x0e" + "\x21\x48\xa0\xb8\x65\x48\x27\x48" + "\x84\x54\x54\xb2\x9a\x94\x7b\xe6" + "\x4b\x29\xe9\xcf\x05\x91\x80\x1a" + "\x3a\xf3\x41\x96\x85\x1d\x9f\x74" + "\x51\x56\x63\xfa\x7c\x28\x85\x49" + "\xf7\x2f\xf9\xf2\x18\x46\xf5\x33" + "\x80\xa3\x3c\xce\xb2\x57\x93\xf5" + "\xae\xbd\xa9\xf5\x7b\x30\xc4\x93" + "\x66\xe0\x30\x77\x16\xe4\xa0\x31" + "\xba\x70\xbc\x68\x13\xf5\xb0\x9a" + "\xc1\xfc\x7e\xfe\x55\x80\x5c\x48" + "\x74\xa6\xaa\xa3\xac\xdc\xc2\xf5" + "\x8d\xde\x34\x86\x78\x60\x75\x8d", + .ilen = 128, + .rlen = 128, + .also_non_np = 1, + .np = 4, + .tap = { 104, 16, 4, 4 }, + }, { + .key = "\xd3\x81\x72\x18\x23\xff\x6f\x4a" + "\x25\x74\x29\x0d\x51\x8a\x0e\x13" + "\xc1\x53\x5d\x30\x8d\xee\x75\x0d" + "\x14\xd6\x69\xc9\x15\xa9\x0c\x60", + .klen = 32, + .iv = "\x65\x9b\xd4\xa8\x7d\x29\x1d\xf4" + "\xc4\xd6\x9b\x6a\x28\xab\x64\xe2" + "\x62\x81\x97\xc5\x81\xaa\xf9\x44" + "\xc1\x72\x59\x82\xaf\x16\xc8\x2c", + .result = "\xc7\x6b\x52\x6a\x10\xf0\xcc\x09" + "\xc1\x12\x1d\x6d\x21\xa6\x78\xf5" + "\x05\xa3\x69\x60\x91\x36\x98\x57" + "\xba\x0c\x14\xcc\xf3\x2d\x73\x03" + "\xc6\xb2\x5f\xc8\x16\x27\x37\x5d" + "\xd0\x0b\x87\xb2\x50\x94\x7b\x58" + "\x04\xf4\xe0\x7f\x6e\x57\x8e\xc9" + "\x41\x84\xc1\xb1\x7e\x4b\x91\x12" + "\x3a\x8b\x5d\x50\x82\x7b\xcb\xd9" + "\x9a\xd9\x4e\x18\x06\x23\x9e\xd4" + "\xa5\x20\x98\xef\xb5\xda\xe5\xc0" + "\x8a\x6a\x83\x77\x15\x84\x1e\xae" + "\x78\x94\x9d\xdf\xb7\xd1\xea\x67" + "\xaa\xb0\x14\x15\xfa\x67\x21\x84" + "\xd3\x41\x2a\xce\xba\x4b\x4a\xe8" + "\x95\x62\xa9\x55\xf0\x80\xad\xbd" + "\xab\xaf\xdd\x4f\xa5\x7c\x13\x36" + "\xed\x5e\x4f\x72\xad\x4b\xf1\xd0" + "\x88\x4e\xec\x2c\x88\x10\x5e\xea" + "\x12\xc0\x16\x01\x29\xa3\xa0\x55" + "\xaa\x68\xf3\xe9\x9d\x3b\x0d\x3b" + "\x6d\xec\xf8\xa0\x2d\xf0\x90\x8d" + "\x1c\xe2\x88\xd4\x24\x71\xf9\xb3" + "\xc1\x9f\xc5\xd6\x76\x70\xc5\x2e" + "\x9c\xac\xdb\x90\xbd\x83\x72\xba" + "\x6e\xb5\xa5\x53\x83\xa9\xa5\xbf" + "\x7d\x06\x0e\x3c\x2a\xd2\x04\xb5" + "\x1e\x19\x38\x09\x16\xd2\x82\x1f" + "\x75\x18\x56\xb8\x96\x0b\xa6\xf9" + "\xcf\x62\xd9\x32\x5d\xa9\xd7\x1d" + "\xec\xe4\xdf\x1b\xbe\xf1\x36\xee" + "\xe3\x7b\xb5\x2f\xee\xf8\x53\x3d" + "\x6a\xb7\x70\xa9\xfc\x9c\x57\x25" + "\xf2\x89\x10\xd3\xb8\xa8\x8c\x30" + "\xae\x23\x4f\x0e\x13\x66\x4f\xe1" + "\xb6\xc0\xe4\xf8\xef\x93\xbd\x6e" + "\x15\x85\x6b\xe3\x60\x81\x1d\x68" + "\xd7\x31\x87\x89\x09\xab\xd5\x96" + "\x1d\xf3\x6d\x67\x80\xca\x07\x31" + "\x5d\xa7\xe4\xfb\x3e\xf2\x9b\x33" + "\x52\x18\xc8\x30\xfe\x2d\xca\x1e" + "\x79\x92\x7a\x60\x5c\xb6\x58\x87" + "\xa4\x36\xa2\x67\x92\x8b\xa4\xb7" + "\xf1\x86\xdf\xdc\xc0\x7e\x8f\x63" + "\xd2\xa2\xdc\x78\xeb\x4f\xd8\x96" + "\x47\xca\xb8\x91\xf9\xf7\x94\x21" + "\x5f\x9a\x9f\x5b\xb8\x40\x41\x4b" + "\x66\x69\x6a\x72\xd0\xcb\x70\xb7" + "\x93\xb5\x37\x96\x05\x37\x4f\xe5" + "\x8c\xa7\x5a\x4e\x8b\xb7\x84\xea" + "\xc7\xfc\x19\x6e\x1f\x5a\xa1\xac" + "\x18\x7d\x52\x3b\xb3\x34\x62\x99" + "\xe4\x9e\x31\x04\x3f\xc0\x8d\x84" + "\x17\x7c\x25\x48\x52\x67\x11\x27" + "\x67\xbb\x5a\x85\xca\x56\xb2\x5c" + "\xe6\xec\xd5\x96\x3d\x15\xfc\xfb" + "\x22\x25\xf4\x13\xe5\x93\x4b\x9a" + "\x77\xf1\x52\x18\xfa\x16\x5e\x49" + "\x03\x45\xa8\x08\xfa\xb3\x41\x92" + "\x79\x50\x33\xca\xd0\xd7\x42\x55" + "\xc3\x9a\x0c\x4e\xd9\xa4\x3c\x86" + "\x80\x9f\x53\xd1\xa4\x2e\xd1\xbc" + "\xf1\x54\x6e\x93\xa4\x65\x99\x8e" + "\xdf\x29\xc0\x64\x63\x07\xbb\xea", + .input = "\x15\x97\xd0\x86\x18\x03\x9c\x51" + "\xc5\x11\x36\x62\x13\x92\xe6\x73" + "\x29\x79\xde\xa1\x00\x3e\x08\x64" + "\x17\x1a\xbc\xd5\xfe\x33\x0e\x0c" + "\x7c\x94\xa7\xc6\x3c\xbe\xac\xa2" + "\x89\xe6\xbc\xdf\x0c\x33\x27\x42" + "\x46\x73\x2f\xba\x4e\xa6\x46\x8f" + "\xe4\xee\x39\x63\x42\x65\xa3\x88" + "\x7a\xad\x33\x23\xa9\xa7\x20\x7f" + "\x0b\xe6\x6a\xc3\x60\xda\x9e\xb4" + "\xd6\x07\x8a\x77\x26\xd1\xab\x44" + "\x99\x55\x03\x5e\xed\x8d\x7b\xbd" + "\xc8\x21\xb7\x21\x30\x3f\xc0\xb5" + "\xc8\xec\x6c\x23\xa6\xa3\x6d\xf1" + "\x30\x0a\xd0\xa6\xa9\x28\x69\xae" + "\x2a\xe6\x54\xac\x82\x9d\x6a\x95" + "\x6f\x06\x44\xc5\x5a\x77\x6e\xec" + "\xf8\xf8\x63\xb2\xe6\xaa\xbd\x8e" + "\x0e\x8a\x62\x00\x03\xc8\x84\xdd" + "\x47\x4a\xc3\x55\xba\xb7\xe7\xdf" + "\x08\xbf\x62\xf5\xe8\xbc\xb6\x11" + "\xe4\xcb\xd0\x66\x74\x32\xcf\xd4" + "\xf8\x51\x80\x39\x14\x05\x12\xdb" + "\x87\x93\xe2\x26\x30\x9c\x3a\x21" + "\xe5\xd0\x38\x57\x80\x15\xe4\x08" + "\x58\x05\x49\x7d\xe6\x92\x77\x70" + "\xfb\x1e\x2d\x6a\x84\x00\xc8\x68" + "\xf7\x1a\xdd\xf0\x7b\x38\x1e\xd8" + "\x2c\x78\x78\x61\xcf\xe3\xde\x69" + "\x1f\xd5\x03\xd5\x1a\xb4\xcf\x03" + "\xc8\x7a\x70\x68\x35\xb4\xf6\xbe" + "\x90\x62\xb2\x28\x99\x86\xf5\x44" + "\x99\xeb\x31\xcf\xca\xdf\xd0\x21" + "\xd6\x60\xf7\x0f\x40\xb4\x80\xb7" + "\xab\xe1\x9b\x45\xba\x66\xda\xee" + "\xdd\x04\x12\x40\x98\xe1\x69\xe5" + "\x2b\x9c\x59\x80\xe7\x7b\xcc\x63" + "\xa6\xc0\x3a\xa9\xfe\x8a\xf9\x62" + "\x11\x34\x61\x94\x35\xfe\xf2\x99" + "\xfd\xee\x19\xea\x95\xb6\x12\xbf" + "\x1b\xdf\x02\x1a\xcc\x3e\x7e\x65" + "\x78\x74\x10\x50\x29\x63\x28\xea" + "\x6b\xab\xd4\x06\x4d\x15\x24\x31" + "\xc7\x0a\xc9\x16\xb6\x48\xf0\xbf" + "\x49\xdb\x68\x71\x31\x8f\x87\xe2" + "\x13\x05\x64\xd6\x22\x0c\xf8\x36" + "\x84\x24\x3e\x69\x5e\xb8\x9e\x16" + "\x73\x6c\x83\x1e\xe0\x9f\x9e\xba" + "\xe5\x59\x21\x33\x1b\xa9\x26\xc2" + "\xc7\xd9\x30\x73\xb6\xa6\x73\x82" + "\x19\xfa\x44\x4d\x40\x8b\x69\x04" + "\x94\x74\xea\x6e\xb3\x09\x47\x01" + "\x2a\xb9\x78\x34\x43\x11\xed\xd6" + "\x8c\x95\x65\x1b\x85\x67\xa5\x40" + "\xac\x9c\x05\x4b\x57\x4a\xa9\x96" + "\x0f\xdd\x4f\xa1\xe0\xcf\x6e\xc7" + "\x1b\xed\xa2\xb4\x56\x8c\x09\x6e" + "\xa6\x65\xd7\x55\x81\xb7\xed\x11" + "\x9b\x40\x75\xa8\x6b\x56\xaf\x16" + "\x8b\x3d\xf4\xcb\xfe\xd5\x1d\x3d" + "\x85\xc2\xc0\xde\x43\x39\x4a\x96" + "\xba\x88\x97\xc0\xd6\x00\x0e\x27" + "\x21\xb0\x21\x52\xba\xa7\x37\xaa" + "\xcc\xbf\x95\xa8\xf4\xd0\x91\xf6", + .ilen = 512, + .rlen = 512, + .also_non_np = 1, + .np = 2, + .tap = { 144, 368 }, + } +}; + +/* Adiantum with XChaCha20 instead of XChaCha12 */ +/* Test vectors from https://github.com/google/adiantum */ +static const struct cipher_testvec adiantum_xchacha20_aes_enc_tv_template[] = { + { + .key = "\x9e\xeb\xb2\x49\x3c\x1c\xf5\xf4" + "\x6a\x99\xc2\xc4\xdf\xb1\xf4\xdd" + "\x75\x20\x57\xea\x2c\x4f\xcd\xb2" + "\xa5\x3d\x7b\x49\x1e\xab\xfd\x0f", + .klen = 32, + .iv = "\xdf\x63\xd4\xab\xd2\x49\xf3\xd8" + "\x33\x81\x37\x60\x7d\xfa\x73\x08" + "\xd8\x49\x6d\x80\xe8\x2f\x62\x54" + "\xeb\x0e\xa9\x39\x5b\x45\x7f\x8a", + .input = "\x67\xc9\xf2\x30\x84\x41\x8e\x43" + "\xfb\xf3\xb3\x3e\x79\x36\x7f\xe8", + .result = "\xf6\x78\x97\xd6\xaa\x94\x01\x27" + "\x2e\x4d\x83\xe0\x6e\x64\x9a\xdf", + .ilen = 16, + .rlen = 16, + .also_non_np = 1, + .np = 3, + .tap = { 5, 2, 9 }, + }, { + .key = "\x36\x2b\x57\x97\xf8\x5d\xcd\x99" + "\x5f\x1a\x5a\x44\x1d\x92\x0f\x27" + "\xcc\x16\xd7\x2b\x85\x63\x99\xd3" + "\xba\x96\xa1\xdb\xd2\x60\x68\xda", + .klen = 32, + .iv = "\xef\x58\x69\xb1\x2c\x5e\x9a\x47" + "\x24\xc1\xb1\x69\xe1\x12\x93\x8f" + "\x43\x3d\x6d\x00\xdb\x5e\xd8\xd9" + "\x12\x9a\xfe\xd9\xff\x2d\xaa\xc4", + .input = "\x5e\xa8\x68\x19\x85\x98\x12\x23" + "\x26\x0a\xcc\xdb\x0a\x04\xb9\xdf" + "\x4d\xb3\x48\x7b\xb0\xe3\xc8\x19" + "\x43\x5a\x46\x06\x94\x2d\xf2", + .result = "\x4b\xb8\x90\x10\xdf\x7f\x64\x08" + "\x0e\x14\x42\x5f\x00\x74\x09\x36" + "\x57\x72\xb5\xfd\xb5\x5d\xb8\x28" + "\x0c\x04\x91\x14\x91\xe9\x37", + .ilen = 31, + .rlen = 31, + .also_non_np = 1, + .np = 2, + .tap = { 16, 15 }, + }, { + .key = "\xa5\x28\x24\x34\x1a\x3c\xd8\xf7" + "\x05\x91\x8f\xee\x85\x1f\x35\x7f" + "\x80\x3d\xfc\x9b\x94\xf6\xfc\x9e" + "\x19\x09\x00\xa9\x04\x31\x4f\x11", + .klen = 32, + .iv = "\xa1\xba\x49\x95\xff\x34\x6d\xb8" + "\xcd\x87\x5d\x5e\xfd\xea\x85\xdb" + "\x8a\x7b\x5e\xb2\x5d\x57\xdd\x62" + "\xac\xa9\x8c\x41\x42\x94\x75\xb7", + .input = "\x69\xb4\xe8\x8c\x37\xe8\x67\x82" + "\xf1\xec\x5d\x04\xe5\x14\x91\x13" + "\xdf\xf2\x87\x1b\x69\x81\x1d\x71" + "\x70\x9e\x9c\x3b\xde\x49\x70\x11" + "\xa0\xa3\xdb\x0d\x54\x4f\x66\x69" + "\xd7\xdb\x80\xa7\x70\x92\x68\xce" + "\x81\x04\x2c\xc6\xab\xae\xe5\x60" + "\x15\xe9\x6f\xef\xaa\x8f\xa7\xa7" + "\x63\x8f\xf2\xf0\x77\xf1\xa8\xea" + "\xe1\xb7\x1f\x9e\xab\x9e\x4b\x3f" + "\x07\x87\x5b\x6f\xcd\xa8\xaf\xb9" + "\xfa\x70\x0b\x52\xb8\xa8\xa7\x9e" + "\x07\x5f\xa6\x0e\xb3\x9b\x79\x13" + "\x79\xc3\x3e\x8d\x1c\x2c\x68\xc8" + "\x51\x1d\x3c\x7b\x7d\x79\x77\x2a" + "\x56\x65\xc5\x54\x23\x28\xb0\x03", + .result = "\xb1\x8b\xa0\x05\x77\xa8\x4d\x59" + "\x1b\x8e\x21\xfc\x3a\x49\xfa\xd4" + "\xeb\x36\xf3\xc4\xdf\xdc\xae\x67" + "\x07\x3f\x70\x0e\xe9\x66\xf5\x0c" + "\x30\x4d\x66\xc9\xa4\x2f\x73\x9c" + "\x13\xc8\x49\x44\xcc\x0a\x90\x9d" + "\x7c\xdd\x19\x3f\xea\x72\x8d\x58" + "\xab\xe7\x09\x2c\xec\xb5\x44\xd2" + "\xca\xa6\x2d\x7a\x5c\x9c\x2b\x15" + "\xec\x2a\xa6\x69\x91\xf9\xf3\x13" + "\xf7\x72\xc1\xc1\x40\xd5\xe1\x94" + "\xf4\x29\xa1\x3e\x25\x02\xa8\x3e" + "\x94\xc1\x91\x14\xa1\x14\xcb\xbe" + "\x67\x4c\xb9\x38\xfe\xa7\xaa\x32" + "\x29\x62\x0d\xb2\xf6\x3c\x58\x57" + "\xc1\xd5\x5a\xbb\xd6\xa6\x2a\xe5", + .ilen = 128, + .rlen = 128, + .also_non_np = 1, + .np = 4, + .tap = { 112, 7, 8, 1 }, + }, { + .key = "\xd3\x81\x72\x18\x23\xff\x6f\x4a" + "\x25\x74\x29\x0d\x51\x8a\x0e\x13" + "\xc1\x53\x5d\x30\x8d\xee\x75\x0d" + "\x14\xd6\x69\xc9\x15\xa9\x0c\x60", + .klen = 32, + .iv = "\x65\x9b\xd4\xa8\x7d\x29\x1d\xf4" + "\xc4\xd6\x9b\x6a\x28\xab\x64\xe2" + "\x62\x81\x97\xc5\x81\xaa\xf9\x44" + "\xc1\x72\x59\x82\xaf\x16\xc8\x2c", + .input = "\xc7\x6b\x52\x6a\x10\xf0\xcc\x09" + "\xc1\x12\x1d\x6d\x21\xa6\x78\xf5" + "\x05\xa3\x69\x60\x91\x36\x98\x57" + "\xba\x0c\x14\xcc\xf3\x2d\x73\x03" + "\xc6\xb2\x5f\xc8\x16\x27\x37\x5d" + "\xd0\x0b\x87\xb2\x50\x94\x7b\x58" + "\x04\xf4\xe0\x7f\x6e\x57\x8e\xc9" + "\x41\x84\xc1\xb1\x7e\x4b\x91\x12" + "\x3a\x8b\x5d\x50\x82\x7b\xcb\xd9" + "\x9a\xd9\x4e\x18\x06\x23\x9e\xd4" + "\xa5\x20\x98\xef\xb5\xda\xe5\xc0" + "\x8a\x6a\x83\x77\x15\x84\x1e\xae" + "\x78\x94\x9d\xdf\xb7\xd1\xea\x67" + "\xaa\xb0\x14\x15\xfa\x67\x21\x84" + "\xd3\x41\x2a\xce\xba\x4b\x4a\xe8" + "\x95\x62\xa9\x55\xf0\x80\xad\xbd" + "\xab\xaf\xdd\x4f\xa5\x7c\x13\x36" + "\xed\x5e\x4f\x72\xad\x4b\xf1\xd0" + "\x88\x4e\xec\x2c\x88\x10\x5e\xea" + "\x12\xc0\x16\x01\x29\xa3\xa0\x55" + "\xaa\x68\xf3\xe9\x9d\x3b\x0d\x3b" + "\x6d\xec\xf8\xa0\x2d\xf0\x90\x8d" + "\x1c\xe2\x88\xd4\x24\x71\xf9\xb3" + "\xc1\x9f\xc5\xd6\x76\x70\xc5\x2e" + "\x9c\xac\xdb\x90\xbd\x83\x72\xba" + "\x6e\xb5\xa5\x53\x83\xa9\xa5\xbf" + "\x7d\x06\x0e\x3c\x2a\xd2\x04\xb5" + "\x1e\x19\x38\x09\x16\xd2\x82\x1f" + "\x75\x18\x56\xb8\x96\x0b\xa6\xf9" + "\xcf\x62\xd9\x32\x5d\xa9\xd7\x1d" + "\xec\xe4\xdf\x1b\xbe\xf1\x36\xee" + "\xe3\x7b\xb5\x2f\xee\xf8\x53\x3d" + "\x6a\xb7\x70\xa9\xfc\x9c\x57\x25" + "\xf2\x89\x10\xd3\xb8\xa8\x8c\x30" + "\xae\x23\x4f\x0e\x13\x66\x4f\xe1" + "\xb6\xc0\xe4\xf8\xef\x93\xbd\x6e" + "\x15\x85\x6b\xe3\x60\x81\x1d\x68" + "\xd7\x31\x87\x89\x09\xab\xd5\x96" + "\x1d\xf3\x6d\x67\x80\xca\x07\x31" + "\x5d\xa7\xe4\xfb\x3e\xf2\x9b\x33" + "\x52\x18\xc8\x30\xfe\x2d\xca\x1e" + "\x79\x92\x7a\x60\x5c\xb6\x58\x87" + "\xa4\x36\xa2\x67\x92\x8b\xa4\xb7" + "\xf1\x86\xdf\xdc\xc0\x7e\x8f\x63" + "\xd2\xa2\xdc\x78\xeb\x4f\xd8\x96" + "\x47\xca\xb8\x91\xf9\xf7\x94\x21" + "\x5f\x9a\x9f\x5b\xb8\x40\x41\x4b" + "\x66\x69\x6a\x72\xd0\xcb\x70\xb7" + "\x93\xb5\x37\x96\x05\x37\x4f\xe5" + "\x8c\xa7\x5a\x4e\x8b\xb7\x84\xea" + "\xc7\xfc\x19\x6e\x1f\x5a\xa1\xac" + "\x18\x7d\x52\x3b\xb3\x34\x62\x99" + "\xe4\x9e\x31\x04\x3f\xc0\x8d\x84" + "\x17\x7c\x25\x48\x52\x67\x11\x27" + "\x67\xbb\x5a\x85\xca\x56\xb2\x5c" + "\xe6\xec\xd5\x96\x3d\x15\xfc\xfb" + "\x22\x25\xf4\x13\xe5\x93\x4b\x9a" + "\x77\xf1\x52\x18\xfa\x16\x5e\x49" + "\x03\x45\xa8\x08\xfa\xb3\x41\x92" + "\x79\x50\x33\xca\xd0\xd7\x42\x55" + "\xc3\x9a\x0c\x4e\xd9\xa4\x3c\x86" + "\x80\x9f\x53\xd1\xa4\x2e\xd1\xbc" + "\xf1\x54\x6e\x93\xa4\x65\x99\x8e" + "\xdf\x29\xc0\x64\x63\x07\xbb\xea", + .result = "\xe0\x33\xf6\xe0\xb4\xa5\xdd\x2b" + "\xdd\xce\xfc\x12\x1e\xfc\x2d\xf2" + "\x8b\xc7\xeb\xc1\xc4\x2a\xe8\x44" + "\x0f\x3d\x97\x19\x2e\x6d\xa2\x38" + "\x9d\xa6\xaa\xe1\x96\xb9\x08\xe8" + "\x0b\x70\x48\x5c\xed\xb5\x9b\xcb" + "\x8b\x40\x88\x7e\x69\x73\xf7\x16" + "\x71\xbb\x5b\xfc\xa3\x47\x5d\xa6" + "\xae\x3a\x64\xc4\xe7\xb8\xa8\xe7" + "\xb1\x32\x19\xdb\xe3\x01\xb8\xf0" + "\xa4\x86\xb4\x4c\xc2\xde\x5c\xd2" + "\x6c\x77\xd2\xe8\x18\xb7\x0a\xc9" + "\x3d\x53\xb5\xc4\x5c\xf0\x8c\x06" + "\xdc\x90\xe0\x74\x47\x1b\x0b\xf6" + "\xd2\x71\x6b\xc4\xf1\x97\x00\x2d" + "\x63\x57\x44\x1f\x8c\xf4\xe6\x9b" + "\xe0\x7a\xdd\xec\x32\x73\x42\x32" + "\x7f\x35\x67\x60\x0d\xcf\x10\x52" + "\x61\x22\x53\x8d\x8e\xbb\x33\x76" + "\x59\xd9\x10\xce\xdf\xef\xc0\x41" + "\xd5\x33\x29\x6a\xda\x46\xa4\x51" + "\xf0\x99\x3d\x96\x31\xdd\xb5\xcb" + "\x3e\x2a\x1f\xc7\x5c\x79\xd3\xc5" + "\x20\xa1\xb1\x39\x1b\xc6\x0a\x70" + "\x26\x39\x95\x07\xad\x7a\xc9\x69" + "\xfe\x81\xc7\x88\x08\x38\xaf\xad" + "\x9e\x8d\xfb\xe8\x24\x0d\x22\xb8" + "\x0e\xed\xbe\x37\x53\x7c\xa6\xc6" + "\x78\x62\xec\xa3\x59\xd9\xc6\x9d" + "\xb8\x0e\x69\x77\x84\x2d\x6a\x4c" + "\xc5\xd9\xb2\xa0\x2b\xa8\x80\xcc" + "\xe9\x1e\x9c\x5a\xc4\xa1\xb2\x37" + "\x06\x9b\x30\x32\x67\xf7\xe7\xd2" + "\x42\xc7\xdf\x4e\xd4\xcb\xa0\x12" + "\x94\xa1\x34\x85\x93\x50\x4b\x0a" + "\x3c\x7d\x49\x25\x01\x41\x6b\x96" + "\xa9\x12\xbb\x0b\xc0\xd7\xd0\x93" + "\x1f\x70\x38\xb8\x21\xee\xf6\xa7" + "\xee\xeb\xe7\x81\xa4\x13\xb4\x87" + "\xfa\xc1\xb0\xb5\x37\x8b\x74\xa2" + "\x4e\xc7\xc2\xad\x3d\x62\x3f\xf8" + "\x34\x42\xe5\xae\x45\x13\x63\xfe" + "\xfc\x2a\x17\x46\x61\xa9\xd3\x1c" + "\x4c\xaf\xf0\x09\x62\x26\x66\x1e" + "\x74\xcf\xd6\x68\x3d\x7d\xd8\xb7" + "\xe7\xe6\xf8\xf0\x08\x20\xf7\x47" + "\x1c\x52\xaa\x0f\x3e\x21\xa3\xf2" + "\xbf\x2f\x95\x16\xa8\xc8\xc8\x8c" + "\x99\x0f\x5d\xfb\xfa\x2b\x58\x8a" + "\x7e\xd6\x74\x02\x60\xf0\xd0\x5b" + "\x65\xa8\xac\xea\x8d\x68\x46\x34" + "\x26\x9d\x4f\xb1\x9a\x8e\xc0\x1a" + "\xf1\xed\xc6\x7a\x83\xfd\x8a\x57" + "\xf2\xe6\xe4\xba\xfc\xc6\x3c\xad" + "\x5b\x19\x50\x2f\x3a\xcc\x06\x46" + "\x04\x51\x3f\x91\x97\xf0\xd2\x07" + "\xe7\x93\x89\x7e\xb5\x32\x0f\x03" + "\xe5\x58\x9e\x74\x72\xeb\xc2\x38" + "\x00\x0c\x91\x72\x69\xed\x7d\x6d" + "\xc8\x71\xf0\xec\xff\x80\xd9\x1c" + "\x9e\xd2\xfa\x15\xfc\x6c\x4e\xbc" + "\xb1\xa6\xbd\xbd\x70\x40\xca\x20" + "\xb8\x78\xd2\xa3\xc6\xf3\x79\x9c" + "\xc7\x27\xe1\x6a\x29\xad\xa4\x03", + .ilen = 512, + .rlen = 512, + } +}; + +static const struct cipher_testvec adiantum_xchacha20_aes_dec_tv_template[] = { + { + .key = "\x9e\xeb\xb2\x49\x3c\x1c\xf5\xf4" + "\x6a\x99\xc2\xc4\xdf\xb1\xf4\xdd" + "\x75\x20\x57\xea\x2c\x4f\xcd\xb2" + "\xa5\x3d\x7b\x49\x1e\xab\xfd\x0f", + .klen = 32, + .iv = "\xdf\x63\xd4\xab\xd2\x49\xf3\xd8" + "\x33\x81\x37\x60\x7d\xfa\x73\x08" + "\xd8\x49\x6d\x80\xe8\x2f\x62\x54" + "\xeb\x0e\xa9\x39\x5b\x45\x7f\x8a", + .result = "\x67\xc9\xf2\x30\x84\x41\x8e\x43" + "\xfb\xf3\xb3\x3e\x79\x36\x7f\xe8", + .input = "\xf6\x78\x97\xd6\xaa\x94\x01\x27" + "\x2e\x4d\x83\xe0\x6e\x64\x9a\xdf", + .ilen = 16, + .rlen = 16, + .also_non_np = 1, + .np = 3, + .tap = { 5, 2, 9 }, + }, { + .key = "\x36\x2b\x57\x97\xf8\x5d\xcd\x99" + "\x5f\x1a\x5a\x44\x1d\x92\x0f\x27" + "\xcc\x16\xd7\x2b\x85\x63\x99\xd3" + "\xba\x96\xa1\xdb\xd2\x60\x68\xda", + .klen = 32, + .iv = "\xef\x58\x69\xb1\x2c\x5e\x9a\x47" + "\x24\xc1\xb1\x69\xe1\x12\x93\x8f" + "\x43\x3d\x6d\x00\xdb\x5e\xd8\xd9" + "\x12\x9a\xfe\xd9\xff\x2d\xaa\xc4", + .result = "\x5e\xa8\x68\x19\x85\x98\x12\x23" + "\x26\x0a\xcc\xdb\x0a\x04\xb9\xdf" + "\x4d\xb3\x48\x7b\xb0\xe3\xc8\x19" + "\x43\x5a\x46\x06\x94\x2d\xf2", + .input = "\x4b\xb8\x90\x10\xdf\x7f\x64\x08" + "\x0e\x14\x42\x5f\x00\x74\x09\x36" + "\x57\x72\xb5\xfd\xb5\x5d\xb8\x28" + "\x0c\x04\x91\x14\x91\xe9\x37", + .ilen = 31, + .rlen = 31, + .also_non_np = 1, + .np = 2, + .tap = { 16, 15 }, + }, { + .key = "\xa5\x28\x24\x34\x1a\x3c\xd8\xf7" + "\x05\x91\x8f\xee\x85\x1f\x35\x7f" + "\x80\x3d\xfc\x9b\x94\xf6\xfc\x9e" + "\x19\x09\x00\xa9\x04\x31\x4f\x11", + .klen = 32, + .iv = "\xa1\xba\x49\x95\xff\x34\x6d\xb8" + "\xcd\x87\x5d\x5e\xfd\xea\x85\xdb" + "\x8a\x7b\x5e\xb2\x5d\x57\xdd\x62" + "\xac\xa9\x8c\x41\x42\x94\x75\xb7", + .result = "\x69\xb4\xe8\x8c\x37\xe8\x67\x82" + "\xf1\xec\x5d\x04\xe5\x14\x91\x13" + "\xdf\xf2\x87\x1b\x69\x81\x1d\x71" + "\x70\x9e\x9c\x3b\xde\x49\x70\x11" + "\xa0\xa3\xdb\x0d\x54\x4f\x66\x69" + "\xd7\xdb\x80\xa7\x70\x92\x68\xce" + "\x81\x04\x2c\xc6\xab\xae\xe5\x60" + "\x15\xe9\x6f\xef\xaa\x8f\xa7\xa7" + "\x63\x8f\xf2\xf0\x77\xf1\xa8\xea" + "\xe1\xb7\x1f\x9e\xab\x9e\x4b\x3f" + "\x07\x87\x5b\x6f\xcd\xa8\xaf\xb9" + "\xfa\x70\x0b\x52\xb8\xa8\xa7\x9e" + "\x07\x5f\xa6\x0e\xb3\x9b\x79\x13" + "\x79\xc3\x3e\x8d\x1c\x2c\x68\xc8" + "\x51\x1d\x3c\x7b\x7d\x79\x77\x2a" + "\x56\x65\xc5\x54\x23\x28\xb0\x03", + .input = "\xb1\x8b\xa0\x05\x77\xa8\x4d\x59" + "\x1b\x8e\x21\xfc\x3a\x49\xfa\xd4" + "\xeb\x36\xf3\xc4\xdf\xdc\xae\x67" + "\x07\x3f\x70\x0e\xe9\x66\xf5\x0c" + "\x30\x4d\x66\xc9\xa4\x2f\x73\x9c" + "\x13\xc8\x49\x44\xcc\x0a\x90\x9d" + "\x7c\xdd\x19\x3f\xea\x72\x8d\x58" + "\xab\xe7\x09\x2c\xec\xb5\x44\xd2" + "\xca\xa6\x2d\x7a\x5c\x9c\x2b\x15" + "\xec\x2a\xa6\x69\x91\xf9\xf3\x13" + "\xf7\x72\xc1\xc1\x40\xd5\xe1\x94" + "\xf4\x29\xa1\x3e\x25\x02\xa8\x3e" + "\x94\xc1\x91\x14\xa1\x14\xcb\xbe" + "\x67\x4c\xb9\x38\xfe\xa7\xaa\x32" + "\x29\x62\x0d\xb2\xf6\x3c\x58\x57" + "\xc1\xd5\x5a\xbb\xd6\xa6\x2a\xe5", + .ilen = 128, + .rlen = 128, + .also_non_np = 1, + .np = 4, + .tap = { 112, 7, 8, 1 }, + }, { + .key = "\xd3\x81\x72\x18\x23\xff\x6f\x4a" + "\x25\x74\x29\x0d\x51\x8a\x0e\x13" + "\xc1\x53\x5d\x30\x8d\xee\x75\x0d" + "\x14\xd6\x69\xc9\x15\xa9\x0c\x60", + .klen = 32, + .iv = "\x65\x9b\xd4\xa8\x7d\x29\x1d\xf4" + "\xc4\xd6\x9b\x6a\x28\xab\x64\xe2" + "\x62\x81\x97\xc5\x81\xaa\xf9\x44" + "\xc1\x72\x59\x82\xaf\x16\xc8\x2c", + .result = "\xc7\x6b\x52\x6a\x10\xf0\xcc\x09" + "\xc1\x12\x1d\x6d\x21\xa6\x78\xf5" + "\x05\xa3\x69\x60\x91\x36\x98\x57" + "\xba\x0c\x14\xcc\xf3\x2d\x73\x03" + "\xc6\xb2\x5f\xc8\x16\x27\x37\x5d" + "\xd0\x0b\x87\xb2\x50\x94\x7b\x58" + "\x04\xf4\xe0\x7f\x6e\x57\x8e\xc9" + "\x41\x84\xc1\xb1\x7e\x4b\x91\x12" + "\x3a\x8b\x5d\x50\x82\x7b\xcb\xd9" + "\x9a\xd9\x4e\x18\x06\x23\x9e\xd4" + "\xa5\x20\x98\xef\xb5\xda\xe5\xc0" + "\x8a\x6a\x83\x77\x15\x84\x1e\xae" + "\x78\x94\x9d\xdf\xb7\xd1\xea\x67" + "\xaa\xb0\x14\x15\xfa\x67\x21\x84" + "\xd3\x41\x2a\xce\xba\x4b\x4a\xe8" + "\x95\x62\xa9\x55\xf0\x80\xad\xbd" + "\xab\xaf\xdd\x4f\xa5\x7c\x13\x36" + "\xed\x5e\x4f\x72\xad\x4b\xf1\xd0" + "\x88\x4e\xec\x2c\x88\x10\x5e\xea" + "\x12\xc0\x16\x01\x29\xa3\xa0\x55" + "\xaa\x68\xf3\xe9\x9d\x3b\x0d\x3b" + "\x6d\xec\xf8\xa0\x2d\xf0\x90\x8d" + "\x1c\xe2\x88\xd4\x24\x71\xf9\xb3" + "\xc1\x9f\xc5\xd6\x76\x70\xc5\x2e" + "\x9c\xac\xdb\x90\xbd\x83\x72\xba" + "\x6e\xb5\xa5\x53\x83\xa9\xa5\xbf" + "\x7d\x06\x0e\x3c\x2a\xd2\x04\xb5" + "\x1e\x19\x38\x09\x16\xd2\x82\x1f" + "\x75\x18\x56\xb8\x96\x0b\xa6\xf9" + "\xcf\x62\xd9\x32\x5d\xa9\xd7\x1d" + "\xec\xe4\xdf\x1b\xbe\xf1\x36\xee" + "\xe3\x7b\xb5\x2f\xee\xf8\x53\x3d" + "\x6a\xb7\x70\xa9\xfc\x9c\x57\x25" + "\xf2\x89\x10\xd3\xb8\xa8\x8c\x30" + "\xae\x23\x4f\x0e\x13\x66\x4f\xe1" + "\xb6\xc0\xe4\xf8\xef\x93\xbd\x6e" + "\x15\x85\x6b\xe3\x60\x81\x1d\x68" + "\xd7\x31\x87\x89\x09\xab\xd5\x96" + "\x1d\xf3\x6d\x67\x80\xca\x07\x31" + "\x5d\xa7\xe4\xfb\x3e\xf2\x9b\x33" + "\x52\x18\xc8\x30\xfe\x2d\xca\x1e" + "\x79\x92\x7a\x60\x5c\xb6\x58\x87" + "\xa4\x36\xa2\x67\x92\x8b\xa4\xb7" + "\xf1\x86\xdf\xdc\xc0\x7e\x8f\x63" + "\xd2\xa2\xdc\x78\xeb\x4f\xd8\x96" + "\x47\xca\xb8\x91\xf9\xf7\x94\x21" + "\x5f\x9a\x9f\x5b\xb8\x40\x41\x4b" + "\x66\x69\x6a\x72\xd0\xcb\x70\xb7" + "\x93\xb5\x37\x96\x05\x37\x4f\xe5" + "\x8c\xa7\x5a\x4e\x8b\xb7\x84\xea" + "\xc7\xfc\x19\x6e\x1f\x5a\xa1\xac" + "\x18\x7d\x52\x3b\xb3\x34\x62\x99" + "\xe4\x9e\x31\x04\x3f\xc0\x8d\x84" + "\x17\x7c\x25\x48\x52\x67\x11\x27" + "\x67\xbb\x5a\x85\xca\x56\xb2\x5c" + "\xe6\xec\xd5\x96\x3d\x15\xfc\xfb" + "\x22\x25\xf4\x13\xe5\x93\x4b\x9a" + "\x77\xf1\x52\x18\xfa\x16\x5e\x49" + "\x03\x45\xa8\x08\xfa\xb3\x41\x92" + "\x79\x50\x33\xca\xd0\xd7\x42\x55" + "\xc3\x9a\x0c\x4e\xd9\xa4\x3c\x86" + "\x80\x9f\x53\xd1\xa4\x2e\xd1\xbc" + "\xf1\x54\x6e\x93\xa4\x65\x99\x8e" + "\xdf\x29\xc0\x64\x63\x07\xbb\xea", + .input = "\xe0\x33\xf6\xe0\xb4\xa5\xdd\x2b" + "\xdd\xce\xfc\x12\x1e\xfc\x2d\xf2" + "\x8b\xc7\xeb\xc1\xc4\x2a\xe8\x44" + "\x0f\x3d\x97\x19\x2e\x6d\xa2\x38" + "\x9d\xa6\xaa\xe1\x96\xb9\x08\xe8" + "\x0b\x70\x48\x5c\xed\xb5\x9b\xcb" + "\x8b\x40\x88\x7e\x69\x73\xf7\x16" + "\x71\xbb\x5b\xfc\xa3\x47\x5d\xa6" + "\xae\x3a\x64\xc4\xe7\xb8\xa8\xe7" + "\xb1\x32\x19\xdb\xe3\x01\xb8\xf0" + "\xa4\x86\xb4\x4c\xc2\xde\x5c\xd2" + "\x6c\x77\xd2\xe8\x18\xb7\x0a\xc9" + "\x3d\x53\xb5\xc4\x5c\xf0\x8c\x06" + "\xdc\x90\xe0\x74\x47\x1b\x0b\xf6" + "\xd2\x71\x6b\xc4\xf1\x97\x00\x2d" + "\x63\x57\x44\x1f\x8c\xf4\xe6\x9b" + "\xe0\x7a\xdd\xec\x32\x73\x42\x32" + "\x7f\x35\x67\x60\x0d\xcf\x10\x52" + "\x61\x22\x53\x8d\x8e\xbb\x33\x76" + "\x59\xd9\x10\xce\xdf\xef\xc0\x41" + "\xd5\x33\x29\x6a\xda\x46\xa4\x51" + "\xf0\x99\x3d\x96\x31\xdd\xb5\xcb" + "\x3e\x2a\x1f\xc7\x5c\x79\xd3\xc5" + "\x20\xa1\xb1\x39\x1b\xc6\x0a\x70" + "\x26\x39\x95\x07\xad\x7a\xc9\x69" + "\xfe\x81\xc7\x88\x08\x38\xaf\xad" + "\x9e\x8d\xfb\xe8\x24\x0d\x22\xb8" + "\x0e\xed\xbe\x37\x53\x7c\xa6\xc6" + "\x78\x62\xec\xa3\x59\xd9\xc6\x9d" + "\xb8\x0e\x69\x77\x84\x2d\x6a\x4c" + "\xc5\xd9\xb2\xa0\x2b\xa8\x80\xcc" + "\xe9\x1e\x9c\x5a\xc4\xa1\xb2\x37" + "\x06\x9b\x30\x32\x67\xf7\xe7\xd2" + "\x42\xc7\xdf\x4e\xd4\xcb\xa0\x12" + "\x94\xa1\x34\x85\x93\x50\x4b\x0a" + "\x3c\x7d\x49\x25\x01\x41\x6b\x96" + "\xa9\x12\xbb\x0b\xc0\xd7\xd0\x93" + "\x1f\x70\x38\xb8\x21\xee\xf6\xa7" + "\xee\xeb\xe7\x81\xa4\x13\xb4\x87" + "\xfa\xc1\xb0\xb5\x37\x8b\x74\xa2" + "\x4e\xc7\xc2\xad\x3d\x62\x3f\xf8" + "\x34\x42\xe5\xae\x45\x13\x63\xfe" + "\xfc\x2a\x17\x46\x61\xa9\xd3\x1c" + "\x4c\xaf\xf0\x09\x62\x26\x66\x1e" + "\x74\xcf\xd6\x68\x3d\x7d\xd8\xb7" + "\xe7\xe6\xf8\xf0\x08\x20\xf7\x47" + "\x1c\x52\xaa\x0f\x3e\x21\xa3\xf2" + "\xbf\x2f\x95\x16\xa8\xc8\xc8\x8c" + "\x99\x0f\x5d\xfb\xfa\x2b\x58\x8a" + "\x7e\xd6\x74\x02\x60\xf0\xd0\x5b" + "\x65\xa8\xac\xea\x8d\x68\x46\x34" + "\x26\x9d\x4f\xb1\x9a\x8e\xc0\x1a" + "\xf1\xed\xc6\x7a\x83\xfd\x8a\x57" + "\xf2\xe6\xe4\xba\xfc\xc6\x3c\xad" + "\x5b\x19\x50\x2f\x3a\xcc\x06\x46" + "\x04\x51\x3f\x91\x97\xf0\xd2\x07" + "\xe7\x93\x89\x7e\xb5\x32\x0f\x03" + "\xe5\x58\x9e\x74\x72\xeb\xc2\x38" + "\x00\x0c\x91\x72\x69\xed\x7d\x6d" + "\xc8\x71\xf0\xec\xff\x80\xd9\x1c" + "\x9e\xd2\xfa\x15\xfc\x6c\x4e\xbc" + "\xb1\xa6\xbd\xbd\x70\x40\xca\x20" + "\xb8\x78\xd2\xa3\xc6\xf3\x79\x9c" + "\xc7\x27\xe1\x6a\x29\xad\xa4\x03", + .ilen = 512, + .rlen = 512, + } +}; + /* * CTS (Cipher Text Stealing) mode tests */ @@ -34638,4 +37971,75 @@ static const struct comp_testvec lz4hc_decomp_tv_template[] = { }, }; +static const struct comp_testvec zstd_comp_tv_template[] = { + { + .inlen = 68, + .outlen = 39, + .input = "The algorithm is zstd. " + "The algorithm is zstd. " + "The algorithm is zstd.", + .output = "\x28\xb5\x2f\xfd\x00\x50\xf5\x00\x00\xb8\x54\x68\x65" + "\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d\x20\x69\x73" + "\x20\x7a\x73\x74\x64\x2e\x20\x01\x00\x55\x73\x36\x01" + , + }, + { + .inlen = 244, + .outlen = 151, + .input = "zstd, short for Zstandard, is a fast lossless " + "compression algorithm, targeting real-time " + "compression scenarios at zlib-level and better " + "compression ratios. The zstd compression library " + "provides in-memory compression and decompression " + "functions.", + .output = "\x28\xb5\x2f\xfd\x00\x50\x75\x04\x00\x42\x4b\x1e\x17" + "\x90\x81\x31\x00\xf2\x2f\xe4\x36\xc9\xef\x92\x88\x32" + "\xc9\xf2\x24\x94\xd8\x68\x9a\x0f\x00\x0c\xc4\x31\x6f" + "\x0d\x0c\x38\xac\x5c\x48\x03\xcd\x63\x67\xc0\xf3\xad" + "\x4e\x90\xaa\x78\xa0\xa4\xc5\x99\xda\x2f\xb6\x24\x60" + "\xe2\x79\x4b\xaa\xb6\x6b\x85\x0b\xc9\xc6\x04\x66\x86" + "\xe2\xcc\xe2\x25\x3f\x4f\x09\xcd\xb8\x9d\xdb\xc1\x90" + "\xa9\x11\xbc\x35\x44\x69\x2d\x9c\x64\x4f\x13\x31\x64" + "\xcc\xfb\x4d\x95\x93\x86\x7f\x33\x7f\x1a\xef\xe9\x30" + "\xf9\x67\xa1\x94\x0a\x69\x0f\x60\xcd\xc3\xab\x99\xdc" + "\x42\xed\x97\x05\x00\x33\xc3\x15\x95\x3a\x06\xa0\x0e" + "\x20\xa9\x0e\x82\xb9\x43\x45\x01", + }, +}; + +static const struct comp_testvec zstd_decomp_tv_template[] = { + { + .inlen = 43, + .outlen = 68, + .input = "\x28\xb5\x2f\xfd\x04\x50\xf5\x00\x00\xb8\x54\x68\x65" + "\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d\x20\x69\x73" + "\x20\x7a\x73\x74\x64\x2e\x20\x01\x00\x55\x73\x36\x01" + "\x6b\xf4\x13\x35", + .output = "The algorithm is zstd. " + "The algorithm is zstd. " + "The algorithm is zstd.", + }, + { + .inlen = 155, + .outlen = 244, + .input = "\x28\xb5\x2f\xfd\x04\x50\x75\x04\x00\x42\x4b\x1e\x17" + "\x90\x81\x31\x00\xf2\x2f\xe4\x36\xc9\xef\x92\x88\x32" + "\xc9\xf2\x24\x94\xd8\x68\x9a\x0f\x00\x0c\xc4\x31\x6f" + "\x0d\x0c\x38\xac\x5c\x48\x03\xcd\x63\x67\xc0\xf3\xad" + "\x4e\x90\xaa\x78\xa0\xa4\xc5\x99\xda\x2f\xb6\x24\x60" + "\xe2\x79\x4b\xaa\xb6\x6b\x85\x0b\xc9\xc6\x04\x66\x86" + "\xe2\xcc\xe2\x25\x3f\x4f\x09\xcd\xb8\x9d\xdb\xc1\x90" + "\xa9\x11\xbc\x35\x44\x69\x2d\x9c\x64\x4f\x13\x31\x64" + "\xcc\xfb\x4d\x95\x93\x86\x7f\x33\x7f\x1a\xef\xe9\x30" + "\xf9\x67\xa1\x94\x0a\x69\x0f\x60\xcd\xc3\xab\x99\xdc" + "\x42\xed\x97\x05\x00\x33\xc3\x15\x95\x3a\x06\xa0\x0e" + "\x20\xa9\x0e\x82\xb9\x43\x45\x01\xaa\x6d\xda\x0d", + .output = "zstd, short for Zstandard, is a fast lossless " + "compression algorithm, targeting real-time " + "compression scenarios at zlib-level and better " + "compression ratios. The zstd compression library " + "provides in-memory compression and decompression " + "functions.", + }, +}; #endif /* _CRYPTO_TESTMGR_H */
diff --git a/crypto/zstd.c b/crypto/zstd.c new file mode 100644 index 0000000..9a76b3e --- /dev/null +++ b/crypto/zstd.c
@@ -0,0 +1,265 @@ +/* + * Cryptographic API. + * + * Copyright (c) 2017-present, Facebook, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#include <linux/crypto.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/net.h> +#include <linux/vmalloc.h> +#include <linux/zstd.h> +#include <crypto/internal/scompress.h> + + +#define ZSTD_DEF_LEVEL 3 + +struct zstd_ctx { + ZSTD_CCtx *cctx; + ZSTD_DCtx *dctx; + void *cwksp; + void *dwksp; +}; + +static ZSTD_parameters zstd_params(void) +{ + return ZSTD_getParams(ZSTD_DEF_LEVEL, 0, 0); +} + +static int zstd_comp_init(struct zstd_ctx *ctx) +{ + int ret = 0; + const ZSTD_parameters params = zstd_params(); + const size_t wksp_size = ZSTD_CCtxWorkspaceBound(params.cParams); + + ctx->cwksp = vzalloc(wksp_size); + if (!ctx->cwksp) { + ret = -ENOMEM; + goto out; + } + + ctx->cctx = ZSTD_initCCtx(ctx->cwksp, wksp_size); + if (!ctx->cctx) { + ret = -EINVAL; + goto out_free; + } +out: + return ret; +out_free: + vfree(ctx->cwksp); + goto out; +} + +static int zstd_decomp_init(struct zstd_ctx *ctx) +{ + int ret = 0; + const size_t wksp_size = ZSTD_DCtxWorkspaceBound(); + + ctx->dwksp = vzalloc(wksp_size); + if (!ctx->dwksp) { + ret = -ENOMEM; + goto out; + } + + ctx->dctx = ZSTD_initDCtx(ctx->dwksp, wksp_size); + if (!ctx->dctx) { + ret = -EINVAL; + goto out_free; + } +out: + return ret; +out_free: + vfree(ctx->dwksp); + goto out; +} + +static void zstd_comp_exit(struct zstd_ctx *ctx) +{ + vfree(ctx->cwksp); + ctx->cwksp = NULL; + ctx->cctx = NULL; +} + +static void zstd_decomp_exit(struct zstd_ctx *ctx) +{ + vfree(ctx->dwksp); + ctx->dwksp = NULL; + ctx->dctx = NULL; +} + +static int __zstd_init(void *ctx) +{ + int ret; + + ret = zstd_comp_init(ctx); + if (ret) + return ret; + ret = zstd_decomp_init(ctx); + if (ret) + zstd_comp_exit(ctx); + return ret; +} + +static void *zstd_alloc_ctx(struct crypto_scomp *tfm) +{ + int ret; + struct zstd_ctx *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + + ret = __zstd_init(ctx); + if (ret) { + kfree(ctx); + return ERR_PTR(ret); + } + + return ctx; +} + +static int zstd_init(struct crypto_tfm *tfm) +{ + struct zstd_ctx *ctx = crypto_tfm_ctx(tfm); + + return __zstd_init(ctx); +} + +static void __zstd_exit(void *ctx) +{ + zstd_comp_exit(ctx); + zstd_decomp_exit(ctx); +} + +static void zstd_free_ctx(struct crypto_scomp *tfm, void *ctx) +{ + __zstd_exit(ctx); + kzfree(ctx); +} + +static void zstd_exit(struct crypto_tfm *tfm) +{ + struct zstd_ctx *ctx = crypto_tfm_ctx(tfm); + + __zstd_exit(ctx); +} + +static int __zstd_compress(const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen, void *ctx) +{ + size_t out_len; + struct zstd_ctx *zctx = ctx; + const ZSTD_parameters params = zstd_params(); + + out_len = ZSTD_compressCCtx(zctx->cctx, dst, *dlen, src, slen, params); + if (ZSTD_isError(out_len)) + return -EINVAL; + *dlen = out_len; + return 0; +} + +static int zstd_compress(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + struct zstd_ctx *ctx = crypto_tfm_ctx(tfm); + + return __zstd_compress(src, slen, dst, dlen, ctx); +} + +static int zstd_scompress(struct crypto_scomp *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen, + void *ctx) +{ + return __zstd_compress(src, slen, dst, dlen, ctx); +} + +static int __zstd_decompress(const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen, void *ctx) +{ + size_t out_len; + struct zstd_ctx *zctx = ctx; + + out_len = ZSTD_decompressDCtx(zctx->dctx, dst, *dlen, src, slen); + if (ZSTD_isError(out_len)) + return -EINVAL; + *dlen = out_len; + return 0; +} + +static int zstd_decompress(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + struct zstd_ctx *ctx = crypto_tfm_ctx(tfm); + + return __zstd_decompress(src, slen, dst, dlen, ctx); +} + +static int zstd_sdecompress(struct crypto_scomp *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen, + void *ctx) +{ + return __zstd_decompress(src, slen, dst, dlen, ctx); +} + +static struct crypto_alg alg = { + .cra_name = "zstd", + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, + .cra_ctxsize = sizeof(struct zstd_ctx), + .cra_module = THIS_MODULE, + .cra_init = zstd_init, + .cra_exit = zstd_exit, + .cra_u = { .compress = { + .coa_compress = zstd_compress, + .coa_decompress = zstd_decompress } } +}; + +static struct scomp_alg scomp = { + .alloc_ctx = zstd_alloc_ctx, + .free_ctx = zstd_free_ctx, + .compress = zstd_scompress, + .decompress = zstd_sdecompress, + .base = { + .cra_name = "zstd", + .cra_driver_name = "zstd-scomp", + .cra_module = THIS_MODULE, + } +}; + +static int __init zstd_mod_init(void) +{ + int ret; + + ret = crypto_register_alg(&alg); + if (ret) + return ret; + + ret = crypto_register_scomp(&scomp); + if (ret) + crypto_unregister_alg(&alg); + + return ret; +} + +static void __exit zstd_mod_fini(void) +{ + crypto_unregister_alg(&alg); + crypto_unregister_scomp(&scomp); +} + +module_init(zstd_mod_init); +module_exit(zstd_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Zstd Compression Algorithm"); +MODULE_ALIAS_CRYPTO("zstd");
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index 9e80802..a329730 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c
@@ -557,7 +557,8 @@ static int acpi_button_remove(struct acpi_device *device) return 0; } -static int param_set_lid_init_state(const char *val, struct kernel_param *kp) +static int param_set_lid_init_state(const char *val, + const struct kernel_param *kp) { int result = 0; @@ -575,7 +576,8 @@ static int param_set_lid_init_state(const char *val, struct kernel_param *kp) return result; } -static int param_get_lid_init_state(char *buffer, struct kernel_param *kp) +static int param_get_lid_init_state(char *buffer, + const struct kernel_param *kp) { switch (lid_init_state) { case ACPI_BUTTON_LID_INIT_OPEN:
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 3d624c7..70a0f8b 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c
@@ -1962,7 +1962,8 @@ static const struct dev_pm_ops acpi_ec_pm = { SET_SYSTEM_SLEEP_PM_OPS(acpi_ec_suspend, acpi_ec_resume) }; -static int param_set_event_clearing(const char *val, struct kernel_param *kp) +static int param_set_event_clearing(const char *val, + const struct kernel_param *kp) { int result = 0; @@ -1980,7 +1981,8 @@ static int param_set_event_clearing(const char *val, struct kernel_param *kp) return result; } -static int param_get_event_clearing(char *buffer, struct kernel_param *kp) +static int param_get_event_clearing(char *buffer, + const struct kernel_param *kp) { switch (ec_event_clearing) { case ACPI_EC_EVT_TIMING_STATUS:
diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index 0fd57bf..9e728a1 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c
@@ -230,7 +230,8 @@ module_param_cb(trace_method_name, ¶m_ops_trace_method, &trace_method_name, module_param_cb(trace_debug_layer, ¶m_ops_trace_attrib, &acpi_gbl_trace_dbg_layer, 0644); module_param_cb(trace_debug_level, ¶m_ops_trace_attrib, &acpi_gbl_trace_dbg_level, 0644); -static int param_set_trace_state(const char *val, struct kernel_param *kp) +static int param_set_trace_state(const char *val, + const struct kernel_param *kp) { acpi_status status; const char *method = trace_method_name; @@ -266,7 +267,7 @@ static int param_set_trace_state(const char *val, struct kernel_param *kp) return 0; } -static int param_get_trace_state(char *buffer, struct kernel_param *kp) +static int param_get_trace_state(char *buffer, const struct kernel_param *kp) { if (!(acpi_gbl_trace_flags & ACPI_TRACE_ENABLED)) return sprintf(buffer, "disable"); @@ -295,7 +296,8 @@ MODULE_PARM_DESC(aml_debug_output, "To enable/disable the ACPI Debug Object output."); /* /sys/module/acpi/parameters/acpica_version */ -static int param_get_acpica_version(char *buffer, struct kernel_param *kp) +static int param_get_acpica_version(char *buffer, + const struct kernel_param *kp) { int result;
diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig index 7dce379..ee4880b 100644 --- a/drivers/android/Kconfig +++ b/drivers/android/Kconfig
@@ -10,7 +10,7 @@ config ANDROID_BINDER_IPC bool "Android Binder IPC Driver" - depends on MMU + depends on MMU && !M68K default n ---help--- Binder is used in Android for both communication between processes, @@ -32,19 +32,6 @@ created. Each binder device has its own context manager, and is therefore logically separated from the other devices. -config ANDROID_BINDER_IPC_32BIT - bool "Use old (Android 4.4 and earlier) 32-bit binder API" - depends on !64BIT && ANDROID_BINDER_IPC - default y - ---help--- - The Binder API has been changed to support both 32 and 64bit - applications in a mixed environment. - - Enable this to support an old 32-bit Android user-space (v4.4 and - earlier). - - Note that enabling this will break newer Android user-space. - config ANDROID_BINDER_IPC_SELFTEST bool "Android Binder IPC Driver Selftest" depends on ANDROID_BINDER_IPC
diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 96a0f94..fcc3657 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c
@@ -72,11 +72,8 @@ #include <linux/security.h> #include <linux/spinlock.h> -#ifdef CONFIG_ANDROID_BINDER_IPC_32BIT -#define BINDER_IPC_32BIT 1 -#endif - #include <uapi/linux/android/binder.h> +#include <uapi/linux/sched/types.h> #include "binder_alloc.h" #include "binder_trace.h" @@ -141,7 +138,7 @@ enum { }; static uint32_t binder_debug_mask = BINDER_DEBUG_USER_ERROR | BINDER_DEBUG_FAILED_TRANSACTION | BINDER_DEBUG_DEAD_TRANSACTION; -module_param_named(debug_mask, binder_debug_mask, uint, S_IWUSR | S_IRUGO); +module_param_named(debug_mask, binder_debug_mask, uint, 0644); static char *binder_devices_param = CONFIG_ANDROID_BINDER_DEVICES; module_param_named(devices, binder_devices_param, charp, 0444); @@ -150,7 +147,7 @@ static DECLARE_WAIT_QUEUE_HEAD(binder_user_error_wait); static int binder_stop_on_user_error; static int binder_set_stop_on_user_error(const char *val, - struct kernel_param *kp) + const struct kernel_param *kp) { int ret; @@ -160,7 +157,7 @@ static int binder_set_stop_on_user_error(const char *val, return ret; } module_param_call(stop_on_user_error, binder_set_stop_on_user_error, - param_get_int, &binder_stop_on_user_error, S_IWUSR | S_IRUGO); + param_get_int, &binder_stop_on_user_error, 0644); #define binder_debug(mask, x...) \ do { \ @@ -249,7 +246,7 @@ static struct binder_transaction_log_entry *binder_transaction_log_add( unsigned int cur = atomic_inc_return(&log->cur); if (cur >= ARRAY_SIZE(log->entry)) - log->full = 1; + log->full = true; e = &log->entry[cur % ARRAY_SIZE(log->entry)]; WRITE_ONCE(e->debug_id_done, 0); /* @@ -351,10 +348,14 @@ struct binder_error { * and by @lock) * @has_async_transaction: async transaction to node in progress * (protected by @lock) + * @sched_policy: minimum scheduling policy for node + * (invariant after initialized) * @accept_fds: file descriptor operations supported for node * (invariant after initialized) * @min_priority: minimum scheduling priority * (invariant after initialized) + * @inherit_rt: inherit RT scheduling policy from caller + * (invariant after initialized) * @async_todo: list of async work items * (protected by @proc->inner_lock) * @@ -390,6 +391,8 @@ struct binder_node { /* * invariant after initialization */ + u8 sched_policy:2; + u8 inherit_rt:1; u8 accept_fds:1; u8 min_priority; }; @@ -464,6 +467,22 @@ enum binder_deferred_state { }; /** + * struct binder_priority - scheduler policy and priority + * @sched_policy scheduler policy + * @prio [100..139] for SCHED_NORMAL, [0..99] for FIFO/RT + * + * The binder driver supports inheriting the following scheduler policies: + * SCHED_NORMAL + * SCHED_BATCH + * SCHED_FIFO + * SCHED_RR + */ +struct binder_priority { + unsigned int sched_policy; + int prio; +}; + +/** * struct binder_proc - binder process bookkeeping * @proc_node: element for binder_procs list * @threads: rbtree of binder_threads in this proc @@ -493,8 +512,6 @@ enum binder_deferred_state { * (protected by @inner_lock) * @todo: list of work for this process * (protected by @inner_lock) - * @wait: wait queue head to wait for proc work - * (invariant after initialized) * @stats: per-process binder statistics * (atomics, no lock needed) * @delivered_death: list of delivered death notification @@ -537,14 +554,13 @@ struct binder_proc { bool is_dead; struct list_head todo; - wait_queue_head_t wait; struct binder_stats stats; struct list_head delivered_death; int max_threads; int requested_threads; int requested_threads_started; int tmp_ref; - long default_priority; + struct binder_priority default_priority; struct dentry *debugfs_entry; struct binder_alloc alloc; struct binder_context *context; @@ -579,6 +595,8 @@ enum { * (protected by @proc->inner_lock) * @todo: list of work to do for this thread * (protected by @proc->inner_lock) + * @process_todo: whether work in @todo should be processed + * (protected by @proc->inner_lock) * @return_error: transaction errors reported by this thread * (only accessed by this thread) * @reply_error: transaction errors reported by target thread @@ -592,6 +610,7 @@ enum { * @is_dead: thread is dead and awaiting free * when outstanding transactions are cleaned up * (protected by @proc->inner_lock) + * @task: struct task_struct for this thread * * Bookkeeping structure for binder threads. */ @@ -604,12 +623,14 @@ struct binder_thread { bool looper_need_return; /* can be written by other thread */ struct binder_transaction *transaction_stack; struct list_head todo; + bool process_todo; struct binder_error return_error; struct binder_error reply_error; wait_queue_head_t wait; struct binder_stats stats; atomic_t tmp_ref; bool is_dead; + struct task_struct *task; }; struct binder_transaction { @@ -626,8 +647,9 @@ struct binder_transaction { struct binder_buffer *buffer; unsigned int code; unsigned int flags; - long priority; - long saved_priority; + struct binder_priority priority; + struct binder_priority saved_priority; + bool set_priority_called; kuid_t sender_euid; /** * @lock: protects @from, @to_proc, and @to_thread @@ -789,6 +811,16 @@ static bool binder_worklist_empty(struct binder_proc *proc, return ret; } +/** + * binder_enqueue_work_ilocked() - Add an item to the work list + * @work: struct binder_work to add to list + * @target_list: list to add work to + * + * Adds the work to the specified list. Asserts that work + * is not already on a list. + * + * Requires the proc->inner_lock to be held. + */ static void binder_enqueue_work_ilocked(struct binder_work *work, struct list_head *target_list) @@ -799,22 +831,56 @@ binder_enqueue_work_ilocked(struct binder_work *work, } /** - * binder_enqueue_work() - Add an item to the work list - * @proc: binder_proc associated with list + * binder_enqueue_deferred_thread_work_ilocked() - Add deferred thread work + * @thread: thread to queue work to * @work: struct binder_work to add to list - * @target_list: list to add work to * - * Adds the work to the specified list. Asserts that work - * is not already on a list. + * Adds the work to the todo list of the thread. Doesn't set the process_todo + * flag, which means that (if it wasn't already set) the thread will go to + * sleep without handling this work when it calls read. + * + * Requires the proc->inner_lock to be held. */ static void -binder_enqueue_work(struct binder_proc *proc, - struct binder_work *work, - struct list_head *target_list) +binder_enqueue_deferred_thread_work_ilocked(struct binder_thread *thread, + struct binder_work *work) { - binder_inner_proc_lock(proc); - binder_enqueue_work_ilocked(work, target_list); - binder_inner_proc_unlock(proc); + binder_enqueue_work_ilocked(work, &thread->todo); +} + +/** + * binder_enqueue_thread_work_ilocked() - Add an item to the thread work list + * @thread: thread to queue work to + * @work: struct binder_work to add to list + * + * Adds the work to the todo list of the thread, and enables processing + * of the todo queue. + * + * Requires the proc->inner_lock to be held. + */ +static void +binder_enqueue_thread_work_ilocked(struct binder_thread *thread, + struct binder_work *work) +{ + binder_enqueue_work_ilocked(work, &thread->todo); + thread->process_todo = true; +} + +/** + * binder_enqueue_thread_work() - Add an item to the thread work list + * @thread: thread to queue work to + * @work: struct binder_work to add to list + * + * Adds the work to the todo list of the thread, and enables processing + * of the todo queue. + */ +static void +binder_enqueue_thread_work(struct binder_thread *thread, + struct binder_work *work) +{ + binder_inner_proc_lock(thread->proc); + binder_enqueue_thread_work_ilocked(thread, work); + binder_inner_proc_unlock(thread->proc); } static void @@ -940,7 +1006,7 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd) static bool binder_has_work_ilocked(struct binder_thread *thread, bool do_proc_work) { - return !binder_worklist_empty_ilocked(&thread->todo) || + return thread->process_todo || thread->looper_need_return || (do_proc_work && !binder_worklist_empty_ilocked(&thread->proc->todo)); @@ -1064,22 +1130,145 @@ static void binder_wakeup_proc_ilocked(struct binder_proc *proc) binder_wakeup_thread_ilocked(proc, thread, /* sync = */false); } -static void binder_set_nice(long nice) +static bool is_rt_policy(int policy) { - long min_nice; + return policy == SCHED_FIFO || policy == SCHED_RR; +} - if (can_nice(current, nice)) { - set_user_nice(current, nice); +static bool is_fair_policy(int policy) +{ + return policy == SCHED_NORMAL || policy == SCHED_BATCH; +} + +static bool binder_supported_policy(int policy) +{ + return is_fair_policy(policy) || is_rt_policy(policy); +} + +static int to_userspace_prio(int policy, int kernel_priority) +{ + if (is_fair_policy(policy)) + return PRIO_TO_NICE(kernel_priority); + else + return MAX_USER_RT_PRIO - 1 - kernel_priority; +} + +static int to_kernel_prio(int policy, int user_priority) +{ + if (is_fair_policy(policy)) + return NICE_TO_PRIO(user_priority); + else + return MAX_USER_RT_PRIO - 1 - user_priority; +} + +static void binder_do_set_priority(struct task_struct *task, + struct binder_priority desired, + bool verify) +{ + int priority; /* user-space prio value */ + bool has_cap_nice; + unsigned int policy = desired.sched_policy; + + if (task->policy == policy && task->normal_prio == desired.prio) return; + + has_cap_nice = has_capability_noaudit(task, CAP_SYS_NICE); + + priority = to_userspace_prio(policy, desired.prio); + + if (verify && is_rt_policy(policy) && !has_cap_nice) { + long max_rtprio = task_rlimit(task, RLIMIT_RTPRIO); + + if (max_rtprio == 0) { + policy = SCHED_NORMAL; + priority = MIN_NICE; + } else if (priority > max_rtprio) { + priority = max_rtprio; + } } - min_nice = rlimit_to_nice(rlimit(RLIMIT_NICE)); - binder_debug(BINDER_DEBUG_PRIORITY_CAP, - "%d: nice value %ld not allowed use %ld instead\n", - current->pid, nice, min_nice); - set_user_nice(current, min_nice); - if (min_nice <= MAX_NICE) + + if (verify && is_fair_policy(policy) && !has_cap_nice) { + long min_nice = rlimit_to_nice(task_rlimit(task, RLIMIT_NICE)); + + if (min_nice > MAX_NICE) { + binder_user_error("%d RLIMIT_NICE not set\n", + task->pid); + return; + } else if (priority < min_nice) { + priority = min_nice; + } + } + + if (policy != desired.sched_policy || + to_kernel_prio(policy, priority) != desired.prio) + binder_debug(BINDER_DEBUG_PRIORITY_CAP, + "%d: priority %d not allowed, using %d instead\n", + task->pid, desired.prio, + to_kernel_prio(policy, priority)); + + trace_binder_set_priority(task->tgid, task->pid, task->normal_prio, + to_kernel_prio(policy, priority), + desired.prio); + + /* Set the actual priority */ + if (task->policy != policy || is_rt_policy(policy)) { + struct sched_param params; + + params.sched_priority = is_rt_policy(policy) ? priority : 0; + + sched_setscheduler_nocheck(task, + policy | SCHED_RESET_ON_FORK, + ¶ms); + } + if (is_fair_policy(policy)) + set_user_nice(task, priority); +} + +static void binder_set_priority(struct task_struct *task, + struct binder_priority desired) +{ + binder_do_set_priority(task, desired, /* verify = */ true); +} + +static void binder_restore_priority(struct task_struct *task, + struct binder_priority desired) +{ + binder_do_set_priority(task, desired, /* verify = */ false); +} + +static void binder_transaction_priority(struct task_struct *task, + struct binder_transaction *t, + struct binder_priority node_prio, + bool inherit_rt) +{ + struct binder_priority desired_prio = t->priority; + + if (t->set_priority_called) return; - binder_user_error("%d RLIMIT_NICE not set\n", current->pid); + + t->set_priority_called = true; + t->saved_priority.sched_policy = task->policy; + t->saved_priority.prio = task->normal_prio; + + if (!inherit_rt && is_rt_policy(desired_prio.sched_policy)) { + desired_prio.prio = NICE_TO_PRIO(0); + desired_prio.sched_policy = SCHED_NORMAL; + } + + if (node_prio.prio < t->priority.prio || + (node_prio.prio == t->priority.prio && + node_prio.sched_policy == SCHED_FIFO)) { + /* + * In case the minimum priority on the node is + * higher (lower value), use that priority. If + * the priority is the same, but the node uses + * SCHED_FIFO, prefer SCHED_FIFO, since it can + * run unbounded, unlike SCHED_RR. + */ + desired_prio = node_prio; + } + + binder_set_priority(task, desired_prio); } static struct binder_node *binder_get_node_ilocked(struct binder_proc *proc, @@ -1132,6 +1321,7 @@ static struct binder_node *binder_init_node_ilocked( binder_uintptr_t ptr = fp ? fp->binder : 0; binder_uintptr_t cookie = fp ? fp->cookie : 0; __u32 flags = fp ? fp->flags : 0; + s8 priority; assert_spin_locked(&proc->inner_lock); @@ -1164,8 +1354,12 @@ static struct binder_node *binder_init_node_ilocked( node->ptr = ptr; node->cookie = cookie; node->work.type = BINDER_WORK_NODE; - node->min_priority = flags & FLAT_BINDER_FLAG_PRIORITY_MASK; + priority = flags & FLAT_BINDER_FLAG_PRIORITY_MASK; + node->sched_policy = (flags & FLAT_BINDER_FLAG_SCHED_POLICY_MASK) >> + FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT; + node->min_priority = to_kernel_prio(node->sched_policy, priority); node->accept_fds = !!(flags & FLAT_BINDER_FLAG_ACCEPTS_FDS); + node->inherit_rt = !!(flags & FLAT_BINDER_FLAG_INHERIT_RT); spin_lock_init(&node->lock); INIT_LIST_HEAD(&node->work.entry); INIT_LIST_HEAD(&node->async_todo); @@ -1228,6 +1422,17 @@ static int binder_inc_node_nilocked(struct binder_node *node, int strong, node->local_strong_refs++; if (!node->has_strong_ref && target_list) { binder_dequeue_work_ilocked(&node->work); + /* + * Note: this function is the only place where we queue + * directly to a thread->todo without using the + * corresponding binder_enqueue_thread_work() helper + * functions; in this case it's ok to not set the + * process_todo flag, since we know this node work will + * always be followed by other work that starts queue + * processing: in case of synchronous transactions, a + * BR_REPLY or BR_ERROR; in case of oneway + * transactions, a BR_TRANSACTION_COMPLETE. + */ binder_enqueue_work_ilocked(&node->work, target_list); } } else { @@ -1239,6 +1444,9 @@ static int binder_inc_node_nilocked(struct binder_node *node, int strong, node->debug_id); return -EINVAL; } + /* + * See comment above + */ binder_enqueue_work_ilocked(&node->work, target_list); } } @@ -1928,9 +2136,9 @@ static void binder_send_failed_reply(struct binder_transaction *t, binder_pop_transaction_ilocked(target_thread, t); if (target_thread->reply_error.cmd == BR_OK) { target_thread->reply_error.cmd = error_code; - binder_enqueue_work_ilocked( - &target_thread->reply_error.work, - &target_thread->todo); + binder_enqueue_thread_work_ilocked( + target_thread, + &target_thread->reply_error.work); wake_up_interruptible(&target_thread->wait); } else { /* @@ -2000,8 +2208,8 @@ static size_t binder_validate_object(struct binder_buffer *buffer, u64 offset) struct binder_object_header *hdr; size_t object_size = 0; - if (offset > buffer->data_size - sizeof(*hdr) || - buffer->data_size < sizeof(*hdr) || + if (buffer->data_size < sizeof(*hdr) || + offset > buffer->data_size - sizeof(*hdr) || !IS_ALIGNED(offset, sizeof(u32))) return 0; @@ -2575,20 +2783,22 @@ static bool binder_proc_transaction(struct binder_transaction *t, struct binder_proc *proc, struct binder_thread *thread) { - struct list_head *target_list = NULL; struct binder_node *node = t->buffer->target_node; + struct binder_priority node_prio; bool oneway = !!(t->flags & TF_ONE_WAY); - bool wakeup = true; + bool pending_async = false; BUG_ON(!node); binder_node_lock(node); + node_prio.prio = node->min_priority; + node_prio.sched_policy = node->sched_policy; + if (oneway) { BUG_ON(thread); if (node->has_async_transaction) { - target_list = &node->async_todo; - wakeup = false; + pending_async = true; } else { - node->has_async_transaction = 1; + node->has_async_transaction = true; } } @@ -2600,19 +2810,20 @@ static bool binder_proc_transaction(struct binder_transaction *t, return false; } - if (!thread && !target_list) + if (!thread && !pending_async) thread = binder_select_thread_ilocked(proc); - if (thread) - target_list = &thread->todo; - else if (!target_list) - target_list = &proc->todo; - else - BUG_ON(target_list != &node->async_todo); + if (thread) { + binder_transaction_priority(thread->task, t, node_prio, + node->inherit_rt); + binder_enqueue_thread_work_ilocked(thread, &t->work); + } else if (!pending_async) { + binder_enqueue_work_ilocked(&t->work, &proc->todo); + } else { + binder_enqueue_work_ilocked(&t->work, &node->async_todo); + } - binder_enqueue_work_ilocked(&t->work, target_list); - - if (wakeup) + if (!pending_async) binder_wakeup_thread_ilocked(proc, thread, !oneway /* sync */); binder_inner_proc_unlock(proc); @@ -2727,7 +2938,6 @@ static void binder_transaction(struct binder_proc *proc, } thread->transaction_stack = in_reply_to->to_parent; binder_inner_proc_unlock(proc); - binder_set_nice(in_reply_to->saved_priority); target_thread = binder_get_txn_from_and_acq_inner(in_reply_to); if (target_thread == NULL) { return_error = BR_DEAD_REPLY; @@ -2900,7 +3110,15 @@ static void binder_transaction(struct binder_proc *proc, t->to_thread = target_thread; t->code = tr->code; t->flags = tr->flags; - t->priority = task_nice(current); + if (!(t->flags & TF_ONE_WAY) && + binder_supported_policy(current->policy)) { + /* Inherit supported policies for synchronous transactions */ + t->priority.sched_policy = current->policy; + t->priority.prio = current->normal_prio; + } else { + /* Otherwise, fall back to the default priority */ + t->priority = target_proc->default_priority; + } trace_binder_transaction(reply, t, target_node); @@ -3114,10 +3332,10 @@ static void binder_transaction(struct binder_proc *proc, } } tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; - binder_enqueue_work(proc, tcomplete, &thread->todo); t->work.type = BINDER_WORK_TRANSACTION; if (reply) { + binder_enqueue_thread_work(thread, tcomplete); binder_inner_proc_lock(target_proc); if (target_thread->is_dead) { binder_inner_proc_unlock(target_proc); @@ -3125,13 +3343,22 @@ static void binder_transaction(struct binder_proc *proc, } BUG_ON(t->buffer->async_transaction != 0); binder_pop_transaction_ilocked(target_thread, in_reply_to); - binder_enqueue_work_ilocked(&t->work, &target_thread->todo); + binder_enqueue_thread_work_ilocked(target_thread, &t->work); binder_inner_proc_unlock(target_proc); wake_up_interruptible_sync(&target_thread->wait); + binder_restore_priority(current, in_reply_to->saved_priority); binder_free_transaction(in_reply_to); } else if (!(t->flags & TF_ONE_WAY)) { BUG_ON(t->buffer->async_transaction != 0); binder_inner_proc_lock(proc); + /* + * Defer the TRANSACTION_COMPLETE, so we don't return to + * userspace immediately; this allows the target process to + * immediately start processing this transaction, reducing + * latency. We will then return the TRANSACTION_COMPLETE when + * the target replies (or there is an error). + */ + binder_enqueue_deferred_thread_work_ilocked(thread, tcomplete); t->need_reply = 1; t->from_parent = thread->transaction_stack; thread->transaction_stack = t; @@ -3145,6 +3372,7 @@ static void binder_transaction(struct binder_proc *proc, } else { BUG_ON(target_node == NULL); BUG_ON(t->buffer->async_transaction != 1); + binder_enqueue_thread_work(thread, tcomplete); if (!binder_proc_transaction(t, target_proc, NULL)) goto err_dead_proc_or_thread; } @@ -3222,16 +3450,13 @@ static void binder_transaction(struct binder_proc *proc, BUG_ON(thread->return_error.cmd != BR_OK); if (in_reply_to) { + binder_restore_priority(current, in_reply_to->saved_priority); thread->return_error.cmd = BR_TRANSACTION_COMPLETE; - binder_enqueue_work(thread->proc, - &thread->return_error.work, - &thread->todo); + binder_enqueue_thread_work(thread, &thread->return_error.work); binder_send_failed_reply(in_reply_to, return_error); } else { thread->return_error.cmd = return_error; - binder_enqueue_work(thread->proc, - &thread->return_error.work, - &thread->todo); + binder_enqueue_thread_work(thread, &thread->return_error.work); } } @@ -3441,7 +3666,7 @@ static int binder_thread_write(struct binder_proc *proc, w = binder_dequeue_work_head_ilocked( &buf_node->async_todo); if (!w) { - buf_node->has_async_transaction = 0; + buf_node->has_async_transaction = false; } else { binder_enqueue_work_ilocked( w, &proc->todo); @@ -3539,10 +3764,9 @@ static int binder_thread_write(struct binder_proc *proc, WARN_ON(thread->return_error.cmd != BR_OK); thread->return_error.cmd = BR_ERROR; - binder_enqueue_work( - thread->proc, - &thread->return_error.work, - &thread->todo); + binder_enqueue_thread_work( + thread, + &thread->return_error.work); binder_debug( BINDER_DEBUG_FAILED_TRANSACTION, "%d:%d BC_REQUEST_DEATH_NOTIFICATION failed\n", @@ -3622,9 +3846,9 @@ static int binder_thread_write(struct binder_proc *proc, if (thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED)) - binder_enqueue_work_ilocked( - &death->work, - &thread->todo); + binder_enqueue_thread_work_ilocked( + thread, + &death->work); else { binder_enqueue_work_ilocked( &death->work, @@ -3679,8 +3903,8 @@ static int binder_thread_write(struct binder_proc *proc, if (thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED)) - binder_enqueue_work_ilocked( - &death->work, &thread->todo); + binder_enqueue_thread_work_ilocked( + thread, &death->work); else { binder_enqueue_work_ilocked( &death->work, @@ -3811,7 +4035,7 @@ static int binder_thread_read(struct binder_proc *proc, wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); } - binder_set_nice(proc->default_priority); + binder_restore_priority(current, proc->default_priority); } if (non_block) { @@ -3854,6 +4078,8 @@ static int binder_thread_read(struct binder_proc *proc, break; } w = binder_dequeue_work_head_ilocked(list); + if (binder_worklist_empty_ilocked(&thread->todo)) + thread->process_todo = false; switch (w->type) { case BINDER_WORK_TRANSACTION: { @@ -3868,6 +4094,7 @@ static int binder_thread_read(struct binder_proc *proc, binder_inner_proc_unlock(proc); if (put_user(e->cmd, (uint32_t __user *)ptr)) return -EFAULT; + cmd = e->cmd; e->cmd = BR_OK; ptr += sizeof(uint32_t); @@ -4023,16 +4250,14 @@ static int binder_thread_read(struct binder_proc *proc, BUG_ON(t->buffer == NULL); if (t->buffer->target_node) { struct binder_node *target_node = t->buffer->target_node; + struct binder_priority node_prio; tr.target.ptr = target_node->ptr; tr.cookie = target_node->cookie; - t->saved_priority = task_nice(current); - if (t->priority < target_node->min_priority && - !(t->flags & TF_ONE_WAY)) - binder_set_nice(t->priority); - else if (!(t->flags & TF_ONE_WAY) || - t->saved_priority > target_node->min_priority) - binder_set_nice(target_node->min_priority); + node_prio.sched_policy = target_node->sched_policy; + node_prio.prio = target_node->min_priority; + binder_transaction_priority(current, t, node_prio, + target_node->inherit_rt); cmd = BR_TRANSACTION; } else { tr.target.ptr = 0; @@ -4210,6 +4435,8 @@ static struct binder_thread *binder_get_thread_ilocked( binder_stats_created(BINDER_STAT_THREAD); thread->proc = proc; thread->pid = current->pid; + get_task_struct(current); + thread->task = current; atomic_set(&thread->tmp_ref, 0); init_waitqueue_head(&thread->wait); INIT_LIST_HEAD(&thread->todo); @@ -4260,6 +4487,7 @@ static void binder_free_thread(struct binder_thread *thread) BUG_ON(!list_empty(&thread->todo)); binder_stats_deleted(BINDER_STAT_THREAD); binder_proc_dec_tmpref(thread->proc); + put_task_struct(thread->task); kfree(thread); } @@ -4487,6 +4715,42 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp) return ret; } +static int binder_ioctl_get_node_info_for_ref(struct binder_proc *proc, + struct binder_node_info_for_ref *info) +{ + struct binder_node *node; + struct binder_context *context = proc->context; + __u32 handle = info->handle; + + if (info->strong_count || info->weak_count || info->reserved1 || + info->reserved2 || info->reserved3) { + binder_user_error("%d BINDER_GET_NODE_INFO_FOR_REF: only handle may be non-zero.", + proc->pid); + return -EINVAL; + } + + /* This ioctl may only be used by the context manager */ + mutex_lock(&context->context_mgr_node_lock); + if (!context->binder_context_mgr_node || + context->binder_context_mgr_node->proc != proc) { + mutex_unlock(&context->context_mgr_node_lock); + return -EPERM; + } + mutex_unlock(&context->context_mgr_node_lock); + + node = binder_get_node_from_ref(proc, handle, true, NULL); + if (!node) + return -EINVAL; + + info->strong_count = node->local_strong_refs + + node->internal_strong_refs; + info->weak_count = node->local_weak_refs; + + binder_put_node(node); + + return 0; +} + static int binder_ioctl_get_node_debug_info(struct binder_proc *proc, struct binder_node_debug_info *info) { @@ -4581,6 +4845,25 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } break; } + case BINDER_GET_NODE_INFO_FOR_REF: { + struct binder_node_info_for_ref info; + + if (copy_from_user(&info, ubuf, sizeof(info))) { + ret = -EFAULT; + goto err; + } + + ret = binder_ioctl_get_node_info_for_ref(proc, &info); + if (ret < 0) + goto err; + + if (copy_to_user(ubuf, &info, sizeof(info))) { + ret = -EFAULT; + goto err; + } + + break; + } case BINDER_GET_NODE_DEBUG_INFO: { struct binder_node_debug_info info; @@ -4673,7 +4956,9 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) failure_string = "bad vm_flags"; goto err_bad_arg; } - vma->vm_flags = (vma->vm_flags | VM_DONTCOPY) & ~VM_MAYWRITE; + vma->vm_flags |= VM_DONTCOPY | VM_MIXEDMAP; + vma->vm_flags &= ~VM_MAYWRITE; + vma->vm_ops = &binder_vm_ops; vma->vm_private_data = proc; @@ -4686,7 +4971,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) return 0; err_bad_arg: - pr_err("binder_mmap: %d %lx-%lx %s failed %d\n", + pr_err("%s: %d %lx-%lx %s failed %d\n", __func__, proc->pid, vma->vm_start, vma->vm_end, failure_string, ret); return ret; } @@ -4696,7 +4981,7 @@ static int binder_open(struct inode *nodp, struct file *filp) struct binder_proc *proc; struct binder_device *binder_dev; - binder_debug(BINDER_DEBUG_OPEN_CLOSE, "binder_open: %d:%d\n", + binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n", __func__, current->group_leader->pid, current->pid); proc = kzalloc(sizeof(*proc), GFP_KERNEL); @@ -4708,7 +4993,14 @@ static int binder_open(struct inode *nodp, struct file *filp) proc->tsk = current->group_leader; mutex_init(&proc->files_lock); INIT_LIST_HEAD(&proc->todo); - proc->default_priority = task_nice(current); + if (binder_supported_policy(current->policy)) { + proc->default_priority.sched_policy = current->policy; + proc->default_priority.prio = current->normal_prio; + } else { + proc->default_priority.sched_policy = SCHED_NORMAL; + proc->default_priority.prio = NICE_TO_PRIO(0); + } + binder_dev = container_of(filp->private_data, struct binder_device, miscdev); proc->context = &binder_dev->context; @@ -4735,7 +5027,7 @@ static int binder_open(struct inode *nodp, struct file *filp) * anyway print all contexts that a given PID has, so this * is not a problem. */ - proc->debugfs_entry = debugfs_create_file(strbuf, S_IRUGO, + proc->debugfs_entry = debugfs_create_file(strbuf, 0444, binder_debugfs_dir_entry_proc, (void *)(unsigned long)proc->pid, &binder_proc_fops); @@ -5002,13 +5294,14 @@ static void print_binder_transaction_ilocked(struct seq_file *m, spin_lock(&t->lock); to_proc = t->to_proc; seq_printf(m, - "%s %d: %pK from %d:%d to %d:%d code %x flags %x pri %ld r%d", + "%s %d: %pK from %d:%d to %d:%d code %x flags %x pri %d:%d r%d", prefix, t->debug_id, t, t->from ? t->from->proc->pid : 0, t->from ? t->from->pid : 0, to_proc ? to_proc->pid : 0, t->to_thread ? t->to_thread->pid : 0, - t->code, t->flags, t->priority, t->need_reply); + t->code, t->flags, t->priority.sched_policy, + t->priority.prio, t->need_reply); spin_unlock(&t->lock); if (proc != to_proc) { @@ -5126,8 +5419,9 @@ static void print_binder_node_nilocked(struct seq_file *m, hlist_for_each_entry(ref, &node->refs, node_entry) count++; - seq_printf(m, " node %d: u%016llx c%016llx hs %d hw %d ls %d lw %d is %d iw %d tr %d", + seq_printf(m, " node %d: u%016llx c%016llx pri %d:%d hs %d hw %d ls %d lw %d is %d iw %d tr %d", node->debug_id, (u64)node->ptr, (u64)node->cookie, + node->sched_policy, node->min_priority, node->has_strong_ref, node->has_weak_ref, node->local_strong_refs, node->local_weak_refs, node->internal_strong_refs, count, node->tmp_refs); @@ -5177,6 +5471,9 @@ static void print_binder_proc(struct seq_file *m, for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) { struct binder_node *node = rb_entry(n, struct binder_node, rb_node); + if (!print_all && !node->has_async_transaction) + continue; + /* * take a temporary reference on the node so it * survives and isn't removed from the tree @@ -5564,7 +5861,9 @@ static int __init binder_init(void) struct binder_device *device; struct hlist_node *tmp; - binder_alloc_shrinker_init(); + ret = binder_alloc_shrinker_init(); + if (ret) + return ret; atomic_set(&binder_transaction_log.cur, ~0U); atomic_set(&binder_transaction_log_failed.cur, ~0U); @@ -5576,27 +5875,27 @@ static int __init binder_init(void) if (binder_debugfs_dir_entry_root) { debugfs_create_file("state", - S_IRUGO, + 0444, binder_debugfs_dir_entry_root, NULL, &binder_state_fops); debugfs_create_file("stats", - S_IRUGO, + 0444, binder_debugfs_dir_entry_root, NULL, &binder_stats_fops); debugfs_create_file("transactions", - S_IRUGO, + 0444, binder_debugfs_dir_entry_root, NULL, &binder_transactions_fops); debugfs_create_file("transaction_log", - S_IRUGO, + 0444, binder_debugfs_dir_entry_root, &binder_transaction_log, &binder_transaction_log_fops); debugfs_create_file("failed_transaction_log", - S_IRUGO, + 0444, binder_debugfs_dir_entry_root, &binder_transaction_log_failed, &binder_transaction_log_fops);
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index b9281f2..e00d4d1 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c
@@ -184,12 +184,12 @@ struct binder_buffer *binder_alloc_prepare_to_free(struct binder_alloc *alloc, } static int binder_update_page_range(struct binder_alloc *alloc, int allocate, - void *start, void *end, - struct vm_area_struct *vma) + void *start, void *end) { void *page_addr; unsigned long user_page_addr; struct binder_lru_page *page; + struct vm_area_struct *vma = NULL; struct mm_struct *mm = NULL; bool need_mm = false; @@ -213,11 +213,11 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, } } - if (!vma && need_mm && mmget_not_zero(alloc->vma_vm_mm)) + if (need_mm && mmget_not_zero(alloc->vma_vm_mm)) mm = alloc->vma_vm_mm; if (mm) { - down_write(&mm->mmap_sem); + down_read(&mm->mmap_sem); vma = alloc->vma; } @@ -279,11 +279,14 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, goto err_vm_insert_page_failed; } + if (index + 1 > alloc->pages_high) + alloc->pages_high = index + 1; + trace_binder_alloc_page_end(alloc, index); /* vm_insert_page does not seem to increment the refcount */ } if (mm) { - up_write(&mm->mmap_sem); + up_read(&mm->mmap_sem); mmput(mm); } return 0; @@ -316,7 +319,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, } err_no_vma: if (mm) { - up_write(&mm->mmap_sem); + up_read(&mm->mmap_sem); mmput(mm); } return vma ? -ENOMEM : -ESRCH; @@ -350,11 +353,12 @@ static inline struct vm_area_struct *binder_alloc_get_vma( return vma; } -struct binder_buffer *binder_alloc_new_buf_locked(struct binder_alloc *alloc, - size_t data_size, - size_t offsets_size, - size_t extra_buffers_size, - int is_async) +static struct binder_buffer *binder_alloc_new_buf_locked( + struct binder_alloc *alloc, + size_t data_size, + size_t offsets_size, + size_t extra_buffers_size, + int is_async) { struct rb_node *n = alloc->free_buffers.rb_node; struct binder_buffer *buffer; @@ -463,7 +467,7 @@ struct binder_buffer *binder_alloc_new_buf_locked(struct binder_alloc *alloc, if (end_page_addr > has_page_addr) end_page_addr = has_page_addr; ret = binder_update_page_range(alloc, 1, - (void *)PAGE_ALIGN((uintptr_t)buffer->data), end_page_addr, NULL); + (void *)PAGE_ALIGN((uintptr_t)buffer->data), end_page_addr); if (ret) return ERR_PTR(ret); @@ -504,7 +508,7 @@ struct binder_buffer *binder_alloc_new_buf_locked(struct binder_alloc *alloc, err_alloc_buf_struct_failed: binder_update_page_range(alloc, 0, (void *)PAGE_ALIGN((uintptr_t)buffer->data), - end_page_addr, NULL); + end_page_addr); return ERR_PTR(-ENOMEM); } @@ -588,8 +592,7 @@ static void binder_delete_free_buffer(struct binder_alloc *alloc, alloc->pid, buffer->data, prev->data, next ? next->data : NULL); binder_update_page_range(alloc, 0, buffer_start_page(buffer), - buffer_start_page(buffer) + PAGE_SIZE, - NULL); + buffer_start_page(buffer) + PAGE_SIZE); } list_del(&buffer->entry); kfree(buffer); @@ -626,8 +629,7 @@ static void binder_free_buf_locked(struct binder_alloc *alloc, binder_update_page_range(alloc, 0, (void *)PAGE_ALIGN((uintptr_t)buffer->data), - (void *)(((uintptr_t)buffer->data + buffer_size) & PAGE_MASK), - NULL); + (void *)(((uintptr_t)buffer->data + buffer_size) & PAGE_MASK)); rb_erase(&buffer->rb_node, &alloc->allocated_buffers); buffer->free = 1; @@ -879,6 +881,7 @@ void binder_alloc_print_pages(struct seq_file *m, } mutex_unlock(&alloc->mutex); seq_printf(m, " pages: %d:%d:%d\n", active, lru, free); + seq_printf(m, " pages high watermark: %zu\n", alloc->pages_high); } /** @@ -1008,7 +1011,7 @@ binder_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) return ret; } -struct shrinker binder_shrinker = { +static struct shrinker binder_shrinker = { .count_objects = binder_shrink_count, .scan_objects = binder_shrink_scan, .seeks = DEFAULT_SEEKS, @@ -1028,8 +1031,14 @@ void binder_alloc_init(struct binder_alloc *alloc) INIT_LIST_HEAD(&alloc->buffers); } -void binder_alloc_shrinker_init(void) +int binder_alloc_shrinker_init(void) { - list_lru_init(&binder_alloc_lru); - register_shrinker(&binder_shrinker); + int ret = list_lru_init(&binder_alloc_lru); + + if (ret == 0) { + ret = register_shrinker(&binder_shrinker); + if (ret) + list_lru_destroy(&binder_alloc_lru); + } + return ret; }
diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h index a3ad768..fb3238c7 100644 --- a/drivers/android/binder_alloc.h +++ b/drivers/android/binder_alloc.h
@@ -91,6 +91,7 @@ struct binder_lru_page { * @pages: array of binder_lru_page * @buffer_size: size of address space specified via mmap * @pid: pid for associated binder_proc (invariant after init) + * @pages_high: high watermark of offset in @pages * * Bookkeeping structure for per-proc address space management for binder * buffers. It is normally initialized during binder_init() and binder_mmap() @@ -111,6 +112,7 @@ struct binder_alloc { size_t buffer_size; uint32_t buffer_free; int pid; + size_t pages_high; }; #ifdef CONFIG_ANDROID_BINDER_IPC_SELFTEST @@ -127,7 +129,7 @@ extern struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc, size_t extra_buffers_size, int is_async); extern void binder_alloc_init(struct binder_alloc *alloc); -void binder_alloc_shrinker_init(void); +extern int binder_alloc_shrinker_init(void); extern void binder_alloc_vma_close(struct binder_alloc *alloc); extern struct binder_buffer * binder_alloc_prepare_to_free(struct binder_alloc *alloc,
diff --git a/drivers/android/binder_trace.h b/drivers/android/binder_trace.h index 76e3b9c..b11dffc5 100644 --- a/drivers/android/binder_trace.h +++ b/drivers/android/binder_trace.h
@@ -85,6 +85,30 @@ DEFINE_BINDER_FUNCTION_RETURN_EVENT(binder_ioctl_done); DEFINE_BINDER_FUNCTION_RETURN_EVENT(binder_write_done); DEFINE_BINDER_FUNCTION_RETURN_EVENT(binder_read_done); +TRACE_EVENT(binder_set_priority, + TP_PROTO(int proc, int thread, unsigned int old_prio, + unsigned int desired_prio, unsigned int new_prio), + TP_ARGS(proc, thread, old_prio, new_prio, desired_prio), + + TP_STRUCT__entry( + __field(int, proc) + __field(int, thread) + __field(unsigned int, old_prio) + __field(unsigned int, new_prio) + __field(unsigned int, desired_prio) + ), + TP_fast_assign( + __entry->proc = proc; + __entry->thread = thread; + __entry->old_prio = old_prio; + __entry->new_prio = new_prio; + __entry->desired_prio = desired_prio; + ), + TP_printk("proc=%d thread=%d old=%d => new=%d desired=%d", + __entry->proc, __entry->thread, __entry->old_prio, + __entry->new_prio, __entry->desired_prio) +); + TRACE_EVENT(binder_wait_for_work, TP_PROTO(bool proc_work, bool transaction_stack, bool thread_todo), TP_ARGS(proc_work, transaction_stack, thread_todo),
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 41be9ff..2485f3f 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c
@@ -21,14 +21,48 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/sched/topology.h> +#include <linux/sched/energy.h> +#include <linux/cpuset.h> + +DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE; +DEFINE_PER_CPU(unsigned long, max_cpu_freq); +DEFINE_PER_CPU(unsigned long, max_freq_scale) = SCHED_CAPACITY_SCALE; + +void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq, + unsigned long max_freq) +{ + unsigned long scale; + int i; + + scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq; + + for_each_cpu(i, cpus) { + per_cpu(freq_scale, i) = scale; + per_cpu(max_cpu_freq, i) = max_freq; + } +} + +void arch_set_max_freq_scale(struct cpumask *cpus, + unsigned long policy_max_freq) +{ + unsigned long scale, max_freq; + int cpu = cpumask_first(cpus); + + if (cpu > nr_cpu_ids) + return; + + max_freq = per_cpu(max_cpu_freq, cpu); + if (!max_freq) + return; + + scale = (policy_max_freq << SCHED_CAPACITY_SHIFT) / max_freq; + + for_each_cpu(cpu, cpus) + per_cpu(max_freq_scale, cpu) = scale; +} static DEFINE_MUTEX(cpu_scale_mutex); -static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; - -unsigned long topology_get_cpu_scale(struct sched_domain *sd, int cpu) -{ - return per_cpu(cpu_scale, cpu); -} +DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity) { @@ -44,6 +78,9 @@ static ssize_t cpu_capacity_show(struct device *dev, return sprintf(buf, "%lu\n", topology_get_cpu_scale(NULL, cpu->dev.id)); } +static void update_topology_flags_workfn(struct work_struct *work); +static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn); + static ssize_t cpu_capacity_store(struct device *dev, struct device_attribute *attr, const char *buf, @@ -54,6 +91,7 @@ static ssize_t cpu_capacity_store(struct device *dev, int i; unsigned long new_capacity; ssize_t ret; + cpumask_var_t mask; if (!count) return 0; @@ -65,10 +103,41 @@ static ssize_t cpu_capacity_store(struct device *dev, return -EINVAL; mutex_lock(&cpu_scale_mutex); - for_each_cpu(i, &cpu_topology[this_cpu].core_sibling) + + if (new_capacity < SCHED_CAPACITY_SCALE) { + int highest_score_cpu = 0; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) { + mutex_unlock(&cpu_scale_mutex); + return -ENOMEM; + } + + cpumask_andnot(mask, cpu_online_mask, + topology_core_cpumask(this_cpu)); + + for_each_cpu(i, mask) { + if (topology_get_cpu_scale(NULL, i) == + SCHED_CAPACITY_SCALE) { + highest_score_cpu = 1; + break; + } + } + + free_cpumask_var(mask); + + if (!highest_score_cpu) { + mutex_unlock(&cpu_scale_mutex); + return -EINVAL; + } + } + + for_each_cpu(i, topology_core_cpumask(this_cpu)) topology_set_cpu_scale(i, new_capacity); mutex_unlock(&cpu_scale_mutex); + if (topology_detect_flags()) + schedule_work(&update_topology_flags_work); + return count; } @@ -93,6 +162,186 @@ static int register_cpu_capacity_sysctl(void) } subsys_initcall(register_cpu_capacity_sysctl); +enum asym_cpucap_type { no_asym, asym_thread, asym_core, asym_die }; +static enum asym_cpucap_type asym_cpucap = no_asym; +enum share_cap_type { no_share_cap, share_cap_thread, share_cap_core, share_cap_die}; +static enum share_cap_type share_cap = no_share_cap; + +#ifdef CONFIG_CPU_FREQ +int detect_share_cap_flag(void) +{ + int cpu; + enum share_cap_type share_cap_level = no_share_cap; + struct cpufreq_policy *policy; + + for_each_possible_cpu(cpu) { + policy = cpufreq_cpu_get(cpu); + + if (!policy) + return 0; + + if (share_cap_level < share_cap_thread && + cpumask_equal(topology_sibling_cpumask(cpu), + policy->related_cpus)) { + share_cap_level = share_cap_thread; + continue; + } + + if (cpumask_equal(topology_core_cpumask(cpu), + policy->related_cpus)) { + share_cap_level = share_cap_core; + continue; + } + + if (cpumask_equal(cpu_cpu_mask(cpu), + policy->related_cpus)) { + share_cap_level = share_cap_die; + continue; + } + } + + if (share_cap != share_cap_level) { + share_cap = share_cap_level; + return 1; + } + + return 0; +} +#else +int detect_share_cap_flag(void) { return 0; } +#endif + +/* + * Walk cpu topology to determine sched_domain flags. + * + * SD_ASYM_CPUCAPACITY: Indicates the lowest level that spans all cpu + * capacities found in the system for all cpus, i.e. the flag is set + * at the same level for all systems. The current algorithm implements + * this by looking for higher capacities, which doesn't work for all + * conceivable topology, but don't complicate things until it is + * necessary. + */ +int topology_detect_flags(void) +{ + unsigned long max_capacity, capacity; + enum asym_cpucap_type asym_level = no_asym; + int cpu, die_cpu, core, thread, flags_changed = 0; + + for_each_possible_cpu(cpu) { + max_capacity = 0; + + if (asym_level >= asym_thread) + goto check_core; + + for_each_cpu(thread, topology_sibling_cpumask(cpu)) { + capacity = topology_get_cpu_scale(NULL, thread); + + if (capacity > max_capacity) { + if (max_capacity != 0) + asym_level = asym_thread; + + max_capacity = capacity; + } + } + +check_core: + if (asym_level >= asym_core) + goto check_die; + + for_each_cpu(core, topology_core_cpumask(cpu)) { + capacity = topology_get_cpu_scale(NULL, core); + + if (capacity > max_capacity) { + if (max_capacity != 0) + asym_level = asym_core; + + max_capacity = capacity; + } + } +check_die: + for_each_possible_cpu(die_cpu) { + capacity = topology_get_cpu_scale(NULL, die_cpu); + + if (capacity > max_capacity) { + if (max_capacity != 0) { + asym_level = asym_die; + goto done; + } + } + } + } + +done: + if (asym_cpucap != asym_level) { + asym_cpucap = asym_level; + flags_changed = 1; + pr_debug("topology flag change detected\n"); + } + + if (detect_share_cap_flag()) + flags_changed = 1; + + return flags_changed; +} + +int topology_smt_flags(void) +{ + int flags = 0; + + if (asym_cpucap == asym_thread) + flags |= SD_ASYM_CPUCAPACITY; + + if (share_cap == share_cap_thread) + flags |= SD_SHARE_CAP_STATES; + + return flags; +} + +int topology_core_flags(void) +{ + int flags = 0; + + if (asym_cpucap == asym_core) + flags |= SD_ASYM_CPUCAPACITY; + + if (share_cap == share_cap_core) + flags |= SD_SHARE_CAP_STATES; + + return flags; +} + +int topology_cpu_flags(void) +{ + int flags = 0; + + if (asym_cpucap == asym_die) + flags |= SD_ASYM_CPUCAPACITY; + + if (share_cap == share_cap_die) + flags |= SD_SHARE_CAP_STATES; + + return flags; +} + +static int update_topology = 0; + +int topology_update_cpu_topology(void) +{ + return update_topology; +} + +/* + * Updating the sched_domains can't be done directly from cpufreq callbacks + * due to locking, so queue the work for later. + */ +static void update_topology_flags_workfn(struct work_struct *work) +{ + update_topology = 1; + rebuild_sched_domains(); + pr_debug("sched_domain hierarchy rebuilt, flags updated\n"); + update_topology = 0; +} + static u32 capacity_scale; static u32 *raw_capacity; @@ -115,13 +364,12 @@ void topology_normalize_cpu_scale(void) pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale); mutex_lock(&cpu_scale_mutex); for_each_possible_cpu(cpu) { - pr_debug("cpu_capacity: cpu=%d raw_capacity=%u\n", - cpu, raw_capacity[cpu]); capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT) / capacity_scale; topology_set_cpu_scale(cpu, capacity); - pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n", - cpu, topology_get_cpu_scale(NULL, cpu)); + pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu raw_capacity=%u\n", + cpu, topology_get_cpu_scale(NULL, cpu), + raw_capacity[cpu]); } mutex_unlock(&cpu_scale_mutex); } @@ -198,6 +446,9 @@ init_cpu_capacity_callback(struct notifier_block *nb, if (cpumask_empty(cpus_to_visit)) { topology_normalize_cpu_scale(); + init_sched_energy_costs(); + if (topology_detect_flags()) + schedule_work(&update_topology_flags_work); free_raw_capacity(); pr_debug("cpu_capacity: parsing done\n"); schedule_work(&parsing_done_work); @@ -212,6 +463,8 @@ static struct notifier_block init_cpu_capacity_notifier = { static int __init register_cpufreq_notifier(void) { + int ret; + /* * on ACPI-based systems we need to use the default cpu capacity * until we have the necessary code to parse the cpu capacity, so @@ -227,8 +480,13 @@ static int __init register_cpufreq_notifier(void) cpumask_copy(cpus_to_visit, cpu_possible_mask); - return cpufreq_register_notifier(&init_cpu_capacity_notifier, - CPUFREQ_POLICY_NOTIFIER); + ret = cpufreq_register_notifier(&init_cpu_capacity_notifier, + CPUFREQ_POLICY_NOTIFIER); + + if (ret) + free_cpumask_var(cpus_to_visit); + + return ret; } core_initcall(register_cpufreq_notifier); @@ -236,6 +494,7 @@ static void parsing_done_workfn(struct work_struct *work) { cpufreq_unregister_notifier(&init_cpu_capacity_notifier, CPUFREQ_POLICY_NOTIFIER); + free_cpumask_var(cpus_to_visit); } #else
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index d16b40c..594581a 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c
@@ -34,6 +34,7 @@ #include <linux/cpufreq.h> #include <linux/cpuidle.h> #include <linux/timer.h> +#include <linux/wakeup_reason.h> #include "../base.h" #include "power.h" @@ -1455,6 +1456,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) pm_callback_t callback = NULL; const char *info = NULL; int error = 0; + char suspend_abort[MAX_SUSPEND_ABORT_LEN]; DECLARE_DPM_WATCHDOG_ON_STACK(wd); TRACE_DEVICE(dev); @@ -1477,6 +1479,9 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) pm_wakeup_event(dev, 0); if (pm_wakeup_pending()) { + pm_get_active_wakeup_sources(suspend_abort, + MAX_SUSPEND_ABORT_LEN); + log_suspend_abort_reason(suspend_abort); dev->power.direct_complete = false; async_error = -EBUSY; goto Complete;
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index cdd6f25..b932d7f7 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c
@@ -15,6 +15,7 @@ #include <linux/seq_file.h> #include <linux/debugfs.h> #include <linux/pm_wakeirq.h> +#include <linux/types.h> #include <trace/events/power.h> #include "power.h" @@ -805,6 +806,37 @@ void pm_wakeup_dev_event(struct device *dev, unsigned int msec, bool hard) } EXPORT_SYMBOL_GPL(pm_wakeup_dev_event); +void pm_get_active_wakeup_sources(char *pending_wakeup_source, size_t max) +{ + struct wakeup_source *ws, *last_active_ws = NULL; + int len = 0; + bool active = false; + + rcu_read_lock(); + list_for_each_entry_rcu(ws, &wakeup_sources, entry) { + if (ws->active && len < max) { + if (!active) + len += scnprintf(pending_wakeup_source, max, + "Pending Wakeup Sources: "); + len += scnprintf(pending_wakeup_source + len, max - len, + "%s ", ws->name); + active = true; + } else if (!active && + (!last_active_ws || + ktime_to_ns(ws->last_time) > + ktime_to_ns(last_active_ws->last_time))) { + last_active_ws = ws; + } + } + if (!active && last_active_ws) { + scnprintf(pending_wakeup_source, max, + "Last active Wakeup Source: %s", + last_active_ws->name); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(pm_get_active_wakeup_sources); + void pm_print_active_wakeup_sources(void) { struct wakeup_source *ws; @@ -1018,7 +1050,7 @@ static int print_wakeup_source_stats(struct seq_file *m, active_time = 0; } - seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t%lu\t\t%lld\t\t%lld\t\t%lld\t\t%lld\t\t%lld\n", + seq_printf(m, "%-32s\t%lu\t\t%lu\t\t%lu\t\t%lu\t\t%lld\t\t%lld\t\t%lld\t\t%lld\t\t%lld\n", ws->name, active_count, ws->event_count, ws->wakeup_count, ws->expire_count, ktime_to_ms(active_time), ktime_to_ms(total_time), @@ -1039,7 +1071,7 @@ static int wakeup_sources_stats_show(struct seq_file *m, void *unused) struct wakeup_source *ws; int srcuidx; - seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t" + seq_puts(m, "name\t\t\t\t\tactive_count\tevent_count\twakeup_count\t" "expire_count\tactive_since\ttotal_time\tmax_time\t" "last_change\tprevent_suspend_time\n");
diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c index 8d98a32..96c34a9 100644 --- a/drivers/base/syscore.c +++ b/drivers/base/syscore.c
@@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/suspend.h> #include <trace/events/power.h> +#include <linux/wakeup_reason.h> static LIST_HEAD(syscore_ops_list); static DEFINE_MUTEX(syscore_ops_lock); @@ -75,6 +76,8 @@ int syscore_suspend(void) return 0; err_out: + log_suspend_abort_reason("System core suspend callback %pF failed", + ops->suspend); pr_err("PM: System core suspend callback %pF failed.\n", ops->suspend); list_for_each_entry_continue(ops, &syscore_ops_list, node)
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index ac3a31d..99a2c60 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig
@@ -13,10 +13,10 @@ It has several use cases, for example: /tmp storage, use as swap disks and maybe many more. - See zram.txt for more information. + See Documentation/blockdev/zram.txt for more information. config ZRAM_WRITEBACK - bool "Write back incompressible page to backing device" + bool "Write back incompressible or idle page to backing device" depends on ZRAM default n help @@ -25,4 +25,17 @@ For this feature, admin should set up backing device via /sys/block/zramX/backing_dev. - See zram.txt for more infomration. + With /sys/block/zramX/{idle,writeback}, application could ask + idle page's writeback to the backing device to save in memory. + + See Documentation/blockdev/zram.txt for more information. + +config ZRAM_MEMORY_TRACKING + bool "Track zRam block status" + depends on ZRAM && DEBUG_FS + help + With this feature, admin can track the state of allocated blocks + of zRAM. Admin could see the information via + /sys/kernel/debug/zram/zramX/block_state. + + See Documentation/blockdev/zram.txt for more information.
diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 5b8992b..cc66dae 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c
@@ -32,6 +32,9 @@ static const char * const backends[] = { #if IS_ENABLED(CONFIG_CRYPTO_842) "842", #endif +#if IS_ENABLED(CONFIG_CRYPTO_ZSTD) + "zstd", +#endif NULL };
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a46776a..0edd2dd 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c
@@ -31,6 +31,7 @@ #include <linux/err.h> #include <linux/idr.h> #include <linux/sysfs.h> +#include <linux/debugfs.h> #include <linux/cpuhotplug.h> #include "zram_drv.h" @@ -44,8 +45,31 @@ static const char *default_compressor = "lzo"; /* Module params (documentation at end) */ static unsigned int num_devices = 1; +/* + * Pages that compress to sizes equals or greater than this are stored + * uncompressed in memory. + */ +static size_t huge_class_size; static void zram_free_page(struct zram *zram, size_t index); +static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, + u32 index, int offset, struct bio *bio); + + +static int zram_slot_trylock(struct zram *zram, u32 index) +{ + return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags); +} + +static void zram_slot_lock(struct zram *zram, u32 index) +{ + bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags); +} + +static void zram_slot_unlock(struct zram *zram, u32 index) +{ + bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags); +} static inline bool init_done(struct zram *zram) { @@ -68,22 +92,22 @@ static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) } /* flag operations require table entry bit_spin_lock() being held */ -static int zram_test_flag(struct zram *zram, u32 index, +static bool zram_test_flag(struct zram *zram, u32 index, enum zram_pageflags flag) { - return zram->table[index].value & BIT(flag); + return zram->table[index].flags & BIT(flag); } static void zram_set_flag(struct zram *zram, u32 index, enum zram_pageflags flag) { - zram->table[index].value |= BIT(flag); + zram->table[index].flags |= BIT(flag); } static void zram_clear_flag(struct zram *zram, u32 index, enum zram_pageflags flag) { - zram->table[index].value &= ~BIT(flag); + zram->table[index].flags &= ~BIT(flag); } static inline void zram_set_element(struct zram *zram, u32 index, @@ -99,15 +123,22 @@ static unsigned long zram_get_element(struct zram *zram, u32 index) static size_t zram_get_obj_size(struct zram *zram, u32 index) { - return zram->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1); + return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1); } static void zram_set_obj_size(struct zram *zram, u32 index, size_t size) { - unsigned long flags = zram->table[index].value >> ZRAM_FLAG_SHIFT; + unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT; - zram->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size; + zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size; +} + +static inline bool zram_allocated(struct zram *zram, u32 index) +{ + return zram_get_obj_size(zram, index) || + zram_test_flag(zram, index, ZRAM_SAME) || + zram_test_flag(zram, index, ZRAM_WB); } #if PAGE_SIZE != 4096 @@ -122,14 +153,6 @@ static inline bool is_partial_io(struct bio_vec *bvec) } #endif -static void zram_revalidate_disk(struct zram *zram) -{ - revalidate_disk(zram->disk); - /* revalidate_disk reset the BDI_CAP_STABLE_WRITES so set again */ - zram->disk->queue->backing_dev_info->capabilities |= - BDI_CAP_STABLE_WRITES; -} - /* * Check if request is within bounds and aligned on zram logical blocks. */ @@ -261,17 +284,125 @@ static ssize_t mem_used_max_store(struct device *dev, return len; } -#ifdef CONFIG_ZRAM_WRITEBACK -static bool zram_wb_enabled(struct zram *zram) +static ssize_t idle_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) { - return zram->backing_dev; + struct zram *zram = dev_to_zram(dev); + unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; + int index; + char mode_buf[8]; + ssize_t sz; + + sz = strscpy(mode_buf, buf, sizeof(mode_buf)); + if (sz <= 0) + return -EINVAL; + + /* ignore trailing new line */ + if (mode_buf[sz - 1] == '\n') + mode_buf[sz - 1] = 0x00; + + if (strcmp(mode_buf, "all")) + return -EINVAL; + + down_read(&zram->init_lock); + if (!init_done(zram)) { + up_read(&zram->init_lock); + return -EINVAL; + } + + for (index = 0; index < nr_pages; index++) { + /* + * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race. + * See the comment in writeback_store. + */ + zram_slot_lock(zram, index); + if (zram_allocated(zram, index) && + !zram_test_flag(zram, index, ZRAM_UNDER_WB)) + zram_set_flag(zram, index, ZRAM_IDLE); + zram_slot_unlock(zram, index); + } + + up_read(&zram->init_lock); + + return len; +} + +#ifdef CONFIG_ZRAM_WRITEBACK +static ssize_t writeback_limit_enable_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct zram *zram = dev_to_zram(dev); + u64 val; + ssize_t ret = -EINVAL; + + if (kstrtoull(buf, 10, &val)) + return ret; + + down_read(&zram->init_lock); + spin_lock(&zram->wb_limit_lock); + zram->wb_limit_enable = val; + spin_unlock(&zram->wb_limit_lock); + up_read(&zram->init_lock); + ret = len; + + return ret; +} + +static ssize_t writeback_limit_enable_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + bool val; + struct zram *zram = dev_to_zram(dev); + + down_read(&zram->init_lock); + spin_lock(&zram->wb_limit_lock); + val = zram->wb_limit_enable; + spin_unlock(&zram->wb_limit_lock); + up_read(&zram->init_lock); + + return scnprintf(buf, PAGE_SIZE, "%d\n", val); +} + +static ssize_t writeback_limit_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct zram *zram = dev_to_zram(dev); + u64 val; + ssize_t ret = -EINVAL; + + if (kstrtoull(buf, 10, &val)) + return ret; + + down_read(&zram->init_lock); + spin_lock(&zram->wb_limit_lock); + zram->bd_wb_limit = val; + spin_unlock(&zram->wb_limit_lock); + up_read(&zram->init_lock); + ret = len; + + return ret; +} + +static ssize_t writeback_limit_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u64 val; + struct zram *zram = dev_to_zram(dev); + + down_read(&zram->init_lock); + spin_lock(&zram->wb_limit_lock); + val = zram->bd_wb_limit; + spin_unlock(&zram->wb_limit_lock); + up_read(&zram->init_lock); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", val); } static void reset_bdev(struct zram *zram) { struct block_device *bdev; - if (!zram_wb_enabled(zram)) + if (!zram->backing_dev) return; bdev = zram->bdev; @@ -297,7 +428,7 @@ static ssize_t backing_dev_show(struct device *dev, ssize_t ret; down_read(&zram->init_lock); - if (!zram_wb_enabled(zram)) { + if (!zram->backing_dev) { memcpy(buf, "none\n", 5); up_read(&zram->init_lock); return 5; @@ -385,7 +516,6 @@ static ssize_t backing_dev_store(struct device *dev, goto out; reset_bdev(zram); - spin_lock_init(&zram->bitmap_lock); zram->old_block_size = old_block_size; zram->bdev = bdev; @@ -415,35 +545,32 @@ static ssize_t backing_dev_store(struct device *dev, return err; } -static unsigned long get_entry_bdev(struct zram *zram) +static unsigned long alloc_block_bdev(struct zram *zram) { - unsigned long entry; - - spin_lock(&zram->bitmap_lock); + unsigned long blk_idx = 1; +retry: /* skip 0 bit to confuse zram.handle = 0 */ - entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1); - if (entry == zram->nr_pages) { - spin_unlock(&zram->bitmap_lock); + blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx); + if (blk_idx == zram->nr_pages) return 0; - } - set_bit(entry, zram->bitmap); - spin_unlock(&zram->bitmap_lock); + if (test_and_set_bit(blk_idx, zram->bitmap)) + goto retry; - return entry; + atomic64_inc(&zram->stats.bd_count); + return blk_idx; } -static void put_entry_bdev(struct zram *zram, unsigned long entry) +static void free_block_bdev(struct zram *zram, unsigned long blk_idx) { int was_set; - spin_lock(&zram->bitmap_lock); - was_set = test_and_clear_bit(entry, zram->bitmap); - spin_unlock(&zram->bitmap_lock); + was_set = test_and_clear_bit(blk_idx, zram->bitmap); WARN_ON_ONCE(!was_set); + atomic64_dec(&zram->stats.bd_count); } -void zram_page_end_io(struct bio *bio) +static void zram_page_end_io(struct bio *bio) { struct page *page = bio->bi_io_vec[0].bv_page; @@ -483,6 +610,172 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, return 1; } +#define HUGE_WRITEBACK 1 +#define IDLE_WRITEBACK 2 + +static ssize_t writeback_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct zram *zram = dev_to_zram(dev); + unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; + unsigned long index; + struct bio bio; + struct bio_vec bio_vec; + struct page *page; + ssize_t ret, sz; + char mode_buf[8]; + int mode = -1; + unsigned long blk_idx = 0; + + sz = strscpy(mode_buf, buf, sizeof(mode_buf)); + if (sz <= 0) + return -EINVAL; + + /* ignore trailing newline */ + if (mode_buf[sz - 1] == '\n') + mode_buf[sz - 1] = 0x00; + + if (!strcmp(mode_buf, "idle")) + mode = IDLE_WRITEBACK; + else if (!strcmp(mode_buf, "huge")) + mode = HUGE_WRITEBACK; + + if (mode == -1) + return -EINVAL; + + down_read(&zram->init_lock); + if (!init_done(zram)) { + ret = -EINVAL; + goto release_init_lock; + } + + if (!zram->backing_dev) { + ret = -ENODEV; + goto release_init_lock; + } + + page = alloc_page(GFP_KERNEL); + if (!page) { + ret = -ENOMEM; + goto release_init_lock; + } + + for (index = 0; index < nr_pages; index++) { + struct bio_vec bvec; + + bvec.bv_page = page; + bvec.bv_len = PAGE_SIZE; + bvec.bv_offset = 0; + + spin_lock(&zram->wb_limit_lock); + if (zram->wb_limit_enable && !zram->bd_wb_limit) { + spin_unlock(&zram->wb_limit_lock); + ret = -EIO; + break; + } + spin_unlock(&zram->wb_limit_lock); + + if (!blk_idx) { + blk_idx = alloc_block_bdev(zram); + if (!blk_idx) { + ret = -ENOSPC; + break; + } + } + + zram_slot_lock(zram, index); + if (!zram_allocated(zram, index)) + goto next; + + if (zram_test_flag(zram, index, ZRAM_WB) || + zram_test_flag(zram, index, ZRAM_SAME) || + zram_test_flag(zram, index, ZRAM_UNDER_WB)) + goto next; + + if (mode == IDLE_WRITEBACK && + !zram_test_flag(zram, index, ZRAM_IDLE)) + goto next; + if (mode == HUGE_WRITEBACK && + !zram_test_flag(zram, index, ZRAM_HUGE)) + goto next; + /* + * Clearing ZRAM_UNDER_WB is duty of caller. + * IOW, zram_free_page never clear it. + */ + zram_set_flag(zram, index, ZRAM_UNDER_WB); + /* Need for hugepage writeback racing */ + zram_set_flag(zram, index, ZRAM_IDLE); + zram_slot_unlock(zram, index); + if (zram_bvec_read(zram, &bvec, index, 0, NULL)) { + zram_slot_lock(zram, index); + zram_clear_flag(zram, index, ZRAM_UNDER_WB); + zram_clear_flag(zram, index, ZRAM_IDLE); + zram_slot_unlock(zram, index); + continue; + } + + bio_init(&bio, &bio_vec, 1); + bio_set_dev(&bio, zram->bdev); + bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9); + bio.bi_opf = REQ_OP_WRITE | REQ_SYNC; + + bio_add_page(&bio, bvec.bv_page, bvec.bv_len, + bvec.bv_offset); + /* + * XXX: A single page IO would be inefficient for write + * but it would be not bad as starter. + */ + ret = submit_bio_wait(&bio); + if (ret) { + zram_slot_lock(zram, index); + zram_clear_flag(zram, index, ZRAM_UNDER_WB); + zram_clear_flag(zram, index, ZRAM_IDLE); + zram_slot_unlock(zram, index); + continue; + } + + atomic64_inc(&zram->stats.bd_writes); + /* + * We released zram_slot_lock so need to check if the slot was + * changed. If there is freeing for the slot, we can catch it + * easily by zram_allocated. + * A subtle case is the slot is freed/reallocated/marked as + * ZRAM_IDLE again. To close the race, idle_store doesn't + * mark ZRAM_IDLE once it found the slot was ZRAM_UNDER_WB. + * Thus, we could close the race by checking ZRAM_IDLE bit. + */ + zram_slot_lock(zram, index); + if (!zram_allocated(zram, index) || + !zram_test_flag(zram, index, ZRAM_IDLE)) { + zram_clear_flag(zram, index, ZRAM_UNDER_WB); + zram_clear_flag(zram, index, ZRAM_IDLE); + goto next; + } + + zram_free_page(zram, index); + zram_clear_flag(zram, index, ZRAM_UNDER_WB); + zram_set_flag(zram, index, ZRAM_WB); + zram_set_element(zram, index, blk_idx); + blk_idx = 0; + atomic64_inc(&zram->stats.pages_stored); + spin_lock(&zram->wb_limit_lock); + if (zram->wb_limit_enable && zram->bd_wb_limit > 0) + zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12); + spin_unlock(&zram->wb_limit_lock); +next: + zram_slot_unlock(zram, index); + } + + if (blk_idx) + free_block_bdev(zram, blk_idx); + ret = len; + __free_page(page); +release_init_lock: + up_read(&zram->init_lock); + + return ret; +} + struct zram_work { struct work_struct work; struct zram *zram; @@ -535,81 +828,130 @@ static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, unsigned long entry, struct bio *parent, bool sync) { + atomic64_inc(&zram->stats.bd_reads); if (sync) return read_from_bdev_sync(zram, bvec, entry, parent); else return read_from_bdev_async(zram, bvec, entry, parent); } - -static int write_to_bdev(struct zram *zram, struct bio_vec *bvec, - u32 index, struct bio *parent, - unsigned long *pentry) -{ - struct bio *bio; - unsigned long entry; - - bio = bio_alloc(GFP_ATOMIC, 1); - if (!bio) - return -ENOMEM; - - entry = get_entry_bdev(zram); - if (!entry) { - bio_put(bio); - return -ENOSPC; - } - - bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); - bio_set_dev(bio, zram->bdev); - if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, - bvec->bv_offset)) { - bio_put(bio); - put_entry_bdev(zram, entry); - return -EIO; - } - - if (!parent) { - bio->bi_opf = REQ_OP_WRITE | REQ_SYNC; - bio->bi_end_io = zram_page_end_io; - } else { - bio->bi_opf = parent->bi_opf; - bio_chain(bio, parent); - } - - submit_bio(bio); - *pentry = entry; - - return 0; -} - -static void zram_wb_clear(struct zram *zram, u32 index) -{ - unsigned long entry; - - zram_clear_flag(zram, index, ZRAM_WB); - entry = zram_get_element(zram, index); - zram_set_element(zram, index, 0); - put_entry_bdev(zram, entry); -} - #else -static bool zram_wb_enabled(struct zram *zram) { return false; } static inline void reset_bdev(struct zram *zram) {}; -static int write_to_bdev(struct zram *zram, struct bio_vec *bvec, - u32 index, struct bio *parent, - unsigned long *pentry) - -{ - return -EIO; -} - static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, unsigned long entry, struct bio *parent, bool sync) { return -EIO; } -static void zram_wb_clear(struct zram *zram, u32 index) {} + +static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {}; #endif +#ifdef CONFIG_ZRAM_MEMORY_TRACKING + +static struct dentry *zram_debugfs_root; + +static void zram_debugfs_create(void) +{ + zram_debugfs_root = debugfs_create_dir("zram", NULL); +} + +static void zram_debugfs_destroy(void) +{ + debugfs_remove_recursive(zram_debugfs_root); +} + +static void zram_accessed(struct zram *zram, u32 index) +{ + zram_clear_flag(zram, index, ZRAM_IDLE); + zram->table[index].ac_time = ktime_get_boottime(); +} + +static ssize_t read_block_state(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + char *kbuf; + ssize_t index, written = 0; + struct zram *zram = file->private_data; + unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; + struct timespec64 ts; + + kbuf = kvmalloc(count, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + + down_read(&zram->init_lock); + if (!init_done(zram)) { + up_read(&zram->init_lock); + kvfree(kbuf); + return -EINVAL; + } + + for (index = *ppos; index < nr_pages; index++) { + int copied; + + zram_slot_lock(zram, index); + if (!zram_allocated(zram, index)) + goto next; + + ts = ktime_to_timespec64(zram->table[index].ac_time); + copied = snprintf(kbuf + written, count, + "%12zd %12lld.%06lu %c%c%c%c\n", + index, (s64)ts.tv_sec, + ts.tv_nsec / NSEC_PER_USEC, + zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.', + zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.', + zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.', + zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.'); + + if (count < copied) { + zram_slot_unlock(zram, index); + break; + } + written += copied; + count -= copied; +next: + zram_slot_unlock(zram, index); + *ppos += 1; + } + + up_read(&zram->init_lock); + if (copy_to_user(buf, kbuf, written)) + written = -EFAULT; + kvfree(kbuf); + + return written; +} + +static const struct file_operations proc_zram_block_state_op = { + .open = simple_open, + .read = read_block_state, + .llseek = default_llseek, +}; + +static void zram_debugfs_register(struct zram *zram) +{ + if (!zram_debugfs_root) + return; + + zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name, + zram_debugfs_root); + debugfs_create_file("block_state", 0400, zram->debugfs_dir, + zram, &proc_zram_block_state_op); +} + +static void zram_debugfs_unregister(struct zram *zram) +{ + debugfs_remove_recursive(zram->debugfs_dir); +} +#else +static void zram_debugfs_create(void) {}; +static void zram_debugfs_destroy(void) {}; +static void zram_accessed(struct zram *zram, u32 index) +{ + zram_clear_flag(zram, index, ZRAM_IDLE); +}; +static void zram_debugfs_register(struct zram *zram) {}; +static void zram_debugfs_unregister(struct zram *zram) {}; +#endif /* * We switched to per-cpu streams and this attr is not needed anymore. @@ -729,19 +1071,40 @@ static ssize_t mm_stat_show(struct device *dev, max_used = atomic_long_read(&zram->stats.max_used_pages); ret = scnprintf(buf, PAGE_SIZE, - "%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n", + "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu\n", orig_size << PAGE_SHIFT, (u64)atomic64_read(&zram->stats.compr_data_size), mem_used << PAGE_SHIFT, zram->limit_pages << PAGE_SHIFT, max_used << PAGE_SHIFT, (u64)atomic64_read(&zram->stats.same_pages), - pool_stats.pages_compacted); + pool_stats.pages_compacted, + (u64)atomic64_read(&zram->stats.huge_pages)); up_read(&zram->init_lock); return ret; } +#ifdef CONFIG_ZRAM_WRITEBACK +#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12))) +static ssize_t bd_stat_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + ssize_t ret; + + down_read(&zram->init_lock); + ret = scnprintf(buf, PAGE_SIZE, + "%8llu %8llu %8llu\n", + FOUR_K((u64)atomic64_read(&zram->stats.bd_count)), + FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)), + FOUR_K((u64)atomic64_read(&zram->stats.bd_writes))); + up_read(&zram->init_lock); + + return ret; +} +#endif + static ssize_t debug_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -751,9 +1114,10 @@ static ssize_t debug_stat_show(struct device *dev, down_read(&zram->init_lock); ret = scnprintf(buf, PAGE_SIZE, - "version: %d\n%8llu\n", + "version: %d\n%8llu %8llu\n", version, - (u64)atomic64_read(&zram->stats.writestall)); + (u64)atomic64_read(&zram->stats.writestall), + (u64)atomic64_read(&zram->stats.miss_free)); up_read(&zram->init_lock); return ret; @@ -761,18 +1125,11 @@ static ssize_t debug_stat_show(struct device *dev, static DEVICE_ATTR_RO(io_stat); static DEVICE_ATTR_RO(mm_stat); +#ifdef CONFIG_ZRAM_WRITEBACK +static DEVICE_ATTR_RO(bd_stat); +#endif static DEVICE_ATTR_RO(debug_stat); -static void zram_slot_lock(struct zram *zram, u32 index) -{ - bit_spin_lock(ZRAM_ACCESS, &zram->table[index].value); -} - -static void zram_slot_unlock(struct zram *zram, u32 index) -{ - bit_spin_unlock(ZRAM_ACCESS, &zram->table[index].value); -} - static void zram_meta_free(struct zram *zram, u64 disksize) { size_t num_pages = disksize >> PAGE_SHIFT; @@ -801,6 +1158,8 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) return false; } + if (!huge_class_size) + huge_class_size = zs_huge_class_size(zram->mem_pool); return true; } @@ -813,10 +1172,21 @@ static void zram_free_page(struct zram *zram, size_t index) { unsigned long handle; - if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) { - zram_wb_clear(zram, index); - atomic64_dec(&zram->stats.pages_stored); - return; +#ifdef CONFIG_ZRAM_MEMORY_TRACKING + zram->table[index].ac_time = 0; +#endif + if (zram_test_flag(zram, index, ZRAM_IDLE)) + zram_clear_flag(zram, index, ZRAM_IDLE); + + if (zram_test_flag(zram, index, ZRAM_HUGE)) { + zram_clear_flag(zram, index, ZRAM_HUGE); + atomic64_dec(&zram->stats.huge_pages); + } + + if (zram_test_flag(zram, index, ZRAM_WB)) { + zram_clear_flag(zram, index, ZRAM_WB); + free_block_bdev(zram, zram_get_element(zram, index)); + goto out; } /* @@ -825,10 +1195,8 @@ static void zram_free_page(struct zram *zram, size_t index) */ if (zram_test_flag(zram, index, ZRAM_SAME)) { zram_clear_flag(zram, index, ZRAM_SAME); - zram_set_element(zram, index, 0); atomic64_dec(&zram->stats.same_pages); - atomic64_dec(&zram->stats.pages_stored); - return; + goto out; } handle = zram_get_handle(zram, index); @@ -839,10 +1207,12 @@ static void zram_free_page(struct zram *zram, size_t index) atomic64_sub(zram_get_obj_size(zram, index), &zram->stats.compr_data_size); +out: atomic64_dec(&zram->stats.pages_stored); - zram_set_handle(zram, index, 0); zram_set_obj_size(zram, index, 0); + WARN_ON_ONCE(zram->table[index].flags & + ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB)); } static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, @@ -853,24 +1223,20 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, unsigned int size; void *src, *dst; - if (zram_wb_enabled(zram)) { - zram_slot_lock(zram, index); - if (zram_test_flag(zram, index, ZRAM_WB)) { - struct bio_vec bvec; + zram_slot_lock(zram, index); + if (zram_test_flag(zram, index, ZRAM_WB)) { + struct bio_vec bvec; - zram_slot_unlock(zram, index); - - bvec.bv_page = page; - bvec.bv_len = PAGE_SIZE; - bvec.bv_offset = 0; - return read_from_bdev(zram, &bvec, - zram_get_element(zram, index), - bio, partial_io); - } zram_slot_unlock(zram, index); + + bvec.bv_page = page; + bvec.bv_len = PAGE_SIZE; + bvec.bv_offset = 0; + return read_from_bdev(zram, &bvec, + zram_get_element(zram, index), + bio, partial_io); } - zram_slot_lock(zram, index); handle = zram_get_handle(zram, index); if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) { unsigned long value; @@ -955,7 +1321,6 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, struct page *page = bvec->bv_page; unsigned long element = 0; enum zram_pageflags flags = 0; - bool allow_wb = true; mem = kmap_atomic(page); if (page_same_filled(mem, &element)) { @@ -980,21 +1345,8 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, return ret; } - if (unlikely(comp_len > max_zpage_size)) { - if (zram_wb_enabled(zram) && allow_wb) { - zcomp_stream_put(zram->comp); - ret = write_to_bdev(zram, bvec, index, bio, &element); - if (!ret) { - flags = ZRAM_WB; - ret = 1; - goto out; - } - allow_wb = false; - goto compress_again; - } + if (comp_len >= huge_class_size) comp_len = PAGE_SIZE; - } - /* * handle allocation has 2 paths: * a) fast path is executed with preemption disabled (for @@ -1054,6 +1406,11 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, zram_slot_lock(zram, index); zram_free_page(zram, index); + if (comp_len == PAGE_SIZE) { + zram_set_flag(zram, index, ZRAM_HUGE); + atomic64_inc(&zram->stats.huge_pages); + } + if (flags) { zram_set_flag(zram, index, flags); zram_set_element(zram, index, element); @@ -1174,6 +1531,10 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, generic_end_io_acct(q, rw_acct, &zram->disk->part0, start_time); + zram_slot_lock(zram, index); + zram_accessed(zram, index); + zram_slot_unlock(zram, index); + if (unlikely(ret < 0)) { if (!is_write) atomic64_inc(&zram->stats.failed_reads); @@ -1258,10 +1619,14 @@ static void zram_slot_free_notify(struct block_device *bdev, zram = bdev->bd_disk->private_data; - zram_slot_lock(zram, index); + atomic64_inc(&zram->stats.notify_free); + if (!zram_slot_trylock(zram, index)) { + atomic64_inc(&zram->stats.miss_free); + return; + } + zram_free_page(zram, index); zram_slot_unlock(zram, index); - atomic64_inc(&zram->stats.notify_free); } static int zram_rw_page(struct block_device *bdev, sector_t sector, @@ -1380,7 +1745,8 @@ static ssize_t disksize_store(struct device *dev, zram->comp = comp; zram->disksize = disksize; set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - zram_revalidate_disk(zram); + + revalidate_disk(zram->disk); up_write(&zram->init_lock); return len; @@ -1427,7 +1793,7 @@ static ssize_t reset_store(struct device *dev, /* Make sure all the pending I/O are finished */ fsync_bdev(bdev); zram_reset_device(zram); - zram_revalidate_disk(zram); + revalidate_disk(zram->disk); bdput(bdev); mutex_lock(&bdev->bd_mutex); @@ -1465,10 +1831,14 @@ static DEVICE_ATTR_RO(initstate); static DEVICE_ATTR_WO(reset); static DEVICE_ATTR_WO(mem_limit); static DEVICE_ATTR_WO(mem_used_max); +static DEVICE_ATTR_WO(idle); static DEVICE_ATTR_RW(max_comp_streams); static DEVICE_ATTR_RW(comp_algorithm); #ifdef CONFIG_ZRAM_WRITEBACK static DEVICE_ATTR_RW(backing_dev); +static DEVICE_ATTR_WO(writeback); +static DEVICE_ATTR_RW(writeback_limit); +static DEVICE_ATTR_RW(writeback_limit_enable); #endif static struct attribute *zram_disk_attrs[] = { @@ -1478,13 +1848,20 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_compact.attr, &dev_attr_mem_limit.attr, &dev_attr_mem_used_max.attr, + &dev_attr_idle.attr, &dev_attr_max_comp_streams.attr, &dev_attr_comp_algorithm.attr, #ifdef CONFIG_ZRAM_WRITEBACK &dev_attr_backing_dev.attr, + &dev_attr_writeback.attr, + &dev_attr_writeback_limit.attr, + &dev_attr_writeback_limit_enable.attr, #endif &dev_attr_io_stat.attr, &dev_attr_mm_stat.attr, +#ifdef CONFIG_ZRAM_WRITEBACK + &dev_attr_bd_stat.attr, +#endif &dev_attr_debug_stat.attr, NULL, }; @@ -1518,7 +1895,9 @@ static int zram_add(void) device_id = ret; init_rwsem(&zram->init_lock); - +#ifdef CONFIG_ZRAM_WRITEBACK + spin_lock_init(&zram->wb_limit_lock); +#endif queue = blk_alloc_queue(GFP_KERNEL); if (!queue) { pr_err("Error allocating disk queue for device %d\n", @@ -1551,6 +1930,7 @@ static int zram_add(void) /* zram devices sort of resembles non-rotational disks */ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); + /* * To ensure that we always get PAGE_SIZE aligned * and n*PAGE_SIZED sized I/O requests. @@ -1575,11 +1955,15 @@ static int zram_add(void) if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); + zram->disk->queue->backing_dev_info->capabilities |= + BDI_CAP_STABLE_WRITES; + disk_to_dev(zram->disk)->groups = zram_disk_attr_groups; add_disk(zram->disk); strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); + zram_debugfs_register(zram); pr_info("Added device: %s\n", zram->disk->disk_name); return device_id; @@ -1610,6 +1994,8 @@ static int zram_remove(struct zram *zram) zram->claim = true; mutex_unlock(&bdev->bd_mutex); + zram_debugfs_unregister(zram); + /* Make sure all the pending I/O are finished */ fsync_bdev(bdev); zram_reset_device(zram); @@ -1617,8 +2003,8 @@ static int zram_remove(struct zram *zram) pr_info("Removed device: %s\n", zram->disk->disk_name); - blk_cleanup_queue(zram->disk->queue); del_gendisk(zram->disk); + blk_cleanup_queue(zram->disk->queue); put_disk(zram->disk); kfree(zram); return 0; @@ -1702,6 +2088,7 @@ static void destroy_devices(void) { class_unregister(&zram_control_class); idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); + zram_debugfs_destroy(); idr_destroy(&zram_index_idr); unregister_blkdev(zram_major, "zram"); cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); @@ -1723,6 +2110,7 @@ static int __init zram_init(void) return ret; } + zram_debugfs_create(); zram_major = register_blkdev(0, "zram"); if (zram_major <= 0) { pr_err("Unable to get major number\n");
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 31762db..29af8d0 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h
@@ -21,22 +21,6 @@ #include "zcomp.h" -/*-- Configurable parameters */ - -/* - * Pages that compress to size greater than this are stored - * uncompressed in memory. - */ -static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; - -/* - * NOTE: max_zpage_size must be less than or equal to: - * ZS_MAX_ALLOC_SIZE. Otherwise, zs_malloc() would - * always return failure. - */ - -/*-- End of configurable params */ - #define SECTOR_SHIFT 9 #define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) #define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT) @@ -47,7 +31,7 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; /* - * The lower ZRAM_FLAG_SHIFT bits of table.value is for + * The lower ZRAM_FLAG_SHIFT bits of table.flags is for * object size (excluding header), the higher bits is for * zram_pageflags. * @@ -58,12 +42,15 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; */ #define ZRAM_FLAG_SHIFT 24 -/* Flags for zram pages (table[page_no].value) */ +/* Flags for zram pages (table[page_no].flags) */ enum zram_pageflags { - /* Page consists the same element */ - ZRAM_SAME = ZRAM_FLAG_SHIFT, - ZRAM_ACCESS, /* page is now accessed */ + /* zram slot is locked */ + ZRAM_LOCK = ZRAM_FLAG_SHIFT, + ZRAM_SAME, /* Page consists the same element */ ZRAM_WB, /* page is stored on backing_device */ + ZRAM_UNDER_WB, /* page is under writeback */ + ZRAM_HUGE, /* Incompressible page */ + ZRAM_IDLE, /* not accessed page since last idle marking */ __NR_ZRAM_PAGEFLAGS, }; @@ -76,7 +63,10 @@ struct zram_table_entry { unsigned long handle; unsigned long element; }; - unsigned long value; + unsigned long flags; +#ifdef CONFIG_ZRAM_MEMORY_TRACKING + ktime_t ac_time; +#endif }; struct zram_stats { @@ -88,9 +78,16 @@ struct zram_stats { atomic64_t invalid_io; /* non-page-aligned I/O requests */ atomic64_t notify_free; /* no. of swap slot free notifications */ atomic64_t same_pages; /* no. of same element filled pages */ + atomic64_t huge_pages; /* no. of huge pages */ atomic64_t pages_stored; /* no. of pages currently stored */ atomic_long_t max_used_pages; /* no. of maximum pages stored */ atomic64_t writestall; /* no. of write slow paths */ + atomic64_t miss_free; /* no. of missed free */ +#ifdef CONFIG_ZRAM_WRITEBACK + atomic64_t bd_count; /* no. of pages in backing device */ + atomic64_t bd_reads; /* no. of reads from backing device */ + atomic64_t bd_writes; /* no. of writes from backing device */ +#endif }; struct zram { @@ -116,13 +113,18 @@ struct zram { * zram is claimed so open request will be failed */ bool claim; /* Protected by bdev->bd_mutex */ -#ifdef CONFIG_ZRAM_WRITEBACK struct file *backing_dev; +#ifdef CONFIG_ZRAM_WRITEBACK + spinlock_t wb_limit_lock; + bool wb_limit_enable; + u64 bd_wb_limit; struct block_device *bdev; unsigned int old_block_size; unsigned long *bitmap; unsigned long nr_pages; - spinlock_t bitmap_lock; +#endif +#ifdef CONFIG_ZRAM_MEMORY_TRACKING + struct dentry *debugfs_dir; #endif }; #endif
diff --git a/drivers/char/ipmi/ipmi_poweroff.c b/drivers/char/ipmi/ipmi_poweroff.c index 9f2e3be..676c910 100644 --- a/drivers/char/ipmi/ipmi_poweroff.c +++ b/drivers/char/ipmi/ipmi_poweroff.c
@@ -66,7 +66,7 @@ static void (*specific_poweroff_func)(ipmi_user_t user); /* Holds the old poweroff function so we can restore it on removal. */ static void (*old_poweroff_func)(void); -static int set_param_ifnum(const char *val, struct kernel_param *kp) +static int set_param_ifnum(const char *val, const struct kernel_param *kp) { int rv = param_set_int(val, kp); if (rv)
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index a106cf7..13498ad 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -1345,7 +1345,7 @@ static unsigned int num_slave_addrs; #define IPMI_MEM_ADDR_SPACE 1 static const char * const addr_space_to_str[] = { "i/o", "mem" }; -static int hotmod_handler(const char *val, struct kernel_param *kp); +static int hotmod_handler(const char *val, const struct kernel_param *kp); module_param_call(hotmod, hotmod_handler, NULL, NULL, 0200); MODULE_PARM_DESC(hotmod, "Add and remove interfaces. See" @@ -1811,7 +1811,7 @@ static struct smi_info *smi_info_alloc(void) return info; } -static int hotmod_handler(const char *val, struct kernel_param *kp) +static int hotmod_handler(const char *val, const struct kernel_param *kp) { char *str = kstrdup(val, GFP_KERNEL); int rv;
diff --git a/drivers/char/random.c b/drivers/char/random.c index ea4dbfa..7eba05e2 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c
@@ -265,7 +265,7 @@ #include <linux/syscalls.h> #include <linux/completion.h> #include <linux/uuid.h> -#include <crypto/chacha20.h> +#include <crypto/chacha.h> #include <asm/processor.h> #include <linux/uaccess.h> @@ -431,11 +431,10 @@ static int crng_init = 0; #define crng_ready() (likely(crng_init > 1)) static int crng_init_cnt = 0; static unsigned long crng_global_init_time = 0; -#define CRNG_INIT_CNT_THRESH (2*CHACHA20_KEY_SIZE) -static void _extract_crng(struct crng_state *crng, - __u8 out[CHACHA20_BLOCK_SIZE]); +#define CRNG_INIT_CNT_THRESH (2*CHACHA_KEY_SIZE) +static void _extract_crng(struct crng_state *crng, __u8 out[CHACHA_BLOCK_SIZE]); static void _crng_backtrack_protect(struct crng_state *crng, - __u8 tmp[CHACHA20_BLOCK_SIZE], int used); + __u8 tmp[CHACHA_BLOCK_SIZE], int used); static void process_random_ready_list(void); static void _get_random_bytes(void *buf, int nbytes); @@ -848,7 +847,7 @@ static int crng_fast_load(const char *cp, size_t len) } p = (unsigned char *) &primary_crng.state[4]; while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) { - p[crng_init_cnt % CHACHA20_KEY_SIZE] ^= *cp; + p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp; cp++; crng_init_cnt++; len--; } spin_unlock_irqrestore(&primary_crng.lock, flags); @@ -880,7 +879,7 @@ static int crng_slow_load(const char *cp, size_t len) unsigned long flags; static unsigned char lfsr = 1; unsigned char tmp; - unsigned i, max = CHACHA20_KEY_SIZE; + unsigned i, max = CHACHA_KEY_SIZE; const char * src_buf = cp; char * dest_buf = (char *) &primary_crng.state[4]; @@ -898,8 +897,8 @@ static int crng_slow_load(const char *cp, size_t len) lfsr >>= 1; if (tmp & 1) lfsr ^= 0xE1; - tmp = dest_buf[i % CHACHA20_KEY_SIZE]; - dest_buf[i % CHACHA20_KEY_SIZE] ^= src_buf[i % len] ^ lfsr; + tmp = dest_buf[i % CHACHA_KEY_SIZE]; + dest_buf[i % CHACHA_KEY_SIZE] ^= src_buf[i % len] ^ lfsr; lfsr += (tmp << 3) | (tmp >> 5); } spin_unlock_irqrestore(&primary_crng.lock, flags); @@ -911,7 +910,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) unsigned long flags; int i, num; union { - __u8 block[CHACHA20_BLOCK_SIZE]; + __u8 block[CHACHA_BLOCK_SIZE]; __u32 key[8]; } buf; @@ -922,7 +921,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) } else { _extract_crng(&primary_crng, buf.block); _crng_backtrack_protect(&primary_crng, buf.block, - CHACHA20_KEY_SIZE); + CHACHA_KEY_SIZE); } spin_lock_irqsave(&crng->lock, flags); for (i = 0; i < 8; i++) { @@ -958,7 +957,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) } static void _extract_crng(struct crng_state *crng, - __u8 out[CHACHA20_BLOCK_SIZE]) + __u8 out[CHACHA_BLOCK_SIZE]) { unsigned long v, flags; @@ -975,7 +974,7 @@ static void _extract_crng(struct crng_state *crng, spin_unlock_irqrestore(&crng->lock, flags); } -static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE]) +static void extract_crng(__u8 out[CHACHA_BLOCK_SIZE]) { struct crng_state *crng = NULL; @@ -993,14 +992,14 @@ static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE]) * enough) to mutate the CRNG key to provide backtracking protection. */ static void _crng_backtrack_protect(struct crng_state *crng, - __u8 tmp[CHACHA20_BLOCK_SIZE], int used) + __u8 tmp[CHACHA_BLOCK_SIZE], int used) { unsigned long flags; __u32 *s, *d; int i; used = round_up(used, sizeof(__u32)); - if (used + CHACHA20_KEY_SIZE > CHACHA20_BLOCK_SIZE) { + if (used + CHACHA_KEY_SIZE > CHACHA_BLOCK_SIZE) { extract_crng(tmp); used = 0; } @@ -1012,7 +1011,7 @@ static void _crng_backtrack_protect(struct crng_state *crng, spin_unlock_irqrestore(&crng->lock, flags); } -static void crng_backtrack_protect(__u8 tmp[CHACHA20_BLOCK_SIZE], int used) +static void crng_backtrack_protect(__u8 tmp[CHACHA_BLOCK_SIZE], int used) { struct crng_state *crng = NULL; @@ -1027,8 +1026,8 @@ static void crng_backtrack_protect(__u8 tmp[CHACHA20_BLOCK_SIZE], int used) static ssize_t extract_crng_user(void __user *buf, size_t nbytes) { - ssize_t ret = 0, i = CHACHA20_BLOCK_SIZE; - __u8 tmp[CHACHA20_BLOCK_SIZE]; + ssize_t ret = 0, i = CHACHA_BLOCK_SIZE; + __u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); int large_request = (nbytes > 256); while (nbytes) { @@ -1042,7 +1041,7 @@ static ssize_t extract_crng_user(void __user *buf, size_t nbytes) } extract_crng(tmp); - i = min_t(int, nbytes, CHACHA20_BLOCK_SIZE); + i = min_t(int, nbytes, CHACHA_BLOCK_SIZE); if (copy_to_user(buf, tmp, i)) { ret = -EFAULT; break; @@ -1614,14 +1613,14 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, */ static void _get_random_bytes(void *buf, int nbytes) { - __u8 tmp[CHACHA20_BLOCK_SIZE]; + __u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); trace_get_random_bytes(nbytes, _RET_IP_); - while (nbytes >= CHACHA20_BLOCK_SIZE) { + while (nbytes >= CHACHA_BLOCK_SIZE) { extract_crng(buf); - buf += CHACHA20_BLOCK_SIZE; - nbytes -= CHACHA20_BLOCK_SIZE; + buf += CHACHA_BLOCK_SIZE; + nbytes -= CHACHA_BLOCK_SIZE; } if (nbytes > 0) { @@ -1629,7 +1628,7 @@ static void _get_random_bytes(void *buf, int nbytes) memcpy(buf, tmp, nbytes); crng_backtrack_protect(tmp, nbytes); } else - crng_backtrack_protect(tmp, CHACHA20_BLOCK_SIZE); + crng_backtrack_protect(tmp, CHACHA_BLOCK_SIZE); memzero_explicit(tmp, sizeof(tmp)); } @@ -2184,8 +2183,8 @@ struct ctl_table random_table[] = { struct batched_entropy { union { - u64 entropy_u64[CHACHA20_BLOCK_SIZE / sizeof(u64)]; - u32 entropy_u32[CHACHA20_BLOCK_SIZE / sizeof(u32)]; + u64 entropy_u64[CHACHA_BLOCK_SIZE / sizeof(u64)]; + u32 entropy_u32[CHACHA_BLOCK_SIZE / sizeof(u32)]; }; unsigned int position; };
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index d8addbc..b374515 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig
@@ -37,6 +37,13 @@ If in doubt, say N. +config CPU_FREQ_TIMES + bool "CPU frequency time-in-state statistics" + help + Export CPU time-in-state information through procfs. + + If in doubt, say N. + choice prompt "Default CPUFreq governor" default CPU_FREQ_DEFAULT_GOV_USERSPACE if ARM_SA1100_CPUFREQ || ARM_SA1110_CPUFREQ
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 812f9e0..3ad8aeb 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile
@@ -5,7 +5,10 @@ # CPUfreq stats obj-$(CONFIG_CPU_FREQ_STAT) += cpufreq_stats.o -# CPUfreq governors +# CPUfreq times +obj-$(CONFIG_CPU_FREQ_TIMES) += cpufreq_times.o + +# CPUfreq governors obj-$(CONFIG_CPU_FREQ_GOV_PERFORMANCE) += cpufreq_performance.o obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o
diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c index 1750412..0c41ab3 100644 --- a/drivers/cpufreq/arm_big_little.c +++ b/drivers/cpufreq/arm_big_little.c
@@ -213,6 +213,7 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy, { u32 cpu = policy->cpu, cur_cluster, new_cluster, actual_cluster; unsigned int freqs_new; + int ret; cur_cluster = cpu_to_cluster(cpu); new_cluster = actual_cluster = per_cpu(physical_cluster, cpu); @@ -229,7 +230,14 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy, } } - return bL_cpufreq_set_rate(cpu, actual_cluster, new_cluster, freqs_new); + ret = bL_cpufreq_set_rate(cpu, actual_cluster, new_cluster, freqs_new); + + if (!ret) { + arch_set_freq_scale(policy->related_cpus, freqs_new, + policy->cpuinfo.max_freq); + } + + return ret; } static inline u32 get_table_count(struct cpufreq_frequency_table *table)
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index ca6ee9f3..9546218e 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c
@@ -44,9 +44,17 @@ static struct freq_attr *cpufreq_dt_attr[] = { static int set_target(struct cpufreq_policy *policy, unsigned int index) { struct private_data *priv = policy->driver_data; + unsigned long freq = policy->freq_table[index].frequency; + int ret; - return dev_pm_opp_set_rate(priv->cpu_dev, - policy->freq_table[index].frequency * 1000); + ret = dev_pm_opp_set_rate(priv->cpu_dev, freq * 1000); + + if (!ret) { + arch_set_freq_scale(policy->related_cpus, freq, + policy->cpuinfo.max_freq); + } + + return ret; } /*
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 9375430..9273994 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c
@@ -19,6 +19,7 @@ #include <linux/cpu.h> #include <linux/cpufreq.h> +#include <linux/cpufreq_times.h> #include <linux/delay.h> #include <linux/device.h> #include <linux/init.h> @@ -339,6 +340,7 @@ static void __cpufreq_notify_transition(struct cpufreq_policy *policy, (unsigned long)freqs->new, (unsigned long)freqs->cpu); trace_cpu_frequency(freqs->new, freqs->cpu); cpufreq_stats_record_transition(policy, freqs->new); + cpufreq_times_record_transition(freqs); srcu_notifier_call_chain(&cpufreq_transition_notifier_list, CPUFREQ_POSTCHANGE, freqs); if (likely(policy) && likely(policy->cpu == freqs->cpu)) @@ -1289,6 +1291,7 @@ static int cpufreq_online(unsigned int cpu) goto out_exit_policy; cpufreq_stats_create_table(policy); + cpufreq_times_create_policy(policy); write_lock_irqsave(&cpufreq_driver_lock, flags); list_add(&policy->policy_list, &cpufreq_policy_list); @@ -2235,6 +2238,10 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, policy->min = new_policy->min; policy->max = new_policy->max; + arch_set_max_freq_scale(policy->cpus, policy->max); + + trace_cpu_frequency_limits(policy->max, policy->min, policy->cpu); + policy->cached_target_freq = UINT_MAX; pr_debug("new min and max freqs are %u - %u kHz\n", @@ -2433,6 +2440,23 @@ int cpufreq_boost_enabled(void) EXPORT_SYMBOL_GPL(cpufreq_boost_enabled); /********************************************************************* + * FREQUENCY INVARIANT ACCOUNTING SUPPORT * + *********************************************************************/ + +__weak void arch_set_freq_scale(struct cpumask *cpus, + unsigned long cur_freq, + unsigned long max_freq) +{ +} +EXPORT_SYMBOL_GPL(arch_set_freq_scale); + +__weak void arch_set_max_freq_scale(struct cpumask *cpus, + unsigned long policy_max_freq) +{ +} +EXPORT_SYMBOL_GPL(arch_set_max_freq_scale); + +/********************************************************************* * REGISTER / UNREGISTER CPUFREQ DRIVER * *********************************************************************/ static enum cpuhp_state hp_online;
diff --git a/drivers/cpufreq/cpufreq_times.c b/drivers/cpufreq/cpufreq_times.c new file mode 100644 index 0000000..aaded60 --- /dev/null +++ b/drivers/cpufreq/cpufreq_times.c
@@ -0,0 +1,650 @@ +/* drivers/cpufreq/cpufreq_times.c + * + * Copyright (C) 2018 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/cpufreq.h> +#include <linux/cpufreq_times.h> +#include <linux/hashtable.h> +#include <linux/init.h> +#include <linux/jiffies.h> +#include <linux/proc_fs.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/threads.h> + +#define UID_HASH_BITS 10 + +static DECLARE_HASHTABLE(uid_hash_table, UID_HASH_BITS); + +static DEFINE_SPINLOCK(task_time_in_state_lock); /* task->time_in_state */ +static DEFINE_SPINLOCK(uid_lock); /* uid_hash_table */ + +struct concurrent_times { + atomic64_t active[NR_CPUS]; + atomic64_t policy[NR_CPUS]; +}; + +struct uid_entry { + uid_t uid; + unsigned int max_state; + struct hlist_node hash; + struct rcu_head rcu; + struct concurrent_times *concurrent_times; + u64 time_in_state[0]; +}; + +/** + * struct cpu_freqs - per-cpu frequency information + * @offset: start of these freqs' stats in task time_in_state array + * @max_state: number of entries in freq_table + * @last_index: index in freq_table of last frequency switched to + * @freq_table: list of available frequencies + */ +struct cpu_freqs { + unsigned int offset; + unsigned int max_state; + unsigned int last_index; + unsigned int freq_table[0]; +}; + +static struct cpu_freqs *all_freqs[NR_CPUS]; + +static unsigned int next_offset; + + +/* Caller must hold rcu_read_lock() */ +static struct uid_entry *find_uid_entry_rcu(uid_t uid) +{ + struct uid_entry *uid_entry; + + hash_for_each_possible_rcu(uid_hash_table, uid_entry, hash, uid) { + if (uid_entry->uid == uid) + return uid_entry; + } + return NULL; +} + +/* Caller must hold uid lock */ +static struct uid_entry *find_uid_entry_locked(uid_t uid) +{ + struct uid_entry *uid_entry; + + hash_for_each_possible(uid_hash_table, uid_entry, hash, uid) { + if (uid_entry->uid == uid) + return uid_entry; + } + return NULL; +} + +/* Caller must hold uid lock */ +static struct uid_entry *find_or_register_uid_locked(uid_t uid) +{ + struct uid_entry *uid_entry, *temp; + struct concurrent_times *times; + unsigned int max_state = READ_ONCE(next_offset); + size_t alloc_size = sizeof(*uid_entry) + max_state * + sizeof(uid_entry->time_in_state[0]); + + uid_entry = find_uid_entry_locked(uid); + if (uid_entry) { + if (uid_entry->max_state == max_state) + return uid_entry; + /* uid_entry->time_in_state is too small to track all freqs, so + * expand it. + */ + temp = __krealloc(uid_entry, alloc_size, GFP_ATOMIC); + if (!temp) + return uid_entry; + temp->max_state = max_state; + memset(temp->time_in_state + uid_entry->max_state, 0, + (max_state - uid_entry->max_state) * + sizeof(uid_entry->time_in_state[0])); + if (temp != uid_entry) { + hlist_replace_rcu(&uid_entry->hash, &temp->hash); + kfree_rcu(uid_entry, rcu); + } + return temp; + } + + uid_entry = kzalloc(alloc_size, GFP_ATOMIC); + if (!uid_entry) + return NULL; + times = kzalloc(sizeof(*times), GFP_ATOMIC); + if (!times) { + kfree(uid_entry); + return NULL; + } + + uid_entry->uid = uid; + uid_entry->max_state = max_state; + uid_entry->concurrent_times = times; + + hash_add_rcu(uid_hash_table, &uid_entry->hash, uid); + + return uid_entry; +} + +static bool freq_index_invalid(unsigned int index) +{ + unsigned int cpu; + struct cpu_freqs *freqs; + + for_each_possible_cpu(cpu) { + freqs = all_freqs[cpu]; + if (!freqs || index < freqs->offset || + freqs->offset + freqs->max_state <= index) + continue; + return freqs->freq_table[index - freqs->offset] == + CPUFREQ_ENTRY_INVALID; + } + return true; +} + +static int single_uid_time_in_state_show(struct seq_file *m, void *ptr) +{ + struct uid_entry *uid_entry; + unsigned int i; + u64 time; + uid_t uid = from_kuid_munged(current_user_ns(), *(kuid_t *)m->private); + + if (uid == overflowuid) + return -EINVAL; + + rcu_read_lock(); + + uid_entry = find_uid_entry_rcu(uid); + if (!uid_entry) { + rcu_read_unlock(); + return 0; + } + + for (i = 0; i < uid_entry->max_state; ++i) { + if (freq_index_invalid(i)) + continue; + time = nsec_to_clock_t(uid_entry->time_in_state[i]); + seq_write(m, &time, sizeof(time)); + } + + rcu_read_unlock(); + + return 0; +} + +static void *uid_seq_start(struct seq_file *seq, loff_t *pos) +{ + if (*pos >= HASH_SIZE(uid_hash_table)) + return NULL; + + return &uid_hash_table[*pos]; +} + +static void *uid_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + (*pos)++; + + if (*pos >= HASH_SIZE(uid_hash_table)) + return NULL; + + return &uid_hash_table[*pos]; +} + +static void uid_seq_stop(struct seq_file *seq, void *v) { } + +static int uid_time_in_state_seq_show(struct seq_file *m, void *v) +{ + struct uid_entry *uid_entry; + struct cpu_freqs *freqs, *last_freqs = NULL; + int i, cpu; + + if (v == uid_hash_table) { + seq_puts(m, "uid:"); + for_each_possible_cpu(cpu) { + freqs = all_freqs[cpu]; + if (!freqs || freqs == last_freqs) + continue; + last_freqs = freqs; + for (i = 0; i < freqs->max_state; i++) { + if (freqs->freq_table[i] == + CPUFREQ_ENTRY_INVALID) + continue; + seq_printf(m, " %d", freqs->freq_table[i]); + } + } + seq_putc(m, '\n'); + } + + rcu_read_lock(); + + hlist_for_each_entry_rcu(uid_entry, (struct hlist_head *)v, hash) { + if (uid_entry->max_state) + seq_printf(m, "%d:", uid_entry->uid); + for (i = 0; i < uid_entry->max_state; ++i) { + if (freq_index_invalid(i)) + continue; + seq_printf(m, " %lu", (unsigned long)nsec_to_clock_t( + uid_entry->time_in_state[i])); + } + if (uid_entry->max_state) + seq_putc(m, '\n'); + } + + rcu_read_unlock(); + return 0; +} + +static int concurrent_time_seq_show(struct seq_file *m, void *v, + atomic64_t *(*get_times)(struct concurrent_times *)) +{ + struct uid_entry *uid_entry; + int i, num_possible_cpus = num_possible_cpus(); + + rcu_read_lock(); + + hlist_for_each_entry_rcu(uid_entry, (struct hlist_head *)v, hash) { + atomic64_t *times = get_times(uid_entry->concurrent_times); + + seq_put_decimal_ull(m, "", (u64)uid_entry->uid); + seq_putc(m, ':'); + + for (i = 0; i < num_possible_cpus; ++i) { + u64 time = nsec_to_clock_t(atomic64_read(×[i])); + + seq_put_decimal_ull(m, " ", time); + } + seq_putc(m, '\n'); + } + + rcu_read_unlock(); + + return 0; +} + +static inline atomic64_t *get_active_times(struct concurrent_times *times) +{ + return times->active; +} + +static int concurrent_active_time_seq_show(struct seq_file *m, void *v) +{ + if (v == uid_hash_table) { + seq_put_decimal_ull(m, "cpus: ", num_possible_cpus()); + seq_putc(m, '\n'); + } + + return concurrent_time_seq_show(m, v, get_active_times); +} + +static inline atomic64_t *get_policy_times(struct concurrent_times *times) +{ + return times->policy; +} + +static int concurrent_policy_time_seq_show(struct seq_file *m, void *v) +{ + int i; + struct cpu_freqs *freqs, *last_freqs = NULL; + + if (v == uid_hash_table) { + int cnt = 0; + + for_each_possible_cpu(i) { + freqs = all_freqs[i]; + if (!freqs) + continue; + if (freqs != last_freqs) { + if (last_freqs) { + seq_put_decimal_ull(m, ": ", cnt); + seq_putc(m, ' '); + cnt = 0; + } + seq_put_decimal_ull(m, "policy", i); + + last_freqs = freqs; + } + cnt++; + } + if (last_freqs) { + seq_put_decimal_ull(m, ": ", cnt); + seq_putc(m, '\n'); + } + } + + return concurrent_time_seq_show(m, v, get_policy_times); +} + +void cpufreq_task_times_init(struct task_struct *p) +{ + unsigned long flags; + + spin_lock_irqsave(&task_time_in_state_lock, flags); + p->time_in_state = NULL; + spin_unlock_irqrestore(&task_time_in_state_lock, flags); + p->max_state = 0; +} + +void cpufreq_task_times_alloc(struct task_struct *p) +{ + void *temp; + unsigned long flags; + unsigned int max_state = READ_ONCE(next_offset); + + /* We use one array to avoid multiple allocs per task */ + temp = kcalloc(max_state, sizeof(p->time_in_state[0]), GFP_ATOMIC); + if (!temp) + return; + + spin_lock_irqsave(&task_time_in_state_lock, flags); + p->time_in_state = temp; + spin_unlock_irqrestore(&task_time_in_state_lock, flags); + p->max_state = max_state; +} + +/* Caller must hold task_time_in_state_lock */ +static int cpufreq_task_times_realloc_locked(struct task_struct *p) +{ + void *temp; + unsigned int max_state = READ_ONCE(next_offset); + + temp = krealloc(p->time_in_state, max_state * sizeof(u64), GFP_ATOMIC); + if (!temp) + return -ENOMEM; + p->time_in_state = temp; + memset(p->time_in_state + p->max_state, 0, + (max_state - p->max_state) * sizeof(u64)); + p->max_state = max_state; + return 0; +} + +void cpufreq_task_times_exit(struct task_struct *p) +{ + unsigned long flags; + void *temp; + + if (!p->time_in_state) + return; + + spin_lock_irqsave(&task_time_in_state_lock, flags); + temp = p->time_in_state; + p->time_in_state = NULL; + spin_unlock_irqrestore(&task_time_in_state_lock, flags); + kfree(temp); +} + +int proc_time_in_state_show(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *p) +{ + unsigned int cpu, i; + u64 cputime; + unsigned long flags; + struct cpu_freqs *freqs; + struct cpu_freqs *last_freqs = NULL; + + spin_lock_irqsave(&task_time_in_state_lock, flags); + for_each_possible_cpu(cpu) { + freqs = all_freqs[cpu]; + if (!freqs || freqs == last_freqs) + continue; + last_freqs = freqs; + + seq_printf(m, "cpu%u\n", cpu); + for (i = 0; i < freqs->max_state; i++) { + if (freqs->freq_table[i] == CPUFREQ_ENTRY_INVALID) + continue; + cputime = 0; + if (freqs->offset + i < p->max_state && + p->time_in_state) + cputime = p->time_in_state[freqs->offset + i]; + seq_printf(m, "%u %lu\n", freqs->freq_table[i], + (unsigned long)nsec_to_clock_t(cputime)); + } + } + spin_unlock_irqrestore(&task_time_in_state_lock, flags); + return 0; +} + +void cpufreq_acct_update_power(struct task_struct *p, u64 cputime) +{ + unsigned long flags; + unsigned int state; + unsigned int active_cpu_cnt = 0; + unsigned int policy_cpu_cnt = 0; + unsigned int policy_first_cpu; + struct uid_entry *uid_entry; + struct cpu_freqs *freqs = all_freqs[task_cpu(p)]; + struct cpufreq_policy *policy; + uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p)); + int cpu = 0; + + if (!freqs || is_idle_task(p) || p->flags & PF_EXITING) + return; + + state = freqs->offset + READ_ONCE(freqs->last_index); + + spin_lock_irqsave(&task_time_in_state_lock, flags); + if ((state < p->max_state || !cpufreq_task_times_realloc_locked(p)) && + p->time_in_state) + p->time_in_state[state] += cputime; + spin_unlock_irqrestore(&task_time_in_state_lock, flags); + + spin_lock_irqsave(&uid_lock, flags); + uid_entry = find_or_register_uid_locked(uid); + if (uid_entry && state < uid_entry->max_state) + uid_entry->time_in_state[state] += cputime; + spin_unlock_irqrestore(&uid_lock, flags); + + rcu_read_lock(); + uid_entry = find_uid_entry_rcu(uid); + if (!uid_entry) { + rcu_read_unlock(); + return; + } + + for_each_possible_cpu(cpu) + if (!idle_cpu(cpu)) + ++active_cpu_cnt; + + atomic64_add(cputime, + &uid_entry->concurrent_times->active[active_cpu_cnt - 1]); + + policy = cpufreq_cpu_get(task_cpu(p)); + if (!policy) { + /* + * This CPU may have just come up and not have a cpufreq policy + * yet. + */ + rcu_read_unlock(); + return; + } + + for_each_cpu(cpu, policy->related_cpus) + if (!idle_cpu(cpu)) + ++policy_cpu_cnt; + + policy_first_cpu = cpumask_first(policy->related_cpus); + cpufreq_cpu_put(policy); + + atomic64_add(cputime, + &uid_entry->concurrent_times->policy[policy_first_cpu + + policy_cpu_cnt - 1]); + rcu_read_unlock(); +} + +void cpufreq_times_create_policy(struct cpufreq_policy *policy) +{ + int cpu, index; + unsigned int count = 0; + struct cpufreq_frequency_table *pos, *table; + struct cpu_freqs *freqs; + void *tmp; + + if (all_freqs[policy->cpu]) + return; + + table = policy->freq_table; + if (!table) + return; + + cpufreq_for_each_entry(pos, table) + count++; + + tmp = kzalloc(sizeof(*freqs) + sizeof(freqs->freq_table[0]) * count, + GFP_KERNEL); + if (!tmp) + return; + + freqs = tmp; + freqs->max_state = count; + + index = cpufreq_frequency_table_get_index(policy, policy->cur); + if (index >= 0) + WRITE_ONCE(freqs->last_index, index); + + cpufreq_for_each_entry(pos, table) + freqs->freq_table[pos - table] = pos->frequency; + + freqs->offset = next_offset; + WRITE_ONCE(next_offset, freqs->offset + count); + for_each_cpu(cpu, policy->related_cpus) + all_freqs[cpu] = freqs; +} + +static void uid_entry_reclaim(struct rcu_head *rcu) +{ + struct uid_entry *uid_entry = container_of(rcu, struct uid_entry, rcu); + + kfree(uid_entry->concurrent_times); + kfree(uid_entry); +} + +void cpufreq_task_times_remove_uids(uid_t uid_start, uid_t uid_end) +{ + struct uid_entry *uid_entry; + struct hlist_node *tmp; + unsigned long flags; + + spin_lock_irqsave(&uid_lock, flags); + + for (; uid_start <= uid_end; uid_start++) { + hash_for_each_possible_safe(uid_hash_table, uid_entry, tmp, + hash, uid_start) { + if (uid_start == uid_entry->uid) { + hash_del_rcu(&uid_entry->hash); + call_rcu(&uid_entry->rcu, uid_entry_reclaim); + } + } + } + + spin_unlock_irqrestore(&uid_lock, flags); +} + +void cpufreq_times_record_transition(struct cpufreq_freqs *freq) +{ + int index; + struct cpu_freqs *freqs = all_freqs[freq->cpu]; + struct cpufreq_policy *policy; + + if (!freqs) + return; + + policy = cpufreq_cpu_get(freq->cpu); + if (!policy) + return; + + index = cpufreq_frequency_table_get_index(policy, freq->new); + if (index >= 0) + WRITE_ONCE(freqs->last_index, index); + + cpufreq_cpu_put(policy); +} + +static const struct seq_operations uid_time_in_state_seq_ops = { + .start = uid_seq_start, + .next = uid_seq_next, + .stop = uid_seq_stop, + .show = uid_time_in_state_seq_show, +}; + +static int uid_time_in_state_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &uid_time_in_state_seq_ops); +} + +int single_uid_time_in_state_open(struct inode *inode, struct file *file) +{ + return single_open(file, single_uid_time_in_state_show, + &(inode->i_uid)); +} + +static const struct file_operations uid_time_in_state_fops = { + .open = uid_time_in_state_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static const struct seq_operations concurrent_active_time_seq_ops = { + .start = uid_seq_start, + .next = uid_seq_next, + .stop = uid_seq_stop, + .show = concurrent_active_time_seq_show, +}; + +static int concurrent_active_time_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &concurrent_active_time_seq_ops); +} + +static const struct file_operations concurrent_active_time_fops = { + .open = concurrent_active_time_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static const struct seq_operations concurrent_policy_time_seq_ops = { + .start = uid_seq_start, + .next = uid_seq_next, + .stop = uid_seq_stop, + .show = concurrent_policy_time_seq_show, +}; + +static int concurrent_policy_time_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &concurrent_policy_time_seq_ops); +} + +static const struct file_operations concurrent_policy_time_fops = { + .open = concurrent_policy_time_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init cpufreq_times_init(void) +{ + proc_create_data("uid_time_in_state", 0444, NULL, + &uid_time_in_state_fops, NULL); + + proc_create_data("uid_concurrent_active_time", 0444, NULL, + &concurrent_active_time_fops, NULL); + + proc_create_data("uid_concurrent_policy_time", 0444, NULL, + &concurrent_policy_time_fops, NULL); + + return 0; +} + +early_initcall(cpufreq_times_init);
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index ed4df58..7c33193 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c
@@ -212,7 +212,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, } /* Take note of the planned idle state. */ - sched_idle_set_state(target_state); + sched_idle_set_state(target_state, index); trace_cpu_idle_rcuidle(index, dev->cpu); time_start = ns_to_ktime(local_clock()); @@ -226,7 +226,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); /* The cpu is no longer idle or about to enter idle. */ - sched_idle_set_state(NULL); + sched_idle_set_state(NULL, -1); if (broadcast) { if (WARN_ON_ONCE(!irqs_disabled())) @@ -263,12 +263,18 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, * * @drv: the cpuidle driver * @dev: the cpuidle device + * @stop_tick: indication on whether or not to stop the tick * * Returns the index of the idle state. The return value must not be negative. + * + * The memory location pointed to by @stop_tick is expected to be written the + * 'false' boolean value if the scheduler tick should not be stopped before + * entering the returned state. */ -int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) +int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, + bool *stop_tick) { - return cpuidle_curr_governor->select(drv, dev); + return cpuidle_curr_governor->select(drv, dev, stop_tick); } /**
diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index ce1a2ff..0213e07 100644 --- a/drivers/cpuidle/governors/ladder.c +++ b/drivers/cpuidle/governors/ladder.c
@@ -62,9 +62,10 @@ static inline void ladder_do_selection(struct ladder_device *ldev, * ladder_select_state - selects the next state to enter * @drv: cpuidle driver * @dev: the CPU + * @dummy: not used */ static int ladder_select_state(struct cpuidle_driver *drv, - struct cpuidle_device *dev) + struct cpuidle_device *dev, bool *dummy) { struct ladder_device *ldev = this_cpu_ptr(&ladder_devices); struct ladder_device_state *last_state;
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 48eaf28..58c103b 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c
@@ -123,6 +123,7 @@ struct menu_device { int last_state_idx; int needs_update; + int tick_wakeup; unsigned int next_timer_us; unsigned int predicted_us; @@ -180,7 +181,12 @@ static inline int performance_multiplier(unsigned long nr_iowaiters, unsigned lo /* for higher loadavg, we are more reluctant */ - mult += 2 * get_loadavg(load); + /* + * this doesn't work as intended - it is almost always 0, but can + * sometimes, depending on workload, spike very high into the hundreds + * even when the average cpu load is under 10%. + */ + /* mult += 2 * get_loadavg(); */ /* for IO wait tasks (per cpu!) we add 5x each */ mult += 10 * nr_iowaiters; @@ -279,8 +285,10 @@ static unsigned int get_typical_interval(struct menu_device *data) * menu_select - selects the next idle state to enter * @drv: cpuidle driver containing state data * @dev: the CPU + * @stop_tick: indication on whether or not to stop the tick */ -static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) +static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, + bool *stop_tick) { struct menu_device *data = this_cpu_ptr(&menu_devices); struct device *device = get_cpu_device(dev->cpu); @@ -292,6 +300,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) unsigned int expected_interval; unsigned long nr_iowaiters, cpu_load; int resume_latency = dev_pm_qos_raw_read_value(device); + ktime_t delta_next; if (data->needs_update) { menu_update(drv, dev); @@ -303,11 +312,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) latency_req = resume_latency; /* Special case when user has set very strict latency requirement */ - if (unlikely(latency_req == 0)) + if (unlikely(latency_req == 0)) { + *stop_tick = false; return 0; + } /* determine the expected residency time, round up */ - data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length()); + data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next)); get_iowait_load(&nr_iowaiters, &cpu_load); data->bucket = which_bucket(data->next_timer_us, nr_iowaiters); @@ -346,14 +357,30 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) */ data->predicted_us = min(data->predicted_us, expected_interval); - /* - * Use the performance multiplier and the user-configurable - * latency_req to determine the maximum exit latency. - */ - interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load); - if (latency_req > interactivity_req) - latency_req = interactivity_req; + if (tick_nohz_tick_stopped()) { + /* + * If the tick is already stopped, the cost of possible short + * idle duration misprediction is much higher, because the CPU + * may be stuck in a shallow idle state for a long time as a + * result of it. In that case say we might mispredict and try + * to force the CPU into a state for which we would have stopped + * the tick, unless a timer is going to expire really soon + * anyway. + */ + if (data->predicted_us < TICK_USEC) + data->predicted_us = min_t(unsigned int, TICK_USEC, + ktime_to_us(delta_next)); + } else { + /* + * Use the performance multiplier and the user-configurable + * latency_req to determine the maximum exit latency. + */ + interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load); + if (latency_req > interactivity_req) + latency_req = interactivity_req; + } + expected_interval = data->predicted_us; /* * Find the idle state with the lowest power while satisfying * our constraints. @@ -369,15 +396,52 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) idx = i; /* first enabled state */ if (s->target_residency > data->predicted_us) break; - if (s->exit_latency > latency_req) + if (s->exit_latency > latency_req) { + /* + * If we break out of the loop for latency reasons, use + * the target residency of the selected state as the + * expected idle duration so that the tick is retained + * as long as that target residency is low enough. + */ + expected_interval = drv->states[idx].target_residency; break; - + } idx = i; } if (idx == -1) idx = 0; /* No states enabled. Must use 0. */ + /* + * Don't stop the tick if the selected state is a polling one or if the + * expected idle duration is shorter than the tick period length. + */ + if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) || + expected_interval < TICK_USEC) { + unsigned int delta_next_us = ktime_to_us(delta_next); + + *stop_tick = false; + + if (!tick_nohz_tick_stopped() && idx > 0 && + drv->states[idx].target_residency > delta_next_us) { + /* + * The tick is not going to be stopped and the target + * residency of the state to be returned is not within + * the time until the next timer event including the + * tick, so try to correct that. + */ + for (i = idx - 1; i >= 0; i--) { + if (drv->states[i].disabled || + dev->states_usage[i].disable) + continue; + + idx = i; + if (drv->states[i].target_residency <= delta_next_us) + break; + } + } + } + data->last_state_idx = idx; return data->last_state_idx; @@ -397,6 +461,7 @@ static void menu_reflect(struct cpuidle_device *dev, int index) data->last_state_idx = index; data->needs_update = 1; + data->tick_wakeup = tick_nohz_idle_got_tick(); } /** @@ -427,14 +492,27 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) * assume the state was never reached and the exit latency is 0. */ - /* measured value */ - measured_us = cpuidle_get_last_residency(dev); + if (data->tick_wakeup && data->next_timer_us > TICK_USEC) { + /* + * The nohz code said that there wouldn't be any events within + * the tick boundary (if the tick was stopped), but the idle + * duration predictor had a differing opinion. Since the CPU + * was woken up by a tick (that wasn't stopped after all), the + * predictor was not quite right, so assume that the CPU could + * have been idle long (but not forever) to help the idle + * duration predictor do a better job next time. + */ + measured_us = 9 * MAX_INTERESTING / 10; + } else { + /* measured value */ + measured_us = cpuidle_get_last_residency(dev); - /* Deduct exit latency */ - if (measured_us > 2 * target->exit_latency) - measured_us -= target->exit_latency; - else - measured_us /= 2; + /* Deduct exit latency */ + if (measured_us > 2 * target->exit_latency) + measured_us -= target->exit_latency; + else + measured_us /= 2; + } /* Make sure our coefficients do not exceed unity */ if (measured_us > data->next_timer_us)
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 9a30279..33ea68b45 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c
@@ -329,8 +329,12 @@ dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb) spin_lock_irqsave(fence->lock, flags); ret = !list_empty(&cb->node); - if (ret) + if (ret) { list_del_init(&cb->node); + if (list_empty(&fence->cb_list)) + if (fence->ops->disable_signaling) + fence->ops->disable_signaling(fence); + } spin_unlock_irqrestore(fence->lock, flags);
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 24f83f9e..ebc1196 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c
@@ -169,6 +169,13 @@ static bool timeline_fence_enable_signaling(struct dma_fence *fence) return true; } +static void timeline_fence_disable_signaling(struct dma_fence *fence) +{ + struct sync_pt *pt = dma_fence_to_sync_pt(fence); + + list_del_init(&pt->link); +} + static void timeline_fence_value_str(struct dma_fence *fence, char *str, int size) { @@ -187,6 +194,7 @@ static const struct dma_fence_ops timeline_fence_ops = { .get_driver_name = timeline_fence_get_driver_name, .get_timeline_name = timeline_fence_get_timeline_name, .enable_signaling = timeline_fence_enable_signaling, + .disable_signaling = timeline_fence_disable_signaling, .signaled = timeline_fence_signaled, .wait = dma_fence_default_wait, .release = timeline_fence_release,
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 79c1330..594ce6abd 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c
@@ -50,7 +50,7 @@ int edac_mc_get_poll_msec(void) return edac_mc_poll_msec; } -static int edac_set_poll_msec(const char *val, struct kernel_param *kp) +static int edac_set_poll_msec(const char *val, const struct kernel_param *kp) { unsigned long l; int ret;
diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c index 172598a..32a931d 100644 --- a/drivers/edac/edac_module.c +++ b/drivers/edac/edac_module.c
@@ -19,7 +19,8 @@ #ifdef CONFIG_EDAC_DEBUG -static int edac_set_debug_level(const char *buf, struct kernel_param *kp) +static int edac_set_debug_level(const char *buf, + const struct kernel_param *kp) { unsigned long val; int ret;
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 678bc91..8a1e54b 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile
@@ -21,7 +21,8 @@ KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \ -D__NO_FORTIFY \ $(call cc-option,-ffreestanding) \ - $(call cc-option,-fno-stack-protector) + $(call cc-option,-fno-stack-protector) \ + $(DISABLE_LTO) GCOV_PROFILE := n KASAN_SANITIZE := n
diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index 20b40ad2..42ed887 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c
@@ -34,7 +34,8 @@ module_param(emulate_scroll_wheel, bool, 0644); MODULE_PARM_DESC(emulate_scroll_wheel, "Emulate a scroll wheel"); static unsigned int scroll_speed = 32; -static int param_set_scroll_speed(const char *val, struct kernel_param *kp) { +static int param_set_scroll_speed(const char *val, + const struct kernel_param *kp) { unsigned long speed; if (!val || kstrtoul(val, 0, &speed) || speed > 63) return -EINVAL;
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index d127ace..6ee866f 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c
@@ -244,7 +244,7 @@ struct chs_geom { static unsigned int ide_disks; static struct chs_geom ide_disks_chs[MAX_HWIFS * MAX_DRIVES]; -static int ide_set_disk_chs(const char *str, struct kernel_param *kp) +static int ide_set_disk_chs(const char *str, const struct kernel_param *kp) { unsigned int a, b, c = 0, h = 0, s = 0, i, j = 1; @@ -328,7 +328,7 @@ static void ide_dev_apply_params(ide_drive_t *drive, u8 unit) static unsigned int ide_ignore_cable; -static int ide_set_ignore_cable(const char *s, struct kernel_param *kp) +static int ide_set_ignore_cable(const char *s, const struct kernel_param *kp) { int i, j = 1;
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 14cadf6..a45e460 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -150,7 +150,7 @@ static struct kparam_string kp_txselect = { .string = txselect_list, .maxlen = MAX_ATTEN_LEN }; -static int setup_txselect(const char *, struct kernel_param *); +static int setup_txselect(const char *, const struct kernel_param *); module_param_call(txselect, setup_txselect, param_get_string, &kp_txselect, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(txselect, @@ -6169,7 +6169,7 @@ static void set_no_qsfp_atten(struct qib_devdata *dd, int change) } /* handle the txselect parameter changing */ -static int setup_txselect(const char *str, struct kernel_param *kp) +static int setup_txselect(const char *str, const struct kernel_param *kp) { struct qib_devdata *dd; unsigned long val;
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 47f3f56..0835e07 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -80,7 +80,7 @@ module_param(srpt_srq_size, int, 0444); MODULE_PARM_DESC(srpt_srq_size, "Shared receive queue (SRQ) size."); -static int srpt_get_u64_x(char *buffer, struct kernel_param *kp) +static int srpt_get_u64_x(char *buffer, const struct kernel_param *kp) { return sprintf(buffer, "0x%016llx", *(u64 *)kp->arg); }
diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig index ff80377..724715e 100644 --- a/drivers/input/Kconfig +++ b/drivers/input/Kconfig
@@ -184,6 +184,19 @@ To compile this driver as a module, choose M here: the module will be called apm-power. +config INPUT_KEYRESET + bool "Reset key" + depends on INPUT + select INPUT_KEYCOMBO + ---help--- + Say Y here if you want to reboot when some keys are pressed; + +config INPUT_KEYCOMBO + bool "Key combo" + depends on INPUT + ---help--- + Say Y here if you want to take action when some keys are pressed; + comment "Input Device Drivers" source "drivers/input/keyboard/Kconfig"
diff --git a/drivers/input/Makefile b/drivers/input/Makefile index 40de6a7..f0351af 100644 --- a/drivers/input/Makefile +++ b/drivers/input/Makefile
@@ -27,5 +27,7 @@ obj-$(CONFIG_INPUT_MISC) += misc/ obj-$(CONFIG_INPUT_APMPOWER) += apm-power.o +obj-$(CONFIG_INPUT_KEYRESET) += keyreset.o +obj-$(CONFIG_INPUT_KEYCOMBO) += keycombo.o obj-$(CONFIG_RMI4_CORE) += rmi4/
diff --git a/drivers/input/keyboard/goldfish_events.c b/drivers/input/keyboard/goldfish_events.c index f6e643b..c877e56 100644 --- a/drivers/input/keyboard/goldfish_events.c +++ b/drivers/input/keyboard/goldfish_events.c
@@ -17,6 +17,7 @@ #include <linux/interrupt.h> #include <linux/types.h> #include <linux/input.h> +#include <linux/input/mt.h> #include <linux/kernel.h> #include <linux/platform_device.h> #include <linux/slab.h> @@ -24,6 +25,8 @@ #include <linux/io.h> #include <linux/acpi.h> +#define GOLDFISH_MAX_FINGERS 5 + enum { REG_READ = 0x00, REG_SET_PAGE = 0x00, @@ -52,7 +55,21 @@ static irqreturn_t events_interrupt(int irq, void *dev_id) value = __raw_readl(edev->addr + REG_READ); input_event(edev->input, type, code, value); - input_sync(edev->input); + // Send an extra (EV_SYN, SYN_REPORT, 0x0) event + // if a key was pressed. Some keyboard device + // drivers may only send the EV_KEY event and + // not EV_SYN. + // Note that sending an extra SYN_REPORT is not + // necessary nor correct protocol with other + // devices such as touchscreens, which will send + // their own SYN_REPORT's when sufficient event + // information has been collected (e.g., for + // touchscreens, when pressure and X/Y coordinates + // have been received). Hence, we will only send + // this extra SYN_REPORT if type == EV_KEY. + if (type == EV_KEY) { + input_sync(edev->input); + } return IRQ_HANDLED; } @@ -154,6 +171,15 @@ static int events_probe(struct platform_device *pdev) input_dev->name = edev->name; input_dev->id.bustype = BUS_HOST; + // Set the Goldfish Device to be multi-touch. + // In the Ranchu kernel, there is multi-touch-specific + // code for handling ABS_MT_SLOT events. + // See drivers/input/input.c:input_handle_abs_event. + // If we do not issue input_mt_init_slots, + // the kernel will filter out needed ABS_MT_SLOT + // events when we touch the screen in more than one place, + // preventing multi-touch with more than one finger from working. + input_mt_init_slots(input_dev, GOLDFISH_MAX_FINGERS, 0); events_import_bits(edev, input_dev->evbit, EV_SYN, EV_MAX); events_import_bits(edev, input_dev->keybit, EV_KEY, KEY_MAX);
diff --git a/drivers/input/keycombo.c b/drivers/input/keycombo.c new file mode 100644 index 0000000..2fba451 --- /dev/null +++ b/drivers/input/keycombo.c
@@ -0,0 +1,261 @@ +/* drivers/input/keycombo.c + * + * Copyright (C) 2014 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/input.h> +#include <linux/keycombo.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/reboot.h> +#include <linux/sched.h> +#include <linux/slab.h> + +struct keycombo_state { + struct input_handler input_handler; + unsigned long keybit[BITS_TO_LONGS(KEY_CNT)]; + unsigned long upbit[BITS_TO_LONGS(KEY_CNT)]; + unsigned long key[BITS_TO_LONGS(KEY_CNT)]; + spinlock_t lock; + struct workqueue_struct *wq; + int key_down_target; + int key_down; + int key_up; + struct delayed_work key_down_work; + int delay; + struct work_struct key_up_work; + void (*key_up_fn)(void *); + void (*key_down_fn)(void *); + void *priv; + int key_is_down; + struct wakeup_source combo_held_wake_source; + struct wakeup_source combo_up_wake_source; +}; + +static void do_key_down(struct work_struct *work) +{ + struct delayed_work *dwork = container_of(work, struct delayed_work, + work); + struct keycombo_state *state = container_of(dwork, + struct keycombo_state, key_down_work); + if (state->key_down_fn) + state->key_down_fn(state->priv); +} + +static void do_key_up(struct work_struct *work) +{ + struct keycombo_state *state = container_of(work, struct keycombo_state, + key_up_work); + if (state->key_up_fn) + state->key_up_fn(state->priv); + __pm_relax(&state->combo_up_wake_source); +} + +static void keycombo_event(struct input_handle *handle, unsigned int type, + unsigned int code, int value) +{ + unsigned long flags; + struct keycombo_state *state = handle->private; + + if (type != EV_KEY) + return; + + if (code >= KEY_MAX) + return; + + if (!test_bit(code, state->keybit)) + return; + + spin_lock_irqsave(&state->lock, flags); + if (!test_bit(code, state->key) == !value) + goto done; + __change_bit(code, state->key); + if (test_bit(code, state->upbit)) { + if (value) + state->key_up++; + else + state->key_up--; + } else { + if (value) + state->key_down++; + else + state->key_down--; + } + if (state->key_down == state->key_down_target && state->key_up == 0) { + __pm_stay_awake(&state->combo_held_wake_source); + state->key_is_down = 1; + if (queue_delayed_work(state->wq, &state->key_down_work, + state->delay)) + pr_debug("Key down work already queued!"); + } else if (state->key_is_down) { + if (!cancel_delayed_work(&state->key_down_work)) { + __pm_stay_awake(&state->combo_up_wake_source); + queue_work(state->wq, &state->key_up_work); + } + __pm_relax(&state->combo_held_wake_source); + state->key_is_down = 0; + } +done: + spin_unlock_irqrestore(&state->lock, flags); +} + +static int keycombo_connect(struct input_handler *handler, + struct input_dev *dev, + const struct input_device_id *id) +{ + int i; + int ret; + struct input_handle *handle; + struct keycombo_state *state = + container_of(handler, struct keycombo_state, input_handler); + for (i = 0; i < KEY_MAX; i++) { + if (test_bit(i, state->keybit) && test_bit(i, dev->keybit)) + break; + } + if (i == KEY_MAX) + return -ENODEV; + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + handle->dev = dev; + handle->handler = handler; + handle->name = KEYCOMBO_NAME; + handle->private = state; + + ret = input_register_handle(handle); + if (ret) + goto err_input_register_handle; + + ret = input_open_device(handle); + if (ret) + goto err_input_open_device; + + return 0; + +err_input_open_device: + input_unregister_handle(handle); +err_input_register_handle: + kfree(handle); + return ret; +} + +static void keycombo_disconnect(struct input_handle *handle) +{ + input_close_device(handle); + input_unregister_handle(handle); + kfree(handle); +} + +static const struct input_device_id keycombo_ids[] = { + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT, + .evbit = { BIT_MASK(EV_KEY) }, + }, + { }, +}; +MODULE_DEVICE_TABLE(input, keycombo_ids); + +static int keycombo_probe(struct platform_device *pdev) +{ + int ret; + int key, *keyp; + struct keycombo_state *state; + struct keycombo_platform_data *pdata = pdev->dev.platform_data; + + if (!pdata) + return -EINVAL; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return -ENOMEM; + + spin_lock_init(&state->lock); + keyp = pdata->keys_down; + while ((key = *keyp++)) { + if (key >= KEY_MAX) + continue; + state->key_down_target++; + __set_bit(key, state->keybit); + } + if (pdata->keys_up) { + keyp = pdata->keys_up; + while ((key = *keyp++)) { + if (key >= KEY_MAX) + continue; + __set_bit(key, state->keybit); + __set_bit(key, state->upbit); + } + } + + state->wq = alloc_ordered_workqueue("keycombo", 0); + if (!state->wq) + return -ENOMEM; + + state->priv = pdata->priv; + + if (pdata->key_down_fn) + state->key_down_fn = pdata->key_down_fn; + INIT_DELAYED_WORK(&state->key_down_work, do_key_down); + + if (pdata->key_up_fn) + state->key_up_fn = pdata->key_up_fn; + INIT_WORK(&state->key_up_work, do_key_up); + + wakeup_source_init(&state->combo_held_wake_source, "key combo"); + wakeup_source_init(&state->combo_up_wake_source, "key combo up"); + state->delay = msecs_to_jiffies(pdata->key_down_delay); + + state->input_handler.event = keycombo_event; + state->input_handler.connect = keycombo_connect; + state->input_handler.disconnect = keycombo_disconnect; + state->input_handler.name = KEYCOMBO_NAME; + state->input_handler.id_table = keycombo_ids; + ret = input_register_handler(&state->input_handler); + if (ret) { + kfree(state); + return ret; + } + platform_set_drvdata(pdev, state); + return 0; +} + +int keycombo_remove(struct platform_device *pdev) +{ + struct keycombo_state *state = platform_get_drvdata(pdev); + input_unregister_handler(&state->input_handler); + destroy_workqueue(state->wq); + kfree(state); + return 0; +} + + +struct platform_driver keycombo_driver = { + .driver.name = KEYCOMBO_NAME, + .probe = keycombo_probe, + .remove = keycombo_remove, +}; + +static int __init keycombo_init(void) +{ + return platform_driver_register(&keycombo_driver); +} + +static void __exit keycombo_exit(void) +{ + return platform_driver_unregister(&keycombo_driver); +} + +module_init(keycombo_init); +module_exit(keycombo_exit);
diff --git a/drivers/input/keyreset.c b/drivers/input/keyreset.c new file mode 100644 index 0000000..7e5222ae --- /dev/null +++ b/drivers/input/keyreset.c
@@ -0,0 +1,144 @@ +/* drivers/input/keyreset.c + * + * Copyright (C) 2014 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/input.h> +#include <linux/keyreset.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/reboot.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/syscalls.h> +#include <linux/keycombo.h> + +struct keyreset_state { + int restart_requested; + int (*reset_fn)(void); + struct platform_device *pdev_child; + struct work_struct restart_work; +}; + +static void do_restart(struct work_struct *unused) +{ + orderly_reboot(); +} + +static void do_reset_fn(void *priv) +{ + struct keyreset_state *state = priv; + if (state->restart_requested) + panic("keyboard reset failed, %d", state->restart_requested); + if (state->reset_fn) { + state->restart_requested = state->reset_fn(); + } else { + pr_info("keyboard reset\n"); + schedule_work(&state->restart_work); + state->restart_requested = 1; + } +} + +static int keyreset_probe(struct platform_device *pdev) +{ + int ret = -ENOMEM; + struct keycombo_platform_data *pdata_child; + struct keyreset_platform_data *pdata = pdev->dev.platform_data; + int up_size = 0, down_size = 0, size; + int key, *keyp; + struct keyreset_state *state; + + if (!pdata) + return -EINVAL; + state = devm_kzalloc(&pdev->dev, sizeof(*state), GFP_KERNEL); + if (!state) + return -ENOMEM; + + state->pdev_child = platform_device_alloc(KEYCOMBO_NAME, + PLATFORM_DEVID_AUTO); + if (!state->pdev_child) + return -ENOMEM; + state->pdev_child->dev.parent = &pdev->dev; + INIT_WORK(&state->restart_work, do_restart); + + keyp = pdata->keys_down; + while ((key = *keyp++)) { + if (key >= KEY_MAX) + continue; + down_size++; + } + if (pdata->keys_up) { + keyp = pdata->keys_up; + while ((key = *keyp++)) { + if (key >= KEY_MAX) + continue; + up_size++; + } + } + size = sizeof(struct keycombo_platform_data) + + sizeof(int) * (down_size + 1); + pdata_child = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); + if (!pdata_child) + goto error; + memcpy(pdata_child->keys_down, pdata->keys_down, + sizeof(int) * down_size); + if (up_size > 0) { + pdata_child->keys_up = devm_kzalloc(&pdev->dev, up_size + 1, + GFP_KERNEL); + if (!pdata_child->keys_up) + goto error; + memcpy(pdata_child->keys_up, pdata->keys_up, + sizeof(int) * up_size); + if (!pdata_child->keys_up) + goto error; + } + state->reset_fn = pdata->reset_fn; + pdata_child->key_down_fn = do_reset_fn; + pdata_child->priv = state; + pdata_child->key_down_delay = pdata->key_down_delay; + ret = platform_device_add_data(state->pdev_child, pdata_child, size); + if (ret) + goto error; + platform_set_drvdata(pdev, state); + return platform_device_add(state->pdev_child); +error: + platform_device_put(state->pdev_child); + return ret; +} + +int keyreset_remove(struct platform_device *pdev) +{ + struct keyreset_state *state = platform_get_drvdata(pdev); + platform_device_put(state->pdev_child); + return 0; +} + + +struct platform_driver keyreset_driver = { + .driver.name = KEYRESET_NAME, + .probe = keyreset_probe, + .remove = keyreset_remove, +}; + +static int __init keyreset_init(void) +{ + return platform_driver_register(&keyreset_driver); +} + +static void __exit keyreset_exit(void) +{ + return platform_driver_unregister(&keyreset_driver); +} + +module_init(keyreset_init); +module_exit(keyreset_exit);
diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index 9f082a3..4b269b3 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig
@@ -367,6 +367,17 @@ To compile this driver as a module, choose M here: the module will be called ati_remote2. +config INPUT_KEYCHORD + tristate "Key chord input driver support" + help + Say Y here if you want to enable the key chord driver + accessible at /dev/keychord. This driver can be used + for receiving notifications when client specified key + combinations are pressed. + + To compile this driver as a module, choose M here: the + module will be called keychord. + config INPUT_KEYSPAN_REMOTE tristate "Keyspan DMR USB remote control" depends on USB_ARCH_HAS_HCD @@ -535,6 +546,11 @@ To compile this driver as a module, choose M here: the module will be called sgi_btns. +config INPUT_GPIO + tristate "GPIO driver support" + help + Say Y here if you want to support gpio based keys, wheels etc... + config HP_SDC_RTC tristate "HP SDC Real Time Clock" depends on (GSC || HP300) && SERIO
diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile index 4b6118d3..2ecb686 100644 --- a/drivers/input/misc/Makefile +++ b/drivers/input/misc/Makefile
@@ -38,10 +38,12 @@ obj-$(CONFIG_INPUT_GPIO_BEEPER) += gpio-beeper.o obj-$(CONFIG_INPUT_GPIO_TILT_POLLED) += gpio_tilt_polled.o obj-$(CONFIG_INPUT_GPIO_DECODER) += gpio_decoder.o +obj-$(CONFIG_INPUT_GPIO) += gpio_event.o gpio_matrix.o gpio_input.o gpio_output.o gpio_axis.o obj-$(CONFIG_INPUT_HISI_POWERKEY) += hisi_powerkey.o obj-$(CONFIG_HP_SDC_RTC) += hp_sdc_rtc.o obj-$(CONFIG_INPUT_IMS_PCU) += ims-pcu.o obj-$(CONFIG_INPUT_IXP4XX_BEEPER) += ixp4xx-beeper.o +obj-$(CONFIG_INPUT_KEYCHORD) += keychord.o obj-$(CONFIG_INPUT_KEYSPAN_REMOTE) += keyspan_remote.o obj-$(CONFIG_INPUT_KXTJ9) += kxtj9.o obj-$(CONFIG_INPUT_M68K_BEEP) += m68kspkr.o
diff --git a/drivers/input/misc/gpio_axis.c b/drivers/input/misc/gpio_axis.c new file mode 100644 index 0000000..0acf4a5 --- /dev/null +++ b/drivers/input/misc/gpio_axis.c
@@ -0,0 +1,192 @@ +/* drivers/input/misc/gpio_axis.c + * + * Copyright (C) 2007 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/kernel.h> +#include <linux/gpio.h> +#include <linux/gpio_event.h> +#include <linux/interrupt.h> +#include <linux/slab.h> + +struct gpio_axis_state { + struct gpio_event_input_devs *input_devs; + struct gpio_event_axis_info *info; + uint32_t pos; +}; + +uint16_t gpio_axis_4bit_gray_map_table[] = { + [0x0] = 0x0, [0x1] = 0x1, /* 0000 0001 */ + [0x3] = 0x2, [0x2] = 0x3, /* 0011 0010 */ + [0x6] = 0x4, [0x7] = 0x5, /* 0110 0111 */ + [0x5] = 0x6, [0x4] = 0x7, /* 0101 0100 */ + [0xc] = 0x8, [0xd] = 0x9, /* 1100 1101 */ + [0xf] = 0xa, [0xe] = 0xb, /* 1111 1110 */ + [0xa] = 0xc, [0xb] = 0xd, /* 1010 1011 */ + [0x9] = 0xe, [0x8] = 0xf, /* 1001 1000 */ +}; +uint16_t gpio_axis_4bit_gray_map(struct gpio_event_axis_info *info, uint16_t in) +{ + return gpio_axis_4bit_gray_map_table[in]; +} + +uint16_t gpio_axis_5bit_singletrack_map_table[] = { + [0x10] = 0x00, [0x14] = 0x01, [0x1c] = 0x02, /* 10000 10100 11100 */ + [0x1e] = 0x03, [0x1a] = 0x04, [0x18] = 0x05, /* 11110 11010 11000 */ + [0x08] = 0x06, [0x0a] = 0x07, [0x0e] = 0x08, /* 01000 01010 01110 */ + [0x0f] = 0x09, [0x0d] = 0x0a, [0x0c] = 0x0b, /* 01111 01101 01100 */ + [0x04] = 0x0c, [0x05] = 0x0d, [0x07] = 0x0e, /* 00100 00101 00111 */ + [0x17] = 0x0f, [0x16] = 0x10, [0x06] = 0x11, /* 10111 10110 00110 */ + [0x02] = 0x12, [0x12] = 0x13, [0x13] = 0x14, /* 00010 10010 10011 */ + [0x1b] = 0x15, [0x0b] = 0x16, [0x03] = 0x17, /* 11011 01011 00011 */ + [0x01] = 0x18, [0x09] = 0x19, [0x19] = 0x1a, /* 00001 01001 11001 */ + [0x1d] = 0x1b, [0x15] = 0x1c, [0x11] = 0x1d, /* 11101 10101 10001 */ +}; +uint16_t gpio_axis_5bit_singletrack_map( + struct gpio_event_axis_info *info, uint16_t in) +{ + return gpio_axis_5bit_singletrack_map_table[in]; +} + +static void gpio_event_update_axis(struct gpio_axis_state *as, int report) +{ + struct gpio_event_axis_info *ai = as->info; + int i; + int change; + uint16_t state = 0; + uint16_t pos; + uint16_t old_pos = as->pos; + for (i = ai->count - 1; i >= 0; i--) + state = (state << 1) | gpio_get_value(ai->gpio[i]); + pos = ai->map(ai, state); + if (ai->flags & GPIOEAF_PRINT_RAW) + pr_info("axis %d-%d raw %x, pos %d -> %d\n", + ai->type, ai->code, state, old_pos, pos); + if (report && pos != old_pos) { + if (ai->type == EV_REL) { + change = (ai->decoded_size + pos - old_pos) % + ai->decoded_size; + if (change > ai->decoded_size / 2) + change -= ai->decoded_size; + if (change == ai->decoded_size / 2) { + if (ai->flags & GPIOEAF_PRINT_EVENT) + pr_info("axis %d-%d unknown direction, " + "pos %d -> %d\n", ai->type, + ai->code, old_pos, pos); + change = 0; /* no closest direction */ + } + if (ai->flags & GPIOEAF_PRINT_EVENT) + pr_info("axis %d-%d change %d\n", + ai->type, ai->code, change); + input_report_rel(as->input_devs->dev[ai->dev], + ai->code, change); + } else { + if (ai->flags & GPIOEAF_PRINT_EVENT) + pr_info("axis %d-%d now %d\n", + ai->type, ai->code, pos); + input_event(as->input_devs->dev[ai->dev], + ai->type, ai->code, pos); + } + input_sync(as->input_devs->dev[ai->dev]); + } + as->pos = pos; +} + +static irqreturn_t gpio_axis_irq_handler(int irq, void *dev_id) +{ + struct gpio_axis_state *as = dev_id; + gpio_event_update_axis(as, 1); + return IRQ_HANDLED; +} + +int gpio_event_axis_func(struct gpio_event_input_devs *input_devs, + struct gpio_event_info *info, void **data, int func) +{ + int ret; + int i; + int irq; + struct gpio_event_axis_info *ai; + struct gpio_axis_state *as; + + ai = container_of(info, struct gpio_event_axis_info, info); + if (func == GPIO_EVENT_FUNC_SUSPEND) { + for (i = 0; i < ai->count; i++) + disable_irq(gpio_to_irq(ai->gpio[i])); + return 0; + } + if (func == GPIO_EVENT_FUNC_RESUME) { + for (i = 0; i < ai->count; i++) + enable_irq(gpio_to_irq(ai->gpio[i])); + return 0; + } + + if (func == GPIO_EVENT_FUNC_INIT) { + *data = as = kmalloc(sizeof(*as), GFP_KERNEL); + if (as == NULL) { + ret = -ENOMEM; + goto err_alloc_axis_state_failed; + } + as->input_devs = input_devs; + as->info = ai; + if (ai->dev >= input_devs->count) { + pr_err("gpio_event_axis: bad device index %d >= %d " + "for %d:%d\n", ai->dev, input_devs->count, + ai->type, ai->code); + ret = -EINVAL; + goto err_bad_device_index; + } + + input_set_capability(input_devs->dev[ai->dev], + ai->type, ai->code); + if (ai->type == EV_ABS) { + input_set_abs_params(input_devs->dev[ai->dev], ai->code, + 0, ai->decoded_size - 1, 0, 0); + } + for (i = 0; i < ai->count; i++) { + ret = gpio_request(ai->gpio[i], "gpio_event_axis"); + if (ret < 0) + goto err_request_gpio_failed; + ret = gpio_direction_input(ai->gpio[i]); + if (ret < 0) + goto err_gpio_direction_input_failed; + ret = irq = gpio_to_irq(ai->gpio[i]); + if (ret < 0) + goto err_get_irq_num_failed; + ret = request_irq(irq, gpio_axis_irq_handler, + IRQF_TRIGGER_RISING | + IRQF_TRIGGER_FALLING, + "gpio_event_axis", as); + if (ret < 0) + goto err_request_irq_failed; + } + gpio_event_update_axis(as, 0); + return 0; + } + + ret = 0; + as = *data; + for (i = ai->count - 1; i >= 0; i--) { + free_irq(gpio_to_irq(ai->gpio[i]), as); +err_request_irq_failed: +err_get_irq_num_failed: +err_gpio_direction_input_failed: + gpio_free(ai->gpio[i]); +err_request_gpio_failed: + ; + } +err_bad_device_index: + kfree(as); + *data = NULL; +err_alloc_axis_state_failed: + return ret; +}
diff --git a/drivers/input/misc/gpio_event.c b/drivers/input/misc/gpio_event.c new file mode 100644 index 0000000..90f07eb --- /dev/null +++ b/drivers/input/misc/gpio_event.c
@@ -0,0 +1,228 @@ +/* drivers/input/misc/gpio_event.c + * + * Copyright (C) 2007 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/module.h> +#include <linux/input.h> +#include <linux/gpio_event.h> +#include <linux/hrtimer.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +struct gpio_event { + struct gpio_event_input_devs *input_devs; + const struct gpio_event_platform_data *info; + void *state[0]; +}; + +static int gpio_input_event( + struct input_dev *dev, unsigned int type, unsigned int code, int value) +{ + int i; + int devnr; + int ret = 0; + int tmp_ret; + struct gpio_event_info **ii; + struct gpio_event *ip = input_get_drvdata(dev); + + for (devnr = 0; devnr < ip->input_devs->count; devnr++) + if (ip->input_devs->dev[devnr] == dev) + break; + if (devnr == ip->input_devs->count) { + pr_err("gpio_input_event: unknown device %p\n", dev); + return -EIO; + } + + for (i = 0, ii = ip->info->info; i < ip->info->info_count; i++, ii++) { + if ((*ii)->event) { + tmp_ret = (*ii)->event(ip->input_devs, *ii, + &ip->state[i], + devnr, type, code, value); + if (tmp_ret) + ret = tmp_ret; + } + } + return ret; +} + +static int gpio_event_call_all_func(struct gpio_event *ip, int func) +{ + int i; + int ret; + struct gpio_event_info **ii; + + if (func == GPIO_EVENT_FUNC_INIT || func == GPIO_EVENT_FUNC_RESUME) { + ii = ip->info->info; + for (i = 0; i < ip->info->info_count; i++, ii++) { + if ((*ii)->func == NULL) { + ret = -ENODEV; + pr_err("gpio_event_probe: Incomplete pdata, " + "no function\n"); + goto err_no_func; + } + if (func == GPIO_EVENT_FUNC_RESUME && (*ii)->no_suspend) + continue; + ret = (*ii)->func(ip->input_devs, *ii, &ip->state[i], + func); + if (ret) { + pr_err("gpio_event_probe: function failed\n"); + goto err_func_failed; + } + } + return 0; + } + + ret = 0; + i = ip->info->info_count; + ii = ip->info->info + i; + while (i > 0) { + i--; + ii--; + if ((func & ~1) == GPIO_EVENT_FUNC_SUSPEND && (*ii)->no_suspend) + continue; + (*ii)->func(ip->input_devs, *ii, &ip->state[i], func & ~1); +err_func_failed: +err_no_func: + ; + } + return ret; +} + +static void __maybe_unused gpio_event_suspend(struct gpio_event *ip) +{ + gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_SUSPEND); + if (ip->info->power) + ip->info->power(ip->info, 0); +} + +static void __maybe_unused gpio_event_resume(struct gpio_event *ip) +{ + if (ip->info->power) + ip->info->power(ip->info, 1); + gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_RESUME); +} + +static int gpio_event_probe(struct platform_device *pdev) +{ + int err; + struct gpio_event *ip; + struct gpio_event_platform_data *event_info; + int dev_count = 1; + int i; + int registered = 0; + + event_info = pdev->dev.platform_data; + if (event_info == NULL) { + pr_err("gpio_event_probe: No pdata\n"); + return -ENODEV; + } + if ((!event_info->name && !event_info->names[0]) || + !event_info->info || !event_info->info_count) { + pr_err("gpio_event_probe: Incomplete pdata\n"); + return -ENODEV; + } + if (!event_info->name) + while (event_info->names[dev_count]) + dev_count++; + ip = kzalloc(sizeof(*ip) + + sizeof(ip->state[0]) * event_info->info_count + + sizeof(*ip->input_devs) + + sizeof(ip->input_devs->dev[0]) * dev_count, GFP_KERNEL); + if (ip == NULL) { + err = -ENOMEM; + pr_err("gpio_event_probe: Failed to allocate private data\n"); + goto err_kp_alloc_failed; + } + ip->input_devs = (void*)&ip->state[event_info->info_count]; + platform_set_drvdata(pdev, ip); + + for (i = 0; i < dev_count; i++) { + struct input_dev *input_dev = input_allocate_device(); + if (input_dev == NULL) { + err = -ENOMEM; + pr_err("gpio_event_probe: " + "Failed to allocate input device\n"); + goto err_input_dev_alloc_failed; + } + input_set_drvdata(input_dev, ip); + input_dev->name = event_info->name ? + event_info->name : event_info->names[i]; + input_dev->event = gpio_input_event; + ip->input_devs->dev[i] = input_dev; + } + ip->input_devs->count = dev_count; + ip->info = event_info; + if (event_info->power) + ip->info->power(ip->info, 1); + + err = gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_INIT); + if (err) + goto err_call_all_func_failed; + + for (i = 0; i < dev_count; i++) { + err = input_register_device(ip->input_devs->dev[i]); + if (err) { + pr_err("gpio_event_probe: Unable to register %s " + "input device\n", ip->input_devs->dev[i]->name); + goto err_input_register_device_failed; + } + registered++; + } + + return 0; + +err_input_register_device_failed: + gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_UNINIT); +err_call_all_func_failed: + if (event_info->power) + ip->info->power(ip->info, 0); + for (i = 0; i < registered; i++) + input_unregister_device(ip->input_devs->dev[i]); + for (i = dev_count - 1; i >= registered; i--) { + input_free_device(ip->input_devs->dev[i]); +err_input_dev_alloc_failed: + ; + } + kfree(ip); +err_kp_alloc_failed: + return err; +} + +static int gpio_event_remove(struct platform_device *pdev) +{ + struct gpio_event *ip = platform_get_drvdata(pdev); + int i; + + gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_UNINIT); + if (ip->info->power) + ip->info->power(ip->info, 0); + for (i = 0; i < ip->input_devs->count; i++) + input_unregister_device(ip->input_devs->dev[i]); + kfree(ip); + return 0; +} + +static struct platform_driver gpio_event_driver = { + .probe = gpio_event_probe, + .remove = gpio_event_remove, + .driver = { + .name = GPIO_EVENT_DEV_NAME, + }, +}; + +module_platform_driver(gpio_event_driver); + +MODULE_DESCRIPTION("GPIO Event Driver"); +MODULE_LICENSE("GPL"); +
diff --git a/drivers/input/misc/gpio_input.c b/drivers/input/misc/gpio_input.c new file mode 100644 index 0000000..5875d73 --- /dev/null +++ b/drivers/input/misc/gpio_input.c
@@ -0,0 +1,390 @@ +/* drivers/input/misc/gpio_input.c + * + * Copyright (C) 2007 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/kernel.h> +#include <linux/gpio.h> +#include <linux/gpio_event.h> +#include <linux/hrtimer.h> +#include <linux/input.h> +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <linux/pm_wakeup.h> + +enum { + DEBOUNCE_UNSTABLE = BIT(0), /* Got irq, while debouncing */ + DEBOUNCE_PRESSED = BIT(1), + DEBOUNCE_NOTPRESSED = BIT(2), + DEBOUNCE_WAIT_IRQ = BIT(3), /* Stable irq state */ + DEBOUNCE_POLL = BIT(4), /* Stable polling state */ + + DEBOUNCE_UNKNOWN = + DEBOUNCE_PRESSED | DEBOUNCE_NOTPRESSED, +}; + +struct gpio_key_state { + struct gpio_input_state *ds; + uint8_t debounce; +}; + +struct gpio_input_state { + struct gpio_event_input_devs *input_devs; + const struct gpio_event_input_info *info; + struct hrtimer timer; + int use_irq; + int debounce_count; + spinlock_t irq_lock; + struct wakeup_source *ws; + struct gpio_key_state key_state[0]; +}; + +static enum hrtimer_restart gpio_event_input_timer_func(struct hrtimer *timer) +{ + int i; + int pressed; + struct gpio_input_state *ds = + container_of(timer, struct gpio_input_state, timer); + unsigned gpio_flags = ds->info->flags; + unsigned npolarity; + int nkeys = ds->info->keymap_size; + const struct gpio_event_direct_entry *key_entry; + struct gpio_key_state *key_state; + unsigned long irqflags; + uint8_t debounce; + bool sync_needed; + +#if 0 + key_entry = kp->keys_info->keymap; + key_state = kp->key_state; + for (i = 0; i < nkeys; i++, key_entry++, key_state++) + pr_info("gpio_read_detect_status %d %d\n", key_entry->gpio, + gpio_read_detect_status(key_entry->gpio)); +#endif + key_entry = ds->info->keymap; + key_state = ds->key_state; + sync_needed = false; + spin_lock_irqsave(&ds->irq_lock, irqflags); + for (i = 0; i < nkeys; i++, key_entry++, key_state++) { + debounce = key_state->debounce; + if (debounce & DEBOUNCE_WAIT_IRQ) + continue; + if (key_state->debounce & DEBOUNCE_UNSTABLE) { + debounce = key_state->debounce = DEBOUNCE_UNKNOWN; + enable_irq(gpio_to_irq(key_entry->gpio)); + if (gpio_flags & GPIOEDF_PRINT_KEY_UNSTABLE) + pr_info("gpio_keys_scan_keys: key %x-%x, %d " + "(%d) continue debounce\n", + ds->info->type, key_entry->code, + i, key_entry->gpio); + } + npolarity = !(gpio_flags & GPIOEDF_ACTIVE_HIGH); + pressed = gpio_get_value(key_entry->gpio) ^ npolarity; + if (debounce & DEBOUNCE_POLL) { + if (pressed == !(debounce & DEBOUNCE_PRESSED)) { + ds->debounce_count++; + key_state->debounce = DEBOUNCE_UNKNOWN; + if (gpio_flags & GPIOEDF_PRINT_KEY_DEBOUNCE) + pr_info("gpio_keys_scan_keys: key %x-" + "%x, %d (%d) start debounce\n", + ds->info->type, key_entry->code, + i, key_entry->gpio); + } + continue; + } + if (pressed && (debounce & DEBOUNCE_NOTPRESSED)) { + if (gpio_flags & GPIOEDF_PRINT_KEY_DEBOUNCE) + pr_info("gpio_keys_scan_keys: key %x-%x, %d " + "(%d) debounce pressed 1\n", + ds->info->type, key_entry->code, + i, key_entry->gpio); + key_state->debounce = DEBOUNCE_PRESSED; + continue; + } + if (!pressed && (debounce & DEBOUNCE_PRESSED)) { + if (gpio_flags & GPIOEDF_PRINT_KEY_DEBOUNCE) + pr_info("gpio_keys_scan_keys: key %x-%x, %d " + "(%d) debounce pressed 0\n", + ds->info->type, key_entry->code, + i, key_entry->gpio); + key_state->debounce = DEBOUNCE_NOTPRESSED; + continue; + } + /* key is stable */ + ds->debounce_count--; + if (ds->use_irq) + key_state->debounce |= DEBOUNCE_WAIT_IRQ; + else + key_state->debounce |= DEBOUNCE_POLL; + if (gpio_flags & GPIOEDF_PRINT_KEYS) + pr_info("gpio_keys_scan_keys: key %x-%x, %d (%d) " + "changed to %d\n", ds->info->type, + key_entry->code, i, key_entry->gpio, pressed); + input_event(ds->input_devs->dev[key_entry->dev], ds->info->type, + key_entry->code, pressed); + sync_needed = true; + } + if (sync_needed) { + for (i = 0; i < ds->input_devs->count; i++) + input_sync(ds->input_devs->dev[i]); + } + +#if 0 + key_entry = kp->keys_info->keymap; + key_state = kp->key_state; + for (i = 0; i < nkeys; i++, key_entry++, key_state++) { + pr_info("gpio_read_detect_status %d %d\n", key_entry->gpio, + gpio_read_detect_status(key_entry->gpio)); + } +#endif + + if (ds->debounce_count) + hrtimer_start(timer, ds->info->debounce_time, HRTIMER_MODE_REL); + else if (!ds->use_irq) + hrtimer_start(timer, ds->info->poll_time, HRTIMER_MODE_REL); + else + __pm_relax(ds->ws); + + spin_unlock_irqrestore(&ds->irq_lock, irqflags); + + return HRTIMER_NORESTART; +} + +static irqreturn_t gpio_event_input_irq_handler(int irq, void *dev_id) +{ + struct gpio_key_state *ks = dev_id; + struct gpio_input_state *ds = ks->ds; + int keymap_index = ks - ds->key_state; + const struct gpio_event_direct_entry *key_entry; + unsigned long irqflags; + int pressed; + + if (!ds->use_irq) + return IRQ_HANDLED; + + key_entry = &ds->info->keymap[keymap_index]; + + if (ds->info->debounce_time) { + spin_lock_irqsave(&ds->irq_lock, irqflags); + if (ks->debounce & DEBOUNCE_WAIT_IRQ) { + ks->debounce = DEBOUNCE_UNKNOWN; + if (ds->debounce_count++ == 0) { + __pm_stay_awake(ds->ws); + hrtimer_start( + &ds->timer, ds->info->debounce_time, + HRTIMER_MODE_REL); + } + if (ds->info->flags & GPIOEDF_PRINT_KEY_DEBOUNCE) + pr_info("gpio_event_input_irq_handler: " + "key %x-%x, %d (%d) start debounce\n", + ds->info->type, key_entry->code, + keymap_index, key_entry->gpio); + } else { + disable_irq_nosync(irq); + ks->debounce = DEBOUNCE_UNSTABLE; + } + spin_unlock_irqrestore(&ds->irq_lock, irqflags); + } else { + pressed = gpio_get_value(key_entry->gpio) ^ + !(ds->info->flags & GPIOEDF_ACTIVE_HIGH); + if (ds->info->flags & GPIOEDF_PRINT_KEYS) + pr_info("gpio_event_input_irq_handler: key %x-%x, %d " + "(%d) changed to %d\n", + ds->info->type, key_entry->code, keymap_index, + key_entry->gpio, pressed); + input_event(ds->input_devs->dev[key_entry->dev], ds->info->type, + key_entry->code, pressed); + input_sync(ds->input_devs->dev[key_entry->dev]); + } + return IRQ_HANDLED; +} + +static int gpio_event_input_request_irqs(struct gpio_input_state *ds) +{ + int i; + int err; + unsigned int irq; + unsigned long req_flags = IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING; + + for (i = 0; i < ds->info->keymap_size; i++) { + err = irq = gpio_to_irq(ds->info->keymap[i].gpio); + if (err < 0) + goto err_gpio_get_irq_num_failed; + err = request_irq(irq, gpio_event_input_irq_handler, + req_flags, "gpio_keys", &ds->key_state[i]); + if (err) { + pr_err("gpio_event_input_request_irqs: request_irq " + "failed for input %d, irq %d\n", + ds->info->keymap[i].gpio, irq); + goto err_request_irq_failed; + } + if (ds->info->info.no_suspend) { + err = enable_irq_wake(irq); + if (err) { + pr_err("gpio_event_input_request_irqs: " + "enable_irq_wake failed for input %d, " + "irq %d\n", + ds->info->keymap[i].gpio, irq); + goto err_enable_irq_wake_failed; + } + } + } + return 0; + + for (i = ds->info->keymap_size - 1; i >= 0; i--) { + irq = gpio_to_irq(ds->info->keymap[i].gpio); + if (ds->info->info.no_suspend) + disable_irq_wake(irq); +err_enable_irq_wake_failed: + free_irq(irq, &ds->key_state[i]); +err_request_irq_failed: +err_gpio_get_irq_num_failed: + ; + } + return err; +} + +int gpio_event_input_func(struct gpio_event_input_devs *input_devs, + struct gpio_event_info *info, void **data, int func) +{ + int ret; + int i; + unsigned long irqflags; + struct gpio_event_input_info *di; + struct gpio_input_state *ds = *data; + char *wlname; + + di = container_of(info, struct gpio_event_input_info, info); + + if (func == GPIO_EVENT_FUNC_SUSPEND) { + if (ds->use_irq) + for (i = 0; i < di->keymap_size; i++) + disable_irq(gpio_to_irq(di->keymap[i].gpio)); + hrtimer_cancel(&ds->timer); + return 0; + } + if (func == GPIO_EVENT_FUNC_RESUME) { + spin_lock_irqsave(&ds->irq_lock, irqflags); + if (ds->use_irq) + for (i = 0; i < di->keymap_size; i++) + enable_irq(gpio_to_irq(di->keymap[i].gpio)); + hrtimer_start(&ds->timer, ktime_set(0, 0), HRTIMER_MODE_REL); + spin_unlock_irqrestore(&ds->irq_lock, irqflags); + return 0; + } + + if (func == GPIO_EVENT_FUNC_INIT) { + if (ktime_to_ns(di->poll_time) <= 0) + di->poll_time = ktime_set(0, 20 * NSEC_PER_MSEC); + + *data = ds = kzalloc(sizeof(*ds) + sizeof(ds->key_state[0]) * + di->keymap_size, GFP_KERNEL); + if (ds == NULL) { + ret = -ENOMEM; + pr_err("gpio_event_input_func: " + "Failed to allocate private data\n"); + goto err_ds_alloc_failed; + } + ds->debounce_count = di->keymap_size; + ds->input_devs = input_devs; + ds->info = di; + wlname = kasprintf(GFP_KERNEL, "gpio_input:%s%s", + input_devs->dev[0]->name, + (input_devs->count > 1) ? "..." : ""); + + ds->ws = wakeup_source_register(wlname); + kfree(wlname); + if (!ds->ws) { + ret = -ENOMEM; + pr_err("gpio_event_input_func: " + "Failed to allocate wakeup source\n"); + goto err_ws_failed; + } + + spin_lock_init(&ds->irq_lock); + + for (i = 0; i < di->keymap_size; i++) { + int dev = di->keymap[i].dev; + if (dev >= input_devs->count) { + pr_err("gpio_event_input_func: bad device " + "index %d >= %d for key code %d\n", + dev, input_devs->count, + di->keymap[i].code); + ret = -EINVAL; + goto err_bad_keymap; + } + input_set_capability(input_devs->dev[dev], di->type, + di->keymap[i].code); + ds->key_state[i].ds = ds; + ds->key_state[i].debounce = DEBOUNCE_UNKNOWN; + } + + for (i = 0; i < di->keymap_size; i++) { + ret = gpio_request(di->keymap[i].gpio, "gpio_kp_in"); + if (ret) { + pr_err("gpio_event_input_func: gpio_request " + "failed for %d\n", di->keymap[i].gpio); + goto err_gpio_request_failed; + } + ret = gpio_direction_input(di->keymap[i].gpio); + if (ret) { + pr_err("gpio_event_input_func: " + "gpio_direction_input failed for %d\n", + di->keymap[i].gpio); + goto err_gpio_configure_failed; + } + } + + ret = gpio_event_input_request_irqs(ds); + + spin_lock_irqsave(&ds->irq_lock, irqflags); + ds->use_irq = ret == 0; + + pr_info("GPIO Input Driver: Start gpio inputs for %s%s in %s " + "mode\n", input_devs->dev[0]->name, + (input_devs->count > 1) ? "..." : "", + ret == 0 ? "interrupt" : "polling"); + + hrtimer_init(&ds->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + ds->timer.function = gpio_event_input_timer_func; + hrtimer_start(&ds->timer, ktime_set(0, 0), HRTIMER_MODE_REL); + spin_unlock_irqrestore(&ds->irq_lock, irqflags); + return 0; + } + + ret = 0; + spin_lock_irqsave(&ds->irq_lock, irqflags); + hrtimer_cancel(&ds->timer); + if (ds->use_irq) { + for (i = di->keymap_size - 1; i >= 0; i--) { + int irq = gpio_to_irq(di->keymap[i].gpio); + if (ds->info->info.no_suspend) + disable_irq_wake(irq); + free_irq(irq, &ds->key_state[i]); + } + } + spin_unlock_irqrestore(&ds->irq_lock, irqflags); + + for (i = di->keymap_size - 1; i >= 0; i--) { +err_gpio_configure_failed: + gpio_free(di->keymap[i].gpio); +err_gpio_request_failed: + ; + } +err_bad_keymap: + wakeup_source_unregister(ds->ws); +err_ws_failed: + kfree(ds); +err_ds_alloc_failed: + return ret; +}
diff --git a/drivers/input/misc/gpio_matrix.c b/drivers/input/misc/gpio_matrix.c new file mode 100644 index 0000000..08769dd --- /dev/null +++ b/drivers/input/misc/gpio_matrix.c
@@ -0,0 +1,440 @@ +/* drivers/input/misc/gpio_matrix.c + * + * Copyright (C) 2007 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/kernel.h> +#include <linux/gpio.h> +#include <linux/gpio_event.h> +#include <linux/hrtimer.h> +#include <linux/interrupt.h> +#include <linux/slab.h> + +struct gpio_kp { + struct gpio_event_input_devs *input_devs; + struct gpio_event_matrix_info *keypad_info; + struct hrtimer timer; + struct wakeup_source wake_src; + int current_output; + unsigned int use_irq:1; + unsigned int key_state_changed:1; + unsigned int last_key_state_changed:1; + unsigned int some_keys_pressed:2; + unsigned int disabled_irq:1; + unsigned long keys_pressed[0]; +}; + +static void clear_phantom_key(struct gpio_kp *kp, int out, int in) +{ + struct gpio_event_matrix_info *mi = kp->keypad_info; + int key_index = out * mi->ninputs + in; + unsigned short keyentry = mi->keymap[key_index]; + unsigned short keycode = keyentry & MATRIX_KEY_MASK; + unsigned short dev = keyentry >> MATRIX_CODE_BITS; + + if (!test_bit(keycode, kp->input_devs->dev[dev]->key)) { + if (mi->flags & GPIOKPF_PRINT_PHANTOM_KEYS) + pr_info("gpiomatrix: phantom key %x, %d-%d (%d-%d) " + "cleared\n", keycode, out, in, + mi->output_gpios[out], mi->input_gpios[in]); + __clear_bit(key_index, kp->keys_pressed); + } else { + if (mi->flags & GPIOKPF_PRINT_PHANTOM_KEYS) + pr_info("gpiomatrix: phantom key %x, %d-%d (%d-%d) " + "not cleared\n", keycode, out, in, + mi->output_gpios[out], mi->input_gpios[in]); + } +} + +static int restore_keys_for_input(struct gpio_kp *kp, int out, int in) +{ + int rv = 0; + int key_index; + + key_index = out * kp->keypad_info->ninputs + in; + while (out < kp->keypad_info->noutputs) { + if (test_bit(key_index, kp->keys_pressed)) { + rv = 1; + clear_phantom_key(kp, out, in); + } + key_index += kp->keypad_info->ninputs; + out++; + } + return rv; +} + +static void remove_phantom_keys(struct gpio_kp *kp) +{ + int out, in, inp; + int key_index; + + if (kp->some_keys_pressed < 3) + return; + + for (out = 0; out < kp->keypad_info->noutputs; out++) { + inp = -1; + key_index = out * kp->keypad_info->ninputs; + for (in = 0; in < kp->keypad_info->ninputs; in++, key_index++) { + if (test_bit(key_index, kp->keys_pressed)) { + if (inp == -1) { + inp = in; + continue; + } + if (inp >= 0) { + if (!restore_keys_for_input(kp, out + 1, + inp)) + break; + clear_phantom_key(kp, out, inp); + inp = -2; + } + restore_keys_for_input(kp, out, in); + } + } + } +} + +static void report_key(struct gpio_kp *kp, int key_index, int out, int in) +{ + struct gpio_event_matrix_info *mi = kp->keypad_info; + int pressed = test_bit(key_index, kp->keys_pressed); + unsigned short keyentry = mi->keymap[key_index]; + unsigned short keycode = keyentry & MATRIX_KEY_MASK; + unsigned short dev = keyentry >> MATRIX_CODE_BITS; + + if (pressed != test_bit(keycode, kp->input_devs->dev[dev]->key)) { + if (keycode == KEY_RESERVED) { + if (mi->flags & GPIOKPF_PRINT_UNMAPPED_KEYS) + pr_info("gpiomatrix: unmapped key, %d-%d " + "(%d-%d) changed to %d\n", + out, in, mi->output_gpios[out], + mi->input_gpios[in], pressed); + } else { + if (mi->flags & GPIOKPF_PRINT_MAPPED_KEYS) + pr_info("gpiomatrix: key %x, %d-%d (%d-%d) " + "changed to %d\n", keycode, + out, in, mi->output_gpios[out], + mi->input_gpios[in], pressed); + input_report_key(kp->input_devs->dev[dev], keycode, pressed); + } + } +} + +static void report_sync(struct gpio_kp *kp) +{ + int i; + + for (i = 0; i < kp->input_devs->count; i++) + input_sync(kp->input_devs->dev[i]); +} + +static enum hrtimer_restart gpio_keypad_timer_func(struct hrtimer *timer) +{ + int out, in; + int key_index; + int gpio; + struct gpio_kp *kp = container_of(timer, struct gpio_kp, timer); + struct gpio_event_matrix_info *mi = kp->keypad_info; + unsigned gpio_keypad_flags = mi->flags; + unsigned polarity = !!(gpio_keypad_flags & GPIOKPF_ACTIVE_HIGH); + + out = kp->current_output; + if (out == mi->noutputs) { + out = 0; + kp->last_key_state_changed = kp->key_state_changed; + kp->key_state_changed = 0; + kp->some_keys_pressed = 0; + } else { + key_index = out * mi->ninputs; + for (in = 0; in < mi->ninputs; in++, key_index++) { + gpio = mi->input_gpios[in]; + if (gpio_get_value(gpio) ^ !polarity) { + if (kp->some_keys_pressed < 3) + kp->some_keys_pressed++; + kp->key_state_changed |= !__test_and_set_bit( + key_index, kp->keys_pressed); + } else + kp->key_state_changed |= __test_and_clear_bit( + key_index, kp->keys_pressed); + } + gpio = mi->output_gpios[out]; + if (gpio_keypad_flags & GPIOKPF_DRIVE_INACTIVE) + gpio_set_value(gpio, !polarity); + else + gpio_direction_input(gpio); + out++; + } + kp->current_output = out; + if (out < mi->noutputs) { + gpio = mi->output_gpios[out]; + if (gpio_keypad_flags & GPIOKPF_DRIVE_INACTIVE) + gpio_set_value(gpio, polarity); + else + gpio_direction_output(gpio, polarity); + hrtimer_start(timer, mi->settle_time, HRTIMER_MODE_REL); + return HRTIMER_NORESTART; + } + if (gpio_keypad_flags & GPIOKPF_DEBOUNCE) { + if (kp->key_state_changed) { + hrtimer_start(&kp->timer, mi->debounce_delay, + HRTIMER_MODE_REL); + return HRTIMER_NORESTART; + } + kp->key_state_changed = kp->last_key_state_changed; + } + if (kp->key_state_changed) { + if (gpio_keypad_flags & GPIOKPF_REMOVE_SOME_PHANTOM_KEYS) + remove_phantom_keys(kp); + key_index = 0; + for (out = 0; out < mi->noutputs; out++) + for (in = 0; in < mi->ninputs; in++, key_index++) + report_key(kp, key_index, out, in); + report_sync(kp); + } + if (!kp->use_irq || kp->some_keys_pressed) { + hrtimer_start(timer, mi->poll_time, HRTIMER_MODE_REL); + return HRTIMER_NORESTART; + } + + /* No keys are pressed, reenable interrupt */ + for (out = 0; out < mi->noutputs; out++) { + if (gpio_keypad_flags & GPIOKPF_DRIVE_INACTIVE) + gpio_set_value(mi->output_gpios[out], polarity); + else + gpio_direction_output(mi->output_gpios[out], polarity); + } + for (in = 0; in < mi->ninputs; in++) + enable_irq(gpio_to_irq(mi->input_gpios[in])); + __pm_relax(&kp->wake_src); + return HRTIMER_NORESTART; +} + +static irqreturn_t gpio_keypad_irq_handler(int irq_in, void *dev_id) +{ + int i; + struct gpio_kp *kp = dev_id; + struct gpio_event_matrix_info *mi = kp->keypad_info; + unsigned gpio_keypad_flags = mi->flags; + + if (!kp->use_irq) { + /* ignore interrupt while registering the handler */ + kp->disabled_irq = 1; + disable_irq_nosync(irq_in); + return IRQ_HANDLED; + } + + for (i = 0; i < mi->ninputs; i++) + disable_irq_nosync(gpio_to_irq(mi->input_gpios[i])); + for (i = 0; i < mi->noutputs; i++) { + if (gpio_keypad_flags & GPIOKPF_DRIVE_INACTIVE) + gpio_set_value(mi->output_gpios[i], + !(gpio_keypad_flags & GPIOKPF_ACTIVE_HIGH)); + else + gpio_direction_input(mi->output_gpios[i]); + } + __pm_stay_awake(&kp->wake_src); + hrtimer_start(&kp->timer, ktime_set(0, 0), HRTIMER_MODE_REL); + return IRQ_HANDLED; +} + +static int gpio_keypad_request_irqs(struct gpio_kp *kp) +{ + int i; + int err; + unsigned int irq; + unsigned long request_flags; + struct gpio_event_matrix_info *mi = kp->keypad_info; + + switch (mi->flags & (GPIOKPF_ACTIVE_HIGH|GPIOKPF_LEVEL_TRIGGERED_IRQ)) { + default: + request_flags = IRQF_TRIGGER_FALLING; + break; + case GPIOKPF_ACTIVE_HIGH: + request_flags = IRQF_TRIGGER_RISING; + break; + case GPIOKPF_LEVEL_TRIGGERED_IRQ: + request_flags = IRQF_TRIGGER_LOW; + break; + case GPIOKPF_LEVEL_TRIGGERED_IRQ | GPIOKPF_ACTIVE_HIGH: + request_flags = IRQF_TRIGGER_HIGH; + break; + } + + for (i = 0; i < mi->ninputs; i++) { + err = irq = gpio_to_irq(mi->input_gpios[i]); + if (err < 0) + goto err_gpio_get_irq_num_failed; + err = request_irq(irq, gpio_keypad_irq_handler, request_flags, + "gpio_kp", kp); + if (err) { + pr_err("gpiomatrix: request_irq failed for input %d, " + "irq %d\n", mi->input_gpios[i], irq); + goto err_request_irq_failed; + } + err = enable_irq_wake(irq); + if (err) { + pr_err("gpiomatrix: set_irq_wake failed for input %d, " + "irq %d\n", mi->input_gpios[i], irq); + } + disable_irq(irq); + if (kp->disabled_irq) { + kp->disabled_irq = 0; + enable_irq(irq); + } + } + return 0; + + for (i = mi->noutputs - 1; i >= 0; i--) { + free_irq(gpio_to_irq(mi->input_gpios[i]), kp); +err_request_irq_failed: +err_gpio_get_irq_num_failed: + ; + } + return err; +} + +int gpio_event_matrix_func(struct gpio_event_input_devs *input_devs, + struct gpio_event_info *info, void **data, int func) +{ + int i; + int err; + int key_count; + struct gpio_kp *kp; + struct gpio_event_matrix_info *mi; + + mi = container_of(info, struct gpio_event_matrix_info, info); + if (func == GPIO_EVENT_FUNC_SUSPEND || func == GPIO_EVENT_FUNC_RESUME) { + /* TODO: disable scanning */ + return 0; + } + + if (func == GPIO_EVENT_FUNC_INIT) { + if (mi->keymap == NULL || + mi->input_gpios == NULL || + mi->output_gpios == NULL) { + err = -ENODEV; + pr_err("gpiomatrix: Incomplete pdata\n"); + goto err_invalid_platform_data; + } + key_count = mi->ninputs * mi->noutputs; + + *data = kp = kzalloc(sizeof(*kp) + sizeof(kp->keys_pressed[0]) * + BITS_TO_LONGS(key_count), GFP_KERNEL); + if (kp == NULL) { + err = -ENOMEM; + pr_err("gpiomatrix: Failed to allocate private data\n"); + goto err_kp_alloc_failed; + } + kp->input_devs = input_devs; + kp->keypad_info = mi; + for (i = 0; i < key_count; i++) { + unsigned short keyentry = mi->keymap[i]; + unsigned short keycode = keyentry & MATRIX_KEY_MASK; + unsigned short dev = keyentry >> MATRIX_CODE_BITS; + if (dev >= input_devs->count) { + pr_err("gpiomatrix: bad device index %d >= " + "%d for key code %d\n", + dev, input_devs->count, keycode); + err = -EINVAL; + goto err_bad_keymap; + } + if (keycode && keycode <= KEY_MAX) + input_set_capability(input_devs->dev[dev], + EV_KEY, keycode); + } + + for (i = 0; i < mi->noutputs; i++) { + err = gpio_request(mi->output_gpios[i], "gpio_kp_out"); + if (err) { + pr_err("gpiomatrix: gpio_request failed for " + "output %d\n", mi->output_gpios[i]); + goto err_request_output_gpio_failed; + } + if (gpio_cansleep(mi->output_gpios[i])) { + pr_err("gpiomatrix: unsupported output gpio %d," + " can sleep\n", mi->output_gpios[i]); + err = -EINVAL; + goto err_output_gpio_configure_failed; + } + if (mi->flags & GPIOKPF_DRIVE_INACTIVE) + err = gpio_direction_output(mi->output_gpios[i], + !(mi->flags & GPIOKPF_ACTIVE_HIGH)); + else + err = gpio_direction_input(mi->output_gpios[i]); + if (err) { + pr_err("gpiomatrix: gpio_configure failed for " + "output %d\n", mi->output_gpios[i]); + goto err_output_gpio_configure_failed; + } + } + for (i = 0; i < mi->ninputs; i++) { + err = gpio_request(mi->input_gpios[i], "gpio_kp_in"); + if (err) { + pr_err("gpiomatrix: gpio_request failed for " + "input %d\n", mi->input_gpios[i]); + goto err_request_input_gpio_failed; + } + err = gpio_direction_input(mi->input_gpios[i]); + if (err) { + pr_err("gpiomatrix: gpio_direction_input failed" + " for input %d\n", mi->input_gpios[i]); + goto err_gpio_direction_input_failed; + } + } + kp->current_output = mi->noutputs; + kp->key_state_changed = 1; + + hrtimer_init(&kp->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + kp->timer.function = gpio_keypad_timer_func; + wakeup_source_init(&kp->wake_src, "gpio_kp"); + err = gpio_keypad_request_irqs(kp); + kp->use_irq = err == 0; + + pr_info("GPIO Matrix Keypad Driver: Start keypad matrix for " + "%s%s in %s mode\n", input_devs->dev[0]->name, + (input_devs->count > 1) ? "..." : "", + kp->use_irq ? "interrupt" : "polling"); + + if (kp->use_irq) + __pm_stay_awake(&kp->wake_src); + hrtimer_start(&kp->timer, ktime_set(0, 0), HRTIMER_MODE_REL); + + return 0; + } + + err = 0; + kp = *data; + + if (kp->use_irq) + for (i = mi->noutputs - 1; i >= 0; i--) + free_irq(gpio_to_irq(mi->input_gpios[i]), kp); + + hrtimer_cancel(&kp->timer); + wakeup_source_trash(&kp->wake_src); + for (i = mi->noutputs - 1; i >= 0; i--) { +err_gpio_direction_input_failed: + gpio_free(mi->input_gpios[i]); +err_request_input_gpio_failed: + ; + } + for (i = mi->noutputs - 1; i >= 0; i--) { +err_output_gpio_configure_failed: + gpio_free(mi->output_gpios[i]); +err_request_output_gpio_failed: + ; + } +err_bad_keymap: + kfree(kp); +err_kp_alloc_failed: +err_invalid_platform_data: + return err; +}
diff --git a/drivers/input/misc/gpio_output.c b/drivers/input/misc/gpio_output.c new file mode 100644 index 0000000..2aac2fa --- /dev/null +++ b/drivers/input/misc/gpio_output.c
@@ -0,0 +1,97 @@ +/* drivers/input/misc/gpio_output.c + * + * Copyright (C) 2007 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/kernel.h> +#include <linux/gpio.h> +#include <linux/gpio_event.h> + +int gpio_event_output_event( + struct gpio_event_input_devs *input_devs, struct gpio_event_info *info, + void **data, unsigned int dev, unsigned int type, + unsigned int code, int value) +{ + int i; + struct gpio_event_output_info *oi; + oi = container_of(info, struct gpio_event_output_info, info); + if (type != oi->type) + return 0; + if (!(oi->flags & GPIOEDF_ACTIVE_HIGH)) + value = !value; + for (i = 0; i < oi->keymap_size; i++) + if (dev == oi->keymap[i].dev && code == oi->keymap[i].code) + gpio_set_value(oi->keymap[i].gpio, value); + return 0; +} + +int gpio_event_output_func( + struct gpio_event_input_devs *input_devs, struct gpio_event_info *info, + void **data, int func) +{ + int ret; + int i; + struct gpio_event_output_info *oi; + oi = container_of(info, struct gpio_event_output_info, info); + + if (func == GPIO_EVENT_FUNC_SUSPEND || func == GPIO_EVENT_FUNC_RESUME) + return 0; + + if (func == GPIO_EVENT_FUNC_INIT) { + int output_level = !(oi->flags & GPIOEDF_ACTIVE_HIGH); + + for (i = 0; i < oi->keymap_size; i++) { + int dev = oi->keymap[i].dev; + if (dev >= input_devs->count) { + pr_err("gpio_event_output_func: bad device " + "index %d >= %d for key code %d\n", + dev, input_devs->count, + oi->keymap[i].code); + ret = -EINVAL; + goto err_bad_keymap; + } + input_set_capability(input_devs->dev[dev], oi->type, + oi->keymap[i].code); + } + + for (i = 0; i < oi->keymap_size; i++) { + ret = gpio_request(oi->keymap[i].gpio, + "gpio_event_output"); + if (ret) { + pr_err("gpio_event_output_func: gpio_request " + "failed for %d\n", oi->keymap[i].gpio); + goto err_gpio_request_failed; + } + ret = gpio_direction_output(oi->keymap[i].gpio, + output_level); + if (ret) { + pr_err("gpio_event_output_func: " + "gpio_direction_output failed for %d\n", + oi->keymap[i].gpio); + goto err_gpio_direction_output_failed; + } + } + return 0; + } + + ret = 0; + for (i = oi->keymap_size - 1; i >= 0; i--) { +err_gpio_direction_output_failed: + gpio_free(oi->keymap[i].gpio); +err_gpio_request_failed: + ; + } +err_bad_keymap: + return ret; +} +
diff --git a/drivers/input/misc/keychord.c b/drivers/input/misc/keychord.c new file mode 100644 index 0000000..791f285 --- /dev/null +++ b/drivers/input/misc/keychord.c
@@ -0,0 +1,467 @@ +/* + * drivers/input/misc/keychord.c + * + * Copyright (C) 2008 Google, Inc. + * Author: Mike Lockwood <lockwood@android.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * +*/ + +#include <linux/poll.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/fs.h> +#include <linux/miscdevice.h> +#include <linux/keychord.h> +#include <linux/sched.h> + +#define KEYCHORD_NAME "keychord" +#define BUFFER_SIZE 16 + +MODULE_AUTHOR("Mike Lockwood <lockwood@android.com>"); +MODULE_DESCRIPTION("Key chord input driver"); +MODULE_SUPPORTED_DEVICE("keychord"); +MODULE_LICENSE("GPL"); + +#define NEXT_KEYCHORD(kc) ((struct input_keychord *) \ + ((char *)kc + sizeof(struct input_keychord) + \ + kc->count * sizeof(kc->keycodes[0]))) + +struct keychord_device { + struct input_handler input_handler; + int registered; + + /* list of keychords to monitor */ + struct input_keychord *keychords; + int keychord_count; + + /* bitmask of keys contained in our keychords */ + unsigned long keybit[BITS_TO_LONGS(KEY_CNT)]; + /* current state of the keys */ + unsigned long keystate[BITS_TO_LONGS(KEY_CNT)]; + /* number of keys that are currently pressed */ + int key_down; + + /* second input_device_id is needed for null termination */ + struct input_device_id device_ids[2]; + + spinlock_t lock; + wait_queue_head_t waitq; + unsigned char head; + unsigned char tail; + __u16 buff[BUFFER_SIZE]; + /* Bit to serialize writes to this device */ +#define KEYCHORD_BUSY 0x01 + unsigned long flags; + wait_queue_head_t write_waitq; +}; + +static int check_keychord(struct keychord_device *kdev, + struct input_keychord *keychord) +{ + int i; + + if (keychord->count != kdev->key_down) + return 0; + + for (i = 0; i < keychord->count; i++) { + if (!test_bit(keychord->keycodes[i], kdev->keystate)) + return 0; + } + + /* we have a match */ + return 1; +} + +static void keychord_event(struct input_handle *handle, unsigned int type, + unsigned int code, int value) +{ + struct keychord_device *kdev = handle->private; + struct input_keychord *keychord; + unsigned long flags; + int i, got_chord = 0; + + if (type != EV_KEY || code >= KEY_MAX) + return; + + spin_lock_irqsave(&kdev->lock, flags); + /* do nothing if key state did not change */ + if (!test_bit(code, kdev->keystate) == !value) + goto done; + __change_bit(code, kdev->keystate); + if (value) + kdev->key_down++; + else + kdev->key_down--; + + /* don't notify on key up */ + if (!value) + goto done; + /* ignore this event if it is not one of the keys we are monitoring */ + if (!test_bit(code, kdev->keybit)) + goto done; + + keychord = kdev->keychords; + if (!keychord) + goto done; + + /* check to see if the keyboard state matches any keychords */ + for (i = 0; i < kdev->keychord_count; i++) { + if (check_keychord(kdev, keychord)) { + kdev->buff[kdev->head] = keychord->id; + kdev->head = (kdev->head + 1) % BUFFER_SIZE; + got_chord = 1; + break; + } + /* skip to next keychord */ + keychord = NEXT_KEYCHORD(keychord); + } + +done: + spin_unlock_irqrestore(&kdev->lock, flags); + + if (got_chord) { + pr_info("keychord: got keychord id %d. Any tasks: %d\n", + keychord->id, + !list_empty_careful(&kdev->waitq.head)); + wake_up_interruptible(&kdev->waitq); + } +} + +static int keychord_connect(struct input_handler *handler, + struct input_dev *dev, + const struct input_device_id *id) +{ + int i, ret; + struct input_handle *handle; + struct keychord_device *kdev = + container_of(handler, struct keychord_device, input_handler); + + /* + * ignore this input device if it does not contain any keycodes + * that we are monitoring + */ + for (i = 0; i < KEY_MAX; i++) { + if (test_bit(i, kdev->keybit) && test_bit(i, dev->keybit)) + break; + } + if (i == KEY_MAX) + return -ENODEV; + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + handle->dev = dev; + handle->handler = handler; + handle->name = KEYCHORD_NAME; + handle->private = kdev; + + ret = input_register_handle(handle); + if (ret) + goto err_input_register_handle; + + ret = input_open_device(handle); + if (ret) + goto err_input_open_device; + + pr_info("keychord: using input dev %s for fevent\n", dev->name); + return 0; + +err_input_open_device: + input_unregister_handle(handle); +err_input_register_handle: + kfree(handle); + return ret; +} + +static void keychord_disconnect(struct input_handle *handle) +{ + input_close_device(handle); + input_unregister_handle(handle); + kfree(handle); +} + +/* + * keychord_read is used to read keychord events from the driver + */ +static ssize_t keychord_read(struct file *file, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct keychord_device *kdev = file->private_data; + __u16 id; + int retval; + unsigned long flags; + + if (count < sizeof(id)) + return -EINVAL; + count = sizeof(id); + + if (kdev->head == kdev->tail && (file->f_flags & O_NONBLOCK)) + return -EAGAIN; + + retval = wait_event_interruptible(kdev->waitq, + kdev->head != kdev->tail); + if (retval) + return retval; + + spin_lock_irqsave(&kdev->lock, flags); + /* pop a keychord ID off the queue */ + id = kdev->buff[kdev->tail]; + kdev->tail = (kdev->tail + 1) % BUFFER_SIZE; + spin_unlock_irqrestore(&kdev->lock, flags); + + if (copy_to_user(buffer, &id, count)) + return -EFAULT; + + return count; +} + +/* + * serializes writes on a device. can use mutex_lock_interruptible() + * for this particular use case as well - a matter of preference. + */ +static int +keychord_write_lock(struct keychord_device *kdev) +{ + int ret; + unsigned long flags; + + spin_lock_irqsave(&kdev->lock, flags); + while (kdev->flags & KEYCHORD_BUSY) { + spin_unlock_irqrestore(&kdev->lock, flags); + ret = wait_event_interruptible(kdev->write_waitq, + ((kdev->flags & KEYCHORD_BUSY) == 0)); + if (ret) + return ret; + spin_lock_irqsave(&kdev->lock, flags); + } + kdev->flags |= KEYCHORD_BUSY; + spin_unlock_irqrestore(&kdev->lock, flags); + return 0; +} + +static void +keychord_write_unlock(struct keychord_device *kdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kdev->lock, flags); + kdev->flags &= ~KEYCHORD_BUSY; + spin_unlock_irqrestore(&kdev->lock, flags); + wake_up_interruptible(&kdev->write_waitq); +} + +/* + * keychord_write is used to configure the driver + */ +static ssize_t keychord_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct keychord_device *kdev = file->private_data; + struct input_keychord *keychords = 0; + struct input_keychord *keychord; + int ret, i, key; + unsigned long flags; + size_t resid = count; + size_t key_bytes; + + if (count < sizeof(struct input_keychord) || count > PAGE_SIZE) + return -EINVAL; + keychords = kzalloc(count, GFP_KERNEL); + if (!keychords) + return -ENOMEM; + + /* read list of keychords from userspace */ + if (copy_from_user(keychords, buffer, count)) { + kfree(keychords); + return -EFAULT; + } + + /* + * Serialize writes to this device to prevent various races. + * 1) writers racing here could do duplicate input_unregister_handler() + * calls, resulting in attempting to unlink a node from a list that + * does not exist. + * 2) writers racing here could do duplicate input_register_handler() calls + * below, resulting in a duplicate insertion of a node into the list. + * 3) a double kfree of keychords can occur (in the event that + * input_register_handler() fails below. + */ + ret = keychord_write_lock(kdev); + if (ret) { + kfree(keychords); + return ret; + } + + /* unregister handler before changing configuration */ + if (kdev->registered) { + input_unregister_handler(&kdev->input_handler); + kdev->registered = 0; + } + + spin_lock_irqsave(&kdev->lock, flags); + /* clear any existing configuration */ + kfree(kdev->keychords); + kdev->keychords = 0; + kdev->keychord_count = 0; + kdev->key_down = 0; + memset(kdev->keybit, 0, sizeof(kdev->keybit)); + memset(kdev->keystate, 0, sizeof(kdev->keystate)); + kdev->head = kdev->tail = 0; + + keychord = keychords; + + while (resid > 0) { + /* Is the entire keychord entry header present ? */ + if (resid < sizeof(struct input_keychord)) { + pr_err("keychord: Insufficient bytes present for header %zu\n", + resid); + goto err_unlock_return; + } + resid -= sizeof(struct input_keychord); + if (keychord->count <= 0) { + pr_err("keychord: invalid keycode count %d\n", + keychord->count); + goto err_unlock_return; + } + key_bytes = keychord->count * sizeof(keychord->keycodes[0]); + /* Do we have all the expected keycodes ? */ + if (resid < key_bytes) { + pr_err("keychord: Insufficient bytes present for keycount %zu\n", + resid); + goto err_unlock_return; + } + resid -= key_bytes; + + if (keychord->version != KEYCHORD_VERSION) { + pr_err("keychord: unsupported version %d\n", + keychord->version); + goto err_unlock_return; + } + + /* keep track of the keys we are monitoring in keybit */ + for (i = 0; i < keychord->count; i++) { + key = keychord->keycodes[i]; + if (key < 0 || key >= KEY_CNT) { + pr_err("keychord: keycode %d out of range\n", + key); + goto err_unlock_return; + } + __set_bit(key, kdev->keybit); + } + + kdev->keychord_count++; + keychord = NEXT_KEYCHORD(keychord); + } + + kdev->keychords = keychords; + spin_unlock_irqrestore(&kdev->lock, flags); + + ret = input_register_handler(&kdev->input_handler); + if (ret) { + kfree(keychords); + kdev->keychords = 0; + keychord_write_unlock(kdev); + return ret; + } + kdev->registered = 1; + + keychord_write_unlock(kdev); + + return count; + +err_unlock_return: + spin_unlock_irqrestore(&kdev->lock, flags); + kfree(keychords); + keychord_write_unlock(kdev); + return -EINVAL; +} + +static unsigned int keychord_poll(struct file *file, poll_table *wait) +{ + struct keychord_device *kdev = file->private_data; + + poll_wait(file, &kdev->waitq, wait); + + if (kdev->head != kdev->tail) + return POLLIN | POLLRDNORM; + + return 0; +} + +static int keychord_open(struct inode *inode, struct file *file) +{ + struct keychord_device *kdev; + + kdev = kzalloc(sizeof(struct keychord_device), GFP_KERNEL); + if (!kdev) + return -ENOMEM; + + spin_lock_init(&kdev->lock); + init_waitqueue_head(&kdev->waitq); + init_waitqueue_head(&kdev->write_waitq); + + kdev->input_handler.event = keychord_event; + kdev->input_handler.connect = keychord_connect; + kdev->input_handler.disconnect = keychord_disconnect; + kdev->input_handler.name = KEYCHORD_NAME; + kdev->input_handler.id_table = kdev->device_ids; + + kdev->device_ids[0].flags = INPUT_DEVICE_ID_MATCH_EVBIT; + __set_bit(EV_KEY, kdev->device_ids[0].evbit); + + file->private_data = kdev; + + return 0; +} + +static int keychord_release(struct inode *inode, struct file *file) +{ + struct keychord_device *kdev = file->private_data; + + if (kdev->registered) + input_unregister_handler(&kdev->input_handler); + kfree(kdev->keychords); + kfree(kdev); + + return 0; +} + +static const struct file_operations keychord_fops = { + .owner = THIS_MODULE, + .open = keychord_open, + .release = keychord_release, + .read = keychord_read, + .write = keychord_write, + .poll = keychord_poll, +}; + +static struct miscdevice keychord_misc = { + .fops = &keychord_fops, + .name = KEYCHORD_NAME, + .minor = MISC_DYNAMIC_MINOR, +}; + +static int __init keychord_init(void) +{ + return misc_register(&keychord_misc); +} + +static void __exit keychord_exit(void) +{ + misc_deregister(&keychord_misc); +} + +module_init(keychord_init); +module_exit(keychord_exit);
diff --git a/drivers/isdn/hardware/mISDN/avmfritz.c b/drivers/isdn/hardware/mISDN/avmfritz.c index dce6632d..ae2b266 100644 --- a/drivers/isdn/hardware/mISDN/avmfritz.c +++ b/drivers/isdn/hardware/mISDN/avmfritz.c
@@ -156,7 +156,7 @@ _set_debug(struct fritzcard *card) } static int -set_debug(const char *val, struct kernel_param *kp) +set_debug(const char *val, const struct kernel_param *kp) { int ret; struct fritzcard *card;
diff --git a/drivers/isdn/hardware/mISDN/mISDNinfineon.c b/drivers/isdn/hardware/mISDN/mISDNinfineon.c index d5bdbaf..1fc2906 100644 --- a/drivers/isdn/hardware/mISDN/mISDNinfineon.c +++ b/drivers/isdn/hardware/mISDN/mISDNinfineon.c
@@ -244,7 +244,7 @@ _set_debug(struct inf_hw *card) } static int -set_debug(const char *val, struct kernel_param *kp) +set_debug(const char *val, const struct kernel_param *kp) { int ret; struct inf_hw *card;
diff --git a/drivers/isdn/hardware/mISDN/netjet.c b/drivers/isdn/hardware/mISDN/netjet.c index 6a6d848..89d9ba8e 100644 --- a/drivers/isdn/hardware/mISDN/netjet.c +++ b/drivers/isdn/hardware/mISDN/netjet.c
@@ -111,7 +111,7 @@ _set_debug(struct tiger_hw *card) } static int -set_debug(const char *val, struct kernel_param *kp) +set_debug(const char *val, const struct kernel_param *kp) { int ret; struct tiger_hw *card;
diff --git a/drivers/isdn/hardware/mISDN/speedfax.c b/drivers/isdn/hardware/mISDN/speedfax.c index 9815bb4..1f1446e 100644 --- a/drivers/isdn/hardware/mISDN/speedfax.c +++ b/drivers/isdn/hardware/mISDN/speedfax.c
@@ -94,7 +94,7 @@ _set_debug(struct sfax_hw *card) } static int -set_debug(const char *val, struct kernel_param *kp) +set_debug(const char *val, const struct kernel_param *kp) { int ret; struct sfax_hw *card;
diff --git a/drivers/isdn/hardware/mISDN/w6692.c b/drivers/isdn/hardware/mISDN/w6692.c index d80072f..209036a 100644 --- a/drivers/isdn/hardware/mISDN/w6692.c +++ b/drivers/isdn/hardware/mISDN/w6692.c
@@ -101,7 +101,7 @@ _set_debug(struct w6692_hw *card) } static int -set_debug(const char *val, struct kernel_param *kp) +set_debug(const char *val, const struct kernel_param *kp) { int ret; struct w6692_hw *card;
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 4a249ee8..e458bd8 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig
@@ -540,4 +540,50 @@ If unsure, say N. +config DM_VERITY_AVB + tristate "Support AVB specific verity error behavior" + depends on DM_VERITY + ---help--- + Enables Android Verified Boot platform-specific error + behavior. In particular, it will modify the vbmeta partition + specified on the kernel command-line when non-transient error + occurs (followed by a panic). + +config DM_ANDROID_VERITY + bool "Android verity target support" + depends on BLK_DEV_DM=y + depends on DM_VERITY=y + depends on X509_CERTIFICATE_PARSER + depends on SYSTEM_TRUSTED_KEYRING + depends on CRYPTO_RSA + depends on KEYS + depends on ASYMMETRIC_KEY_TYPE + depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE + ---help--- + This device-mapper target is virtually a VERITY target. This + target is setup by reading the metadata contents piggybacked + to the actual data blocks in the block device. The signature + of the metadata contents are verified against the key included + in the system keyring. Upon success, the underlying verity + target is setup. + +config DM_ANDROID_VERITY_AT_MOST_ONCE_DEFAULT_ENABLED + bool "Verity will validate blocks at most once" + depends on DM_VERITY + ---help--- + Default enables at_most_once option for dm-verity + + Verify data blocks only the first time they are read from the + data device, rather than every time. This reduces the overhead + of dm-verity so that it can be used on systems that are memory + and/or CPU constrained. However, it provides a reduced level + of security because only offline tampering of the data device's + content will be detected, not online tampering. + + Hash blocks are still verified each time they are read from the + hash device, since verification of hash blocks is less performance + critical than data blocks, and a hash block will not be verified + any more after all the data blocks it covers have been verified anyway. + + If unsure, say N. endif # MD
diff --git a/drivers/md/Makefile b/drivers/md/Makefile index e94b6f9..f98717ff 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile
@@ -71,3 +71,11 @@ ifeq ($(CONFIG_DM_VERITY_FEC),y) dm-verity-objs += dm-verity-fec.o endif + +ifeq ($(CONFIG_DM_VERITY_AVB),y) +dm-verity-objs += dm-verity-avb.o +endif + +ifeq ($(CONFIG_DM_ANDROID_VERITY),y) +dm-verity-objs += dm-android-verity.o +endif
diff --git a/drivers/md/dm-android-verity.c b/drivers/md/dm-android-verity.c new file mode 100644 index 0000000..20e0593 --- /dev/null +++ b/drivers/md/dm-android-verity.c
@@ -0,0 +1,925 @@ +/* + * Copyright (C) 2015 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/buffer_head.h> +#include <linux/debugfs.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/device-mapper.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/fcntl.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/key.h> +#include <linux/module.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/of.h> +#include <linux/reboot.h> +#include <linux/string.h> +#include <linux/vmalloc.h> + +#include <asm/setup.h> +#include <crypto/hash.h> +#include <crypto/hash_info.h> +#include <crypto/public_key.h> +#include <crypto/sha.h> +#include <keys/asymmetric-type.h> +#include <keys/system_keyring.h> + +#include "dm-verity.h" +#include "dm-android-verity.h" + +static char verifiedbootstate[VERITY_COMMANDLINE_PARAM_LENGTH]; +static char veritymode[VERITY_COMMANDLINE_PARAM_LENGTH]; +static char veritykeyid[VERITY_DEFAULT_KEY_ID_LENGTH]; +static char buildvariant[BUILD_VARIANT]; + +static bool target_added; +static bool verity_enabled = true; +struct dentry *debug_dir; +static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv); + +static struct target_type android_verity_target = { + .name = "android-verity", + .version = {1, 0, 0}, + .module = THIS_MODULE, + .ctr = android_verity_ctr, + .dtr = verity_dtr, + .map = verity_map, + .status = verity_status, + .prepare_ioctl = verity_prepare_ioctl, + .iterate_devices = verity_iterate_devices, + .io_hints = verity_io_hints, +}; + +static int __init verified_boot_state_param(char *line) +{ + strlcpy(verifiedbootstate, line, sizeof(verifiedbootstate)); + return 1; +} + +__setup("androidboot.verifiedbootstate=", verified_boot_state_param); + +static int __init verity_mode_param(char *line) +{ + strlcpy(veritymode, line, sizeof(veritymode)); + return 1; +} + +__setup("androidboot.veritymode=", verity_mode_param); + +static int __init verity_keyid_param(char *line) +{ + strlcpy(veritykeyid, line, sizeof(veritykeyid)); + return 1; +} + +__setup("veritykeyid=", verity_keyid_param); + +static int __init verity_buildvariant(char *line) +{ + strlcpy(buildvariant, line, sizeof(buildvariant)); + return 1; +} + +__setup("buildvariant=", verity_buildvariant); + +static inline bool default_verity_key_id(void) +{ + return veritykeyid[0] != '\0'; +} + +static inline bool is_eng(void) +{ + static const char typeeng[] = "eng"; + + return !strncmp(buildvariant, typeeng, sizeof(typeeng)); +} + +static inline bool is_userdebug(void) +{ + static const char typeuserdebug[] = "userdebug"; + + return !strncmp(buildvariant, typeuserdebug, sizeof(typeuserdebug)); +} + +static inline bool is_unlocked(void) +{ + static const char unlocked[] = "orange"; + + return !strncmp(verifiedbootstate, unlocked, sizeof(unlocked)); +} + +static int read_block_dev(struct bio_read *payload, struct block_device *bdev, + sector_t offset, int length) +{ + struct bio *bio; + int err = 0, i; + + payload->number_of_pages = DIV_ROUND_UP(length, PAGE_SIZE); + + bio = bio_alloc(GFP_KERNEL, payload->number_of_pages); + if (!bio) { + DMERR("Error while allocating bio"); + return -ENOMEM; + } + + bio_set_dev(bio, bdev); + bio->bi_iter.bi_sector = offset; + bio_set_op_attrs(bio, REQ_OP_READ, 0); + + payload->page_io = kzalloc(sizeof(struct page *) * + payload->number_of_pages, GFP_KERNEL); + if (!payload->page_io) { + DMERR("page_io array alloc failed"); + err = -ENOMEM; + goto free_bio; + } + + for (i = 0; i < payload->number_of_pages; i++) { + payload->page_io[i] = alloc_page(GFP_KERNEL); + if (!payload->page_io[i]) { + DMERR("alloc_page failed"); + err = -ENOMEM; + goto free_pages; + } + if (!bio_add_page(bio, payload->page_io[i], PAGE_SIZE, 0)) { + DMERR("bio_add_page error"); + err = -EIO; + goto free_pages; + } + } + + if (!submit_bio_wait(bio)) + /* success */ + goto free_bio; + DMERR("bio read failed"); + err = -EIO; + +free_pages: + for (i = 0; i < payload->number_of_pages; i++) + if (payload->page_io[i]) + __free_page(payload->page_io[i]); + kfree(payload->page_io); +free_bio: + bio_put(bio); + return err; +} + +static inline u64 fec_div_round_up(u64 x, u64 y) +{ + u64 remainder; + + return div64_u64_rem(x, y, &remainder) + + (remainder > 0 ? 1 : 0); +} + +static inline void populate_fec_metadata(struct fec_header *header, + struct fec_ecc_metadata *ecc) +{ + ecc->blocks = fec_div_round_up(le64_to_cpu(header->inp_size), + FEC_BLOCK_SIZE); + ecc->roots = le32_to_cpu(header->roots); + ecc->start = le64_to_cpu(header->inp_size); +} + +static inline int validate_fec_header(struct fec_header *header, u64 offset) +{ + /* move offset to make the sanity check work for backup header + * as well. */ + offset -= offset % FEC_BLOCK_SIZE; + if (le32_to_cpu(header->magic) != FEC_MAGIC || + le32_to_cpu(header->version) != FEC_VERSION || + le32_to_cpu(header->size) != sizeof(struct fec_header) || + le32_to_cpu(header->roots) == 0 || + le32_to_cpu(header->roots) >= FEC_RSM) + return -EINVAL; + + return 0; +} + +static int extract_fec_header(dev_t dev, struct fec_header *fec, + struct fec_ecc_metadata *ecc) +{ + u64 device_size; + struct bio_read payload; + int i, err = 0; + struct block_device *bdev; + + bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL); + + if (IS_ERR_OR_NULL(bdev)) { + DMERR("bdev get error"); + return PTR_ERR(bdev); + } + + device_size = i_size_read(bdev->bd_inode); + + /* fec metadata size is a power of 2 and PAGE_SIZE + * is a power of 2 as well. + */ + BUG_ON(FEC_BLOCK_SIZE > PAGE_SIZE); + /* 512 byte sector alignment */ + BUG_ON(((device_size - FEC_BLOCK_SIZE) % (1 << SECTOR_SHIFT)) != 0); + + err = read_block_dev(&payload, bdev, (device_size - + FEC_BLOCK_SIZE) / (1 << SECTOR_SHIFT), FEC_BLOCK_SIZE); + if (err) { + DMERR("Error while reading verity metadata"); + goto error; + } + + BUG_ON(sizeof(struct fec_header) > PAGE_SIZE); + memcpy(fec, page_address(payload.page_io[0]), + sizeof(*fec)); + + ecc->valid = true; + if (validate_fec_header(fec, device_size - FEC_BLOCK_SIZE)) { + /* Try the backup header */ + memcpy(fec, page_address(payload.page_io[0]) + FEC_BLOCK_SIZE + - sizeof(*fec) , + sizeof(*fec)); + if (validate_fec_header(fec, device_size - + sizeof(struct fec_header))) + ecc->valid = false; + } + + if (ecc->valid) + populate_fec_metadata(fec, ecc); + + for (i = 0; i < payload.number_of_pages; i++) + __free_page(payload.page_io[i]); + kfree(payload.page_io); + +error: + blkdev_put(bdev, FMODE_READ); + return err; +} +static void find_metadata_offset(struct fec_header *fec, + struct block_device *bdev, u64 *metadata_offset) +{ + u64 device_size; + + device_size = i_size_read(bdev->bd_inode); + + if (le32_to_cpu(fec->magic) == FEC_MAGIC) + *metadata_offset = le64_to_cpu(fec->inp_size) - + VERITY_METADATA_SIZE; + else + *metadata_offset = device_size - VERITY_METADATA_SIZE; +} + +static int find_size(dev_t dev, u64 *device_size) +{ + struct block_device *bdev; + + bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL); + if (IS_ERR_OR_NULL(bdev)) { + DMERR("blkdev_get_by_dev failed"); + return PTR_ERR(bdev); + } + + *device_size = i_size_read(bdev->bd_inode); + *device_size >>= SECTOR_SHIFT; + + DMINFO("blkdev size in sectors: %llu", *device_size); + blkdev_put(bdev, FMODE_READ); + return 0; +} + +static int verify_header(struct android_metadata_header *header) +{ + int retval = -EINVAL; + + if (is_userdebug() && le32_to_cpu(header->magic_number) == + VERITY_METADATA_MAGIC_DISABLE) + return VERITY_STATE_DISABLE; + + if (!(le32_to_cpu(header->magic_number) == + VERITY_METADATA_MAGIC_NUMBER) || + (le32_to_cpu(header->magic_number) == + VERITY_METADATA_MAGIC_DISABLE)) { + DMERR("Incorrect magic number"); + return retval; + } + + if (le32_to_cpu(header->protocol_version) != + VERITY_METADATA_VERSION) { + DMERR("Unsupported version %u", + le32_to_cpu(header->protocol_version)); + return retval; + } + + return 0; +} + +static int extract_metadata(dev_t dev, struct fec_header *fec, + struct android_metadata **metadata, + bool *verity_enabled) +{ + struct block_device *bdev; + struct android_metadata_header *header; + int i; + u32 table_length, copy_length, offset; + u64 metadata_offset; + struct bio_read payload; + int err = 0; + + bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL); + + if (IS_ERR_OR_NULL(bdev)) { + DMERR("blkdev_get_by_dev failed"); + return -ENODEV; + } + + find_metadata_offset(fec, bdev, &metadata_offset); + + /* Verity metadata size is a power of 2 and PAGE_SIZE + * is a power of 2 as well. + * PAGE_SIZE is also a multiple of 512 bytes. + */ + if (VERITY_METADATA_SIZE > PAGE_SIZE) + BUG_ON(VERITY_METADATA_SIZE % PAGE_SIZE != 0); + /* 512 byte sector alignment */ + BUG_ON(metadata_offset % (1 << SECTOR_SHIFT) != 0); + + err = read_block_dev(&payload, bdev, metadata_offset / + (1 << SECTOR_SHIFT), VERITY_METADATA_SIZE); + if (err) { + DMERR("Error while reading verity metadata"); + goto blkdev_release; + } + + header = kzalloc(sizeof(*header), GFP_KERNEL); + if (!header) { + DMERR("kzalloc failed for header"); + err = -ENOMEM; + goto free_payload; + } + + memcpy(header, page_address(payload.page_io[0]), + sizeof(*header)); + + DMINFO("bio magic_number:%u protocol_version:%d table_length:%u", + le32_to_cpu(header->magic_number), + le32_to_cpu(header->protocol_version), + le32_to_cpu(header->table_length)); + + err = verify_header(header); + + if (err == VERITY_STATE_DISABLE) { + DMERR("Mounting root with verity disabled"); + *verity_enabled = false; + /* we would still have to read the metadata to figure out + * the data blocks size. Or may be could map the entire + * partition similar to mounting the device. + * + * Reset error as well as the verity_enabled flag is changed. + */ + err = 0; + } else if (err) + goto free_header; + + *metadata = kzalloc(sizeof(**metadata), GFP_KERNEL); + if (!*metadata) { + DMERR("kzalloc for metadata failed"); + err = -ENOMEM; + goto free_header; + } + + (*metadata)->header = header; + table_length = le32_to_cpu(header->table_length); + + if (table_length == 0 || + table_length > (VERITY_METADATA_SIZE - + sizeof(struct android_metadata_header))) { + DMERR("table_length too long"); + err = -EINVAL; + goto free_metadata; + } + + (*metadata)->verity_table = kzalloc(table_length + 1, GFP_KERNEL); + + if (!(*metadata)->verity_table) { + DMERR("kzalloc verity_table failed"); + err = -ENOMEM; + goto free_metadata; + } + + if (sizeof(struct android_metadata_header) + + table_length <= PAGE_SIZE) { + memcpy((*metadata)->verity_table, + page_address(payload.page_io[0]) + + sizeof(struct android_metadata_header), + table_length); + } else { + copy_length = PAGE_SIZE - + sizeof(struct android_metadata_header); + memcpy((*metadata)->verity_table, + page_address(payload.page_io[0]) + + sizeof(struct android_metadata_header), + copy_length); + table_length -= copy_length; + offset = copy_length; + i = 1; + while (table_length != 0) { + if (table_length > PAGE_SIZE) { + memcpy((*metadata)->verity_table + offset, + page_address(payload.page_io[i]), + PAGE_SIZE); + offset += PAGE_SIZE; + table_length -= PAGE_SIZE; + } else { + memcpy((*metadata)->verity_table + offset, + page_address(payload.page_io[i]), + table_length); + table_length = 0; + } + i++; + } + } + (*metadata)->verity_table[table_length] = '\0'; + + DMINFO("verity_table: %s", (*metadata)->verity_table); + goto free_payload; + +free_metadata: + kfree(*metadata); +free_header: + kfree(header); +free_payload: + for (i = 0; i < payload.number_of_pages; i++) + if (payload.page_io[i]) + __free_page(payload.page_io[i]); + kfree(payload.page_io); +blkdev_release: + blkdev_put(bdev, FMODE_READ); + return err; +} + +/* helper functions to extract properties from dts */ +const char *find_dt_value(const char *name) +{ + struct device_node *firmware; + const char *value; + + firmware = of_find_node_by_path("/firmware/android"); + if (!firmware) + return NULL; + value = of_get_property(firmware, name, NULL); + of_node_put(firmware); + + return value; +} + +static int verity_mode(void) +{ + static const char enforcing[] = "enforcing"; + static const char verified_mode_prop[] = "veritymode"; + const char *value; + + value = find_dt_value(verified_mode_prop); + if (!value) + value = veritymode; + if (!strncmp(value, enforcing, sizeof(enforcing) - 1)) + return DM_VERITY_MODE_RESTART; + + return DM_VERITY_MODE_EIO; +} + +static void handle_error(void) +{ + int mode = verity_mode(); + if (mode == DM_VERITY_MODE_RESTART) { + DMERR("triggering restart"); + kernel_restart("dm-verity device corrupted"); + } else { + DMERR("Mounting verity root failed"); + } +} + +static struct public_key_signature *table_make_digest( + enum hash_algo hash, + const void *table, + unsigned long table_len) +{ + struct public_key_signature *pks = NULL; + struct crypto_shash *tfm; + struct shash_desc *desc; + size_t digest_size, desc_size; + int ret; + + /* Allocate the hashing algorithm we're going to need and find out how + * big the hash operational data will be. + */ + tfm = crypto_alloc_shash(hash_algo_name[hash], 0, 0); + if (IS_ERR(tfm)) + return ERR_CAST(tfm); + + desc_size = crypto_shash_descsize(tfm) + sizeof(*desc); + digest_size = crypto_shash_digestsize(tfm); + + /* We allocate the hash operational data storage on the end of out + * context data and the digest output buffer on the end of that. + */ + ret = -ENOMEM; + pks = kzalloc(digest_size + sizeof(*pks) + desc_size, GFP_KERNEL); + if (!pks) + goto error; + + pks->pkey_algo = "rsa"; + pks->hash_algo = hash_algo_name[hash]; + pks->digest = (u8 *)pks + sizeof(*pks) + desc_size; + pks->digest_size = digest_size; + + desc = (struct shash_desc *)(pks + 1); + desc->tfm = tfm; + desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + + ret = crypto_shash_init(desc); + if (ret < 0) + goto error; + + ret = crypto_shash_finup(desc, table, table_len, pks->digest); + if (ret < 0) + goto error; + + crypto_free_shash(tfm); + return pks; + +error: + kfree(pks); + crypto_free_shash(tfm); + return ERR_PTR(ret); +} + + +static int verify_verity_signature(char *key_id, + struct android_metadata *metadata) +{ + struct public_key_signature *pks = NULL; + int retval = -EINVAL; + + if (!key_id) + goto error; + + pks = table_make_digest(HASH_ALGO_SHA256, + (const void *)metadata->verity_table, + le32_to_cpu(metadata->header->table_length)); + if (IS_ERR(pks)) { + DMERR("hashing failed"); + retval = PTR_ERR(pks); + pks = NULL; + goto error; + } + + pks->s = kmemdup(&metadata->header->signature[0], RSANUMBYTES, GFP_KERNEL); + if (!pks->s) { + DMERR("Error allocating memory for signature"); + goto error; + } + pks->s_size = RSANUMBYTES; + + retval = verify_signature_one(pks, NULL, key_id); + kfree(pks->s); +error: + kfree(pks); + return retval; +} + +static inline bool test_mult_overflow(sector_t a, u32 b) +{ + sector_t r = (sector_t)~0ULL; + + sector_div(r, b); + return a > r; +} + +static int add_as_linear_device(struct dm_target *ti, char *dev) +{ + /*Move to linear mapping defines*/ + char *linear_table_args[DM_LINEAR_ARGS] = {dev, + DM_LINEAR_TARGET_OFFSET}; + int err = 0; + + android_verity_target.dtr = dm_linear_dtr, + android_verity_target.map = dm_linear_map, + android_verity_target.status = dm_linear_status, + android_verity_target.end_io = dm_linear_end_io, + android_verity_target.prepare_ioctl = dm_linear_prepare_ioctl, + android_verity_target.iterate_devices = dm_linear_iterate_devices, + android_verity_target.direct_access = dm_linear_dax_direct_access, + android_verity_target.dax_copy_from_iter = dm_linear_dax_copy_from_iter, + android_verity_target.io_hints = NULL; + + set_disk_ro(dm_disk(dm_table_get_md(ti->table)), 0); + + err = dm_linear_ctr(ti, DM_LINEAR_ARGS, linear_table_args); + + if (!err) { + DMINFO("Added android-verity as a linear target"); + target_added = true; + } else + DMERR("Failed to add android-verity as linear target"); + + return err; +} + +static int create_linear_device(struct dm_target *ti, dev_t dev, + char *target_device) +{ + u64 device_size = 0; + int err = find_size(dev, &device_size); + + if (err) { + DMERR("error finding bdev size"); + handle_error(); + return err; + } + + ti->len = device_size; + err = add_as_linear_device(ti, target_device); + if (err) { + handle_error(); + return err; + } + verity_enabled = false; + return 0; +} + +/* + * Target parameters: + * <key id> Key id of the public key in the system keyring. + * Verity metadata's signature would be verified against + * this. If the key id contains spaces, replace them + * with '#'. + * <block device> The block device for which dm-verity is being setup. + */ +static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv) +{ + dev_t uninitialized_var(dev); + struct android_metadata *metadata = NULL; + int err = 0, i, mode; + char *key_id = NULL, *table_ptr, dummy, *target_device; + char *verity_table_args[VERITY_TABLE_ARGS + 2 + VERITY_TABLE_OPT_FEC_ARGS]; + /* One for specifying number of opt args and one for mode */ + sector_t data_sectors; + u32 data_block_size; + unsigned int no_of_args = VERITY_TABLE_ARGS + 2 + VERITY_TABLE_OPT_FEC_ARGS; + struct fec_header uninitialized_var(fec); + struct fec_ecc_metadata uninitialized_var(ecc); + char buf[FEC_ARG_LENGTH], *buf_ptr; + unsigned long long tmpll; + + if (argc == 1) { + /* Use the default keyid */ + if (default_verity_key_id()) + key_id = veritykeyid; + else if (!is_eng()) { + DMERR("veritykeyid= is not set"); + handle_error(); + return -EINVAL; + } + target_device = argv[0]; + } else if (argc == 2) { + key_id = argv[0]; + target_device = argv[1]; + } else { + DMERR("Incorrect number of arguments"); + handle_error(); + return -EINVAL; + } + + dev = name_to_dev_t(target_device); + if (!dev) { + DMERR("no dev found for %s", target_device); + handle_error(); + return -EINVAL; + } + + if (is_eng()) + return create_linear_device(ti, dev, target_device); + + strreplace(key_id, '#', ' '); + + DMINFO("key:%s dev:%s", key_id, target_device); + + if (extract_fec_header(dev, &fec, &ecc)) { + DMERR("Error while extracting fec header"); + handle_error(); + return -EINVAL; + } + + err = extract_metadata(dev, &fec, &metadata, &verity_enabled); + + if (err) { + /* Allow invalid metadata when the device is unlocked */ + if (is_unlocked()) { + DMWARN("Allow invalid metadata when unlocked"); + return create_linear_device(ti, dev, target_device); + } + DMERR("Error while extracting metadata"); + handle_error(); + goto free_metadata; + } + + if (verity_enabled) { + err = verify_verity_signature(key_id, metadata); + + if (err) { + DMERR("Signature verification failed"); + handle_error(); + goto free_metadata; + } else + DMINFO("Signature verification success"); + } + + table_ptr = metadata->verity_table; + + for (i = 0; i < VERITY_TABLE_ARGS; i++) { + verity_table_args[i] = strsep(&table_ptr, " "); + if (verity_table_args[i] == NULL) + break; + } + + if (i != VERITY_TABLE_ARGS) { + DMERR("Verity table not in the expected format"); + err = -EINVAL; + handle_error(); + goto free_metadata; + } + + if (sscanf(verity_table_args[5], "%llu%c", &tmpll, &dummy) + != 1) { + DMERR("Verity table not in the expected format"); + handle_error(); + err = -EINVAL; + goto free_metadata; + } + + if (tmpll > ULONG_MAX) { + DMERR("<num_data_blocks> too large. Forgot to turn on CONFIG_LBDAF?"); + handle_error(); + err = -EINVAL; + goto free_metadata; + } + + data_sectors = tmpll; + + if (sscanf(verity_table_args[3], "%u%c", &data_block_size, &dummy) + != 1) { + DMERR("Verity table not in the expected format"); + handle_error(); + err = -EINVAL; + goto free_metadata; + } + + if (test_mult_overflow(data_sectors, data_block_size >> + SECTOR_SHIFT)) { + DMERR("data_sectors too large"); + handle_error(); + err = -EOVERFLOW; + goto free_metadata; + } + + data_sectors *= data_block_size >> SECTOR_SHIFT; + DMINFO("Data sectors %llu", (unsigned long long)data_sectors); + + /* update target length */ + ti->len = data_sectors; + + /* Setup linear target and free */ + if (!verity_enabled) { + err = add_as_linear_device(ti, target_device); + goto free_metadata; + } + + /*substitute data_dev and hash_dev*/ + verity_table_args[1] = target_device; + verity_table_args[2] = target_device; + + mode = verity_mode(); + + if (ecc.valid && IS_BUILTIN(CONFIG_DM_VERITY_FEC)) { + if (mode) { + err = snprintf(buf, FEC_ARG_LENGTH, + "%u %s " VERITY_TABLE_OPT_FEC_FORMAT, + 1 + VERITY_TABLE_OPT_FEC_ARGS, + mode == DM_VERITY_MODE_RESTART ? + VERITY_TABLE_OPT_RESTART : + VERITY_TABLE_OPT_LOGGING, + target_device, + ecc.start / FEC_BLOCK_SIZE, ecc.blocks, + ecc.roots); + } else { + err = snprintf(buf, FEC_ARG_LENGTH, + "%u " VERITY_TABLE_OPT_FEC_FORMAT, + VERITY_TABLE_OPT_FEC_ARGS, target_device, + ecc.start / FEC_BLOCK_SIZE, ecc.blocks, + ecc.roots); + } + } else if (mode) { + err = snprintf(buf, FEC_ARG_LENGTH, + "2 " VERITY_TABLE_OPT_IGNZERO " %s", + mode == DM_VERITY_MODE_RESTART ? + VERITY_TABLE_OPT_RESTART : VERITY_TABLE_OPT_LOGGING); + } else { + err = snprintf(buf, FEC_ARG_LENGTH, "1 %s", + "ignore_zero_blocks"); + } + + if (err < 0 || err >= FEC_ARG_LENGTH) + goto free_metadata; + + buf_ptr = buf; + + for (i = VERITY_TABLE_ARGS; i < (VERITY_TABLE_ARGS + + VERITY_TABLE_OPT_FEC_ARGS + 2); i++) { + verity_table_args[i] = strsep(&buf_ptr, " "); + if (verity_table_args[i] == NULL) { + no_of_args = i; + break; + } + } + + err = verity_ctr(ti, no_of_args, verity_table_args); + if (err) { + DMERR("android-verity failed to create a verity target"); + } else { + target_added = true; + DMINFO("android-verity created as verity target"); + } + +free_metadata: + if (metadata) { + kfree(metadata->header); + kfree(metadata->verity_table); + } + kfree(metadata); + return err; +} + +static int __init dm_android_verity_init(void) +{ + int r; + struct dentry *file; + + r = dm_register_target(&android_verity_target); + if (r < 0) + DMERR("register failed %d", r); + + /* Tracks the status of the last added target */ + debug_dir = debugfs_create_dir("android_verity", NULL); + + if (IS_ERR_OR_NULL(debug_dir)) { + DMERR("Cannot create android_verity debugfs directory: %ld", + PTR_ERR(debug_dir)); + goto end; + } + + file = debugfs_create_bool("target_added", S_IRUGO, debug_dir, + &target_added); + + if (IS_ERR_OR_NULL(file)) { + DMERR("Cannot create android_verity debugfs directory: %ld", + PTR_ERR(debug_dir)); + debugfs_remove_recursive(debug_dir); + goto end; + } + + file = debugfs_create_bool("verity_enabled", S_IRUGO, debug_dir, + &verity_enabled); + + if (IS_ERR_OR_NULL(file)) { + DMERR("Cannot create android_verity debugfs directory: %ld", + PTR_ERR(debug_dir)); + debugfs_remove_recursive(debug_dir); + } + +end: + return r; +} + +static void __exit dm_android_verity_exit(void) +{ + if (!IS_ERR_OR_NULL(debug_dir)) + debugfs_remove_recursive(debug_dir); + + dm_unregister_target(&android_verity_target); +} + +module_init(dm_android_verity_init); +module_exit(dm_android_verity_exit);
diff --git a/drivers/md/dm-android-verity.h b/drivers/md/dm-android-verity.h new file mode 100644 index 0000000..ef406c1 --- /dev/null +++ b/drivers/md/dm-android-verity.h
@@ -0,0 +1,128 @@ +/* + * Copyright (C) 2015 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef DM_ANDROID_VERITY_H +#define DM_ANDROID_VERITY_H + +#include <crypto/sha.h> + +#define RSANUMBYTES 256 +#define VERITY_METADATA_MAGIC_NUMBER 0xb001b001 +#define VERITY_METADATA_MAGIC_DISABLE 0x46464f56 +#define VERITY_METADATA_VERSION 0 +#define VERITY_STATE_DISABLE 1 +#define DATA_BLOCK_SIZE (4 * 1024) +#define VERITY_METADATA_SIZE (8 * DATA_BLOCK_SIZE) +#define VERITY_TABLE_ARGS 10 +#define VERITY_COMMANDLINE_PARAM_LENGTH 20 +#define BUILD_VARIANT 20 + +/* + * <subject>:<sha1-id> is the format for the identifier. + * subject can either be the Common Name(CN) + Organization Name(O) or + * just the CN if the it is prefixed with O + * From https://tools.ietf.org/html/rfc5280#appendix-A + * ub-organization-name-length INTEGER ::= 64 + * ub-common-name-length INTEGER ::= 64 + * + * http://lxr.free-electrons.com/source/crypto/asymmetric_keys/x509_cert_parser.c?v=3.9#L278 + * ctx->o_size + 2 + ctx->cn_size + 1 + * + 41 characters for ":" and sha1 id + * 64 + 2 + 64 + 1 + 1 + 40 (172) + * setting VERITY_DEFAULT_KEY_ID_LENGTH to 200 characters. + */ +#define VERITY_DEFAULT_KEY_ID_LENGTH 200 + +#define FEC_MAGIC 0xFECFECFE +#define FEC_BLOCK_SIZE (4 * 1024) +#define FEC_VERSION 0 +#define FEC_RSM 255 +#define FEC_ARG_LENGTH 300 + +#define VERITY_TABLE_OPT_RESTART "restart_on_corruption" +#define VERITY_TABLE_OPT_LOGGING "ignore_corruption" +#define VERITY_TABLE_OPT_IGNZERO "ignore_zero_blocks" + +#define VERITY_TABLE_OPT_FEC_FORMAT \ + "use_fec_from_device %s fec_start %llu fec_blocks %llu fec_roots %u ignore_zero_blocks" +#define VERITY_TABLE_OPT_FEC_ARGS 9 + +#define VERITY_DEBUG 0 + +#define DM_MSG_PREFIX "android-verity" + +#define DM_LINEAR_ARGS 2 +#define DM_LINEAR_TARGET_OFFSET "0" + +/* + * There can be two formats. + * if fec is present + * <data_blocks> <verity_tree> <verity_metdata_32K><fec_data><fec_data_4K> + * if fec is not present + * <data_blocks> <verity_tree> <verity_metdata_32K> + */ +struct fec_header { + __le32 magic; + __le32 version; + __le32 size; + __le32 roots; + __le32 fec_size; + __le64 inp_size; + u8 hash[SHA256_DIGEST_SIZE]; +} __attribute__((packed)); + +struct android_metadata_header { + __le32 magic_number; + __le32 protocol_version; + char signature[RSANUMBYTES]; + __le32 table_length; +}; + +struct android_metadata { + struct android_metadata_header *header; + char *verity_table; +}; + +struct fec_ecc_metadata { + bool valid; + u32 roots; + u64 blocks; + u64 rounds; + u64 start; +}; + +struct bio_read { + struct page **page_io; + int number_of_pages; +}; + +extern struct target_type linear_target; + +extern void dm_linear_dtr(struct dm_target *ti); +extern int dm_linear_map(struct dm_target *ti, struct bio *bio); +extern int dm_linear_end_io(struct dm_target *ti, struct bio *bio, + blk_status_t *error); +extern void dm_linear_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen); +extern int dm_linear_prepare_ioctl(struct dm_target *ti, + struct block_device **bdev, fmode_t *mode); +extern int dm_linear_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, void *data); +extern int dm_linear_ctr(struct dm_target *ti, unsigned int argc, char **argv); +extern long dm_linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, + long nr_pages, void **kaddr, + pfn_t *pfn); +extern size_t dm_linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i); +#endif /* DM_ANDROID_VERITY_H */
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 1f6d8b6..a125cc3 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c
@@ -1893,6 +1893,13 @@ static int crypt_alloc_tfms_skcipher(struct crypt_config *cc, char *ciphermode) } } + /* + * dm-crypt performance can vary greatly depending on which crypto + * algorithm implementation is used. Help people debug performance + * problems by logging the ->cra_driver_name. + */ + DMINFO("%s using implementation \"%s\"", ciphermode, + crypto_skcipher_alg(any_tfm(cc))->base.cra_driver_name); return 0; } @@ -1911,6 +1918,8 @@ static int crypt_alloc_tfms_aead(struct crypt_config *cc, char *ciphermode) return err; } + DMINFO("%s using implementation \"%s\"", ciphermode, + crypto_aead_alg(any_tfm_aead(cc))->base.cra_driver_name); return 0; }
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index ca94815..787afba 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c
@@ -1986,6 +1986,45 @@ void dm_interface_exit(void) dm_hash_exit(); } + +/** + * dm_ioctl_export - Permanently export a mapped device via the ioctl interface + * @md: Pointer to mapped_device + * @name: Buffer (size DM_NAME_LEN) for name + * @uuid: Buffer (size DM_UUID_LEN) for uuid or NULL if not desired + */ +int dm_ioctl_export(struct mapped_device *md, const char *name, + const char *uuid) +{ + int r = 0; + struct hash_cell *hc; + + if (!md) { + r = -ENXIO; + goto out; + } + + /* The name and uuid can only be set once. */ + mutex_lock(&dm_hash_cells_mutex); + hc = dm_get_mdptr(md); + mutex_unlock(&dm_hash_cells_mutex); + if (hc) { + DMERR("%s: already exported", dm_device_name(md)); + r = -ENXIO; + goto out; + } + + r = dm_hash_insert(name, uuid, md); + if (r) { + DMERR("%s: could not bind to '%s'", dm_device_name(md), name); + goto out; + } + + /* Let udev know we've changed. */ + dm_kobject_uevent(md, KOBJ_CHANGE, dm_get_event_nr(md)); +out: + return r; +} /** * dm_copy_name_and_uuid - Copy mapped device name & uuid into supplied buffers * @md: Pointer to mapped_device
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index a53de71..e6fd31b 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c
@@ -26,7 +26,7 @@ struct linear_c { /* * Construct a linear mapping: <dev_path> <offset> */ -static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) +int dm_linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct linear_c *lc; unsigned long long tmp; @@ -69,7 +69,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) return ret; } -static void linear_dtr(struct dm_target *ti) +void dm_linear_dtr(struct dm_target *ti) { struct linear_c *lc = (struct linear_c *) ti->private; @@ -94,15 +94,14 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio) linear_map_sector(ti, bio->bi_iter.bi_sector); } -static int linear_map(struct dm_target *ti, struct bio *bio) +int dm_linear_map(struct dm_target *ti, struct bio *bio) { linear_map_bio(ti, bio); return DM_MAPIO_REMAPPED; } -#ifdef CONFIG_BLK_DEV_ZONED -static int linear_end_io(struct dm_target *ti, struct bio *bio, +int dm_linear_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error) { struct linear_c *lc = ti->private; @@ -112,9 +111,9 @@ static int linear_end_io(struct dm_target *ti, struct bio *bio, return DM_ENDIO_DONE; } -#endif +EXPORT_SYMBOL_GPL(dm_linear_end_io); -static void linear_status(struct dm_target *ti, status_type_t type, +void dm_linear_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) { struct linear_c *lc = (struct linear_c *) ti->private; @@ -131,7 +130,7 @@ static void linear_status(struct dm_target *ti, status_type_t type, } } -static int linear_prepare_ioctl(struct dm_target *ti, +int dm_linear_prepare_ioctl(struct dm_target *ti, struct block_device **bdev, fmode_t *mode) { struct linear_c *lc = (struct linear_c *) ti->private; @@ -148,7 +147,7 @@ static int linear_prepare_ioctl(struct dm_target *ti, return 0; } -static int linear_iterate_devices(struct dm_target *ti, +int dm_linear_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) { struct linear_c *lc = ti->private; @@ -156,7 +155,7 @@ static int linear_iterate_devices(struct dm_target *ti, return fn(ti, lc->dev, lc->start, ti->len, data); } -static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, +long dm_linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn) { long ret; @@ -171,8 +170,9 @@ static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, return ret; return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); } +EXPORT_SYMBOL_GPL(dm_linear_dax_direct_access); -static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, +size_t dm_linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { struct linear_c *lc = ti->private; @@ -185,25 +185,22 @@ static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, return 0; return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } +EXPORT_SYMBOL_GPL(dm_linear_dax_copy_from_iter); static struct target_type linear_target = { .name = "linear", .version = {1, 4, 0}, -#ifdef CONFIG_BLK_DEV_ZONED - .end_io = linear_end_io, .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_ZONED_HM, -#else - .features = DM_TARGET_PASSES_INTEGRITY, -#endif .module = THIS_MODULE, - .ctr = linear_ctr, - .dtr = linear_dtr, - .map = linear_map, - .status = linear_status, - .prepare_ioctl = linear_prepare_ioctl, - .iterate_devices = linear_iterate_devices, - .direct_access = linear_dax_direct_access, - .dax_copy_from_iter = linear_dax_copy_from_iter, + .ctr = dm_linear_ctr, + .dtr = dm_linear_dtr, + .map = dm_linear_map, + .status = dm_linear_status, + .end_io = dm_linear_end_io, + .prepare_ioctl = dm_linear_prepare_ioctl, + .iterate_devices = dm_linear_iterate_devices, + .direct_access = dm_linear_dax_direct_access, + .dax_copy_from_iter = dm_linear_dax_copy_from_iter, }; int __init dm_linear_init(void)
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index f9cd813..6916aa9 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c
@@ -11,6 +11,7 @@ #include <linux/vmalloc.h> #include <linux/blkdev.h> #include <linux/namei.h> +#include <linux/mount.h> #include <linux/ctype.h> #include <linux/string.h> #include <linux/slab.h> @@ -547,14 +548,14 @@ static int adjoin(struct dm_table *table, struct dm_target *ti) * On the other hand, dm-switch needs to process bulk data using messages and * excessive use of GFP_NOIO could cause trouble. */ -static char **realloc_argv(unsigned *array_size, char **old_argv) +static char **realloc_argv(unsigned *size, char **old_argv) { char **argv; unsigned new_size; gfp_t gfp; - if (*array_size) { - new_size = *array_size * 2; + if (*size) { + new_size = *size * 2; gfp = GFP_KERNEL; } else { new_size = 8; @@ -562,8 +563,8 @@ static char **realloc_argv(unsigned *array_size, char **old_argv) } argv = kmalloc(new_size * sizeof(*argv), gfp); if (argv) { - memcpy(argv, old_argv, *array_size * sizeof(*argv)); - *array_size = new_size; + memcpy(argv, old_argv, *size * sizeof(*argv)); + *size = new_size; } kfree(old_argv); @@ -1858,6 +1859,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, smp_mb(); if (dm_table_request_based(t)) queue_flag_set_unlocked(QUEUE_FLAG_STACKABLE, q); + + /* io_pages is used for readahead */ + q->backing_dev_info->io_pages = limits->max_sectors >> (PAGE_SHIFT - 9); } unsigned int dm_table_get_num_targets(struct dm_table *t)
diff --git a/drivers/md/dm-verity-avb.c b/drivers/md/dm-verity-avb.c new file mode 100644 index 0000000..a9f102a --- /dev/null +++ b/drivers/md/dm-verity-avb.c
@@ -0,0 +1,229 @@ +/* + * Copyright (C) 2017 Google. + * + * This file is released under the GPLv2. + * + * Based on drivers/md/dm-verity-chromeos.c + */ + +#include <linux/device-mapper.h> +#include <linux/module.h> +#include <linux/mount.h> + +#define DM_MSG_PREFIX "verity-avb" + +/* Set via module parameters. */ +static char avb_vbmeta_device[64]; +static char avb_invalidate_on_error[4]; + +static void invalidate_vbmeta_endio(struct bio *bio) +{ + if (bio->bi_status) + DMERR("invalidate_vbmeta_endio: error %d", bio->bi_status); + complete(bio->bi_private); +} + +static int invalidate_vbmeta_submit(struct bio *bio, + struct block_device *bdev, + int op, int access_last_sector, + struct page *page) +{ + DECLARE_COMPLETION_ONSTACK(wait); + + bio->bi_private = &wait; + bio->bi_end_io = invalidate_vbmeta_endio; + bio_set_dev(bio, bdev); + bio_set_op_attrs(bio, op, REQ_SYNC); + + bio->bi_iter.bi_sector = 0; + if (access_last_sector) { + sector_t last_sector; + + last_sector = (i_size_read(bdev->bd_inode)>>SECTOR_SHIFT) - 1; + bio->bi_iter.bi_sector = last_sector; + } + if (!bio_add_page(bio, page, PAGE_SIZE, 0)) { + DMERR("invalidate_vbmeta_submit: bio_add_page error"); + return -EIO; + } + + submit_bio(bio); + /* Wait up to 2 seconds for completion or fail. */ + if (!wait_for_completion_timeout(&wait, msecs_to_jiffies(2000))) + return -EIO; + return 0; +} + +static int invalidate_vbmeta(dev_t vbmeta_devt) +{ + int ret = 0; + struct block_device *bdev; + struct bio *bio; + struct page *page; + fmode_t dev_mode; + /* Ensure we do synchronous unblocked I/O. We may also need + * sync_bdev() on completion, but it really shouldn't. + */ + int access_last_sector = 0; + + DMINFO("invalidate_vbmeta: acting on device %d:%d", + MAJOR(vbmeta_devt), MINOR(vbmeta_devt)); + + /* First we open the device for reading. */ + dev_mode = FMODE_READ | FMODE_EXCL; + bdev = blkdev_get_by_dev(vbmeta_devt, dev_mode, + invalidate_vbmeta); + if (IS_ERR(bdev)) { + DMERR("invalidate_kernel: could not open device for reading"); + dev_mode = 0; + ret = -ENOENT; + goto failed_to_read; + } + + bio = bio_alloc(GFP_NOIO, 1); + if (!bio) { + ret = -ENOMEM; + goto failed_bio_alloc; + } + + page = alloc_page(GFP_NOIO); + if (!page) { + ret = -ENOMEM; + goto failed_to_alloc_page; + } + + access_last_sector = 0; + ret = invalidate_vbmeta_submit(bio, bdev, REQ_OP_READ, + access_last_sector, page); + if (ret) { + DMERR("invalidate_vbmeta: error reading"); + goto failed_to_submit_read; + } + + /* We have a page. Let's make sure it looks right. */ + if (memcmp("AVB0", page_address(page), 4) == 0) { + /* Stamp it. */ + memcpy(page_address(page), "AVE0", 4); + DMINFO("invalidate_vbmeta: found vbmeta partition"); + } else { + /* Could be this is on a AVB footer, check. Also, since the + * AVB footer is in the last 64 bytes, adjust for the fact that + * we're dealing with 512-byte sectors. + */ + size_t offset = (1<<SECTOR_SHIFT) - 64; + + access_last_sector = 1; + ret = invalidate_vbmeta_submit(bio, bdev, REQ_OP_READ, + access_last_sector, page); + if (ret) { + DMERR("invalidate_vbmeta: error reading"); + goto failed_to_submit_read; + } + if (memcmp("AVBf", page_address(page) + offset, 4) != 0) { + DMERR("invalidate_vbmeta on non-vbmeta partition"); + ret = -EINVAL; + goto invalid_header; + } + /* Stamp it. */ + memcpy(page_address(page) + offset, "AVE0", 4); + DMINFO("invalidate_vbmeta: found vbmeta footer partition"); + } + + /* Now rewrite the changed page - the block dev was being + * changed on read. Let's reopen here. + */ + blkdev_put(bdev, dev_mode); + dev_mode = FMODE_WRITE | FMODE_EXCL; + bdev = blkdev_get_by_dev(vbmeta_devt, dev_mode, + invalidate_vbmeta); + if (IS_ERR(bdev)) { + DMERR("invalidate_vbmeta: could not open device for writing"); + dev_mode = 0; + ret = -ENOENT; + goto failed_to_write; + } + + /* We re-use the same bio to do the write after the read. Need to reset + * it to initialize bio->bi_remaining. + */ + bio_reset(bio); + + ret = invalidate_vbmeta_submit(bio, bdev, REQ_OP_WRITE, + access_last_sector, page); + if (ret) { + DMERR("invalidate_vbmeta: error writing"); + goto failed_to_submit_write; + } + + DMERR("invalidate_vbmeta: completed."); + ret = 0; +failed_to_submit_write: +failed_to_write: +invalid_header: + __free_page(page); +failed_to_submit_read: + /* Technically, we'll leak a page with the pending bio, but + * we're about to reboot anyway. + */ +failed_to_alloc_page: + bio_put(bio); +failed_bio_alloc: + if (dev_mode) + blkdev_put(bdev, dev_mode); +failed_to_read: + return ret; +} + +void dm_verity_avb_error_handler(void) +{ + dev_t dev; + + DMINFO("AVB error handler called for %s", avb_vbmeta_device); + + if (strcmp(avb_invalidate_on_error, "yes") != 0) { + DMINFO("Not configured to invalidate"); + return; + } + + if (avb_vbmeta_device[0] == '\0') { + DMERR("avb_vbmeta_device parameter not set"); + goto fail_no_dev; + } + + dev = name_to_dev_t(avb_vbmeta_device); + if (!dev) { + DMERR("No matching partition for device: %s", + avb_vbmeta_device); + goto fail_no_dev; + } + + invalidate_vbmeta(dev); + +fail_no_dev: + ; +} + +static int __init dm_verity_avb_init(void) +{ + DMINFO("AVB error handler initialized with vbmeta device: %s", + avb_vbmeta_device); + return 0; +} + +static void __exit dm_verity_avb_exit(void) +{ +} + +module_init(dm_verity_avb_init); +module_exit(dm_verity_avb_exit); + +MODULE_AUTHOR("David Zeuthen <zeuthen@google.com>"); +MODULE_DESCRIPTION("AVB-specific error handler for dm-verity"); +MODULE_LICENSE("GPL"); + +/* Declare parameter with no module prefix */ +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "androidboot.vbmeta." +module_param_string(device, avb_vbmeta_device, sizeof(avb_vbmeta_device), 0); +module_param_string(invalidate_on_error, avb_invalidate_on_error, + sizeof(avb_invalidate_on_error), 0);
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index e13f908..776a4f7 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c
@@ -11,6 +11,7 @@ #include "dm-verity-fec.h" #include <linux/math64.h> +#include <linux/sysfs.h> #define DM_MSG_PREFIX "verity-fec" @@ -175,9 +176,11 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio, if (r < 0 && neras) DMERR_LIMIT("%s: FEC %llu: failed to correct: %d", v->data_dev->name, (unsigned long long)rsb, r); - else if (r > 0) + else if (r > 0) { DMWARN_LIMIT("%s: FEC %llu: corrected %d errors", v->data_dev->name, (unsigned long long)rsb, r); + atomic_add_unless(&v->fec->corrected, 1, INT_MAX); + } return r; } @@ -545,6 +548,7 @@ unsigned verity_fec_status_table(struct dm_verity *v, unsigned sz, void verity_fec_dtr(struct dm_verity *v) { struct dm_verity_fec *f = v->fec; + struct kobject *kobj = &f->kobj_holder.kobj; if (!verity_fec_is_enabled(v)) goto out; @@ -561,6 +565,12 @@ void verity_fec_dtr(struct dm_verity *v) if (f->dev) dm_put_device(v->ti, f->dev); + + if (kobj->state_initialized) { + kobject_put(kobj); + wait_for_completion(dm_get_completion_from_kobject(kobj)); + } + out: kfree(f); v->fec = NULL; @@ -649,6 +659,28 @@ int verity_fec_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v, return 0; } +static ssize_t corrected_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct dm_verity_fec *f = container_of(kobj, struct dm_verity_fec, + kobj_holder.kobj); + + return sprintf(buf, "%d\n", atomic_read(&f->corrected)); +} + +static struct kobj_attribute attr_corrected = __ATTR_RO(corrected); + +static struct attribute *fec_attrs[] = { + &attr_corrected.attr, + NULL +}; + +static struct kobj_type fec_ktype = { + .sysfs_ops = &kobj_sysfs_ops, + .default_attrs = fec_attrs, + .release = dm_kobject_release +}; + /* * Allocate dm_verity_fec for v->fec. Must be called before verity_fec_ctr. */ @@ -672,8 +704,10 @@ int verity_fec_ctr_alloc(struct dm_verity *v) */ int verity_fec_ctr(struct dm_verity *v) { + int r; struct dm_verity_fec *f = v->fec; struct dm_target *ti = v->ti; + struct mapped_device *md = dm_table_get_md(ti->table); u64 hash_blocks; if (!verity_fec_is_enabled(v)) { @@ -681,6 +715,16 @@ int verity_fec_ctr(struct dm_verity *v) return 0; } + /* Create a kobject and sysfs attributes */ + init_completion(&f->kobj_holder.completion); + + r = kobject_init_and_add(&f->kobj_holder.kobj, &fec_ktype, + &disk_to_dev(dm_disk(md))->kobj, "%s", "fec"); + if (r) { + ti->error = "Cannot create kobject"; + return r; + } + /* * FEC is computed over data blocks, possible metadata, and * hash blocks. In other words, FEC covers total of fec_blocks
diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h index bb31ce8..4db0cae 100644 --- a/drivers/md/dm-verity-fec.h +++ b/drivers/md/dm-verity-fec.h
@@ -12,6 +12,8 @@ #ifndef DM_VERITY_FEC_H #define DM_VERITY_FEC_H +#include "dm.h" +#include "dm-core.h" #include "dm-verity.h" #include <linux/rslib.h> @@ -51,6 +53,8 @@ struct dm_verity_fec { mempool_t *extra_pool; /* mempool for extra buffers */ mempool_t *output_pool; /* mempool for output */ struct kmem_cache *cache; /* cache for buffers */ + atomic_t corrected; /* corrected errors */ + struct dm_kobject_holder kobj_holder; /* for sysfs attributes */ }; /* per-bio data */
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 8573c70..5cf77ab 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c
@@ -32,6 +32,7 @@ #define DM_VERITY_OPT_LOGGING "ignore_corruption" #define DM_VERITY_OPT_RESTART "restart_on_corruption" #define DM_VERITY_OPT_IGN_ZEROES "ignore_zero_blocks" +#define DM_VERITY_OPT_AT_MOST_ONCE "check_at_most_once" #define DM_VERITY_OPTS_MAX (2 + DM_VERITY_OPTS_FEC) @@ -291,8 +292,12 @@ static int verity_handle_err(struct dm_verity *v, enum verity_block_type type, if (v->mode == DM_VERITY_MODE_LOGGING) return 0; - if (v->mode == DM_VERITY_MODE_RESTART) + if (v->mode == DM_VERITY_MODE_RESTART) { +#ifdef CONFIG_DM_VERITY_AVB + dm_verity_avb_error_handler(); +#endif kernel_restart("dm-verity device corrupted"); + } return 1; } @@ -490,6 +495,18 @@ static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io, } /* + * Moves the bio iter one data block forward. + */ +static inline void verity_bv_skip_block(struct dm_verity *v, + struct dm_verity_io *io, + struct bvec_iter *iter) +{ + struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size); + + bio_advance_iter(bio, iter, 1 << v->data_dev_block_bits); +} + +/* * Verify one "dm_verity_io" structure. */ static int verity_verify_io(struct dm_verity_io *io) @@ -502,9 +519,16 @@ static int verity_verify_io(struct dm_verity_io *io) for (b = 0; b < io->n_blocks; b++) { int r; + sector_t cur_block = io->block + b; struct ahash_request *req = verity_io_hash_req(v, io); - r = verity_hash_for_block(v, io, io->block + b, + if (v->validated_blocks && + likely(test_bit(cur_block, v->validated_blocks))) { + verity_bv_skip_block(v, io, &io->iter); + continue; + } + + r = verity_hash_for_block(v, io, cur_block, verity_io_want_digest(v, io), &is_zero); if (unlikely(r < 0)) @@ -538,13 +562,16 @@ static int verity_verify_io(struct dm_verity_io *io) return r; if (likely(memcmp(verity_io_real_digest(v, io), - verity_io_want_digest(v, io), v->digest_size) == 0)) + verity_io_want_digest(v, io), v->digest_size) == 0)) { + if (v->validated_blocks) + set_bit(cur_block, v->validated_blocks); continue; + } else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, - io->block + b, NULL, &start) == 0) + cur_block, NULL, &start) == 0) continue; else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, - io->block + b)) + cur_block)) return -EIO; } @@ -648,7 +675,7 @@ static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io) * Bio map function. It allocates dm_verity_io structure and bio vector and * fills them. Then it issues prefetches and the I/O. */ -static int verity_map(struct dm_target *ti, struct bio *bio) +int verity_map(struct dm_target *ti, struct bio *bio) { struct dm_verity *v = ti->private; struct dm_verity_io *io; @@ -693,7 +720,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio) /* * Status: V (valid) or C (corruption found) */ -static void verity_status(struct dm_target *ti, status_type_t type, +void verity_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) { struct dm_verity *v = ti->private; @@ -730,6 +757,8 @@ static void verity_status(struct dm_target *ti, status_type_t type, args += DM_VERITY_OPTS_FEC; if (v->zero_digest) args++; + if (v->validated_blocks) + args++; if (!args) return; DMEMIT(" %u", args); @@ -748,12 +777,14 @@ static void verity_status(struct dm_target *ti, status_type_t type, } if (v->zero_digest) DMEMIT(" " DM_VERITY_OPT_IGN_ZEROES); + if (v->validated_blocks) + DMEMIT(" " DM_VERITY_OPT_AT_MOST_ONCE); sz = verity_fec_status_table(v, sz, result, maxlen); break; } } -static int verity_prepare_ioctl(struct dm_target *ti, +int verity_prepare_ioctl(struct dm_target *ti, struct block_device **bdev, fmode_t *mode) { struct dm_verity *v = ti->private; @@ -766,7 +797,7 @@ static int verity_prepare_ioctl(struct dm_target *ti, return 0; } -static int verity_iterate_devices(struct dm_target *ti, +int verity_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) { struct dm_verity *v = ti->private; @@ -774,7 +805,7 @@ static int verity_iterate_devices(struct dm_target *ti, return fn(ti, v->data_dev, v->data_start, ti->len, data); } -static void verity_io_hints(struct dm_target *ti, struct queue_limits *limits) +void verity_io_hints(struct dm_target *ti, struct queue_limits *limits) { struct dm_verity *v = ti->private; @@ -787,7 +818,7 @@ static void verity_io_hints(struct dm_target *ti, struct queue_limits *limits) blk_limits_io_min(limits, limits->logical_block_size); } -static void verity_dtr(struct dm_target *ti) +void verity_dtr(struct dm_target *ti) { struct dm_verity *v = ti->private; @@ -797,6 +828,7 @@ static void verity_dtr(struct dm_target *ti) if (v->bufio) dm_bufio_client_destroy(v->bufio); + kvfree(v->validated_blocks); kfree(v->salt); kfree(v->root_digest); kfree(v->zero_digest); @@ -817,6 +849,26 @@ static void verity_dtr(struct dm_target *ti) kfree(v); } +static int verity_alloc_most_once(struct dm_verity *v) +{ + struct dm_target *ti = v->ti; + + /* the bitset can only handle INT_MAX blocks */ + if (v->data_blocks > INT_MAX) { + ti->error = "device too large to use check_at_most_once"; + return -E2BIG; + } + + v->validated_blocks = kvzalloc(BITS_TO_LONGS(v->data_blocks) * + sizeof(unsigned long), GFP_KERNEL); + if (!v->validated_blocks) { + ti->error = "failed to allocate bitset for check_at_most_once"; + return -ENOMEM; + } + + return 0; +} + static int verity_alloc_zero_digest(struct dm_verity *v) { int r = -ENOMEM; @@ -886,6 +938,12 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v) } continue; + } else if (!strcasecmp(arg_name, DM_VERITY_OPT_AT_MOST_ONCE)) { + r = verity_alloc_most_once(v); + if (r) + return r; + continue; + } else if (verity_is_fec_opt_arg(arg_name)) { r = verity_fec_parse_opt_args(as, v, &argc, arg_name); if (r) @@ -914,7 +972,7 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v) * <digest> * <salt> Hex string or "-" if no salt. */ -static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) +int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) { struct dm_verity *v; struct dm_arg_set as; @@ -1027,6 +1085,15 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) v->tfm = NULL; goto bad; } + + /* + * dm-verity performance can vary greatly depending on which hash + * algorithm implementation is used. Help people debug performance + * problems by logging the ->cra_driver_name. + */ + DMINFO("%s using implementation \"%s\"", v->alg_name, + crypto_hash_alg_common(v->tfm)->base.cra_driver_name); + v->digest_size = crypto_ahash_digestsize(v->tfm); if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) { ti->error = "Digest size too big"; @@ -1078,6 +1145,14 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } +#ifdef CONFIG_DM_ANDROID_VERITY_AT_MOST_ONCE_DEFAULT_ENABLED + if (!v->validated_blocks) { + r = verity_alloc_most_once(v); + if (r) + goto bad; + } +#endif + v->hash_per_block_bits = __fls((1 << v->hash_dev_block_bits) / v->digest_size); @@ -1153,7 +1228,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) static struct target_type verity_target = { .name = "verity", - .version = {1, 3, 0}, + .version = {1, 4, 0}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr,
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index a59e0ad..116e91f 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h
@@ -63,6 +63,7 @@ struct dm_verity { sector_t hash_level_block[DM_VERITY_MAX_LEVELS]; struct dm_verity_fec *fec; /* forward error correction */ + unsigned long *validated_blocks; /* bitset blocks validated */ }; struct dm_verity_io { @@ -131,4 +132,15 @@ extern int verity_hash(struct dm_verity *v, struct ahash_request *req, extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, sector_t block, u8 *digest, bool *is_zero); +extern void verity_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen); +extern int verity_prepare_ioctl(struct dm_target *ti, + struct block_device **bdev, fmode_t *mode); +extern int verity_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, void *data); +extern void verity_io_hints(struct dm_target *ti, struct queue_limits *limits); +extern void verity_dtr(struct dm_target *ti); +extern int verity_ctr(struct dm_target *ti, unsigned argc, char **argv); +extern int verity_map(struct dm_target *ti, struct bio *bio); +extern void dm_verity_avb_error_handler(void); #endif /* DM_VERITY_H */
diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 38c84c0..ab289ce 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h
@@ -80,8 +80,6 @@ void dm_set_md_type(struct mapped_device *md, enum dm_queue_mode type); enum dm_queue_mode dm_get_md_type(struct mapped_device *md); struct target_type *dm_get_immutable_target_type(struct mapped_device *md); -int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t); - /* * To check the return value from dm_table_find_target(). */
diff --git a/drivers/md/md.c b/drivers/md/md.c index 5599712..24f0f7c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c
@@ -5371,7 +5371,7 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) return NULL; } -static int add_named_array(const char *val, struct kernel_param *kp) +static int add_named_array(const char *val, const struct kernel_param *kp) { /* * val must be "md_*" or "mdNNN". @@ -9346,11 +9346,11 @@ static __exit void md_exit(void) subsys_initcall(md_init); module_exit(md_exit) -static int get_ro(char *buffer, struct kernel_param *kp) +static int get_ro(char *buffer, const struct kernel_param *kp) { return sprintf(buffer, "%d", start_readonly); } -static int set_ro(const char *val, struct kernel_param *kp) +static int set_ro(const char *val, const struct kernel_param *kp) { return kstrtouint(val, 10, (unsigned int *)&start_readonly); }
diff --git a/drivers/media/pci/tw686x/tw686x-core.c b/drivers/media/pci/tw686x/tw686x-core.c index 336e2f9..b762e5f 100644 --- a/drivers/media/pci/tw686x/tw686x-core.c +++ b/drivers/media/pci/tw686x/tw686x-core.c
@@ -72,12 +72,12 @@ static const char *dma_mode_name(unsigned int mode) } } -static int tw686x_dma_mode_get(char *buffer, struct kernel_param *kp) +static int tw686x_dma_mode_get(char *buffer, const struct kernel_param *kp) { return sprintf(buffer, "%s", dma_mode_name(dma_mode)); } -static int tw686x_dma_mode_set(const char *val, struct kernel_param *kp) +static int tw686x_dma_mode_set(const char *val, const struct kernel_param *kp) { if (!strcasecmp(val, dma_mode_name(TW686X_DMA_MODE_MEMCPY))) dma_mode = TW686X_DMA_MODE_MEMCPY;
diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index 064d882..f5b17e5 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c
@@ -2223,7 +2223,7 @@ static int uvc_reset_resume(struct usb_interface *intf) * Module parameters */ -static int uvc_clock_param_get(char *buffer, struct kernel_param *kp) +static int uvc_clock_param_get(char *buffer, const struct kernel_param *kp) { if (uvc_clock_param == CLOCK_MONOTONIC) return sprintf(buffer, "CLOCK_MONOTONIC"); @@ -2231,7 +2231,7 @@ static int uvc_clock_param_get(char *buffer, struct kernel_param *kp) return sprintf(buffer, "CLOCK_REALTIME"); } -static int uvc_clock_param_set(const char *val, struct kernel_param *kp) +static int uvc_clock_param_set(const char *val, const struct kernel_param *kp) { if (strncasecmp(val, "clock_", strlen("clock_")) == 0) val += strlen("clock_");
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index f1ef4e9..63ac1bb 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -2482,11 +2482,8 @@ struct v4l2_ioctl_info { unsigned int ioctl; u32 flags; const char * const name; - union { - u32 offset; - int (*func)(const struct v4l2_ioctl_ops *ops, - struct file *file, void *fh, void *p); - } u; + int (*func)(const struct v4l2_ioctl_ops *ops, struct file *file, + void *fh, void *p); void (*debug)(const void *arg, bool write_only); }; @@ -2494,27 +2491,23 @@ struct v4l2_ioctl_info { #define INFO_FL_PRIO (1 << 0) /* This control can be valid if the filehandle passes a control handler. */ #define INFO_FL_CTRL (1 << 1) -/* This is a standard ioctl, no need for special code */ -#define INFO_FL_STD (1 << 2) /* This is ioctl has its own function */ -#define INFO_FL_FUNC (1 << 3) +#define INFO_FL_FUNC (1 << 2) /* Queuing ioctl */ -#define INFO_FL_QUEUE (1 << 4) +#define INFO_FL_QUEUE (1 << 3) /* Always copy back result, even on error */ -#define INFO_FL_ALWAYS_COPY (1 << 5) +#define INFO_FL_ALWAYS_COPY (1 << 4) /* Zero struct from after the field to the end */ #define INFO_FL_CLEAR(v4l2_struct, field) \ ((offsetof(struct v4l2_struct, field) + \ sizeof(((struct v4l2_struct *)0)->field)) << 16) #define INFO_FL_CLEAR_MASK (_IOC_SIZEMASK << 16) -#define IOCTL_INFO_STD(_ioctl, _vidioc, _debug, _flags) \ - [_IOC_NR(_ioctl)] = { \ - .ioctl = _ioctl, \ - .flags = _flags | INFO_FL_STD, \ - .name = #_ioctl, \ - .u.offset = offsetof(struct v4l2_ioctl_ops, _vidioc), \ - .debug = _debug, \ +#define DEFINE_IOCTL_STD_FNC(_vidioc) \ + static int __v4l_ ## _vidioc ## _fnc( \ + const struct v4l2_ioctl_ops *ops, \ + struct file *file, void *fh, void *p) { \ + return ops->_vidioc(file, fh, p); \ } #define IOCTL_INFO_FNC(_ioctl, _func, _debug, _flags) \ @@ -2522,10 +2515,42 @@ struct v4l2_ioctl_info { .ioctl = _ioctl, \ .flags = _flags | INFO_FL_FUNC, \ .name = #_ioctl, \ - .u.func = _func, \ + .func = _func, \ .debug = _debug, \ } +#define IOCTL_INFO_STD(_ioctl, _vidioc, _debug, _flags) \ + IOCTL_INFO_FNC(_ioctl, __v4l_ ## _vidioc ## _fnc, _debug, _flags) + +DEFINE_IOCTL_STD_FNC(vidioc_g_fbuf) +DEFINE_IOCTL_STD_FNC(vidioc_s_fbuf) +DEFINE_IOCTL_STD_FNC(vidioc_expbuf) +DEFINE_IOCTL_STD_FNC(vidioc_g_std) +DEFINE_IOCTL_STD_FNC(vidioc_g_audio) +DEFINE_IOCTL_STD_FNC(vidioc_s_audio) +DEFINE_IOCTL_STD_FNC(vidioc_g_input) +DEFINE_IOCTL_STD_FNC(vidioc_g_edid) +DEFINE_IOCTL_STD_FNC(vidioc_s_edid) +DEFINE_IOCTL_STD_FNC(vidioc_g_output) +DEFINE_IOCTL_STD_FNC(vidioc_g_audout) +DEFINE_IOCTL_STD_FNC(vidioc_s_audout) +DEFINE_IOCTL_STD_FNC(vidioc_g_jpegcomp) +DEFINE_IOCTL_STD_FNC(vidioc_s_jpegcomp) +DEFINE_IOCTL_STD_FNC(vidioc_enumaudio) +DEFINE_IOCTL_STD_FNC(vidioc_enumaudout) +DEFINE_IOCTL_STD_FNC(vidioc_enum_framesizes) +DEFINE_IOCTL_STD_FNC(vidioc_enum_frameintervals) +DEFINE_IOCTL_STD_FNC(vidioc_g_enc_index) +DEFINE_IOCTL_STD_FNC(vidioc_encoder_cmd) +DEFINE_IOCTL_STD_FNC(vidioc_try_encoder_cmd) +DEFINE_IOCTL_STD_FNC(vidioc_decoder_cmd) +DEFINE_IOCTL_STD_FNC(vidioc_try_decoder_cmd) +DEFINE_IOCTL_STD_FNC(vidioc_s_dv_timings) +DEFINE_IOCTL_STD_FNC(vidioc_g_dv_timings) +DEFINE_IOCTL_STD_FNC(vidioc_enum_dv_timings) +DEFINE_IOCTL_STD_FNC(vidioc_query_dv_timings) +DEFINE_IOCTL_STD_FNC(vidioc_dv_timings_cap) + static struct v4l2_ioctl_info v4l2_ioctls[] = { IOCTL_INFO_FNC(VIDIOC_QUERYCAP, v4l_querycap, v4l_print_querycap, 0), IOCTL_INFO_FNC(VIDIOC_ENUM_FMT, v4l_enum_fmt, v4l_print_fmtdesc, INFO_FL_CLEAR(v4l2_fmtdesc, type)), @@ -2710,14 +2735,8 @@ static long __video_do_ioctl(struct file *file, } write_only = _IOC_DIR(cmd) == _IOC_WRITE; - if (info->flags & INFO_FL_STD) { - typedef int (*vidioc_op)(struct file *file, void *fh, void *p); - const void *p = vfd->ioctl_ops; - const vidioc_op *vidioc = p + info->u.offset; - - ret = (*vidioc)(file, fh, arg); - } else if (info->flags & INFO_FL_FUNC) { - ret = info->u.func(ops, file, fh, arg); + if (info->flags & INFO_FL_FUNC) { + ret = info->func(ops, file, fh, arg); } else if (!ops->vidioc_default) { ret = -ENOTTY; } else {
diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c index 84eab28..7a93400 100644 --- a/drivers/message/fusion/mptbase.c +++ b/drivers/message/fusion/mptbase.c
@@ -99,7 +99,7 @@ module_param(mpt_channel_mapping, int, 0); MODULE_PARM_DESC(mpt_channel_mapping, " Mapping id's to channels (default=0)"); static int mpt_debug_level; -static int mpt_set_debug_level(const char *val, struct kernel_param *kp); +static int mpt_set_debug_level(const char *val, const struct kernel_param *kp); module_param_call(mpt_debug_level, mpt_set_debug_level, param_get_int, &mpt_debug_level, 0600); MODULE_PARM_DESC(mpt_debug_level, @@ -242,7 +242,7 @@ pci_enable_io_access(struct pci_dev *pdev) pci_write_config_word(pdev, PCI_COMMAND, command_reg); } -static int mpt_set_debug_level(const char *val, struct kernel_param *kp) +static int mpt_set_debug_level(const char *val, const struct kernel_param *kp) { int ret = param_set_int(val, kp); MPT_ADAPTER *ioc;
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 8136dc7..ce2449a 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig
@@ -506,6 +506,27 @@ Enable this configuration option to enable the host side test driver for PCI Endpoint. +config UID_SYS_STATS + bool "Per-UID statistics" + depends on PROFILING && TASK_XACCT && TASK_IO_ACCOUNTING + help + Per UID based cpu time statistics exported to /proc/uid_cputime + Per UID based io statistics exported to /proc/uid_io + Per UID based procstat control in /proc/uid_procstat + +config UID_SYS_STATS_DEBUG + bool "Per-TASK statistics" + depends on UID_SYS_STATS + default n + help + Per TASK based io statistics exported to /proc/uid_io + +config MEMORY_STATE_TIME + tristate "Memory freq/bandwidth time statistics" + depends on PROFILING + help + Memory time statistics exported to /sys/kernel/memory_state_time + source "drivers/misc/c2port/Kconfig" source "drivers/misc/eeprom/Kconfig" source "drivers/misc/cb710/Kconfig"
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index ad0e64f..a4ccc7d 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile
@@ -57,6 +57,9 @@ obj-$(CONFIG_ASPEED_LPC_SNOOP) += aspeed-lpc-snoop.o obj-$(CONFIG_PCI_ENDPOINT_TEST) += pci_endpoint_test.o +obj-$(CONFIG_UID_SYS_STATS) += uid_sys_stats.o +obj-$(CONFIG_MEMORY_STATE_TIME) += memory_state_time.o + lkdtm-$(CONFIG_LKDTM) += lkdtm_core.o lkdtm-$(CONFIG_LKDTM) += lkdtm_bugs.o lkdtm-$(CONFIG_LKDTM) += lkdtm_heap.o @@ -66,6 +69,7 @@ lkdtm-$(CONFIG_LKDTM) += lkdtm_usercopy.o KCOV_INSTRUMENT_lkdtm_rodata.o := n +CFLAGS_lkdtm_rodata.o += $(DISABLE_LTO) OBJCOPYFLAGS := OBJCOPYFLAGS_lkdtm_rodata_objcopy.o := \
diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index fc7efed..24108bf 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c
@@ -1132,7 +1132,8 @@ static void kgdbts_put_char(u8 chr) ts.run_test(0, chr); } -static int param_set_kgdbts_var(const char *kmessage, struct kernel_param *kp) +static int param_set_kgdbts_var(const char *kmessage, + const struct kernel_param *kp) { int len = strlen(kmessage);
diff --git a/drivers/misc/memory_state_time.c b/drivers/misc/memory_state_time.c new file mode 100644 index 0000000..ba94dcf --- /dev/null +++ b/drivers/misc/memory_state_time.c
@@ -0,0 +1,462 @@ +/* drivers/misc/memory_state_time.c + * + * Copyright (C) 2016 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/device.h> +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/hashtable.h> +#include <linux/kconfig.h> +#include <linux/kernel.h> +#include <linux/kobject.h> +#include <linux/memory-state-time.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/of_platform.h> +#include <linux/slab.h> +#include <linux/sysfs.h> +#include <linux/time.h> +#include <linux/timekeeping.h> +#include <linux/workqueue.h> + +#define KERNEL_ATTR_RO(_name) \ +static struct kobj_attribute _name##_attr = __ATTR_RO(_name) + +#define KERNEL_ATTR_RW(_name) \ +static struct kobj_attribute _name##_attr = \ + __ATTR(_name, 0644, _name##_show, _name##_store) + +#define FREQ_HASH_BITS 4 +DECLARE_HASHTABLE(freq_hash_table, FREQ_HASH_BITS); + +static DEFINE_MUTEX(mem_lock); + +#define TAG "memory_state_time" +#define BW_NODE "/soc/memory-state-time" +#define FREQ_TBL "freq-tbl" +#define BW_TBL "bw-buckets" +#define NUM_SOURCES "num-sources" + +#define LOWEST_FREQ 2 + +static int curr_bw; +static int curr_freq; +static u32 *bw_buckets; +static u32 *freq_buckets; +static int num_freqs; +static int num_buckets; +static int registered_bw_sources; +static u64 last_update; +static bool init_success; +static struct workqueue_struct *memory_wq; +static u32 num_sources = 10; +static int *bandwidths; + +struct freq_entry { + int freq; + u64 *buckets; /* Bandwidth buckets. */ + struct hlist_node hash; +}; + +struct queue_container { + struct work_struct update_state; + int value; + u64 time_now; + int id; + struct mutex *lock; +}; + +static int find_bucket(int bw) +{ + int i; + + if (bw_buckets != NULL) { + for (i = 0; i < num_buckets; i++) { + if (bw_buckets[i] > bw) { + pr_debug("Found bucket %d for bandwidth %d\n", + i, bw); + return i; + } + } + return num_buckets - 1; + } + return 0; +} + +static u64 get_time_diff(u64 time_now) +{ + u64 ms; + + ms = time_now - last_update; + last_update = time_now; + return ms; +} + +static ssize_t show_stat_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + int i, j; + int len = 0; + struct freq_entry *freq_entry; + + for (i = 0; i < num_freqs; i++) { + hash_for_each_possible(freq_hash_table, freq_entry, hash, + freq_buckets[i]) { + if (freq_entry->freq == freq_buckets[i]) { + len += scnprintf(buf + len, PAGE_SIZE - len, + "%d ", freq_buckets[i]); + if (len >= PAGE_SIZE) + break; + for (j = 0; j < num_buckets; j++) { + len += scnprintf(buf + len, + PAGE_SIZE - len, + "%llu ", + freq_entry->buckets[j]); + } + len += scnprintf(buf + len, PAGE_SIZE - len, + "\n"); + } + } + } + pr_debug("Current Time: %llu\n", ktime_get_boot_ns()); + return len; +} +KERNEL_ATTR_RO(show_stat); + +static void update_table(u64 time_now) +{ + struct freq_entry *freq_entry; + + pr_debug("Last known bw %d freq %d\n", curr_bw, curr_freq); + hash_for_each_possible(freq_hash_table, freq_entry, hash, curr_freq) { + if (curr_freq == freq_entry->freq) { + freq_entry->buckets[find_bucket(curr_bw)] + += get_time_diff(time_now); + break; + } + } +} + +static bool freq_exists(int freq) +{ + int i; + + for (i = 0; i < num_freqs; i++) { + if (freq == freq_buckets[i]) + return true; + } + return false; +} + +static int calculate_total_bw(int bw, int index) +{ + int i; + int total_bw = 0; + + pr_debug("memory_state_time New bw %d for id %d\n", bw, index); + bandwidths[index] = bw; + for (i = 0; i < registered_bw_sources; i++) + total_bw += bandwidths[i]; + return total_bw; +} + +static void freq_update_do_work(struct work_struct *work) +{ + struct queue_container *freq_state_update + = container_of(work, struct queue_container, + update_state); + if (freq_state_update) { + mutex_lock(&mem_lock); + update_table(freq_state_update->time_now); + curr_freq = freq_state_update->value; + mutex_unlock(&mem_lock); + kfree(freq_state_update); + } +} + +static void bw_update_do_work(struct work_struct *work) +{ + struct queue_container *bw_state_update + = container_of(work, struct queue_container, + update_state); + if (bw_state_update) { + mutex_lock(&mem_lock); + update_table(bw_state_update->time_now); + curr_bw = calculate_total_bw(bw_state_update->value, + bw_state_update->id); + mutex_unlock(&mem_lock); + kfree(bw_state_update); + } +} + +static void memory_state_freq_update(struct memory_state_update_block *ub, + int value) +{ + if (IS_ENABLED(CONFIG_MEMORY_STATE_TIME)) { + if (freq_exists(value) && init_success) { + struct queue_container *freq_container + = kmalloc(sizeof(struct queue_container), + GFP_KERNEL); + if (!freq_container) + return; + INIT_WORK(&freq_container->update_state, + freq_update_do_work); + freq_container->time_now = ktime_get_boot_ns(); + freq_container->value = value; + pr_debug("Scheduling freq update in work queue\n"); + queue_work(memory_wq, &freq_container->update_state); + } else { + pr_debug("Freq does not exist.\n"); + } + } +} + +static void memory_state_bw_update(struct memory_state_update_block *ub, + int value) +{ + if (IS_ENABLED(CONFIG_MEMORY_STATE_TIME)) { + if (init_success) { + struct queue_container *bw_container + = kmalloc(sizeof(struct queue_container), + GFP_KERNEL); + if (!bw_container) + return; + INIT_WORK(&bw_container->update_state, + bw_update_do_work); + bw_container->time_now = ktime_get_boot_ns(); + bw_container->value = value; + bw_container->id = ub->id; + pr_debug("Scheduling bandwidth update in work queue\n"); + queue_work(memory_wq, &bw_container->update_state); + } + } +} + +struct memory_state_update_block *memory_state_register_frequency_source(void) +{ + struct memory_state_update_block *block; + + if (IS_ENABLED(CONFIG_MEMORY_STATE_TIME)) { + pr_debug("Allocating frequency source\n"); + block = kmalloc(sizeof(struct memory_state_update_block), + GFP_KERNEL); + if (!block) + return NULL; + block->update_call = memory_state_freq_update; + return block; + } + pr_err("Config option disabled.\n"); + return NULL; +} +EXPORT_SYMBOL_GPL(memory_state_register_frequency_source); + +struct memory_state_update_block *memory_state_register_bandwidth_source(void) +{ + struct memory_state_update_block *block; + + if (IS_ENABLED(CONFIG_MEMORY_STATE_TIME)) { + pr_debug("Allocating bandwidth source %d\n", + registered_bw_sources); + block = kmalloc(sizeof(struct memory_state_update_block), + GFP_KERNEL); + if (!block) + return NULL; + block->update_call = memory_state_bw_update; + if (registered_bw_sources < num_sources) { + block->id = registered_bw_sources++; + } else { + pr_err("Unable to allocate source; max number reached\n"); + kfree(block); + return NULL; + } + return block; + } + pr_err("Config option disabled.\n"); + return NULL; +} +EXPORT_SYMBOL_GPL(memory_state_register_bandwidth_source); + +/* Buckets are designated by their maximum. + * Returns the buckets decided by the capability of the device. + */ +static int get_bw_buckets(struct device *dev) +{ + int ret, lenb; + struct device_node *node = dev->of_node; + + of_property_read_u32(node, NUM_SOURCES, &num_sources); + if (!of_find_property(node, BW_TBL, &lenb)) { + pr_err("Missing %s property\n", BW_TBL); + return -ENODATA; + } + + bandwidths = devm_kzalloc(dev, + sizeof(*bandwidths) * num_sources, GFP_KERNEL); + if (!bandwidths) + return -ENOMEM; + lenb /= sizeof(*bw_buckets); + bw_buckets = devm_kzalloc(dev, lenb * sizeof(*bw_buckets), + GFP_KERNEL); + if (!bw_buckets) { + devm_kfree(dev, bandwidths); + return -ENOMEM; + } + ret = of_property_read_u32_array(node, BW_TBL, bw_buckets, + lenb); + if (ret < 0) { + devm_kfree(dev, bandwidths); + devm_kfree(dev, bw_buckets); + pr_err("Unable to read bandwidth table from device tree.\n"); + return ret; + } + + curr_bw = 0; + num_buckets = lenb; + return 0; +} + +/* Adds struct freq_entry nodes to the hashtable for each compatible frequency. + * Returns the supported number of frequencies. + */ +static int freq_buckets_init(struct device *dev) +{ + struct freq_entry *freq_entry; + int i; + int ret, lenf; + struct device_node *node = dev->of_node; + + if (!of_find_property(node, FREQ_TBL, &lenf)) { + pr_err("Missing %s property\n", FREQ_TBL); + return -ENODATA; + } + + lenf /= sizeof(*freq_buckets); + freq_buckets = devm_kzalloc(dev, lenf * sizeof(*freq_buckets), + GFP_KERNEL); + if (!freq_buckets) + return -ENOMEM; + pr_debug("freqs found len %d\n", lenf); + ret = of_property_read_u32_array(node, FREQ_TBL, freq_buckets, + lenf); + if (ret < 0) { + devm_kfree(dev, freq_buckets); + pr_err("Unable to read frequency table from device tree.\n"); + return ret; + } + pr_debug("ret freq %d\n", ret); + + num_freqs = lenf; + curr_freq = freq_buckets[LOWEST_FREQ]; + + for (i = 0; i < num_freqs; i++) { + freq_entry = devm_kzalloc(dev, sizeof(struct freq_entry), + GFP_KERNEL); + if (!freq_entry) + return -ENOMEM; + freq_entry->buckets = devm_kzalloc(dev, sizeof(u64)*num_buckets, + GFP_KERNEL); + if (!freq_entry->buckets) { + devm_kfree(dev, freq_entry); + return -ENOMEM; + } + pr_debug("memory_state_time Adding freq to ht %d\n", + freq_buckets[i]); + freq_entry->freq = freq_buckets[i]; + hash_add(freq_hash_table, &freq_entry->hash, freq_buckets[i]); + } + return 0; +} + +struct kobject *memory_kobj; +EXPORT_SYMBOL_GPL(memory_kobj); + +static struct attribute *memory_attrs[] = { + &show_stat_attr.attr, + NULL +}; + +static struct attribute_group memory_attr_group = { + .attrs = memory_attrs, +}; + +static int memory_state_time_probe(struct platform_device *pdev) +{ + int error; + + error = get_bw_buckets(&pdev->dev); + if (error) + return error; + error = freq_buckets_init(&pdev->dev); + if (error) + return error; + last_update = ktime_get_boot_ns(); + init_success = true; + + pr_debug("memory_state_time initialized with num_freqs %d\n", + num_freqs); + return 0; +} + +static const struct of_device_id match_table[] = { + { .compatible = "memory-state-time" }, + {} +}; + +static struct platform_driver memory_state_time_driver = { + .probe = memory_state_time_probe, + .driver = { + .name = "memory-state-time", + .of_match_table = match_table, + .owner = THIS_MODULE, + }, +}; + +static int __init memory_state_time_init(void) +{ + int error; + + hash_init(freq_hash_table); + memory_wq = create_singlethread_workqueue("memory_wq"); + if (!memory_wq) { + pr_err("Unable to create workqueue.\n"); + return -EINVAL; + } + /* + * Create sys/kernel directory for memory_state_time. + */ + memory_kobj = kobject_create_and_add(TAG, kernel_kobj); + if (!memory_kobj) { + pr_err("Unable to allocate memory_kobj for sysfs directory.\n"); + error = -ENOMEM; + goto wq; + } + error = sysfs_create_group(memory_kobj, &memory_attr_group); + if (error) { + pr_err("Unable to create sysfs folder.\n"); + goto kobj; + } + + error = platform_driver_register(&memory_state_time_driver); + if (error) { + pr_err("Unable to register memory_state_time platform driver.\n"); + goto group; + } + return 0; + +group: sysfs_remove_group(memory_kobj, &memory_attr_group); +kobj: kobject_put(memory_kobj); +wq: destroy_workqueue(memory_wq); + return error; +} +module_init(memory_state_time_init);
diff --git a/drivers/misc/uid_sys_stats.c b/drivers/misc/uid_sys_stats.c new file mode 100644 index 0000000..88dc1cd --- /dev/null +++ b/drivers/misc/uid_sys_stats.c
@@ -0,0 +1,703 @@ +/* drivers/misc/uid_sys_stats.c + * + * Copyright (C) 2014 - 2015 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/atomic.h> +#include <linux/cpufreq_times.h> +#include <linux/err.h> +#include <linux/hashtable.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/mm.h> +#include <linux/proc_fs.h> +#include <linux/profile.h> +#include <linux/rtmutex.h> +#include <linux/sched/cputime.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/uaccess.h> + + +#define UID_HASH_BITS 10 +DECLARE_HASHTABLE(hash_table, UID_HASH_BITS); + +static DEFINE_RT_MUTEX(uid_lock); +static struct proc_dir_entry *cpu_parent; +static struct proc_dir_entry *io_parent; +static struct proc_dir_entry *proc_parent; + +struct io_stats { + u64 read_bytes; + u64 write_bytes; + u64 rchar; + u64 wchar; + u64 fsync; +}; + +#define UID_STATE_FOREGROUND 0 +#define UID_STATE_BACKGROUND 1 +#define UID_STATE_BUCKET_SIZE 2 + +#define UID_STATE_TOTAL_CURR 2 +#define UID_STATE_TOTAL_LAST 3 +#define UID_STATE_DEAD_TASKS 4 +#define UID_STATE_SIZE 5 + +#define MAX_TASK_COMM_LEN 256 + +struct task_entry { + char comm[MAX_TASK_COMM_LEN]; + pid_t pid; + struct io_stats io[UID_STATE_SIZE]; + struct hlist_node hash; +}; + +struct uid_entry { + uid_t uid; + u64 utime; + u64 stime; + u64 active_utime; + u64 active_stime; + int state; + struct io_stats io[UID_STATE_SIZE]; + struct hlist_node hash; +#ifdef CONFIG_UID_SYS_STATS_DEBUG + DECLARE_HASHTABLE(task_entries, UID_HASH_BITS); +#endif +}; + +static u64 compute_write_bytes(struct task_struct *task) +{ + if (task->ioac.write_bytes <= task->ioac.cancelled_write_bytes) + return 0; + + return task->ioac.write_bytes - task->ioac.cancelled_write_bytes; +} + +static void compute_io_bucket_stats(struct io_stats *io_bucket, + struct io_stats *io_curr, + struct io_stats *io_last, + struct io_stats *io_dead) +{ + /* tasks could switch to another uid group, but its io_last in the + * previous uid group could still be positive. + * therefore before each update, do an overflow check first + */ + int64_t delta; + + delta = io_curr->read_bytes + io_dead->read_bytes - + io_last->read_bytes; + io_bucket->read_bytes += delta > 0 ? delta : 0; + delta = io_curr->write_bytes + io_dead->write_bytes - + io_last->write_bytes; + io_bucket->write_bytes += delta > 0 ? delta : 0; + delta = io_curr->rchar + io_dead->rchar - io_last->rchar; + io_bucket->rchar += delta > 0 ? delta : 0; + delta = io_curr->wchar + io_dead->wchar - io_last->wchar; + io_bucket->wchar += delta > 0 ? delta : 0; + delta = io_curr->fsync + io_dead->fsync - io_last->fsync; + io_bucket->fsync += delta > 0 ? delta : 0; + + io_last->read_bytes = io_curr->read_bytes; + io_last->write_bytes = io_curr->write_bytes; + io_last->rchar = io_curr->rchar; + io_last->wchar = io_curr->wchar; + io_last->fsync = io_curr->fsync; + + memset(io_dead, 0, sizeof(struct io_stats)); +} + +#ifdef CONFIG_UID_SYS_STATS_DEBUG +static void get_full_task_comm(struct task_entry *task_entry, + struct task_struct *task) +{ + int i = 0, offset = 0, len = 0; + /* save one byte for terminating null character */ + int unused_len = MAX_TASK_COMM_LEN - TASK_COMM_LEN - 1; + char buf[unused_len]; + struct mm_struct *mm = task->mm; + + /* fill the first TASK_COMM_LEN bytes with thread name */ + __get_task_comm(task_entry->comm, TASK_COMM_LEN, task); + i = strlen(task_entry->comm); + while (i < TASK_COMM_LEN) + task_entry->comm[i++] = ' '; + + /* next the executable file name */ + if (mm) { + down_read(&mm->mmap_sem); + if (mm->exe_file) { + char *pathname = d_path(&mm->exe_file->f_path, buf, + unused_len); + + if (!IS_ERR(pathname)) { + len = strlcpy(task_entry->comm + i, pathname, + unused_len); + i += len; + task_entry->comm[i++] = ' '; + unused_len--; + } + } + up_read(&mm->mmap_sem); + } + unused_len -= len; + + /* fill the rest with command line argument + * replace each null or new line character + * between args in argv with whitespace */ + len = get_cmdline(task, buf, unused_len); + while (offset < len) { + if (buf[offset] != '\0' && buf[offset] != '\n') + task_entry->comm[i++] = buf[offset]; + else + task_entry->comm[i++] = ' '; + offset++; + } + + /* get rid of trailing whitespaces in case when arg is memset to + * zero before being reset in userspace + */ + while (task_entry->comm[i-1] == ' ') + i--; + task_entry->comm[i] = '\0'; +} + +static struct task_entry *find_task_entry(struct uid_entry *uid_entry, + struct task_struct *task) +{ + struct task_entry *task_entry; + + hash_for_each_possible(uid_entry->task_entries, task_entry, hash, + task->pid) { + if (task->pid == task_entry->pid) { + /* if thread name changed, update the entire command */ + int len = strnchr(task_entry->comm, ' ', TASK_COMM_LEN) + - task_entry->comm; + + if (strncmp(task_entry->comm, task->comm, len)) + get_full_task_comm(task_entry, task); + return task_entry; + } + } + return NULL; +} + +static struct task_entry *find_or_register_task(struct uid_entry *uid_entry, + struct task_struct *task) +{ + struct task_entry *task_entry; + pid_t pid = task->pid; + + task_entry = find_task_entry(uid_entry, task); + if (task_entry) + return task_entry; + + task_entry = kzalloc(sizeof(struct task_entry), GFP_ATOMIC); + if (!task_entry) + return NULL; + + get_full_task_comm(task_entry, task); + + task_entry->pid = pid; + hash_add(uid_entry->task_entries, &task_entry->hash, (unsigned int)pid); + + return task_entry; +} + +static void remove_uid_tasks(struct uid_entry *uid_entry) +{ + struct task_entry *task_entry; + unsigned long bkt_task; + struct hlist_node *tmp_task; + + hash_for_each_safe(uid_entry->task_entries, bkt_task, + tmp_task, task_entry, hash) { + hash_del(&task_entry->hash); + kfree(task_entry); + } +} + +static void set_io_uid_tasks_zero(struct uid_entry *uid_entry) +{ + struct task_entry *task_entry; + unsigned long bkt_task; + + hash_for_each(uid_entry->task_entries, bkt_task, task_entry, hash) { + memset(&task_entry->io[UID_STATE_TOTAL_CURR], 0, + sizeof(struct io_stats)); + } +} + +static void add_uid_tasks_io_stats(struct uid_entry *uid_entry, + struct task_struct *task, int slot) +{ + struct task_entry *task_entry = find_or_register_task(uid_entry, task); + struct io_stats *task_io_slot = &task_entry->io[slot]; + + task_io_slot->read_bytes += task->ioac.read_bytes; + task_io_slot->write_bytes += compute_write_bytes(task); + task_io_slot->rchar += task->ioac.rchar; + task_io_slot->wchar += task->ioac.wchar; + task_io_slot->fsync += task->ioac.syscfs; +} + +static void compute_io_uid_tasks(struct uid_entry *uid_entry) +{ + struct task_entry *task_entry; + unsigned long bkt_task; + + hash_for_each(uid_entry->task_entries, bkt_task, task_entry, hash) { + compute_io_bucket_stats(&task_entry->io[uid_entry->state], + &task_entry->io[UID_STATE_TOTAL_CURR], + &task_entry->io[UID_STATE_TOTAL_LAST], + &task_entry->io[UID_STATE_DEAD_TASKS]); + } +} + +static void show_io_uid_tasks(struct seq_file *m, struct uid_entry *uid_entry) +{ + struct task_entry *task_entry; + unsigned long bkt_task; + + hash_for_each(uid_entry->task_entries, bkt_task, task_entry, hash) { + /* Separated by comma because space exists in task comm */ + seq_printf(m, "task,%s,%lu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu\n", + task_entry->comm, + (unsigned long)task_entry->pid, + task_entry->io[UID_STATE_FOREGROUND].rchar, + task_entry->io[UID_STATE_FOREGROUND].wchar, + task_entry->io[UID_STATE_FOREGROUND].read_bytes, + task_entry->io[UID_STATE_FOREGROUND].write_bytes, + task_entry->io[UID_STATE_BACKGROUND].rchar, + task_entry->io[UID_STATE_BACKGROUND].wchar, + task_entry->io[UID_STATE_BACKGROUND].read_bytes, + task_entry->io[UID_STATE_BACKGROUND].write_bytes, + task_entry->io[UID_STATE_FOREGROUND].fsync, + task_entry->io[UID_STATE_BACKGROUND].fsync); + } +} +#else +static void remove_uid_tasks(struct uid_entry *uid_entry) {}; +static void set_io_uid_tasks_zero(struct uid_entry *uid_entry) {}; +static void add_uid_tasks_io_stats(struct uid_entry *uid_entry, + struct task_struct *task, int slot) {}; +static void compute_io_uid_tasks(struct uid_entry *uid_entry) {}; +static void show_io_uid_tasks(struct seq_file *m, + struct uid_entry *uid_entry) {} +#endif + +static struct uid_entry *find_uid_entry(uid_t uid) +{ + struct uid_entry *uid_entry; + hash_for_each_possible(hash_table, uid_entry, hash, uid) { + if (uid_entry->uid == uid) + return uid_entry; + } + return NULL; +} + +static struct uid_entry *find_or_register_uid(uid_t uid) +{ + struct uid_entry *uid_entry; + + uid_entry = find_uid_entry(uid); + if (uid_entry) + return uid_entry; + + uid_entry = kzalloc(sizeof(struct uid_entry), GFP_ATOMIC); + if (!uid_entry) + return NULL; + + uid_entry->uid = uid; +#ifdef CONFIG_UID_SYS_STATS_DEBUG + hash_init(uid_entry->task_entries); +#endif + hash_add(hash_table, &uid_entry->hash, uid); + + return uid_entry; +} + +static int uid_cputime_show(struct seq_file *m, void *v) +{ + struct uid_entry *uid_entry = NULL; + struct task_struct *task, *temp; + struct user_namespace *user_ns = current_user_ns(); + u64 utime; + u64 stime; + unsigned long bkt; + uid_t uid; + + rt_mutex_lock(&uid_lock); + + hash_for_each(hash_table, bkt, uid_entry, hash) { + uid_entry->active_stime = 0; + uid_entry->active_utime = 0; + } + + rcu_read_lock(); + do_each_thread(temp, task) { + uid = from_kuid_munged(user_ns, task_uid(task)); + if (!uid_entry || uid_entry->uid != uid) + uid_entry = find_or_register_uid(uid); + if (!uid_entry) { + rcu_read_unlock(); + rt_mutex_unlock(&uid_lock); + pr_err("%s: failed to find the uid_entry for uid %d\n", + __func__, uid); + return -ENOMEM; + } + task_cputime_adjusted(task, &utime, &stime); + uid_entry->active_utime += utime; + uid_entry->active_stime += stime; + } while_each_thread(temp, task); + rcu_read_unlock(); + + hash_for_each(hash_table, bkt, uid_entry, hash) { + u64 total_utime = uid_entry->utime + + uid_entry->active_utime; + u64 total_stime = uid_entry->stime + + uid_entry->active_stime; + seq_printf(m, "%d: %llu %llu\n", uid_entry->uid, + ktime_to_ms(total_utime), ktime_to_ms(total_stime)); + } + + rt_mutex_unlock(&uid_lock); + return 0; +} + +static int uid_cputime_open(struct inode *inode, struct file *file) +{ + return single_open(file, uid_cputime_show, PDE_DATA(inode)); +} + +static const struct file_operations uid_cputime_fops = { + .open = uid_cputime_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int uid_remove_open(struct inode *inode, struct file *file) +{ + return single_open(file, NULL, NULL); +} + +static ssize_t uid_remove_write(struct file *file, + const char __user *buffer, size_t count, loff_t *ppos) +{ + struct uid_entry *uid_entry; + struct hlist_node *tmp; + char uids[128]; + char *start_uid, *end_uid = NULL; + long int uid_start = 0, uid_end = 0; + + if (count >= sizeof(uids)) + count = sizeof(uids) - 1; + + if (copy_from_user(uids, buffer, count)) + return -EFAULT; + + uids[count] = '\0'; + end_uid = uids; + start_uid = strsep(&end_uid, "-"); + + if (!start_uid || !end_uid) + return -EINVAL; + + if (kstrtol(start_uid, 10, &uid_start) != 0 || + kstrtol(end_uid, 10, &uid_end) != 0) { + return -EINVAL; + } + + /* Also remove uids from /proc/uid_time_in_state */ + cpufreq_task_times_remove_uids(uid_start, uid_end); + + rt_mutex_lock(&uid_lock); + + for (; uid_start <= uid_end; uid_start++) { + hash_for_each_possible_safe(hash_table, uid_entry, tmp, + hash, (uid_t)uid_start) { + if (uid_start == uid_entry->uid) { + remove_uid_tasks(uid_entry); + hash_del(&uid_entry->hash); + kfree(uid_entry); + } + } + } + + rt_mutex_unlock(&uid_lock); + return count; +} + +static const struct file_operations uid_remove_fops = { + .open = uid_remove_open, + .release = single_release, + .write = uid_remove_write, +}; + + +static void add_uid_io_stats(struct uid_entry *uid_entry, + struct task_struct *task, int slot) +{ + struct io_stats *io_slot = &uid_entry->io[slot]; + + io_slot->read_bytes += task->ioac.read_bytes; + io_slot->write_bytes += compute_write_bytes(task); + io_slot->rchar += task->ioac.rchar; + io_slot->wchar += task->ioac.wchar; + io_slot->fsync += task->ioac.syscfs; + + add_uid_tasks_io_stats(uid_entry, task, slot); +} + +static void update_io_stats_all_locked(void) +{ + struct uid_entry *uid_entry = NULL; + struct task_struct *task, *temp; + struct user_namespace *user_ns = current_user_ns(); + unsigned long bkt; + uid_t uid; + + hash_for_each(hash_table, bkt, uid_entry, hash) { + memset(&uid_entry->io[UID_STATE_TOTAL_CURR], 0, + sizeof(struct io_stats)); + set_io_uid_tasks_zero(uid_entry); + } + + rcu_read_lock(); + do_each_thread(temp, task) { + uid = from_kuid_munged(user_ns, task_uid(task)); + if (!uid_entry || uid_entry->uid != uid) + uid_entry = find_or_register_uid(uid); + if (!uid_entry) + continue; + add_uid_io_stats(uid_entry, task, UID_STATE_TOTAL_CURR); + } while_each_thread(temp, task); + rcu_read_unlock(); + + hash_for_each(hash_table, bkt, uid_entry, hash) { + compute_io_bucket_stats(&uid_entry->io[uid_entry->state], + &uid_entry->io[UID_STATE_TOTAL_CURR], + &uid_entry->io[UID_STATE_TOTAL_LAST], + &uid_entry->io[UID_STATE_DEAD_TASKS]); + compute_io_uid_tasks(uid_entry); + } +} + +static void update_io_stats_uid_locked(struct uid_entry *uid_entry) +{ + struct task_struct *task, *temp; + struct user_namespace *user_ns = current_user_ns(); + + memset(&uid_entry->io[UID_STATE_TOTAL_CURR], 0, + sizeof(struct io_stats)); + set_io_uid_tasks_zero(uid_entry); + + rcu_read_lock(); + do_each_thread(temp, task) { + if (from_kuid_munged(user_ns, task_uid(task)) != uid_entry->uid) + continue; + add_uid_io_stats(uid_entry, task, UID_STATE_TOTAL_CURR); + } while_each_thread(temp, task); + rcu_read_unlock(); + + compute_io_bucket_stats(&uid_entry->io[uid_entry->state], + &uid_entry->io[UID_STATE_TOTAL_CURR], + &uid_entry->io[UID_STATE_TOTAL_LAST], + &uid_entry->io[UID_STATE_DEAD_TASKS]); + compute_io_uid_tasks(uid_entry); +} + + +static int uid_io_show(struct seq_file *m, void *v) +{ + struct uid_entry *uid_entry; + unsigned long bkt; + + rt_mutex_lock(&uid_lock); + + update_io_stats_all_locked(); + + hash_for_each(hash_table, bkt, uid_entry, hash) { + seq_printf(m, "%d %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", + uid_entry->uid, + uid_entry->io[UID_STATE_FOREGROUND].rchar, + uid_entry->io[UID_STATE_FOREGROUND].wchar, + uid_entry->io[UID_STATE_FOREGROUND].read_bytes, + uid_entry->io[UID_STATE_FOREGROUND].write_bytes, + uid_entry->io[UID_STATE_BACKGROUND].rchar, + uid_entry->io[UID_STATE_BACKGROUND].wchar, + uid_entry->io[UID_STATE_BACKGROUND].read_bytes, + uid_entry->io[UID_STATE_BACKGROUND].write_bytes, + uid_entry->io[UID_STATE_FOREGROUND].fsync, + uid_entry->io[UID_STATE_BACKGROUND].fsync); + + show_io_uid_tasks(m, uid_entry); + } + + rt_mutex_unlock(&uid_lock); + return 0; +} + +static int uid_io_open(struct inode *inode, struct file *file) +{ + return single_open(file, uid_io_show, PDE_DATA(inode)); +} + +static const struct file_operations uid_io_fops = { + .open = uid_io_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int uid_procstat_open(struct inode *inode, struct file *file) +{ + return single_open(file, NULL, NULL); +} + +static ssize_t uid_procstat_write(struct file *file, + const char __user *buffer, size_t count, loff_t *ppos) +{ + struct uid_entry *uid_entry; + uid_t uid; + int argc, state; + char input[128]; + + if (count >= sizeof(input)) + return -EINVAL; + + if (copy_from_user(input, buffer, count)) + return -EFAULT; + + input[count] = '\0'; + + argc = sscanf(input, "%u %d", &uid, &state); + if (argc != 2) + return -EINVAL; + + if (state != UID_STATE_BACKGROUND && state != UID_STATE_FOREGROUND) + return -EINVAL; + + rt_mutex_lock(&uid_lock); + + uid_entry = find_or_register_uid(uid); + if (!uid_entry) { + rt_mutex_unlock(&uid_lock); + return -EINVAL; + } + + if (uid_entry->state == state) { + rt_mutex_unlock(&uid_lock); + return count; + } + + update_io_stats_uid_locked(uid_entry); + + uid_entry->state = state; + + rt_mutex_unlock(&uid_lock); + + return count; +} + +static const struct file_operations uid_procstat_fops = { + .open = uid_procstat_open, + .release = single_release, + .write = uid_procstat_write, +}; + +static int process_notifier(struct notifier_block *self, + unsigned long cmd, void *v) +{ + struct task_struct *task = v; + struct uid_entry *uid_entry; + u64 utime, stime; + uid_t uid; + + if (!task) + return NOTIFY_OK; + + rt_mutex_lock(&uid_lock); + uid = from_kuid_munged(current_user_ns(), task_uid(task)); + uid_entry = find_or_register_uid(uid); + if (!uid_entry) { + pr_err("%s: failed to find uid %d\n", __func__, uid); + goto exit; + } + + task_cputime_adjusted(task, &utime, &stime); + uid_entry->utime += utime; + uid_entry->stime += stime; + + add_uid_io_stats(uid_entry, task, UID_STATE_DEAD_TASKS); + +exit: + rt_mutex_unlock(&uid_lock); + return NOTIFY_OK; +} + +static struct notifier_block process_notifier_block = { + .notifier_call = process_notifier, +}; + +static int __init proc_uid_sys_stats_init(void) +{ + hash_init(hash_table); + + cpu_parent = proc_mkdir("uid_cputime", NULL); + if (!cpu_parent) { + pr_err("%s: failed to create uid_cputime proc entry\n", + __func__); + goto err; + } + + proc_create_data("remove_uid_range", 0222, cpu_parent, + &uid_remove_fops, NULL); + proc_create_data("show_uid_stat", 0444, cpu_parent, + &uid_cputime_fops, NULL); + + io_parent = proc_mkdir("uid_io", NULL); + if (!io_parent) { + pr_err("%s: failed to create uid_io proc entry\n", + __func__); + goto err; + } + + proc_create_data("stats", 0444, io_parent, + &uid_io_fops, NULL); + + proc_parent = proc_mkdir("uid_procstat", NULL); + if (!proc_parent) { + pr_err("%s: failed to create uid_procstat proc entry\n", + __func__); + goto err; + } + + proc_create_data("set", 0222, proc_parent, + &uid_procstat_fops, NULL); + + profile_event_register(PROFILE_TASK_EXIT, &process_notifier_block); + + return 0; + +err: + remove_proc_subtree("uid_cputime", NULL); + remove_proc_subtree("uid_io", NULL); + remove_proc_subtree("uid_procstat", NULL); + return -ENOMEM; +} + +early_initcall(proc_uid_sys_stats_init);
diff --git a/drivers/mmc/core/Kconfig b/drivers/mmc/core/Kconfig index 42e8906..b3faf10 100644 --- a/drivers/mmc/core/Kconfig +++ b/drivers/mmc/core/Kconfig
@@ -80,3 +80,17 @@ This driver is only of interest to those developing or testing a host driver. Most people should say N here. +config MMC_EMBEDDED_SDIO + boolean "MMC embedded SDIO device support" + help + If you say Y here, support will be added for embedded SDIO + devices which do not contain the necessary enumeration + support in hardware to be properly detected. + +config MMC_PARANOID_SD_INIT + bool "Enable paranoid SD card initialization" + help + If you say Y here, the MMC layer will be extra paranoid + about re-trying SD init requests. This can be a useful + work-around for buggy controllers and hardware. Enable + if you are experiencing issues with SD detection.
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 29bfff2..e71e6070 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c
@@ -2810,6 +2810,22 @@ void mmc_init_context_info(struct mmc_host *host) init_waitqueue_head(&host->context_info.wait); } +#ifdef CONFIG_MMC_EMBEDDED_SDIO +void mmc_set_embedded_sdio_data(struct mmc_host *host, + struct sdio_cis *cis, + struct sdio_cccr *cccr, + struct sdio_embedded_func *funcs, + int num_funcs) +{ + host->embedded_sdio_data.cis = cis; + host->embedded_sdio_data.cccr = cccr; + host->embedded_sdio_data.funcs = funcs; + host->embedded_sdio_data.num_funcs = num_funcs; +} + +EXPORT_SYMBOL(mmc_set_embedded_sdio_data); +#endif + static int __init mmc_init(void) { int ret;
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index ad88deb..f469254 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c
@@ -429,7 +429,8 @@ int mmc_add_host(struct mmc_host *host) #endif mmc_start_host(host); - mmc_register_pm_notifier(host); + if (!(host->pm_flags & MMC_PM_IGNORE_PM_NOTIFY)) + mmc_register_pm_notifier(host); return 0; } @@ -446,7 +447,8 @@ EXPORT_SYMBOL(mmc_add_host); */ void mmc_remove_host(struct mmc_host *host) { - mmc_unregister_pm_notifier(host); + if (!(host->pm_flags & MMC_PM_IGNORE_PM_NOTIFY)) + mmc_unregister_pm_notifier(host); mmc_stop_host(host); #ifdef CONFIG_DEBUG_FS
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 814a04e..b682e06 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c
@@ -780,6 +780,7 @@ MMC_DEV_ATTR(manfid, "0x%06x\n", card->cid.manfid); MMC_DEV_ATTR(name, "%s\n", card->cid.prod_name); MMC_DEV_ATTR(oemid, "0x%04x\n", card->cid.oemid); MMC_DEV_ATTR(prv, "0x%x\n", card->cid.prv); +MMC_DEV_ATTR(rev, "0x%x\n", card->ext_csd.rev); MMC_DEV_ATTR(pre_eol_info, "0x%02x\n", card->ext_csd.pre_eol_info); MMC_DEV_ATTR(life_time, "0x%02x 0x%02x\n", card->ext_csd.device_life_time_est_typ_a, @@ -838,6 +839,7 @@ static struct attribute *mmc_std_attrs[] = { &dev_attr_name.attr, &dev_attr_oemid.attr, &dev_attr_prv.attr, + &dev_attr_rev.attr, &dev_attr_pre_eol_info.attr, &dev_attr_life_time.attr, &dev_attr_serial.attr,
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 0a4e77a..8c4721f 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c
@@ -17,6 +17,8 @@ #include <linux/mmc/card.h> #include <linux/mmc/host.h> +#include <linux/sched/rt.h> +#include <uapi/linux/sched/types.h> #include "queue.h" #include "block.h" @@ -43,6 +45,11 @@ static int mmc_queue_thread(void *d) struct mmc_queue *mq = d; struct request_queue *q = mq->queue; struct mmc_context_info *cntx = &mq->card->host->context_info; + struct sched_param scheduler_params = {0}; + + scheduler_params.sched_priority = 1; + + sched_setscheduler(current, SCHED_FIFO, &scheduler_params); current->flags |= PF_MEMALLOC;
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index eb9de21..4bbdfa3 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c
@@ -834,6 +834,9 @@ int mmc_sd_setup_card(struct mmc_host *host, struct mmc_card *card, bool reinit) { int err; +#ifdef CONFIG_MMC_PARANOID_SD_INIT + int retries; +#endif if (!reinit) { /* @@ -860,7 +863,26 @@ int mmc_sd_setup_card(struct mmc_host *host, struct mmc_card *card, /* * Fetch switch information from card. */ +#ifdef CONFIG_MMC_PARANOID_SD_INIT + for (retries = 1; retries <= 3; retries++) { + err = mmc_read_switch(card); + if (!err) { + if (retries > 1) { + printk(KERN_WARNING + "%s: recovered\n", + mmc_hostname(host)); + } + break; + } else { + printk(KERN_WARNING + "%s: read switch failed (attempt %d)\n", + mmc_hostname(host), retries); + } + } +#else err = mmc_read_switch(card); +#endif + if (err) return err; } @@ -1054,14 +1076,33 @@ static int mmc_sd_alive(struct mmc_host *host) */ static void mmc_sd_detect(struct mmc_host *host) { - int err; + int err = 0; +#ifdef CONFIG_MMC_PARANOID_SD_INIT + int retries = 5; +#endif mmc_get_card(host->card); /* * Just check if our card has been removed. */ +#ifdef CONFIG_MMC_PARANOID_SD_INIT + while(retries) { + err = mmc_send_status(host->card, NULL); + if (err) { + retries--; + udelay(5); + continue; + } + break; + } + if (!retries) { + printk(KERN_ERR "%s(%s): Unable to re-detect card (%d)\n", + __func__, mmc_hostname(host), err); + } +#else err = _mmc_detect_card_removed(host); +#endif mmc_put_card(host->card); @@ -1120,6 +1161,9 @@ static int mmc_sd_suspend(struct mmc_host *host) static int _mmc_sd_resume(struct mmc_host *host) { int err = 0; +#ifdef CONFIG_MMC_PARANOID_SD_INIT + int retries; +#endif mmc_claim_host(host); @@ -1127,7 +1171,23 @@ static int _mmc_sd_resume(struct mmc_host *host) goto out; mmc_power_up(host, host->card->ocr); +#ifdef CONFIG_MMC_PARANOID_SD_INIT + retries = 5; + while (retries) { + err = mmc_sd_init_card(host, host->card->ocr, host->card); + + if (err) { + printk(KERN_ERR "%s: Re-init card rc = %d (retries = %d)\n", + mmc_hostname(host), err, retries); + mdelay(5); + retries--; + continue; + } + break; + } +#else err = mmc_sd_init_card(host, host->card->ocr, host->card); +#endif mmc_card_clr_suspended(host->card); out: @@ -1202,6 +1262,9 @@ int mmc_attach_sd(struct mmc_host *host) { int err; u32 ocr, rocr; +#ifdef CONFIG_MMC_PARANOID_SD_INIT + int retries; +#endif WARN_ON(!host->claimed); @@ -1237,9 +1300,27 @@ int mmc_attach_sd(struct mmc_host *host) /* * Detect and init the card. */ +#ifdef CONFIG_MMC_PARANOID_SD_INIT + retries = 5; + while (retries) { + err = mmc_sd_init_card(host, rocr, NULL); + if (err) { + retries--; + continue; + } + break; + } + + if (!retries) { + printk(KERN_ERR "%s: mmc_sd_init_card() failure (err = %d)\n", + mmc_hostname(host), err); + goto err; + } +#else err = mmc_sd_init_card(host, rocr, NULL); if (err) goto err; +#endif mmc_release_host(host); err = mmc_add_card(host->card);
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index cc43687..c42e3cd 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c
@@ -31,6 +31,10 @@ #include "sdio_ops.h" #include "sdio_cis.h" +#ifdef CONFIG_MMC_EMBEDDED_SDIO +#include <linux/mmc/sdio_ids.h> +#endif + static int sdio_read_fbr(struct sdio_func *func) { int ret; @@ -706,28 +710,44 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr, goto finish; } - /* - * Read the common registers. Note that we should try to - * validate whether UHS would work or not. - */ - err = sdio_read_cccr(card, ocr); - if (err) { - mmc_sdio_resend_if_cond(host, card); - if (ocr & R4_18V_PRESENT) { - /* Retry init sequence, but without R4_18V_PRESENT. */ - retries = 0; - goto try_again; - } else { - goto remove; +#ifdef CONFIG_MMC_EMBEDDED_SDIO + if (host->embedded_sdio_data.cccr) + memcpy(&card->cccr, host->embedded_sdio_data.cccr, sizeof(struct sdio_cccr)); + else { +#endif + /* + * Read the common registers. Note that we should try to + * validate whether UHS would work or not. + */ + err = sdio_read_cccr(card, ocr); + if (err) { + mmc_sdio_resend_if_cond(host, card); + if (ocr & R4_18V_PRESENT) { + /* Retry init sequence, but without R4_18V_PRESENT. */ + retries = 0; + goto try_again; + } else { + goto remove; + } } +#ifdef CONFIG_MMC_EMBEDDED_SDIO } +#endif - /* - * Read the common CIS tuples. - */ - err = sdio_read_common_cis(card); - if (err) - goto remove; +#ifdef CONFIG_MMC_EMBEDDED_SDIO + if (host->embedded_sdio_data.cis) + memcpy(&card->cis, host->embedded_sdio_data.cis, sizeof(struct sdio_cis)); + else { +#endif + /* + * Read the common CIS tuples. + */ + err = sdio_read_common_cis(card); + if (err) + goto remove; +#ifdef CONFIG_MMC_EMBEDDED_SDIO + } +#endif if (oldcard) { int same = (card->cis.vendor == oldcard->cis.vendor && @@ -1129,14 +1149,36 @@ int mmc_attach_sdio(struct mmc_host *host) funcs = (ocr & 0x70000000) >> 28; card->sdio_funcs = 0; +#ifdef CONFIG_MMC_EMBEDDED_SDIO + if (host->embedded_sdio_data.funcs) + card->sdio_funcs = funcs = host->embedded_sdio_data.num_funcs; +#endif + /* * Initialize (but don't add) all present functions. */ for (i = 0; i < funcs; i++, card->sdio_funcs++) { - err = sdio_init_func(host->card, i + 1); - if (err) - goto remove; +#ifdef CONFIG_MMC_EMBEDDED_SDIO + if (host->embedded_sdio_data.funcs) { + struct sdio_func *tmp; + tmp = sdio_alloc_func(host->card); + if (IS_ERR(tmp)) + goto remove; + tmp->num = (i + 1); + card->sdio_func[i] = tmp; + tmp->class = host->embedded_sdio_data.funcs[i].f_class; + tmp->max_blksize = host->embedded_sdio_data.funcs[i].f_maxblksize; + tmp->vendor = card->cis.vendor; + tmp->device = card->cis.device; + } else { +#endif + err = sdio_init_func(host->card, i + 1); + if (err) + goto remove; +#ifdef CONFIG_MMC_EMBEDDED_SDIO + } +#endif /* * Enable Runtime PM for this func (if supported) */
diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c index 2b32b88..997d556 100644 --- a/drivers/mmc/core/sdio_bus.c +++ b/drivers/mmc/core/sdio_bus.c
@@ -29,6 +29,10 @@ #include "sdio_cis.h" #include "sdio_bus.h" +#ifdef CONFIG_MMC_EMBEDDED_SDIO +#include <linux/mmc/host.h> +#endif + #define to_sdio_driver(d) container_of(d, struct sdio_driver, drv) /* show configuration fields */ @@ -264,7 +268,14 @@ static void sdio_release_func(struct device *dev) { struct sdio_func *func = dev_to_sdio_func(dev); - sdio_free_func_cis(func); +#ifdef CONFIG_MMC_EMBEDDED_SDIO + /* + * If this device is embedded then we never allocated + * cis tables for this func + */ + if (!func->card->host->embedded_sdio_data.funcs) +#endif + sdio_free_func_cis(func); kfree(func->info); kfree(func->tmpbuf);
diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c index 7c887f1..62fd690 100644 --- a/drivers/mtd/devices/block2mtd.c +++ b/drivers/mtd/devices/block2mtd.c
@@ -431,7 +431,7 @@ static int block2mtd_setup2(const char *val) } -static int block2mtd_setup(const char *val, struct kernel_param *kp) +static int block2mtd_setup(const char *val, const struct kernel_param *kp) { #ifdef MODULE return block2mtd_setup2(val);
diff --git a/drivers/mtd/devices/phram.c b/drivers/mtd/devices/phram.c index 8b66e52..7287696 100644 --- a/drivers/mtd/devices/phram.c +++ b/drivers/mtd/devices/phram.c
@@ -266,7 +266,7 @@ static int phram_setup(const char *val) return ret; } -static int phram_param_call(const char *val, struct kernel_param *kp) +static int phram_param_call(const char *val, const struct kernel_param *kp) { #ifdef MODULE return phram_setup(val);
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 6445c69..34b53d0 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c
@@ -1349,7 +1349,7 @@ static int bytes_str_to_int(const char *str) * This function returns zero in case of success and a negative error code in * case of error. */ -static int ubi_mtd_param_parse(const char *val, struct kernel_param *kp) +static int ubi_mtd_param_parse(const char *val, const struct kernel_param *kp) { int i, len; struct mtd_dev_param *p;
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index 3df872f..3702647 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -376,7 +376,7 @@ static int efx_mcdi_poll(struct efx_nic *efx) * because generally mcdi responses are fast. After that, back off * and poll once a jiffy (approximately) */ - spins = TICK_USEC; + spins = USER_TICK_USEC; finish = jiffies + MCDI_RPC_TIMEOUT; while (1) {
diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c index 814fd8f..b408512 100644 --- a/drivers/net/ppp/ppp_async.c +++ b/drivers/net/ppp/ppp_async.c
@@ -770,7 +770,7 @@ process_input_packet(struct asyncppp *ap) { struct sk_buff *skb; unsigned char *p; - unsigned int len, fcs, proto; + unsigned int len, fcs; skb = ap->rpkt; if (ap->state & (SC_TOSS | SC_ESCAPE)) @@ -799,14 +799,14 @@ process_input_packet(struct asyncppp *ap) goto err; p = skb_pull(skb, 2); } - proto = p[0]; - if (proto & 1) { - /* protocol is compressed */ - *(u8 *)skb_push(skb, 1) = 0; - } else { + + /* If protocol field is not compressed, it can be LCP packet */ + if (!(p[0] & 0x01)) { + unsigned int proto; + if (skb->len < 2) goto err; - proto = (proto << 8) + p[1]; + proto = (p[0] << 8) + p[1]; if (proto == PPP_LCP) async_lcp_peek(ap, p, skb->len, 1); }
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 34b24d7..c06ae54 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c
@@ -1979,6 +1979,46 @@ ppp_do_recv(struct ppp *ppp, struct sk_buff *skb, struct channel *pch) ppp_recv_unlock(ppp); } +/** + * __ppp_decompress_proto - Decompress protocol field, slim version. + * @skb: Socket buffer where protocol field should be decompressed. It must have + * at least 1 byte of head room and 1 byte of linear data. First byte of + * data must be a protocol field byte. + * + * Decompress protocol field in PPP header if it's compressed, e.g. when + * Protocol-Field-Compression (PFC) was negotiated. No checks w.r.t. skb data + * length are done in this function. + */ +static void __ppp_decompress_proto(struct sk_buff *skb) +{ + if (skb->data[0] & 0x01) + *(u8 *)skb_push(skb, 1) = 0x00; +} + +/** + * ppp_decompress_proto - Check skb data room and decompress protocol field. + * @skb: Socket buffer where protocol field should be decompressed. First byte + * of data must be a protocol field byte. + * + * Decompress protocol field in PPP header if it's compressed, e.g. when + * Protocol-Field-Compression (PFC) was negotiated. This function also makes + * sure that skb data room is sufficient for Protocol field, before and after + * decompression. + * + * Return: true - decompressed successfully, false - not enough room in skb. + */ +static bool ppp_decompress_proto(struct sk_buff *skb) +{ + /* At least one byte should be present (if protocol is compressed) */ + if (!pskb_may_pull(skb, 1)) + return false; + + __ppp_decompress_proto(skb); + + /* Protocol field should occupy 2 bytes when not compressed */ + return pskb_may_pull(skb, 2); +} + void ppp_input(struct ppp_channel *chan, struct sk_buff *skb) { @@ -1991,7 +2031,7 @@ ppp_input(struct ppp_channel *chan, struct sk_buff *skb) } read_lock_bh(&pch->upl); - if (!pskb_may_pull(skb, 2)) { + if (!ppp_decompress_proto(skb)) { kfree_skb(skb); if (pch->ppp) { ++pch->ppp->dev->stats.rx_length_errors; @@ -2088,6 +2128,9 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) if (ppp->flags & SC_MUST_COMP && ppp->rstate & SC_DC_FERROR) goto err; + /* At this point the "Protocol" field MUST be decompressed, either in + * ppp_input(), ppp_decompress_frame() or in ppp_receive_mp_frame(). + */ proto = PPP_PROTO(skb); switch (proto) { case PPP_VJC_COMP: @@ -2259,6 +2302,9 @@ ppp_decompress_frame(struct ppp *ppp, struct sk_buff *skb) skb_put(skb, len); skb_pull(skb, 2); /* pull off the A/C bytes */ + /* Don't call __ppp_decompress_proto() here, but instead rely on + * corresponding algo (mppe/bsd/deflate) to decompress it. + */ } else { /* Uncompressed frame - pass to decompressor so it can update its dictionary if necessary. */ @@ -2304,9 +2350,11 @@ ppp_receive_mp_frame(struct ppp *ppp, struct sk_buff *skb, struct channel *pch) /* * Do protocol ID decompression on the first fragment of each packet. + * We have to do that here, because ppp_receive_nonmp_frame() expects + * decompressed protocol field. */ - if ((PPP_MP_CB(skb)->BEbits & B) && (skb->data[0] & 1)) - *(u8 *)skb_push(skb, 1) = 0; + if (PPP_MP_CB(skb)->BEbits & B) + __ppp_decompress_proto(skb); /* * Expand sequence number to 32 bits, making it as close
diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c index 7868c29..41b5b47 100644 --- a/drivers/net/ppp/ppp_synctty.c +++ b/drivers/net/ppp/ppp_synctty.c
@@ -708,11 +708,10 @@ ppp_sync_input(struct syncppp *ap, const unsigned char *buf, p = skb_pull(skb, 2); } - /* decompress protocol field if compressed */ - if (p[0] & 1) { - /* protocol is compressed */ - *(u8 *)skb_push(skb, 1) = 0; - } else if (skb->len < 2) + /* PPP packet length should be >= 2 bytes when protocol field is not + * compressed. + */ + if (!(p[0] & 0x01) && skb->len < 2) goto err; /* queue the frame to be processed */
diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 9b70a3a..70ebd3b 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c
@@ -325,11 +325,6 @@ static int pptp_rcv_core(struct sock *sk, struct sk_buff *skb) skb_pull(skb, 2); } - if ((*skb->data) & 1) { - /* protocol is compressed */ - *(u8 *)skb_push(skb, 1) = 0; - } - skb->ip_summed = CHECKSUM_NONE; skb_set_network_header(skb, skb->head-skb->data); ppp_input(&po->chan, skb);
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 2956bb6..c8946ce 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c
@@ -2283,6 +2283,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, int le; int ret; +#ifdef CONFIG_ANDROID_PARANOID_NETWORK + if (cmd != TUNGETIFF && !capable(CAP_NET_ADMIN)) { + return -EPERM; + } +#endif + if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == SOCK_IOC_TYPE) { if (copy_from_user(&ifr, argp, ifreq_len)) return -EFAULT;
diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig index 166920a..8c456a6 100644 --- a/drivers/net/wireless/Kconfig +++ b/drivers/net/wireless/Kconfig
@@ -114,4 +114,11 @@ If you choose to build a module, it'll be called rndis_wlan. +config VIRT_WIFI + tristate "Wifi wrapper for ethernet drivers" + depends on CFG80211 + ---help--- + This option adds support for ethernet connections to appear as if they + are wifi connections through a special rtnetlink device. + endif # WLAN
diff --git a/drivers/net/wireless/Makefile b/drivers/net/wireless/Makefile index 7fc9630..6cfe745 100644 --- a/drivers/net/wireless/Makefile +++ b/drivers/net/wireless/Makefile
@@ -27,3 +27,5 @@ obj-$(CONFIG_USB_NET_RNDIS_WLAN) += rndis_wlan.o obj-$(CONFIG_MAC80211_HWSIM) += mac80211_hwsim.o + +obj-$(CONFIG_VIRT_WIFI) += virt_wifi.o
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index cd6c5ec..ab8fef3 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -2841,7 +2841,6 @@ static s32 brcmf_inform_single_bss(struct brcmf_cfg80211_info *cfg, struct brcmf_bss_info_le *bi) { struct wiphy *wiphy = cfg_to_wiphy(cfg); - struct ieee80211_channel *notify_channel; struct cfg80211_bss *bss; struct ieee80211_supported_band *band; struct brcmu_chan ch; @@ -2851,7 +2850,7 @@ static s32 brcmf_inform_single_bss(struct brcmf_cfg80211_info *cfg, u16 notify_interval; u8 *notify_ie; size_t notify_ielen; - s32 notify_signal; + struct cfg80211_inform_bss bss_data = {}; if (le32_to_cpu(bi->length) > WL_BSS_INFO_MAX) { brcmf_err("Bss info is larger than buffer. Discarding\n"); @@ -2871,27 +2870,28 @@ static s32 brcmf_inform_single_bss(struct brcmf_cfg80211_info *cfg, band = wiphy->bands[NL80211_BAND_5GHZ]; freq = ieee80211_channel_to_frequency(channel, band->band); - notify_channel = ieee80211_get_channel(wiphy, freq); + bss_data.chan = ieee80211_get_channel(wiphy, freq); + bss_data.scan_width = NL80211_BSS_CHAN_WIDTH_20; + bss_data.boottime_ns = ktime_to_ns(ktime_get_boottime()); notify_capability = le16_to_cpu(bi->capability); notify_interval = le16_to_cpu(bi->beacon_period); notify_ie = (u8 *)bi + le16_to_cpu(bi->ie_offset); notify_ielen = le32_to_cpu(bi->ie_length); - notify_signal = (s16)le16_to_cpu(bi->RSSI) * 100; + bss_data.signal = (s16)le16_to_cpu(bi->RSSI) * 100; brcmf_dbg(CONN, "bssid: %pM\n", bi->BSSID); brcmf_dbg(CONN, "Channel: %d(%d)\n", channel, freq); brcmf_dbg(CONN, "Capability: %X\n", notify_capability); brcmf_dbg(CONN, "Beacon interval: %d\n", notify_interval); - brcmf_dbg(CONN, "Signal: %d\n", notify_signal); + brcmf_dbg(CONN, "Signal: %d\n", bss_data.signal); - bss = cfg80211_inform_bss(wiphy, notify_channel, - CFG80211_BSS_FTYPE_UNKNOWN, - (const u8 *)bi->BSSID, - 0, notify_capability, - notify_interval, notify_ie, - notify_ielen, notify_signal, - GFP_KERNEL); + bss = cfg80211_inform_bss_data(wiphy, &bss_data, + CFG80211_BSS_FTYPE_UNKNOWN, + (const u8 *)bi->BSSID, + 0, notify_capability, + notify_interval, notify_ie, + notify_ielen, GFP_KERNEL); if (!bss) return -ENOMEM;
diff --git a/drivers/net/wireless/ti/wlcore/init.c b/drivers/net/wireless/ti/wlcore/init.c index 58898b9..145e10a 100644 --- a/drivers/net/wireless/ti/wlcore/init.c +++ b/drivers/net/wireless/ti/wlcore/init.c
@@ -549,6 +549,11 @@ static int wl12xx_init_ap_role(struct wl1271 *wl, struct wl12xx_vif *wlvif) { int ret; + /* Disable filtering */ + ret = wl1271_acx_group_address_tbl(wl, wlvif, false, NULL, 0); + if (ret < 0) + return ret; + ret = wl1271_acx_ap_max_tx_retry(wl, wlvif); if (ret < 0) return ret;
diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virt_wifi.c new file mode 100644 index 0000000..815de4f --- /dev/null +++ b/drivers/net/wireless/virt_wifi.c
@@ -0,0 +1,634 @@ +// SPDX-License-Identifier: GPL-2.0 +/* drivers/net/wireless/virt_wifi.c + * + * A fake implementation of cfg80211_ops that can be tacked on to an ethernet + * net_device to make it appear as a wireless connection. + * + * Copyright (C) 2018 Google, Inc. + * + * Author: schuffelen@google.com + */ + +#include <net/cfg80211.h> +#include <net/rtnetlink.h> +#include <linux/etherdevice.h> +#include <linux/module.h> + +#include <net/cfg80211.h> +#include <net/rtnetlink.h> +#include <linux/etherdevice.h> +#include <linux/module.h> + +static struct wiphy *common_wiphy; + +struct virt_wifi_wiphy_priv { + struct delayed_work scan_result; + struct cfg80211_scan_request *scan_request; + bool being_deleted; +}; + +static struct ieee80211_channel channel_2ghz = { + .band = NL80211_BAND_2GHZ, + .center_freq = 2432, + .hw_value = 2432, + .max_power = 20, +}; + +static struct ieee80211_rate bitrates_2ghz[] = { + { .bitrate = 10 }, + { .bitrate = 20 }, + { .bitrate = 55 }, + { .bitrate = 110 }, + { .bitrate = 60 }, + { .bitrate = 120 }, + { .bitrate = 240 }, +}; + +static struct ieee80211_supported_band band_2ghz = { + .channels = &channel_2ghz, + .bitrates = bitrates_2ghz, + .band = NL80211_BAND_2GHZ, + .n_channels = 1, + .n_bitrates = ARRAY_SIZE(bitrates_2ghz), + .ht_cap = { + .ht_supported = true, + .cap = IEEE80211_HT_CAP_SUP_WIDTH_20_40 | + IEEE80211_HT_CAP_GRN_FLD | + IEEE80211_HT_CAP_SGI_20 | + IEEE80211_HT_CAP_SGI_40 | + IEEE80211_HT_CAP_DSSSCCK40, + .ampdu_factor = 0x3, + .ampdu_density = 0x6, + .mcs = { + .rx_mask = {0xff, 0xff}, + .tx_params = IEEE80211_HT_MCS_TX_DEFINED, + }, + }, +}; + +static struct ieee80211_channel channel_5ghz = { + .band = NL80211_BAND_5GHZ, + .center_freq = 5240, + .hw_value = 5240, + .max_power = 20, +}; + +static struct ieee80211_rate bitrates_5ghz[] = { + { .bitrate = 60 }, + { .bitrate = 120 }, + { .bitrate = 240 }, +}; + +#define RX_MCS_MAP (IEEE80211_VHT_MCS_SUPPORT_0_9 << 0 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 2 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 4 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 6 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 8 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 10 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 12 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 14) + +#define TX_MCS_MAP (IEEE80211_VHT_MCS_SUPPORT_0_9 << 0 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 2 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 4 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 6 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 8 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 10 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 12 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 14) + +static struct ieee80211_supported_band band_5ghz = { + .channels = &channel_5ghz, + .bitrates = bitrates_5ghz, + .band = NL80211_BAND_5GHZ, + .n_channels = 1, + .n_bitrates = ARRAY_SIZE(bitrates_5ghz), + .ht_cap = { + .ht_supported = true, + .cap = IEEE80211_HT_CAP_SUP_WIDTH_20_40 | + IEEE80211_HT_CAP_GRN_FLD | + IEEE80211_HT_CAP_SGI_20 | + IEEE80211_HT_CAP_SGI_40 | + IEEE80211_HT_CAP_DSSSCCK40, + .ampdu_factor = 0x3, + .ampdu_density = 0x6, + .mcs = { + .rx_mask = {0xff, 0xff}, + .tx_params = IEEE80211_HT_MCS_TX_DEFINED, + }, + }, + .vht_cap = { + .vht_supported = true, + .cap = IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ | + IEEE80211_VHT_CAP_RXLDPC | + IEEE80211_VHT_CAP_SHORT_GI_80 | + IEEE80211_VHT_CAP_SHORT_GI_160 | + IEEE80211_VHT_CAP_TXSTBC | + IEEE80211_VHT_CAP_RXSTBC_1 | + IEEE80211_VHT_CAP_RXSTBC_2 | + IEEE80211_VHT_CAP_RXSTBC_3 | + IEEE80211_VHT_CAP_RXSTBC_4 | + IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK, + .vht_mcs = { + .rx_mcs_map = cpu_to_le16(RX_MCS_MAP), + .tx_mcs_map = cpu_to_le16(TX_MCS_MAP), + } + }, +}; + +/* Assigned at module init. Guaranteed locally-administered and unicast. */ +static u8 fake_router_bssid[ETH_ALEN] __ro_after_init = {}; + +/* Called with the rtnl lock held. */ +static int virt_wifi_scan(struct wiphy *wiphy, + struct cfg80211_scan_request *request) +{ + struct virt_wifi_wiphy_priv *priv = wiphy_priv(wiphy); + + wiphy_debug(wiphy, "scan\n"); + + if (priv->scan_request || priv->being_deleted) + return -EBUSY; + + priv->scan_request = request; + schedule_delayed_work(&priv->scan_result, HZ * 2); + + return 0; +} + +/* Acquires and releases the rdev BSS lock. */ +static void virt_wifi_scan_result(struct work_struct *work) +{ + struct { + u8 tag; + u8 len; + u8 ssid[8]; + } __packed ssid = { + .tag = WLAN_EID_SSID, .len = 8, .ssid = "VirtWifi", + }; + struct cfg80211_bss *informed_bss; + struct virt_wifi_wiphy_priv *priv = + container_of(work, struct virt_wifi_wiphy_priv, + scan_result.work); + struct wiphy *wiphy = priv_to_wiphy(priv); + struct cfg80211_scan_info scan_info = { .aborted = false }; + + informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz, + CFG80211_BSS_FTYPE_PRESP, + fake_router_bssid, + ktime_get_boot_ns(), + WLAN_CAPABILITY_ESS, 0, + (void *)&ssid, sizeof(ssid), + DBM_TO_MBM(-50), GFP_KERNEL); + cfg80211_put_bss(wiphy, informed_bss); + + /* Schedules work which acquires and releases the rtnl lock. */ + cfg80211_scan_done(priv->scan_request, &scan_info); + priv->scan_request = NULL; +} + +/* May acquire and release the rdev BSS lock. */ +static void virt_wifi_cancel_scan(struct wiphy *wiphy) +{ + struct virt_wifi_wiphy_priv *priv = wiphy_priv(wiphy); + + cancel_delayed_work_sync(&priv->scan_result); + /* Clean up dangling callbacks if necessary. */ + if (priv->scan_request) { + struct cfg80211_scan_info scan_info = { .aborted = true }; + /* Schedules work which acquires and releases the rtnl lock. */ + cfg80211_scan_done(priv->scan_request, &scan_info); + priv->scan_request = NULL; + } +} + +struct virt_wifi_netdev_priv { + struct delayed_work connect; + struct net_device *lowerdev; + struct net_device *upperdev; + u32 tx_packets; + u32 tx_failed; + u8 connect_requested_bss[ETH_ALEN]; + bool is_up; + bool is_connected; + bool being_deleted; +}; + +/* Called with the rtnl lock held. */ +static int virt_wifi_connect(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_connect_params *sme) +{ + struct virt_wifi_netdev_priv *priv = netdev_priv(netdev); + bool could_schedule; + + if (priv->being_deleted || !priv->is_up) + return -EBUSY; + + could_schedule = schedule_delayed_work(&priv->connect, HZ * 2); + if (!could_schedule) + return -EBUSY; + + if (sme->bssid) + ether_addr_copy(priv->connect_requested_bss, sme->bssid); + else + eth_zero_addr(priv->connect_requested_bss); + + wiphy_debug(wiphy, "connect\n"); + + return 0; +} + +/* Acquires and releases the rdev event lock. */ +static void virt_wifi_connect_complete(struct work_struct *work) +{ + struct virt_wifi_netdev_priv *priv = + container_of(work, struct virt_wifi_netdev_priv, connect.work); + u8 *requested_bss = priv->connect_requested_bss; + bool has_addr = !is_zero_ether_addr(requested_bss); + bool right_addr = ether_addr_equal(requested_bss, fake_router_bssid); + u16 status = WLAN_STATUS_SUCCESS; + + if (!priv->is_up || (has_addr && !right_addr)) + status = WLAN_STATUS_UNSPECIFIED_FAILURE; + else + priv->is_connected = true; + + /* Schedules an event that acquires the rtnl lock. */ + cfg80211_connect_result(priv->upperdev, requested_bss, NULL, 0, NULL, 0, + status, GFP_KERNEL); + netif_carrier_on(priv->upperdev); +} + +/* May acquire and release the rdev event lock. */ +static void virt_wifi_cancel_connect(struct net_device *netdev) +{ + struct virt_wifi_netdev_priv *priv = netdev_priv(netdev); + + /* If there is work pending, clean up dangling callbacks. */ + if (cancel_delayed_work_sync(&priv->connect)) { + /* Schedules an event that acquires the rtnl lock. */ + cfg80211_connect_result(priv->upperdev, + priv->connect_requested_bss, NULL, 0, + NULL, 0, + WLAN_STATUS_UNSPECIFIED_FAILURE, + GFP_KERNEL); + } +} + +/* Called with the rtnl lock held. Acquires the rdev event lock. */ +static int virt_wifi_disconnect(struct wiphy *wiphy, struct net_device *netdev, + u16 reason_code) +{ + struct virt_wifi_netdev_priv *priv = netdev_priv(netdev); + + if (priv->being_deleted) + return -EBUSY; + + wiphy_debug(wiphy, "disconnect\n"); + virt_wifi_cancel_connect(netdev); + + cfg80211_disconnected(netdev, reason_code, NULL, 0, true, GFP_KERNEL); + priv->is_connected = false; + netif_carrier_off(netdev); + + return 0; +} + +/* Called with the rtnl lock held. */ +static int virt_wifi_get_station(struct wiphy *wiphy, struct net_device *dev, + const u8 *mac, struct station_info *sinfo) +{ + struct virt_wifi_netdev_priv *priv = netdev_priv(dev); + + wiphy_debug(wiphy, "get_station\n"); + + if (!priv->is_connected || !ether_addr_equal(mac, fake_router_bssid)) + return -ENOENT; + + sinfo->filled = BIT_ULL(NL80211_STA_INFO_TX_PACKETS) | + BIT_ULL(NL80211_STA_INFO_TX_FAILED) | + BIT_ULL(NL80211_STA_INFO_SIGNAL) | + BIT_ULL(NL80211_STA_INFO_TX_BITRATE); + sinfo->tx_packets = priv->tx_packets; + sinfo->tx_failed = priv->tx_failed; + /* For CFG80211_SIGNAL_TYPE_MBM, value is expressed in _dBm_ */ + sinfo->signal = -50; + sinfo->txrate = (struct rate_info) { + .legacy = 10, /* units are 100kbit/s */ + }; + return 0; +} + +/* Called with the rtnl lock held. */ +static int virt_wifi_dump_station(struct wiphy *wiphy, struct net_device *dev, + int idx, u8 *mac, struct station_info *sinfo) +{ + struct virt_wifi_netdev_priv *priv = netdev_priv(dev); + + wiphy_debug(wiphy, "dump_station\n"); + + if (idx != 0 || !priv->is_connected) + return -ENOENT; + + ether_addr_copy(mac, fake_router_bssid); + return virt_wifi_get_station(wiphy, dev, fake_router_bssid, sinfo); +} + +static const struct cfg80211_ops virt_wifi_cfg80211_ops = { + .scan = virt_wifi_scan, + + .connect = virt_wifi_connect, + .disconnect = virt_wifi_disconnect, + + .get_station = virt_wifi_get_station, + .dump_station = virt_wifi_dump_station, +}; + +/* Acquires and releases the rtnl lock. */ +static struct wiphy *virt_wifi_make_wiphy(void) +{ + struct wiphy *wiphy; + struct virt_wifi_wiphy_priv *priv; + int err; + + wiphy = wiphy_new(&virt_wifi_cfg80211_ops, sizeof(*priv)); + + if (!wiphy) + return NULL; + + wiphy->max_scan_ssids = 4; + wiphy->max_scan_ie_len = 1000; + wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM; + + wiphy->bands[NL80211_BAND_2GHZ] = &band_2ghz; + wiphy->bands[NL80211_BAND_5GHZ] = &band_5ghz; + wiphy->bands[NL80211_BAND_60GHZ] = NULL; + + wiphy->regulatory_flags = REGULATORY_WIPHY_SELF_MANAGED; + wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION); + + priv = wiphy_priv(wiphy); + priv->being_deleted = false; + priv->scan_request = NULL; + INIT_DELAYED_WORK(&priv->scan_result, virt_wifi_scan_result); + + err = wiphy_register(wiphy); + if (err < 0) { + wiphy_free(wiphy); + return NULL; + } + + return wiphy; +} + +/* Acquires and releases the rtnl lock. */ +static void virt_wifi_destroy_wiphy(struct wiphy *wiphy) +{ + struct virt_wifi_wiphy_priv *priv; + + WARN(!wiphy, "%s called with null wiphy", __func__); + if (!wiphy) + return; + + priv = wiphy_priv(wiphy); + priv->being_deleted = true; + virt_wifi_cancel_scan(wiphy); + + if (wiphy->registered) + wiphy_unregister(wiphy); + wiphy_free(wiphy); +} + +/* Enters and exits a RCU-bh critical section. */ +static netdev_tx_t virt_wifi_start_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct virt_wifi_netdev_priv *priv = netdev_priv(dev); + + priv->tx_packets++; + if (!priv->is_connected) { + priv->tx_failed++; + return NET_XMIT_DROP; + } + + skb->dev = priv->lowerdev; + return dev_queue_xmit(skb); +} + +/* Called with rtnl lock held. */ +static int virt_wifi_net_device_open(struct net_device *dev) +{ + struct virt_wifi_netdev_priv *priv = netdev_priv(dev); + + priv->is_up = true; + return 0; +} + +/* Called with rtnl lock held. */ +static int virt_wifi_net_device_stop(struct net_device *dev) +{ + struct virt_wifi_netdev_priv *n_priv = netdev_priv(dev); + struct virt_wifi_wiphy_priv *w_priv; + + n_priv->is_up = false; + + if (!dev->ieee80211_ptr) + return 0; + w_priv = wiphy_priv(dev->ieee80211_ptr->wiphy); + + virt_wifi_cancel_scan(dev->ieee80211_ptr->wiphy); + virt_wifi_cancel_connect(dev); + netif_carrier_off(dev); + + return 0; +} + +static const struct net_device_ops virt_wifi_ops = { + .ndo_start_xmit = virt_wifi_start_xmit, + .ndo_open = virt_wifi_net_device_open, + .ndo_stop = virt_wifi_net_device_stop, +}; + +/* Invoked as part of rtnl lock release. */ +static void virt_wifi_net_device_destructor(struct net_device *dev) +{ + /* Delayed past dellink to allow nl80211 to react to the device being + * deleted. + */ + kfree(dev->ieee80211_ptr); + dev->ieee80211_ptr = NULL; + free_netdev(dev); +} + +/* No lock interaction. */ +static void virt_wifi_setup(struct net_device *dev) +{ + ether_setup(dev); + dev->netdev_ops = &virt_wifi_ops; + dev->priv_destructor = virt_wifi_net_device_destructor; +} + +/* Called in a RCU read critical section from netif_receive_skb */ +static rx_handler_result_t virt_wifi_rx_handler(struct sk_buff **pskb) +{ + struct sk_buff *skb = *pskb; + struct virt_wifi_netdev_priv *priv = + rcu_dereference(skb->dev->rx_handler_data); + + if (!priv->is_connected) + return RX_HANDLER_PASS; + + /* GFP_ATOMIC because this is a packet interrupt handler. */ + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) { + dev_err(&priv->upperdev->dev, "can't skb_share_check\n"); + return RX_HANDLER_CONSUMED; + } + + *pskb = skb; + skb->dev = priv->upperdev; + skb->pkt_type = PACKET_HOST; + return RX_HANDLER_ANOTHER; +} + +/* Called with rtnl lock held. */ +static int virt_wifi_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct virt_wifi_netdev_priv *priv = netdev_priv(dev); + int err; + + if (!tb[IFLA_LINK]) + return -EINVAL; + + netif_carrier_off(dev); + + priv->upperdev = dev; + priv->lowerdev = __dev_get_by_index(src_net, + nla_get_u32(tb[IFLA_LINK])); + + if (!priv->lowerdev) + return -ENODEV; + if (!tb[IFLA_MTU]) + dev->mtu = priv->lowerdev->mtu; + else if (dev->mtu > priv->lowerdev->mtu) + return -EINVAL; + + err = netdev_rx_handler_register(priv->lowerdev, virt_wifi_rx_handler, + priv); + if (err) { + dev_err(&priv->lowerdev->dev, + "can't netdev_rx_handler_register: %d\n", err); + return err; + } + + eth_hw_addr_inherit(dev, priv->lowerdev); + netif_stacked_transfer_operstate(priv->lowerdev, dev); + + SET_NETDEV_DEV(dev, &priv->lowerdev->dev); + dev->ieee80211_ptr = kzalloc(sizeof(*dev->ieee80211_ptr), GFP_KERNEL); + + if (!dev->ieee80211_ptr) { + err = -ENOMEM; + goto remove_handler; + } + + dev->ieee80211_ptr->iftype = NL80211_IFTYPE_STATION; + dev->ieee80211_ptr->wiphy = common_wiphy; + + err = register_netdevice(dev); + if (err) { + dev_err(&priv->lowerdev->dev, "can't register_netdevice: %d\n", + err); + goto free_wireless_dev; + } + + err = netdev_upper_dev_link(priv->lowerdev, dev); + if (err) { + dev_err(&priv->lowerdev->dev, "can't netdev_upper_dev_link: %d\n", + err); + goto unregister_netdev; + } + + priv->being_deleted = false; + priv->is_connected = false; + priv->is_up = false; + INIT_DELAYED_WORK(&priv->connect, virt_wifi_connect_complete); + + return 0; +unregister_netdev: + unregister_netdevice(dev); +free_wireless_dev: + kfree(dev->ieee80211_ptr); + dev->ieee80211_ptr = NULL; +remove_handler: + netdev_rx_handler_unregister(priv->lowerdev); + + return err; +} + +/* Called with rtnl lock held. */ +static void virt_wifi_dellink(struct net_device *dev, + struct list_head *head) +{ + struct virt_wifi_netdev_priv *priv = netdev_priv(dev); + + if (dev->ieee80211_ptr) + virt_wifi_cancel_scan(dev->ieee80211_ptr->wiphy); + + priv->being_deleted = true; + virt_wifi_cancel_connect(dev); + netif_carrier_off(dev); + + netdev_rx_handler_unregister(priv->lowerdev); + netdev_upper_dev_unlink(priv->lowerdev, dev); + + unregister_netdevice_queue(dev, head); + + /* Deleting the wiphy is handled in the module destructor. */ +} + +static struct rtnl_link_ops virt_wifi_link_ops = { + .kind = "virt_wifi", + .setup = virt_wifi_setup, + .newlink = virt_wifi_newlink, + .dellink = virt_wifi_dellink, + .priv_size = sizeof(struct virt_wifi_netdev_priv), +}; + +/* Acquires and releases the rtnl lock. */ +static int __init virt_wifi_init_module(void) +{ + int err; + + /* Guaranteed to be locallly-administered and not multicast. */ + eth_random_addr(fake_router_bssid); + + common_wiphy = virt_wifi_make_wiphy(); + if (!common_wiphy) + return -ENOMEM; + + err = rtnl_link_register(&virt_wifi_link_ops); + if (err) + virt_wifi_destroy_wiphy(common_wiphy); + + return err; +} + +/* Acquires and releases the rtnl lock. */ +static void __exit virt_wifi_cleanup_module(void) +{ + /* Will delete any devices that depend on the wiphy. */ + rtnl_link_unregister(&virt_wifi_link_ops); + virt_wifi_destroy_wiphy(common_wiphy); +} + +module_init(virt_wifi_init_module); +module_exit(virt_wifi_cleanup_module); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Cody Schuffelen <schuffelen@google.com>"); +MODULE_DESCRIPTION("Driver for a wireless wrapper of ethernet devices"); +MODULE_ALIAS_RTNL_LINK("virt_wifi");
diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c index c4da50e..08a4f82 100644 --- a/drivers/nfc/fdp/i2c.c +++ b/drivers/nfc/fdp/i2c.c
@@ -176,6 +176,16 @@ static int fdp_nci_i2c_read(struct fdp_i2c_phy *phy, struct sk_buff **skb) /* Packet that contains a length */ if (tmp[0] == 0 && tmp[1] == 0) { phy->next_read_size = (tmp[2] << 8) + tmp[3] + 3; + /* + * Ensure next_read_size does not exceed sizeof(tmp) + * for reading that many bytes during next iteration + */ + if (phy->next_read_size > FDP_NCI_I2C_MAX_PAYLOAD) { + dev_dbg(&client->dev, "%s: corrupted packet\n", + __func__); + phy->next_read_size = 5; + goto flush; + } } else { phy->next_read_size = FDP_NCI_I2C_MIN_PAYLOAD;
diff --git a/drivers/nfc/st21nfca/dep.c b/drivers/nfc/st21nfca/dep.c index fd08be2..3420c51 100644 --- a/drivers/nfc/st21nfca/dep.c +++ b/drivers/nfc/st21nfca/dep.c
@@ -217,7 +217,8 @@ static int st21nfca_tm_recv_atr_req(struct nfc_hci_dev *hdev, atr_req = (struct st21nfca_atr_req *)skb->data; - if (atr_req->length < sizeof(struct st21nfca_atr_req)) { + if (atr_req->length < sizeof(struct st21nfca_atr_req) || + atr_req->length > skb->len) { r = -EPROTO; goto exit; }
diff --git a/drivers/nfc/st21nfca/se.c b/drivers/nfc/st21nfca/se.c index 3a98563..6e84e12 100644 --- a/drivers/nfc/st21nfca/se.c +++ b/drivers/nfc/st21nfca/se.c
@@ -320,23 +320,33 @@ int st21nfca_connectivity_event_received(struct nfc_hci_dev *hdev, u8 host, * AID 81 5 to 16 * PARAMETERS 82 0 to 255 */ - if (skb->len < NFC_MIN_AID_LENGTH + 2 && + if (skb->len < NFC_MIN_AID_LENGTH + 2 || skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG) return -EPROTO; + /* + * Buffer should have enough space for at least + * two tag fields + two length fields + aid_len (skb->data[1]) + */ + if (skb->len < skb->data[1] + 4) + return -EPROTO; + transaction = (struct nfc_evt_transaction *)devm_kzalloc(dev, skb->len - 2, GFP_KERNEL); transaction->aid_len = skb->data[1]; memcpy(transaction->aid, &skb->data[2], transaction->aid_len); - - /* Check next byte is PARAMETERS tag (82) */ - if (skb->data[transaction->aid_len + 2] != - NFC_EVT_TRANSACTION_PARAMS_TAG) - return -EPROTO; - transaction->params_len = skb->data[transaction->aid_len + 3]; + + /* Check next byte is PARAMETERS tag (82) and the length field */ + if (skb->data[transaction->aid_len + 2] != + NFC_EVT_TRANSACTION_PARAMS_TAG || + skb->len < transaction->aid_len + transaction->params_len + 4) { + devm_kfree(dev, transaction); + return -EPROTO; + } + memcpy(transaction->params, skb->data + transaction->aid_len + 4, transaction->params_len);
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 6337c39..b1103e5 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c
@@ -1112,42 +1112,66 @@ int __init early_init_dt_scan_memory(unsigned long node, const char *uname, return 0; } +/* + * Convert configs to something easy to use in C code + */ +#if defined(CONFIG_CMDLINE_FORCE) +static const int overwrite_incoming_cmdline = 1; +static const int read_dt_cmdline; +static const int concat_cmdline; +#elif defined(CONFIG_CMDLINE_EXTEND) +static const int overwrite_incoming_cmdline; +static const int read_dt_cmdline = 1; +static const int concat_cmdline = 1; +#else /* CMDLINE_FROM_BOOTLOADER */ +static const int overwrite_incoming_cmdline; +static const int read_dt_cmdline = 1; +static const int concat_cmdline; +#endif + +#ifdef CONFIG_CMDLINE +static const char *config_cmdline = CONFIG_CMDLINE; +#else +static const char *config_cmdline = ""; +#endif + int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, int depth, void *data) { - int l; - const char *p; + int l = 0; + const char *p = NULL; + char *cmdline = data; pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname); - if (depth != 1 || !data || + if (depth != 1 || !cmdline || (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0)) return 0; early_init_dt_check_for_initrd(node); - /* Retrieve command line */ - p = of_get_flat_dt_prop(node, "bootargs", &l); - if (p != NULL && l > 0) - strlcpy(data, p, min((int)l, COMMAND_LINE_SIZE)); + /* Put CONFIG_CMDLINE in if forced or if data had nothing in it to start */ + if (overwrite_incoming_cmdline || !cmdline[0]) + strlcpy(cmdline, config_cmdline, COMMAND_LINE_SIZE); - /* - * CONFIG_CMDLINE is meant to be a default in case nothing else - * managed to set the command line, unless CONFIG_CMDLINE_FORCE - * is set in which case we override whatever was found earlier. - */ -#ifdef CONFIG_CMDLINE -#if defined(CONFIG_CMDLINE_EXTEND) - strlcat(data, " ", COMMAND_LINE_SIZE); - strlcat(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#elif defined(CONFIG_CMDLINE_FORCE) - strlcpy(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#else - /* No arguments from boot loader, use kernel's cmdl*/ - if (!((char *)data)[0]) - strlcpy(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#endif -#endif /* CONFIG_CMDLINE */ + /* Retrieve command line unless forcing */ + if (read_dt_cmdline) + p = of_get_flat_dt_prop(node, "bootargs", &l); + + if (p != NULL && l > 0) { + if (concat_cmdline) { + int cmdline_len; + int copy_len; + strlcat(cmdline, " ", COMMAND_LINE_SIZE); + cmdline_len = strlen(cmdline); + copy_len = COMMAND_LINE_SIZE - cmdline_len - 1; + copy_len = min((int)l, copy_len); + strncpy(cmdline + cmdline_len, p, copy_len); + cmdline[cmdline_len + copy_len] = '\0'; + } else { + strlcpy(cmdline, p, min((int)l, COMMAND_LINE_SIZE)); + } + } pr_debug("Command line is: %s\n", (char*)data);
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index c0e1985..c06bc23 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c
@@ -1065,7 +1065,8 @@ void pci_disable_link_state(struct pci_dev *pdev, int state) } EXPORT_SYMBOL(pci_disable_link_state); -static int pcie_aspm_set_policy(const char *val, struct kernel_param *kp) +static int pcie_aspm_set_policy(const char *val, + const struct kernel_param *kp) { int i; struct pcie_link_state *link; @@ -1092,7 +1093,7 @@ static int pcie_aspm_set_policy(const char *val, struct kernel_param *kp) return 0; } -static int pcie_aspm_get_policy(char *buffer, struct kernel_param *kp) +static int pcie_aspm_get_policy(char *buffer, const struct kernel_param *kp) { int i, cnt = 0; for (i = 0; i < ARRAY_SIZE(policy_str); i++)
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index c407d52..c66efa0 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -9553,7 +9553,7 @@ static struct ibm_init_struct ibms_init[] __initdata = { }, }; -static int __init set_ibm_param(const char *val, struct kernel_param *kp) +static int __init set_ibm_param(const char *val, const struct kernel_param *kp) { unsigned int i; struct ibm_struct *ibm;
diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index 5204f11..da589c3 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c
@@ -121,7 +121,10 @@ static ssize_t power_supply_show_property(struct device *dev, else if (off >= POWER_SUPPLY_PROP_MODEL_NAME) return sprintf(buf, "%s\n", value.strval); - return sprintf(buf, "%d\n", value.intval); + if (off == POWER_SUPPLY_PROP_CHARGE_COUNTER_EXT) + return sprintf(buf, "%lld\n", value.int64val); + else + return sprintf(buf, "%d\n", value.intval); } static ssize_t power_supply_store_property(struct device *dev, @@ -245,6 +248,12 @@ static struct device_attribute power_supply_attrs[] = { POWER_SUPPLY_ATTR(precharge_current), POWER_SUPPLY_ATTR(charge_term_current), POWER_SUPPLY_ATTR(calibrate), + /* Local extensions */ + POWER_SUPPLY_ATTR(usb_hc), + POWER_SUPPLY_ATTR(usb_otg), + POWER_SUPPLY_ATTR(charge_enabled), + /* Local extensions of type int64_t */ + POWER_SUPPLY_ATTR(charge_counter_ext), /* Properties of type `const char *' */ POWER_SUPPLY_ATTR(model_name), POWER_SUPPLY_ATTR(manufacturer),
diff --git a/drivers/rtc/rtc-palmas.c b/drivers/rtc/rtc-palmas.c index 4bcfb88..34aea38 100644 --- a/drivers/rtc/rtc-palmas.c +++ b/drivers/rtc/rtc-palmas.c
@@ -45,6 +45,42 @@ struct palmas_rtc { /* Total number of RTC registers needed to set time*/ #define PALMAS_NUM_TIME_REGS (PALMAS_YEARS_REG - PALMAS_SECONDS_REG + 1) +/* + * Special bin2bcd mapping to deal with bcd storage of year. + * + * 0-69 -> 0xD0 + * 70-99 (1970 - 1999) -> 0xD0 - 0xF9 (correctly rolls to 0x00) + * 100-199 (2000 - 2099) -> 0x00 - 0x99 (does not roll to 0xA0 :-( ) + * 200-229 (2100 - 2129) -> 0xA0 - 0xC9 (really for completeness) + * 230- -> 0xC9 + * + * Confirmed: the only transition that does not work correctly for this rtc + * clock is the transition from 2099 to 2100, it proceeds to 2000. We will + * accept this issue since the clock retains and transitions the year correctly + * in all other conditions. + */ +static unsigned char year_bin2bcd(int val) +{ + if (val < 70) + return 0xD0; + if (val < 100) + return bin2bcd(val - 20) | 0x80; /* KISS leverage of bin2bcd */ + if (val >= 230) + return 0xC9; + if (val >= 200) + return bin2bcd(val - 180) | 0x80; + return bin2bcd(val - 100); +} + +static int year_bcd2bin(unsigned char val) +{ + if (val >= 0xD0) + return bcd2bin(val & 0x7F) + 20; + if (val >= 0xA0) + return bcd2bin(val & 0x7F) + 180; + return bcd2bin(val) + 100; +} + static int palmas_rtc_read_time(struct device *dev, struct rtc_time *tm) { unsigned char rtc_data[PALMAS_NUM_TIME_REGS]; @@ -71,7 +107,7 @@ static int palmas_rtc_read_time(struct device *dev, struct rtc_time *tm) tm->tm_hour = bcd2bin(rtc_data[2]); tm->tm_mday = bcd2bin(rtc_data[3]); tm->tm_mon = bcd2bin(rtc_data[4]) - 1; - tm->tm_year = bcd2bin(rtc_data[5]) + 100; + tm->tm_year = year_bcd2bin(rtc_data[5]); return ret; } @@ -87,7 +123,7 @@ static int palmas_rtc_set_time(struct device *dev, struct rtc_time *tm) rtc_data[2] = bin2bcd(tm->tm_hour); rtc_data[3] = bin2bcd(tm->tm_mday); rtc_data[4] = bin2bcd(tm->tm_mon + 1); - rtc_data[5] = bin2bcd(tm->tm_year - 100); + rtc_data[5] = year_bin2bcd(tm->tm_year); /* Stop RTC while updating the RTC time registers */ ret = palmas_update_bits(palmas, PALMAS_RTC_BASE, PALMAS_RTC_CTRL_REG, @@ -142,7 +178,7 @@ static int palmas_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm) alm->time.tm_hour = bcd2bin(alarm_data[2]); alm->time.tm_mday = bcd2bin(alarm_data[3]); alm->time.tm_mon = bcd2bin(alarm_data[4]) - 1; - alm->time.tm_year = bcd2bin(alarm_data[5]) + 100; + alm->time.tm_year = year_bcd2bin(alarm_data[5]); ret = palmas_read(palmas, PALMAS_RTC_BASE, PALMAS_RTC_INTERRUPTS_REG, &int_val); @@ -173,7 +209,7 @@ static int palmas_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm) alarm_data[2] = bin2bcd(alm->time.tm_hour); alarm_data[3] = bin2bcd(alm->time.tm_mday); alarm_data[4] = bin2bcd(alm->time.tm_mon + 1); - alarm_data[5] = bin2bcd(alm->time.tm_year - 100); + alarm_data[5] = year_bin2bcd(alm->time.tm_year); ret = palmas_bulk_write(palmas, PALMAS_RTC_BASE, PALMAS_ALARM_SECONDS_REG, alarm_data, PALMAS_NUM_TIME_REGS);
diff --git a/drivers/scsi/fcoe/fcoe_transport.c b/drivers/scsi/fcoe/fcoe_transport.c index 375c536..c5eb0c4 100644 --- a/drivers/scsi/fcoe/fcoe_transport.c +++ b/drivers/scsi/fcoe/fcoe_transport.c
@@ -32,13 +32,13 @@ MODULE_AUTHOR("Open-FCoE.org"); MODULE_DESCRIPTION("FIP discovery protocol and FCoE transport for FCoE HBAs"); MODULE_LICENSE("GPL v2"); -static int fcoe_transport_create(const char *, struct kernel_param *); -static int fcoe_transport_destroy(const char *, struct kernel_param *); +static int fcoe_transport_create(const char *, const struct kernel_param *); +static int fcoe_transport_destroy(const char *, const struct kernel_param *); static int fcoe_transport_show(char *buffer, const struct kernel_param *kp); static struct fcoe_transport *fcoe_transport_lookup(struct net_device *device); static struct fcoe_transport *fcoe_netdev_map_lookup(struct net_device *device); -static int fcoe_transport_enable(const char *, struct kernel_param *); -static int fcoe_transport_disable(const char *, struct kernel_param *); +static int fcoe_transport_enable(const char *, const struct kernel_param *); +static int fcoe_transport_disable(const char *, const struct kernel_param *); static int libfcoe_device_notification(struct notifier_block *notifier, ulong event, void *ptr); @@ -865,7 +865,8 @@ EXPORT_SYMBOL(fcoe_ctlr_destroy_store); * * Returns: 0 for success */ -static int fcoe_transport_create(const char *buffer, struct kernel_param *kp) +static int fcoe_transport_create(const char *buffer, + const struct kernel_param *kp) { int rc = -ENODEV; struct net_device *netdev = NULL; @@ -930,7 +931,8 @@ static int fcoe_transport_create(const char *buffer, struct kernel_param *kp) * * Returns: 0 for success */ -static int fcoe_transport_destroy(const char *buffer, struct kernel_param *kp) +static int fcoe_transport_destroy(const char *buffer, + const struct kernel_param *kp) { int rc = -ENODEV; struct net_device *netdev = NULL; @@ -974,7 +976,8 @@ static int fcoe_transport_destroy(const char *buffer, struct kernel_param *kp) * * Returns: 0 for success */ -static int fcoe_transport_disable(const char *buffer, struct kernel_param *kp) +static int fcoe_transport_disable(const char *buffer, + const struct kernel_param *kp) { int rc = -ENODEV; struct net_device *netdev = NULL; @@ -1008,7 +1011,8 @@ static int fcoe_transport_disable(const char *buffer, struct kernel_param *kp) * * Returns: 0 for success */ -static int fcoe_transport_enable(const char *buffer, struct kernel_param *kp) +static int fcoe_transport_enable(const char *buffer, + const struct kernel_param *kp) { int rc = -ENODEV; struct net_device *netdev = NULL;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 9b716c8..66a7982 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -105,7 +105,7 @@ _base_get_ioc_facts(struct MPT3SAS_ADAPTER *ioc); * */ static int -_scsih_set_fwfault_debug(const char *val, struct kernel_param *kp) +_scsih_set_fwfault_debug(const char *val, const struct kernel_param *kp) { int ret = param_set_int(val, kp); struct MPT3SAS_ADAPTER *ioc;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index b28efdd..83640d9 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -281,7 +281,7 @@ struct _scsi_io_transfer { * Note: The logging levels are defined in mpt3sas_debug.h. */ static int -_scsih_set_debug_level(const char *val, struct kernel_param *kp) +_scsih_set_debug_level(const char *val, const struct kernel_param *kp) { int ret = param_set_int(val, kp); struct MPT3SAS_ADAPTER *ioc;
diff --git a/drivers/staging/android/Kconfig b/drivers/staging/android/Kconfig index 71a50b9..4630dc8 100644 --- a/drivers/staging/android/Kconfig +++ b/drivers/staging/android/Kconfig
@@ -14,8 +14,19 @@ It is, in theory, a good memory allocator for low-memory devices, because it can discard shared memory units when under memory pressure. +config ANDROID_VSOC + tristate "Android Virtual SoC support" + default n + depends on PCI_MSI + ---help--- + This option adds support for the Virtual SoC driver needed to boot + a 'cuttlefish' Android image inside QEmu. The driver interacts with + a QEmu ivshmem device. If built as a module, it will be called vsoc. + source "drivers/staging/android/ion/Kconfig" +source "drivers/staging/android/fiq_debugger/Kconfig" + endif # if ANDROID endmenu
diff --git a/drivers/staging/android/Makefile b/drivers/staging/android/Makefile index 7cf1564..2638b4a2 100644 --- a/drivers/staging/android/Makefile +++ b/drivers/staging/android/Makefile
@@ -1,5 +1,7 @@ ccflags-y += -I$(src) # needed for trace events obj-y += ion/ +obj-$(CONFIG_FIQ_DEBUGGER) += fiq_debugger/ obj-$(CONFIG_ASHMEM) += ashmem.o +obj-$(CONFIG_ANDROID_VSOC) += vsoc.o
diff --git a/drivers/staging/android/TODO b/drivers/staging/android/TODO index 5f14247..ebd6ba3 100644 --- a/drivers/staging/android/TODO +++ b/drivers/staging/android/TODO
@@ -12,5 +12,14 @@ - Split /dev/ion up into multiple nodes (e.g. /dev/ion/heap0) - Better test framework (integration with VGEM was suggested) +vsoc.c, uapi/vsoc_shm.h + - The current driver uses the same wait queue for all of the futexes in a + region. This will cause false wakeups in regions with a large number of + waiting threads. We should eventually use multiple queues and select the + queue based on the region. + - Add debugfs support for examining the permissions of regions. + - Remove VSOC_WAIT_FOR_INCOMING_INTERRUPT ioctl. This functionality has been + superseded by the futex and is there for legacy reasons. + Please send patches to Greg Kroah-Hartman <greg@kroah.com> and Cc: Arve Hjønnevåg <arve@android.com> and Riley Andrews <riandrews@android.com>
diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 4151bb4..555b5a8 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c
@@ -406,22 +406,14 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) } get_file(asma->file); - /* - * XXX - Reworked to use shmem_zero_setup() instead of - * shmem_set_file while we're in staging. -jstultz - */ - if (vma->vm_flags & VM_SHARED) { - ret = shmem_zero_setup(vma); - if (ret) { - fput(asma->file); - goto out; - } + if (vma->vm_flags & VM_SHARED) + shmem_set_file(vma, asma->file); + else { + if (vma->vm_file) + fput(vma->vm_file); + vma->vm_file = asma->file; } - if (vma->vm_file) - fput(vma->vm_file); - vma->vm_file = asma->file; - out: mutex_unlock(&ashmem_mutex); return ret; @@ -458,9 +450,9 @@ ashmem_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) loff_t start = range->pgstart * PAGE_SIZE; loff_t end = (range->pgend + 1) * PAGE_SIZE; - vfs_fallocate(range->asma->file, - FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, - start, end - start); + range->asma->file->f_op->fallocate(range->asma->file, + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + start, end - start); range->purged = ASHMEM_WAS_PURGED; lru_del(range);
diff --git a/drivers/staging/android/fiq_debugger/Kconfig b/drivers/staging/android/fiq_debugger/Kconfig new file mode 100644 index 0000000..60fc224 --- /dev/null +++ b/drivers/staging/android/fiq_debugger/Kconfig
@@ -0,0 +1,58 @@ +config FIQ_DEBUGGER + bool "FIQ Mode Serial Debugger" + default n + depends on ARM || ARM64 + help + The FIQ serial debugger can accept commands even when the + kernel is unresponsive due to being stuck with interrupts + disabled. + +config FIQ_DEBUGGER_NO_SLEEP + bool "Keep serial debugger active" + depends on FIQ_DEBUGGER + default n + help + Enables the serial debugger at boot. Passing + fiq_debugger.no_sleep on the kernel commandline will + override this config option. + +config FIQ_DEBUGGER_WAKEUP_IRQ_ALWAYS_ON + bool "Don't disable wakeup IRQ when debugger is active" + depends on FIQ_DEBUGGER + default n + help + Don't disable the wakeup irq when enabling the uart clock. This will + cause extra interrupts, but it makes the serial debugger usable with + on some MSM radio builds that ignore the uart clock request in power + collapse. + +config FIQ_DEBUGGER_CONSOLE + bool "Console on FIQ Serial Debugger port" + depends on FIQ_DEBUGGER + default n + help + Enables a console so that printk messages are displayed on + the debugger serial port as the occur. + +config FIQ_DEBUGGER_CONSOLE_DEFAULT_ENABLE + bool "Put the FIQ debugger into console mode by default" + depends on FIQ_DEBUGGER_CONSOLE + default n + help + If enabled, this puts the fiq debugger into console mode by default. + Otherwise, the fiq debugger will start out in debug mode. + +config FIQ_DEBUGGER_UART_OVERLAY + bool "Install uart DT overlay" + depends on FIQ_DEBUGGER + select OF_OVERLAY + default n + help + If enabled, fiq debugger is calling fiq_debugger_uart_overlay() + that will apply overlay uart_overlay@0 to disable proper uart. + +config FIQ_WATCHDOG + bool + select FIQ_DEBUGGER + select PSTORE_RAM + default n
diff --git a/drivers/staging/android/fiq_debugger/Makefile b/drivers/staging/android/fiq_debugger/Makefile new file mode 100644 index 0000000..a7ca487 --- /dev/null +++ b/drivers/staging/android/fiq_debugger/Makefile
@@ -0,0 +1,4 @@ +obj-y += fiq_debugger.o +obj-$(CONFIG_ARM) += fiq_debugger_arm.o +obj-$(CONFIG_ARM64) += fiq_debugger_arm64.o +obj-$(CONFIG_FIQ_WATCHDOG) += fiq_watchdog.o
diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger.c b/drivers/staging/android/fiq_debugger/fiq_debugger.c new file mode 100644 index 0000000..f6a8062 --- /dev/null +++ b/drivers/staging/android/fiq_debugger/fiq_debugger.c
@@ -0,0 +1,1246 @@ +/* + * drivers/staging/android/fiq_debugger.c + * + * Serial Debugger Interface accessed through an FIQ interrupt. + * + * Copyright (C) 2008 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <stdarg.h> +#include <linux/module.h> +#include <linux/io.h> +#include <linux/console.h> +#include <linux/interrupt.h> +#include <linux/clk.h> +#include <linux/platform_device.h> +#include <linux/kernel_stat.h> +#include <linux/kmsg_dump.h> +#include <linux/irq.h> +#include <linux/delay.h> +#include <linux/reboot.h> +#include <linux/sched/signal.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/timer.h> +#include <linux/tty.h> +#include <linux/tty_flip.h> + +#ifdef CONFIG_FIQ_GLUE +#include <asm/fiq_glue.h> +#endif + +#ifdef CONFIG_FIQ_DEBUGGER_UART_OVERLAY +#include <linux/of.h> +#endif + +#include <linux/uaccess.h> + +#include "fiq_debugger.h" +#include "fiq_debugger_priv.h" +#include "fiq_debugger_ringbuf.h" + +#define DEBUG_MAX 64 +#define MAX_UNHANDLED_FIQ_COUNT 1000000 + +#define MAX_FIQ_DEBUGGER_PORTS 4 + +struct fiq_debugger_state { +#ifdef CONFIG_FIQ_GLUE + struct fiq_glue_handler handler; +#endif + struct fiq_debugger_output output; + + int fiq; + int uart_irq; + int signal_irq; + int wakeup_irq; + bool wakeup_irq_no_set_wake; + struct clk *clk; + struct fiq_debugger_pdata *pdata; + struct platform_device *pdev; + + char debug_cmd[DEBUG_MAX]; + int debug_busy; + int debug_abort; + + char debug_buf[DEBUG_MAX]; + int debug_count; + + bool no_sleep; + bool debug_enable; + bool ignore_next_wakeup_irq; + struct timer_list sleep_timer; + spinlock_t sleep_timer_lock; + bool uart_enabled; + struct wakeup_source debugger_wake_src; + bool console_enable; + int current_cpu; + atomic_t unhandled_fiq_count; + bool in_fiq; + + struct work_struct work; + spinlock_t work_lock; + char work_cmd[DEBUG_MAX]; + +#ifdef CONFIG_FIQ_DEBUGGER_CONSOLE + spinlock_t console_lock; + struct console console; + struct tty_port tty_port; + struct fiq_debugger_ringbuf *tty_rbuf; + bool syslog_dumping; +#endif + + unsigned int last_irqs[NR_IRQS]; + unsigned int last_local_timer_irqs[NR_CPUS]; +}; + +#ifdef CONFIG_FIQ_DEBUGGER_CONSOLE +struct tty_driver *fiq_tty_driver; +#endif + +#ifdef CONFIG_FIQ_DEBUGGER_NO_SLEEP +static bool initial_no_sleep = true; +#else +static bool initial_no_sleep; +#endif + +#ifdef CONFIG_FIQ_DEBUGGER_CONSOLE_DEFAULT_ENABLE +static bool initial_debug_enable = true; +static bool initial_console_enable = true; +#else +static bool initial_debug_enable; +static bool initial_console_enable; +#endif + +static bool fiq_kgdb_enable; +static bool fiq_debugger_disable; + +module_param_named(no_sleep, initial_no_sleep, bool, 0644); +module_param_named(debug_enable, initial_debug_enable, bool, 0644); +module_param_named(console_enable, initial_console_enable, bool, 0644); +module_param_named(kgdb_enable, fiq_kgdb_enable, bool, 0644); +module_param_named(disable, fiq_debugger_disable, bool, 0644); + +#ifdef CONFIG_FIQ_DEBUGGER_WAKEUP_IRQ_ALWAYS_ON +static inline +void fiq_debugger_enable_wakeup_irq(struct fiq_debugger_state *state) {} +static inline +void fiq_debugger_disable_wakeup_irq(struct fiq_debugger_state *state) {} +#else +static inline +void fiq_debugger_enable_wakeup_irq(struct fiq_debugger_state *state) +{ + if (state->wakeup_irq < 0) + return; + enable_irq(state->wakeup_irq); + if (!state->wakeup_irq_no_set_wake) + enable_irq_wake(state->wakeup_irq); +} +static inline +void fiq_debugger_disable_wakeup_irq(struct fiq_debugger_state *state) +{ + if (state->wakeup_irq < 0) + return; + disable_irq_nosync(state->wakeup_irq); + if (!state->wakeup_irq_no_set_wake) + disable_irq_wake(state->wakeup_irq); +} +#endif + +static inline bool fiq_debugger_have_fiq(struct fiq_debugger_state *state) +{ + return (state->fiq >= 0); +} + +#ifdef CONFIG_FIQ_GLUE +static void fiq_debugger_force_irq(struct fiq_debugger_state *state) +{ + unsigned int irq = state->signal_irq; + + if (WARN_ON(!fiq_debugger_have_fiq(state))) + return; + if (state->pdata->force_irq) { + state->pdata->force_irq(state->pdev, irq); + } else { + struct irq_chip *chip = irq_get_chip(irq); + if (chip && chip->irq_retrigger) + chip->irq_retrigger(irq_get_irq_data(irq)); + } +} +#endif + +static void fiq_debugger_uart_enable(struct fiq_debugger_state *state) +{ + if (state->clk) + clk_enable(state->clk); + if (state->pdata->uart_enable) + state->pdata->uart_enable(state->pdev); +} + +static void fiq_debugger_uart_disable(struct fiq_debugger_state *state) +{ + if (state->pdata->uart_disable) + state->pdata->uart_disable(state->pdev); + if (state->clk) + clk_disable(state->clk); +} + +static void fiq_debugger_uart_flush(struct fiq_debugger_state *state) +{ + if (state->pdata->uart_flush) + state->pdata->uart_flush(state->pdev); +} + +static void fiq_debugger_putc(struct fiq_debugger_state *state, char c) +{ + state->pdata->uart_putc(state->pdev, c); +} + +static void fiq_debugger_puts(struct fiq_debugger_state *state, char *s) +{ + unsigned c; + while ((c = *s++)) { + if (c == '\n') + fiq_debugger_putc(state, '\r'); + fiq_debugger_putc(state, c); + } +} + +static void fiq_debugger_prompt(struct fiq_debugger_state *state) +{ + fiq_debugger_puts(state, "debug> "); +} + +static void fiq_debugger_dump_kernel_log(struct fiq_debugger_state *state) +{ + char buf[512]; + size_t len; + struct kmsg_dumper dumper = { .active = true }; + + + kmsg_dump_rewind_nolock(&dumper); + while (kmsg_dump_get_line_nolock(&dumper, true, buf, + sizeof(buf) - 1, &len)) { + buf[len] = 0; + fiq_debugger_puts(state, buf); + } +} + +static void fiq_debugger_printf(struct fiq_debugger_output *output, + const char *fmt, ...) +{ + struct fiq_debugger_state *state; + char buf[256]; + va_list ap; + + state = container_of(output, struct fiq_debugger_state, output); + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + fiq_debugger_puts(state, buf); +} + +/* Safe outside fiq context */ +static int fiq_debugger_printf_nfiq(void *cookie, const char *fmt, ...) +{ + struct fiq_debugger_state *state = cookie; + char buf[256]; + va_list ap; + unsigned long irq_flags; + + va_start(ap, fmt); + vsnprintf(buf, 128, fmt, ap); + va_end(ap); + + local_irq_save(irq_flags); + fiq_debugger_puts(state, buf); + fiq_debugger_uart_flush(state); + local_irq_restore(irq_flags); + return state->debug_abort; +} + +static void fiq_debugger_dump_irqs(struct fiq_debugger_state *state) +{ + int n; + struct irq_desc *desc; + + fiq_debugger_printf(&state->output, + "irqnr total since-last status name\n"); + for_each_irq_desc(n, desc) { + struct irqaction *act = desc->action; + if (!act && !kstat_irqs(n)) + continue; + fiq_debugger_printf(&state->output, "%5d: %10u %11u %8x %s\n", n, + kstat_irqs(n), + kstat_irqs(n) - state->last_irqs[n], + desc->status_use_accessors, + (act && act->name) ? act->name : "???"); + state->last_irqs[n] = kstat_irqs(n); + } +} + +static void fiq_debugger_do_ps(struct fiq_debugger_state *state) +{ + struct task_struct *g; + struct task_struct *p; + unsigned task_state; + static const char stat_nam[] = "RSDTtZX"; + + fiq_debugger_printf(&state->output, "pid ppid prio task pc\n"); + read_lock(&tasklist_lock); + do_each_thread(g, p) { + task_state = p->state ? __ffs(p->state) + 1 : 0; + fiq_debugger_printf(&state->output, + "%5d %5d %4d ", p->pid, p->parent->pid, p->prio); + fiq_debugger_printf(&state->output, "%-13.13s %c", p->comm, + task_state >= sizeof(stat_nam) ? '?' : stat_nam[task_state]); + if (task_state == TASK_RUNNING) + fiq_debugger_printf(&state->output, " running\n"); + else + fiq_debugger_printf(&state->output, " %08lx\n", + thread_saved_pc(p)); + } while_each_thread(g, p); + read_unlock(&tasklist_lock); +} + +#ifdef CONFIG_FIQ_DEBUGGER_CONSOLE +static void fiq_debugger_begin_syslog_dump(struct fiq_debugger_state *state) +{ + state->syslog_dumping = true; +} + +static void fiq_debugger_end_syslog_dump(struct fiq_debugger_state *state) +{ + state->syslog_dumping = false; +} +#else +extern int do_syslog(int type, char __user *bug, int count); +static void fiq_debugger_begin_syslog_dump(struct fiq_debugger_state *state) +{ + do_syslog(5 /* clear */, NULL, 0); +} + +static void fiq_debugger_end_syslog_dump(struct fiq_debugger_state *state) +{ + fiq_debugger_dump_kernel_log(state); +} +#endif + +static void fiq_debugger_do_sysrq(struct fiq_debugger_state *state, char rq) +{ + if ((rq == 'g' || rq == 'G') && !fiq_kgdb_enable) { + fiq_debugger_printf(&state->output, "sysrq-g blocked\n"); + return; + } + fiq_debugger_begin_syslog_dump(state); + handle_sysrq(rq); + fiq_debugger_end_syslog_dump(state); +} + +#ifdef CONFIG_KGDB +static void fiq_debugger_do_kgdb(struct fiq_debugger_state *state) +{ + if (!fiq_kgdb_enable) { + fiq_debugger_printf(&state->output, "kgdb through fiq debugger not enabled\n"); + return; + } + + fiq_debugger_printf(&state->output, "enabling console and triggering kgdb\n"); + state->console_enable = true; + handle_sysrq('g'); +} +#endif + +static void fiq_debugger_schedule_work(struct fiq_debugger_state *state, + char *cmd) +{ + unsigned long flags; + + spin_lock_irqsave(&state->work_lock, flags); + if (state->work_cmd[0] != '\0') { + fiq_debugger_printf(&state->output, "work command processor busy\n"); + spin_unlock_irqrestore(&state->work_lock, flags); + return; + } + + strlcpy(state->work_cmd, cmd, sizeof(state->work_cmd)); + spin_unlock_irqrestore(&state->work_lock, flags); + + schedule_work(&state->work); +} + +static void fiq_debugger_work(struct work_struct *work) +{ + struct fiq_debugger_state *state; + char work_cmd[DEBUG_MAX]; + char *cmd; + unsigned long flags; + + state = container_of(work, struct fiq_debugger_state, work); + + spin_lock_irqsave(&state->work_lock, flags); + + strlcpy(work_cmd, state->work_cmd, sizeof(work_cmd)); + state->work_cmd[0] = '\0'; + + spin_unlock_irqrestore(&state->work_lock, flags); + + cmd = work_cmd; + if (!strncmp(cmd, "reboot", 6)) { + cmd += 6; + while (*cmd == ' ') + cmd++; + if (*cmd != '\0') + kernel_restart(cmd); + else + kernel_restart(NULL); + } else { + fiq_debugger_printf(&state->output, "unknown work command '%s'\n", + work_cmd); + } +} + +/* This function CANNOT be called in FIQ context */ +static void fiq_debugger_irq_exec(struct fiq_debugger_state *state, char *cmd) +{ + if (!strcmp(cmd, "ps")) + fiq_debugger_do_ps(state); + if (!strcmp(cmd, "sysrq")) + fiq_debugger_do_sysrq(state, 'h'); + if (!strncmp(cmd, "sysrq ", 6)) + fiq_debugger_do_sysrq(state, cmd[6]); +#ifdef CONFIG_KGDB + if (!strcmp(cmd, "kgdb")) + fiq_debugger_do_kgdb(state); +#endif + if (!strncmp(cmd, "reboot", 6)) + fiq_debugger_schedule_work(state, cmd); +} + +static void fiq_debugger_help(struct fiq_debugger_state *state) +{ + fiq_debugger_printf(&state->output, + "FIQ Debugger commands:\n" + " pc PC status\n" + " regs Register dump\n" + " allregs Extended Register dump\n" + " bt Stack trace\n" + " reboot [<c>] Reboot with command <c>\n" + " reset [<c>] Hard reset with command <c>\n" + " irqs Interupt status\n" + " kmsg Kernel log\n" + " version Kernel version\n"); + fiq_debugger_printf(&state->output, + " sleep Allow sleep while in FIQ\n" + " nosleep Disable sleep while in FIQ\n" + " console Switch terminal to console\n" + " cpu Current CPU\n" + " cpu <number> Switch to CPU<number>\n"); + fiq_debugger_printf(&state->output, + " ps Process list\n" + " sysrq sysrq options\n" + " sysrq <param> Execute sysrq with <param>\n"); +#ifdef CONFIG_KGDB + fiq_debugger_printf(&state->output, + " kgdb Enter kernel debugger\n"); +#endif +} + +static void fiq_debugger_take_affinity(void *info) +{ + struct fiq_debugger_state *state = info; + struct cpumask cpumask; + + cpumask_clear(&cpumask); + cpumask_set_cpu(get_cpu(), &cpumask); + + irq_set_affinity(state->uart_irq, &cpumask); +} + +static void fiq_debugger_switch_cpu(struct fiq_debugger_state *state, int cpu) +{ + if (!fiq_debugger_have_fiq(state)) + smp_call_function_single(cpu, fiq_debugger_take_affinity, state, + false); + state->current_cpu = cpu; +} + +static bool fiq_debugger_fiq_exec(struct fiq_debugger_state *state, + const char *cmd, const struct pt_regs *regs, + void *svc_sp) +{ + bool signal_helper = false; + + if (!strcmp(cmd, "help") || !strcmp(cmd, "?")) { + fiq_debugger_help(state); + } else if (!strcmp(cmd, "pc")) { + fiq_debugger_dump_pc(&state->output, regs); + } else if (!strcmp(cmd, "regs")) { + fiq_debugger_dump_regs(&state->output, regs); + } else if (!strcmp(cmd, "allregs")) { + fiq_debugger_dump_allregs(&state->output, regs); + } else if (!strcmp(cmd, "bt")) { + fiq_debugger_dump_stacktrace(&state->output, regs, 100, svc_sp); + } else if (!strncmp(cmd, "reset", 5)) { + cmd += 5; + while (*cmd == ' ') + cmd++; + if (*cmd) { + char tmp_cmd[32]; + strlcpy(tmp_cmd, cmd, sizeof(tmp_cmd)); + machine_restart(tmp_cmd); + } else { + machine_restart(NULL); + } + } else if (!strcmp(cmd, "irqs")) { + fiq_debugger_dump_irqs(state); + } else if (!strcmp(cmd, "kmsg")) { + fiq_debugger_dump_kernel_log(state); + } else if (!strcmp(cmd, "version")) { + fiq_debugger_printf(&state->output, "%s\n", linux_banner); + } else if (!strcmp(cmd, "sleep")) { + state->no_sleep = false; + fiq_debugger_printf(&state->output, "enabling sleep\n"); + } else if (!strcmp(cmd, "nosleep")) { + state->no_sleep = true; + fiq_debugger_printf(&state->output, "disabling sleep\n"); + } else if (!strcmp(cmd, "console")) { + fiq_debugger_printf(&state->output, "console mode\n"); + fiq_debugger_uart_flush(state); + state->console_enable = true; + } else if (!strcmp(cmd, "cpu")) { + fiq_debugger_printf(&state->output, "cpu %d\n", state->current_cpu); + } else if (!strncmp(cmd, "cpu ", 4)) { + unsigned long cpu = 0; + if (kstrtoul(cmd + 4, 10, &cpu) == 0) + fiq_debugger_switch_cpu(state, cpu); + else + fiq_debugger_printf(&state->output, "invalid cpu\n"); + fiq_debugger_printf(&state->output, "cpu %d\n", state->current_cpu); + } else { + if (state->debug_busy) { + fiq_debugger_printf(&state->output, + "command processor busy. trying to abort.\n"); + state->debug_abort = -1; + } else { + strcpy(state->debug_cmd, cmd); + state->debug_busy = 1; + } + + return true; + } + if (!state->console_enable) + fiq_debugger_prompt(state); + + return signal_helper; +} + +static void fiq_debugger_sleep_timer_expired(unsigned long data) +{ + struct fiq_debugger_state *state = (struct fiq_debugger_state *)data; + unsigned long flags; + + spin_lock_irqsave(&state->sleep_timer_lock, flags); + if (state->uart_enabled && !state->no_sleep) { + if (state->debug_enable && !state->console_enable) { + state->debug_enable = false; + fiq_debugger_printf_nfiq(state, + "suspending fiq debugger\n"); + } + state->ignore_next_wakeup_irq = true; + fiq_debugger_uart_disable(state); + state->uart_enabled = false; + fiq_debugger_enable_wakeup_irq(state); + } + __pm_relax(&state->debugger_wake_src); + spin_unlock_irqrestore(&state->sleep_timer_lock, flags); +} + +static void fiq_debugger_handle_wakeup(struct fiq_debugger_state *state) +{ + unsigned long flags; + + spin_lock_irqsave(&state->sleep_timer_lock, flags); + if (state->wakeup_irq >= 0 && state->ignore_next_wakeup_irq) { + state->ignore_next_wakeup_irq = false; + } else if (!state->uart_enabled) { + __pm_stay_awake(&state->debugger_wake_src); + fiq_debugger_uart_enable(state); + state->uart_enabled = true; + fiq_debugger_disable_wakeup_irq(state); + mod_timer(&state->sleep_timer, jiffies + HZ / 2); + } + spin_unlock_irqrestore(&state->sleep_timer_lock, flags); +} + +static irqreturn_t fiq_debugger_wakeup_irq_handler(int irq, void *dev) +{ + struct fiq_debugger_state *state = dev; + + if (!state->no_sleep) + fiq_debugger_puts(state, "WAKEUP\n"); + fiq_debugger_handle_wakeup(state); + + return IRQ_HANDLED; +} + +static +void fiq_debugger_handle_console_irq_context(struct fiq_debugger_state *state) +{ +#if defined(CONFIG_FIQ_DEBUGGER_CONSOLE) + if (state->tty_port.ops) { + int i; + int count = fiq_debugger_ringbuf_level(state->tty_rbuf); + for (i = 0; i < count; i++) { + int c = fiq_debugger_ringbuf_peek(state->tty_rbuf, 0); + tty_insert_flip_char(&state->tty_port, c, TTY_NORMAL); + if (!fiq_debugger_ringbuf_consume(state->tty_rbuf, 1)) + pr_warn("fiq tty failed to consume byte\n"); + } + tty_flip_buffer_push(&state->tty_port); + } +#endif +} + +static void fiq_debugger_handle_irq_context(struct fiq_debugger_state *state) +{ + if (!state->no_sleep) { + unsigned long flags; + + spin_lock_irqsave(&state->sleep_timer_lock, flags); + __pm_stay_awake(&state->debugger_wake_src); + mod_timer(&state->sleep_timer, jiffies + HZ * 5); + spin_unlock_irqrestore(&state->sleep_timer_lock, flags); + } + fiq_debugger_handle_console_irq_context(state); + if (state->debug_busy) { + fiq_debugger_irq_exec(state, state->debug_cmd); + if (!state->console_enable) + fiq_debugger_prompt(state); + state->debug_busy = 0; + } +} + +static int fiq_debugger_getc(struct fiq_debugger_state *state) +{ + return state->pdata->uart_getc(state->pdev); +} + +static bool fiq_debugger_handle_uart_interrupt(struct fiq_debugger_state *state, + int this_cpu, const struct pt_regs *regs, void *svc_sp) +{ + int c; + static int last_c; + int count = 0; + bool signal_helper = false; + + if (this_cpu != state->current_cpu) { + if (state->in_fiq) + return false; + + if (atomic_inc_return(&state->unhandled_fiq_count) != + MAX_UNHANDLED_FIQ_COUNT) + return false; + + fiq_debugger_printf(&state->output, + "fiq_debugger: cpu %d not responding, " + "reverting to cpu %d\n", state->current_cpu, + this_cpu); + + atomic_set(&state->unhandled_fiq_count, 0); + fiq_debugger_switch_cpu(state, this_cpu); + return false; + } + + state->in_fiq = true; + + while ((c = fiq_debugger_getc(state)) != FIQ_DEBUGGER_NO_CHAR) { + count++; + if (!state->debug_enable) { + if ((c == 13) || (c == 10)) { + state->debug_enable = true; + state->debug_count = 0; + fiq_debugger_prompt(state); + } + } else if (c == FIQ_DEBUGGER_BREAK) { + state->console_enable = false; + fiq_debugger_puts(state, "fiq debugger mode\n"); + state->debug_count = 0; + fiq_debugger_prompt(state); +#ifdef CONFIG_FIQ_DEBUGGER_CONSOLE + } else if (state->console_enable && state->tty_rbuf) { + fiq_debugger_ringbuf_push(state->tty_rbuf, c); + signal_helper = true; +#endif + } else if ((c >= ' ') && (c < 127)) { + if (state->debug_count < (DEBUG_MAX - 1)) { + state->debug_buf[state->debug_count++] = c; + fiq_debugger_putc(state, c); + } + } else if ((c == 8) || (c == 127)) { + if (state->debug_count > 0) { + state->debug_count--; + fiq_debugger_putc(state, 8); + fiq_debugger_putc(state, ' '); + fiq_debugger_putc(state, 8); + } + } else if ((c == 13) || (c == 10)) { + if (c == '\r' || (c == '\n' && last_c != '\r')) { + fiq_debugger_putc(state, '\r'); + fiq_debugger_putc(state, '\n'); + } + if (state->debug_count) { + state->debug_buf[state->debug_count] = 0; + state->debug_count = 0; + signal_helper |= + fiq_debugger_fiq_exec(state, + state->debug_buf, + regs, svc_sp); + } else { + fiq_debugger_prompt(state); + } + } + last_c = c; + } + if (!state->console_enable) + fiq_debugger_uart_flush(state); + if (state->pdata->fiq_ack) + state->pdata->fiq_ack(state->pdev, state->fiq); + + /* poke sleep timer if necessary */ + if (state->debug_enable && !state->no_sleep) + signal_helper = true; + + atomic_set(&state->unhandled_fiq_count, 0); + state->in_fiq = false; + + return signal_helper; +} + +#ifdef CONFIG_FIQ_GLUE +static void fiq_debugger_fiq(struct fiq_glue_handler *h, + const struct pt_regs *regs, void *svc_sp) +{ + struct fiq_debugger_state *state = + container_of(h, struct fiq_debugger_state, handler); + unsigned int this_cpu = THREAD_INFO(svc_sp)->cpu; + bool need_irq; + + need_irq = fiq_debugger_handle_uart_interrupt(state, this_cpu, regs, + svc_sp); + if (need_irq) + fiq_debugger_force_irq(state); +} +#endif + +/* + * When not using FIQs, we only use this single interrupt as an entry point. + * This just effectively takes over the UART interrupt and does all the work + * in this context. + */ +static irqreturn_t fiq_debugger_uart_irq(int irq, void *dev) +{ + struct fiq_debugger_state *state = dev; + bool not_done; + + fiq_debugger_handle_wakeup(state); + + /* handle the debugger irq in regular context */ + not_done = fiq_debugger_handle_uart_interrupt(state, smp_processor_id(), + get_irq_regs(), + current_thread_info()); + if (not_done) + fiq_debugger_handle_irq_context(state); + + return IRQ_HANDLED; +} + +/* + * If FIQs are used, not everything can happen in fiq context. + * FIQ handler does what it can and then signals this interrupt to finish the + * job in irq context. + */ +static irqreturn_t fiq_debugger_signal_irq(int irq, void *dev) +{ + struct fiq_debugger_state *state = dev; + + if (state->pdata->force_irq_ack) + state->pdata->force_irq_ack(state->pdev, state->signal_irq); + + fiq_debugger_handle_irq_context(state); + + return IRQ_HANDLED; +} + +#ifdef CONFIG_FIQ_GLUE +static void fiq_debugger_resume(struct fiq_glue_handler *h) +{ + struct fiq_debugger_state *state = + container_of(h, struct fiq_debugger_state, handler); + if (state->pdata->uart_resume) + state->pdata->uart_resume(state->pdev); +} +#endif + +#if defined(CONFIG_FIQ_DEBUGGER_CONSOLE) +struct tty_driver *fiq_debugger_console_device(struct console *co, int *index) +{ + *index = co->index; + return fiq_tty_driver; +} + +static void fiq_debugger_console_write(struct console *co, + const char *s, unsigned int count) +{ + struct fiq_debugger_state *state; + unsigned long flags; + + state = container_of(co, struct fiq_debugger_state, console); + + if (!state->console_enable && !state->syslog_dumping) + return; + + fiq_debugger_uart_enable(state); + spin_lock_irqsave(&state->console_lock, flags); + while (count--) { + if (*s == '\n') + fiq_debugger_putc(state, '\r'); + fiq_debugger_putc(state, *s++); + } + fiq_debugger_uart_flush(state); + spin_unlock_irqrestore(&state->console_lock, flags); + fiq_debugger_uart_disable(state); +} + +static struct console fiq_debugger_console = { + .name = "ttyFIQ", + .device = fiq_debugger_console_device, + .write = fiq_debugger_console_write, + .flags = CON_PRINTBUFFER | CON_ANYTIME | CON_ENABLED, +}; + +int fiq_tty_open(struct tty_struct *tty, struct file *filp) +{ + int line = tty->index; + struct fiq_debugger_state **states = tty->driver->driver_state; + struct fiq_debugger_state *state = states[line]; + + return tty_port_open(&state->tty_port, tty, filp); +} + +void fiq_tty_close(struct tty_struct *tty, struct file *filp) +{ + tty_port_close(tty->port, tty, filp); +} + +int fiq_tty_write(struct tty_struct *tty, const unsigned char *buf, int count) +{ + int i; + int line = tty->index; + struct fiq_debugger_state **states = tty->driver->driver_state; + struct fiq_debugger_state *state = states[line]; + + if (!state->console_enable) + return count; + + fiq_debugger_uart_enable(state); + spin_lock_irq(&state->console_lock); + for (i = 0; i < count; i++) + fiq_debugger_putc(state, *buf++); + spin_unlock_irq(&state->console_lock); + fiq_debugger_uart_disable(state); + + return count; +} + +int fiq_tty_write_room(struct tty_struct *tty) +{ + return 16; +} + +#ifdef CONFIG_CONSOLE_POLL +static int fiq_tty_poll_init(struct tty_driver *driver, int line, char *options) +{ + return 0; +} + +static int fiq_tty_poll_get_char(struct tty_driver *driver, int line) +{ + struct fiq_debugger_state **states = driver->driver_state; + struct fiq_debugger_state *state = states[line]; + int c = NO_POLL_CHAR; + + fiq_debugger_uart_enable(state); + if (fiq_debugger_have_fiq(state)) { + int count = fiq_debugger_ringbuf_level(state->tty_rbuf); + if (count > 0) { + c = fiq_debugger_ringbuf_peek(state->tty_rbuf, 0); + fiq_debugger_ringbuf_consume(state->tty_rbuf, 1); + } + } else { + c = fiq_debugger_getc(state); + if (c == FIQ_DEBUGGER_NO_CHAR) + c = NO_POLL_CHAR; + } + fiq_debugger_uart_disable(state); + + return c; +} + +static void fiq_tty_poll_put_char(struct tty_driver *driver, int line, char ch) +{ + struct fiq_debugger_state **states = driver->driver_state; + struct fiq_debugger_state *state = states[line]; + fiq_debugger_uart_enable(state); + fiq_debugger_putc(state, ch); + fiq_debugger_uart_disable(state); +} +#endif + +static const struct tty_port_operations fiq_tty_port_ops; + +static const struct tty_operations fiq_tty_driver_ops = { + .write = fiq_tty_write, + .write_room = fiq_tty_write_room, + .open = fiq_tty_open, + .close = fiq_tty_close, +#ifdef CONFIG_CONSOLE_POLL + .poll_init = fiq_tty_poll_init, + .poll_get_char = fiq_tty_poll_get_char, + .poll_put_char = fiq_tty_poll_put_char, +#endif +}; + +static int fiq_debugger_tty_init(void) +{ + int ret; + struct fiq_debugger_state **states = NULL; + + states = kzalloc(sizeof(*states) * MAX_FIQ_DEBUGGER_PORTS, GFP_KERNEL); + if (!states) { + pr_err("Failed to allocate fiq debugger state structres\n"); + return -ENOMEM; + } + + fiq_tty_driver = alloc_tty_driver(MAX_FIQ_DEBUGGER_PORTS); + if (!fiq_tty_driver) { + pr_err("Failed to allocate fiq debugger tty\n"); + ret = -ENOMEM; + goto err_free_state; + } + + fiq_tty_driver->owner = THIS_MODULE; + fiq_tty_driver->driver_name = "fiq-debugger"; + fiq_tty_driver->name = "ttyFIQ"; + fiq_tty_driver->type = TTY_DRIVER_TYPE_SERIAL; + fiq_tty_driver->subtype = SERIAL_TYPE_NORMAL; + fiq_tty_driver->init_termios = tty_std_termios; + fiq_tty_driver->flags = TTY_DRIVER_REAL_RAW | + TTY_DRIVER_DYNAMIC_DEV; + fiq_tty_driver->driver_state = states; + + fiq_tty_driver->init_termios.c_cflag = + B115200 | CS8 | CREAD | HUPCL | CLOCAL; + fiq_tty_driver->init_termios.c_ispeed = 115200; + fiq_tty_driver->init_termios.c_ospeed = 115200; + + tty_set_operations(fiq_tty_driver, &fiq_tty_driver_ops); + + ret = tty_register_driver(fiq_tty_driver); + if (ret) { + pr_err("Failed to register fiq tty: %d\n", ret); + goto err_free_tty; + } + + pr_info("Registered FIQ tty driver\n"); + return 0; + +err_free_tty: + put_tty_driver(fiq_tty_driver); + fiq_tty_driver = NULL; +err_free_state: + kfree(states); + return ret; +} + +static int fiq_debugger_tty_init_one(struct fiq_debugger_state *state) +{ + int ret; + struct device *tty_dev; + struct fiq_debugger_state **states = fiq_tty_driver->driver_state; + + states[state->pdev->id] = state; + + state->tty_rbuf = fiq_debugger_ringbuf_alloc(1024); + if (!state->tty_rbuf) { + pr_err("Failed to allocate fiq debugger ringbuf\n"); + ret = -ENOMEM; + goto err; + } + + tty_port_init(&state->tty_port); + state->tty_port.ops = &fiq_tty_port_ops; + + tty_dev = tty_port_register_device(&state->tty_port, fiq_tty_driver, + state->pdev->id, &state->pdev->dev); + if (IS_ERR(tty_dev)) { + pr_err("Failed to register fiq debugger tty device\n"); + ret = PTR_ERR(tty_dev); + goto err; + } + + device_set_wakeup_capable(tty_dev, 1); + + pr_info("Registered fiq debugger ttyFIQ%d\n", state->pdev->id); + + return 0; + +err: + fiq_debugger_ringbuf_free(state->tty_rbuf); + state->tty_rbuf = NULL; + return ret; +} +#endif + +static int fiq_debugger_dev_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct fiq_debugger_state *state = platform_get_drvdata(pdev); + + if (state->pdata->uart_dev_suspend) + return state->pdata->uart_dev_suspend(pdev); + return 0; +} + +static int fiq_debugger_dev_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct fiq_debugger_state *state = platform_get_drvdata(pdev); + + if (state->pdata->uart_dev_resume) + return state->pdata->uart_dev_resume(pdev); + return 0; +} + +static int fiq_debugger_probe(struct platform_device *pdev) +{ + int ret; + struct fiq_debugger_pdata *pdata = dev_get_platdata(&pdev->dev); + struct fiq_debugger_state *state; + int fiq; + int uart_irq; + + if (pdev->id >= MAX_FIQ_DEBUGGER_PORTS) + return -EINVAL; + + if (!pdata->uart_getc || !pdata->uart_putc) + return -EINVAL; + if ((pdata->uart_enable && !pdata->uart_disable) || + (!pdata->uart_enable && pdata->uart_disable)) + return -EINVAL; + + fiq = platform_get_irq_byname(pdev, "fiq"); + uart_irq = platform_get_irq_byname(pdev, "uart_irq"); + + /* uart_irq mode and fiq mode are mutually exclusive, but one of them + * is required */ + if ((uart_irq < 0 && fiq < 0) || (uart_irq >= 0 && fiq >= 0)) + return -EINVAL; + if (fiq >= 0 && !pdata->fiq_enable) + return -EINVAL; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + state->output.printf = fiq_debugger_printf; + setup_timer(&state->sleep_timer, fiq_debugger_sleep_timer_expired, + (unsigned long)state); + state->pdata = pdata; + state->pdev = pdev; + state->no_sleep = initial_no_sleep; + state->debug_enable = initial_debug_enable; + state->console_enable = initial_console_enable; + + state->fiq = fiq; + state->uart_irq = uart_irq; + state->signal_irq = platform_get_irq_byname(pdev, "signal"); + state->wakeup_irq = platform_get_irq_byname(pdev, "wakeup"); + + INIT_WORK(&state->work, fiq_debugger_work); + spin_lock_init(&state->work_lock); + + platform_set_drvdata(pdev, state); + + spin_lock_init(&state->sleep_timer_lock); + + if (state->wakeup_irq < 0 && fiq_debugger_have_fiq(state)) + state->no_sleep = true; + state->ignore_next_wakeup_irq = !state->no_sleep; + + wakeup_source_init(&state->debugger_wake_src, "serial-debug"); + + state->clk = clk_get(&pdev->dev, NULL); + if (IS_ERR(state->clk)) + state->clk = NULL; + + /* do not call pdata->uart_enable here since uart_init may still + * need to do some initialization before uart_enable can work. + * So, only try to manage the clock during init. + */ + if (state->clk) + clk_enable(state->clk); + + if (pdata->uart_init) { + ret = pdata->uart_init(pdev); + if (ret) + goto err_uart_init; + } + + fiq_debugger_printf_nfiq(state, + "<hit enter %sto activate fiq debugger>\n", + state->no_sleep ? "" : "twice "); + +#ifdef CONFIG_FIQ_GLUE + if (fiq_debugger_have_fiq(state)) { + state->handler.fiq = fiq_debugger_fiq; + state->handler.resume = fiq_debugger_resume; + ret = fiq_glue_register_handler(&state->handler); + if (ret) { + pr_err("%s: could not install fiq handler\n", __func__); + goto err_register_irq; + } + + pdata->fiq_enable(pdev, state->fiq, 1); + } else +#endif + { + ret = request_irq(state->uart_irq, fiq_debugger_uart_irq, + IRQF_NO_SUSPEND, "debug", state); + if (ret) { + pr_err("%s: could not install irq handler\n", __func__); + goto err_register_irq; + } + + /* for irq-only mode, we want this irq to wake us up, if it + * can. + */ + enable_irq_wake(state->uart_irq); + } + + if (state->clk) + clk_disable(state->clk); + + if (state->signal_irq >= 0) { + ret = request_irq(state->signal_irq, fiq_debugger_signal_irq, + IRQF_TRIGGER_RISING, "debug-signal", state); + if (ret) + pr_err("serial_debugger: could not install signal_irq"); + } + + if (state->wakeup_irq >= 0) { + ret = request_irq(state->wakeup_irq, + fiq_debugger_wakeup_irq_handler, + IRQF_TRIGGER_FALLING, + "debug-wakeup", state); + if (ret) { + pr_err("serial_debugger: " + "could not install wakeup irq\n"); + state->wakeup_irq = -1; + } else { + ret = enable_irq_wake(state->wakeup_irq); + if (ret) { + pr_err("serial_debugger: " + "could not enable wakeup\n"); + state->wakeup_irq_no_set_wake = true; + } + } + } + if (state->no_sleep) + fiq_debugger_handle_wakeup(state); + +#if defined(CONFIG_FIQ_DEBUGGER_CONSOLE) + spin_lock_init(&state->console_lock); + state->console = fiq_debugger_console; + state->console.index = pdev->id; + if (!console_set_on_cmdline) + add_preferred_console(state->console.name, + state->console.index, NULL); + register_console(&state->console); + fiq_debugger_tty_init_one(state); +#endif + return 0; + +err_register_irq: + if (pdata->uart_free) + pdata->uart_free(pdev); +err_uart_init: + if (state->clk) + clk_disable(state->clk); + if (state->clk) + clk_put(state->clk); + wakeup_source_trash(&state->debugger_wake_src); + platform_set_drvdata(pdev, NULL); + kfree(state); + return ret; +} + +static const struct dev_pm_ops fiq_debugger_dev_pm_ops = { + .suspend = fiq_debugger_dev_suspend, + .resume = fiq_debugger_dev_resume, +}; + +static struct platform_driver fiq_debugger_driver = { + .probe = fiq_debugger_probe, + .driver = { + .name = "fiq_debugger", + .pm = &fiq_debugger_dev_pm_ops, + }, +}; + +#if defined(CONFIG_FIQ_DEBUGGER_UART_OVERLAY) +int fiq_debugger_uart_overlay(void) +{ + struct device_node *onp = of_find_node_by_path("/uart_overlay@0"); + int ret; + + if (!onp) { + pr_err("serial_debugger: uart overlay not found\n"); + return -ENODEV; + } + + ret = of_overlay_create(onp); + if (ret < 0) { + pr_err("serial_debugger: fail to create overlay: %d\n", ret); + of_node_put(onp); + return ret; + } + + pr_info("serial_debugger: uart overlay applied\n"); + return 0; +} +#endif + +static int __init fiq_debugger_init(void) +{ + if (fiq_debugger_disable) { + pr_err("serial_debugger: disabled\n"); + return -ENODEV; + } +#if defined(CONFIG_FIQ_DEBUGGER_CONSOLE) + fiq_debugger_tty_init(); +#endif +#if defined(CONFIG_FIQ_DEBUGGER_UART_OVERLAY) + fiq_debugger_uart_overlay(); +#endif + return platform_driver_register(&fiq_debugger_driver); +} + +postcore_initcall(fiq_debugger_init);
diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger.h b/drivers/staging/android/fiq_debugger/fiq_debugger.h new file mode 100644 index 0000000..c9ec4f8 --- /dev/null +++ b/drivers/staging/android/fiq_debugger/fiq_debugger.h
@@ -0,0 +1,64 @@ +/* + * drivers/staging/android/fiq_debugger/fiq_debugger.h + * + * Copyright (C) 2010 Google, Inc. + * Author: Colin Cross <ccross@android.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _ARCH_ARM_MACH_TEGRA_FIQ_DEBUGGER_H_ +#define _ARCH_ARM_MACH_TEGRA_FIQ_DEBUGGER_H_ + +#include <linux/serial_core.h> + +#define FIQ_DEBUGGER_NO_CHAR NO_POLL_CHAR +#define FIQ_DEBUGGER_BREAK 0x00ff0100 + +#define FIQ_DEBUGGER_FIQ_IRQ_NAME "fiq" +#define FIQ_DEBUGGER_SIGNAL_IRQ_NAME "signal" +#define FIQ_DEBUGGER_WAKEUP_IRQ_NAME "wakeup" + +/** + * struct fiq_debugger_pdata - fiq debugger platform data + * @uart_resume: used to restore uart state right before enabling + * the fiq. + * @uart_enable: Do the work necessary to communicate with the uart + * hw (enable clocks, etc.). This must be ref-counted. + * @uart_disable: Do the work necessary to disable the uart hw + * (disable clocks, etc.). This must be ref-counted. + * @uart_dev_suspend: called during PM suspend, generally not needed + * for real fiq mode debugger. + * @uart_dev_resume: called during PM resume, generally not needed + * for real fiq mode debugger. + */ +struct fiq_debugger_pdata { + int (*uart_init)(struct platform_device *pdev); + void (*uart_free)(struct platform_device *pdev); + int (*uart_resume)(struct platform_device *pdev); + int (*uart_getc)(struct platform_device *pdev); + void (*uart_putc)(struct platform_device *pdev, unsigned int c); + void (*uart_flush)(struct platform_device *pdev); + void (*uart_enable)(struct platform_device *pdev); + void (*uart_disable)(struct platform_device *pdev); + + int (*uart_dev_suspend)(struct platform_device *pdev); + int (*uart_dev_resume)(struct platform_device *pdev); + + void (*fiq_enable)(struct platform_device *pdev, unsigned int fiq, + bool enable); + void (*fiq_ack)(struct platform_device *pdev, unsigned int fiq); + + void (*force_irq)(struct platform_device *pdev, unsigned int irq); + void (*force_irq_ack)(struct platform_device *pdev, unsigned int irq); +}; + +#endif
diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger_arm.c b/drivers/staging/android/fiq_debugger/fiq_debugger_arm.c new file mode 100644 index 0000000..8b3e013 --- /dev/null +++ b/drivers/staging/android/fiq_debugger/fiq_debugger_arm.c
@@ -0,0 +1,240 @@ +/* + * Copyright (C) 2014 Google, Inc. + * Author: Colin Cross <ccross@android.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/ptrace.h> +#include <linux/uaccess.h> + +#include <asm/stacktrace.h> + +#include "fiq_debugger_priv.h" + +static char *mode_name(unsigned cpsr) +{ + switch (cpsr & MODE_MASK) { + case USR_MODE: return "USR"; + case FIQ_MODE: return "FIQ"; + case IRQ_MODE: return "IRQ"; + case SVC_MODE: return "SVC"; + case ABT_MODE: return "ABT"; + case UND_MODE: return "UND"; + case SYSTEM_MODE: return "SYS"; + default: return "???"; + } +} + +void fiq_debugger_dump_pc(struct fiq_debugger_output *output, + const struct pt_regs *regs) +{ + output->printf(output, " pc %08x cpsr %08x mode %s\n", + regs->ARM_pc, regs->ARM_cpsr, mode_name(regs->ARM_cpsr)); +} + +void fiq_debugger_dump_regs(struct fiq_debugger_output *output, + const struct pt_regs *regs) +{ + output->printf(output, + " r0 %08x r1 %08x r2 %08x r3 %08x\n", + regs->ARM_r0, regs->ARM_r1, regs->ARM_r2, regs->ARM_r3); + output->printf(output, + " r4 %08x r5 %08x r6 %08x r7 %08x\n", + regs->ARM_r4, regs->ARM_r5, regs->ARM_r6, regs->ARM_r7); + output->printf(output, + " r8 %08x r9 %08x r10 %08x r11 %08x mode %s\n", + regs->ARM_r8, regs->ARM_r9, regs->ARM_r10, regs->ARM_fp, + mode_name(regs->ARM_cpsr)); + output->printf(output, + " ip %08x sp %08x lr %08x pc %08x cpsr %08x\n", + regs->ARM_ip, regs->ARM_sp, regs->ARM_lr, regs->ARM_pc, + regs->ARM_cpsr); +} + +struct mode_regs { + unsigned long sp_svc; + unsigned long lr_svc; + unsigned long spsr_svc; + + unsigned long sp_abt; + unsigned long lr_abt; + unsigned long spsr_abt; + + unsigned long sp_und; + unsigned long lr_und; + unsigned long spsr_und; + + unsigned long sp_irq; + unsigned long lr_irq; + unsigned long spsr_irq; + + unsigned long r8_fiq; + unsigned long r9_fiq; + unsigned long r10_fiq; + unsigned long r11_fiq; + unsigned long r12_fiq; + unsigned long sp_fiq; + unsigned long lr_fiq; + unsigned long spsr_fiq; +}; + +static void __naked get_mode_regs(struct mode_regs *regs) +{ + asm volatile ( + "mrs r1, cpsr\n" + "msr cpsr_c, #0xd3 @(SVC_MODE | PSR_I_BIT | PSR_F_BIT)\n" + "stmia r0!, {r13 - r14}\n" + "mrs r2, spsr\n" + "msr cpsr_c, #0xd7 @(ABT_MODE | PSR_I_BIT | PSR_F_BIT)\n" + "stmia r0!, {r2, r13 - r14}\n" + "mrs r2, spsr\n" + "msr cpsr_c, #0xdb @(UND_MODE | PSR_I_BIT | PSR_F_BIT)\n" + "stmia r0!, {r2, r13 - r14}\n" + "mrs r2, spsr\n" + "msr cpsr_c, #0xd2 @(IRQ_MODE | PSR_I_BIT | PSR_F_BIT)\n" + "stmia r0!, {r2, r13 - r14}\n" + "mrs r2, spsr\n" + "msr cpsr_c, #0xd1 @(FIQ_MODE | PSR_I_BIT | PSR_F_BIT)\n" + "stmia r0!, {r2, r8 - r14}\n" + "mrs r2, spsr\n" + "stmia r0!, {r2}\n" + "msr cpsr_c, r1\n" + "bx lr\n"); +} + + +void fiq_debugger_dump_allregs(struct fiq_debugger_output *output, + const struct pt_regs *regs) +{ + struct mode_regs mode_regs; + unsigned long mode = regs->ARM_cpsr & MODE_MASK; + + fiq_debugger_dump_regs(output, regs); + get_mode_regs(&mode_regs); + + output->printf(output, + "%csvc: sp %08x lr %08x spsr %08x\n", + mode == SVC_MODE ? '*' : ' ', + mode_regs.sp_svc, mode_regs.lr_svc, mode_regs.spsr_svc); + output->printf(output, + "%cabt: sp %08x lr %08x spsr %08x\n", + mode == ABT_MODE ? '*' : ' ', + mode_regs.sp_abt, mode_regs.lr_abt, mode_regs.spsr_abt); + output->printf(output, + "%cund: sp %08x lr %08x spsr %08x\n", + mode == UND_MODE ? '*' : ' ', + mode_regs.sp_und, mode_regs.lr_und, mode_regs.spsr_und); + output->printf(output, + "%cirq: sp %08x lr %08x spsr %08x\n", + mode == IRQ_MODE ? '*' : ' ', + mode_regs.sp_irq, mode_regs.lr_irq, mode_regs.spsr_irq); + output->printf(output, + "%cfiq: r8 %08x r9 %08x r10 %08x r11 %08x r12 %08x\n", + mode == FIQ_MODE ? '*' : ' ', + mode_regs.r8_fiq, mode_regs.r9_fiq, mode_regs.r10_fiq, + mode_regs.r11_fiq, mode_regs.r12_fiq); + output->printf(output, + " fiq: sp %08x lr %08x spsr %08x\n", + mode_regs.sp_fiq, mode_regs.lr_fiq, mode_regs.spsr_fiq); +} + +struct stacktrace_state { + struct fiq_debugger_output *output; + unsigned int depth; +}; + +static int report_trace(struct stackframe *frame, void *d) +{ + struct stacktrace_state *sts = d; + + if (sts->depth) { + sts->output->printf(sts->output, + " pc: %p (%pF), lr %p (%pF), sp %p, fp %p\n", + frame->pc, frame->pc, frame->lr, frame->lr, + frame->sp, frame->fp); + sts->depth--; + return 0; + } + sts->output->printf(sts->output, " ...\n"); + + return sts->depth == 0; +} + +struct frame_tail { + struct frame_tail *fp; + unsigned long sp; + unsigned long lr; +} __attribute__((packed)); + +static struct frame_tail *user_backtrace(struct fiq_debugger_output *output, + struct frame_tail *tail) +{ + struct frame_tail buftail[2]; + + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) { + output->printf(output, " invalid frame pointer %p\n", + tail); + return NULL; + } + if (__copy_from_user_inatomic(buftail, tail, sizeof(buftail))) { + output->printf(output, + " failed to copy frame pointer %p\n", tail); + return NULL; + } + + output->printf(output, " %p\n", buftail[0].lr); + + /* frame pointers should strictly progress back up the stack + * (towards higher addresses) */ + if (tail >= buftail[0].fp) + return NULL; + + return buftail[0].fp-1; +} + +void fiq_debugger_dump_stacktrace(struct fiq_debugger_output *output, + const struct pt_regs *regs, unsigned int depth, void *ssp) +{ + struct frame_tail *tail; + struct thread_info *real_thread_info = THREAD_INFO(ssp); + struct stacktrace_state sts; + + sts.depth = depth; + sts.output = output; + *current_thread_info() = *real_thread_info; + + if (!current) + output->printf(output, "current NULL\n"); + else + output->printf(output, "pid: %d comm: %s\n", + current->pid, current->comm); + fiq_debugger_dump_regs(output, regs); + + if (!user_mode(regs)) { + struct stackframe frame; + frame.fp = regs->ARM_fp; + frame.sp = regs->ARM_sp; + frame.lr = regs->ARM_lr; + frame.pc = regs->ARM_pc; + output->printf(output, + " pc: %p (%pF), lr %p (%pF), sp %p, fp %p\n", + regs->ARM_pc, regs->ARM_pc, regs->ARM_lr, regs->ARM_lr, + regs->ARM_sp, regs->ARM_fp); + walk_stackframe(&frame, report_trace, &sts); + return; + } + + tail = ((struct frame_tail *) regs->ARM_fp) - 1; + while (depth-- && tail && !((unsigned long) tail & 3)) + tail = user_backtrace(output, tail); +}
diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger_arm64.c b/drivers/staging/android/fiq_debugger/fiq_debugger_arm64.c new file mode 100644 index 0000000..c53f498 --- /dev/null +++ b/drivers/staging/android/fiq_debugger/fiq_debugger_arm64.c
@@ -0,0 +1,201 @@ +/* + * Copyright (C) 2014 Google, Inc. + * Author: Colin Cross <ccross@android.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/ptrace.h> +#include <asm/stacktrace.h> + +#include "fiq_debugger_priv.h" + +static char *mode_name(const struct pt_regs *regs) +{ + if (compat_user_mode(regs)) { + return "USR"; + } else { + switch (processor_mode(regs)) { + case PSR_MODE_EL0t: return "EL0t"; + case PSR_MODE_EL1t: return "EL1t"; + case PSR_MODE_EL1h: return "EL1h"; + case PSR_MODE_EL2t: return "EL2t"; + case PSR_MODE_EL2h: return "EL2h"; + default: return "???"; + } + } +} + +void fiq_debugger_dump_pc(struct fiq_debugger_output *output, + const struct pt_regs *regs) +{ + output->printf(output, " pc %016lx cpsr %08lx mode %s\n", + regs->pc, regs->pstate, mode_name(regs)); +} + +void fiq_debugger_dump_regs_aarch32(struct fiq_debugger_output *output, + const struct pt_regs *regs) +{ + output->printf(output, " r0 %08x r1 %08x r2 %08x r3 %08x\n", + regs->compat_usr(0), regs->compat_usr(1), + regs->compat_usr(2), regs->compat_usr(3)); + output->printf(output, " r4 %08x r5 %08x r6 %08x r7 %08x\n", + regs->compat_usr(4), regs->compat_usr(5), + regs->compat_usr(6), regs->compat_usr(7)); + output->printf(output, " r8 %08x r9 %08x r10 %08x r11 %08x\n", + regs->compat_usr(8), regs->compat_usr(9), + regs->compat_usr(10), regs->compat_usr(11)); + output->printf(output, " ip %08x sp %08x lr %08x pc %08x\n", + regs->compat_usr(12), regs->compat_sp, + regs->compat_lr, regs->pc); + output->printf(output, " cpsr %08x (%s)\n", + regs->pstate, mode_name(regs)); +} + +void fiq_debugger_dump_regs_aarch64(struct fiq_debugger_output *output, + const struct pt_regs *regs) +{ + + output->printf(output, " x0 %016lx x1 %016lx\n", + regs->regs[0], regs->regs[1]); + output->printf(output, " x2 %016lx x3 %016lx\n", + regs->regs[2], regs->regs[3]); + output->printf(output, " x4 %016lx x5 %016lx\n", + regs->regs[4], regs->regs[5]); + output->printf(output, " x6 %016lx x7 %016lx\n", + regs->regs[6], regs->regs[7]); + output->printf(output, " x8 %016lx x9 %016lx\n", + regs->regs[8], regs->regs[9]); + output->printf(output, " x10 %016lx x11 %016lx\n", + regs->regs[10], regs->regs[11]); + output->printf(output, " x12 %016lx x13 %016lx\n", + regs->regs[12], regs->regs[13]); + output->printf(output, " x14 %016lx x15 %016lx\n", + regs->regs[14], regs->regs[15]); + output->printf(output, " x16 %016lx x17 %016lx\n", + regs->regs[16], regs->regs[17]); + output->printf(output, " x18 %016lx x19 %016lx\n", + regs->regs[18], regs->regs[19]); + output->printf(output, " x20 %016lx x21 %016lx\n", + regs->regs[20], regs->regs[21]); + output->printf(output, " x22 %016lx x23 %016lx\n", + regs->regs[22], regs->regs[23]); + output->printf(output, " x24 %016lx x25 %016lx\n", + regs->regs[24], regs->regs[25]); + output->printf(output, " x26 %016lx x27 %016lx\n", + regs->regs[26], regs->regs[27]); + output->printf(output, " x28 %016lx x29 %016lx\n", + regs->regs[28], regs->regs[29]); + output->printf(output, " x30 %016lx sp %016lx\n", + regs->regs[30], regs->sp); + output->printf(output, " pc %016lx cpsr %08x (%s)\n", + regs->pc, regs->pstate, mode_name(regs)); +} + +void fiq_debugger_dump_regs(struct fiq_debugger_output *output, + const struct pt_regs *regs) +{ + if (compat_user_mode(regs)) + fiq_debugger_dump_regs_aarch32(output, regs); + else + fiq_debugger_dump_regs_aarch64(output, regs); +} + +#define READ_SPECIAL_REG(x) ({ \ + u64 val; \ + asm volatile ("mrs %0, " # x : "=r"(val)); \ + val; \ +}) + +void fiq_debugger_dump_allregs(struct fiq_debugger_output *output, + const struct pt_regs *regs) +{ + u32 pstate = READ_SPECIAL_REG(CurrentEl); + bool in_el2 = (pstate & PSR_MODE_MASK) >= PSR_MODE_EL2t; + + fiq_debugger_dump_regs(output, regs); + + output->printf(output, " sp_el0 %016lx\n", + READ_SPECIAL_REG(sp_el0)); + + if (in_el2) + output->printf(output, " sp_el1 %016lx\n", + READ_SPECIAL_REG(sp_el1)); + + output->printf(output, " elr_el1 %016lx\n", + READ_SPECIAL_REG(elr_el1)); + + output->printf(output, " spsr_el1 %08lx\n", + READ_SPECIAL_REG(spsr_el1)); + + if (in_el2) { + output->printf(output, " spsr_irq %08lx\n", + READ_SPECIAL_REG(spsr_irq)); + output->printf(output, " spsr_abt %08lx\n", + READ_SPECIAL_REG(spsr_abt)); + output->printf(output, " spsr_und %08lx\n", + READ_SPECIAL_REG(spsr_und)); + output->printf(output, " spsr_fiq %08lx\n", + READ_SPECIAL_REG(spsr_fiq)); + output->printf(output, " spsr_el2 %08lx\n", + READ_SPECIAL_REG(elr_el2)); + output->printf(output, " spsr_el2 %08lx\n", + READ_SPECIAL_REG(spsr_el2)); + } +} + +struct stacktrace_state { + struct fiq_debugger_output *output; + unsigned int depth; +}; + +static int report_trace(struct stackframe *frame, void *d) +{ + struct stacktrace_state *sts = d; + + if (sts->depth) { + sts->output->printf(sts->output, "%pF:\n", frame->pc); + sts->output->printf(sts->output, + " pc %016lx fp %016lx\n", + frame->pc, frame->fp); + sts->depth--; + return 0; + } + sts->output->printf(sts->output, " ...\n"); + + return sts->depth == 0; +} + +void fiq_debugger_dump_stacktrace(struct fiq_debugger_output *output, + const struct pt_regs *regs, unsigned int depth, void *ssp) +{ + struct thread_info *real_thread_info = THREAD_INFO(ssp); + struct stacktrace_state sts; + + sts.depth = depth; + sts.output = output; + *current_thread_info() = *real_thread_info; + + if (!current) + output->printf(output, "current NULL\n"); + else + output->printf(output, "pid: %d comm: %s\n", + current->pid, current->comm); + fiq_debugger_dump_regs(output, regs); + + if (!user_mode(regs)) { + struct stackframe frame; + frame.fp = regs->regs[29]; + frame.pc = regs->pc; + output->printf(output, "\n"); + walk_stackframe(current, &frame, report_trace, &sts); + } +}
diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger_priv.h b/drivers/staging/android/fiq_debugger/fiq_debugger_priv.h new file mode 100644 index 0000000..d5d051f --- /dev/null +++ b/drivers/staging/android/fiq_debugger/fiq_debugger_priv.h
@@ -0,0 +1,37 @@ +/* + * Copyright (C) 2014 Google, Inc. + * Author: Colin Cross <ccross@android.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _FIQ_DEBUGGER_PRIV_H_ +#define _FIQ_DEBUGGER_PRIV_H_ + +#define THREAD_INFO(sp) ((struct thread_info *) \ + ((unsigned long)(sp) & ~(THREAD_SIZE - 1))) + +struct fiq_debugger_output { + void (*printf)(struct fiq_debugger_output *output, const char *fmt, ...); +}; + +struct pt_regs; + +void fiq_debugger_dump_pc(struct fiq_debugger_output *output, + const struct pt_regs *regs); +void fiq_debugger_dump_regs(struct fiq_debugger_output *output, + const struct pt_regs *regs); +void fiq_debugger_dump_allregs(struct fiq_debugger_output *output, + const struct pt_regs *regs); +void fiq_debugger_dump_stacktrace(struct fiq_debugger_output *output, + const struct pt_regs *regs, unsigned int depth, void *ssp); + +#endif
diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger_ringbuf.h b/drivers/staging/android/fiq_debugger/fiq_debugger_ringbuf.h new file mode 100644 index 0000000..10c3c5d0 --- /dev/null +++ b/drivers/staging/android/fiq_debugger/fiq_debugger_ringbuf.h
@@ -0,0 +1,94 @@ +/* + * drivers/staging/android/fiq_debugger/fiq_debugger_ringbuf.h + * + * simple lockless ringbuffer + * + * Copyright (C) 2010 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> + +struct fiq_debugger_ringbuf { + int len; + int head; + int tail; + u8 buf[]; +}; + + +static inline struct fiq_debugger_ringbuf *fiq_debugger_ringbuf_alloc(int len) +{ + struct fiq_debugger_ringbuf *rbuf; + + rbuf = kzalloc(sizeof(*rbuf) + len, GFP_KERNEL); + if (rbuf == NULL) + return NULL; + + rbuf->len = len; + rbuf->head = 0; + rbuf->tail = 0; + smp_mb(); + + return rbuf; +} + +static inline void fiq_debugger_ringbuf_free(struct fiq_debugger_ringbuf *rbuf) +{ + kfree(rbuf); +} + +static inline int fiq_debugger_ringbuf_level(struct fiq_debugger_ringbuf *rbuf) +{ + int level = rbuf->head - rbuf->tail; + + if (level < 0) + level = rbuf->len + level; + + return level; +} + +static inline int fiq_debugger_ringbuf_room(struct fiq_debugger_ringbuf *rbuf) +{ + return rbuf->len - fiq_debugger_ringbuf_level(rbuf) - 1; +} + +static inline u8 +fiq_debugger_ringbuf_peek(struct fiq_debugger_ringbuf *rbuf, int i) +{ + return rbuf->buf[(rbuf->tail + i) % rbuf->len]; +} + +static inline int +fiq_debugger_ringbuf_consume(struct fiq_debugger_ringbuf *rbuf, int count) +{ + count = min(count, fiq_debugger_ringbuf_level(rbuf)); + + rbuf->tail = (rbuf->tail + count) % rbuf->len; + smp_mb(); + + return count; +} + +static inline int +fiq_debugger_ringbuf_push(struct fiq_debugger_ringbuf *rbuf, u8 datum) +{ + if (fiq_debugger_ringbuf_room(rbuf) == 0) + return 0; + + rbuf->buf[rbuf->head] = datum; + smp_mb(); + rbuf->head = (rbuf->head + 1) % rbuf->len; + smp_mb(); + + return 1; +}
diff --git a/drivers/staging/android/fiq_debugger/fiq_watchdog.c b/drivers/staging/android/fiq_debugger/fiq_watchdog.c new file mode 100644 index 0000000..194b541 --- /dev/null +++ b/drivers/staging/android/fiq_debugger/fiq_watchdog.c
@@ -0,0 +1,56 @@ +/* + * Copyright (C) 2014 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/pstore_ram.h> + +#include "fiq_watchdog.h" +#include "fiq_debugger_priv.h" + +static DEFINE_RAW_SPINLOCK(fiq_watchdog_lock); + +static void fiq_watchdog_printf(struct fiq_debugger_output *output, + const char *fmt, ...) +{ + char buf[256]; + va_list ap; + int len; + + va_start(ap, fmt); + len = vscnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + ramoops_console_write_buf(buf, len); +} + +struct fiq_debugger_output fiq_watchdog_output = { + .printf = fiq_watchdog_printf, +}; + +void fiq_watchdog_triggered(const struct pt_regs *regs, void *svc_sp) +{ + char msg[24]; + int len; + + raw_spin_lock(&fiq_watchdog_lock); + + len = scnprintf(msg, sizeof(msg), "watchdog fiq cpu %d\n", + THREAD_INFO(svc_sp)->cpu); + ramoops_console_write_buf(msg, len); + + fiq_debugger_dump_stacktrace(&fiq_watchdog_output, regs, 100, svc_sp); + + raw_spin_unlock(&fiq_watchdog_lock); +}
diff --git a/drivers/staging/android/fiq_debugger/fiq_watchdog.h b/drivers/staging/android/fiq_debugger/fiq_watchdog.h new file mode 100644 index 0000000..c6b507f --- /dev/null +++ b/drivers/staging/android/fiq_debugger/fiq_watchdog.h
@@ -0,0 +1,20 @@ +/* + * Copyright (C) 2014 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _FIQ_WATCHDOG_H_ +#define _FIQ_WATCHDOG_H_ + +void fiq_watchdog_triggered(const struct pt_regs *regs, void *svc_sp); + +#endif
diff --git a/drivers/staging/android/uapi/vsoc_shm.h b/drivers/staging/android/uapi/vsoc_shm.h new file mode 100644 index 0000000..741b138 --- /dev/null +++ b/drivers/staging/android/uapi/vsoc_shm.h
@@ -0,0 +1,303 @@ +/* + * Copyright (C) 2017 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _UAPI_LINUX_VSOC_SHM_H +#define _UAPI_LINUX_VSOC_SHM_H + +#include <linux/types.h> + +/** + * A permission is a token that permits a receiver to read and/or write an area + * of memory within a Vsoc region. + * + * An fd_scoped permission grants both read and write access, and can be + * attached to a file description (see open(2)). + * Ownership of the area can then be shared by passing a file descriptor + * among processes. + * + * begin_offset and end_offset define the area of memory that is controlled by + * the permission. owner_offset points to a word, also in shared memory, that + * controls ownership of the area. + * + * ownership of the region expires when the associated file description is + * released. + * + * At most one permission can be attached to each file description. + * + * This is useful when implementing HALs like gralloc that scope and pass + * ownership of shared resources via file descriptors. + * + * The caller is responsibe for doing any fencing. + * + * The calling process will normally identify a currently free area of + * memory. It will construct a proposed fd_scoped_permission_arg structure: + * + * begin_offset and end_offset describe the area being claimed + * + * owner_offset points to the location in shared memory that indicates the + * owner of the area. + * + * owned_value is the value that will be stored in owner_offset iff the + * permission can be granted. It must be different than VSOC_REGION_FREE. + * + * Two fd_scoped_permission structures are compatible if they vary only by + * their owned_value fields. + * + * The driver ensures that, for any group of simultaneous callers proposing + * compatible fd_scoped_permissions, it will accept exactly one of the + * propopsals. The other callers will get a failure with errno of EAGAIN. + * + * A process receiving a file descriptor can identify the region being + * granted using the VSOC_GET_FD_SCOPED_PERMISSION ioctl. + */ +struct fd_scoped_permission { + __u32 begin_offset; + __u32 end_offset; + __u32 owner_offset; + __u32 owned_value; +}; + +/* + * This value represents a free area of memory. The driver expects to see this + * value at owner_offset when creating a permission otherwise it will not do it, + * and will write this value back once the permission is no longer needed. + */ +#define VSOC_REGION_FREE ((__u32)0) + +/** + * ioctl argument for VSOC_CREATE_FD_SCOPE_PERMISSION + */ +struct fd_scoped_permission_arg { + struct fd_scoped_permission perm; + __s32 managed_region_fd; +}; + +#define VSOC_NODE_FREE ((__u32)0) + +/* + * Describes a signal table in shared memory. Each non-zero entry in the + * table indicates that the receiver should signal the futex at the given + * offset. Offsets are relative to the region, not the shared memory window. + * + * interrupt_signalled_offset is used to reliably signal interrupts across the + * vmm boundary. There are two roles: transmitter and receiver. For example, + * in the host_to_guest_signal_table the host is the transmitter and the + * guest is the receiver. The protocol is as follows: + * + * 1. The transmitter should convert the offset of the futex to an offset + * in the signal table [0, (1 << num_nodes_lg2)) + * The transmitter can choose any appropriate hashing algorithm, including + * hash = futex_offset & ((1 << num_nodes_lg2) - 1) + * + * 3. The transmitter should atomically compare and swap futex_offset with 0 + * at hash. There are 3 possible outcomes + * a. The swap fails because the futex_offset is already in the table. + * The transmitter should stop. + * b. Some other offset is in the table. This is a hash collision. The + * transmitter should move to another table slot and try again. One + * possible algorithm: + * hash = (hash + 1) & ((1 << num_nodes_lg2) - 1) + * c. The swap worked. Continue below. + * + * 3. The transmitter atomically swaps 1 with the value at the + * interrupt_signalled_offset. There are two outcomes: + * a. The prior value was 1. In this case an interrupt has already been + * posted. The transmitter is done. + * b. The prior value was 0, indicating that the receiver may be sleeping. + * The transmitter will issue an interrupt. + * + * 4. On waking the receiver immediately exchanges a 0 with the + * interrupt_signalled_offset. If it receives a 0 then this a spurious + * interrupt. That may occasionally happen in the current protocol, but + * should be rare. + * + * 5. The receiver scans the signal table by atomicaly exchanging 0 at each + * location. If a non-zero offset is returned from the exchange the + * receiver wakes all sleepers at the given offset: + * futex((int*)(region_base + old_value), FUTEX_WAKE, MAX_INT); + * + * 6. The receiver thread then does a conditional wait, waking immediately + * if the value at interrupt_signalled_offset is non-zero. This catches cases + * here additional signals were posted while the table was being scanned. + * On the guest the wait is handled via the VSOC_WAIT_FOR_INCOMING_INTERRUPT + * ioctl. + */ +struct vsoc_signal_table_layout { + /* log_2(Number of signal table entries) */ + __u32 num_nodes_lg2; + /* + * Offset to the first signal table entry relative to the start of the + * region + */ + __u32 futex_uaddr_table_offset; + /* + * Offset to an atomic_t / atomic uint32_t. A non-zero value indicates + * that one or more offsets are currently posted in the table. + * semi-unique access to an entry in the table + */ + __u32 interrupt_signalled_offset; +}; + +#define VSOC_REGION_WHOLE ((__s32)0) +#define VSOC_DEVICE_NAME_SZ 16 + +/** + * Each HAL would (usually) talk to a single device region + * Mulitple entities care about these regions: + * - The ivshmem_server will populate the regions in shared memory + * - The guest kernel will read the region, create minor device nodes, and + * allow interested parties to register for FUTEX_WAKE events in the region + * - HALs will access via the minor device nodes published by the guest kernel + * - Host side processes will access the region via the ivshmem_server: + * 1. Pass name to ivshmem_server at a UNIX socket + * 2. ivshmemserver will reply with 2 fds: + * - host->guest doorbell fd + * - guest->host doorbell fd + * - fd for the shared memory region + * - region offset + * 3. Start a futex receiver thread on the doorbell fd pointed at the + * signal_nodes + */ +struct vsoc_device_region { + __u16 current_version; + __u16 min_compatible_version; + __u32 region_begin_offset; + __u32 region_end_offset; + __u32 offset_of_region_data; + struct vsoc_signal_table_layout guest_to_host_signal_table; + struct vsoc_signal_table_layout host_to_guest_signal_table; + /* Name of the device. Must always be terminated with a '\0', so + * the longest supported device name is 15 characters. + */ + char device_name[VSOC_DEVICE_NAME_SZ]; + /* There are two ways that permissions to access regions are handled: + * - When subdivided_by is VSOC_REGION_WHOLE, any process that can + * open the device node for the region gains complete access to it. + * - When subdivided is set processes that open the region cannot + * access it. Access to a sub-region must be established by invoking + * the VSOC_CREATE_FD_SCOPE_PERMISSION ioctl on the region + * referenced in subdivided_by, providing a fileinstance + * (represented by a fd) opened on this region. + */ + __u32 managed_by; +}; + +/* + * The vsoc layout descriptor. + * The first 4K should be reserved for the shm header and region descriptors. + * The regions should be page aligned. + */ + +struct vsoc_shm_layout_descriptor { + __u16 major_version; + __u16 minor_version; + + /* size of the shm. This may be redundant but nice to have */ + __u32 size; + + /* number of shared memory regions */ + __u32 region_count; + + /* The offset to the start of region descriptors */ + __u32 vsoc_region_desc_offset; +}; + +/* + * This specifies the current version that should be stored in + * vsoc_shm_layout_descriptor.major_version and + * vsoc_shm_layout_descriptor.minor_version. + * It should be updated only if the vsoc_device_region and + * vsoc_shm_layout_descriptor structures have changed. + * Versioning within each region is transferred + * via the min_compatible_version and current_version fields in + * vsoc_device_region. The driver does not consult these fields: they are left + * for the HALs and host processes and will change independently of the layout + * version. + */ +#define CURRENT_VSOC_LAYOUT_MAJOR_VERSION 2 +#define CURRENT_VSOC_LAYOUT_MINOR_VERSION 0 + +#define VSOC_CREATE_FD_SCOPED_PERMISSION \ + _IOW(0xF5, 0, struct fd_scoped_permission) +#define VSOC_GET_FD_SCOPED_PERMISSION _IOR(0xF5, 1, struct fd_scoped_permission) + +/* + * This is used to signal the host to scan the guest_to_host_signal_table + * for new futexes to wake. This sends an interrupt if one is not already + * in flight. + */ +#define VSOC_MAYBE_SEND_INTERRUPT_TO_HOST _IO(0xF5, 2) + +/* + * When this returns the guest will scan host_to_guest_signal_table to + * check for new futexes to wake. + */ +/* TODO(ghartman): Consider moving this to the bottom half */ +#define VSOC_WAIT_FOR_INCOMING_INTERRUPT _IO(0xF5, 3) + +/* + * Guest HALs will use this to retrieve the region description after + * opening their device node. + */ +#define VSOC_DESCRIBE_REGION _IOR(0xF5, 4, struct vsoc_device_region) + +/* + * Wake any threads that may be waiting for a host interrupt on this region. + * This is mostly used during shutdown. + */ +#define VSOC_SELF_INTERRUPT _IO(0xF5, 5) + +/* + * This is used to signal the host to scan the guest_to_host_signal_table + * for new futexes to wake. This sends an interrupt unconditionally. + */ +#define VSOC_SEND_INTERRUPT_TO_HOST _IO(0xF5, 6) + +enum wait_types { + VSOC_WAIT_UNDEFINED = 0, + VSOC_WAIT_IF_EQUAL = 1, + VSOC_WAIT_IF_EQUAL_TIMEOUT = 2 +}; + +/* + * Wait for a condition to be true + * + * Note, this is sized and aligned so the 32 bit and 64 bit layouts are + * identical. + */ +struct vsoc_cond_wait { + /* Input: Offset of the 32 bit word to check */ + __u32 offset; + /* Input: Value that will be compared with the offset */ + __u32 value; + /* Monotonic time to wake at in seconds */ + __u64 wake_time_sec; + /* Input: Monotonic time to wait in nanoseconds */ + __u32 wake_time_nsec; + /* Input: Type of wait */ + __u32 wait_type; + /* Output: Number of times the thread woke before returning. */ + __u32 wakes; + /* Ensure that we're 8-byte aligned and 8 byte length for 32/64 bit + * compatibility. + */ + __u32 reserved_1; +}; + +#define VSOC_COND_WAIT _IOWR(0xF5, 7, struct vsoc_cond_wait) + +/* Wake any local threads waiting at the offset given in arg */ +#define VSOC_COND_WAKE _IO(0xF5, 8) + +#endif /* _UAPI_LINUX_VSOC_SHM_H */
diff --git a/drivers/staging/android/vsoc.c b/drivers/staging/android/vsoc.c new file mode 100644 index 0000000..954ed2c --- /dev/null +++ b/drivers/staging/android/vsoc.c
@@ -0,0 +1,1165 @@ +/* + * drivers/android/staging/vsoc.c + * + * Android Virtual System on a Chip (VSoC) driver + * + * Copyright (C) 2017 Google, Inc. + * + * Author: ghartman@google.com + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * + * Based on drivers/char/kvm_ivshmem.c - driver for KVM Inter-VM shared memory + * Copyright 2009 Cam Macdonell <cam@cs.ualberta.ca> + * + * Based on cirrusfb.c and 8139cp.c: + * Copyright 1999-2001 Jeff Garzik + * Copyright 2001-2004 Jeff Garzik + */ + +#include <linux/dma-mapping.h> +#include <linux/freezer.h> +#include <linux/futex.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/pci.h> +#include <linux/proc_fs.h> +#include <linux/sched.h> +#include <linux/syscalls.h> +#include <linux/uaccess.h> +#include <linux/interrupt.h> +#include <linux/mutex.h> +#include <linux/cdev.h> +#include <linux/file.h> +#include "uapi/vsoc_shm.h" + +#define VSOC_DEV_NAME "vsoc" + +/* + * Description of the ivshmem-doorbell PCI device used by QEmu. These + * constants follow docs/specs/ivshmem-spec.txt, which can be found in + * the QEmu repository. This was last reconciled with the version that + * came out with 2.8 + */ + +/* + * These constants are determined KVM Inter-VM shared memory device + * register offsets + */ +enum { + INTR_MASK = 0x00, /* Interrupt Mask */ + INTR_STATUS = 0x04, /* Interrupt Status */ + IV_POSITION = 0x08, /* VM ID */ + DOORBELL = 0x0c, /* Doorbell */ +}; + +static const int REGISTER_BAR; /* Equal to 0 */ +static const int MAX_REGISTER_BAR_LEN = 0x100; +/* + * The MSI-x BAR is not used directly. + * + * static const int MSI_X_BAR = 1; + */ +static const int SHARED_MEMORY_BAR = 2; + +struct vsoc_region_data { + char name[VSOC_DEVICE_NAME_SZ + 1]; + wait_queue_head_t interrupt_wait_queue; + /* TODO(b/73664181): Use multiple futex wait queues */ + wait_queue_head_t futex_wait_queue; + /* Flag indicating that an interrupt has been signalled by the host. */ + atomic_t *incoming_signalled; + /* Flag indicating the guest has signalled the host. */ + atomic_t *outgoing_signalled; + bool irq_requested; + bool device_created; +}; + +struct vsoc_device { + /* Kernel virtual address of REGISTER_BAR. */ + void __iomem *regs; + /* Physical address of SHARED_MEMORY_BAR. */ + phys_addr_t shm_phys_start; + /* Kernel virtual address of SHARED_MEMORY_BAR. */ + void __iomem *kernel_mapped_shm; + /* Size of the entire shared memory window in bytes. */ + size_t shm_size; + /* + * Pointer to the virtual address of the shared memory layout structure. + * This is probably identical to kernel_mapped_shm, but saving this + * here saves a lot of annoying casts. + */ + struct vsoc_shm_layout_descriptor *layout; + /* + * Points to a table of region descriptors in the kernel's virtual + * address space. Calculated from + * vsoc_shm_layout_descriptor.vsoc_region_desc_offset + */ + struct vsoc_device_region *regions; + /* Head of a list of permissions that have been granted. */ + struct list_head permissions; + struct pci_dev *dev; + /* Per-region (and therefore per-interrupt) information. */ + struct vsoc_region_data *regions_data; + /* + * Table of msi-x entries. This has to be separated from struct + * vsoc_region_data because the kernel deals with them as an array. + */ + struct msix_entry *msix_entries; + /* Mutex that protectes the permission list */ + struct mutex mtx; + /* Major number assigned by the kernel */ + int major; + /* Character device assigned by the kernel */ + struct cdev cdev; + /* Device class assigned by the kernel */ + struct class *class; + /* + * Flags that indicate what we've initialized. These are used to do an + * orderly cleanup of the device. + */ + bool enabled_device; + bool requested_regions; + bool cdev_added; + bool class_added; + bool msix_enabled; +}; + +static struct vsoc_device vsoc_dev; + +/* + * TODO(ghartman): Add a /sys filesystem entry that summarizes the permissions. + */ + +struct fd_scoped_permission_node { + struct fd_scoped_permission permission; + struct list_head list; +}; + +struct vsoc_private_data { + struct fd_scoped_permission_node *fd_scoped_permission_node; +}; + +static long vsoc_ioctl(struct file *, unsigned int, unsigned long); +static int vsoc_mmap(struct file *, struct vm_area_struct *); +static int vsoc_open(struct inode *, struct file *); +static int vsoc_release(struct inode *, struct file *); +static ssize_t vsoc_read(struct file *, char __user *, size_t, loff_t *); +static ssize_t vsoc_write(struct file *, const char __user *, size_t, loff_t *); +static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin); +static int do_create_fd_scoped_permission( + struct vsoc_device_region *region_p, + struct fd_scoped_permission_node *np, + struct fd_scoped_permission_arg __user *arg); +static void do_destroy_fd_scoped_permission( + struct vsoc_device_region *owner_region_p, + struct fd_scoped_permission *perm); +static long do_vsoc_describe_region(struct file *, + struct vsoc_device_region __user *); +static ssize_t vsoc_get_area(struct file *filp, __u32 *perm_off); + +/** + * Validate arguments on entry points to the driver. + */ +inline int vsoc_validate_inode(struct inode *inode) +{ + if (iminor(inode) >= vsoc_dev.layout->region_count) { + dev_err(&vsoc_dev.dev->dev, + "describe_region: invalid region %d\n", iminor(inode)); + return -ENODEV; + } + return 0; +} + +inline int vsoc_validate_filep(struct file *filp) +{ + int ret = vsoc_validate_inode(file_inode(filp)); + + if (ret) + return ret; + if (!filp->private_data) { + dev_err(&vsoc_dev.dev->dev, + "No private data on fd, region %d\n", + iminor(file_inode(filp))); + return -EBADFD; + } + return 0; +} + +/* Converts from shared memory offset to virtual address */ +static inline void *shm_off_to_virtual_addr(__u32 offset) +{ + return (void __force *)vsoc_dev.kernel_mapped_shm + offset; +} + +/* Converts from shared memory offset to physical address */ +static inline phys_addr_t shm_off_to_phys_addr(__u32 offset) +{ + return vsoc_dev.shm_phys_start + offset; +} + +/** + * Convenience functions to obtain the region from the inode or file. + * Dangerous to call before validating the inode/file. + */ +static inline struct vsoc_device_region *vsoc_region_from_inode( + struct inode *inode) +{ + return &vsoc_dev.regions[iminor(inode)]; +} + +static inline struct vsoc_device_region *vsoc_region_from_filep( + struct file *inode) +{ + return vsoc_region_from_inode(file_inode(inode)); +} + +static inline uint32_t vsoc_device_region_size(struct vsoc_device_region *r) +{ + return r->region_end_offset - r->region_begin_offset; +} + +static const struct file_operations vsoc_ops = { + .owner = THIS_MODULE, + .open = vsoc_open, + .mmap = vsoc_mmap, + .read = vsoc_read, + .unlocked_ioctl = vsoc_ioctl, + .compat_ioctl = vsoc_ioctl, + .write = vsoc_write, + .llseek = vsoc_lseek, + .release = vsoc_release, +}; + +static struct pci_device_id vsoc_id_table[] = { + {0x1af4, 0x1110, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + {0}, +}; + +MODULE_DEVICE_TABLE(pci, vsoc_id_table); + +static void vsoc_remove_device(struct pci_dev *pdev); +static int vsoc_probe_device(struct pci_dev *pdev, + const struct pci_device_id *ent); + +static struct pci_driver vsoc_pci_driver = { + .name = "vsoc", + .id_table = vsoc_id_table, + .probe = vsoc_probe_device, + .remove = vsoc_remove_device, +}; + +static int do_create_fd_scoped_permission( + struct vsoc_device_region *region_p, + struct fd_scoped_permission_node *np, + struct fd_scoped_permission_arg __user *arg) +{ + struct file *managed_filp; + s32 managed_fd; + atomic_t *owner_ptr = NULL; + struct vsoc_device_region *managed_region_p; + + if (copy_from_user(&np->permission, &arg->perm, sizeof(*np)) || + copy_from_user(&managed_fd, + &arg->managed_region_fd, sizeof(managed_fd))) { + return -EFAULT; + } + managed_filp = fdget(managed_fd).file; + /* Check that it's a valid fd, */ + if (!managed_filp || vsoc_validate_filep(managed_filp)) + return -EPERM; + /* EEXIST if the given fd already has a permission. */ + if (((struct vsoc_private_data *)managed_filp->private_data)-> + fd_scoped_permission_node) + return -EEXIST; + managed_region_p = vsoc_region_from_filep(managed_filp); + /* Check that the provided region is managed by this one */ + if (&vsoc_dev.regions[managed_region_p->managed_by] != region_p) + return -EPERM; + /* The area must be well formed and have non-zero size */ + if (np->permission.begin_offset >= np->permission.end_offset) + return -EINVAL; + /* The area must fit in the memory window */ + if (np->permission.end_offset > + vsoc_device_region_size(managed_region_p)) + return -ERANGE; + /* The area must be in the region data section */ + if (np->permission.begin_offset < + managed_region_p->offset_of_region_data) + return -ERANGE; + /* The area must be page aligned */ + if (!PAGE_ALIGNED(np->permission.begin_offset) || + !PAGE_ALIGNED(np->permission.end_offset)) + return -EINVAL; + /* Owner offset must be naturally aligned in the window */ + if (np->permission.owner_offset & + (sizeof(np->permission.owner_offset) - 1)) + return -EINVAL; + /* The owner flag must reside in the owner memory */ + if (np->permission.owner_offset + sizeof(np->permission.owner_offset) > + vsoc_device_region_size(region_p)) + return -ERANGE; + /* The owner flag must reside in the data section */ + if (np->permission.owner_offset < region_p->offset_of_region_data) + return -EINVAL; + /* The owner value must change to claim the memory */ + if (np->permission.owned_value == VSOC_REGION_FREE) + return -EINVAL; + owner_ptr = + (atomic_t *)shm_off_to_virtual_addr(region_p->region_begin_offset + + np->permission.owner_offset); + /* We've already verified that this is in the shared memory window, so + * it should be safe to write to this address. + */ + if (atomic_cmpxchg(owner_ptr, + VSOC_REGION_FREE, + np->permission.owned_value) != VSOC_REGION_FREE) { + return -EBUSY; + } + ((struct vsoc_private_data *)managed_filp->private_data)-> + fd_scoped_permission_node = np; + /* The file offset needs to be adjusted if the calling + * process did any read/write operations on the fd + * before creating the permission. + */ + if (managed_filp->f_pos) { + if (managed_filp->f_pos > np->permission.end_offset) { + /* If the offset is beyond the permission end, set it + * to the end. + */ + managed_filp->f_pos = np->permission.end_offset; + } else { + /* If the offset is within the permission interval + * keep it there otherwise reset it to zero. + */ + if (managed_filp->f_pos < np->permission.begin_offset) { + managed_filp->f_pos = 0; + } else { + managed_filp->f_pos -= + np->permission.begin_offset; + } + } + } + return 0; +} + +static void do_destroy_fd_scoped_permission_node( + struct vsoc_device_region *owner_region_p, + struct fd_scoped_permission_node *node) +{ + if (node) { + do_destroy_fd_scoped_permission(owner_region_p, + &node->permission); + mutex_lock(&vsoc_dev.mtx); + list_del(&node->list); + mutex_unlock(&vsoc_dev.mtx); + kfree(node); + } +} + +static void do_destroy_fd_scoped_permission( + struct vsoc_device_region *owner_region_p, + struct fd_scoped_permission *perm) +{ + atomic_t *owner_ptr = NULL; + int prev = 0; + + if (!perm) + return; + owner_ptr = (atomic_t *)shm_off_to_virtual_addr( + owner_region_p->region_begin_offset + perm->owner_offset); + prev = atomic_xchg(owner_ptr, VSOC_REGION_FREE); + if (prev != perm->owned_value) + dev_err(&vsoc_dev.dev->dev, + "%x-%x: owner (%s) %x: expected to be %x was %x", + perm->begin_offset, perm->end_offset, + owner_region_p->device_name, perm->owner_offset, + perm->owned_value, prev); +} + +static long do_vsoc_describe_region(struct file *filp, + struct vsoc_device_region __user *dest) +{ + struct vsoc_device_region *region_p; + int retval = vsoc_validate_filep(filp); + + if (retval) + return retval; + region_p = vsoc_region_from_filep(filp); + if (copy_to_user(dest, region_p, sizeof(*region_p))) + return -EFAULT; + return 0; +} + +/** + * Implements the inner logic of cond_wait. Copies to and from userspace are + * done in the helper function below. + */ +static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg) +{ + DEFINE_WAIT(wait); + u32 region_number = iminor(file_inode(filp)); + struct vsoc_region_data *data = vsoc_dev.regions_data + region_number; + struct hrtimer_sleeper timeout, *to = NULL; + int ret = 0; + struct vsoc_device_region *region_p = vsoc_region_from_filep(filp); + atomic_t *address = NULL; + struct timespec ts; + + /* Ensure that the offset is aligned */ + if (arg->offset & (sizeof(uint32_t) - 1)) + return -EADDRNOTAVAIL; + /* Ensure that the offset is within shared memory */ + if (((uint64_t)arg->offset) + region_p->region_begin_offset + + sizeof(uint32_t) > region_p->region_end_offset) + return -E2BIG; + address = shm_off_to_virtual_addr(region_p->region_begin_offset + + arg->offset); + + /* Ensure that the type of wait is valid */ + switch (arg->wait_type) { + case VSOC_WAIT_IF_EQUAL: + break; + case VSOC_WAIT_IF_EQUAL_TIMEOUT: + to = &timeout; + break; + default: + return -EINVAL; + } + + if (to) { + /* Copy the user-supplied timesec into the kernel structure. + * We do things this way to flatten differences between 32 bit + * and 64 bit timespecs. + */ + ts.tv_sec = arg->wake_time_sec; + ts.tv_nsec = arg->wake_time_nsec; + + if (!timespec_valid(&ts)) + return -EINVAL; + hrtimer_init_on_stack(&to->timer, CLOCK_MONOTONIC, + HRTIMER_MODE_ABS); + hrtimer_set_expires_range_ns(&to->timer, timespec_to_ktime(ts), + current->timer_slack_ns); + + hrtimer_init_sleeper(to, current); + } + + while (1) { + prepare_to_wait(&data->futex_wait_queue, &wait, + TASK_INTERRUPTIBLE); + /* + * Check the sentinel value after prepare_to_wait. If the value + * changes after this check the writer will call signal, + * changing the task state from INTERRUPTIBLE to RUNNING. That + * will ensure that schedule() will eventually schedule this + * task. + */ + if (atomic_read(address) != arg->value) { + ret = 0; + break; + } + if (to) { + hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); + if (likely(to->task)) + freezable_schedule(); + hrtimer_cancel(&to->timer); + if (!to->task) { + ret = -ETIMEDOUT; + break; + } + } else { + freezable_schedule(); + } + /* Count the number of times that we woke up. This is useful + * for unit testing. + */ + ++arg->wakes; + if (signal_pending(current)) { + ret = -EINTR; + break; + } + } + finish_wait(&data->futex_wait_queue, &wait); + if (to) + destroy_hrtimer_on_stack(&to->timer); + return ret; +} + +/** + * Handles the details of copying from/to userspace to ensure that the copies + * happen on all of the return paths of cond_wait. + */ +static int do_vsoc_cond_wait(struct file *filp, + struct vsoc_cond_wait __user *untrusted_in) +{ + struct vsoc_cond_wait arg; + int rval = 0; + + if (copy_from_user(&arg, untrusted_in, sizeof(arg))) + return -EFAULT; + /* wakes is an out parameter. Initialize it to something sensible. */ + arg.wakes = 0; + rval = handle_vsoc_cond_wait(filp, &arg); + if (copy_to_user(untrusted_in, &arg, sizeof(arg))) + return -EFAULT; + return rval; +} + +static int do_vsoc_cond_wake(struct file *filp, uint32_t offset) +{ + struct vsoc_device_region *region_p = vsoc_region_from_filep(filp); + u32 region_number = iminor(file_inode(filp)); + struct vsoc_region_data *data = vsoc_dev.regions_data + region_number; + /* Ensure that the offset is aligned */ + if (offset & (sizeof(uint32_t) - 1)) + return -EADDRNOTAVAIL; + /* Ensure that the offset is within shared memory */ + if (((uint64_t)offset) + region_p->region_begin_offset + + sizeof(uint32_t) > region_p->region_end_offset) + return -E2BIG; + /* + * TODO(b/73664181): Use multiple futex wait queues. + * We need to wake every sleeper when the condition changes. Typically + * only a single thread will be waiting on the condition, but there + * are exceptions. The worst case is about 10 threads. + */ + wake_up_interruptible_all(&data->futex_wait_queue); + return 0; +} + +static long vsoc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + int rv = 0; + struct vsoc_device_region *region_p; + u32 reg_num; + struct vsoc_region_data *reg_data; + int retval = vsoc_validate_filep(filp); + + if (retval) + return retval; + region_p = vsoc_region_from_filep(filp); + reg_num = iminor(file_inode(filp)); + reg_data = vsoc_dev.regions_data + reg_num; + switch (cmd) { + case VSOC_CREATE_FD_SCOPED_PERMISSION: + { + struct fd_scoped_permission_node *node = NULL; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + /* We can't allocate memory for the permission */ + if (!node) + return -ENOMEM; + INIT_LIST_HEAD(&node->list); + rv = do_create_fd_scoped_permission( + region_p, + node, + (struct fd_scoped_permission_arg __user *)arg); + if (!rv) { + mutex_lock(&vsoc_dev.mtx); + list_add(&node->list, &vsoc_dev.permissions); + mutex_unlock(&vsoc_dev.mtx); + } else { + kfree(node); + return rv; + } + } + break; + + case VSOC_GET_FD_SCOPED_PERMISSION: + { + struct fd_scoped_permission_node *node = + ((struct vsoc_private_data *)filp->private_data)-> + fd_scoped_permission_node; + if (!node) + return -ENOENT; + if (copy_to_user + ((struct fd_scoped_permission __user *)arg, + &node->permission, sizeof(node->permission))) + return -EFAULT; + } + break; + + case VSOC_MAYBE_SEND_INTERRUPT_TO_HOST: + if (!atomic_xchg( + reg_data->outgoing_signalled, + 1)) { + writel(reg_num, vsoc_dev.regs + DOORBELL); + return 0; + } else { + return -EBUSY; + } + break; + + case VSOC_SEND_INTERRUPT_TO_HOST: + writel(reg_num, vsoc_dev.regs + DOORBELL); + return 0; + + case VSOC_WAIT_FOR_INCOMING_INTERRUPT: + wait_event_interruptible( + reg_data->interrupt_wait_queue, + (atomic_read(reg_data->incoming_signalled) != 0)); + break; + + case VSOC_DESCRIBE_REGION: + return do_vsoc_describe_region( + filp, + (struct vsoc_device_region __user *)arg); + + case VSOC_SELF_INTERRUPT: + atomic_set(reg_data->incoming_signalled, 1); + wake_up_interruptible(®_data->interrupt_wait_queue); + break; + + case VSOC_COND_WAIT: + return do_vsoc_cond_wait(filp, + (struct vsoc_cond_wait __user *)arg); + case VSOC_COND_WAKE: + return do_vsoc_cond_wake(filp, arg); + + default: + return -EINVAL; + } + return 0; +} + +static ssize_t vsoc_read(struct file *filp, char __user *buffer, size_t len, + loff_t *poffset) +{ + __u32 area_off; + const void *area_p; + ssize_t area_len; + int retval = vsoc_validate_filep(filp); + + if (retval) + return retval; + area_len = vsoc_get_area(filp, &area_off); + area_p = shm_off_to_virtual_addr(area_off); + area_p += *poffset; + area_len -= *poffset; + if (area_len <= 0) + return 0; + if (area_len < len) + len = area_len; + if (copy_to_user(buffer, area_p, len)) + return -EFAULT; + *poffset += len; + return len; +} + +static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin) +{ + ssize_t area_len = 0; + int retval = vsoc_validate_filep(filp); + + if (retval) + return retval; + area_len = vsoc_get_area(filp, NULL); + switch (origin) { + case SEEK_SET: + break; + + case SEEK_CUR: + if (offset > 0 && offset + filp->f_pos < 0) + return -EOVERFLOW; + offset += filp->f_pos; + break; + + case SEEK_END: + if (offset > 0 && offset + area_len < 0) + return -EOVERFLOW; + offset += area_len; + break; + + case SEEK_DATA: + if (offset >= area_len) + return -EINVAL; + if (offset < 0) + offset = 0; + break; + + case SEEK_HOLE: + /* Next hole is always the end of the region, unless offset is + * beyond that + */ + if (offset < area_len) + offset = area_len; + break; + + default: + return -EINVAL; + } + + if (offset < 0 || offset > area_len) + return -EINVAL; + filp->f_pos = offset; + + return offset; +} + +static ssize_t vsoc_write(struct file *filp, const char __user *buffer, + size_t len, loff_t *poffset) +{ + __u32 area_off; + void *area_p; + ssize_t area_len; + int retval = vsoc_validate_filep(filp); + + if (retval) + return retval; + area_len = vsoc_get_area(filp, &area_off); + area_p = shm_off_to_virtual_addr(area_off); + area_p += *poffset; + area_len -= *poffset; + if (area_len <= 0) + return 0; + if (area_len < len) + len = area_len; + if (copy_from_user(area_p, buffer, len)) + return -EFAULT; + *poffset += len; + return len; +} + +static irqreturn_t vsoc_interrupt(int irq, void *region_data_v) +{ + struct vsoc_region_data *region_data = + (struct vsoc_region_data *)region_data_v; + int reg_num = region_data - vsoc_dev.regions_data; + + if (unlikely(!region_data)) + return IRQ_NONE; + + if (unlikely(reg_num < 0 || + reg_num >= vsoc_dev.layout->region_count)) { + dev_err(&vsoc_dev.dev->dev, + "invalid irq @%p reg_num=0x%04x\n", + region_data, reg_num); + return IRQ_NONE; + } + if (unlikely(vsoc_dev.regions_data + reg_num != region_data)) { + dev_err(&vsoc_dev.dev->dev, + "irq not aligned @%p reg_num=0x%04x\n", + region_data, reg_num); + return IRQ_NONE; + } + wake_up_interruptible(®ion_data->interrupt_wait_queue); + return IRQ_HANDLED; +} + +static int vsoc_probe_device(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int result; + int i; + resource_size_t reg_size; + dev_t devt; + + vsoc_dev.dev = pdev; + result = pci_enable_device(pdev); + if (result) { + dev_err(&pdev->dev, + "pci_enable_device failed %s: error %d\n", + pci_name(pdev), result); + return result; + } + vsoc_dev.enabled_device = true; + result = pci_request_regions(pdev, "vsoc"); + if (result < 0) { + dev_err(&pdev->dev, "pci_request_regions failed\n"); + vsoc_remove_device(pdev); + return -EBUSY; + } + vsoc_dev.requested_regions = true; + /* Set up the control registers in BAR 0 */ + reg_size = pci_resource_len(pdev, REGISTER_BAR); + if (reg_size > MAX_REGISTER_BAR_LEN) + vsoc_dev.regs = + pci_iomap(pdev, REGISTER_BAR, MAX_REGISTER_BAR_LEN); + else + vsoc_dev.regs = pci_iomap(pdev, REGISTER_BAR, reg_size); + + if (!vsoc_dev.regs) { + dev_err(&pdev->dev, + "cannot map registers of size %zu\n", + (size_t)reg_size); + vsoc_remove_device(pdev); + return -EBUSY; + } + + /* Map the shared memory in BAR 2 */ + vsoc_dev.shm_phys_start = pci_resource_start(pdev, SHARED_MEMORY_BAR); + vsoc_dev.shm_size = pci_resource_len(pdev, SHARED_MEMORY_BAR); + + dev_info(&pdev->dev, "shared memory @ DMA %pa size=0x%zx\n", + &vsoc_dev.shm_phys_start, vsoc_dev.shm_size); + vsoc_dev.kernel_mapped_shm = pci_iomap_wc(pdev, SHARED_MEMORY_BAR, 0); + if (!vsoc_dev.kernel_mapped_shm) { + dev_err(&vsoc_dev.dev->dev, "cannot iomap region\n"); + vsoc_remove_device(pdev); + return -EBUSY; + } + + vsoc_dev.layout = (struct vsoc_shm_layout_descriptor __force *) + vsoc_dev.kernel_mapped_shm; + dev_info(&pdev->dev, "major_version: %d\n", + vsoc_dev.layout->major_version); + dev_info(&pdev->dev, "minor_version: %d\n", + vsoc_dev.layout->minor_version); + dev_info(&pdev->dev, "size: 0x%x\n", vsoc_dev.layout->size); + dev_info(&pdev->dev, "regions: %d\n", vsoc_dev.layout->region_count); + if (vsoc_dev.layout->major_version != + CURRENT_VSOC_LAYOUT_MAJOR_VERSION) { + dev_err(&vsoc_dev.dev->dev, + "driver supports only major_version %d\n", + CURRENT_VSOC_LAYOUT_MAJOR_VERSION); + vsoc_remove_device(pdev); + return -EBUSY; + } + result = alloc_chrdev_region(&devt, 0, vsoc_dev.layout->region_count, + VSOC_DEV_NAME); + if (result) { + dev_err(&vsoc_dev.dev->dev, "alloc_chrdev_region failed\n"); + vsoc_remove_device(pdev); + return -EBUSY; + } + vsoc_dev.major = MAJOR(devt); + cdev_init(&vsoc_dev.cdev, &vsoc_ops); + vsoc_dev.cdev.owner = THIS_MODULE; + result = cdev_add(&vsoc_dev.cdev, devt, vsoc_dev.layout->region_count); + if (result) { + dev_err(&vsoc_dev.dev->dev, "cdev_add error\n"); + vsoc_remove_device(pdev); + return -EBUSY; + } + vsoc_dev.cdev_added = true; + vsoc_dev.class = class_create(THIS_MODULE, VSOC_DEV_NAME); + if (IS_ERR(vsoc_dev.class)) { + dev_err(&vsoc_dev.dev->dev, "class_create failed\n"); + vsoc_remove_device(pdev); + return PTR_ERR(vsoc_dev.class); + } + vsoc_dev.class_added = true; + vsoc_dev.regions = (struct vsoc_device_region __force *) + ((void *)vsoc_dev.layout + + vsoc_dev.layout->vsoc_region_desc_offset); + vsoc_dev.msix_entries = kcalloc( + vsoc_dev.layout->region_count, + sizeof(vsoc_dev.msix_entries[0]), GFP_KERNEL); + if (!vsoc_dev.msix_entries) { + dev_err(&vsoc_dev.dev->dev, + "unable to allocate msix_entries\n"); + vsoc_remove_device(pdev); + return -ENOSPC; + } + vsoc_dev.regions_data = kcalloc( + vsoc_dev.layout->region_count, + sizeof(vsoc_dev.regions_data[0]), GFP_KERNEL); + if (!vsoc_dev.regions_data) { + dev_err(&vsoc_dev.dev->dev, + "unable to allocate regions' data\n"); + vsoc_remove_device(pdev); + return -ENOSPC; + } + for (i = 0; i < vsoc_dev.layout->region_count; ++i) + vsoc_dev.msix_entries[i].entry = i; + + result = pci_enable_msix_exact(vsoc_dev.dev, vsoc_dev.msix_entries, + vsoc_dev.layout->region_count); + if (result) { + dev_info(&pdev->dev, "pci_enable_msix failed: %d\n", result); + vsoc_remove_device(pdev); + return -ENOSPC; + } + /* Check that all regions are well formed */ + for (i = 0; i < vsoc_dev.layout->region_count; ++i) { + const struct vsoc_device_region *region = vsoc_dev.regions + i; + + if (!PAGE_ALIGNED(region->region_begin_offset) || + !PAGE_ALIGNED(region->region_end_offset)) { + dev_err(&vsoc_dev.dev->dev, + "region %d not aligned (%x:%x)", i, + region->region_begin_offset, + region->region_end_offset); + vsoc_remove_device(pdev); + return -EFAULT; + } + if (region->region_begin_offset >= region->region_end_offset || + region->region_end_offset > vsoc_dev.shm_size) { + dev_err(&vsoc_dev.dev->dev, + "region %d offsets are wrong: %x %x %zx", + i, region->region_begin_offset, + region->region_end_offset, vsoc_dev.shm_size); + vsoc_remove_device(pdev); + return -EFAULT; + } + if (region->managed_by >= vsoc_dev.layout->region_count) { + dev_err(&vsoc_dev.dev->dev, + "region %d has invalid owner: %u", + i, region->managed_by); + vsoc_remove_device(pdev); + return -EFAULT; + } + } + vsoc_dev.msix_enabled = true; + for (i = 0; i < vsoc_dev.layout->region_count; ++i) { + const struct vsoc_device_region *region = vsoc_dev.regions + i; + size_t name_sz = sizeof(vsoc_dev.regions_data[i].name) - 1; + const struct vsoc_signal_table_layout *h_to_g_signal_table = + ®ion->host_to_guest_signal_table; + const struct vsoc_signal_table_layout *g_to_h_signal_table = + ®ion->guest_to_host_signal_table; + + vsoc_dev.regions_data[i].name[name_sz] = '\0'; + memcpy(vsoc_dev.regions_data[i].name, region->device_name, + name_sz); + dev_info(&pdev->dev, "region %d name=%s\n", + i, vsoc_dev.regions_data[i].name); + init_waitqueue_head( + &vsoc_dev.regions_data[i].interrupt_wait_queue); + init_waitqueue_head(&vsoc_dev.regions_data[i].futex_wait_queue); + vsoc_dev.regions_data[i].incoming_signalled = + shm_off_to_virtual_addr(region->region_begin_offset) + + h_to_g_signal_table->interrupt_signalled_offset; + vsoc_dev.regions_data[i].outgoing_signalled = + shm_off_to_virtual_addr(region->region_begin_offset) + + g_to_h_signal_table->interrupt_signalled_offset; + result = request_irq( + vsoc_dev.msix_entries[i].vector, + vsoc_interrupt, 0, + vsoc_dev.regions_data[i].name, + vsoc_dev.regions_data + i); + if (result) { + dev_info(&pdev->dev, + "request_irq failed irq=%d vector=%d\n", + i, vsoc_dev.msix_entries[i].vector); + vsoc_remove_device(pdev); + return -ENOSPC; + } + vsoc_dev.regions_data[i].irq_requested = true; + if (!device_create(vsoc_dev.class, NULL, + MKDEV(vsoc_dev.major, i), + NULL, vsoc_dev.regions_data[i].name)) { + dev_err(&vsoc_dev.dev->dev, "device_create failed\n"); + vsoc_remove_device(pdev); + return -EBUSY; + } + vsoc_dev.regions_data[i].device_created = true; + } + return 0; +} + +/* + * This should undo all of the allocations in the probe function in reverse + * order. + * + * Notes: + * + * The device may have been partially initialized, so double check + * that the allocations happened. + * + * This function may be called multiple times, so mark resources as freed + * as they are deallocated. + */ +static void vsoc_remove_device(struct pci_dev *pdev) +{ + int i; + /* + * pdev is the first thing to be set on probe and the last thing + * to be cleared here. If it's NULL then there is no cleanup. + */ + if (!pdev || !vsoc_dev.dev) + return; + dev_info(&pdev->dev, "remove_device\n"); + if (vsoc_dev.regions_data) { + for (i = 0; i < vsoc_dev.layout->region_count; ++i) { + if (vsoc_dev.regions_data[i].device_created) { + device_destroy(vsoc_dev.class, + MKDEV(vsoc_dev.major, i)); + vsoc_dev.regions_data[i].device_created = false; + } + if (vsoc_dev.regions_data[i].irq_requested) + free_irq(vsoc_dev.msix_entries[i].vector, NULL); + vsoc_dev.regions_data[i].irq_requested = false; + } + kfree(vsoc_dev.regions_data); + vsoc_dev.regions_data = NULL; + } + if (vsoc_dev.msix_enabled) { + pci_disable_msix(pdev); + vsoc_dev.msix_enabled = false; + } + kfree(vsoc_dev.msix_entries); + vsoc_dev.msix_entries = NULL; + vsoc_dev.regions = NULL; + if (vsoc_dev.class_added) { + class_destroy(vsoc_dev.class); + vsoc_dev.class_added = false; + } + if (vsoc_dev.cdev_added) { + cdev_del(&vsoc_dev.cdev); + vsoc_dev.cdev_added = false; + } + if (vsoc_dev.major && vsoc_dev.layout) { + unregister_chrdev_region(MKDEV(vsoc_dev.major, 0), + vsoc_dev.layout->region_count); + vsoc_dev.major = 0; + } + vsoc_dev.layout = NULL; + if (vsoc_dev.kernel_mapped_shm) { + pci_iounmap(pdev, vsoc_dev.kernel_mapped_shm); + vsoc_dev.kernel_mapped_shm = NULL; + } + if (vsoc_dev.regs) { + pci_iounmap(pdev, vsoc_dev.regs); + vsoc_dev.regs = NULL; + } + if (vsoc_dev.requested_regions) { + pci_release_regions(pdev); + vsoc_dev.requested_regions = false; + } + if (vsoc_dev.enabled_device) { + pci_disable_device(pdev); + vsoc_dev.enabled_device = false; + } + /* Do this last: it indicates that the device is not initialized. */ + vsoc_dev.dev = NULL; +} + +static void __exit vsoc_cleanup_module(void) +{ + vsoc_remove_device(vsoc_dev.dev); + pci_unregister_driver(&vsoc_pci_driver); +} + +static int __init vsoc_init_module(void) +{ + int err = -ENOMEM; + + INIT_LIST_HEAD(&vsoc_dev.permissions); + mutex_init(&vsoc_dev.mtx); + + err = pci_register_driver(&vsoc_pci_driver); + if (err < 0) + return err; + return 0; +} + +static int vsoc_open(struct inode *inode, struct file *filp) +{ + /* Can't use vsoc_validate_filep because filp is still incomplete */ + int ret = vsoc_validate_inode(inode); + + if (ret) + return ret; + filp->private_data = + kzalloc(sizeof(struct vsoc_private_data), GFP_KERNEL); + if (!filp->private_data) + return -ENOMEM; + return 0; +} + +static int vsoc_release(struct inode *inode, struct file *filp) +{ + struct vsoc_private_data *private_data = NULL; + struct fd_scoped_permission_node *node = NULL; + struct vsoc_device_region *owner_region_p = NULL; + int retval = vsoc_validate_filep(filp); + + if (retval) + return retval; + private_data = (struct vsoc_private_data *)filp->private_data; + if (!private_data) + return 0; + + node = private_data->fd_scoped_permission_node; + if (node) { + owner_region_p = vsoc_region_from_inode(inode); + if (owner_region_p->managed_by != VSOC_REGION_WHOLE) { + owner_region_p = + &vsoc_dev.regions[owner_region_p->managed_by]; + } + do_destroy_fd_scoped_permission_node(owner_region_p, node); + private_data->fd_scoped_permission_node = NULL; + } + kfree(private_data); + filp->private_data = NULL; + + return 0; +} + +/* + * Returns the device relative offset and length of the area specified by the + * fd scoped permission. If there is no fd scoped permission set, a default + * permission covering the entire region is assumed, unless the region is owned + * by another one, in which case the default is a permission with zero size. + */ +static ssize_t vsoc_get_area(struct file *filp, __u32 *area_offset) +{ + __u32 off = 0; + ssize_t length = 0; + struct vsoc_device_region *region_p; + struct fd_scoped_permission *perm; + + region_p = vsoc_region_from_filep(filp); + off = region_p->region_begin_offset; + perm = &((struct vsoc_private_data *)filp->private_data)-> + fd_scoped_permission_node->permission; + if (perm) { + off += perm->begin_offset; + length = perm->end_offset - perm->begin_offset; + } else if (region_p->managed_by == VSOC_REGION_WHOLE) { + /* No permission set and the regions is not owned by another, + * default to full region access. + */ + length = vsoc_device_region_size(region_p); + } else { + /* return zero length, access is denied. */ + length = 0; + } + if (area_offset) + *area_offset = off; + return length; +} + +static int vsoc_mmap(struct file *filp, struct vm_area_struct *vma) +{ + unsigned long len = vma->vm_end - vma->vm_start; + __u32 area_off; + phys_addr_t mem_off; + ssize_t area_len; + int retval = vsoc_validate_filep(filp); + + if (retval) + return retval; + area_len = vsoc_get_area(filp, &area_off); + /* Add the requested offset */ + area_off += (vma->vm_pgoff << PAGE_SHIFT); + area_len -= (vma->vm_pgoff << PAGE_SHIFT); + if (area_len < len) + return -EINVAL; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + mem_off = shm_off_to_phys_addr(area_off); + if (io_remap_pfn_range(vma, vma->vm_start, mem_off >> PAGE_SHIFT, + len, vma->vm_page_prot)) + return -EAGAIN; + return 0; +} + +module_init(vsoc_init_module); +module_exit(vsoc_cleanup_module); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Greg Hartman <ghartman@google.com>"); +MODULE_DESCRIPTION("VSoC interpretation of QEmu's ivshmem device"); +MODULE_VERSION("1.0");
diff --git a/drivers/staging/goldfish/Kconfig b/drivers/staging/goldfish/Kconfig index 4e09460..d293bbc 100644 --- a/drivers/staging/goldfish/Kconfig +++ b/drivers/staging/goldfish/Kconfig
@@ -4,6 +4,14 @@ ---help--- Emulated audio channel for the Goldfish Android Virtual Device +config GOLDFISH_SYNC + tristate "Goldfish AVD Sync Driver" + depends on GOLDFISH + depends on SW_SYNC + depends on SYNC_FILE + ---help--- + Emulated sync fences for the Goldfish Android Virtual Device + config MTD_GOLDFISH_NAND tristate "Goldfish NAND device" depends on GOLDFISH
diff --git a/drivers/staging/goldfish/Makefile b/drivers/staging/goldfish/Makefile index dec34ad..3313fce 100644 --- a/drivers/staging/goldfish/Makefile +++ b/drivers/staging/goldfish/Makefile
@@ -4,3 +4,9 @@ obj-$(CONFIG_GOLDFISH_AUDIO) += goldfish_audio.o obj-$(CONFIG_MTD_GOLDFISH_NAND) += goldfish_nand.o + +# and sync + +ccflags-y := -Idrivers/staging/android +goldfish_sync-objs := goldfish_sync_timeline_fence.o goldfish_sync_timeline.o +obj-$(CONFIG_GOLDFISH_SYNC) += goldfish_sync.o
diff --git a/drivers/staging/goldfish/goldfish_audio.c b/drivers/staging/goldfish/goldfish_audio.c index bd55995..0bb0ee2 100644 --- a/drivers/staging/goldfish/goldfish_audio.c +++ b/drivers/staging/goldfish/goldfish_audio.c
@@ -28,6 +28,7 @@ #include <linux/uaccess.h> #include <linux/slab.h> #include <linux/goldfish.h> +#include <linux/acpi.h> MODULE_AUTHOR("Google, Inc."); MODULE_DESCRIPTION("Android QEMU Audio Driver"); @@ -116,6 +117,7 @@ static ssize_t goldfish_audio_read(struct file *fp, char __user *buf, size_t count, loff_t *pos) { struct goldfish_audio *data = fp->private_data; + unsigned long irq_flags; int length; int result = 0; @@ -129,6 +131,10 @@ static ssize_t goldfish_audio_read(struct file *fp, char __user *buf, wait_event_interruptible(data->wait, data->buffer_status & AUDIO_INT_READ_BUFFER_FULL); + spin_lock_irqsave(&data->lock, irq_flags); + data->buffer_status &= ~AUDIO_INT_READ_BUFFER_FULL; + spin_unlock_irqrestore(&data->lock, irq_flags); + length = AUDIO_READ(data, AUDIO_READ_BUFFER_AVAILABLE); /* copy data to user space */ @@ -351,12 +357,19 @@ static const struct of_device_id goldfish_audio_of_match[] = { }; MODULE_DEVICE_TABLE(of, goldfish_audio_of_match); +static const struct acpi_device_id goldfish_audio_acpi_match[] = { + { "GFSH0005", 0 }, + { }, +}; +MODULE_DEVICE_TABLE(acpi, goldfish_audio_acpi_match); + static struct platform_driver goldfish_audio_driver = { .probe = goldfish_audio_probe, .remove = goldfish_audio_remove, .driver = { .name = "goldfish_audio", .of_match_table = goldfish_audio_of_match, + .acpi_match_table = ACPI_PTR(goldfish_audio_acpi_match), } };
diff --git a/drivers/staging/goldfish/goldfish_sync_timeline.c b/drivers/staging/goldfish/goldfish_sync_timeline.c new file mode 100644 index 0000000..880d6e2 --- /dev/null +++ b/drivers/staging/goldfish/goldfish_sync_timeline.c
@@ -0,0 +1,962 @@ +/* + * Copyright (C) 2016 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/fdtable.h> +#include <linux/file.h> +#include <linux/init.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/platform_device.h> + +#include <linux/interrupt.h> +#include <linux/kref.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +#include <linux/io.h> +#include <linux/mm.h> +#include <linux/acpi.h> + +#include <linux/string.h> + +#include <linux/fs.h> +#include <linux/syscalls.h> +#include <linux/sync_file.h> +#include <linux/dma-fence.h> + +#include "goldfish_sync_timeline_fence.h" + +#define ERR(...) printk(KERN_ERR __VA_ARGS__); + +#define INFO(...) printk(KERN_INFO __VA_ARGS__); + +#define DPRINT(...) pr_debug(__VA_ARGS__); + +#define DTRACE() DPRINT("%s: enter", __func__) + +/* The Goldfish sync driver is designed to provide a interface + * between the underlying host's sync device and the kernel's + * fence sync framework.. + * The purpose of the device/driver is to enable lightweight + * creation and signaling of timelines and fences + * in order to synchronize the guest with host-side graphics events. + * + * Each time the interrupt trips, the driver + * may perform a sync operation. + */ + +/* The operations are: */ + +/* Ready signal - used to mark when irq should lower */ +#define CMD_SYNC_READY 0 + +/* Create a new timeline. writes timeline handle */ +#define CMD_CREATE_SYNC_TIMELINE 1 + +/* Create a fence object. reads timeline handle and time argument. + * Writes fence fd to the SYNC_REG_HANDLE register. */ +#define CMD_CREATE_SYNC_FENCE 2 + +/* Increments timeline. reads timeline handle and time argument */ +#define CMD_SYNC_TIMELINE_INC 3 + +/* Destroys a timeline. reads timeline handle */ +#define CMD_DESTROY_SYNC_TIMELINE 4 + +/* Starts a wait on the host with + * the given glsync object and sync thread handle. */ +#define CMD_TRIGGER_HOST_WAIT 5 + +/* The register layout is: */ + +#define SYNC_REG_BATCH_COMMAND 0x00 /* host->guest batch commands */ +#define SYNC_REG_BATCH_GUESTCOMMAND 0x04 /* guest->host batch commands */ +#define SYNC_REG_BATCH_COMMAND_ADDR 0x08 /* communicate physical address of host->guest batch commands */ +#define SYNC_REG_BATCH_COMMAND_ADDR_HIGH 0x0c /* 64-bit part */ +#define SYNC_REG_BATCH_GUESTCOMMAND_ADDR 0x10 /* communicate physical address of guest->host commands */ +#define SYNC_REG_BATCH_GUESTCOMMAND_ADDR_HIGH 0x14 /* 64-bit part */ +#define SYNC_REG_INIT 0x18 /* signals that the device has been probed */ + +/* There is an ioctl associated with goldfish sync driver. + * Make it conflict with ioctls that are not likely to be used + * in the emulator. + * + * '@' 00-0F linux/radeonfb.h conflict! + * '@' 00-0F drivers/video/aty/aty128fb.c conflict! + */ +#define GOLDFISH_SYNC_IOC_MAGIC '@' + +#define GOLDFISH_SYNC_IOC_QUEUE_WORK _IOWR(GOLDFISH_SYNC_IOC_MAGIC, 0, struct goldfish_sync_ioctl_info) + +/* The above definitions (command codes, register layout, ioctl definitions) + * need to be in sync with the following files: + * + * Host-side (emulator): + * external/qemu/android/emulation/goldfish_sync.h + * external/qemu-android/hw/misc/goldfish_sync.c + * + * Guest-side (system image): + * device/generic/goldfish-opengl/system/egl/goldfish_sync.h + * device/generic/goldfish/ueventd.ranchu.rc + * platform/build/target/board/generic/sepolicy/file_contexts + */ +struct goldfish_sync_hostcmd { + /* sorted for alignment */ + uint64_t handle; + uint64_t hostcmd_handle; + uint32_t cmd; + uint32_t time_arg; +}; + +struct goldfish_sync_guestcmd { + uint64_t host_command; /* uint64_t for alignment */ + uint64_t glsync_handle; + uint64_t thread_handle; + uint64_t guest_timeline_handle; +}; + +#define GOLDFISH_SYNC_MAX_CMDS 32 + +struct goldfish_sync_state { + char __iomem *reg_base; + int irq; + + /* Spinlock protects |to_do| / |to_do_end|. */ + spinlock_t lock; + /* |mutex_lock| protects all concurrent access + * to timelines for both kernel and user space. */ + struct mutex mutex_lock; + + /* Buffer holding commands issued from host. */ + struct goldfish_sync_hostcmd to_do[GOLDFISH_SYNC_MAX_CMDS]; + uint32_t to_do_end; + + /* Addresses for the reading or writing + * of individual commands. The host can directly write + * to |batch_hostcmd| (and then this driver immediately + * copies contents to |to_do|). This driver either replies + * through |batch_hostcmd| or simply issues a + * guest->host command through |batch_guestcmd|. + */ + struct goldfish_sync_hostcmd *batch_hostcmd; + struct goldfish_sync_guestcmd *batch_guestcmd; + + /* Used to give this struct itself to a work queue + * function for executing actual sync commands. */ + struct work_struct work_item; +}; + +static struct goldfish_sync_state global_sync_state[1]; + +struct goldfish_sync_timeline_obj { + struct goldfish_sync_timeline *sync_tl; + uint32_t current_time; + /* We need to be careful about when we deallocate + * this |goldfish_sync_timeline_obj| struct. + * In order to ensure proper cleanup, we need to + * consider the triggered host-side wait that may + * still be in flight when the guest close()'s a + * goldfish_sync device's sync context fd (and + * destroys the |sync_tl| field above). + * The host-side wait may raise IRQ + * and tell the kernel to increment the timeline _after_ + * the |sync_tl| has already been set to null. + * + * From observations on OpenGL apps and CTS tests, this + * happens at some very low probability upon context + * destruction or process close, but it does happen + * and it needs to be handled properly. Otherwise, + * if we clean up the surrounding |goldfish_sync_timeline_obj| + * too early, any |handle| field of any host->guest command + * might not even point to a null |sync_tl| field, + * but to garbage memory or even a reclaimed |sync_tl|. + * If we do not count such "pending waits" and kfree the object + * immediately upon |goldfish_sync_timeline_destroy|, + * we might get mysterous RCU stalls after running a long + * time because the garbage memory that is being read + * happens to be interpretable as a |spinlock_t| struct + * that is currently in the locked state. + * + * To track when to free the |goldfish_sync_timeline_obj| + * itself, we maintain a kref. + * The kref essentially counts the timeline itself plus + * the number of waits in flight. kref_init/kref_put + * are issued on + * |goldfish_sync_timeline_create|/|goldfish_sync_timeline_destroy| + * and kref_get/kref_put are issued on + * |goldfish_sync_fence_create|/|goldfish_sync_timeline_inc|. + * + * The timeline is destroyed after reference count + * reaches zero, which would happen after + * |goldfish_sync_timeline_destroy| and all pending + * |goldfish_sync_timeline_inc|'s are fulfilled. + * + * NOTE (1): We assume that |fence_create| and + * |timeline_inc| calls are 1:1, otherwise the kref scheme + * will not work. This is a valid assumption as long + * as the host-side virtual device implementation + * does not insert any timeline increments + * that we did not trigger from here. + * + * NOTE (2): The use of kref by itself requires no locks, + * but this does not mean everything works without locks. + * Related timeline operations do require a lock of some sort, + * or at least are not proven to work without it. + * In particualr, we assume that all the operations + * done on the |kref| field above are done in contexts where + * |global_sync_state->mutex_lock| is held. Do not + * remove that lock until everything is proven to work + * without it!!! */ + struct kref kref; +}; + +/* We will call |delete_timeline_obj| when the last reference count + * of the kref is decremented. This deletes the sync + * timeline object along with the wrapper itself. */ +static void delete_timeline_obj(struct kref* kref) { + struct goldfish_sync_timeline_obj* obj = + container_of(kref, struct goldfish_sync_timeline_obj, kref); + + goldfish_sync_timeline_put_internal(obj->sync_tl); + obj->sync_tl = NULL; + kfree(obj); +} + +static uint64_t gensym_ctr; +static void gensym(char *dst) +{ + sprintf(dst, "goldfish_sync:gensym:%llu", gensym_ctr); + gensym_ctr++; +} + +/* |goldfish_sync_timeline_create| assumes that |global_sync_state->mutex_lock| + * is held. */ +static struct goldfish_sync_timeline_obj* +goldfish_sync_timeline_create(void) +{ + + char timeline_name[256]; + struct goldfish_sync_timeline *res_sync_tl = NULL; + struct goldfish_sync_timeline_obj *res; + + DTRACE(); + + gensym(timeline_name); + + res_sync_tl = goldfish_sync_timeline_create_internal(timeline_name); + if (!res_sync_tl) { + ERR("Failed to create goldfish_sw_sync timeline."); + return NULL; + } + + res = kzalloc(sizeof(struct goldfish_sync_timeline_obj), GFP_KERNEL); + res->sync_tl = res_sync_tl; + res->current_time = 0; + kref_init(&res->kref); + + DPRINT("new timeline_obj=0x%p", res); + return res; +} + +/* |goldfish_sync_fence_create| assumes that |global_sync_state->mutex_lock| + * is held. */ +static int +goldfish_sync_fence_create(struct goldfish_sync_timeline_obj *obj, + uint32_t val) +{ + + int fd; + char fence_name[256]; + struct sync_pt *syncpt = NULL; + struct sync_file *sync_file_obj = NULL; + struct goldfish_sync_timeline *tl; + + DTRACE(); + + if (!obj) return -1; + + tl = obj->sync_tl; + + syncpt = goldfish_sync_pt_create_internal( + tl, sizeof(struct sync_pt) + 4, val); + if (!syncpt) { + ERR("could not create sync point! " + "goldfish_sync_timeline=0x%p val=%d", + tl, val); + return -1; + } + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) { + ERR("could not get unused fd for sync fence. " + "errno=%d", fd); + goto err_cleanup_pt; + } + + gensym(fence_name); + + sync_file_obj = sync_file_create(&syncpt->base); + if (!sync_file_obj) { + ERR("could not create sync fence! " + "goldfish_sync_timeline=0x%p val=%d sync_pt=0x%p", + tl, val, syncpt); + goto err_cleanup_fd_pt; + } + + DPRINT("installing sync fence into fd %d sync_file_obj=0x%p", + fd, sync_file_obj); + fd_install(fd, sync_file_obj->file); + kref_get(&obj->kref); + + return fd; + +err_cleanup_fd_pt: + put_unused_fd(fd); +err_cleanup_pt: + dma_fence_put(&syncpt->base); + return -1; +} + +/* |goldfish_sync_timeline_inc| assumes that |global_sync_state->mutex_lock| + * is held. */ +static void +goldfish_sync_timeline_inc(struct goldfish_sync_timeline_obj *obj, uint32_t inc) +{ + DTRACE(); + /* Just give up if someone else nuked the timeline. + * Whoever it was won't care that it doesn't get signaled. */ + if (!obj) return; + + DPRINT("timeline_obj=0x%p", obj); + goldfish_sync_timeline_signal_internal(obj->sync_tl, inc); + DPRINT("incremented timeline. increment max_time"); + obj->current_time += inc; + + /* Here, we will end up deleting the timeline object if it + * turns out that this call was a pending increment after + * |goldfish_sync_timeline_destroy| was called. */ + kref_put(&obj->kref, delete_timeline_obj); + DPRINT("done"); +} + +/* |goldfish_sync_timeline_destroy| assumes + * that |global_sync_state->mutex_lock| is held. */ +static void +goldfish_sync_timeline_destroy(struct goldfish_sync_timeline_obj *obj) +{ + DTRACE(); + /* See description of |goldfish_sync_timeline_obj| for why we + * should not immediately destroy |obj| */ + kref_put(&obj->kref, delete_timeline_obj); +} + +static inline void +goldfish_sync_cmd_queue(struct goldfish_sync_state *sync_state, + uint32_t cmd, + uint64_t handle, + uint32_t time_arg, + uint64_t hostcmd_handle) +{ + struct goldfish_sync_hostcmd *to_add; + + DTRACE(); + + BUG_ON(sync_state->to_do_end == GOLDFISH_SYNC_MAX_CMDS); + + to_add = &sync_state->to_do[sync_state->to_do_end]; + + to_add->cmd = cmd; + to_add->handle = handle; + to_add->time_arg = time_arg; + to_add->hostcmd_handle = hostcmd_handle; + + sync_state->to_do_end += 1; +} + +static inline void +goldfish_sync_hostcmd_reply(struct goldfish_sync_state *sync_state, + uint32_t cmd, + uint64_t handle, + uint32_t time_arg, + uint64_t hostcmd_handle) +{ + unsigned long irq_flags; + struct goldfish_sync_hostcmd *batch_hostcmd = + sync_state->batch_hostcmd; + + DTRACE(); + + spin_lock_irqsave(&sync_state->lock, irq_flags); + + batch_hostcmd->cmd = cmd; + batch_hostcmd->handle = handle; + batch_hostcmd->time_arg = time_arg; + batch_hostcmd->hostcmd_handle = hostcmd_handle; + writel(0, sync_state->reg_base + SYNC_REG_BATCH_COMMAND); + + spin_unlock_irqrestore(&sync_state->lock, irq_flags); +} + +static inline void +goldfish_sync_send_guestcmd(struct goldfish_sync_state *sync_state, + uint32_t cmd, + uint64_t glsync_handle, + uint64_t thread_handle, + uint64_t timeline_handle) +{ + unsigned long irq_flags; + struct goldfish_sync_guestcmd *batch_guestcmd = + sync_state->batch_guestcmd; + + DTRACE(); + + spin_lock_irqsave(&sync_state->lock, irq_flags); + + batch_guestcmd->host_command = (uint64_t)cmd; + batch_guestcmd->glsync_handle = (uint64_t)glsync_handle; + batch_guestcmd->thread_handle = (uint64_t)thread_handle; + batch_guestcmd->guest_timeline_handle = (uint64_t)timeline_handle; + writel(0, sync_state->reg_base + SYNC_REG_BATCH_GUESTCOMMAND); + + spin_unlock_irqrestore(&sync_state->lock, irq_flags); +} + +/* |goldfish_sync_interrupt| handles IRQ raises from the virtual device. + * In the context of OpenGL, this interrupt will fire whenever we need + * to signal a fence fd in the guest, with the command + * |CMD_SYNC_TIMELINE_INC|. + * However, because this function will be called in an interrupt context, + * it is necessary to do the actual work of signaling off of interrupt context. + * The shared work queue is used for this purpose. At the end when + * all pending commands are intercepted by the interrupt handler, + * we call |schedule_work|, which will later run the actual + * desired sync command in |goldfish_sync_work_item_fn|. + */ +static irqreturn_t goldfish_sync_interrupt(int irq, void *dev_id) +{ + + struct goldfish_sync_state *sync_state = dev_id; + + uint32_t nextcmd; + uint32_t command_r; + uint64_t handle_rw; + uint32_t time_r; + uint64_t hostcmd_handle_rw; + + int count = 0; + + DTRACE(); + + sync_state = dev_id; + + spin_lock(&sync_state->lock); + + for (;;) { + + readl(sync_state->reg_base + SYNC_REG_BATCH_COMMAND); + nextcmd = sync_state->batch_hostcmd->cmd; + + if (nextcmd == 0) + break; + + command_r = nextcmd; + handle_rw = sync_state->batch_hostcmd->handle; + time_r = sync_state->batch_hostcmd->time_arg; + hostcmd_handle_rw = sync_state->batch_hostcmd->hostcmd_handle; + + goldfish_sync_cmd_queue( + sync_state, + command_r, + handle_rw, + time_r, + hostcmd_handle_rw); + + count++; + } + + spin_unlock(&sync_state->lock); + + schedule_work(&sync_state->work_item); + + return (count == 0) ? IRQ_NONE : IRQ_HANDLED; +} + +/* |goldfish_sync_work_item_fn| does the actual work of servicing + * host->guest sync commands. This function is triggered whenever + * the IRQ for the goldfish sync device is raised. Once it starts + * running, it grabs the contents of the buffer containing the + * commands it needs to execute (there may be multiple, because + * our IRQ is active high and not edge triggered), and then + * runs all of them one after the other. + */ +static void goldfish_sync_work_item_fn(struct work_struct *input) +{ + + struct goldfish_sync_state *sync_state; + int sync_fence_fd; + + struct goldfish_sync_timeline_obj *timeline; + uint64_t timeline_ptr; + + uint64_t hostcmd_handle; + + uint32_t cmd; + uint64_t handle; + uint32_t time_arg; + + struct goldfish_sync_hostcmd *todo; + uint32_t todo_end; + + unsigned long irq_flags; + + struct goldfish_sync_hostcmd to_run[GOLDFISH_SYNC_MAX_CMDS]; + uint32_t i = 0; + + sync_state = container_of(input, struct goldfish_sync_state, work_item); + + mutex_lock(&sync_state->mutex_lock); + + spin_lock_irqsave(&sync_state->lock, irq_flags); { + + todo_end = sync_state->to_do_end; + + DPRINT("num sync todos: %u", sync_state->to_do_end); + + for (i = 0; i < todo_end; i++) + to_run[i] = sync_state->to_do[i]; + + /* We expect that commands will come in at a slow enough rate + * so that incoming items will not be more than + * GOLDFISH_SYNC_MAX_CMDS. + * + * This is because the way the sync device is used, + * it's only for managing buffer data transfers per frame, + * with a sequential dependency between putting things in + * to_do and taking them out. Once a set of commands is + * queued up in to_do, the user of the device waits for + * them to be processed before queuing additional commands, + * which limits the rate at which commands come in + * to the rate at which we take them out here. + * + * We also don't expect more than MAX_CMDS to be issued + * at once; there is a correspondence between + * which buffers need swapping to the (display / buffer queue) + * to particular commands, and we don't expect there to be + * enough display or buffer queues in operation at once + * to overrun GOLDFISH_SYNC_MAX_CMDS. + */ + sync_state->to_do_end = 0; + + } spin_unlock_irqrestore(&sync_state->lock, irq_flags); + + for (i = 0; i < todo_end; i++) { + DPRINT("todo index: %u", i); + + todo = &to_run[i]; + + cmd = todo->cmd; + + handle = (uint64_t)todo->handle; + time_arg = todo->time_arg; + hostcmd_handle = (uint64_t)todo->hostcmd_handle; + + DTRACE(); + + timeline = (struct goldfish_sync_timeline_obj *)(uintptr_t)handle; + + switch (cmd) { + case CMD_SYNC_READY: + break; + case CMD_CREATE_SYNC_TIMELINE: + DPRINT("exec CMD_CREATE_SYNC_TIMELINE: " + "handle=0x%llx time_arg=%d", + handle, time_arg); + timeline = goldfish_sync_timeline_create(); + timeline_ptr = (uintptr_t)timeline; + goldfish_sync_hostcmd_reply(sync_state, CMD_CREATE_SYNC_TIMELINE, + timeline_ptr, + 0, + hostcmd_handle); + DPRINT("sync timeline created: %p", timeline); + break; + case CMD_CREATE_SYNC_FENCE: + DPRINT("exec CMD_CREATE_SYNC_FENCE: " + "handle=0x%llx time_arg=%d", + handle, time_arg); + sync_fence_fd = goldfish_sync_fence_create(timeline, time_arg); + goldfish_sync_hostcmd_reply(sync_state, CMD_CREATE_SYNC_FENCE, + sync_fence_fd, + 0, + hostcmd_handle); + break; + case CMD_SYNC_TIMELINE_INC: + DPRINT("exec CMD_SYNC_TIMELINE_INC: " + "handle=0x%llx time_arg=%d", + handle, time_arg); + goldfish_sync_timeline_inc(timeline, time_arg); + break; + case CMD_DESTROY_SYNC_TIMELINE: + DPRINT("exec CMD_DESTROY_SYNC_TIMELINE: " + "handle=0x%llx time_arg=%d", + handle, time_arg); + goldfish_sync_timeline_destroy(timeline); + break; + } + DPRINT("Done executing sync command"); + } + mutex_unlock(&sync_state->mutex_lock); +} + +/* Guest-side interface: file operations */ + +/* Goldfish sync context and ioctl info. + * + * When a sync context is created by open()-ing the goldfish sync device, we + * create a sync context (|goldfish_sync_context|). + * + * Currently, the only data required to track is the sync timeline itself + * along with the current time, which are all packed up in the + * |goldfish_sync_timeline_obj| field. We use a |goldfish_sync_context| + * as the filp->private_data. + * + * Next, when a sync context user requests that work be queued and a fence + * fd provided, we use the |goldfish_sync_ioctl_info| struct, which holds + * information about which host handles to touch for this particular + * queue-work operation. We need to know about the host-side sync thread + * and the particular host-side GLsync object. We also possibly write out + * a file descriptor. + */ +struct goldfish_sync_context { + struct goldfish_sync_timeline_obj *timeline; +}; + +struct goldfish_sync_ioctl_info { + uint64_t host_glsync_handle_in; + uint64_t host_syncthread_handle_in; + int fence_fd_out; +}; + +static int goldfish_sync_open(struct inode *inode, struct file *file) +{ + + struct goldfish_sync_context *sync_context; + + DTRACE(); + + mutex_lock(&global_sync_state->mutex_lock); + + sync_context = kzalloc(sizeof(struct goldfish_sync_context), GFP_KERNEL); + + if (sync_context == NULL) { + ERR("Creation of goldfish sync context failed!"); + mutex_unlock(&global_sync_state->mutex_lock); + return -ENOMEM; + } + + sync_context->timeline = NULL; + + file->private_data = sync_context; + + DPRINT("successfully create a sync context @0x%p", sync_context); + + mutex_unlock(&global_sync_state->mutex_lock); + + return 0; +} + +static int goldfish_sync_release(struct inode *inode, struct file *file) +{ + + struct goldfish_sync_context *sync_context; + + DTRACE(); + + mutex_lock(&global_sync_state->mutex_lock); + + sync_context = file->private_data; + + if (sync_context->timeline) + goldfish_sync_timeline_destroy(sync_context->timeline); + + sync_context->timeline = NULL; + + kfree(sync_context); + + mutex_unlock(&global_sync_state->mutex_lock); + + return 0; +} + +/* |goldfish_sync_ioctl| is the guest-facing interface of goldfish sync + * and is used in conjunction with eglCreateSyncKHR to queue up the + * actual work of waiting for the EGL sync command to complete, + * possibly returning a fence fd to the guest. + */ +static long goldfish_sync_ioctl(struct file *file, + unsigned int cmd, + unsigned long arg) +{ + struct goldfish_sync_context *sync_context_data; + struct goldfish_sync_timeline_obj *timeline; + int fd_out; + struct goldfish_sync_ioctl_info ioctl_data; + + DTRACE(); + + sync_context_data = file->private_data; + fd_out = -1; + + switch (cmd) { + case GOLDFISH_SYNC_IOC_QUEUE_WORK: + + DPRINT("exec GOLDFISH_SYNC_IOC_QUEUE_WORK"); + + mutex_lock(&global_sync_state->mutex_lock); + + if (copy_from_user(&ioctl_data, + (void __user *)arg, + sizeof(ioctl_data))) { + ERR("Failed to copy memory for ioctl_data from user."); + mutex_unlock(&global_sync_state->mutex_lock); + return -EFAULT; + } + + if (ioctl_data.host_syncthread_handle_in == 0) { + DPRINT("Error: zero host syncthread handle!!!"); + mutex_unlock(&global_sync_state->mutex_lock); + return -EFAULT; + } + + if (!sync_context_data->timeline) { + DPRINT("no timeline yet, create one."); + sync_context_data->timeline = goldfish_sync_timeline_create(); + DPRINT("timeline: 0x%p", &sync_context_data->timeline); + } + + timeline = sync_context_data->timeline; + fd_out = goldfish_sync_fence_create(timeline, + timeline->current_time + 1); + DPRINT("Created fence with fd %d and current time %u (timeline: 0x%p)", + fd_out, + sync_context_data->timeline->current_time + 1, + sync_context_data->timeline); + + ioctl_data.fence_fd_out = fd_out; + + if (copy_to_user((void __user *)arg, + &ioctl_data, + sizeof(ioctl_data))) { + DPRINT("Error, could not copy to user!!!"); + + sys_close(fd_out); + /* We won't be doing an increment, kref_put immediately. */ + kref_put(&timeline->kref, delete_timeline_obj); + mutex_unlock(&global_sync_state->mutex_lock); + return -EFAULT; + } + + /* We are now about to trigger a host-side wait; + * accumulate on |pending_waits|. */ + goldfish_sync_send_guestcmd(global_sync_state, + CMD_TRIGGER_HOST_WAIT, + ioctl_data.host_glsync_handle_in, + ioctl_data.host_syncthread_handle_in, + (uint64_t)(uintptr_t)(sync_context_data->timeline)); + + mutex_unlock(&global_sync_state->mutex_lock); + return 0; + default: + return -ENOTTY; + } +} + +static const struct file_operations goldfish_sync_fops = { + .owner = THIS_MODULE, + .open = goldfish_sync_open, + .release = goldfish_sync_release, + .unlocked_ioctl = goldfish_sync_ioctl, + .compat_ioctl = goldfish_sync_ioctl, +}; + +static struct miscdevice goldfish_sync_device = { + .name = "goldfish_sync", + .fops = &goldfish_sync_fops, +}; + + +static bool setup_verify_batch_cmd_addr(struct goldfish_sync_state *sync_state, + void *batch_addr, + uint32_t addr_offset, + uint32_t addr_offset_high) +{ + uint64_t batch_addr_phys; + uint32_t batch_addr_phys_test_lo; + uint32_t batch_addr_phys_test_hi; + + if (!batch_addr) { + ERR("Could not use batch command address!"); + return false; + } + + batch_addr_phys = virt_to_phys(batch_addr); + writel((uint32_t)(batch_addr_phys), + sync_state->reg_base + addr_offset); + writel((uint32_t)(batch_addr_phys >> 32), + sync_state->reg_base + addr_offset_high); + + batch_addr_phys_test_lo = + readl(sync_state->reg_base + addr_offset); + batch_addr_phys_test_hi = + readl(sync_state->reg_base + addr_offset_high); + + if (virt_to_phys(batch_addr) != + (((uint64_t)batch_addr_phys_test_hi << 32) | + batch_addr_phys_test_lo)) { + ERR("Invalid batch command address!"); + return false; + } + + return true; +} + +int goldfish_sync_probe(struct platform_device *pdev) +{ + struct resource *ioresource; + struct goldfish_sync_state *sync_state = global_sync_state; + int status; + + DTRACE(); + + sync_state->to_do_end = 0; + + spin_lock_init(&sync_state->lock); + mutex_init(&sync_state->mutex_lock); + + platform_set_drvdata(pdev, sync_state); + + ioresource = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (ioresource == NULL) { + ERR("platform_get_resource failed"); + return -ENODEV; + } + + sync_state->reg_base = + devm_ioremap(&pdev->dev, ioresource->start, PAGE_SIZE); + if (sync_state->reg_base == NULL) { + ERR("Could not ioremap"); + return -ENOMEM; + } + + sync_state->irq = platform_get_irq(pdev, 0); + if (sync_state->irq < 0) { + ERR("Could not platform_get_irq"); + return -ENODEV; + } + + status = devm_request_irq(&pdev->dev, + sync_state->irq, + goldfish_sync_interrupt, + IRQF_SHARED, + pdev->name, + sync_state); + if (status) { + ERR("request_irq failed"); + return -ENODEV; + } + + INIT_WORK(&sync_state->work_item, + goldfish_sync_work_item_fn); + + misc_register(&goldfish_sync_device); + + /* Obtain addresses for batch send/recv of commands. */ + { + struct goldfish_sync_hostcmd *batch_addr_hostcmd; + struct goldfish_sync_guestcmd *batch_addr_guestcmd; + + batch_addr_hostcmd = + devm_kzalloc(&pdev->dev, sizeof(struct goldfish_sync_hostcmd), + GFP_KERNEL); + batch_addr_guestcmd = + devm_kzalloc(&pdev->dev, sizeof(struct goldfish_sync_guestcmd), + GFP_KERNEL); + + if (!setup_verify_batch_cmd_addr(sync_state, + batch_addr_hostcmd, + SYNC_REG_BATCH_COMMAND_ADDR, + SYNC_REG_BATCH_COMMAND_ADDR_HIGH)) { + ERR("goldfish_sync: Could not setup batch command address"); + return -ENODEV; + } + + if (!setup_verify_batch_cmd_addr(sync_state, + batch_addr_guestcmd, + SYNC_REG_BATCH_GUESTCOMMAND_ADDR, + SYNC_REG_BATCH_GUESTCOMMAND_ADDR_HIGH)) { + ERR("goldfish_sync: Could not setup batch guest command address"); + return -ENODEV; + } + + sync_state->batch_hostcmd = batch_addr_hostcmd; + sync_state->batch_guestcmd = batch_addr_guestcmd; + } + + INFO("goldfish_sync: Initialized goldfish sync device"); + + writel(0, sync_state->reg_base + SYNC_REG_INIT); + + return 0; +} + +static int goldfish_sync_remove(struct platform_device *pdev) +{ + struct goldfish_sync_state *sync_state = global_sync_state; + + DTRACE(); + + misc_deregister(&goldfish_sync_device); + memset(sync_state, 0, sizeof(struct goldfish_sync_state)); + return 0; +} + +static const struct of_device_id goldfish_sync_of_match[] = { + { .compatible = "google,goldfish-sync", }, + {}, +}; +MODULE_DEVICE_TABLE(of, goldfish_sync_of_match); + +static const struct acpi_device_id goldfish_sync_acpi_match[] = { + { "GFSH0006", 0 }, + { }, +}; + +MODULE_DEVICE_TABLE(acpi, goldfish_sync_acpi_match); + +static struct platform_driver goldfish_sync = { + .probe = goldfish_sync_probe, + .remove = goldfish_sync_remove, + .driver = { + .name = "goldfish_sync", + .of_match_table = goldfish_sync_of_match, + .acpi_match_table = ACPI_PTR(goldfish_sync_acpi_match), + } +}; + +module_platform_driver(goldfish_sync); + +MODULE_AUTHOR("Google, Inc."); +MODULE_DESCRIPTION("Android QEMU Sync Driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1.0");
diff --git a/drivers/staging/goldfish/goldfish_sync_timeline_fence.c b/drivers/staging/goldfish/goldfish_sync_timeline_fence.c new file mode 100644 index 0000000..a5bc2de --- /dev/null +++ b/drivers/staging/goldfish/goldfish_sync_timeline_fence.c
@@ -0,0 +1,254 @@ +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/syscalls.h> +#include <linux/sync_file.h> +#include <linux/dma-fence.h> + +#include "goldfish_sync_timeline_fence.h" + +/* + * Timeline-based sync for Goldfish Sync + * Based on "Sync File validation framework" + * (drivers/dma-buf/sw_sync.c) + * + * Copyright (C) 2017 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +/** + * struct goldfish_sync_timeline - sync object + * @kref: reference count on fence. + * @name: name of the goldfish_sync_timeline. Useful for debugging + * @child_list_head: list of children sync_pts for this goldfish_sync_timeline + * @child_list_lock: lock protecting @child_list_head and fence.status + * @active_list_head: list of active (unsignaled/errored) sync_pts + */ +struct goldfish_sync_timeline { + struct kref kref; + char name[32]; + + /* protected by child_list_lock */ + u64 context; + int value; + + struct list_head child_list_head; + spinlock_t child_list_lock; + + struct list_head active_list_head; +}; + +static inline struct goldfish_sync_timeline *goldfish_dma_fence_parent(struct dma_fence *fence) +{ + return container_of(fence->lock, struct goldfish_sync_timeline, + child_list_lock); +} + +static const struct dma_fence_ops goldfish_sync_timeline_fence_ops; + +static inline struct sync_pt *goldfish_sync_fence_to_sync_pt(struct dma_fence *fence) +{ + if (fence->ops != &goldfish_sync_timeline_fence_ops) + return NULL; + return container_of(fence, struct sync_pt, base); +} + +/** + * goldfish_sync_timeline_create_internal() - creates a sync object + * @name: sync_timeline name + * + * Creates a new sync_timeline. Returns the sync_timeline object or NULL in + * case of error. + */ +struct goldfish_sync_timeline +*goldfish_sync_timeline_create_internal(const char *name) +{ + struct goldfish_sync_timeline *obj; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return NULL; + + kref_init(&obj->kref); + obj->context = dma_fence_context_alloc(1); + strlcpy(obj->name, name, sizeof(obj->name)); + + INIT_LIST_HEAD(&obj->child_list_head); + INIT_LIST_HEAD(&obj->active_list_head); + spin_lock_init(&obj->child_list_lock); + + return obj; +} + +static void goldfish_sync_timeline_free_internal(struct kref *kref) +{ + struct goldfish_sync_timeline *obj = + container_of(kref, struct goldfish_sync_timeline, kref); + + kfree(obj); +} + +static void goldfish_sync_timeline_get_internal( + struct goldfish_sync_timeline *obj) +{ + kref_get(&obj->kref); +} + +void goldfish_sync_timeline_put_internal(struct goldfish_sync_timeline *obj) +{ + kref_put(&obj->kref, goldfish_sync_timeline_free_internal); +} + +/** + * goldfish_sync_timeline_signal() - + * signal a status change on a goldfish_sync_timeline + * @obj: sync_timeline to signal + * @inc: num to increment on timeline->value + * + * A sync implementation should call this any time one of it's fences + * has signaled or has an error condition. + */ +void goldfish_sync_timeline_signal_internal(struct goldfish_sync_timeline *obj, + unsigned int inc) +{ + unsigned long flags; + struct sync_pt *pt, *next; + + spin_lock_irqsave(&obj->child_list_lock, flags); + + obj->value += inc; + + list_for_each_entry_safe(pt, next, &obj->active_list_head, + active_list) { + if (dma_fence_is_signaled_locked(&pt->base)) + list_del_init(&pt->active_list); + } + + spin_unlock_irqrestore(&obj->child_list_lock, flags); +} + +/** + * goldfish_sync_pt_create_internal() - creates a sync pt + * @parent: fence's parent sync_timeline + * @size: size to allocate for this pt + * @inc: value of the fence + * + * Creates a new sync_pt as a child of @parent. @size bytes will be + * allocated allowing for implementation specific data to be kept after + * the generic sync_timeline struct. Returns the sync_pt object or + * NULL in case of error. + */ +struct sync_pt *goldfish_sync_pt_create_internal( + struct goldfish_sync_timeline *obj, int size, + unsigned int value) +{ + unsigned long flags; + struct sync_pt *pt; + + if (size < sizeof(*pt)) + return NULL; + + pt = kzalloc(size, GFP_KERNEL); + if (!pt) + return NULL; + + spin_lock_irqsave(&obj->child_list_lock, flags); + goldfish_sync_timeline_get_internal(obj); + dma_fence_init(&pt->base, &goldfish_sync_timeline_fence_ops, &obj->child_list_lock, + obj->context, value); + list_add_tail(&pt->child_list, &obj->child_list_head); + INIT_LIST_HEAD(&pt->active_list); + spin_unlock_irqrestore(&obj->child_list_lock, flags); + return pt; +} + +static const char *goldfish_sync_timeline_fence_get_driver_name( + struct dma_fence *fence) +{ + return "sw_sync"; +} + +static const char *goldfish_sync_timeline_fence_get_timeline_name( + struct dma_fence *fence) +{ + struct goldfish_sync_timeline *parent = goldfish_dma_fence_parent(fence); + + return parent->name; +} + +static void goldfish_sync_timeline_fence_release(struct dma_fence *fence) +{ + struct sync_pt *pt = goldfish_sync_fence_to_sync_pt(fence); + struct goldfish_sync_timeline *parent = goldfish_dma_fence_parent(fence); + unsigned long flags; + + spin_lock_irqsave(fence->lock, flags); + list_del(&pt->child_list); + if (!list_empty(&pt->active_list)) + list_del(&pt->active_list); + spin_unlock_irqrestore(fence->lock, flags); + + goldfish_sync_timeline_put_internal(parent); + dma_fence_free(fence); +} + +static bool goldfish_sync_timeline_fence_signaled(struct dma_fence *fence) +{ + struct goldfish_sync_timeline *parent = goldfish_dma_fence_parent(fence); + + return (fence->seqno > parent->value) ? false : true; +} + +static bool goldfish_sync_timeline_fence_enable_signaling(struct dma_fence *fence) +{ + struct sync_pt *pt = goldfish_sync_fence_to_sync_pt(fence); + struct goldfish_sync_timeline *parent = goldfish_dma_fence_parent(fence); + + if (goldfish_sync_timeline_fence_signaled(fence)) + return false; + + list_add_tail(&pt->active_list, &parent->active_list_head); + return true; +} + +static void goldfish_sync_timeline_fence_disable_signaling(struct dma_fence *fence) +{ + struct sync_pt *pt = container_of(fence, struct sync_pt, base); + + list_del_init(&pt->active_list); +} + +static void goldfish_sync_timeline_fence_value_str(struct dma_fence *fence, + char *str, int size) +{ + snprintf(str, size, "%d", fence->seqno); +} + +static void goldfish_sync_timeline_fence_timeline_value_str( + struct dma_fence *fence, + char *str, int size) +{ + struct goldfish_sync_timeline *parent = goldfish_dma_fence_parent(fence); + + snprintf(str, size, "%d", parent->value); +} + +static const struct dma_fence_ops goldfish_sync_timeline_fence_ops = { + .get_driver_name = goldfish_sync_timeline_fence_get_driver_name, + .get_timeline_name = goldfish_sync_timeline_fence_get_timeline_name, + .enable_signaling = goldfish_sync_timeline_fence_enable_signaling, + .disable_signaling = goldfish_sync_timeline_fence_disable_signaling, + .signaled = goldfish_sync_timeline_fence_signaled, + .wait = dma_fence_default_wait, + .release = goldfish_sync_timeline_fence_release, + .fence_value_str = goldfish_sync_timeline_fence_value_str, + .timeline_value_str = goldfish_sync_timeline_fence_timeline_value_str, +};
diff --git a/drivers/staging/goldfish/goldfish_sync_timeline_fence.h b/drivers/staging/goldfish/goldfish_sync_timeline_fence.h new file mode 100644 index 0000000..638c6fb --- /dev/null +++ b/drivers/staging/goldfish/goldfish_sync_timeline_fence.h
@@ -0,0 +1,58 @@ +#include <linux/sync_file.h> +#include <linux/dma-fence.h> + +/** + * struct sync_pt - sync_pt object + * @base: base dma_fence object + * @child_list: sync timeline child's list + * @active_list: sync timeline active child's list + */ +struct sync_pt { + struct dma_fence base; + struct list_head child_list; + struct list_head active_list; +}; + +/** + * goldfish_sync_timeline_create_internal() - creates a sync object + * @name: goldfish_sync_timeline name + * + * Creates a new goldfish_sync_timeline. + * Returns the goldfish_sync_timeline object or NULL in case of error. + */ +struct goldfish_sync_timeline +*goldfish_sync_timeline_create_internal(const char *name); + +/** + * goldfish_sync_pt_create_internal() - creates a sync pt + * @parent: fence's parent goldfish_sync_timeline + * @size: size to allocate for this pt + * @inc: value of the fence + * + * Creates a new sync_pt as a child of @parent. @size bytes will be + * allocated allowing for implementation specific data to be kept after + * the generic sync_timeline struct. Returns the sync_pt object or + * NULL in case of error. + */ +struct sync_pt +*goldfish_sync_pt_create_internal(struct goldfish_sync_timeline *obj, + int size, unsigned int value); + +/** + * goldfish_sync_timeline_signal_internal() - + * signal a status change on a sync_timeline + * @obj: goldfish_sync_timeline to signal + * @inc: num to increment on timeline->value + * + * A sync implementation should call this any time one of it's fences + * has signaled or has an error condition. + */ +void goldfish_sync_timeline_signal_internal(struct goldfish_sync_timeline *obj, + unsigned int inc); + +/** + * goldfish_sync_timeline_put_internal() - dec refcount of a sync_timeline + * and clean up memory if it was the last ref. + * @obj: goldfish_sync_timeline to decref + */ +void goldfish_sync_timeline_put_internal(struct goldfish_sync_timeline *obj);
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c b/drivers/staging/lustre/lustre/mdc/mdc_request.c index 6ef8dde..8c97acd 100644 --- a/drivers/staging/lustre/lustre/mdc/mdc_request.c +++ b/drivers/staging/lustre/lustre/mdc/mdc_request.c
@@ -1121,9 +1121,9 @@ struct readpage_param { * in PAGE_SIZE (if PAGE_SIZE greater than LU_PAGE_SIZE), and the * lu_dirpage for this integrated page will be adjusted. **/ -static int mdc_read_page_remote(void *data, struct page *page0) +static int mdc_read_page_remote(struct file *data, struct page *page0) { - struct readpage_param *rp = data; + struct readpage_param *rp = (struct readpage_param *)data; struct page **page_pool; struct page *page; struct lu_dirpage *dp;
diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index f2b0d8c..6c0c6c5d 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c
@@ -230,7 +230,8 @@ static void kgdboc_put_char(u8 chr) kgdb_tty_line, chr); } -static int param_set_kgdboc_var(const char *kmessage, struct kernel_param *kp) +static int param_set_kgdboc_var(const char *kmessage, + const struct kernel_param *kp) { size_t len = strlen(kmessage);
diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 31cce780..f572b64 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig
@@ -215,6 +215,12 @@ config USB_F_TCM tristate +config USB_F_AUDIO_SRC + tristate + +config USB_F_ACC + tristate + # this first set of drivers all depend on bulk-capable hardware. config USB_CONFIGFS @@ -368,6 +374,30 @@ implemented in kernel space (for instance Ethernet, serial or mass storage) and other are implemented in user space. +config USB_CONFIGFS_F_ACC + boolean "Accessory gadget" + depends on USB_CONFIGFS + select USB_F_ACC + help + USB gadget Accessory support + +config USB_CONFIGFS_F_AUDIO_SRC + boolean "Audio Source gadget" + depends on USB_CONFIGFS && USB_CONFIGFS_F_ACC + depends on SND + select SND_PCM + select USB_F_AUDIO_SRC + help + USB gadget Audio Source support + +config USB_CONFIGFS_UEVENT + boolean "Uevent notification of Gadget state" + depends on USB_CONFIGFS + help + Enable uevent notifications to userspace when the gadget + state changes. The gadget can be in any of the following + three states: "CONNECTED/DISCONNECTED/CONFIGURED" + config USB_CONFIGFS_F_UAC1 bool "Audio Class 1.0" depends on USB_CONFIGFS
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index b805962..b356719 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c
@@ -2004,6 +2004,12 @@ void composite_disconnect(struct usb_gadget *gadget) struct usb_composite_dev *cdev = get_gadget_data(gadget); unsigned long flags; + if (cdev == NULL) { + WARN(1, "%s: Calling disconnect on a Gadget that is \ + not connected\n", __func__); + return; + } + /* REVISIT: should we have config and device level * disconnect callbacks? */
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index aeb9f3c..bf2d0ce 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c
@@ -9,6 +9,31 @@ #include "u_f.h" #include "u_os_desc.h" +#ifdef CONFIG_USB_CONFIGFS_UEVENT +#include <linux/platform_device.h> +#include <linux/kdev_t.h> +#include <linux/usb/ch9.h> + +#ifdef CONFIG_USB_CONFIGFS_F_ACC +extern int acc_ctrlrequest(struct usb_composite_dev *cdev, + const struct usb_ctrlrequest *ctrl); +void acc_disconnect(void); +#endif +static struct class *android_class; +static struct device *android_device; +static int index; + +struct device *create_function_device(char *name) +{ + if (android_device && !IS_ERR(android_device)) + return device_create(android_class, android_device, + MKDEV(0, index++), NULL, name); + else + return ERR_PTR(-EINVAL); +} +EXPORT_SYMBOL_GPL(create_function_device); +#endif + int check_user_usb_string(const char *name, struct usb_gadget_strings *stringtab_dev) { @@ -60,6 +85,12 @@ struct gadget_info { bool use_os_desc; char b_vendor_code; char qw_sign[OS_STRING_QW_SIGN_LEN]; +#ifdef CONFIG_USB_CONFIGFS_UEVENT + bool connected; + bool sw_connected; + struct work_struct work; + struct device *dev; +#endif }; static inline struct gadget_info *to_gadget_info(struct config_item *item) @@ -265,7 +296,7 @@ static ssize_t gadget_dev_desc_UDC_store(struct config_item *item, mutex_lock(&gi->lock); - if (!strlen(name)) { + if (!strlen(name) || strcmp(name, "none") == 0) { ret = unregister_gadget(gi); if (ret) goto err; @@ -1371,6 +1402,60 @@ static int configfs_composite_bind(struct usb_gadget *gadget, return ret; } +#ifdef CONFIG_USB_CONFIGFS_UEVENT +static void android_work(struct work_struct *data) +{ + struct gadget_info *gi = container_of(data, struct gadget_info, work); + struct usb_composite_dev *cdev = &gi->cdev; + char *disconnected[2] = { "USB_STATE=DISCONNECTED", NULL }; + char *connected[2] = { "USB_STATE=CONNECTED", NULL }; + char *configured[2] = { "USB_STATE=CONFIGURED", NULL }; + /* 0-connected 1-configured 2-disconnected*/ + bool status[3] = { false, false, false }; + unsigned long flags; + bool uevent_sent = false; + + spin_lock_irqsave(&cdev->lock, flags); + if (cdev->config) + status[1] = true; + + if (gi->connected != gi->sw_connected) { + if (gi->connected) + status[0] = true; + else + status[2] = true; + gi->sw_connected = gi->connected; + } + spin_unlock_irqrestore(&cdev->lock, flags); + + if (status[0]) { + kobject_uevent_env(&android_device->kobj, + KOBJ_CHANGE, connected); + pr_info("%s: sent uevent %s\n", __func__, connected[0]); + uevent_sent = true; + } + + if (status[1]) { + kobject_uevent_env(&android_device->kobj, + KOBJ_CHANGE, configured); + pr_info("%s: sent uevent %s\n", __func__, configured[0]); + uevent_sent = true; + } + + if (status[2]) { + kobject_uevent_env(&android_device->kobj, + KOBJ_CHANGE, disconnected); + pr_info("%s: sent uevent %s\n", __func__, disconnected[0]); + uevent_sent = true; + } + + if (!uevent_sent) { + pr_info("%s: did not send uevent (%d %d %p)\n", __func__, + gi->connected, gi->sw_connected, cdev->config); + } +} +#endif + static void configfs_composite_unbind(struct usb_gadget *gadget) { struct usb_composite_dev *cdev; @@ -1390,14 +1475,91 @@ static void configfs_composite_unbind(struct usb_gadget *gadget) set_gadget_data(gadget, NULL); } +#ifdef CONFIG_USB_CONFIGFS_UEVENT +static int android_setup(struct usb_gadget *gadget, + const struct usb_ctrlrequest *c) +{ + struct usb_composite_dev *cdev = get_gadget_data(gadget); + unsigned long flags; + struct gadget_info *gi = container_of(cdev, struct gadget_info, cdev); + int value = -EOPNOTSUPP; + struct usb_function_instance *fi; + + spin_lock_irqsave(&cdev->lock, flags); + if (!gi->connected) { + gi->connected = 1; + schedule_work(&gi->work); + } + spin_unlock_irqrestore(&cdev->lock, flags); + list_for_each_entry(fi, &gi->available_func, cfs_list) { + if (fi != NULL && fi->f != NULL && fi->f->setup != NULL) { + value = fi->f->setup(fi->f, c); + if (value >= 0) + break; + } + } + +#ifdef CONFIG_USB_CONFIGFS_F_ACC + if (value < 0) + value = acc_ctrlrequest(cdev, c); +#endif + + if (value < 0) + value = composite_setup(gadget, c); + + spin_lock_irqsave(&cdev->lock, flags); + if (c->bRequest == USB_REQ_SET_CONFIGURATION && + cdev->config) { + schedule_work(&gi->work); + } + spin_unlock_irqrestore(&cdev->lock, flags); + + return value; +} + +static void android_disconnect(struct usb_gadget *gadget) +{ + struct usb_composite_dev *cdev = get_gadget_data(gadget); + struct gadget_info *gi = container_of(cdev, struct gadget_info, cdev); + + /* FIXME: There's a race between usb_gadget_udc_stop() which is likely + * to set the gadget driver to NULL in the udc driver and this drivers + * gadget disconnect fn which likely checks for the gadget driver to + * be a null ptr. It happens that unbind (doing set_gadget_data(NULL)) + * is called before the gadget driver is set to NULL and the udc driver + * calls disconnect fn which results in cdev being a null ptr. + */ + if (cdev == NULL) { + WARN(1, "%s: gadget driver already disconnected\n", __func__); + return; + } + + /* accessory HID support can be active while the + accessory function is not actually enabled, + so we need to inform it when we are disconnected. + */ + +#ifdef CONFIG_USB_CONFIGFS_F_ACC + acc_disconnect(); +#endif + gi->connected = 0; + schedule_work(&gi->work); + composite_disconnect(gadget); +} +#endif + static const struct usb_gadget_driver configfs_driver_template = { .bind = configfs_composite_bind, .unbind = configfs_composite_unbind, - +#ifdef CONFIG_USB_CONFIGFS_UEVENT + .setup = android_setup, + .reset = android_disconnect, + .disconnect = android_disconnect, +#else .setup = composite_setup, .reset = composite_disconnect, .disconnect = composite_disconnect, - +#endif .suspend = composite_suspend, .resume = composite_resume, @@ -1409,6 +1571,89 @@ static const struct usb_gadget_driver configfs_driver_template = { .match_existing_only = 1, }; +#ifdef CONFIG_USB_CONFIGFS_UEVENT +static ssize_t state_show(struct device *pdev, struct device_attribute *attr, + char *buf) +{ + struct gadget_info *dev = dev_get_drvdata(pdev); + struct usb_composite_dev *cdev; + char *state = "DISCONNECTED"; + unsigned long flags; + + if (!dev) + goto out; + + cdev = &dev->cdev; + + if (!cdev) + goto out; + + spin_lock_irqsave(&cdev->lock, flags); + if (cdev->config) + state = "CONFIGURED"; + else if (dev->connected) + state = "CONNECTED"; + spin_unlock_irqrestore(&cdev->lock, flags); +out: + return sprintf(buf, "%s\n", state); +} + +static DEVICE_ATTR(state, S_IRUGO, state_show, NULL); + +static struct device_attribute *android_usb_attributes[] = { + &dev_attr_state, + NULL +}; + +static int android_device_create(struct gadget_info *gi) +{ + struct device_attribute **attrs; + struct device_attribute *attr; + + INIT_WORK(&gi->work, android_work); + android_device = device_create(android_class, NULL, + MKDEV(0, 0), NULL, "android0"); + if (IS_ERR(android_device)) + return PTR_ERR(android_device); + + dev_set_drvdata(android_device, gi); + + attrs = android_usb_attributes; + while ((attr = *attrs++)) { + int err; + + err = device_create_file(android_device, attr); + if (err) { + device_destroy(android_device->class, + android_device->devt); + return err; + } + } + + return 0; +} + +static void android_device_destroy(void) +{ + struct device_attribute **attrs; + struct device_attribute *attr; + + attrs = android_usb_attributes; + while ((attr = *attrs++)) + device_remove_file(android_device, attr); + device_destroy(android_device->class, android_device->devt); +} +#else +static inline int android_device_create(struct gadget_info *gi) +{ + return 0; +} + +static inline void android_device_destroy(void) +{ +} +#endif + static struct config_group *gadgets_make( struct config_group *group, const char *name) @@ -1460,7 +1705,11 @@ static struct config_group *gadgets_make( if (!gi->composite.gadget_driver.function) goto err; + if (android_device_create(gi) < 0) + goto err; + return &gi->group; + err: kfree(gi); return ERR_PTR(-ENOMEM); @@ -1469,6 +1718,7 @@ static struct config_group *gadgets_make( static void gadgets_drop(struct config_group *group, struct config_item *item) { config_item_put(item); + android_device_destroy(); } static struct configfs_group_operations gadgets_ops = { @@ -1508,6 +1758,13 @@ static int __init gadget_cfs_init(void) config_group_init(&gadget_subsys.su_group); ret = configfs_register_subsystem(&gadget_subsys); + +#ifdef CONFIG_USB_CONFIGFS_UEVENT + android_class = class_create(THIS_MODULE, "android_usb"); + if (IS_ERR(android_class)) + return PTR_ERR(android_class); +#endif + return ret; } module_init(gadget_cfs_init); @@ -1515,5 +1772,10 @@ module_init(gadget_cfs_init); static void __exit gadget_cfs_exit(void) { configfs_unregister_subsystem(&gadget_subsys); +#ifdef CONFIG_USB_CONFIGFS_UEVENT + if (!IS_ERR(android_class)) + class_destroy(android_class); +#endif + } module_exit(gadget_cfs_exit);
diff --git a/drivers/usb/gadget/function/Makefile b/drivers/usb/gadget/function/Makefile index 5d3a6cf..d7d5673 100644 --- a/drivers/usb/gadget/function/Makefile +++ b/drivers/usb/gadget/function/Makefile
@@ -50,3 +50,7 @@ obj-$(CONFIG_USB_F_PRINTER) += usb_f_printer.o usb_f_tcm-y := f_tcm.o obj-$(CONFIG_USB_F_TCM) += usb_f_tcm.o +usb_f_audio_source-y := f_audio_source.o +obj-$(CONFIG_USB_F_AUDIO_SRC) += usb_f_audio_source.o +usb_f_accessory-y := f_accessory.o +obj-$(CONFIG_USB_F_ACC) += usb_f_accessory.o
diff --git a/drivers/usb/gadget/function/f_accessory.c b/drivers/usb/gadget/function/f_accessory.c new file mode 100644 index 0000000..7aa2656 --- /dev/null +++ b/drivers/usb/gadget/function/f_accessory.c
@@ -0,0 +1,1352 @@ +/* + * Gadget Function Driver for Android USB accessories + * + * Copyright (C) 2011 Google, Inc. + * Author: Mike Lockwood <lockwood@android.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +/* #define DEBUG */ +/* #define VERBOSE_DEBUG */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/poll.h> +#include <linux/delay.h> +#include <linux/wait.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/kthread.h> +#include <linux/freezer.h> + +#include <linux/types.h> +#include <linux/file.h> +#include <linux/device.h> +#include <linux/miscdevice.h> + +#include <linux/hid.h> +#include <linux/hiddev.h> +#include <linux/usb.h> +#include <linux/usb/ch9.h> +#include <linux/usb/f_accessory.h> + +#include <linux/configfs.h> +#include <linux/usb/composite.h> + +#define MAX_INST_NAME_LEN 40 +#define BULK_BUFFER_SIZE 16384 +#define ACC_STRING_SIZE 256 + +#define PROTOCOL_VERSION 2 + +/* String IDs */ +#define INTERFACE_STRING_INDEX 0 + +/* number of tx and rx requests to allocate */ +#define TX_REQ_MAX 4 +#define RX_REQ_MAX 2 + +struct acc_hid_dev { + struct list_head list; + struct hid_device *hid; + struct acc_dev *dev; + /* accessory defined ID */ + int id; + /* HID report descriptor */ + u8 *report_desc; + /* length of HID report descriptor */ + int report_desc_len; + /* number of bytes of report_desc we have received so far */ + int report_desc_offset; +}; + +struct acc_dev { + struct usb_function function; + struct usb_composite_dev *cdev; + spinlock_t lock; + + struct usb_ep *ep_in; + struct usb_ep *ep_out; + + /* online indicates state of function_set_alt & function_unbind + * set to 1 when we connect + */ + int online:1; + + /* disconnected indicates state of open & release + * Set to 1 when we disconnect. + * Not cleared until our file is closed. + */ + int disconnected:1; + + /* strings sent by the host */ + char manufacturer[ACC_STRING_SIZE]; + char model[ACC_STRING_SIZE]; + char description[ACC_STRING_SIZE]; + char version[ACC_STRING_SIZE]; + char uri[ACC_STRING_SIZE]; + char serial[ACC_STRING_SIZE]; + + /* for acc_complete_set_string */ + int string_index; + + /* set to 1 if we have a pending start request */ + int start_requested; + + int audio_mode; + + /* synchronize access to our device file */ + atomic_t open_excl; + + struct list_head tx_idle; + + wait_queue_head_t read_wq; + wait_queue_head_t write_wq; + struct usb_request *rx_req[RX_REQ_MAX]; + int rx_done; + + /* delayed work for handling ACCESSORY_START */ + struct delayed_work start_work; + + /* worker for registering and unregistering hid devices */ + struct work_struct hid_work; + + /* list of active HID devices */ + struct list_head hid_list; + + /* list of new HID devices to register */ + struct list_head new_hid_list; + + /* list of dead HID devices to unregister */ + struct list_head dead_hid_list; +}; + +static struct usb_interface_descriptor acc_interface_desc = { + .bLength = USB_DT_INTERFACE_SIZE, + .bDescriptorType = USB_DT_INTERFACE, + .bInterfaceNumber = 0, + .bNumEndpoints = 2, + .bInterfaceClass = USB_CLASS_VENDOR_SPEC, + .bInterfaceSubClass = USB_SUBCLASS_VENDOR_SPEC, + .bInterfaceProtocol = 0, +}; + +static struct usb_endpoint_descriptor acc_highspeed_in_desc = { + .bLength = USB_DT_ENDPOINT_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_IN, + .bmAttributes = USB_ENDPOINT_XFER_BULK, + .wMaxPacketSize = __constant_cpu_to_le16(512), +}; + +static struct usb_endpoint_descriptor acc_highspeed_out_desc = { + .bLength = USB_DT_ENDPOINT_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_OUT, + .bmAttributes = USB_ENDPOINT_XFER_BULK, + .wMaxPacketSize = __constant_cpu_to_le16(512), +}; + +static struct usb_endpoint_descriptor acc_fullspeed_in_desc = { + .bLength = USB_DT_ENDPOINT_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_IN, + .bmAttributes = USB_ENDPOINT_XFER_BULK, +}; + +static struct usb_endpoint_descriptor acc_fullspeed_out_desc = { + .bLength = USB_DT_ENDPOINT_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_OUT, + .bmAttributes = USB_ENDPOINT_XFER_BULK, +}; + +static struct usb_descriptor_header *fs_acc_descs[] = { + (struct usb_descriptor_header *) &acc_interface_desc, + (struct usb_descriptor_header *) &acc_fullspeed_in_desc, + (struct usb_descriptor_header *) &acc_fullspeed_out_desc, + NULL, +}; + +static struct usb_descriptor_header *hs_acc_descs[] = { + (struct usb_descriptor_header *) &acc_interface_desc, + (struct usb_descriptor_header *) &acc_highspeed_in_desc, + (struct usb_descriptor_header *) &acc_highspeed_out_desc, + NULL, +}; + +static struct usb_string acc_string_defs[] = { + [INTERFACE_STRING_INDEX].s = "Android Accessory Interface", + { }, /* end of list */ +}; + +static struct usb_gadget_strings acc_string_table = { + .language = 0x0409, /* en-US */ + .strings = acc_string_defs, +}; + +static struct usb_gadget_strings *acc_strings[] = { + &acc_string_table, + NULL, +}; + +/* temporary variable used between acc_open() and acc_gadget_bind() */ +static struct acc_dev *_acc_dev; + +struct acc_instance { + struct usb_function_instance func_inst; + const char *name; +}; + +static inline struct acc_dev *func_to_dev(struct usb_function *f) +{ + return container_of(f, struct acc_dev, function); +} + +static struct usb_request *acc_request_new(struct usb_ep *ep, int buffer_size) +{ + struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL); + + if (!req) + return NULL; + + /* now allocate buffers for the requests */ + req->buf = kmalloc(buffer_size, GFP_KERNEL); + if (!req->buf) { + usb_ep_free_request(ep, req); + return NULL; + } + + return req; +} + +static void acc_request_free(struct usb_request *req, struct usb_ep *ep) +{ + if (req) { + kfree(req->buf); + usb_ep_free_request(ep, req); + } +} + +/* add a request to the tail of a list */ +static void req_put(struct acc_dev *dev, struct list_head *head, + struct usb_request *req) +{ + unsigned long flags; + + spin_lock_irqsave(&dev->lock, flags); + list_add_tail(&req->list, head); + spin_unlock_irqrestore(&dev->lock, flags); +} + +/* remove a request from the head of a list */ +static struct usb_request *req_get(struct acc_dev *dev, struct list_head *head) +{ + unsigned long flags; + struct usb_request *req; + + spin_lock_irqsave(&dev->lock, flags); + if (list_empty(head)) { + req = 0; + } else { + req = list_first_entry(head, struct usb_request, list); + list_del(&req->list); + } + spin_unlock_irqrestore(&dev->lock, flags); + return req; +} + +static void acc_set_disconnected(struct acc_dev *dev) +{ + dev->disconnected = 1; +} + +static void acc_complete_in(struct usb_ep *ep, struct usb_request *req) +{ + struct acc_dev *dev = _acc_dev; + + if (req->status == -ESHUTDOWN) { + pr_debug("acc_complete_in set disconnected"); + acc_set_disconnected(dev); + } + + req_put(dev, &dev->tx_idle, req); + + wake_up(&dev->write_wq); +} + +static void acc_complete_out(struct usb_ep *ep, struct usb_request *req) +{ + struct acc_dev *dev = _acc_dev; + + dev->rx_done = 1; + if (req->status == -ESHUTDOWN) { + pr_debug("acc_complete_out set disconnected"); + acc_set_disconnected(dev); + } + + wake_up(&dev->read_wq); +} + +static void acc_complete_set_string(struct usb_ep *ep, struct usb_request *req) +{ + struct acc_dev *dev = ep->driver_data; + char *string_dest = NULL; + int length = req->actual; + + if (req->status != 0) { + pr_err("acc_complete_set_string, err %d\n", req->status); + return; + } + + switch (dev->string_index) { + case ACCESSORY_STRING_MANUFACTURER: + string_dest = dev->manufacturer; + break; + case ACCESSORY_STRING_MODEL: + string_dest = dev->model; + break; + case ACCESSORY_STRING_DESCRIPTION: + string_dest = dev->description; + break; + case ACCESSORY_STRING_VERSION: + string_dest = dev->version; + break; + case ACCESSORY_STRING_URI: + string_dest = dev->uri; + break; + case ACCESSORY_STRING_SERIAL: + string_dest = dev->serial; + break; + } + if (string_dest) { + unsigned long flags; + + if (length >= ACC_STRING_SIZE) + length = ACC_STRING_SIZE - 1; + + spin_lock_irqsave(&dev->lock, flags); + memcpy(string_dest, req->buf, length); + /* ensure zero termination */ + string_dest[length] = 0; + spin_unlock_irqrestore(&dev->lock, flags); + } else { + pr_err("unknown accessory string index %d\n", + dev->string_index); + } +} + +static void acc_complete_set_hid_report_desc(struct usb_ep *ep, + struct usb_request *req) +{ + struct acc_hid_dev *hid = req->context; + struct acc_dev *dev = hid->dev; + int length = req->actual; + + if (req->status != 0) { + pr_err("acc_complete_set_hid_report_desc, err %d\n", + req->status); + return; + } + + memcpy(hid->report_desc + hid->report_desc_offset, req->buf, length); + hid->report_desc_offset += length; + if (hid->report_desc_offset == hid->report_desc_len) { + /* After we have received the entire report descriptor + * we schedule work to initialize the HID device + */ + schedule_work(&dev->hid_work); + } +} + +static void acc_complete_send_hid_event(struct usb_ep *ep, + struct usb_request *req) +{ + struct acc_hid_dev *hid = req->context; + int length = req->actual; + + if (req->status != 0) { + pr_err("acc_complete_send_hid_event, err %d\n", req->status); + return; + } + + hid_report_raw_event(hid->hid, HID_INPUT_REPORT, req->buf, length, 1); +} + +static int acc_hid_parse(struct hid_device *hid) +{ + struct acc_hid_dev *hdev = hid->driver_data; + + hid_parse_report(hid, hdev->report_desc, hdev->report_desc_len); + return 0; +} + +static int acc_hid_start(struct hid_device *hid) +{ + return 0; +} + +static void acc_hid_stop(struct hid_device *hid) +{ +} + +static int acc_hid_open(struct hid_device *hid) +{ + return 0; +} + +static void acc_hid_close(struct hid_device *hid) +{ +} + +static int acc_hid_raw_request(struct hid_device *hid, unsigned char reportnum, + __u8 *buf, size_t len, unsigned char rtype, int reqtype) +{ + return 0; +} + +static struct hid_ll_driver acc_hid_ll_driver = { + .parse = acc_hid_parse, + .start = acc_hid_start, + .stop = acc_hid_stop, + .open = acc_hid_open, + .close = acc_hid_close, + .raw_request = acc_hid_raw_request, +}; + +static struct acc_hid_dev *acc_hid_new(struct acc_dev *dev, + int id, int desc_len) +{ + struct acc_hid_dev *hdev; + + hdev = kzalloc(sizeof(*hdev), GFP_ATOMIC); + if (!hdev) + return NULL; + hdev->report_desc = kzalloc(desc_len, GFP_ATOMIC); + if (!hdev->report_desc) { + kfree(hdev); + return NULL; + } + hdev->dev = dev; + hdev->id = id; + hdev->report_desc_len = desc_len; + + return hdev; +} + +static struct acc_hid_dev *acc_hid_get(struct list_head *list, int id) +{ + struct acc_hid_dev *hid; + + list_for_each_entry(hid, list, list) { + if (hid->id == id) + return hid; + } + return NULL; +} + +static int acc_register_hid(struct acc_dev *dev, int id, int desc_length) +{ + struct acc_hid_dev *hid; + unsigned long flags; + + /* report descriptor length must be > 0 */ + if (desc_length <= 0) + return -EINVAL; + + spin_lock_irqsave(&dev->lock, flags); + /* replace HID if one already exists with this ID */ + hid = acc_hid_get(&dev->hid_list, id); + if (!hid) + hid = acc_hid_get(&dev->new_hid_list, id); + if (hid) + list_move(&hid->list, &dev->dead_hid_list); + + hid = acc_hid_new(dev, id, desc_length); + if (!hid) { + spin_unlock_irqrestore(&dev->lock, flags); + return -ENOMEM; + } + + list_add(&hid->list, &dev->new_hid_list); + spin_unlock_irqrestore(&dev->lock, flags); + + /* schedule work to register the HID device */ + schedule_work(&dev->hid_work); + return 0; +} + +static int acc_unregister_hid(struct acc_dev *dev, int id) +{ + struct acc_hid_dev *hid; + unsigned long flags; + + spin_lock_irqsave(&dev->lock, flags); + hid = acc_hid_get(&dev->hid_list, id); + if (!hid) + hid = acc_hid_get(&dev->new_hid_list, id); + if (!hid) { + spin_unlock_irqrestore(&dev->lock, flags); + return -EINVAL; + } + + list_move(&hid->list, &dev->dead_hid_list); + spin_unlock_irqrestore(&dev->lock, flags); + + schedule_work(&dev->hid_work); + return 0; +} + +static int create_bulk_endpoints(struct acc_dev *dev, + struct usb_endpoint_descriptor *in_desc, + struct usb_endpoint_descriptor *out_desc) +{ + struct usb_composite_dev *cdev = dev->cdev; + struct usb_request *req; + struct usb_ep *ep; + int i; + + DBG(cdev, "create_bulk_endpoints dev: %p\n", dev); + + ep = usb_ep_autoconfig(cdev->gadget, in_desc); + if (!ep) { + DBG(cdev, "usb_ep_autoconfig for ep_in failed\n"); + return -ENODEV; + } + DBG(cdev, "usb_ep_autoconfig for ep_in got %s\n", ep->name); + ep->driver_data = dev; /* claim the endpoint */ + dev->ep_in = ep; + + ep = usb_ep_autoconfig(cdev->gadget, out_desc); + if (!ep) { + DBG(cdev, "usb_ep_autoconfig for ep_out failed\n"); + return -ENODEV; + } + DBG(cdev, "usb_ep_autoconfig for ep_out got %s\n", ep->name); + ep->driver_data = dev; /* claim the endpoint */ + dev->ep_out = ep; + + /* now allocate requests for our endpoints */ + for (i = 0; i < TX_REQ_MAX; i++) { + req = acc_request_new(dev->ep_in, BULK_BUFFER_SIZE); + if (!req) + goto fail; + req->complete = acc_complete_in; + req_put(dev, &dev->tx_idle, req); + } + for (i = 0; i < RX_REQ_MAX; i++) { + req = acc_request_new(dev->ep_out, BULK_BUFFER_SIZE); + if (!req) + goto fail; + req->complete = acc_complete_out; + dev->rx_req[i] = req; + } + + return 0; + +fail: + pr_err("acc_bind() could not allocate requests\n"); + while ((req = req_get(dev, &dev->tx_idle))) + acc_request_free(req, dev->ep_in); + for (i = 0; i < RX_REQ_MAX; i++) + acc_request_free(dev->rx_req[i], dev->ep_out); + return -1; +} + +static ssize_t acc_read(struct file *fp, char __user *buf, + size_t count, loff_t *pos) +{ + struct acc_dev *dev = fp->private_data; + struct usb_request *req; + ssize_t r = count; + unsigned xfer; + int ret = 0; + + pr_debug("acc_read(%zu)\n", count); + + if (dev->disconnected) { + pr_debug("acc_read disconnected"); + return -ENODEV; + } + + if (count > BULK_BUFFER_SIZE) + count = BULK_BUFFER_SIZE; + + /* we will block until we're online */ + pr_debug("acc_read: waiting for online\n"); + ret = wait_event_interruptible(dev->read_wq, dev->online); + if (ret < 0) { + r = ret; + goto done; + } + + if (dev->rx_done) { + // last req cancelled. try to get it. + req = dev->rx_req[0]; + goto copy_data; + } + +requeue_req: + /* queue a request */ + req = dev->rx_req[0]; + req->length = count; + dev->rx_done = 0; + ret = usb_ep_queue(dev->ep_out, req, GFP_KERNEL); + if (ret < 0) { + r = -EIO; + goto done; + } else { + pr_debug("rx %p queue\n", req); + } + + /* wait for a request to complete */ + ret = wait_event_interruptible(dev->read_wq, dev->rx_done); + if (ret < 0) { + r = ret; + ret = usb_ep_dequeue(dev->ep_out, req); + if (ret != 0) { + // cancel failed. There can be a data already received. + // it will be retrieved in the next read. + pr_debug("acc_read: cancelling failed %d", ret); + } + goto done; + } + +copy_data: + dev->rx_done = 0; + if (dev->online) { + /* If we got a 0-len packet, throw it back and try again. */ + if (req->actual == 0) + goto requeue_req; + + pr_debug("rx %p %u\n", req, req->actual); + xfer = (req->actual < count) ? req->actual : count; + r = xfer; + if (copy_to_user(buf, req->buf, xfer)) + r = -EFAULT; + } else + r = -EIO; + +done: + pr_debug("acc_read returning %zd\n", r); + return r; +} + +static ssize_t acc_write(struct file *fp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct acc_dev *dev = fp->private_data; + struct usb_request *req = 0; + ssize_t r = count; + unsigned xfer; + int ret; + + pr_debug("acc_write(%zu)\n", count); + + if (!dev->online || dev->disconnected) { + pr_debug("acc_write disconnected or not online"); + return -ENODEV; + } + + while (count > 0) { + if (!dev->online) { + pr_debug("acc_write dev->error\n"); + r = -EIO; + break; + } + + /* get an idle tx request to use */ + req = 0; + ret = wait_event_interruptible(dev->write_wq, + ((req = req_get(dev, &dev->tx_idle)) || !dev->online)); + if (!req) { + r = ret; + break; + } + + if (count > BULK_BUFFER_SIZE) { + xfer = BULK_BUFFER_SIZE; + /* ZLP, They will be more TX requests so not yet. */ + req->zero = 0; + } else { + xfer = count; + /* If the data length is a multple of the + * maxpacket size then send a zero length packet(ZLP). + */ + req->zero = ((xfer % dev->ep_in->maxpacket) == 0); + } + if (copy_from_user(req->buf, buf, xfer)) { + r = -EFAULT; + break; + } + + req->length = xfer; + ret = usb_ep_queue(dev->ep_in, req, GFP_KERNEL); + if (ret < 0) { + pr_debug("acc_write: xfer error %d\n", ret); + r = -EIO; + break; + } + + buf += xfer; + count -= xfer; + + /* zero this so we don't try to free it on error exit */ + req = 0; + } + + if (req) + req_put(dev, &dev->tx_idle, req); + + pr_debug("acc_write returning %zd\n", r); + return r; +} + +static long acc_ioctl(struct file *fp, unsigned code, unsigned long value) +{ + struct acc_dev *dev = fp->private_data; + char *src = NULL; + int ret; + + switch (code) { + case ACCESSORY_GET_STRING_MANUFACTURER: + src = dev->manufacturer; + break; + case ACCESSORY_GET_STRING_MODEL: + src = dev->model; + break; + case ACCESSORY_GET_STRING_DESCRIPTION: + src = dev->description; + break; + case ACCESSORY_GET_STRING_VERSION: + src = dev->version; + break; + case ACCESSORY_GET_STRING_URI: + src = dev->uri; + break; + case ACCESSORY_GET_STRING_SERIAL: + src = dev->serial; + break; + case ACCESSORY_IS_START_REQUESTED: + return dev->start_requested; + case ACCESSORY_GET_AUDIO_MODE: + return dev->audio_mode; + } + if (!src) + return -EINVAL; + + ret = strlen(src) + 1; + if (copy_to_user((void __user *)value, src, ret)) + ret = -EFAULT; + return ret; +} + +static int acc_open(struct inode *ip, struct file *fp) +{ + printk(KERN_INFO "acc_open\n"); + if (atomic_xchg(&_acc_dev->open_excl, 1)) + return -EBUSY; + + _acc_dev->disconnected = 0; + fp->private_data = _acc_dev; + return 0; +} + +static int acc_release(struct inode *ip, struct file *fp) +{ + printk(KERN_INFO "acc_release\n"); + + WARN_ON(!atomic_xchg(&_acc_dev->open_excl, 0)); + /* indicate that we are disconnected + * still could be online so don't touch online flag + */ + _acc_dev->disconnected = 1; + return 0; +} + +/* file operations for /dev/usb_accessory */ +static const struct file_operations acc_fops = { + .owner = THIS_MODULE, + .read = acc_read, + .write = acc_write, + .unlocked_ioctl = acc_ioctl, + .open = acc_open, + .release = acc_release, +}; + +static int acc_hid_probe(struct hid_device *hdev, + const struct hid_device_id *id) +{ + int ret; + + ret = hid_parse(hdev); + if (ret) + return ret; + return hid_hw_start(hdev, HID_CONNECT_DEFAULT); +} + +static struct miscdevice acc_device = { + .minor = MISC_DYNAMIC_MINOR, + .name = "usb_accessory", + .fops = &acc_fops, +}; + +static const struct hid_device_id acc_hid_table[] = { + { HID_USB_DEVICE(HID_ANY_ID, HID_ANY_ID) }, + { } +}; + +static struct hid_driver acc_hid_driver = { + .name = "USB accessory", + .id_table = acc_hid_table, + .probe = acc_hid_probe, +}; + +static void acc_complete_setup_noop(struct usb_ep *ep, struct usb_request *req) +{ + /* + * Default no-op function when nothing needs to be done for the + * setup request + */ +} + +int acc_ctrlrequest(struct usb_composite_dev *cdev, + const struct usb_ctrlrequest *ctrl) +{ + struct acc_dev *dev = _acc_dev; + int value = -EOPNOTSUPP; + struct acc_hid_dev *hid; + int offset; + u8 b_requestType = ctrl->bRequestType; + u8 b_request = ctrl->bRequest; + u16 w_index = le16_to_cpu(ctrl->wIndex); + u16 w_value = le16_to_cpu(ctrl->wValue); + u16 w_length = le16_to_cpu(ctrl->wLength); + unsigned long flags; + +/* + printk(KERN_INFO "acc_ctrlrequest " + "%02x.%02x v%04x i%04x l%u\n", + b_requestType, b_request, + w_value, w_index, w_length); +*/ + + if (b_requestType == (USB_DIR_OUT | USB_TYPE_VENDOR)) { + if (b_request == ACCESSORY_START) { + dev->start_requested = 1; + schedule_delayed_work( + &dev->start_work, msecs_to_jiffies(10)); + value = 0; + cdev->req->complete = acc_complete_setup_noop; + } else if (b_request == ACCESSORY_SEND_STRING) { + dev->string_index = w_index; + cdev->gadget->ep0->driver_data = dev; + cdev->req->complete = acc_complete_set_string; + value = w_length; + } else if (b_request == ACCESSORY_SET_AUDIO_MODE && + w_index == 0 && w_length == 0) { + dev->audio_mode = w_value; + cdev->req->complete = acc_complete_setup_noop; + value = 0; + } else if (b_request == ACCESSORY_REGISTER_HID) { + cdev->req->complete = acc_complete_setup_noop; + value = acc_register_hid(dev, w_value, w_index); + } else if (b_request == ACCESSORY_UNREGISTER_HID) { + cdev->req->complete = acc_complete_setup_noop; + value = acc_unregister_hid(dev, w_value); + } else if (b_request == ACCESSORY_SET_HID_REPORT_DESC) { + spin_lock_irqsave(&dev->lock, flags); + hid = acc_hid_get(&dev->new_hid_list, w_value); + spin_unlock_irqrestore(&dev->lock, flags); + if (!hid) { + value = -EINVAL; + goto err; + } + offset = w_index; + if (offset != hid->report_desc_offset + || offset + w_length > hid->report_desc_len) { + value = -EINVAL; + goto err; + } + cdev->req->context = hid; + cdev->req->complete = acc_complete_set_hid_report_desc; + value = w_length; + } else if (b_request == ACCESSORY_SEND_HID_EVENT) { + spin_lock_irqsave(&dev->lock, flags); + hid = acc_hid_get(&dev->hid_list, w_value); + spin_unlock_irqrestore(&dev->lock, flags); + if (!hid) { + value = -EINVAL; + goto err; + } + cdev->req->context = hid; + cdev->req->complete = acc_complete_send_hid_event; + value = w_length; + } + } else if (b_requestType == (USB_DIR_IN | USB_TYPE_VENDOR)) { + if (b_request == ACCESSORY_GET_PROTOCOL) { + *((u16 *)cdev->req->buf) = PROTOCOL_VERSION; + value = sizeof(u16); + cdev->req->complete = acc_complete_setup_noop; + /* clear any string left over from a previous session */ + memset(dev->manufacturer, 0, sizeof(dev->manufacturer)); + memset(dev->model, 0, sizeof(dev->model)); + memset(dev->description, 0, sizeof(dev->description)); + memset(dev->version, 0, sizeof(dev->version)); + memset(dev->uri, 0, sizeof(dev->uri)); + memset(dev->serial, 0, sizeof(dev->serial)); + dev->start_requested = 0; + dev->audio_mode = 0; + } + } + + if (value >= 0) { + cdev->req->zero = 0; + cdev->req->length = value; + value = usb_ep_queue(cdev->gadget->ep0, cdev->req, GFP_ATOMIC); + if (value < 0) + ERROR(cdev, "%s setup response queue error\n", + __func__); + } + +err: + if (value == -EOPNOTSUPP) + VDBG(cdev, + "unknown class-specific control req " + "%02x.%02x v%04x i%04x l%u\n", + ctrl->bRequestType, ctrl->bRequest, + w_value, w_index, w_length); + return value; +} +EXPORT_SYMBOL_GPL(acc_ctrlrequest); + +static int +__acc_function_bind(struct usb_configuration *c, + struct usb_function *f, bool configfs) +{ + struct usb_composite_dev *cdev = c->cdev; + struct acc_dev *dev = func_to_dev(f); + int id; + int ret; + + DBG(cdev, "acc_function_bind dev: %p\n", dev); + + if (configfs) { + if (acc_string_defs[INTERFACE_STRING_INDEX].id == 0) { + ret = usb_string_id(c->cdev); + if (ret < 0) + return ret; + acc_string_defs[INTERFACE_STRING_INDEX].id = ret; + acc_interface_desc.iInterface = ret; + } + dev->cdev = c->cdev; + } + ret = hid_register_driver(&acc_hid_driver); + if (ret) + return ret; + + dev->start_requested = 0; + + /* allocate interface ID(s) */ + id = usb_interface_id(c, f); + if (id < 0) + return id; + acc_interface_desc.bInterfaceNumber = id; + + /* allocate endpoints */ + ret = create_bulk_endpoints(dev, &acc_fullspeed_in_desc, + &acc_fullspeed_out_desc); + if (ret) + return ret; + + /* support high speed hardware */ + if (gadget_is_dualspeed(c->cdev->gadget)) { + acc_highspeed_in_desc.bEndpointAddress = + acc_fullspeed_in_desc.bEndpointAddress; + acc_highspeed_out_desc.bEndpointAddress = + acc_fullspeed_out_desc.bEndpointAddress; + } + + DBG(cdev, "%s speed %s: IN/%s, OUT/%s\n", + gadget_is_dualspeed(c->cdev->gadget) ? "dual" : "full", + f->name, dev->ep_in->name, dev->ep_out->name); + return 0; +} + +static int +acc_function_bind_configfs(struct usb_configuration *c, + struct usb_function *f) { + return __acc_function_bind(c, f, true); +} + +static void +kill_all_hid_devices(struct acc_dev *dev) +{ + struct acc_hid_dev *hid; + struct list_head *entry, *temp; + unsigned long flags; + + /* do nothing if usb accessory device doesn't exist */ + if (!dev) + return; + + spin_lock_irqsave(&dev->lock, flags); + list_for_each_safe(entry, temp, &dev->hid_list) { + hid = list_entry(entry, struct acc_hid_dev, list); + list_del(&hid->list); + list_add(&hid->list, &dev->dead_hid_list); + } + list_for_each_safe(entry, temp, &dev->new_hid_list) { + hid = list_entry(entry, struct acc_hid_dev, list); + list_del(&hid->list); + list_add(&hid->list, &dev->dead_hid_list); + } + spin_unlock_irqrestore(&dev->lock, flags); + + schedule_work(&dev->hid_work); +} + +static void +acc_hid_unbind(struct acc_dev *dev) +{ + hid_unregister_driver(&acc_hid_driver); + kill_all_hid_devices(dev); +} + +static void +acc_function_unbind(struct usb_configuration *c, struct usb_function *f) +{ + struct acc_dev *dev = func_to_dev(f); + struct usb_request *req; + int i; + + dev->online = 0; /* clear online flag */ + wake_up(&dev->read_wq); /* unblock reads on closure */ + wake_up(&dev->write_wq); /* likewise for writes */ + + while ((req = req_get(dev, &dev->tx_idle))) + acc_request_free(req, dev->ep_in); + for (i = 0; i < RX_REQ_MAX; i++) + acc_request_free(dev->rx_req[i], dev->ep_out); + + acc_hid_unbind(dev); +} + +static void acc_start_work(struct work_struct *data) +{ + char *envp[2] = { "ACCESSORY=START", NULL }; + + kobject_uevent_env(&acc_device.this_device->kobj, KOBJ_CHANGE, envp); +} + +static int acc_hid_init(struct acc_hid_dev *hdev) +{ + struct hid_device *hid; + int ret; + + hid = hid_allocate_device(); + if (IS_ERR(hid)) + return PTR_ERR(hid); + + hid->ll_driver = &acc_hid_ll_driver; + hid->dev.parent = acc_device.this_device; + + hid->bus = BUS_USB; + hid->vendor = HID_ANY_ID; + hid->product = HID_ANY_ID; + hid->driver_data = hdev; + ret = hid_add_device(hid); + if (ret) { + pr_err("can't add hid device: %d\n", ret); + hid_destroy_device(hid); + return ret; + } + + hdev->hid = hid; + return 0; +} + +static void acc_hid_delete(struct acc_hid_dev *hid) +{ + kfree(hid->report_desc); + kfree(hid); +} + +static void acc_hid_work(struct work_struct *data) +{ + struct acc_dev *dev = _acc_dev; + struct list_head *entry, *temp; + struct acc_hid_dev *hid; + struct list_head new_list, dead_list; + unsigned long flags; + + INIT_LIST_HEAD(&new_list); + + spin_lock_irqsave(&dev->lock, flags); + + /* copy hids that are ready for initialization to new_list */ + list_for_each_safe(entry, temp, &dev->new_hid_list) { + hid = list_entry(entry, struct acc_hid_dev, list); + if (hid->report_desc_offset == hid->report_desc_len) + list_move(&hid->list, &new_list); + } + + if (list_empty(&dev->dead_hid_list)) { + INIT_LIST_HEAD(&dead_list); + } else { + /* move all of dev->dead_hid_list to dead_list */ + dead_list.prev = dev->dead_hid_list.prev; + dead_list.next = dev->dead_hid_list.next; + dead_list.next->prev = &dead_list; + dead_list.prev->next = &dead_list; + INIT_LIST_HEAD(&dev->dead_hid_list); + } + + spin_unlock_irqrestore(&dev->lock, flags); + + /* register new HID devices */ + list_for_each_safe(entry, temp, &new_list) { + hid = list_entry(entry, struct acc_hid_dev, list); + if (acc_hid_init(hid)) { + pr_err("can't add HID device %p\n", hid); + acc_hid_delete(hid); + } else { + spin_lock_irqsave(&dev->lock, flags); + list_move(&hid->list, &dev->hid_list); + spin_unlock_irqrestore(&dev->lock, flags); + } + } + + /* remove dead HID devices */ + list_for_each_safe(entry, temp, &dead_list) { + hid = list_entry(entry, struct acc_hid_dev, list); + list_del(&hid->list); + if (hid->hid) + hid_destroy_device(hid->hid); + acc_hid_delete(hid); + } +} + +static int acc_function_set_alt(struct usb_function *f, + unsigned intf, unsigned alt) +{ + struct acc_dev *dev = func_to_dev(f); + struct usb_composite_dev *cdev = f->config->cdev; + int ret; + + DBG(cdev, "acc_function_set_alt intf: %d alt: %d\n", intf, alt); + + ret = config_ep_by_speed(cdev->gadget, f, dev->ep_in); + if (ret) + return ret; + + ret = usb_ep_enable(dev->ep_in); + if (ret) + return ret; + + ret = config_ep_by_speed(cdev->gadget, f, dev->ep_out); + if (ret) + return ret; + + ret = usb_ep_enable(dev->ep_out); + if (ret) { + usb_ep_disable(dev->ep_in); + return ret; + } + + dev->online = 1; + dev->disconnected = 0; /* if online then not disconnected */ + + /* readers may be blocked waiting for us to go online */ + wake_up(&dev->read_wq); + return 0; +} + +static void acc_function_disable(struct usb_function *f) +{ + struct acc_dev *dev = func_to_dev(f); + struct usb_composite_dev *cdev = dev->cdev; + + DBG(cdev, "acc_function_disable\n"); + acc_set_disconnected(dev); /* this now only sets disconnected */ + dev->online = 0; /* so now need to clear online flag here too */ + usb_ep_disable(dev->ep_in); + usb_ep_disable(dev->ep_out); + + /* readers may be blocked waiting for us to go online */ + wake_up(&dev->read_wq); + + VDBG(cdev, "%s disabled\n", dev->function.name); +} + +static int acc_setup(void) +{ + struct acc_dev *dev; + int ret; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + spin_lock_init(&dev->lock); + init_waitqueue_head(&dev->read_wq); + init_waitqueue_head(&dev->write_wq); + atomic_set(&dev->open_excl, 0); + INIT_LIST_HEAD(&dev->tx_idle); + INIT_LIST_HEAD(&dev->hid_list); + INIT_LIST_HEAD(&dev->new_hid_list); + INIT_LIST_HEAD(&dev->dead_hid_list); + INIT_DELAYED_WORK(&dev->start_work, acc_start_work); + INIT_WORK(&dev->hid_work, acc_hid_work); + + /* _acc_dev must be set before calling usb_gadget_register_driver */ + _acc_dev = dev; + + ret = misc_register(&acc_device); + if (ret) + goto err; + + return 0; + +err: + kfree(dev); + pr_err("USB accessory gadget driver failed to initialize\n"); + return ret; +} + +void acc_disconnect(void) +{ + /* unregister all HID devices if USB is disconnected */ + kill_all_hid_devices(_acc_dev); +} +EXPORT_SYMBOL_GPL(acc_disconnect); + +static void acc_cleanup(void) +{ + misc_deregister(&acc_device); + kfree(_acc_dev); + _acc_dev = NULL; +} +static struct acc_instance *to_acc_instance(struct config_item *item) +{ + return container_of(to_config_group(item), struct acc_instance, + func_inst.group); +} + +static void acc_attr_release(struct config_item *item) +{ + struct acc_instance *fi_acc = to_acc_instance(item); + + usb_put_function_instance(&fi_acc->func_inst); +} + +static struct configfs_item_operations acc_item_ops = { + .release = acc_attr_release, +}; + +static struct config_item_type acc_func_type = { + .ct_item_ops = &acc_item_ops, + .ct_owner = THIS_MODULE, +}; + +static struct acc_instance *to_fi_acc(struct usb_function_instance *fi) +{ + return container_of(fi, struct acc_instance, func_inst); +} + +static int acc_set_inst_name(struct usb_function_instance *fi, const char *name) +{ + struct acc_instance *fi_acc; + char *ptr; + int name_len; + + name_len = strlen(name) + 1; + if (name_len > MAX_INST_NAME_LEN) + return -ENAMETOOLONG; + + ptr = kstrndup(name, name_len, GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + fi_acc = to_fi_acc(fi); + fi_acc->name = ptr; + return 0; +} + +static void acc_free_inst(struct usb_function_instance *fi) +{ + struct acc_instance *fi_acc; + + fi_acc = to_fi_acc(fi); + kfree(fi_acc->name); + acc_cleanup(); +} + +static struct usb_function_instance *acc_alloc_inst(void) +{ + struct acc_instance *fi_acc; + struct acc_dev *dev; + int err; + + fi_acc = kzalloc(sizeof(*fi_acc), GFP_KERNEL); + if (!fi_acc) + return ERR_PTR(-ENOMEM); + fi_acc->func_inst.set_inst_name = acc_set_inst_name; + fi_acc->func_inst.free_func_inst = acc_free_inst; + + err = acc_setup(); + if (err) { + kfree(fi_acc); + pr_err("Error setting ACCESSORY\n"); + return ERR_PTR(err); + } + + config_group_init_type_name(&fi_acc->func_inst.group, + "", &acc_func_type); + dev = _acc_dev; + return &fi_acc->func_inst; +} + +static void acc_free(struct usb_function *f) +{ +/*NO-OP: no function specific resource allocation in mtp_alloc*/ +} + +int acc_ctrlrequest_configfs(struct usb_function *f, + const struct usb_ctrlrequest *ctrl) { + if (f->config != NULL && f->config->cdev != NULL) + return acc_ctrlrequest(f->config->cdev, ctrl); + else + return -1; +} + +static struct usb_function *acc_alloc(struct usb_function_instance *fi) +{ + struct acc_dev *dev = _acc_dev; + + pr_info("acc_alloc\n"); + + dev->function.name = "accessory"; + dev->function.strings = acc_strings, + dev->function.fs_descriptors = fs_acc_descs; + dev->function.hs_descriptors = hs_acc_descs; + dev->function.bind = acc_function_bind_configfs; + dev->function.unbind = acc_function_unbind; + dev->function.set_alt = acc_function_set_alt; + dev->function.disable = acc_function_disable; + dev->function.free_func = acc_free; + dev->function.setup = acc_ctrlrequest_configfs; + + return &dev->function; +} +DECLARE_USB_FUNCTION_INIT(accessory, acc_alloc_inst, acc_alloc); +MODULE_LICENSE("GPL");
diff --git a/drivers/usb/gadget/function/f_audio_source.c b/drivers/usb/gadget/function/f_audio_source.c new file mode 100644 index 0000000..8124af3 --- /dev/null +++ b/drivers/usb/gadget/function/f_audio_source.c
@@ -0,0 +1,1071 @@ +/* + * Gadget Function Driver for USB audio source device + * + * Copyright (C) 2012 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/device.h> +#include <linux/usb/audio.h> +#include <linux/wait.h> +#include <linux/pm_qos.h> +#include <sound/core.h> +#include <sound/initval.h> +#include <sound/pcm.h> + +#include <linux/usb.h> +#include <linux/usb_usual.h> +#include <linux/usb/ch9.h> +#include <linux/configfs.h> +#include <linux/usb/composite.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#define SAMPLE_RATE 44100 +#define FRAMES_PER_MSEC (SAMPLE_RATE / 1000) + +#define IN_EP_MAX_PACKET_SIZE 256 + +/* Number of requests to allocate */ +#define IN_EP_REQ_COUNT 4 + +#define AUDIO_AC_INTERFACE 0 +#define AUDIO_AS_INTERFACE 1 +#define AUDIO_NUM_INTERFACES 2 +#define MAX_INST_NAME_LEN 40 + +/* B.3.1 Standard AC Interface Descriptor */ +static struct usb_interface_descriptor ac_interface_desc = { + .bLength = USB_DT_INTERFACE_SIZE, + .bDescriptorType = USB_DT_INTERFACE, + .bNumEndpoints = 0, + .bInterfaceClass = USB_CLASS_AUDIO, + .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, +}; + +DECLARE_UAC_AC_HEADER_DESCRIPTOR(2); + +#define UAC_DT_AC_HEADER_LENGTH UAC_DT_AC_HEADER_SIZE(AUDIO_NUM_INTERFACES) +/* 1 input terminal, 1 output terminal and 1 feature unit */ +#define UAC_DT_TOTAL_LENGTH (UAC_DT_AC_HEADER_LENGTH \ + + UAC_DT_INPUT_TERMINAL_SIZE + UAC_DT_OUTPUT_TERMINAL_SIZE \ + + UAC_DT_FEATURE_UNIT_SIZE(0)) +/* B.3.2 Class-Specific AC Interface Descriptor */ +static struct uac1_ac_header_descriptor_2 ac_header_desc = { + .bLength = UAC_DT_AC_HEADER_LENGTH, + .bDescriptorType = USB_DT_CS_INTERFACE, + .bDescriptorSubtype = UAC_HEADER, + .bcdADC = __constant_cpu_to_le16(0x0100), + .wTotalLength = __constant_cpu_to_le16(UAC_DT_TOTAL_LENGTH), + .bInCollection = AUDIO_NUM_INTERFACES, + .baInterfaceNr = { + [0] = AUDIO_AC_INTERFACE, + [1] = AUDIO_AS_INTERFACE, + } +}; + +#define INPUT_TERMINAL_ID 1 +static struct uac_input_terminal_descriptor input_terminal_desc = { + .bLength = UAC_DT_INPUT_TERMINAL_SIZE, + .bDescriptorType = USB_DT_CS_INTERFACE, + .bDescriptorSubtype = UAC_INPUT_TERMINAL, + .bTerminalID = INPUT_TERMINAL_ID, + .wTerminalType = UAC_INPUT_TERMINAL_MICROPHONE, + .bAssocTerminal = 0, + .wChannelConfig = 0x3, +}; + +DECLARE_UAC_FEATURE_UNIT_DESCRIPTOR(0); + +#define FEATURE_UNIT_ID 2 +static struct uac_feature_unit_descriptor_0 feature_unit_desc = { + .bLength = UAC_DT_FEATURE_UNIT_SIZE(0), + .bDescriptorType = USB_DT_CS_INTERFACE, + .bDescriptorSubtype = UAC_FEATURE_UNIT, + .bUnitID = FEATURE_UNIT_ID, + .bSourceID = INPUT_TERMINAL_ID, + .bControlSize = 2, +}; + +#define OUTPUT_TERMINAL_ID 3 +static struct uac1_output_terminal_descriptor output_terminal_desc = { + .bLength = UAC_DT_OUTPUT_TERMINAL_SIZE, + .bDescriptorType = USB_DT_CS_INTERFACE, + .bDescriptorSubtype = UAC_OUTPUT_TERMINAL, + .bTerminalID = OUTPUT_TERMINAL_ID, + .wTerminalType = UAC_TERMINAL_STREAMING, + .bAssocTerminal = FEATURE_UNIT_ID, + .bSourceID = FEATURE_UNIT_ID, +}; + +/* B.4.1 Standard AS Interface Descriptor */ +static struct usb_interface_descriptor as_interface_alt_0_desc = { + .bLength = USB_DT_INTERFACE_SIZE, + .bDescriptorType = USB_DT_INTERFACE, + .bAlternateSetting = 0, + .bNumEndpoints = 0, + .bInterfaceClass = USB_CLASS_AUDIO, + .bInterfaceSubClass = USB_SUBCLASS_AUDIOSTREAMING, +}; + +static struct usb_interface_descriptor as_interface_alt_1_desc = { + .bLength = USB_DT_INTERFACE_SIZE, + .bDescriptorType = USB_DT_INTERFACE, + .bAlternateSetting = 1, + .bNumEndpoints = 1, + .bInterfaceClass = USB_CLASS_AUDIO, + .bInterfaceSubClass = USB_SUBCLASS_AUDIOSTREAMING, +}; + +/* B.4.2 Class-Specific AS Interface Descriptor */ +static struct uac1_as_header_descriptor as_header_desc = { + .bLength = UAC_DT_AS_HEADER_SIZE, + .bDescriptorType = USB_DT_CS_INTERFACE, + .bDescriptorSubtype = UAC_AS_GENERAL, + .bTerminalLink = INPUT_TERMINAL_ID, + .bDelay = 1, + .wFormatTag = UAC_FORMAT_TYPE_I_PCM, +}; + +DECLARE_UAC_FORMAT_TYPE_I_DISCRETE_DESC(1); + +static struct uac_format_type_i_discrete_descriptor_1 as_type_i_desc = { + .bLength = UAC_FORMAT_TYPE_I_DISCRETE_DESC_SIZE(1), + .bDescriptorType = USB_DT_CS_INTERFACE, + .bDescriptorSubtype = UAC_FORMAT_TYPE, + .bFormatType = UAC_FORMAT_TYPE_I, + .bSubframeSize = 2, + .bBitResolution = 16, + .bSamFreqType = 1, +}; + +/* Standard ISO IN Endpoint Descriptor for highspeed */ +static struct usb_endpoint_descriptor hs_as_in_ep_desc = { + .bLength = USB_DT_ENDPOINT_AUDIO_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_IN, + .bmAttributes = USB_ENDPOINT_SYNC_SYNC + | USB_ENDPOINT_XFER_ISOC, + .wMaxPacketSize = __constant_cpu_to_le16(IN_EP_MAX_PACKET_SIZE), + .bInterval = 4, /* poll 1 per millisecond */ +}; + +/* Standard ISO IN Endpoint Descriptor for highspeed */ +static struct usb_endpoint_descriptor fs_as_in_ep_desc = { + .bLength = USB_DT_ENDPOINT_AUDIO_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_IN, + .bmAttributes = USB_ENDPOINT_SYNC_SYNC + | USB_ENDPOINT_XFER_ISOC, + .wMaxPacketSize = __constant_cpu_to_le16(IN_EP_MAX_PACKET_SIZE), + .bInterval = 1, /* poll 1 per millisecond */ +}; + +/* Class-specific AS ISO OUT Endpoint Descriptor */ +static struct uac_iso_endpoint_descriptor as_iso_in_desc = { + .bLength = UAC_ISO_ENDPOINT_DESC_SIZE, + .bDescriptorType = USB_DT_CS_ENDPOINT, + .bDescriptorSubtype = UAC_EP_GENERAL, + .bmAttributes = 1, + .bLockDelayUnits = 1, + .wLockDelay = __constant_cpu_to_le16(1), +}; + +static struct usb_descriptor_header *hs_audio_desc[] = { + (struct usb_descriptor_header *)&ac_interface_desc, + (struct usb_descriptor_header *)&ac_header_desc, + + (struct usb_descriptor_header *)&input_terminal_desc, + (struct usb_descriptor_header *)&output_terminal_desc, + (struct usb_descriptor_header *)&feature_unit_desc, + + (struct usb_descriptor_header *)&as_interface_alt_0_desc, + (struct usb_descriptor_header *)&as_interface_alt_1_desc, + (struct usb_descriptor_header *)&as_header_desc, + + (struct usb_descriptor_header *)&as_type_i_desc, + + (struct usb_descriptor_header *)&hs_as_in_ep_desc, + (struct usb_descriptor_header *)&as_iso_in_desc, + NULL, +}; + +static struct usb_descriptor_header *fs_audio_desc[] = { + (struct usb_descriptor_header *)&ac_interface_desc, + (struct usb_descriptor_header *)&ac_header_desc, + + (struct usb_descriptor_header *)&input_terminal_desc, + (struct usb_descriptor_header *)&output_terminal_desc, + (struct usb_descriptor_header *)&feature_unit_desc, + + (struct usb_descriptor_header *)&as_interface_alt_0_desc, + (struct usb_descriptor_header *)&as_interface_alt_1_desc, + (struct usb_descriptor_header *)&as_header_desc, + + (struct usb_descriptor_header *)&as_type_i_desc, + + (struct usb_descriptor_header *)&fs_as_in_ep_desc, + (struct usb_descriptor_header *)&as_iso_in_desc, + NULL, +}; + +static struct snd_pcm_hardware audio_hw_info = { + .info = SNDRV_PCM_INFO_MMAP | + SNDRV_PCM_INFO_MMAP_VALID | + SNDRV_PCM_INFO_BATCH | + SNDRV_PCM_INFO_INTERLEAVED | + SNDRV_PCM_INFO_BLOCK_TRANSFER, + + .formats = SNDRV_PCM_FMTBIT_S16_LE, + .channels_min = 2, + .channels_max = 2, + .rate_min = SAMPLE_RATE, + .rate_max = SAMPLE_RATE, + + .buffer_bytes_max = 1024 * 1024, + .period_bytes_min = 64, + .period_bytes_max = 512 * 1024, + .periods_min = 2, + .periods_max = 1024, +}; + +/*-------------------------------------------------------------------------*/ + +struct audio_source_config { + int card; + int device; +}; + +struct audio_dev { + struct usb_function func; + struct snd_card *card; + struct snd_pcm *pcm; + struct snd_pcm_substream *substream; + + struct list_head idle_reqs; + struct usb_ep *in_ep; + + spinlock_t lock; + + /* beginning, end and current position in our buffer */ + void *buffer_start; + void *buffer_end; + void *buffer_pos; + + /* byte size of a "period" */ + unsigned int period; + /* bytes sent since last call to snd_pcm_period_elapsed */ + unsigned int period_offset; + /* time we started playing */ + ktime_t start_time; + /* number of frames sent since start_time */ + s64 frames_sent; + struct audio_source_config *config; + /* for creating and issuing QoS requests */ + struct pm_qos_request pm_qos; +}; + +static inline struct audio_dev *func_to_audio(struct usb_function *f) +{ + return container_of(f, struct audio_dev, func); +} + +/*-------------------------------------------------------------------------*/ + +struct audio_source_instance { + struct usb_function_instance func_inst; + const char *name; + struct audio_source_config *config; + struct device *audio_device; +}; + +static void audio_source_attr_release(struct config_item *item); + +static struct configfs_item_operations audio_source_item_ops = { + .release = audio_source_attr_release, +}; + +static struct config_item_type audio_source_func_type = { + .ct_item_ops = &audio_source_item_ops, + .ct_owner = THIS_MODULE, +}; + +static ssize_t audio_source_pcm_show(struct device *dev, + struct device_attribute *attr, char *buf); + +static DEVICE_ATTR(pcm, S_IRUGO, audio_source_pcm_show, NULL); + +static struct device_attribute *audio_source_function_attributes[] = { + &dev_attr_pcm, + NULL +}; + +/*--------------------------------------------------------------------------*/ + +static struct usb_request *audio_request_new(struct usb_ep *ep, int buffer_size) +{ + struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL); + + if (!req) + return NULL; + + req->buf = kmalloc(buffer_size, GFP_KERNEL); + if (!req->buf) { + usb_ep_free_request(ep, req); + return NULL; + } + req->length = buffer_size; + return req; +} + +static void audio_request_free(struct usb_request *req, struct usb_ep *ep) +{ + if (req) { + kfree(req->buf); + usb_ep_free_request(ep, req); + } +} + +static void audio_req_put(struct audio_dev *audio, struct usb_request *req) +{ + unsigned long flags; + + spin_lock_irqsave(&audio->lock, flags); + list_add_tail(&req->list, &audio->idle_reqs); + spin_unlock_irqrestore(&audio->lock, flags); +} + +static struct usb_request *audio_req_get(struct audio_dev *audio) +{ + unsigned long flags; + struct usb_request *req; + + spin_lock_irqsave(&audio->lock, flags); + if (list_empty(&audio->idle_reqs)) { + req = 0; + } else { + req = list_first_entry(&audio->idle_reqs, struct usb_request, + list); + list_del(&req->list); + } + spin_unlock_irqrestore(&audio->lock, flags); + return req; +} + +/* send the appropriate number of packets to match our bitrate */ +static void audio_send(struct audio_dev *audio) +{ + struct snd_pcm_runtime *runtime; + struct usb_request *req; + int length, length1, length2, ret; + s64 msecs; + s64 frames; + ktime_t now; + + /* audio->substream will be null if we have been closed */ + if (!audio->substream) + return; + /* audio->buffer_pos will be null if we have been stopped */ + if (!audio->buffer_pos) + return; + + runtime = audio->substream->runtime; + + /* compute number of frames to send */ + now = ktime_get(); + msecs = div_s64((ktime_to_ns(now) - ktime_to_ns(audio->start_time)), + 1000000); + frames = div_s64((msecs * SAMPLE_RATE), 1000); + + /* Readjust our frames_sent if we fall too far behind. + * If we get too far behind it is better to drop some frames than + * to keep sending data too fast in an attempt to catch up. + */ + if (frames - audio->frames_sent > 10 * FRAMES_PER_MSEC) + audio->frames_sent = frames - FRAMES_PER_MSEC; + + frames -= audio->frames_sent; + + /* We need to send something to keep the pipeline going */ + if (frames <= 0) + frames = FRAMES_PER_MSEC; + + while (frames > 0) { + req = audio_req_get(audio); + if (!req) + break; + + length = frames_to_bytes(runtime, frames); + if (length > IN_EP_MAX_PACKET_SIZE) + length = IN_EP_MAX_PACKET_SIZE; + + if (audio->buffer_pos + length > audio->buffer_end) + length1 = audio->buffer_end - audio->buffer_pos; + else + length1 = length; + memcpy(req->buf, audio->buffer_pos, length1); + if (length1 < length) { + /* Wrap around and copy remaining length + * at beginning of buffer. + */ + length2 = length - length1; + memcpy(req->buf + length1, audio->buffer_start, + length2); + audio->buffer_pos = audio->buffer_start + length2; + } else { + audio->buffer_pos += length1; + if (audio->buffer_pos >= audio->buffer_end) + audio->buffer_pos = audio->buffer_start; + } + + req->length = length; + ret = usb_ep_queue(audio->in_ep, req, GFP_ATOMIC); + if (ret < 0) { + pr_err("usb_ep_queue failed ret: %d\n", ret); + audio_req_put(audio, req); + break; + } + + frames -= bytes_to_frames(runtime, length); + audio->frames_sent += bytes_to_frames(runtime, length); + } +} + +static void audio_control_complete(struct usb_ep *ep, struct usb_request *req) +{ + /* nothing to do here */ +} + +static void audio_data_complete(struct usb_ep *ep, struct usb_request *req) +{ + struct audio_dev *audio = req->context; + + pr_debug("audio_data_complete req->status %d req->actual %d\n", + req->status, req->actual); + + audio_req_put(audio, req); + + if (!audio->buffer_start || req->status) + return; + + audio->period_offset += req->actual; + if (audio->period_offset >= audio->period) { + snd_pcm_period_elapsed(audio->substream); + audio->period_offset = 0; + } + audio_send(audio); +} + +static int audio_set_endpoint_req(struct usb_function *f, + const struct usb_ctrlrequest *ctrl) +{ + int value = -EOPNOTSUPP; + u16 ep = le16_to_cpu(ctrl->wIndex); + u16 len = le16_to_cpu(ctrl->wLength); + u16 w_value = le16_to_cpu(ctrl->wValue); + + pr_debug("bRequest 0x%x, w_value 0x%04x, len %d, endpoint %d\n", + ctrl->bRequest, w_value, len, ep); + + switch (ctrl->bRequest) { + case UAC_SET_CUR: + case UAC_SET_MIN: + case UAC_SET_MAX: + case UAC_SET_RES: + value = len; + break; + default: + break; + } + + return value; +} + +static int audio_get_endpoint_req(struct usb_function *f, + const struct usb_ctrlrequest *ctrl) +{ + struct usb_composite_dev *cdev = f->config->cdev; + int value = -EOPNOTSUPP; + u8 ep = ((le16_to_cpu(ctrl->wIndex) >> 8) & 0xFF); + u16 len = le16_to_cpu(ctrl->wLength); + u16 w_value = le16_to_cpu(ctrl->wValue); + u8 *buf = cdev->req->buf; + + pr_debug("bRequest 0x%x, w_value 0x%04x, len %d, endpoint %d\n", + ctrl->bRequest, w_value, len, ep); + + if (w_value == UAC_EP_CS_ATTR_SAMPLE_RATE << 8) { + switch (ctrl->bRequest) { + case UAC_GET_CUR: + case UAC_GET_MIN: + case UAC_GET_MAX: + case UAC_GET_RES: + /* return our sample rate */ + buf[0] = (u8)SAMPLE_RATE; + buf[1] = (u8)(SAMPLE_RATE >> 8); + buf[2] = (u8)(SAMPLE_RATE >> 16); + value = 3; + break; + default: + break; + } + } + + return value; +} + +static int +audio_setup(struct usb_function *f, const struct usb_ctrlrequest *ctrl) +{ + struct usb_composite_dev *cdev = f->config->cdev; + struct usb_request *req = cdev->req; + int value = -EOPNOTSUPP; + u16 w_index = le16_to_cpu(ctrl->wIndex); + u16 w_value = le16_to_cpu(ctrl->wValue); + u16 w_length = le16_to_cpu(ctrl->wLength); + + /* composite driver infrastructure handles everything; interface + * activation uses set_alt(). + */ + switch (ctrl->bRequestType) { + case USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_ENDPOINT: + value = audio_set_endpoint_req(f, ctrl); + break; + + case USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_ENDPOINT: + value = audio_get_endpoint_req(f, ctrl); + break; + } + + /* respond with data transfer or status phase? */ + if (value >= 0) { + pr_debug("audio req%02x.%02x v%04x i%04x l%d\n", + ctrl->bRequestType, ctrl->bRequest, + w_value, w_index, w_length); + req->zero = 0; + req->length = value; + req->complete = audio_control_complete; + value = usb_ep_queue(cdev->gadget->ep0, req, GFP_ATOMIC); + if (value < 0) + pr_err("audio response on err %d\n", value); + } + + /* device either stalls (value < 0) or reports success */ + return value; +} + +static int audio_set_alt(struct usb_function *f, unsigned intf, unsigned alt) +{ + struct audio_dev *audio = func_to_audio(f); + struct usb_composite_dev *cdev = f->config->cdev; + int ret; + + pr_debug("audio_set_alt intf %d, alt %d\n", intf, alt); + + ret = config_ep_by_speed(cdev->gadget, f, audio->in_ep); + if (ret) + return ret; + + usb_ep_enable(audio->in_ep); + return 0; +} + +static void audio_disable(struct usb_function *f) +{ + struct audio_dev *audio = func_to_audio(f); + + pr_debug("audio_disable\n"); + usb_ep_disable(audio->in_ep); +} + +static void audio_free_func(struct usb_function *f) +{ + /* no-op */ +} + +/*-------------------------------------------------------------------------*/ + +static void audio_build_desc(struct audio_dev *audio) +{ + u8 *sam_freq; + int rate; + + /* Set channel numbers */ + input_terminal_desc.bNrChannels = 2; + as_type_i_desc.bNrChannels = 2; + + /* Set sample rates */ + rate = SAMPLE_RATE; + sam_freq = as_type_i_desc.tSamFreq[0]; + memcpy(sam_freq, &rate, 3); +} + + +static int snd_card_setup(struct usb_configuration *c, + struct audio_source_config *config); +static struct audio_source_instance *to_fi_audio_source( + const struct usb_function_instance *fi); + + +/* audio function driver setup/binding */ +static int +audio_bind(struct usb_configuration *c, struct usb_function *f) +{ + struct usb_composite_dev *cdev = c->cdev; + struct audio_dev *audio = func_to_audio(f); + int status; + struct usb_ep *ep; + struct usb_request *req; + int i; + int err; + + if (IS_ENABLED(CONFIG_USB_CONFIGFS)) { + struct audio_source_instance *fi_audio = + to_fi_audio_source(f->fi); + struct audio_source_config *config = + fi_audio->config; + + err = snd_card_setup(c, config); + if (err) + return err; + } + + audio_build_desc(audio); + + /* allocate instance-specific interface IDs, and patch descriptors */ + status = usb_interface_id(c, f); + if (status < 0) + goto fail; + ac_interface_desc.bInterfaceNumber = status; + + /* AUDIO_AC_INTERFACE */ + ac_header_desc.baInterfaceNr[0] = status; + + status = usb_interface_id(c, f); + if (status < 0) + goto fail; + as_interface_alt_0_desc.bInterfaceNumber = status; + as_interface_alt_1_desc.bInterfaceNumber = status; + + /* AUDIO_AS_INTERFACE */ + ac_header_desc.baInterfaceNr[1] = status; + + status = -ENODEV; + + /* allocate our endpoint */ + ep = usb_ep_autoconfig(cdev->gadget, &fs_as_in_ep_desc); + if (!ep) + goto fail; + audio->in_ep = ep; + ep->driver_data = audio; /* claim */ + + if (gadget_is_dualspeed(c->cdev->gadget)) + hs_as_in_ep_desc.bEndpointAddress = + fs_as_in_ep_desc.bEndpointAddress; + + f->fs_descriptors = fs_audio_desc; + f->hs_descriptors = hs_audio_desc; + + for (i = 0, status = 0; i < IN_EP_REQ_COUNT && status == 0; i++) { + req = audio_request_new(ep, IN_EP_MAX_PACKET_SIZE); + if (req) { + req->context = audio; + req->complete = audio_data_complete; + audio_req_put(audio, req); + } else + status = -ENOMEM; + } + +fail: + return status; +} + +static void +audio_unbind(struct usb_configuration *c, struct usb_function *f) +{ + struct audio_dev *audio = func_to_audio(f); + struct usb_request *req; + + while ((req = audio_req_get(audio))) + audio_request_free(req, audio->in_ep); + + snd_card_free_when_closed(audio->card); + audio->card = NULL; + audio->pcm = NULL; + audio->substream = NULL; + audio->in_ep = NULL; + + if (IS_ENABLED(CONFIG_USB_CONFIGFS)) { + struct audio_source_instance *fi_audio = + to_fi_audio_source(f->fi); + struct audio_source_config *config = + fi_audio->config; + + config->card = -1; + config->device = -1; + } +} + +static void audio_pcm_playback_start(struct audio_dev *audio) +{ + audio->start_time = ktime_get(); + audio->frames_sent = 0; + audio_send(audio); +} + +static void audio_pcm_playback_stop(struct audio_dev *audio) +{ + unsigned long flags; + + spin_lock_irqsave(&audio->lock, flags); + audio->buffer_start = 0; + audio->buffer_end = 0; + audio->buffer_pos = 0; + spin_unlock_irqrestore(&audio->lock, flags); +} + +static int audio_pcm_open(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct audio_dev *audio = substream->private_data; + + runtime->private_data = audio; + runtime->hw = audio_hw_info; + snd_pcm_limit_hw_rates(runtime); + runtime->hw.channels_max = 2; + + audio->substream = substream; + + /* Add the QoS request and set the latency to 0 */ + pm_qos_add_request(&audio->pm_qos, PM_QOS_CPU_DMA_LATENCY, 0); + + return 0; +} + +static int audio_pcm_close(struct snd_pcm_substream *substream) +{ + struct audio_dev *audio = substream->private_data; + unsigned long flags; + + spin_lock_irqsave(&audio->lock, flags); + + /* Remove the QoS request */ + pm_qos_remove_request(&audio->pm_qos); + + audio->substream = NULL; + spin_unlock_irqrestore(&audio->lock, flags); + + return 0; +} + +static int audio_pcm_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params) +{ + unsigned int channels = params_channels(params); + unsigned int rate = params_rate(params); + + if (rate != SAMPLE_RATE) + return -EINVAL; + if (channels != 2) + return -EINVAL; + + return snd_pcm_lib_alloc_vmalloc_buffer(substream, + params_buffer_bytes(params)); +} + +static int audio_pcm_hw_free(struct snd_pcm_substream *substream) +{ + return snd_pcm_lib_free_vmalloc_buffer(substream); +} + +static int audio_pcm_prepare(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct audio_dev *audio = runtime->private_data; + + audio->period = snd_pcm_lib_period_bytes(substream); + audio->period_offset = 0; + audio->buffer_start = runtime->dma_area; + audio->buffer_end = audio->buffer_start + + snd_pcm_lib_buffer_bytes(substream); + audio->buffer_pos = audio->buffer_start; + + return 0; +} + +static snd_pcm_uframes_t audio_pcm_pointer(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct audio_dev *audio = runtime->private_data; + ssize_t bytes = audio->buffer_pos - audio->buffer_start; + + /* return offset of next frame to fill in our buffer */ + return bytes_to_frames(runtime, bytes); +} + +static int audio_pcm_playback_trigger(struct snd_pcm_substream *substream, + int cmd) +{ + struct audio_dev *audio = substream->runtime->private_data; + int ret = 0; + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_RESUME: + audio_pcm_playback_start(audio); + break; + + case SNDRV_PCM_TRIGGER_STOP: + case SNDRV_PCM_TRIGGER_SUSPEND: + audio_pcm_playback_stop(audio); + break; + + default: + ret = -EINVAL; + } + + return ret; +} + +static struct audio_dev _audio_dev = { + .func = { + .name = "audio_source", + .bind = audio_bind, + .unbind = audio_unbind, + .set_alt = audio_set_alt, + .setup = audio_setup, + .disable = audio_disable, + .free_func = audio_free_func, + }, + .lock = __SPIN_LOCK_UNLOCKED(_audio_dev.lock), + .idle_reqs = LIST_HEAD_INIT(_audio_dev.idle_reqs), +}; + +static struct snd_pcm_ops audio_playback_ops = { + .open = audio_pcm_open, + .close = audio_pcm_close, + .ioctl = snd_pcm_lib_ioctl, + .hw_params = audio_pcm_hw_params, + .hw_free = audio_pcm_hw_free, + .prepare = audio_pcm_prepare, + .trigger = audio_pcm_playback_trigger, + .pointer = audio_pcm_pointer, +}; + +int audio_source_bind_config(struct usb_configuration *c, + struct audio_source_config *config) +{ + struct audio_dev *audio; + int err; + + config->card = -1; + config->device = -1; + + audio = &_audio_dev; + + err = snd_card_setup(c, config); + if (err) + return err; + + err = usb_add_function(c, &audio->func); + if (err) + goto add_fail; + + return 0; + +add_fail: + snd_card_free(audio->card); + return err; +} + +static int snd_card_setup(struct usb_configuration *c, + struct audio_source_config *config) +{ + struct audio_dev *audio; + struct snd_card *card; + struct snd_pcm *pcm; + int err; + + audio = &_audio_dev; + + err = snd_card_new(&c->cdev->gadget->dev, + SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1, + THIS_MODULE, 0, &card); + if (err) + return err; + + err = snd_pcm_new(card, "USB audio source", 0, 1, 0, &pcm); + if (err) + goto pcm_fail; + + pcm->private_data = audio; + pcm->info_flags = 0; + audio->pcm = pcm; + + strlcpy(pcm->name, "USB gadget audio", sizeof(pcm->name)); + + snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &audio_playback_ops); + snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV, + NULL, 0, 64 * 1024); + + strlcpy(card->driver, "audio_source", sizeof(card->driver)); + strlcpy(card->shortname, card->driver, sizeof(card->shortname)); + strlcpy(card->longname, "USB accessory audio source", + sizeof(card->longname)); + + err = snd_card_register(card); + if (err) + goto register_fail; + + config->card = pcm->card->number; + config->device = pcm->device; + audio->card = card; + return 0; + +register_fail: +pcm_fail: + snd_card_free(audio->card); + return err; +} + +static struct audio_source_instance *to_audio_source_instance( + struct config_item *item) +{ + return container_of(to_config_group(item), struct audio_source_instance, + func_inst.group); +} + +static struct audio_source_instance *to_fi_audio_source( + const struct usb_function_instance *fi) +{ + return container_of(fi, struct audio_source_instance, func_inst); +} + +static void audio_source_attr_release(struct config_item *item) +{ + struct audio_source_instance *fi_audio = to_audio_source_instance(item); + + usb_put_function_instance(&fi_audio->func_inst); +} + +static int audio_source_set_inst_name(struct usb_function_instance *fi, + const char *name) +{ + struct audio_source_instance *fi_audio; + char *ptr; + int name_len; + + name_len = strlen(name) + 1; + if (name_len > MAX_INST_NAME_LEN) + return -ENAMETOOLONG; + + ptr = kstrndup(name, name_len, GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + fi_audio = to_fi_audio_source(fi); + fi_audio->name = ptr; + + return 0; +} + +static void audio_source_free_inst(struct usb_function_instance *fi) +{ + struct audio_source_instance *fi_audio; + + fi_audio = to_fi_audio_source(fi); + device_destroy(fi_audio->audio_device->class, + fi_audio->audio_device->devt); + kfree(fi_audio->name); + kfree(fi_audio->config); +} + +static ssize_t audio_source_pcm_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct audio_source_instance *fi_audio = dev_get_drvdata(dev); + struct audio_source_config *config = fi_audio->config; + + /* print PCM card and device numbers */ + return sprintf(buf, "%d %d\n", config->card, config->device); +} + +struct device *create_function_device(char *name); + +static struct usb_function_instance *audio_source_alloc_inst(void) +{ + struct audio_source_instance *fi_audio; + struct device_attribute **attrs; + struct device_attribute *attr; + struct device *dev; + void *err_ptr; + int err = 0; + + fi_audio = kzalloc(sizeof(*fi_audio), GFP_KERNEL); + if (!fi_audio) + return ERR_PTR(-ENOMEM); + + fi_audio->func_inst.set_inst_name = audio_source_set_inst_name; + fi_audio->func_inst.free_func_inst = audio_source_free_inst; + + fi_audio->config = kzalloc(sizeof(struct audio_source_config), + GFP_KERNEL); + if (!fi_audio->config) { + err_ptr = ERR_PTR(-ENOMEM); + goto fail_audio; + } + + config_group_init_type_name(&fi_audio->func_inst.group, "", + &audio_source_func_type); + dev = create_function_device("f_audio_source"); + + if (IS_ERR(dev)) { + err_ptr = dev; + goto fail_audio_config; + } + + fi_audio->config->card = -1; + fi_audio->config->device = -1; + fi_audio->audio_device = dev; + + attrs = audio_source_function_attributes; + if (attrs) { + while ((attr = *attrs++) && !err) + err = device_create_file(dev, attr); + if (err) { + err_ptr = ERR_PTR(-EINVAL); + goto fail_device; + } + } + + dev_set_drvdata(dev, fi_audio); + _audio_dev.config = fi_audio->config; + + return &fi_audio->func_inst; + +fail_device: + device_destroy(dev->class, dev->devt); +fail_audio_config: + kfree(fi_audio->config); +fail_audio: + kfree(fi_audio); + return err_ptr; + +} + +static struct usb_function *audio_source_alloc(struct usb_function_instance *fi) +{ + return &_audio_dev.func; +} + +DECLARE_USB_FUNCTION_INIT(audio_source, audio_source_alloc_inst, + audio_source_alloc); +MODULE_LICENSE("GPL");
diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c index 71cf552..f0a8e66 100644 --- a/drivers/usb/gadget/function/f_midi.c +++ b/drivers/usb/gadget/function/f_midi.c
@@ -1208,6 +1208,65 @@ static void f_midi_free_inst(struct usb_function_instance *f) kfree(opts); } +#ifdef CONFIG_USB_CONFIGFS_UEVENT +extern struct device *create_function_device(char *name); +static ssize_t alsa_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct usb_function_instance *fi_midi = dev_get_drvdata(dev); + struct f_midi *midi; + + if (!fi_midi->f) + dev_warn(dev, "f_midi: function not set\n"); + + if (fi_midi && fi_midi->f) { + midi = func_to_midi(fi_midi->f); + if (midi->rmidi && midi->rmidi->card) + return sprintf(buf, "%d %d\n", + midi->rmidi->card->number, midi->rmidi->device); + } + + /* print PCM card and device numbers */ + return sprintf(buf, "%d %d\n", -1, -1); +} + +static DEVICE_ATTR(alsa, S_IRUGO, alsa_show, NULL); + +static struct device_attribute *alsa_function_attributes[] = { + &dev_attr_alsa, + NULL +}; + +static int create_alsa_device(struct usb_function_instance *fi) +{ + struct device *dev; + struct device_attribute **attrs; + struct device_attribute *attr; + int err = 0; + + dev = create_function_device("f_midi"); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + attrs = alsa_function_attributes; + if (attrs) { + while ((attr = *attrs++) && !err) + err = device_create_file(dev, attr); + if (err) { + device_destroy(dev->class, dev->devt); + return -EINVAL; + } + } + dev_set_drvdata(dev, fi); + return 0; +} +#else +static int create_alsa_device(struct usb_function_instance *fi) +{ + return 0; +} +#endif + static struct usb_function_instance *f_midi_alloc_inst(void) { struct f_midi_opts *opts; @@ -1225,6 +1284,11 @@ static struct usb_function_instance *f_midi_alloc_inst(void) opts->in_ports = 1; opts->out_ports = 1; + if (create_alsa_device(&opts->func_inst)) { + kfree(opts); + return ERR_PTR(-ENODEV); + } + config_group_init_type_name(&opts->func_inst.group, "", &midi_func_type); @@ -1243,6 +1307,7 @@ static void f_midi_free(struct usb_function *f) kfree(midi->id); kfifo_free(&midi->in_req_fifo); kfree(midi); + opts->func_inst.f = NULL; --opts->refcnt; } mutex_unlock(&opts->lock); @@ -1329,6 +1394,7 @@ static struct usb_function *f_midi_alloc(struct usb_function_instance *fi) midi->func.disable = f_midi_disable; midi->func.free_func = f_midi_free; + fi->f = &midi->func; return &midi->func; setup_fail:
diff --git a/drivers/usb/misc/sisusbvga/sisusb_con.c b/drivers/usb/misc/sisusbvga/sisusb_con.c index f019d80..dc45cfc 100644 --- a/drivers/usb/misc/sisusbvga/sisusb_con.c +++ b/drivers/usb/misc/sisusbvga/sisusb_con.c
@@ -1216,7 +1216,7 @@ sisusbcon_do_font_op(struct sisusb_usb_data *sisusb, int set, int slot, /* Interface routine */ static int sisusbcon_font_set(struct vc_data *c, struct console_font *font, - unsigned flags) + unsigned int flags) { struct sisusb_usb_data *sisusb; unsigned charcount = font->charcount; @@ -1337,29 +1337,65 @@ static void sisusbdummycon_init(struct vc_data *vc, int init) vc_resize(vc, 80, 25); } -static int sisusbdummycon_dummy(void) +static void sisusbdummycon_deinit(struct vc_data *vc) { } +static void sisusbdummycon_clear(struct vc_data *vc, int sy, int sx, + int height, int width) { } +static void sisusbdummycon_putc(struct vc_data *vc, int c, int ypos, + int xpos) { } +static void sisusbdummycon_putcs(struct vc_data *vc, const unsigned short *s, + int count, int ypos, int xpos) { } +static void sisusbdummycon_cursor(struct vc_data *vc, int mode) { } + +static bool sisusbdummycon_scroll(struct vc_data *vc, unsigned int top, + unsigned int bottom, enum con_scroll dir, + unsigned int lines) { - return 0; + return false; } -#define SISUSBCONDUMMY (void *)sisusbdummycon_dummy +static int sisusbdummycon_switch(struct vc_data *vc) +{ + return 0; +} + +static int sisusbdummycon_blank(struct vc_data *vc, int blank, int mode_switch) +{ + return 0; +} + +static int sisusbdummycon_font_set(struct vc_data *vc, + struct console_font *font, + unsigned int flags) +{ + return 0; +} + +static int sisusbdummycon_font_default(struct vc_data *vc, + struct console_font *font, char *name) +{ + return 0; +} + +static int sisusbdummycon_font_copy(struct vc_data *vc, int con) +{ + return 0; +} static const struct consw sisusb_dummy_con = { .owner = THIS_MODULE, .con_startup = sisusbdummycon_startup, .con_init = sisusbdummycon_init, - .con_deinit = SISUSBCONDUMMY, - .con_clear = SISUSBCONDUMMY, - .con_putc = SISUSBCONDUMMY, - .con_putcs = SISUSBCONDUMMY, - .con_cursor = SISUSBCONDUMMY, - .con_scroll = SISUSBCONDUMMY, - .con_switch = SISUSBCONDUMMY, - .con_blank = SISUSBCONDUMMY, - .con_font_set = SISUSBCONDUMMY, - .con_font_get = SISUSBCONDUMMY, - .con_font_default = SISUSBCONDUMMY, - .con_font_copy = SISUSBCONDUMMY, + .con_deinit = sisusbdummycon_deinit, + .con_clear = sisusbdummycon_clear, + .con_putc = sisusbdummycon_putc, + .con_putcs = sisusbdummycon_putcs, + .con_cursor = sisusbdummycon_cursor, + .con_scroll = sisusbdummycon_scroll, + .con_switch = sisusbdummycon_switch, + .con_blank = sisusbdummycon_blank, + .con_font_set = sisusbdummycon_font_set, + .con_font_default = sisusbdummycon_font_default, + .con_font_copy = sisusbdummycon_font_copy, }; int
diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig index aff702c..15e822e 100644 --- a/drivers/usb/phy/Kconfig +++ b/drivers/usb/phy/Kconfig
@@ -7,6 +7,14 @@ select EXTCON def_bool n +config USB_OTG_WAKELOCK + bool "Hold a wakelock when USB connected" + depends on PM_WAKELOCKS + select USB_OTG_UTILS + help + Select this to automatically hold a wakelock when USB is + connected, preventing suspend. + # # USB Transceiver Drivers # @@ -202,4 +210,13 @@ Provides read/write operations to the ULPI phy register set for controllers with a viewport register (e.g. Chipidea/ARC controllers). +config DUAL_ROLE_USB_INTF + bool "Generic DUAL ROLE sysfs interface" + depends on SYSFS && USB_PHY + help + A generic sysfs interface to track and change the state of + dual role usb phys. The usb phy drivers can register to + this interface to expose it capabilities to the userspace + and thereby allowing userspace to change the port mode. + endmenu
diff --git a/drivers/usb/phy/Makefile b/drivers/usb/phy/Makefile index 0c40ccc..68867d6 100644 --- a/drivers/usb/phy/Makefile +++ b/drivers/usb/phy/Makefile
@@ -4,6 +4,8 @@ # obj-$(CONFIG_USB_PHY) += phy.o obj-$(CONFIG_OF) += of.o +obj-$(CONFIG_USB_OTG_WAKELOCK) += otg-wakelock.o +obj-$(CONFIG_DUAL_ROLE_USB_INTF) += class-dual-role.o # transceiver drivers, keep the list sorted
diff --git a/drivers/usb/phy/class-dual-role.c b/drivers/usb/phy/class-dual-role.c new file mode 100644 index 0000000..51fcb54 --- /dev/null +++ b/drivers/usb/phy/class-dual-role.c
@@ -0,0 +1,529 @@ +/* + * class-dual-role.c + * + * Copyright (C) 2015 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/ctype.h> +#include <linux/device.h> +#include <linux/usb/class-dual-role.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/stat.h> +#include <linux/types.h> + +#define DUAL_ROLE_NOTIFICATION_TIMEOUT 2000 + +static ssize_t dual_role_store_property(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count); +static ssize_t dual_role_show_property(struct device *dev, + struct device_attribute *attr, + char *buf); + +#define DUAL_ROLE_ATTR(_name) \ +{ \ + .attr = { .name = #_name }, \ + .show = dual_role_show_property, \ + .store = dual_role_store_property, \ +} + +static struct device_attribute dual_role_attrs[] = { + DUAL_ROLE_ATTR(supported_modes), + DUAL_ROLE_ATTR(mode), + DUAL_ROLE_ATTR(power_role), + DUAL_ROLE_ATTR(data_role), + DUAL_ROLE_ATTR(powers_vconn), +}; + +struct class *dual_role_class; +EXPORT_SYMBOL_GPL(dual_role_class); + +static struct device_type dual_role_dev_type; + +static char *kstrdupcase(const char *str, gfp_t gfp, bool to_upper) +{ + char *ret, *ustr; + + ustr = ret = kmalloc(strlen(str) + 1, gfp); + + if (!ret) + return NULL; + + while (*str) + *ustr++ = to_upper ? toupper(*str++) : tolower(*str++); + + *ustr = 0; + + return ret; +} + +static void dual_role_changed_work(struct work_struct *work) +{ + struct dual_role_phy_instance *dual_role = + container_of(work, struct dual_role_phy_instance, + changed_work); + + dev_dbg(&dual_role->dev, "%s\n", __func__); + kobject_uevent(&dual_role->dev.kobj, KOBJ_CHANGE); +} + +void dual_role_instance_changed(struct dual_role_phy_instance *dual_role) +{ + dev_dbg(&dual_role->dev, "%s\n", __func__); + pm_wakeup_event(&dual_role->dev, DUAL_ROLE_NOTIFICATION_TIMEOUT); + schedule_work(&dual_role->changed_work); +} +EXPORT_SYMBOL_GPL(dual_role_instance_changed); + +int dual_role_get_property(struct dual_role_phy_instance *dual_role, + enum dual_role_property prop, + unsigned int *val) +{ + return dual_role->desc->get_property(dual_role, prop, val); +} +EXPORT_SYMBOL_GPL(dual_role_get_property); + +int dual_role_set_property(struct dual_role_phy_instance *dual_role, + enum dual_role_property prop, + const unsigned int *val) +{ + if (!dual_role->desc->set_property) + return -ENODEV; + + return dual_role->desc->set_property(dual_role, prop, val); +} +EXPORT_SYMBOL_GPL(dual_role_set_property); + +int dual_role_property_is_writeable(struct dual_role_phy_instance *dual_role, + enum dual_role_property prop) +{ + if (!dual_role->desc->property_is_writeable) + return -ENODEV; + + return dual_role->desc->property_is_writeable(dual_role, prop); +} +EXPORT_SYMBOL_GPL(dual_role_property_is_writeable); + +static void dual_role_dev_release(struct device *dev) +{ + struct dual_role_phy_instance *dual_role = + container_of(dev, struct dual_role_phy_instance, dev); + pr_debug("device: '%s': %s\n", dev_name(dev), __func__); + kfree(dual_role); +} + +static struct dual_role_phy_instance *__must_check +__dual_role_register(struct device *parent, + const struct dual_role_phy_desc *desc) +{ + struct device *dev; + struct dual_role_phy_instance *dual_role; + int rc; + + dual_role = kzalloc(sizeof(*dual_role), GFP_KERNEL); + if (!dual_role) + return ERR_PTR(-ENOMEM); + + dev = &dual_role->dev; + + device_initialize(dev); + + dev->class = dual_role_class; + dev->type = &dual_role_dev_type; + dev->parent = parent; + dev->release = dual_role_dev_release; + dev_set_drvdata(dev, dual_role); + dual_role->desc = desc; + + rc = dev_set_name(dev, "%s", desc->name); + if (rc) + goto dev_set_name_failed; + + INIT_WORK(&dual_role->changed_work, dual_role_changed_work); + + rc = device_init_wakeup(dev, true); + if (rc) + goto wakeup_init_failed; + + rc = device_add(dev); + if (rc) + goto device_add_failed; + + dual_role_instance_changed(dual_role); + + return dual_role; + +device_add_failed: + device_init_wakeup(dev, false); +wakeup_init_failed: +dev_set_name_failed: + put_device(dev); + kfree(dual_role); + + return ERR_PTR(rc); +} + +static void dual_role_instance_unregister(struct dual_role_phy_instance + *dual_role) +{ + cancel_work_sync(&dual_role->changed_work); + device_init_wakeup(&dual_role->dev, false); + device_unregister(&dual_role->dev); +} + +static void devm_dual_role_release(struct device *dev, void *res) +{ + struct dual_role_phy_instance **dual_role = res; + + dual_role_instance_unregister(*dual_role); +} + +struct dual_role_phy_instance *__must_check +devm_dual_role_instance_register(struct device *parent, + const struct dual_role_phy_desc *desc) +{ + struct dual_role_phy_instance **ptr, *dual_role; + + ptr = devres_alloc(devm_dual_role_release, sizeof(*ptr), GFP_KERNEL); + + if (!ptr) + return ERR_PTR(-ENOMEM); + dual_role = __dual_role_register(parent, desc); + if (IS_ERR(dual_role)) { + devres_free(ptr); + } else { + *ptr = dual_role; + devres_add(parent, ptr); + } + return dual_role; +} +EXPORT_SYMBOL_GPL(devm_dual_role_instance_register); + +static int devm_dual_role_match(struct device *dev, void *res, void *data) +{ + struct dual_role_phy_instance **r = res; + + if (WARN_ON(!r || !*r)) + return 0; + + return *r == data; +} + +void devm_dual_role_instance_unregister(struct device *dev, + struct dual_role_phy_instance + *dual_role) +{ + int rc; + + rc = devres_release(dev, devm_dual_role_release, + devm_dual_role_match, dual_role); + WARN_ON(rc); +} +EXPORT_SYMBOL_GPL(devm_dual_role_instance_unregister); + +void *dual_role_get_drvdata(struct dual_role_phy_instance *dual_role) +{ + return dual_role->drv_data; +} +EXPORT_SYMBOL_GPL(dual_role_get_drvdata); + +/***************** Device attribute functions **************************/ + +/* port type */ +static char *supported_modes_text[] = { + "ufp dfp", "dfp", "ufp" +}; + +/* current mode */ +static char *mode_text[] = { + "ufp", "dfp", "none" +}; + +/* Power role */ +static char *pr_text[] = { + "source", "sink", "none" +}; + +/* Data role */ +static char *dr_text[] = { + "host", "device", "none" +}; + +/* Vconn supply */ +static char *vconn_supply_text[] = { + "n", "y" +}; + +static ssize_t dual_role_show_property(struct device *dev, + struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + struct dual_role_phy_instance *dual_role = dev_get_drvdata(dev); + const ptrdiff_t off = attr - dual_role_attrs; + unsigned int value; + + if (off == DUAL_ROLE_PROP_SUPPORTED_MODES) { + value = dual_role->desc->supported_modes; + } else { + ret = dual_role_get_property(dual_role, off, &value); + + if (ret < 0) { + if (ret == -ENODATA) + dev_dbg(dev, + "driver has no data for `%s' property\n", + attr->attr.name); + else if (ret != -ENODEV) + dev_err(dev, + "driver failed to report `%s' property: %zd\n", + attr->attr.name, ret); + return ret; + } + } + + if (off == DUAL_ROLE_PROP_SUPPORTED_MODES) { + BUILD_BUG_ON(DUAL_ROLE_PROP_SUPPORTED_MODES_TOTAL != + ARRAY_SIZE(supported_modes_text)); + if (value < DUAL_ROLE_PROP_SUPPORTED_MODES_TOTAL) + return snprintf(buf, PAGE_SIZE, "%s\n", + supported_modes_text[value]); + else + return -EIO; + } else if (off == DUAL_ROLE_PROP_MODE) { + BUILD_BUG_ON(DUAL_ROLE_PROP_MODE_TOTAL != + ARRAY_SIZE(mode_text)); + if (value < DUAL_ROLE_PROP_MODE_TOTAL) + return snprintf(buf, PAGE_SIZE, "%s\n", + mode_text[value]); + else + return -EIO; + } else if (off == DUAL_ROLE_PROP_PR) { + BUILD_BUG_ON(DUAL_ROLE_PROP_PR_TOTAL != ARRAY_SIZE(pr_text)); + if (value < DUAL_ROLE_PROP_PR_TOTAL) + return snprintf(buf, PAGE_SIZE, "%s\n", + pr_text[value]); + else + return -EIO; + } else if (off == DUAL_ROLE_PROP_DR) { + BUILD_BUG_ON(DUAL_ROLE_PROP_DR_TOTAL != ARRAY_SIZE(dr_text)); + if (value < DUAL_ROLE_PROP_DR_TOTAL) + return snprintf(buf, PAGE_SIZE, "%s\n", + dr_text[value]); + else + return -EIO; + } else if (off == DUAL_ROLE_PROP_VCONN_SUPPLY) { + BUILD_BUG_ON(DUAL_ROLE_PROP_VCONN_SUPPLY_TOTAL != + ARRAY_SIZE(vconn_supply_text)); + if (value < DUAL_ROLE_PROP_VCONN_SUPPLY_TOTAL) + return snprintf(buf, PAGE_SIZE, "%s\n", + vconn_supply_text[value]); + else + return -EIO; + } else + return -EIO; +} + +static ssize_t dual_role_store_property(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + ssize_t ret; + struct dual_role_phy_instance *dual_role = dev_get_drvdata(dev); + const ptrdiff_t off = attr - dual_role_attrs; + unsigned int value; + int total, i; + char *dup_buf, **text_array; + bool result = false; + + dup_buf = kstrdupcase(buf, GFP_KERNEL, false); + switch (off) { + case DUAL_ROLE_PROP_MODE: + total = DUAL_ROLE_PROP_MODE_TOTAL; + text_array = mode_text; + break; + case DUAL_ROLE_PROP_PR: + total = DUAL_ROLE_PROP_PR_TOTAL; + text_array = pr_text; + break; + case DUAL_ROLE_PROP_DR: + total = DUAL_ROLE_PROP_DR_TOTAL; + text_array = dr_text; + break; + case DUAL_ROLE_PROP_VCONN_SUPPLY: + ret = strtobool(dup_buf, &result); + value = result; + if (!ret) + goto setprop; + default: + ret = -EINVAL; + goto error; + } + + for (i = 0; i <= total; i++) { + if (i == total) { + ret = -ENOTSUPP; + goto error; + } + if (!strncmp(*(text_array + i), dup_buf, + strlen(*(text_array + i)))) { + value = i; + break; + } + } + +setprop: + ret = dual_role->desc->set_property(dual_role, off, &value); + +error: + kfree(dup_buf); + + if (ret < 0) + return ret; + + return count; +} + +static umode_t dual_role_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int attrno) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct dual_role_phy_instance *dual_role = dev_get_drvdata(dev); + umode_t mode = S_IRUSR | S_IRGRP | S_IROTH; + int i; + + if (attrno == DUAL_ROLE_PROP_SUPPORTED_MODES) + return mode; + + for (i = 0; i < dual_role->desc->num_properties; i++) { + int property = dual_role->desc->properties[i]; + + if (property == attrno) { + if (dual_role->desc->property_is_writeable && + dual_role_property_is_writeable(dual_role, property) + > 0) + mode |= S_IWUSR; + + return mode; + } + } + + return 0; +} + +static struct attribute *__dual_role_attrs[ARRAY_SIZE(dual_role_attrs) + 1]; + +static struct attribute_group dual_role_attr_group = { + .attrs = __dual_role_attrs, + .is_visible = dual_role_attr_is_visible, +}; + +static const struct attribute_group *dual_role_attr_groups[] = { + &dual_role_attr_group, + NULL, +}; + +void dual_role_init_attrs(struct device_type *dev_type) +{ + int i; + + dev_type->groups = dual_role_attr_groups; + + for (i = 0; i < ARRAY_SIZE(dual_role_attrs); i++) + __dual_role_attrs[i] = &dual_role_attrs[i].attr; +} + +int dual_role_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct dual_role_phy_instance *dual_role = dev_get_drvdata(dev); + int ret = 0, j; + char *prop_buf; + char *attrname; + + dev_dbg(dev, "uevent\n"); + + if (!dual_role || !dual_role->desc) { + dev_dbg(dev, "No dual_role phy yet\n"); + return ret; + } + + dev_dbg(dev, "DUAL_ROLE_NAME=%s\n", dual_role->desc->name); + + ret = add_uevent_var(env, "DUAL_ROLE_NAME=%s", dual_role->desc->name); + if (ret) + return ret; + + prop_buf = (char *)get_zeroed_page(GFP_KERNEL); + if (!prop_buf) + return -ENOMEM; + + for (j = 0; j < dual_role->desc->num_properties; j++) { + struct device_attribute *attr; + char *line; + + attr = &dual_role_attrs[dual_role->desc->properties[j]]; + + ret = dual_role_show_property(dev, attr, prop_buf); + if (ret == -ENODEV || ret == -ENODATA) { + ret = 0; + continue; + } + + if (ret < 0) + goto out; + line = strnchr(prop_buf, PAGE_SIZE, '\n'); + if (line) + *line = 0; + + attrname = kstrdupcase(attr->attr.name, GFP_KERNEL, true); + if (!attrname) + ret = -ENOMEM; + + dev_dbg(dev, "prop %s=%s\n", attrname, prop_buf); + + ret = add_uevent_var(env, "DUAL_ROLE_%s=%s", attrname, + prop_buf); + kfree(attrname); + if (ret) + goto out; + } + +out: + free_page((unsigned long)prop_buf); + + return ret; +} + +/******************* Module Init ***********************************/ + +static int __init dual_role_class_init(void) +{ + dual_role_class = class_create(THIS_MODULE, "dual_role_usb"); + + if (IS_ERR(dual_role_class)) + return PTR_ERR(dual_role_class); + + dual_role_class->dev_uevent = dual_role_uevent; + dual_role_init_attrs(&dual_role_dev_type); + + return 0; +} + +static void __exit dual_role_class_exit(void) +{ + class_destroy(dual_role_class); +} + +subsys_initcall(dual_role_class_init); +module_exit(dual_role_class_exit);
diff --git a/drivers/usb/phy/otg-wakelock.c b/drivers/usb/phy/otg-wakelock.c new file mode 100644 index 0000000..ecd7410 --- /dev/null +++ b/drivers/usb/phy/otg-wakelock.c
@@ -0,0 +1,170 @@ +/* + * otg-wakelock.c + * + * Copyright (C) 2011 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/notifier.h> +#include <linux/spinlock.h> +#include <linux/usb/otg.h> + +#define TEMPORARY_HOLD_TIME 2000 + +static bool enabled = true; +static struct usb_phy *otgwl_xceiv; +static struct notifier_block otgwl_nb; + +/* + * otgwl_spinlock is held while the VBUS lock is grabbed or dropped and the + * held field is updated to match. + */ + +static DEFINE_SPINLOCK(otgwl_spinlock); + +/* + * Only one lock, but since these 3 fields are associated with each other... + */ + +struct otgwl_lock { + char name[40]; + struct wakeup_source wakesrc; + bool held; +}; + +/* + * VBUS present lock. Also used as a timed lock on charger + * connect/disconnect and USB host disconnect, to allow the system + * to react to the change in power. + */ + +static struct otgwl_lock vbus_lock; + +static void otgwl_hold(struct otgwl_lock *lock) +{ + if (!lock->held) { + __pm_stay_awake(&lock->wakesrc); + lock->held = true; + } +} + +static void otgwl_temporary_hold(struct otgwl_lock *lock) +{ + __pm_wakeup_event(&lock->wakesrc, TEMPORARY_HOLD_TIME); + lock->held = false; +} + +static void otgwl_drop(struct otgwl_lock *lock) +{ + if (lock->held) { + __pm_relax(&lock->wakesrc); + lock->held = false; + } +} + +static void otgwl_handle_event(unsigned long event) +{ + unsigned long irqflags; + + spin_lock_irqsave(&otgwl_spinlock, irqflags); + + if (!enabled) { + otgwl_drop(&vbus_lock); + spin_unlock_irqrestore(&otgwl_spinlock, irqflags); + return; + } + + switch (event) { + case USB_EVENT_VBUS: + case USB_EVENT_ENUMERATED: + otgwl_hold(&vbus_lock); + break; + + case USB_EVENT_NONE: + case USB_EVENT_ID: + case USB_EVENT_CHARGER: + otgwl_temporary_hold(&vbus_lock); + break; + + default: + break; + } + + spin_unlock_irqrestore(&otgwl_spinlock, irqflags); +} + +static int otgwl_otg_notifications(struct notifier_block *nb, + unsigned long event, void *unused) +{ + otgwl_handle_event(event); + return NOTIFY_OK; +} + +static int set_enabled(const char *val, const struct kernel_param *kp) +{ + int rv = param_set_bool(val, kp); + + if (rv) + return rv; + + if (otgwl_xceiv) + otgwl_handle_event(otgwl_xceiv->last_event); + + return 0; +} + +static struct kernel_param_ops enabled_param_ops = { + .set = set_enabled, + .get = param_get_bool, +}; + +module_param_cb(enabled, &enabled_param_ops, &enabled, 0644); +MODULE_PARM_DESC(enabled, "enable wakelock when VBUS present"); + +static int __init otg_wakelock_init(void) +{ + int ret; + struct usb_phy *phy; + + phy = usb_get_phy(USB_PHY_TYPE_USB2); + + if (IS_ERR(phy)) { + pr_err("%s: No USB transceiver found\n", __func__); + return PTR_ERR(phy); + } + otgwl_xceiv = phy; + + snprintf(vbus_lock.name, sizeof(vbus_lock.name), "vbus-%s", + dev_name(otgwl_xceiv->dev)); + wakeup_source_init(&vbus_lock.wakesrc, vbus_lock.name); + + otgwl_nb.notifier_call = otgwl_otg_notifications; + ret = usb_register_notifier(otgwl_xceiv, &otgwl_nb); + + if (ret) { + pr_err("%s: usb_register_notifier on transceiver %s" + " failed\n", __func__, + dev_name(otgwl_xceiv->dev)); + otgwl_xceiv = NULL; + wakeup_source_trash(&vbus_lock.wakesrc); + return ret; + } + + otgwl_handle_event(otgwl_xceiv->last_event); + return ret; +} + +late_initcall(otg_wakelock_init);
diff --git a/drivers/video/console/dummycon.c b/drivers/video/console/dummycon.c index b90ef96..f2eafe2 100644 --- a/drivers/video/console/dummycon.c +++ b/drivers/video/console/dummycon.c
@@ -41,12 +41,47 @@ static void dummycon_init(struct vc_data *vc, int init) vc_resize(vc, DUMMY_COLUMNS, DUMMY_ROWS); } -static int dummycon_dummy(void) +static void dummycon_deinit(struct vc_data *vc) { } +static void dummycon_clear(struct vc_data *vc, int sy, int sx, int height, + int width) { } +static void dummycon_putc(struct vc_data *vc, int c, int ypos, int xpos) { } +static void dummycon_putcs(struct vc_data *vc, const unsigned short *s, + int count, int ypos, int xpos) { } +static void dummycon_cursor(struct vc_data *vc, int mode) { } + +static bool dummycon_scroll(struct vc_data *vc, unsigned int top, + unsigned int bottom, enum con_scroll dir, + unsigned int lines) { - return 0; + return false; } -#define DUMMY (void *)dummycon_dummy +static int dummycon_switch(struct vc_data *vc) +{ + return 0; +} + +static int dummycon_blank(struct vc_data *vc, int blank, int mode_switch) +{ + return 0; +} + +static int dummycon_font_set(struct vc_data *vc, struct console_font *font, + unsigned int flags) +{ + return 0; +} + +static int dummycon_font_default(struct vc_data *vc, + struct console_font *font, char *name) +{ + return 0; +} + +static int dummycon_font_copy(struct vc_data *vc, int con) +{ + return 0; +} /* * The console `switch' structure for the dummy console @@ -55,19 +90,19 @@ static int dummycon_dummy(void) */ const struct consw dummy_con = { - .owner = THIS_MODULE, - .con_startup = dummycon_startup, - .con_init = dummycon_init, - .con_deinit = DUMMY, - .con_clear = DUMMY, - .con_putc = DUMMY, - .con_putcs = DUMMY, - .con_cursor = DUMMY, - .con_scroll = DUMMY, - .con_switch = DUMMY, - .con_blank = DUMMY, - .con_font_set = DUMMY, - .con_font_default = DUMMY, - .con_font_copy = DUMMY, + .owner = THIS_MODULE, + .con_startup = dummycon_startup, + .con_init = dummycon_init, + .con_deinit = dummycon_deinit, + .con_clear = dummycon_clear, + .con_putc = dummycon_putc, + .con_putcs = dummycon_putcs, + .con_cursor = dummycon_cursor, + .con_scroll = dummycon_scroll, + .con_switch = dummycon_switch, + .con_blank = dummycon_blank, + .con_font_set = dummycon_font_set, + .con_font_default = dummycon_font_default, + .con_font_copy = dummycon_font_copy, }; EXPORT_SYMBOL_GPL(dummy_con);
diff --git a/drivers/video/console/newport_con.c b/drivers/video/console/newport_con.c index 42d02a2..7f2526b 100644 --- a/drivers/video/console/newport_con.c +++ b/drivers/video/console/newport_con.c
@@ -673,12 +673,12 @@ static bool newport_scroll(struct vc_data *vc, unsigned int t, unsigned int b, return true; } -static int newport_dummy(struct vc_data *c) +static int newport_set_origin(struct vc_data *vc) { return 0; } -#define DUMMY (void *) newport_dummy +static void newport_save_screen(struct vc_data *vc) { } const struct consw newport_con = { .owner = THIS_MODULE, @@ -694,8 +694,8 @@ const struct consw newport_con = { .con_blank = newport_blank, .con_font_set = newport_font_set, .con_font_default = newport_font_default, - .con_set_origin = DUMMY, - .con_save_screen = DUMMY + .con_set_origin = newport_set_origin, + .con_save_screen = newport_save_screen }; static int newport_probe(struct gio_device *dev,
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index a17ba14..f09e17b 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c
@@ -1272,7 +1272,8 @@ static int vgacon_adjust_height(struct vc_data *vc, unsigned fontheight) return 0; } -static int vgacon_font_set(struct vc_data *c, struct console_font *font, unsigned flags) +static int vgacon_font_set(struct vc_data *c, struct console_font *font, + unsigned int flags) { unsigned charcount = font->charcount; int rc; @@ -1407,21 +1408,20 @@ static bool vgacon_scroll(struct vc_data *c, unsigned int t, unsigned int b, * The console `switch' structure for the VGA based console */ -static int vgacon_dummy(struct vc_data *c) -{ - return 0; -} - -#define DUMMY (void *) vgacon_dummy +static void vgacon_clear(struct vc_data *vc, int sy, int sx, int height, + int width) { } +static void vgacon_putc(struct vc_data *vc, int c, int ypos, int xpos) { } +static void vgacon_putcs(struct vc_data *vc, const unsigned short *s, + int count, int ypos, int xpos) { } const struct consw vga_con = { .owner = THIS_MODULE, .con_startup = vgacon_startup, .con_init = vgacon_init, .con_deinit = vgacon_deinit, - .con_clear = DUMMY, - .con_putc = DUMMY, - .con_putcs = DUMMY, + .con_clear = vgacon_clear, + .con_putc = vgacon_putc, + .con_putcs = vgacon_putcs, .con_cursor = vgacon_cursor, .con_scroll = vgacon_scroll, .con_switch = vgacon_switch,
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 8578711..553763a 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c
@@ -2588,7 +2588,8 @@ static int fbcon_copy_font(struct vc_data *vc, int con) * is ever implemented. */ -static int fbcon_set_font(struct vc_data *vc, struct console_font *font, unsigned flags) +static int fbcon_set_font(struct vc_data *vc, struct console_font *font, + unsigned int flags) { struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]]; unsigned charcount = font->charcount;
diff --git a/drivers/video/fbdev/goldfishfb.c b/drivers/video/fbdev/goldfishfb.c index 14a93cb..8c93ad1d 100644 --- a/drivers/video/fbdev/goldfishfb.c +++ b/drivers/video/fbdev/goldfishfb.c
@@ -26,6 +26,7 @@ #include <linux/interrupt.h> #include <linux/ioport.h> #include <linux/platform_device.h> +#include <linux/acpi.h> enum { FB_GET_WIDTH = 0x00, @@ -234,7 +235,7 @@ static int goldfish_fb_probe(struct platform_device *pdev) fb->fb.var.activate = FB_ACTIVATE_NOW; fb->fb.var.height = readl(fb->reg_base + FB_GET_PHYS_HEIGHT); fb->fb.var.width = readl(fb->reg_base + FB_GET_PHYS_WIDTH); - fb->fb.var.pixclock = 10000; + fb->fb.var.pixclock = 0; fb->fb.var.red.offset = 11; fb->fb.var.red.length = 5; @@ -305,12 +306,25 @@ static int goldfish_fb_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id goldfish_fb_of_match[] = { + { .compatible = "google,goldfish-fb", }, + {}, +}; +MODULE_DEVICE_TABLE(of, goldfish_fb_of_match); + +static const struct acpi_device_id goldfish_fb_acpi_match[] = { + { "GFSH0004", 0 }, + { }, +}; +MODULE_DEVICE_TABLE(acpi, goldfish_fb_acpi_match); static struct platform_driver goldfish_fb_driver = { .probe = goldfish_fb_probe, .remove = goldfish_fb_remove, .driver = { - .name = "goldfish_fb" + .name = "goldfish_fb", + .of_match_table = goldfish_fb_of_match, + .acpi_match_table = ACPI_PTR(goldfish_fb_acpi_match), } };
diff --git a/fs/Kconfig b/fs/Kconfig index 7aee6d6..121fabf 100644 --- a/fs/Kconfig +++ b/fs/Kconfig
@@ -227,6 +227,7 @@ source "fs/adfs/Kconfig" source "fs/affs/Kconfig" source "fs/ecryptfs/Kconfig" +source "fs/sdcardfs/Kconfig" source "fs/hfs/Kconfig" source "fs/hfsplus/Kconfig" source "fs/befs/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile index ef772f1..1e34e4b 100644 --- a/fs/Makefile +++ b/fs/Makefile
@@ -4,7 +4,7 @@ # # 14 Sep 2000, Christoph Hellwig <hch@infradead.org> # Rewritten to use lists instead of if-statements. -# +# obj-y := open.o read_write.o file_table.o super.o \ char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ @@ -62,7 +62,7 @@ obj-$(CONFIG_PROFILING) += dcookies.o obj-$(CONFIG_DLM) += dlm/ - + # Do not add any filesystems before this line obj-$(CONFIG_FSCACHE) += fscache/ obj-$(CONFIG_REISERFS_FS) += reiserfs/ @@ -84,6 +84,7 @@ obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ obj-$(CONFIG_HFS_FS) += hfs/ obj-$(CONFIG_ECRYPT_FS) += ecryptfs/ +obj-$(CONFIG_SDCARD_FS) += sdcardfs/ obj-$(CONFIG_VXFS_FS) += freevxfs/ obj-$(CONFIG_NFS_FS) += nfs/ obj-$(CONFIG_EXPORTFS) += exportfs/
diff --git a/fs/afs/file.c b/fs/afs/file.c index 510cba1..e6e949c 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c
@@ -140,12 +140,11 @@ static void afs_file_readpage_read_complete(struct page *page, /* * read page from file, directory or symlink, given a key to use */ -int afs_page_filler(void *data, struct page *page) +static int __afs_page_filler(struct key *key, struct page *page) { struct inode *inode = page->mapping->host; struct afs_vnode *vnode = AFS_FS_I(inode); struct afs_read *req; - struct key *key = data; int ret; _enter("{%x},{%lu},{%lu}", key_serial(key), inode->i_ino, page->index); @@ -249,6 +248,13 @@ int afs_page_filler(void *data, struct page *page) return ret; } +int afs_page_filler(struct file *data, struct page *page) +{ + struct key *key = (struct key *)data; + + return __afs_page_filler(key, page); +} + /* * read page from file, directory or symlink, given a file to nominate the key * to be used @@ -261,14 +267,14 @@ static int afs_readpage(struct file *file, struct page *page) if (file) { key = file->private_data; ASSERT(key != NULL); - ret = afs_page_filler(key, page); + ret = __afs_page_filler(key, page); } else { struct inode *inode = page->mapping->host; key = afs_request_key(AFS_FS_S(inode->i_sb)->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); } else { - ret = afs_page_filler(key, page); + ret = __afs_page_filler(key, page); key_put(key); } }
diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 82e16556..ed42f21 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h
@@ -485,7 +485,7 @@ extern const struct file_operations afs_file_operations; extern int afs_open(struct inode *, struct file *); extern int afs_release(struct inode *, struct file *); -extern int afs_page_filler(void *, struct page *); +extern int afs_page_filler(struct file *, struct page *); extern void afs_put_read(struct afs_read *); /*
diff --git a/fs/attr.c b/fs/attr.c index 12ffdb6..f5510d9 100644 --- a/fs/attr.c +++ b/fs/attr.c
@@ -202,7 +202,7 @@ EXPORT_SYMBOL(setattr_copy); * the file open for write, as there can be no conflicting delegation in * that case. */ -int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode) +int notify_change2(struct vfsmount *mnt, struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode) { struct inode *inode = dentry->d_inode; umode_t mode = inode->i_mode; @@ -226,7 +226,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de return -EPERM; if (!inode_owner_or_capable(inode)) { - error = inode_permission(inode, MAY_WRITE); + error = inode_permission2(mnt, inode, MAY_WRITE); if (error) return error; } @@ -309,7 +309,9 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de if (error) return error; - if (inode->i_op->setattr) + if (mnt && inode->i_op->setattr2) + error = inode->i_op->setattr2(mnt, dentry, attr); + else if (inode->i_op->setattr) error = inode->i_op->setattr(dentry, attr); else error = simple_setattr(dentry, attr); @@ -322,4 +324,10 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de return error; } +EXPORT_SYMBOL(notify_change2); + +int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode) +{ + return notify_change2(NULL, dentry, attr, delegated_inode); +} EXPORT_SYMBOL(notify_change);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 5b62e06..6d0af1f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c
@@ -3819,8 +3819,8 @@ int btree_write_cache_pages(struct address_space *mapping, if (wbc->sync_mode == WB_SYNC_ALL) tag_pages_for_writeback(mapping, index, end); while (!done && !nr_to_write_done && (index <= end) && - (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + (nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end, + tag))) { unsigned i; scanned = 1; @@ -3830,11 +3830,6 @@ int btree_write_cache_pages(struct address_space *mapping, if (!PagePrivate(page)) continue; - if (!wbc->range_cyclic && page->index > end) { - done = 1; - break; - } - spin_lock(&mapping->private_lock); if (!PagePrivate(page)) { spin_unlock(&mapping->private_lock); @@ -3966,8 +3961,8 @@ static int extent_write_cache_pages(struct address_space *mapping, tag_pages_for_writeback(mapping, index, end); done_index = index; while (!done && !nr_to_write_done && (index <= end) && - (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + (nr_pages = pagevec_lookup_range_tag(&pvec, mapping, + &index, end, tag))) { unsigned i; scanned = 1; @@ -3992,12 +3987,6 @@ static int extent_write_cache_pages(struct address_space *mapping, continue; } - if (!wbc->range_cyclic && page->index > end) { - done = 1; - unlock_page(page); - continue; - } - if (wbc->sync_mode != WB_SYNC_NONE) { if (PageWriteback(page)) flush_fn(data);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 4d62265..26c682d 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c
@@ -870,15 +870,10 @@ static int ceph_writepages_start(struct address_space *mapping, max_pages = wsize >> PAGE_SHIFT; get_more_pages: - pvec_pages = min_t(unsigned, PAGEVEC_SIZE, - max_pages - locked_pages); - if (end - index < (u64)(pvec_pages - 1)) - pvec_pages = (unsigned)(end - index) + 1; - - pvec_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, - pvec_pages); - dout("pagevec_lookup_tag got %d\n", pvec_pages); + pvec_pages = pagevec_lookup_range_nr_tag(&pvec, mapping, &index, + end, PAGECACHE_TAG_DIRTY, + max_pages - locked_pages); + dout("pagevec_lookup_range_tag got %d\n", pvec_pages); if (!pvec_pages && !locked_pages) break; for (i = 0; i < pvec_pages && locked_pages < max_pages; i++) { @@ -896,16 +891,6 @@ static int ceph_writepages_start(struct address_space *mapping, unlock_page(page); continue; } - if (page->index > end) { - dout("end of range %p\n", page); - /* can't be range_cyclic (1st pass) because - * end == -1 in that case. */ - stop = true; - if (ceph_wbc.head_snapc) - done = true; - unlock_page(page); - break; - } if (strip_unit_end && (page->index > strip_unit_end)) { dout("end of strip unit %p\n", page); unlock_page(page); @@ -1177,8 +1162,7 @@ static int ceph_writepages_start(struct address_space *mapping, index = 0; while ((index <= end) && (nr = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_WRITEBACK, - PAGEVEC_SIZE))) { + PAGECACHE_TAG_WRITEBACK))) { for (i = 0; i < nr; i++) { page = pvec.pages[i]; if (page_snap_context(page) != snapc)
diff --git a/fs/coredump.c b/fs/coredump.c index 52c63d6..4b15f40 100644 --- a/fs/coredump.c +++ b/fs/coredump.c
@@ -747,7 +747,7 @@ void do_coredump(const siginfo_t *siginfo) goto close_fail; if (!(cprm.file->f_mode & FMODE_CAN_WRITE)) goto close_fail; - if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) + if (do_truncate2(cprm.file->f_path.mnt, cprm.file->f_path.dentry, 0, 0, cprm.file)) goto close_fail; }
diff --git a/fs/crypto/Makefile b/fs/crypto/Makefile index 9f6607f..cb49698 100644 --- a/fs/crypto/Makefile +++ b/fs/crypto/Makefile
@@ -1,4 +1,4 @@ obj-$(CONFIG_FS_ENCRYPTION) += fscrypto.o -fscrypto-y := crypto.o fname.o policy.o keyinfo.o +fscrypto-y := crypto.o fname.o hooks.o keyinfo.o policy.o fscrypto-$(CONFIG_BLOCK) += bio.o
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index 0d5e6a5..0959044 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c
@@ -26,15 +26,8 @@ #include <linux/namei.h> #include "fscrypt_private.h" -/* - * Call fscrypt_decrypt_page on every single page, reusing the encryption - * context. - */ -static void completion_pages(struct work_struct *work) +static void __fscrypt_decrypt_bio(struct bio *bio, bool done) { - struct fscrypt_ctx *ctx = - container_of(work, struct fscrypt_ctx, r.work); - struct bio *bio = ctx->r.bio; struct bio_vec *bv; int i; @@ -46,22 +39,38 @@ static void completion_pages(struct work_struct *work) if (ret) { WARN_ON_ONCE(1); SetPageError(page); - } else { + } else if (done) { SetPageUptodate(page); } - unlock_page(page); + if (done) + unlock_page(page); } +} + +void fscrypt_decrypt_bio(struct bio *bio) +{ + __fscrypt_decrypt_bio(bio, false); +} +EXPORT_SYMBOL(fscrypt_decrypt_bio); + +static void completion_pages(struct work_struct *work) +{ + struct fscrypt_ctx *ctx = + container_of(work, struct fscrypt_ctx, r.work); + struct bio *bio = ctx->r.bio; + + __fscrypt_decrypt_bio(bio, true); fscrypt_release_ctx(ctx); bio_put(bio); } -void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, struct bio *bio) +void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, struct bio *bio) { INIT_WORK(&ctx->r.work, completion_pages); ctx->r.bio = bio; - queue_work(fscrypt_read_workqueue, &ctx->r.work); + fscrypt_enqueue_decrypt_work(&ctx->r.work); } -EXPORT_SYMBOL(fscrypt_decrypt_bio_pages); +EXPORT_SYMBOL(fscrypt_enqueue_decrypt_bio); void fscrypt_pullback_bio_page(struct page **page, bool restore) {
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index daf2683..4dc788e 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c
@@ -27,6 +27,7 @@ #include <linux/dcache.h> #include <linux/namei.h> #include <crypto/aes.h> +#include <crypto/skcipher.h> #include "fscrypt_private.h" static unsigned int num_prealloc_crypto_pages = 32; @@ -44,12 +45,18 @@ static mempool_t *fscrypt_bounce_page_pool = NULL; static LIST_HEAD(fscrypt_free_ctxs); static DEFINE_SPINLOCK(fscrypt_ctx_lock); -struct workqueue_struct *fscrypt_read_workqueue; +static struct workqueue_struct *fscrypt_read_workqueue; static DEFINE_MUTEX(fscrypt_init_mutex); static struct kmem_cache *fscrypt_ctx_cachep; struct kmem_cache *fscrypt_info_cachep; +void fscrypt_enqueue_decrypt_work(struct work_struct *work) +{ + queue_work(fscrypt_read_workqueue, work); +} +EXPORT_SYMBOL(fscrypt_enqueue_decrypt_work); + /** * fscrypt_release_ctx() - Releases an encryption context * @ctx: The encryption context to release. @@ -126,19 +133,17 @@ struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, gfp_t gfp_flags) } EXPORT_SYMBOL(fscrypt_get_ctx); -/** - * page_crypt_complete() - completion callback for page crypto - * @req: The asynchronous cipher request context - * @res: The result of the cipher operation - */ -static void page_crypt_complete(struct crypto_async_request *req, int res) +void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, + const struct fscrypt_info *ci) { - struct fscrypt_completion_result *ecr = req->data; + memset(iv, 0, ci->ci_mode->ivsize); + iv->lblk_num = cpu_to_le64(lblk_num); - if (res == -EINPROGRESS) - return; - ecr->res = res; - complete(&ecr->completion); + if (ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY) + memcpy(iv->nonce, ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE); + + if (ci->ci_essiv_tfm != NULL) + crypto_cipher_encrypt_one(ci->ci_essiv_tfm, iv->raw, iv->raw); } int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, @@ -146,12 +151,9 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, struct page *dest_page, unsigned int len, unsigned int offs, gfp_t gfp_flags) { - struct { - __le64 index; - u8 padding[FS_IV_SIZE - sizeof(__le64)]; - } iv; + union fscrypt_iv iv; struct skcipher_request *req = NULL; - DECLARE_FS_COMPLETION_RESULT(ecr); + DECLARE_CRYPTO_WAIT(wait); struct scatterlist dst, src; struct fscrypt_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_ctfm; @@ -159,27 +161,15 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, BUG_ON(len == 0); - BUILD_BUG_ON(sizeof(iv) != FS_IV_SIZE); - BUILD_BUG_ON(AES_BLOCK_SIZE != FS_IV_SIZE); - iv.index = cpu_to_le64(lblk_num); - memset(iv.padding, 0, sizeof(iv.padding)); - - if (ci->ci_essiv_tfm != NULL) { - crypto_cipher_encrypt_one(ci->ci_essiv_tfm, (u8 *)&iv, - (u8 *)&iv); - } + fscrypt_generate_iv(&iv, lblk_num, ci); req = skcipher_request_alloc(tfm, gfp_flags); - if (!req) { - printk_ratelimited(KERN_ERR - "%s: crypto_request_alloc() failed\n", - __func__); + if (!req) return -ENOMEM; - } skcipher_request_set_callback( req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - page_crypt_complete, &ecr); + crypto_req_done, &wait); sg_init_table(&dst, 1); sg_set_page(&dst, dest_page, len, offs); @@ -187,19 +177,15 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, sg_set_page(&src, src_page, len, offs); skcipher_request_set_crypt(req, &src, &dst, len, &iv); if (rw == FS_DECRYPT) - res = crypto_skcipher_decrypt(req); + res = crypto_wait_req(crypto_skcipher_decrypt(req), &wait); else - res = crypto_skcipher_encrypt(req); - if (res == -EINPROGRESS || res == -EBUSY) { - BUG_ON(req->base.data != &ecr); - wait_for_completion(&ecr.completion); - res = ecr.res; - } + res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); skcipher_request_free(req); if (res) { - printk_ratelimited(KERN_ERR - "%s: crypto_skcipher_encrypt() returned %d\n", - __func__, res); + fscrypt_err(inode->i_sb, + "%scryption failed for inode %lu, block %llu: %d", + (rw == FS_DECRYPT ? "de" : "en"), + inode->i_ino, lblk_num, res); return res; } return 0; @@ -340,12 +326,11 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) return -ECHILD; dir = dget_parent(dentry); - if (!d_inode(dir)->i_sb->s_cop->is_encrypted(d_inode(dir))) { + if (!IS_ENCRYPTED(d_inode(dir))) { dput(dir); return 0; } - /* this should eventually be an flag in d_flags */ spin_lock(&dentry->d_lock); cached_with_key = dentry->d_flags & DCACHE_ENCRYPTED_WITH_KEY; spin_unlock(&dentry->d_lock); @@ -372,7 +357,6 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) const struct dentry_operations fscrypt_d_ops = { .d_revalidate = fscrypt_d_revalidate, }; -EXPORT_SYMBOL(fscrypt_d_ops); void fscrypt_restore_control_page(struct page *page) { @@ -441,6 +425,27 @@ int fscrypt_initialize(unsigned int cop_flags) return res; } +void fscrypt_msg(struct super_block *sb, const char *level, + const char *fmt, ...) +{ + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + struct va_format vaf; + va_list args; + + if (!__ratelimit(&rs)) + return; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + if (sb) + printk("%sfscrypt (%s): %pV\n", level, sb->s_id, &vaf); + else + printk("%sfscrypt: %pV\n", level, &vaf); + va_end(args); +} + /** * fscrypt_init() - Set up for fs encryption. */
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 8606da1..7ff40a7 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c
@@ -13,89 +13,71 @@ #include <linux/scatterlist.h> #include <linux/ratelimit.h> +#include <crypto/skcipher.h> #include "fscrypt_private.h" -/** - * fname_crypt_complete() - completion callback for filename crypto - * @req: The asynchronous cipher request context - * @res: The result of the cipher operation - */ -static void fname_crypt_complete(struct crypto_async_request *req, int res) +static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) { - struct fscrypt_completion_result *ecr = req->data; + if (str->len == 1 && str->name[0] == '.') + return true; - if (res == -EINPROGRESS) - return; - ecr->res = res; - complete(&ecr->completion); + if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') + return true; + + return false; } /** * fname_encrypt() - encrypt a filename * - * The caller must have allocated sufficient memory for the @oname string. + * The output buffer must be at least as large as the input buffer. + * Any extra space is filled with NUL padding before encryption. * * Return: 0 on success, -errno on failure */ -static int fname_encrypt(struct inode *inode, - const struct qstr *iname, struct fscrypt_str *oname) +int fname_encrypt(struct inode *inode, const struct qstr *iname, + u8 *out, unsigned int olen) { struct skcipher_request *req = NULL; - DECLARE_FS_COMPLETION_RESULT(ecr); + DECLARE_CRYPTO_WAIT(wait); struct fscrypt_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_ctfm; - int res = 0; - char iv[FS_CRYPTO_BLOCK_SIZE]; + union fscrypt_iv iv; struct scatterlist sg; - int padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK); - unsigned int lim; - unsigned int cryptlen; - - lim = inode->i_sb->s_cop->max_namelen(inode); - if (iname->len <= 0 || iname->len > lim) - return -EIO; + int res; /* * Copy the filename to the output buffer for encrypting in-place and * pad it with the needed number of NUL bytes. */ - cryptlen = max_t(unsigned int, iname->len, FS_CRYPTO_BLOCK_SIZE); - cryptlen = round_up(cryptlen, padding); - cryptlen = min(cryptlen, lim); - memcpy(oname->name, iname->name, iname->len); - memset(oname->name + iname->len, 0, cryptlen - iname->len); + if (WARN_ON(olen < iname->len)) + return -ENOBUFS; + memcpy(out, iname->name, iname->len); + memset(out + iname->len, 0, olen - iname->len); /* Initialize the IV */ - memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); + fscrypt_generate_iv(&iv, 0, ci); /* Set up the encryption request */ req = skcipher_request_alloc(tfm, GFP_NOFS); - if (!req) { - printk_ratelimited(KERN_ERR - "%s: skcipher_request_alloc() failed\n", __func__); + if (!req) return -ENOMEM; - } skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - fname_crypt_complete, &ecr); - sg_init_one(&sg, oname->name, cryptlen); - skcipher_request_set_crypt(req, &sg, &sg, cryptlen, iv); + crypto_req_done, &wait); + sg_init_one(&sg, out, olen); + skcipher_request_set_crypt(req, &sg, &sg, olen, &iv); /* Do the encryption */ - res = crypto_skcipher_encrypt(req); - if (res == -EINPROGRESS || res == -EBUSY) { - /* Request is being completed asynchronously; wait for it */ - wait_for_completion(&ecr.completion); - res = ecr.res; - } + res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); skcipher_request_free(req); if (res < 0) { - printk_ratelimited(KERN_ERR - "%s: Error (error code %d)\n", __func__, res); + fscrypt_err(inode->i_sb, + "Filename encryption failed for inode %lu: %d", + inode->i_ino, res); return res; } - oname->len = cryptlen; return 0; } @@ -111,45 +93,34 @@ static int fname_decrypt(struct inode *inode, struct fscrypt_str *oname) { struct skcipher_request *req = NULL; - DECLARE_FS_COMPLETION_RESULT(ecr); + DECLARE_CRYPTO_WAIT(wait); struct scatterlist src_sg, dst_sg; struct fscrypt_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_ctfm; - int res = 0; - char iv[FS_CRYPTO_BLOCK_SIZE]; - unsigned lim; - - lim = inode->i_sb->s_cop->max_namelen(inode); - if (iname->len <= 0 || iname->len > lim) - return -EIO; + union fscrypt_iv iv; + int res; /* Allocate request */ req = skcipher_request_alloc(tfm, GFP_NOFS); - if (!req) { - printk_ratelimited(KERN_ERR - "%s: crypto_request_alloc() failed\n", __func__); + if (!req) return -ENOMEM; - } skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - fname_crypt_complete, &ecr); + crypto_req_done, &wait); /* Initialize IV */ - memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); + fscrypt_generate_iv(&iv, 0, ci); /* Create decryption request */ sg_init_one(&src_sg, iname->name, iname->len); sg_init_one(&dst_sg, oname->name, oname->len); - skcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv); - res = crypto_skcipher_decrypt(req); - if (res == -EINPROGRESS || res == -EBUSY) { - wait_for_completion(&ecr.completion); - res = ecr.res; - } + skcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, &iv); + res = crypto_wait_req(crypto_skcipher_decrypt(req), &wait); skcipher_request_free(req); if (res < 0) { - printk_ratelimited(KERN_ERR - "%s: Error (error code %d)\n", __func__, res); + fscrypt_err(inode->i_sb, + "Filename decryption failed for inode %lu: %d", + inode->i_ino, res); return res; } @@ -212,50 +183,52 @@ static int digest_decode(const char *src, int len, char *dst) return cp - dst; } -u32 fscrypt_fname_encrypted_size(const struct inode *inode, u32 ilen) +bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len, + u32 max_len, u32 *encrypted_len_ret) { - int padding = 32; - struct fscrypt_info *ci = inode->i_crypt_info; + int padding = 4 << (inode->i_crypt_info->ci_flags & + FS_POLICY_FLAGS_PAD_MASK); + u32 encrypted_len; - if (ci) - padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK); - ilen = max(ilen, (u32)FS_CRYPTO_BLOCK_SIZE); - return round_up(ilen, padding); + if (orig_len > max_len) + return false; + encrypted_len = max(orig_len, (u32)FS_CRYPTO_BLOCK_SIZE); + encrypted_len = round_up(encrypted_len, padding); + *encrypted_len_ret = min(encrypted_len, max_len); + return true; } -EXPORT_SYMBOL(fscrypt_fname_encrypted_size); /** - * fscrypt_fname_crypto_alloc_obuff() - + * fscrypt_fname_alloc_buffer - allocate a buffer for presented filenames * - * Allocates an output buffer that is sufficient for the crypto operation - * specified by the context and the direction. + * Allocate a buffer that is large enough to hold any decrypted or encoded + * filename (null-terminated), for the given maximum encrypted filename length. + * + * Return: 0 on success, -errno on failure */ int fscrypt_fname_alloc_buffer(const struct inode *inode, - u32 ilen, struct fscrypt_str *crypto_str) + u32 max_encrypted_len, + struct fscrypt_str *crypto_str) { - u32 olen = fscrypt_fname_encrypted_size(inode, ilen); const u32 max_encoded_len = max_t(u32, BASE64_CHARS(FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE), 1 + BASE64_CHARS(sizeof(struct fscrypt_digested_name))); + u32 max_presented_len; - crypto_str->len = olen; - olen = max(olen, max_encoded_len); + max_presented_len = max(max_encoded_len, max_encrypted_len); - /* - * Allocated buffer can hold one more character to null-terminate the - * string - */ - crypto_str->name = kmalloc(olen + 1, GFP_NOFS); - if (!(crypto_str->name)) + crypto_str->name = kmalloc(max_presented_len + 1, GFP_NOFS); + if (!crypto_str->name) return -ENOMEM; + crypto_str->len = max_presented_len; return 0; } EXPORT_SYMBOL(fscrypt_fname_alloc_buffer); /** - * fscrypt_fname_crypto_free_buffer() - + * fscrypt_fname_free_buffer - free the buffer for presented filenames * - * Frees the buffer allocated for crypto operation. + * Free the buffer allocated by fscrypt_fname_alloc_buffer(). */ void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str) { @@ -322,35 +295,6 @@ int fscrypt_fname_disk_to_usr(struct inode *inode, EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); /** - * fscrypt_fname_usr_to_disk() - converts a filename from user space to disk - * space - * - * The caller must have allocated sufficient memory for the @oname string. - * - * Return: 0 on success, -errno on failure - */ -int fscrypt_fname_usr_to_disk(struct inode *inode, - const struct qstr *iname, - struct fscrypt_str *oname) -{ - if (fscrypt_is_dot_dotdot(iname)) { - oname->name[0] = '.'; - oname->name[iname->len - 1] = '.'; - oname->len = iname->len; - return 0; - } - if (inode->i_crypt_info) - return fname_encrypt(inode, iname, oname); - /* - * Without a proper key, a user is not allowed to modify the filenames - * in a directory. Consequently, a user space name cannot be mapped to - * a disk-space name - */ - return -ENOKEY; -} -EXPORT_SYMBOL(fscrypt_fname_usr_to_disk); - -/** * fscrypt_setup_filename() - prepare to search a possibly encrypted directory * @dir: the directory that will be searched * @iname: the user-provided filename being searched for @@ -383,22 +327,27 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, memset(fname, 0, sizeof(struct fscrypt_name)); fname->usr_fname = iname; - if (!dir->i_sb->s_cop->is_encrypted(dir) || - fscrypt_is_dot_dotdot(iname)) { + if (!IS_ENCRYPTED(dir) || fscrypt_is_dot_dotdot(iname)) { fname->disk_name.name = (unsigned char *)iname->name; fname->disk_name.len = iname->len; return 0; } ret = fscrypt_get_encryption_info(dir); - if (ret && ret != -EOPNOTSUPP) + if (ret) return ret; if (dir->i_crypt_info) { - ret = fscrypt_fname_alloc_buffer(dir, iname->len, - &fname->crypto_buf); - if (ret) - return ret; - ret = fname_encrypt(dir, iname, &fname->crypto_buf); + if (!fscrypt_fname_encrypted_size(dir, iname->len, + dir->i_sb->s_cop->max_namelen, + &fname->crypto_buf.len)) + return -ENAMETOOLONG; + fname->crypto_buf.name = kmalloc(fname->crypto_buf.len, + GFP_NOFS); + if (!fname->crypto_buf.name) + return -ENOMEM; + + ret = fname_encrypt(dir, iname, fname->crypto_buf.name, + fname->crypto_buf.len); if (ret) goto errout; fname->disk_name.name = fname->crypto_buf.name; @@ -450,7 +399,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, return 0; errout: - fscrypt_fname_free_buffer(&fname->crypto_buf); + kfree(fname->crypto_buf.name); return ret; } EXPORT_SYMBOL(fscrypt_setup_filename);
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 092e9da..7424f85 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h
@@ -12,20 +12,12 @@ #ifndef _FSCRYPT_PRIVATE_H #define _FSCRYPT_PRIVATE_H -#include <linux/fscrypt_supp.h> +#define __FS_HAS_ENCRYPTION 1 +#include <linux/fscrypt.h> #include <crypto/hash.h> /* Encryption parameters */ -#define FS_IV_SIZE 16 -#define FS_AES_128_ECB_KEY_SIZE 16 -#define FS_AES_128_CBC_KEY_SIZE 16 -#define FS_AES_128_CTS_KEY_SIZE 16 -#define FS_AES_256_GCM_KEY_SIZE 32 -#define FS_AES_256_CBC_KEY_SIZE 32 -#define FS_AES_256_CTS_KEY_SIZE 32 -#define FS_AES_256_XTS_KEY_SIZE 64 - -#define FS_KEY_DERIVATION_NONCE_SIZE 16 +#define FS_KEY_DERIVATION_NONCE_SIZE 16 /** * Encryption context for inode @@ -49,17 +41,52 @@ struct fscrypt_context { #define FS_ENCRYPTION_CONTEXT_FORMAT_V1 1 +/** + * For encrypted symlinks, the ciphertext length is stored at the beginning + * of the string in little-endian format. + */ +struct fscrypt_symlink_data { + __le16 len; + char encrypted_path[1]; +} __packed; + /* - * A pointer to this structure is stored in the file system's in-core - * representation of an inode. + * fscrypt_info - the "encryption key" for an inode + * + * When an encrypted file's key is made available, an instance of this struct is + * allocated and stored in ->i_crypt_info. Once created, it remains until the + * inode is evicted. */ struct fscrypt_info { + + /* The actual crypto transform used for encryption and decryption */ + struct crypto_skcipher *ci_ctfm; + + /* + * Cipher for ESSIV IV generation. Only set for CBC contents + * encryption, otherwise is NULL. + */ + struct crypto_cipher *ci_essiv_tfm; + + /* + * Encryption mode used for this inode. It corresponds to either + * ci_data_mode or ci_filename_mode, depending on the inode type. + */ + struct fscrypt_mode *ci_mode; + + /* + * If non-NULL, then this inode uses a master key directly rather than a + * derived key, and ci_ctfm will equal ci_master_key->mk_ctfm. + * Otherwise, this inode uses a derived key. + */ + struct fscrypt_master_key *ci_master_key; + + /* fields from the fscrypt_context */ u8 ci_data_mode; u8 ci_filename_mode; u8 ci_flags; - struct crypto_skcipher *ci_ctfm; - struct crypto_cipher *ci_essiv_tfm; - u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE]; + u8 ci_master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE]; + u8 ci_nonce[FS_KEY_DERIVATION_NONCE_SIZE]; }; typedef enum { @@ -70,19 +97,27 @@ typedef enum { #define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 #define FS_CTX_HAS_BOUNCE_BUFFER_FL 0x00000002 -struct fscrypt_completion_result { - struct completion completion; - int res; -}; +static inline bool fscrypt_valid_enc_modes(u32 contents_mode, + u32 filenames_mode) +{ + if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC && + filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS) + return true; -#define DECLARE_FS_COMPLETION_RESULT(ecr) \ - struct fscrypt_completion_result ecr = { \ - COMPLETION_INITIALIZER_ONSTACK((ecr).completion), 0 } + if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS && + filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS) + return true; + if (contents_mode == FS_ENCRYPTION_MODE_ADIANTUM && + filenames_mode == FS_ENCRYPTION_MODE_ADIANTUM) + return true; + + return false; +} /* crypto.c */ +extern struct kmem_cache *fscrypt_info_cachep; extern int fscrypt_initialize(unsigned int cop_flags); -extern struct workqueue_struct *fscrypt_read_workqueue; extern int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, u64 lblk_num, struct page *src_page, @@ -91,8 +126,50 @@ extern int fscrypt_do_page_crypto(const struct inode *inode, gfp_t gfp_flags); extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags); +extern const struct dentry_operations fscrypt_d_ops; + +extern void __printf(3, 4) __cold +fscrypt_msg(struct super_block *sb, const char *level, const char *fmt, ...); + +#define fscrypt_warn(sb, fmt, ...) \ + fscrypt_msg(sb, KERN_WARNING, fmt, ##__VA_ARGS__) +#define fscrypt_err(sb, fmt, ...) \ + fscrypt_msg(sb, KERN_ERR, fmt, ##__VA_ARGS__) + +#define FSCRYPT_MAX_IV_SIZE 32 + +union fscrypt_iv { + struct { + /* logical block number within the file */ + __le64 lblk_num; + + /* per-file nonce; only set in DIRECT_KEY mode */ + u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE]; + }; + u8 raw[FSCRYPT_MAX_IV_SIZE]; +}; + +void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, + const struct fscrypt_info *ci); + +/* fname.c */ +extern int fname_encrypt(struct inode *inode, const struct qstr *iname, + u8 *out, unsigned int olen); +extern bool fscrypt_fname_encrypted_size(const struct inode *inode, + u32 orig_len, u32 max_len, + u32 *encrypted_len_ret); /* keyinfo.c */ + +struct fscrypt_mode { + const char *friendly_name; + const char *cipher_str; + int keysize; + int ivsize; + bool logged_impl_name; + bool needs_essiv; +}; + extern void __exit fscrypt_essiv_cleanup(void); #endif /* _FSCRYPT_PRIVATE_H */
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c new file mode 100644 index 0000000..926e5df --- /dev/null +++ b/fs/crypto/hooks.c
@@ -0,0 +1,271 @@ +/* + * fs/crypto/hooks.c + * + * Encryption hooks for higher-level filesystem operations. + */ + +#include <linux/ratelimit.h> +#include "fscrypt_private.h" + +/** + * fscrypt_file_open - prepare to open a possibly-encrypted regular file + * @inode: the inode being opened + * @filp: the struct file being set up + * + * Currently, an encrypted regular file can only be opened if its encryption key + * is available; access to the raw encrypted contents is not supported. + * Therefore, we first set up the inode's encryption key (if not already done) + * and return an error if it's unavailable. + * + * We also verify that if the parent directory (from the path via which the file + * is being opened) is encrypted, then the inode being opened uses the same + * encryption policy. This is needed as part of the enforcement that all files + * in an encrypted directory tree use the same encryption policy, as a + * protection against certain types of offline attacks. Note that this check is + * needed even when opening an *unencrypted* file, since it's forbidden to have + * an unencrypted file in an encrypted directory. + * + * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code + */ +int fscrypt_file_open(struct inode *inode, struct file *filp) +{ + int err; + struct dentry *dir; + + err = fscrypt_require_key(inode); + if (err) + return err; + + dir = dget_parent(file_dentry(filp)); + if (IS_ENCRYPTED(d_inode(dir)) && + !fscrypt_has_permitted_context(d_inode(dir), inode)) { + fscrypt_warn(inode->i_sb, + "inconsistent encryption contexts: %lu/%lu", + d_inode(dir)->i_ino, inode->i_ino); + err = -EPERM; + } + dput(dir); + return err; +} +EXPORT_SYMBOL_GPL(fscrypt_file_open); + +int __fscrypt_prepare_link(struct inode *inode, struct inode *dir) +{ + int err; + + err = fscrypt_require_key(dir); + if (err) + return err; + + if (!fscrypt_has_permitted_context(dir, inode)) + return -EPERM; + + return 0; +} +EXPORT_SYMBOL_GPL(__fscrypt_prepare_link); + +int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + int err; + + err = fscrypt_require_key(old_dir); + if (err) + return err; + + err = fscrypt_require_key(new_dir); + if (err) + return err; + + if (old_dir != new_dir) { + if (IS_ENCRYPTED(new_dir) && + !fscrypt_has_permitted_context(new_dir, + d_inode(old_dentry))) + return -EPERM; + + if ((flags & RENAME_EXCHANGE) && + IS_ENCRYPTED(old_dir) && + !fscrypt_has_permitted_context(old_dir, + d_inode(new_dentry))) + return -EPERM; + } + return 0; +} +EXPORT_SYMBOL_GPL(__fscrypt_prepare_rename); + +int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry) +{ + int err = fscrypt_get_encryption_info(dir); + + if (err) + return err; + + if (fscrypt_has_encryption_key(dir)) { + spin_lock(&dentry->d_lock); + dentry->d_flags |= DCACHE_ENCRYPTED_WITH_KEY; + spin_unlock(&dentry->d_lock); + } + + d_set_d_op(dentry, &fscrypt_d_ops); + return 0; +} +EXPORT_SYMBOL_GPL(__fscrypt_prepare_lookup); + +int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, + unsigned int max_len, + struct fscrypt_str *disk_link) +{ + int err; + + /* + * To calculate the size of the encrypted symlink target we need to know + * the amount of NUL padding, which is determined by the flags set in + * the encryption policy which will be inherited from the directory. + * The easiest way to get access to this is to just load the directory's + * fscrypt_info, since we'll need it to create the dir_entry anyway. + * + * Note: in test_dummy_encryption mode, @dir may be unencrypted. + */ + err = fscrypt_get_encryption_info(dir); + if (err) + return err; + if (!fscrypt_has_encryption_key(dir)) + return -ENOKEY; + + /* + * Calculate the size of the encrypted symlink and verify it won't + * exceed max_len. Note that for historical reasons, encrypted symlink + * targets are prefixed with the ciphertext length, despite this + * actually being redundant with i_size. This decreases by 2 bytes the + * longest symlink target we can accept. + * + * We could recover 1 byte by not counting a null terminator, but + * counting it (even though it is meaningless for ciphertext) is simpler + * for now since filesystems will assume it is there and subtract it. + */ + if (!fscrypt_fname_encrypted_size(dir, len, + max_len - sizeof(struct fscrypt_symlink_data), + &disk_link->len)) + return -ENAMETOOLONG; + disk_link->len += sizeof(struct fscrypt_symlink_data); + + disk_link->name = NULL; + return 0; +} +EXPORT_SYMBOL_GPL(__fscrypt_prepare_symlink); + +int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, + unsigned int len, struct fscrypt_str *disk_link) +{ + int err; + struct qstr iname = QSTR_INIT(target, len); + struct fscrypt_symlink_data *sd; + unsigned int ciphertext_len; + + err = fscrypt_require_key(inode); + if (err) + return err; + + if (disk_link->name) { + /* filesystem-provided buffer */ + sd = (struct fscrypt_symlink_data *)disk_link->name; + } else { + sd = kmalloc(disk_link->len, GFP_NOFS); + if (!sd) + return -ENOMEM; + } + ciphertext_len = disk_link->len - sizeof(*sd); + sd->len = cpu_to_le16(ciphertext_len); + + err = fname_encrypt(inode, &iname, sd->encrypted_path, ciphertext_len); + if (err) { + if (!disk_link->name) + kfree(sd); + return err; + } + /* + * Null-terminating the ciphertext doesn't make sense, but we still + * count the null terminator in the length, so we might as well + * initialize it just in case the filesystem writes it out. + */ + sd->encrypted_path[ciphertext_len] = '\0'; + + if (!disk_link->name) + disk_link->name = (unsigned char *)sd; + return 0; +} +EXPORT_SYMBOL_GPL(__fscrypt_encrypt_symlink); + +/** + * fscrypt_get_symlink - get the target of an encrypted symlink + * @inode: the symlink inode + * @caddr: the on-disk contents of the symlink + * @max_size: size of @caddr buffer + * @done: if successful, will be set up to free the returned target + * + * If the symlink's encryption key is available, we decrypt its target. + * Otherwise, we encode its target for presentation. + * + * This may sleep, so the filesystem must have dropped out of RCU mode already. + * + * Return: the presentable symlink target or an ERR_PTR() + */ +const char *fscrypt_get_symlink(struct inode *inode, const void *caddr, + unsigned int max_size, + struct delayed_call *done) +{ + const struct fscrypt_symlink_data *sd; + struct fscrypt_str cstr, pstr; + int err; + + /* This is for encrypted symlinks only */ + if (WARN_ON(!IS_ENCRYPTED(inode))) + return ERR_PTR(-EINVAL); + + /* + * Try to set up the symlink's encryption key, but we can continue + * regardless of whether the key is available or not. + */ + err = fscrypt_get_encryption_info(inode); + if (err) + return ERR_PTR(err); + + /* + * For historical reasons, encrypted symlink targets are prefixed with + * the ciphertext length, even though this is redundant with i_size. + */ + + if (max_size < sizeof(*sd)) + return ERR_PTR(-EUCLEAN); + sd = caddr; + cstr.name = (unsigned char *)sd->encrypted_path; + cstr.len = le16_to_cpu(sd->len); + + if (cstr.len == 0) + return ERR_PTR(-EUCLEAN); + + if (cstr.len + sizeof(*sd) - 1 > max_size) + return ERR_PTR(-EUCLEAN); + + err = fscrypt_fname_alloc_buffer(inode, cstr.len, &pstr); + if (err) + return ERR_PTR(err); + + err = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr); + if (err) + goto err_kfree; + + err = -EUCLEAN; + if (pstr.name[0] == '\0') + goto err_kfree; + + pstr.name[pstr.len] = '\0'; + set_delayed_call(done, kfree_link, pstr.name); + return pstr.name; + +err_kfree: + kfree(pstr.name); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(fscrypt_get_symlink);
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index a386302..1e11a68 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c
@@ -10,40 +10,35 @@ */ #include <keys/user-type.h> +#include <linux/hashtable.h> #include <linux/scatterlist.h> #include <linux/ratelimit.h> #include <crypto/aes.h> +#include <crypto/algapi.h> #include <crypto/sha.h> +#include <crypto/skcipher.h> #include "fscrypt_private.h" static struct crypto_shash *essiv_hash_tfm; -static void derive_crypt_complete(struct crypto_async_request *req, int rc) -{ - struct fscrypt_completion_result *ecr = req->data; +/* Table of keys referenced by FS_POLICY_FLAG_DIRECT_KEY policies */ +static DEFINE_HASHTABLE(fscrypt_master_keys, 6); /* 6 bits = 64 buckets */ +static DEFINE_SPINLOCK(fscrypt_master_keys_lock); - if (rc == -EINPROGRESS) - return; - - ecr->res = rc; - complete(&ecr->completion); -} - -/** - * derive_key_aes() - Derive a key using AES-128-ECB - * @deriving_key: Encryption key used for derivation. - * @source_key: Source key to which to apply derivation. - * @derived_raw_key: Derived raw key. +/* + * Key derivation function. This generates the derived key by encrypting the + * master key with AES-128-ECB using the inode's nonce as the AES key. * - * Return: Zero on success; non-zero otherwise. + * The master key must be at least as long as the derived key. If the master + * key is longer, then only the first 'derived_keysize' bytes are used. */ -static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE], - const struct fscrypt_key *source_key, - u8 derived_raw_key[FS_MAX_KEY_SIZE]) +static int derive_key_aes(const u8 *master_key, + const struct fscrypt_context *ctx, + u8 *derived_key, unsigned int derived_keysize) { int res = 0; struct skcipher_request *req = NULL; - DECLARE_FS_COMPLETION_RESULT(ecr); + DECLARE_CRYPTO_WAIT(wait); struct scatterlist src_sg, dst_sg; struct crypto_skcipher *tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0); @@ -60,132 +55,326 @@ static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE], } skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - derive_crypt_complete, &ecr); - res = crypto_skcipher_setkey(tfm, deriving_key, - FS_AES_128_ECB_KEY_SIZE); + crypto_req_done, &wait); + res = crypto_skcipher_setkey(tfm, ctx->nonce, sizeof(ctx->nonce)); if (res < 0) goto out; - sg_init_one(&src_sg, source_key->raw, source_key->size); - sg_init_one(&dst_sg, derived_raw_key, source_key->size); - skcipher_request_set_crypt(req, &src_sg, &dst_sg, source_key->size, + sg_init_one(&src_sg, master_key, derived_keysize); + sg_init_one(&dst_sg, derived_key, derived_keysize); + skcipher_request_set_crypt(req, &src_sg, &dst_sg, derived_keysize, NULL); - res = crypto_skcipher_encrypt(req); - if (res == -EINPROGRESS || res == -EBUSY) { - wait_for_completion(&ecr.completion); - res = ecr.res; - } + res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); out: skcipher_request_free(req); crypto_free_skcipher(tfm); return res; } -static int validate_user_key(struct fscrypt_info *crypt_info, - struct fscrypt_context *ctx, u8 *raw_key, - const char *prefix, int min_keysize) +/* + * Search the current task's subscribed keyrings for a "logon" key with + * description prefix:descriptor, and if found acquire a read lock on it and + * return a pointer to its validated payload in *payload_ret. + */ +static struct key * +find_and_lock_process_key(const char *prefix, + const u8 descriptor[FS_KEY_DESCRIPTOR_SIZE], + unsigned int min_keysize, + const struct fscrypt_key **payload_ret) { char *description; - struct key *keyring_key; - struct fscrypt_key *master_key; + struct key *key; const struct user_key_payload *ukp; - int res; + const struct fscrypt_key *payload; description = kasprintf(GFP_NOFS, "%s%*phN", prefix, - FS_KEY_DESCRIPTOR_SIZE, - ctx->master_key_descriptor); + FS_KEY_DESCRIPTOR_SIZE, descriptor); if (!description) - return -ENOMEM; + return ERR_PTR(-ENOMEM); - keyring_key = request_key(&key_type_logon, description, NULL); + key = request_key(&key_type_logon, description, NULL); kfree(description); - if (IS_ERR(keyring_key)) - return PTR_ERR(keyring_key); - down_read(&keyring_key->sem); + if (IS_ERR(key)) + return key; - if (keyring_key->type != &key_type_logon) { - printk_once(KERN_WARNING - "%s: key type must be logon\n", __func__); - res = -ENOKEY; - goto out; - } - ukp = user_key_payload_locked(keyring_key); - if (!ukp) { - /* key was revoked before we acquired its semaphore */ - res = -EKEYREVOKED; - goto out; - } - if (ukp->datalen != sizeof(struct fscrypt_key)) { - res = -EINVAL; - goto out; - } - master_key = (struct fscrypt_key *)ukp->data; - BUILD_BUG_ON(FS_AES_128_ECB_KEY_SIZE != FS_KEY_DERIVATION_NONCE_SIZE); + down_read(&key->sem); + ukp = user_key_payload_locked(key); - if (master_key->size < min_keysize || master_key->size > FS_MAX_KEY_SIZE - || master_key->size % AES_BLOCK_SIZE != 0) { - printk_once(KERN_WARNING - "%s: key size incorrect: %d\n", - __func__, master_key->size); - res = -ENOKEY; - goto out; + if (!ukp) /* was the key revoked before we acquired its semaphore? */ + goto invalid; + + payload = (const struct fscrypt_key *)ukp->data; + + if (ukp->datalen != sizeof(struct fscrypt_key) || + payload->size < 1 || payload->size > FS_MAX_KEY_SIZE) { + fscrypt_warn(NULL, + "key with description '%s' has invalid payload", + key->description); + goto invalid; } - res = derive_key_aes(ctx->nonce, master_key, raw_key); -out: - up_read(&keyring_key->sem); - key_put(keyring_key); - return res; + + if (payload->size < min_keysize) { + fscrypt_warn(NULL, + "key with description '%s' is too short (got %u bytes, need %u+ bytes)", + key->description, payload->size, min_keysize); + goto invalid; + } + + *payload_ret = payload; + return key; + +invalid: + up_read(&key->sem); + key_put(key); + return ERR_PTR(-ENOKEY); } -static const struct { - const char *cipher_str; - int keysize; -} available_modes[] = { - [FS_ENCRYPTION_MODE_AES_256_XTS] = { "xts(aes)", - FS_AES_256_XTS_KEY_SIZE }, - [FS_ENCRYPTION_MODE_AES_256_CTS] = { "cts(cbc(aes))", - FS_AES_256_CTS_KEY_SIZE }, - [FS_ENCRYPTION_MODE_AES_128_CBC] = { "cbc(aes)", - FS_AES_128_CBC_KEY_SIZE }, - [FS_ENCRYPTION_MODE_AES_128_CTS] = { "cts(cbc(aes))", - FS_AES_128_CTS_KEY_SIZE }, +static struct fscrypt_mode available_modes[] = { + [FS_ENCRYPTION_MODE_AES_256_XTS] = { + .friendly_name = "AES-256-XTS", + .cipher_str = "xts(aes)", + .keysize = 64, + .ivsize = 16, + }, + [FS_ENCRYPTION_MODE_AES_256_CTS] = { + .friendly_name = "AES-256-CTS-CBC", + .cipher_str = "cts(cbc(aes))", + .keysize = 32, + .ivsize = 16, + }, + [FS_ENCRYPTION_MODE_AES_128_CBC] = { + .friendly_name = "AES-128-CBC", + .cipher_str = "cbc(aes)", + .keysize = 16, + .ivsize = 16, + .needs_essiv = true, + }, + [FS_ENCRYPTION_MODE_AES_128_CTS] = { + .friendly_name = "AES-128-CTS-CBC", + .cipher_str = "cts(cbc(aes))", + .keysize = 16, + .ivsize = 16, + }, + [FS_ENCRYPTION_MODE_ADIANTUM] = { + .friendly_name = "Adiantum", + .cipher_str = "adiantum(xchacha12,aes)", + .keysize = 32, + .ivsize = 32, + }, }; -static int determine_cipher_type(struct fscrypt_info *ci, struct inode *inode, - const char **cipher_str_ret, int *keysize_ret) +static struct fscrypt_mode * +select_encryption_mode(const struct fscrypt_info *ci, const struct inode *inode) { - u32 mode; - if (!fscrypt_valid_enc_modes(ci->ci_data_mode, ci->ci_filename_mode)) { - pr_warn_ratelimited("fscrypt: inode %lu uses unsupported encryption modes (contents mode %d, filenames mode %d)\n", - inode->i_ino, - ci->ci_data_mode, ci->ci_filename_mode); - return -EINVAL; + fscrypt_warn(inode->i_sb, + "inode %lu uses unsupported encryption modes (contents mode %d, filenames mode %d)", + inode->i_ino, ci->ci_data_mode, + ci->ci_filename_mode); + return ERR_PTR(-EINVAL); } - if (S_ISREG(inode->i_mode)) { - mode = ci->ci_data_mode; - } else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) { - mode = ci->ci_filename_mode; - } else { - WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n", - inode->i_ino, (inode->i_mode & S_IFMT)); - return -EINVAL; - } + if (S_ISREG(inode->i_mode)) + return &available_modes[ci->ci_data_mode]; - *cipher_str_ret = available_modes[mode].cipher_str; - *keysize_ret = available_modes[mode].keysize; - return 0; + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + return &available_modes[ci->ci_filename_mode]; + + WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n", + inode->i_ino, (inode->i_mode & S_IFMT)); + return ERR_PTR(-EINVAL); } -static void put_crypt_info(struct fscrypt_info *ci) +/* Find the master key, then derive the inode's actual encryption key */ +static int find_and_derive_key(const struct inode *inode, + const struct fscrypt_context *ctx, + u8 *derived_key, const struct fscrypt_mode *mode) { - if (!ci) - return; + struct key *key; + const struct fscrypt_key *payload; + int err; - crypto_free_skcipher(ci->ci_ctfm); - crypto_free_cipher(ci->ci_essiv_tfm); - kmem_cache_free(fscrypt_info_cachep, ci); + key = find_and_lock_process_key(FS_KEY_DESC_PREFIX, + ctx->master_key_descriptor, + mode->keysize, &payload); + if (key == ERR_PTR(-ENOKEY) && inode->i_sb->s_cop->key_prefix) { + key = find_and_lock_process_key(inode->i_sb->s_cop->key_prefix, + ctx->master_key_descriptor, + mode->keysize, &payload); + } + if (IS_ERR(key)) + return PTR_ERR(key); + + if (ctx->flags & FS_POLICY_FLAG_DIRECT_KEY) { + if (mode->ivsize < offsetofend(union fscrypt_iv, nonce)) { + fscrypt_warn(inode->i_sb, + "direct key mode not allowed with %s", + mode->friendly_name); + err = -EINVAL; + } else if (ctx->contents_encryption_mode != + ctx->filenames_encryption_mode) { + fscrypt_warn(inode->i_sb, + "direct key mode not allowed with different contents and filenames modes"); + err = -EINVAL; + } else { + memcpy(derived_key, payload->raw, mode->keysize); + err = 0; + } + } else { + err = derive_key_aes(payload->raw, ctx, derived_key, + mode->keysize); + } + up_read(&key->sem); + key_put(key); + return err; +} + +/* Allocate and key a symmetric cipher object for the given encryption mode */ +static struct crypto_skcipher * +allocate_skcipher_for_mode(struct fscrypt_mode *mode, const u8 *raw_key, + const struct inode *inode) +{ + struct crypto_skcipher *tfm; + int err; + + tfm = crypto_alloc_skcipher(mode->cipher_str, 0, 0); + if (IS_ERR(tfm)) { + fscrypt_warn(inode->i_sb, + "error allocating '%s' transform for inode %lu: %ld", + mode->cipher_str, inode->i_ino, PTR_ERR(tfm)); + return tfm; + } + if (unlikely(!mode->logged_impl_name)) { + /* + * fscrypt performance can vary greatly depending on which + * crypto algorithm implementation is used. Help people debug + * performance problems by logging the ->cra_driver_name the + * first time a mode is used. Note that multiple threads can + * race here, but it doesn't really matter. + */ + mode->logged_impl_name = true; + pr_info("fscrypt: %s using implementation \"%s\"\n", + mode->friendly_name, + crypto_skcipher_alg(tfm)->base.cra_driver_name); + } + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); + err = crypto_skcipher_setkey(tfm, raw_key, mode->keysize); + if (err) + goto err_free_tfm; + + return tfm; + +err_free_tfm: + crypto_free_skcipher(tfm); + return ERR_PTR(err); +} + +/* Master key referenced by FS_POLICY_FLAG_DIRECT_KEY policy */ +struct fscrypt_master_key { + struct hlist_node mk_node; + refcount_t mk_refcount; + const struct fscrypt_mode *mk_mode; + struct crypto_skcipher *mk_ctfm; + u8 mk_descriptor[FS_KEY_DESCRIPTOR_SIZE]; + u8 mk_raw[FS_MAX_KEY_SIZE]; +}; + +static void free_master_key(struct fscrypt_master_key *mk) +{ + if (mk) { + crypto_free_skcipher(mk->mk_ctfm); + kzfree(mk); + } +} + +static void put_master_key(struct fscrypt_master_key *mk) +{ + if (!refcount_dec_and_lock(&mk->mk_refcount, &fscrypt_master_keys_lock)) + return; + hash_del(&mk->mk_node); + spin_unlock(&fscrypt_master_keys_lock); + + free_master_key(mk); +} + +/* + * Find/insert the given master key into the fscrypt_master_keys table. If + * found, it is returned with elevated refcount, and 'to_insert' is freed if + * non-NULL. If not found, 'to_insert' is inserted and returned if it's + * non-NULL; otherwise NULL is returned. + */ +static struct fscrypt_master_key * +find_or_insert_master_key(struct fscrypt_master_key *to_insert, + const u8 *raw_key, const struct fscrypt_mode *mode, + const struct fscrypt_info *ci) +{ + unsigned long hash_key; + struct fscrypt_master_key *mk; + + /* + * Careful: to avoid potentially leaking secret key bytes via timing + * information, we must key the hash table by descriptor rather than by + * raw key, and use crypto_memneq() when comparing raw keys. + */ + + BUILD_BUG_ON(sizeof(hash_key) > FS_KEY_DESCRIPTOR_SIZE); + memcpy(&hash_key, ci->ci_master_key_descriptor, sizeof(hash_key)); + + spin_lock(&fscrypt_master_keys_lock); + hash_for_each_possible(fscrypt_master_keys, mk, mk_node, hash_key) { + if (memcmp(ci->ci_master_key_descriptor, mk->mk_descriptor, + FS_KEY_DESCRIPTOR_SIZE) != 0) + continue; + if (mode != mk->mk_mode) + continue; + if (crypto_memneq(raw_key, mk->mk_raw, mode->keysize)) + continue; + /* using existing tfm with same (descriptor, mode, raw_key) */ + refcount_inc(&mk->mk_refcount); + spin_unlock(&fscrypt_master_keys_lock); + free_master_key(to_insert); + return mk; + } + if (to_insert) + hash_add(fscrypt_master_keys, &to_insert->mk_node, hash_key); + spin_unlock(&fscrypt_master_keys_lock); + return to_insert; +} + +/* Prepare to encrypt directly using the master key in the given mode */ +static struct fscrypt_master_key * +fscrypt_get_master_key(const struct fscrypt_info *ci, struct fscrypt_mode *mode, + const u8 *raw_key, const struct inode *inode) +{ + struct fscrypt_master_key *mk; + int err; + + /* Is there already a tfm for this key? */ + mk = find_or_insert_master_key(NULL, raw_key, mode, ci); + if (mk) + return mk; + + /* Nope, allocate one. */ + mk = kzalloc(sizeof(*mk), GFP_NOFS); + if (!mk) + return ERR_PTR(-ENOMEM); + refcount_set(&mk->mk_refcount, 1); + mk->mk_mode = mode; + mk->mk_ctfm = allocate_skcipher_for_mode(mode, raw_key, inode); + if (IS_ERR(mk->mk_ctfm)) { + err = PTR_ERR(mk->mk_ctfm); + mk->mk_ctfm = NULL; + goto err_free_mk; + } + memcpy(mk->mk_descriptor, ci->ci_master_key_descriptor, + FS_KEY_DESCRIPTOR_SIZE); + memcpy(mk->mk_raw, raw_key, mode->keysize); + + return find_or_insert_master_key(mk, raw_key, mode, ci); + +err_free_mk: + free_master_key(mk); + return ERR_PTR(err); } static int derive_essiv_salt(const u8 *key, int keysize, u8 *salt) @@ -198,8 +387,9 @@ static int derive_essiv_salt(const u8 *key, int keysize, u8 *salt) tfm = crypto_alloc_shash("sha256", 0, 0); if (IS_ERR(tfm)) { - pr_warn_ratelimited("fscrypt: error allocating SHA-256 transform: %ld\n", - PTR_ERR(tfm)); + fscrypt_warn(NULL, + "error allocating SHA-256 transform: %ld", + PTR_ERR(tfm)); return PTR_ERR(tfm); } prev_tfm = cmpxchg(&essiv_hash_tfm, NULL, tfm); @@ -254,13 +444,68 @@ void __exit fscrypt_essiv_cleanup(void) crypto_free_shash(essiv_hash_tfm); } +/* + * Given the encryption mode and key (normally the derived key, but for + * FS_POLICY_FLAG_DIRECT_KEY mode it's the master key), set up the inode's + * symmetric cipher transform object(s). + */ +static int setup_crypto_transform(struct fscrypt_info *ci, + struct fscrypt_mode *mode, + const u8 *raw_key, const struct inode *inode) +{ + struct fscrypt_master_key *mk; + struct crypto_skcipher *ctfm; + int err; + + if (ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY) { + mk = fscrypt_get_master_key(ci, mode, raw_key, inode); + if (IS_ERR(mk)) + return PTR_ERR(mk); + ctfm = mk->mk_ctfm; + } else { + mk = NULL; + ctfm = allocate_skcipher_for_mode(mode, raw_key, inode); + if (IS_ERR(ctfm)) + return PTR_ERR(ctfm); + } + ci->ci_master_key = mk; + ci->ci_ctfm = ctfm; + + if (mode->needs_essiv) { + /* ESSIV implies 16-byte IVs which implies !DIRECT_KEY */ + WARN_ON(mode->ivsize != AES_BLOCK_SIZE); + WARN_ON(ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY); + + err = init_essiv_generator(ci, raw_key, mode->keysize); + if (err) { + fscrypt_warn(inode->i_sb, + "error initializing ESSIV generator for inode %lu: %d", + inode->i_ino, err); + return err; + } + } + return 0; +} + +static void put_crypt_info(struct fscrypt_info *ci) +{ + if (!ci) + return; + + if (ci->ci_master_key) { + put_master_key(ci->ci_master_key); + } else { + crypto_free_skcipher(ci->ci_ctfm); + crypto_free_cipher(ci->ci_essiv_tfm); + } + kmem_cache_free(fscrypt_info_cachep, ci); +} + int fscrypt_get_encryption_info(struct inode *inode) { struct fscrypt_info *crypt_info; struct fscrypt_context ctx; - struct crypto_skcipher *ctfm; - const char *cipher_str; - int keysize; + struct fscrypt_mode *mode; u8 *raw_key = NULL; int res; @@ -274,7 +519,7 @@ int fscrypt_get_encryption_info(struct inode *inode) res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx)); if (res < 0) { if (!fscrypt_dummy_context_enabled(inode) || - inode->i_sb->s_cop->is_encrypted(inode)) + IS_ENCRYPTED(inode)) return res; /* Fake up a context for an unencrypted directory */ memset(&ctx, 0, sizeof(ctx)); @@ -292,72 +537,42 @@ int fscrypt_get_encryption_info(struct inode *inode) if (ctx.flags & ~FS_POLICY_FLAGS_VALID) return -EINVAL; - crypt_info = kmem_cache_alloc(fscrypt_info_cachep, GFP_NOFS); + crypt_info = kmem_cache_zalloc(fscrypt_info_cachep, GFP_NOFS); if (!crypt_info) return -ENOMEM; crypt_info->ci_flags = ctx.flags; crypt_info->ci_data_mode = ctx.contents_encryption_mode; crypt_info->ci_filename_mode = ctx.filenames_encryption_mode; - crypt_info->ci_ctfm = NULL; - crypt_info->ci_essiv_tfm = NULL; - memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, - sizeof(crypt_info->ci_master_key)); + memcpy(crypt_info->ci_master_key_descriptor, ctx.master_key_descriptor, + FS_KEY_DESCRIPTOR_SIZE); + memcpy(crypt_info->ci_nonce, ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE); - res = determine_cipher_type(crypt_info, inode, &cipher_str, &keysize); - if (res) + mode = select_encryption_mode(crypt_info, inode); + if (IS_ERR(mode)) { + res = PTR_ERR(mode); goto out; + } + WARN_ON(mode->ivsize > FSCRYPT_MAX_IV_SIZE); + crypt_info->ci_mode = mode; /* - * This cannot be a stack buffer because it is passed to the scatterlist - * crypto API as part of key derivation. + * This cannot be a stack buffer because it may be passed to the + * scatterlist crypto API as part of key derivation. */ res = -ENOMEM; - raw_key = kmalloc(FS_MAX_KEY_SIZE, GFP_NOFS); + raw_key = kmalloc(mode->keysize, GFP_NOFS); if (!raw_key) goto out; - res = validate_user_key(crypt_info, &ctx, raw_key, FS_KEY_DESC_PREFIX, - keysize); - if (res && inode->i_sb->s_cop->key_prefix) { - int res2 = validate_user_key(crypt_info, &ctx, raw_key, - inode->i_sb->s_cop->key_prefix, - keysize); - if (res2) { - if (res2 == -ENOKEY) - res = -ENOKEY; - goto out; - } - } else if (res) { - goto out; - } - ctfm = crypto_alloc_skcipher(cipher_str, 0, 0); - if (!ctfm || IS_ERR(ctfm)) { - res = ctfm ? PTR_ERR(ctfm) : -ENOMEM; - pr_debug("%s: error %d (inode %lu) allocating crypto tfm\n", - __func__, res, inode->i_ino); - goto out; - } - crypt_info->ci_ctfm = ctfm; - crypto_skcipher_clear_flags(ctfm, ~0); - crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_REQ_WEAK_KEY); - /* - * if the provided key is longer than keysize, we use the first - * keysize bytes of the derived key only - */ - res = crypto_skcipher_setkey(ctfm, raw_key, keysize); + res = find_and_derive_key(inode, &ctx, raw_key, mode); if (res) goto out; - if (S_ISREG(inode->i_mode) && - crypt_info->ci_data_mode == FS_ENCRYPTION_MODE_AES_128_CBC) { - res = init_essiv_generator(crypt_info, raw_key, keysize); - if (res) { - pr_debug("%s: error %d (inode %lu) allocating essiv tfm\n", - __func__, res, inode->i_ino); - goto out; - } - } + res = setup_crypto_transform(crypt_info, mode, raw_key, inode); + if (res) + goto out; + if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) == NULL) crypt_info = NULL; out: @@ -369,19 +584,9 @@ int fscrypt_get_encryption_info(struct inode *inode) } EXPORT_SYMBOL(fscrypt_get_encryption_info); -void fscrypt_put_encryption_info(struct inode *inode, struct fscrypt_info *ci) +void fscrypt_put_encryption_info(struct inode *inode) { - struct fscrypt_info *prev; - - if (ci == NULL) - ci = ACCESS_ONCE(inode->i_crypt_info); - if (ci == NULL) - return; - - prev = cmpxchg(&inode->i_crypt_info, ci, NULL); - if (prev != ci) - return; - - put_crypt_info(ci); + put_crypt_info(inode->i_crypt_info); + inode->i_crypt_info = NULL; } EXPORT_SYMBOL(fscrypt_put_encryption_info);
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index a120649b..f490de92 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c
@@ -110,7 +110,7 @@ int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg) struct fscrypt_policy policy; int res; - if (!inode->i_sb->s_cop->is_encrypted(inode)) + if (!IS_ENCRYPTED(inode)) return -ENODATA; res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx)); @@ -167,11 +167,11 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) return 1; /* No restrictions if the parent directory is unencrypted */ - if (!cops->is_encrypted(parent)) + if (!IS_ENCRYPTED(parent)) return 1; /* Encrypted directories must not contain unencrypted files */ - if (!cops->is_encrypted(child)) + if (!IS_ENCRYPTED(child)) return 0; /* @@ -199,7 +199,8 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) child_ci = child->i_crypt_info; if (parent_ci && child_ci) { - return memcmp(parent_ci->ci_master_key, child_ci->ci_master_key, + return memcmp(parent_ci->ci_master_key_descriptor, + child_ci->ci_master_key_descriptor, FS_KEY_DESCRIPTOR_SIZE) == 0 && (parent_ci->ci_data_mode == child_ci->ci_data_mode) && (parent_ci->ci_filename_mode == @@ -254,7 +255,7 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child, ctx.contents_encryption_mode = ci->ci_data_mode; ctx.filenames_encryption_mode = ci->ci_filename_mode; ctx.flags = ci->ci_flags; - memcpy(ctx.master_key_descriptor, ci->ci_master_key, + memcpy(ctx.master_key_descriptor, ci->ci_master_key_descriptor, FS_KEY_DESCRIPTOR_SIZE); get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE); BUILD_BUG_ON(sizeof(ctx) != FSCRYPT_SET_CONTEXT_MAX_SIZE);
diff --git a/fs/dcache.c b/fs/dcache.c index 9ac1290..c04352b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c
@@ -3279,6 +3279,7 @@ char *d_absolute_path(const struct path *path, return ERR_PTR(error); return res; } +EXPORT_SYMBOL(d_absolute_path); /* * same as __d_path but appends "(deleted)" for unlinked files.
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index c291bf6..9a309f7 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c
@@ -34,6 +34,7 @@ #include <linux/mutex.h> #include <linux/anon_inodes.h> #include <linux/device.h> +#include <linux/freezer.h> #include <linux/uaccess.h> #include <asm/io.h> #include <asm/mman.h> @@ -1826,7 +1827,8 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, } spin_unlock_irqrestore(&ep->lock, flags); - if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) + if (!freezable_schedule_hrtimeout_range(to, slack, + HRTIMER_MODE_ABS)) timed_out = 1; spin_lock_irqsave(&ep->lock, flags);
diff --git a/fs/exec.c b/fs/exec.c index 0da4d74..062159d 100644 --- a/fs/exec.c +++ b/fs/exec.c
@@ -1303,7 +1303,7 @@ EXPORT_SYMBOL(flush_old_exec); void would_dump(struct linux_binprm *bprm, struct file *file) { struct inode *inode = file_inode(file); - if (inode_permission(inode, MAY_READ) < 0) { + if (inode_permission2(file->f_path.mnt, inode, MAY_READ) < 0) { struct user_namespace *old, *user_ns; bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 0ac6281..f17715d 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c
@@ -377,9 +377,8 @@ static int read_exec(struct page_collect *pcol) * and will start a new collection. Eventually caller must submit the last * segment if present. */ -static int readpage_strip(void *data, struct page *page) +static int __readpage_strip(struct page_collect *pcol, struct page *page) { - struct page_collect *pcol = data; struct inode *inode = pcol->inode; struct exofs_i_info *oi = exofs_i(inode); loff_t i_size = i_size_read(inode); @@ -470,6 +469,13 @@ static int readpage_strip(void *data, struct page *page) return ret; } +static int readpage_strip(struct file *data, struct page *page) +{ + struct page_collect *pcol = (struct page_collect *)data; + + return __readpage_strip(pcol, page); +} + static int exofs_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { @@ -499,7 +505,7 @@ static int _readpage(struct page *page, bool read_4_write) _pcol_init(&pcol, 1, page->mapping->host); pcol.read_4_write = read_4_write; - ret = readpage_strip(&pcol, page); + ret = __readpage_strip(&pcol, page); if (ret) { EXOFS_ERR("_readpage => %d\n", ret); return ret;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 02970a2..f45bb67 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h
@@ -34,17 +34,15 @@ #include <linux/percpu_counter.h> #include <linux/ratelimit.h> #include <crypto/hash.h> -#ifdef CONFIG_EXT4_FS_ENCRYPTION -#include <linux/fscrypt_supp.h> -#else -#include <linux/fscrypt_notsupp.h> -#endif #include <linux/falloc.h> #include <linux/percpu-rwsem.h> #ifdef __KERNEL__ #include <linux/compat.h> #endif +#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_EXT4_FS_ENCRYPTION) +#include <linux/fscrypt.h> + /* * The fourth extended filesystem constants/structures */
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 137c752..3344bd8 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c
@@ -18,6 +18,7 @@ #include "ext4.h" #include "xattr.h" #include "truncate.h" +#include <trace/events/android_fs.h> #define EXT4_XATTR_SYSTEM_DATA "data" #define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS)) @@ -511,6 +512,17 @@ int ext4_readpage_inline(struct inode *inode, struct page *page) return -EAGAIN; } + if (trace_android_fs_dataread_start_enabled()) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + inode); + trace_android_fs_dataread_start(inode, page_offset(page), + PAGE_SIZE, current->pid, + path, current->comm); + } + /* * Current inline data can only exist in the 1st page, * So for all the other pages, just set them uptodate. @@ -522,6 +534,8 @@ int ext4_readpage_inline(struct inode *inode, struct page *page) SetPageUptodate(page); } + trace_android_fs_dataread_end(inode, page_offset(page), PAGE_SIZE); + up_read(&EXT4_I(inode)->xattr_sem); unlock_page(page);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5eb28dc..b11091e6a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c
@@ -46,6 +46,7 @@ #include "truncate.h" #include <trace/events/ext4.h> +#include <trace/events/android_fs.h> #define MPAGE_DA_EXTENT_TAIL 0x01 @@ -1252,6 +1253,16 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) return -EIO; + if (trace_android_fs_datawrite_start_enabled()) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + inode); + trace_android_fs_datawrite_start(inode, pos, len, + current->pid, path, + current->comm); + } trace_ext4_write_begin(inode, pos, len, flags); /* * Reserve one block more for addition to orphan list in case @@ -1390,6 +1401,7 @@ static int ext4_write_end(struct file *file, int i_size_changed = 0; int inline_data = ext4_has_inline_data(inode); + trace_android_fs_datawrite_end(inode, pos, len); trace_ext4_write_end(inode, pos, len, copied); if (inline_data) { ret = ext4_write_inline_data_end(inode, pos, len, @@ -1495,6 +1507,7 @@ static int ext4_journalled_write_end(struct file *file, int size_changed = 0; int inline_data = ext4_has_inline_data(inode); + trace_android_fs_datawrite_end(inode, pos, len); trace_ext4_journalled_write_end(inode, pos, len, copied); from = pos & (PAGE_SIZE - 1); to = from + len; @@ -2627,8 +2640,8 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) mpd->map.m_len = 0; mpd->next_page = index; while (index <= end) { - nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); + nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end, + tag); if (nr_pages == 0) goto out; @@ -2636,16 +2649,6 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) struct page *page = pvec.pages[i]; /* - * At this point, the page may be truncated or - * invalidated (changing page->mapping to NULL), or - * even swizzled back from swapper_space to tmpfs file - * mapping. However, page->index will not change - * because we have a reference on the page. - */ - if (page->index > end) - goto out; - - /* * Accumulated enough dirty pages? This doesn't apply * to WB_SYNC_ALL mode. For integrity sync we have to * keep going because someone may be concurrently @@ -3032,6 +3035,16 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, len, flags, pagep, fsdata); } *fsdata = (void *)0; + if (trace_android_fs_datawrite_start_enabled()) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + inode); + trace_android_fs_datawrite_start(inode, pos, len, + current->pid, + path, current->comm); + } trace_ext4_da_write_begin(inode, pos, len, flags); if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { @@ -3150,6 +3163,7 @@ static int ext4_da_write_end(struct file *file, return ext4_write_end(file, mapping, pos, len, copied, page, fsdata); + trace_android_fs_datawrite_end(inode, pos, len); trace_ext4_da_write_end(inode, pos, len, copied); start = pos & (PAGE_SIZE - 1); end = start + copied - 1; @@ -3800,6 +3814,7 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) size_t count = iov_iter_count(iter); loff_t offset = iocb->ki_pos; ssize_t ret; + int rw = iov_iter_rw(iter); #ifdef CONFIG_EXT4_FS_ENCRYPTION if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) @@ -3820,12 +3835,42 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (WARN_ON_ONCE(IS_DAX(inode))) return 0; + if (trace_android_fs_dataread_start_enabled() && + (rw == READ)) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + inode); + trace_android_fs_dataread_start(inode, offset, count, + current->pid, path, + current->comm); + } + if (trace_android_fs_datawrite_start_enabled() && + (rw == WRITE)) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + inode); + trace_android_fs_datawrite_start(inode, offset, count, + current->pid, path, + current->comm); + } trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); if (iov_iter_rw(iter) == READ) ret = ext4_direct_IO_read(iocb, iter); else ret = ext4_direct_IO_write(iocb, iter); trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret); + + if (trace_android_fs_dataread_start_enabled() && + (rw == READ)) + trace_android_fs_dataread_end(inode, offset, count); + if (trace_android_fs_datawrite_start_enabled() && + (rw == WRITE)) + trace_android_fs_datawrite_end(inode, offset, count); + return ret; } @@ -4611,10 +4656,13 @@ void ext4_set_inode_flags(struct inode *inode) new_fl |= S_DIRSYNC; if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode) && !ext4_should_journal_data(inode) && !ext4_has_inline_data(inode) && - !ext4_encrypted_inode(inode)) + !(flags & EXT4_ENCRYPT_FL)) new_fl |= S_DAX; + if (flags & EXT4_ENCRYPT_FL) + new_fl |= S_ENCRYPTED; inode_set_flags(inode, new_fl, - S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX); + S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX| + S_ENCRYPTED); } static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 4e301b0..3e1016a 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c
@@ -1545,24 +1545,14 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi struct inode *inode; struct ext4_dir_entry_2 *de; struct buffer_head *bh; + int err; - if (ext4_encrypted_inode(dir)) { - int res = fscrypt_get_encryption_info(dir); + err = fscrypt_prepare_lookup(dir, dentry, flags); + if (err) + return ERR_PTR(err); - /* - * DCACHE_ENCRYPTED_WITH_KEY is set if the dentry is - * created while the directory was encrypted and we - * have access to the key. - */ - if (fscrypt_has_encryption_key(dir)) - fscrypt_set_encrypted_dentry(dentry); - fscrypt_set_d_op(dentry); - if (res && res != -ENOKEY) - return ERR_PTR(res); - } - - if (dentry->d_name.len > EXT4_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); + if (dentry->d_name.len > EXT4_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); if (IS_ERR(bh)) @@ -3068,39 +3058,19 @@ static int ext4_symlink(struct inode *dir, struct inode *inode; int err, len = strlen(symname); int credits; - bool encryption_required; struct fscrypt_str disk_link; - struct fscrypt_symlink_data *sd = NULL; if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb)))) return -EIO; - disk_link.len = len + 1; - disk_link.name = (char *) symname; - - encryption_required = (ext4_encrypted_inode(dir) || - DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))); - if (encryption_required) { - err = fscrypt_get_encryption_info(dir); - if (err) - return err; - if (!fscrypt_has_encryption_key(dir)) - return -ENOKEY; - disk_link.len = (fscrypt_fname_encrypted_size(dir, len) + - sizeof(struct fscrypt_symlink_data)); - sd = kzalloc(disk_link.len, GFP_KERNEL); - if (!sd) - return -ENOMEM; - } - - if (disk_link.len > dir->i_sb->s_blocksize) { - err = -ENAMETOOLONG; - goto err_free_sd; - } + err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize, + &disk_link); + if (err) + return err; err = dquot_initialize(dir); if (err) - goto err_free_sd; + return err; if ((disk_link.len > EXT4_N_BLOCKS * 4)) { /* @@ -3129,27 +3099,18 @@ static int ext4_symlink(struct inode *dir, if (IS_ERR(inode)) { if (handle) ext4_journal_stop(handle); - err = PTR_ERR(inode); - goto err_free_sd; + return PTR_ERR(inode); } - if (encryption_required) { - struct qstr istr; - struct fscrypt_str ostr = - FSTR_INIT(sd->encrypted_path, disk_link.len); - - istr.name = (const unsigned char *) symname; - istr.len = len; - err = fscrypt_fname_usr_to_disk(inode, &istr, &ostr); + if (IS_ENCRYPTED(inode)) { + err = fscrypt_encrypt_symlink(inode, symname, len, &disk_link); if (err) goto err_drop_inode; - sd->len = cpu_to_le16(ostr.len); - disk_link.name = (char *) sd; inode->i_op = &ext4_encrypted_symlink_inode_operations; } if ((disk_link.len > EXT4_N_BLOCKS * 4)) { - if (!encryption_required) + if (!IS_ENCRYPTED(inode)) inode->i_op = &ext4_symlink_inode_operations; inode_nohighmem(inode); ext4_set_aops(inode); @@ -3191,7 +3152,7 @@ static int ext4_symlink(struct inode *dir, } else { /* clear the extent format for fast symlink */ ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); - if (!encryption_required) { + if (!IS_ENCRYPTED(inode)) { inode->i_op = &ext4_fast_symlink_inode_operations; inode->i_link = (char *)&EXT4_I(inode)->i_data; } @@ -3206,16 +3167,17 @@ static int ext4_symlink(struct inode *dir, if (handle) ext4_journal_stop(handle); - kfree(sd); - return err; + goto out_free_encrypted_link; + err_drop_inode: if (handle) ext4_journal_stop(handle); clear_nlink(inode); unlock_new_inode(inode); iput(inode); -err_free_sd: - kfree(sd); +out_free_encrypted_link: + if (disk_link.name != (unsigned char *)symname) + kfree(disk_link.name); return err; }
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 9ffa6fa..039aa69 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c
@@ -46,6 +46,7 @@ #include <linux/cleancache.h> #include "ext4.h" +#include <trace/events/android_fs.h> static inline bool ext4_bio_encrypted(struct bio *bio) { @@ -56,6 +57,17 @@ static inline bool ext4_bio_encrypted(struct bio *bio) #endif } +static void +ext4_trace_read_completion(struct bio *bio) +{ + struct page *first_page = bio->bi_io_vec[0].bv_page; + + if (first_page != NULL) + trace_android_fs_dataread_end(first_page->mapping->host, + page_offset(first_page), + bio->bi_iter.bi_size); +} + /* * I/O completion handler for multipage BIOs. * @@ -73,11 +85,14 @@ static void mpage_end_io(struct bio *bio) struct bio_vec *bv; int i; + if (trace_android_fs_dataread_start_enabled()) + ext4_trace_read_completion(bio); + if (ext4_bio_encrypted(bio)) { if (bio->bi_status) { fscrypt_release_ctx(bio->bi_private); } else { - fscrypt_decrypt_bio_pages(bio->bi_private, bio); + fscrypt_enqueue_decrypt_bio(bio->bi_private, bio); return; } } @@ -96,6 +111,30 @@ static void mpage_end_io(struct bio *bio) bio_put(bio); } +static void +ext4_submit_bio_read(struct bio *bio) +{ + if (trace_android_fs_dataread_start_enabled()) { + struct page *first_page = bio->bi_io_vec[0].bv_page; + + if (first_page != NULL) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + first_page->mapping->host); + trace_android_fs_dataread_start( + first_page->mapping->host, + page_offset(first_page), + bio->bi_iter.bi_size, + current->pid, + path, + current->comm); + } + } + submit_bio(bio); +} + int ext4_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, unsigned nr_pages) @@ -236,7 +275,7 @@ int ext4_mpage_readpages(struct address_space *mapping, */ if (bio && (last_block_in_bio != blocks[0] - 1)) { submit_and_realloc: - submit_bio(bio); + ext4_submit_bio_read(bio); bio = NULL; } if (bio == NULL) { @@ -269,14 +308,14 @@ int ext4_mpage_readpages(struct address_space *mapping, if (((map.m_flags & EXT4_MAP_BOUNDARY) && (relative_block == map.m_len)) || (first_hole != blocks_per_page)) { - submit_bio(bio); + ext4_submit_bio_read(bio); bio = NULL; } else last_block_in_bio = blocks[blocks_per_page - 1]; goto next_page; confused: if (bio) { - submit_bio(bio); + ext4_submit_bio_read(bio); bio = NULL; } if (!PageUptodate(page)) @@ -289,6 +328,6 @@ int ext4_mpage_readpages(struct address_space *mapping, } BUG_ON(pages && !list_empty(pages)); if (bio) - submit_bio(bio); + ext4_submit_bio_read(bio); return 0; }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d004906..e662ff4 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c
@@ -1105,9 +1105,7 @@ void ext4_clear_inode(struct inode *inode) jbd2_free_inode(EXT4_I(inode)->jinode); EXT4_I(inode)->jinode = NULL; } -#ifdef CONFIG_EXT4_FS_ENCRYPTION - fscrypt_put_encryption_info(inode, NULL); -#endif + fscrypt_put_encryption_info(inode); } static struct inode *ext4_nfs_get_inode(struct super_block *sb, @@ -1227,7 +1225,8 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); /* - * Update inode->i_flags - e.g. S_DAX may get disabled + * Update inode->i_flags - S_ENCRYPTED will be enabled, + * S_DAX may be disabled */ ext4_set_inode_flags(inode); } @@ -1252,7 +1251,10 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, ctx, len, 0); if (!res) { ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); - /* Update inode->i_flags - e.g. S_DAX may get disabled */ + /* + * Update inode->i_flags - S_ENCRYPTED will be enabled, + * S_DAX may be disabled + */ ext4_set_inode_flags(inode); res = ext4_mark_inode_dirty(handle, inode); if (res) @@ -1272,24 +1274,13 @@ static bool ext4_dummy_context(struct inode *inode) return DUMMY_ENCRYPTION_ENABLED(EXT4_SB(inode->i_sb)); } -static unsigned ext4_max_namelen(struct inode *inode) -{ - return S_ISLNK(inode->i_mode) ? inode->i_sb->s_blocksize : - EXT4_NAME_LEN; -} - static const struct fscrypt_operations ext4_cryptops = { .key_prefix = "ext4:", .get_context = ext4_get_context, .set_context = ext4_set_context, .dummy_context = ext4_dummy_context, - .is_encrypted = ext4_encrypted_inode, .empty_dir = ext4_empty_dir, - .max_namelen = ext4_max_namelen, -}; -#else -static const struct fscrypt_operations ext4_cryptops = { - .is_encrypted = ext4_encrypted_inode, + .max_namelen = EXT4_NAME_LEN, }; #endif @@ -4118,7 +4109,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &ext4_sops; sb->s_export_op = &ext4_export_ops; sb->s_xattr = ext4_xattr_handlers; +#ifdef CONFIG_EXT4_FS_ENCRYPTION sb->s_cop = &ext4_cryptops; +#endif #ifdef CONFIG_QUOTA sb->dq_op = &ext4_quota_operations; if (ext4_has_feature_quota(sb))
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index a2006c9..dd05af9 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c
@@ -28,59 +28,28 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry, struct delayed_call *done) { struct page *cpage = NULL; - char *caddr, *paddr = NULL; - struct fscrypt_str cstr, pstr; - struct fscrypt_symlink_data *sd; - int res; - u32 max_size = inode->i_sb->s_blocksize; + const void *caddr; + unsigned int max_size; + const char *paddr; if (!dentry) return ERR_PTR(-ECHILD); - res = fscrypt_get_encryption_info(inode); - if (res) - return ERR_PTR(res); - if (ext4_inode_is_fast_symlink(inode)) { - caddr = (char *) EXT4_I(inode)->i_data; + caddr = EXT4_I(inode)->i_data; max_size = sizeof(EXT4_I(inode)->i_data); } else { cpage = read_mapping_page(inode->i_mapping, 0, NULL); if (IS_ERR(cpage)) return ERR_CAST(cpage); caddr = page_address(cpage); + max_size = inode->i_sb->s_blocksize; } - /* Symlink is encrypted */ - sd = (struct fscrypt_symlink_data *)caddr; - cstr.name = sd->encrypted_path; - cstr.len = le16_to_cpu(sd->len); - if ((cstr.len + sizeof(struct fscrypt_symlink_data) - 1) > max_size) { - /* Symlink data on the disk is corrupted */ - res = -EFSCORRUPTED; - goto errout; - } - - res = fscrypt_fname_alloc_buffer(inode, cstr.len, &pstr); - if (res) - goto errout; - paddr = pstr.name; - - res = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr); - if (res) - goto errout; - - /* Null-terminate the name */ - paddr[pstr.len] = '\0'; + paddr = fscrypt_get_symlink(inode, caddr, max_size, done); if (cpage) put_page(cpage); - set_delayed_call(done, kfree_link, paddr); return paddr; -errout: - if (cpage) - put_page(cpage); - kfree(paddr); - return ERR_PTR(res); } const struct inode_operations ext4_encrypted_symlink_inode_operations = {
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 5e4860b..63e5995 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c
@@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/acl.c * @@ -7,10 +8,6 @@ * Portions of this code from linux/fs/ext2/acl.c * * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/f2fs_fs.h> #include "f2fs.h" @@ -53,6 +50,9 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size) struct f2fs_acl_entry *entry = (struct f2fs_acl_entry *)(hdr + 1); const char *end = value + size; + if (size < sizeof(struct f2fs_acl_header)) + return ERR_PTR(-EINVAL); + if (hdr->a_version != cpu_to_le32(F2FS_ACL_VERSION)) return ERR_PTR(-EINVAL); @@ -160,7 +160,7 @@ static void *f2fs_acl_to_disk(struct f2fs_sb_info *sbi, return (void *)f2fs_acl; fail: - kfree(f2fs_acl); + kvfree(f2fs_acl); return ERR_PTR(-EINVAL); } @@ -190,7 +190,7 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type, acl = NULL; else acl = ERR_PTR(retval); - kfree(value); + kvfree(value); return acl; } @@ -240,7 +240,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0); - kfree(value); + kvfree(value); if (!error) set_cached_acl(inode, type, acl); @@ -250,6 +250,9 @@ static int __f2fs_set_acl(struct inode *inode, int type, int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + return __f2fs_set_acl(inode, type, acl, NULL); } @@ -267,7 +270,7 @@ static struct posix_acl *f2fs_acl_clone(const struct posix_acl *acl, sizeof(struct posix_acl_entry); clone = kmemdup(acl, size, flags); if (clone) - atomic_set(&clone->a_refcount, 1); + refcount_set(&clone->a_refcount, 1); } return clone; } @@ -393,12 +396,16 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl, ipage); posix_acl_release(default_acl); + } else { + inode->i_default_acl = NULL; } if (acl) { if (!error) error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, ipage); posix_acl_release(acl); + } else { + inode->i_acl = NULL; } return error;
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index 2c68518..b96823c 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h
@@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/acl.h * @@ -7,10 +8,6 @@ * Portions of this code from linux/fs/ext2/acl.h * * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef __F2FS_ACL_H__ #define __F2FS_ACL_H__
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 624817ee..d27d3c4 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c
@@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/checkpoint.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/bio.h> @@ -24,12 +21,12 @@ #include <trace/events/f2fs.h> static struct kmem_cache *ino_entry_slab; -struct kmem_cache *inode_entry_slab; +struct kmem_cache *f2fs_inode_entry_slab; void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) { + f2fs_build_fault_attr(sbi, 0, 0); set_ckpt_flags(sbi, CP_ERROR_FLAG); - sbi->sb->s_flags |= MS_RDONLY; if (!end_io) f2fs_flush_merged_writes(sbi); } @@ -37,7 +34,7 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) /* * We guarantee no failure on the returned page. */ -struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) +struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) { struct address_space *mapping = META_MAPPING(sbi); struct page *page = NULL; @@ -47,7 +44,7 @@ struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) cond_resched(); goto repeat; } - f2fs_wait_on_page_writeback(page, META, true); + f2fs_wait_on_page_writeback(page, META, true, true); if (!PageUptodate(page)) SetPageUptodate(page); return page; @@ -71,6 +68,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, .encrypted_page = NULL, .is_meta = is_meta, }; + int err; if (unlikely(!is_meta)) fio.op_flags &= ~REQ_META; @@ -85,11 +83,10 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, fio.page = page; - if (f2fs_submit_page_bio(&fio)) { - memset(page_address(page), 0, PAGE_SIZE); - f2fs_stop_checkpoint(sbi, false); - f2fs_bug_on(sbi, 1); - return page; + err = f2fs_submit_page_bio(&fio); + if (err) { + f2fs_put_page(page, 1); + return ERR_PTR(err); } lock_page(page); @@ -98,24 +95,37 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, goto repeat; } - /* - * if there is any IO error when accessing device, make our filesystem - * readonly and make sure do not write checkpoint with non-uptodate - * meta page. - */ - if (unlikely(!PageUptodate(page))) - f2fs_stop_checkpoint(sbi, false); + if (unlikely(!PageUptodate(page))) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } out: return page; } -struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) +struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) { return __get_meta_page(sbi, index, true); } +struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index) +{ + struct page *page; + int count = 0; + +retry: + page = __get_meta_page(sbi, index, true); + if (IS_ERR(page)) { + if (PTR_ERR(page) == -EIO && + ++count <= DEFAULT_RETRY_IO_COUNT) + goto retry; + f2fs_stop_checkpoint(sbi, false); + } + return page; +} + /* for POR only */ -struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index) +struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index) { return __get_meta_page(sbi, index, false); } @@ -167,7 +177,7 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, /* * Readahead CP/NAT/SIT/SSA pages */ -int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, +int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync) { struct page *page; @@ -233,7 +243,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, return blkno - start; } -void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) +void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) { struct page *page; bool readahead = false; @@ -244,7 +254,7 @@ void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) f2fs_put_page(page, 0); if (readahead) - ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true); + f2fs_ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true); } static int __f2fs_write_meta_page(struct page *page, @@ -255,19 +265,18 @@ static int __f2fs_write_meta_page(struct page *page, trace_f2fs_writepage(page, META); + if (unlikely(f2fs_cp_error(sbi))) + goto redirty_out; if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) goto redirty_out; - if (unlikely(f2fs_cp_error(sbi))) - goto redirty_out; - write_meta_page(sbi, page, io_type); + f2fs_do_write_meta_page(sbi, page, io_type); dec_page_count(sbi, F2FS_DIRTY_META); if (wbc->for_reclaim) - f2fs_submit_merged_write_cond(sbi, page->mapping->host, - 0, page->index, META); + f2fs_submit_merged_write_cond(sbi, NULL, page, 0, META); unlock_page(page); @@ -307,7 +316,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping, trace_f2fs_writepages(mapping->host, wbc, META); diff = nr_pages_to_write(sbi, META, wbc); - written = sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO); + written = f2fs_sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO); mutex_unlock(&sbi->cp_mutex); wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff); return 0; @@ -318,13 +327,14 @@ static int f2fs_write_meta_pages(struct address_space *mapping, return 0; } -long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, +long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, long nr_to_write, enum iostat_type io_type) { struct address_space *mapping = META_MAPPING(sbi); - pgoff_t index = 0, end = ULONG_MAX, prev = ULONG_MAX; + pgoff_t index = 0, prev = ULONG_MAX; struct pagevec pvec; long nwritten = 0; + int nr_pages; struct writeback_control wbc = { .for_reclaim = 0, }; @@ -334,13 +344,9 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, blk_start_plug(&plug); - while (index <= end) { - int i, nr_pages; - nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); - if (unlikely(nr_pages == 0)) - break; + while ((nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY))) { + int i; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; @@ -364,9 +370,8 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, goto continue_unlock; } - f2fs_wait_on_page_writeback(page, META, true); + f2fs_wait_on_page_writeback(page, META, true, true); - BUG_ON(PageWriteback(page)); if (!clear_page_dirty_for_io(page)) goto continue_unlock; @@ -398,7 +403,7 @@ static int f2fs_set_meta_page_dirty(struct page *page) if (!PageUptodate(page)) SetPageUptodate(page); if (!PageDirty(page)) { - f2fs_set_page_dirty_nobuffers(page); + __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); SetPagePrivate(page); f2fs_trace_pid(page); @@ -418,24 +423,23 @@ const struct address_space_operations f2fs_meta_aops = { #endif }; -static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) +static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, + unsigned int devidx, int type) { struct inode_management *im = &sbi->im[type]; struct ino_entry *e, *tmp; tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS); -retry: + radix_tree_preload(GFP_NOFS | __GFP_NOFAIL); spin_lock(&im->ino_lock); e = radix_tree_lookup(&im->ino_root, ino); if (!e) { e = tmp; - if (radix_tree_insert(&im->ino_root, ino, e)) { - spin_unlock(&im->ino_lock); - radix_tree_preload_end(); - goto retry; - } + if (unlikely(radix_tree_insert(&im->ino_root, ino, e))) + f2fs_bug_on(sbi, 1); + memset(e, 0, sizeof(struct ino_entry)); e->ino = ino; @@ -443,6 +447,10 @@ static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) if (type != ORPHAN_INO) im->ino_num++; } + + if (type == FLUSH_INO) + f2fs_set_bit(devidx, (char *)&e->dirty_device); + spin_unlock(&im->ino_lock); radix_tree_preload_end(); @@ -468,20 +476,20 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) spin_unlock(&im->ino_lock); } -void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) +void f2fs_add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { /* add new dirty ino entry into list */ - __add_ino_entry(sbi, ino, type); + __add_ino_entry(sbi, ino, 0, type); } -void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) +void f2fs_remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { /* remove dirty ino entry from list */ __remove_ino_entry(sbi, ino, type); } /* mode should be APPEND_INO or UPDATE_INO */ -bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) +bool f2fs_exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) { struct inode_management *im = &sbi->im[mode]; struct ino_entry *e; @@ -492,12 +500,12 @@ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) return e ? true : false; } -void release_ino_entry(struct f2fs_sb_info *sbi, bool all) +void f2fs_release_ino_entry(struct f2fs_sb_info *sbi, bool all) { struct ino_entry *e, *tmp; int i; - for (i = all ? ORPHAN_INO: APPEND_INO; i <= UPDATE_INO; i++) { + for (i = all ? ORPHAN_INO : APPEND_INO; i < MAX_INO_ENTRY; i++) { struct inode_management *im = &sbi->im[i]; spin_lock(&im->ino_lock); @@ -511,20 +519,40 @@ void release_ino_entry(struct f2fs_sb_info *sbi, bool all) } } -int acquire_orphan_inode(struct f2fs_sb_info *sbi) +void f2fs_set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, + unsigned int devidx, int type) +{ + __add_ino_entry(sbi, ino, devidx, type); +} + +bool f2fs_is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, + unsigned int devidx, int type) +{ + struct inode_management *im = &sbi->im[type]; + struct ino_entry *e; + bool is_dirty = false; + + spin_lock(&im->ino_lock); + e = radix_tree_lookup(&im->ino_root, ino); + if (e && f2fs_test_bit(devidx, (char *)&e->dirty_device)) + is_dirty = true; + spin_unlock(&im->ino_lock); + return is_dirty; +} + +int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi) { struct inode_management *im = &sbi->im[ORPHAN_INO]; int err = 0; spin_lock(&im->ino_lock); -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_ORPHAN)) { spin_unlock(&im->ino_lock); f2fs_show_injection_info(FAULT_ORPHAN); return -ENOSPC; } -#endif + if (unlikely(im->ino_num >= sbi->max_orphans)) err = -ENOSPC; else @@ -534,7 +562,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi) return err; } -void release_orphan_inode(struct f2fs_sb_info *sbi) +void f2fs_release_orphan_inode(struct f2fs_sb_info *sbi) { struct inode_management *im = &sbi->im[ORPHAN_INO]; @@ -544,14 +572,14 @@ void release_orphan_inode(struct f2fs_sb_info *sbi) spin_unlock(&im->ino_lock); } -void add_orphan_inode(struct inode *inode) +void f2fs_add_orphan_inode(struct inode *inode) { /* add new orphan ino entry into list */ - __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, ORPHAN_INO); - update_inode_page(inode); + __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, 0, ORPHAN_INO); + f2fs_update_inode_page(inode); } -void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) +void f2fs_remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) { /* remove orphan entry from orphan list */ __remove_ino_entry(sbi, ino, ORPHAN_INO); @@ -561,17 +589,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) { struct inode *inode; struct node_info ni; - int err = acquire_orphan_inode(sbi); - - if (err) { - set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: orphan failed (ino=%x), run fsck to fix.", - __func__, ino); - return err; - } - - __add_ino_entry(sbi, ino, ORPHAN_INO); + int err; inode = f2fs_iget_retry(sbi->sb, ino); if (IS_ERR(inode)) { @@ -583,30 +601,44 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) return PTR_ERR(inode); } + err = dquot_initialize(inode); + if (err) { + iput(inode); + goto err_out; + } + clear_nlink(inode); /* truncate all the data during iput */ iput(inode); - get_node_info(sbi, ino, &ni); + err = f2fs_get_node_info(sbi, ino, &ni); + if (err) + goto err_out; /* ENOMEM was fully retried in f2fs_evict_inode. */ if (ni.blk_addr != NULL_ADDR) { - set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: orphan failed (ino=%x) by kernel, retry mount.", - __func__, ino); - return -EIO; + err = -EIO; + goto err_out; } - __remove_ino_entry(sbi, ino, ORPHAN_INO); return 0; + +err_out: + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: orphan failed (ino=%x), run fsck to fix.", + __func__, ino); + return err; } -int recover_orphan_inodes(struct f2fs_sb_info *sbi) +int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi) { block_t start_blk, orphan_blocks, i, j; unsigned int s_flags = sbi->sb->s_flags; int err = 0; +#ifdef CONFIG_QUOTA + int quota_enabled; +#endif if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG)) return 0; @@ -619,19 +651,29 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) #ifdef CONFIG_QUOTA /* Needed for iput() to work correctly and not trash data */ sbi->sb->s_flags |= MS_ACTIVE; - /* Turn on quotas so that they are updated correctly */ - f2fs_enable_quota_files(sbi); + + /* + * Turn on quotas which were not enabled for read-only mounts if + * filesystem has quota feature, so that they are updated correctly. + */ + quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY); #endif start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi); - ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true); + f2fs_ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true); for (i = 0; i < orphan_blocks; i++) { - struct page *page = get_meta_page(sbi, start_blk + i); + struct page *page; struct f2fs_orphan_block *orphan_blk; + page = f2fs_get_meta_page(sbi, start_blk + i); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto out; + } + orphan_blk = (struct f2fs_orphan_block *)page_address(page); for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) { nid_t ino = le32_to_cpu(orphan_blk->ino[j]); @@ -646,9 +688,12 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) /* clear Orphan Flag */ clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG); out: + set_sbi_flag(sbi, SBI_IS_RECOVERED); + #ifdef CONFIG_QUOTA /* Turn quotas off */ - f2fs_quota_off_umount(sbi->sb); + if (quota_enabled) + f2fs_quota_off_umount(sbi->sb); #endif sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ @@ -678,7 +723,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) /* loop for each orphan inode entry and write them in Jornal block */ list_for_each_entry(orphan, head, list) { if (!page) { - page = grab_meta_page(sbi, start_blk++); + page = f2fs_grab_meta_page(sbi, start_blk++); orphan_blk = (struct f2fs_orphan_block *)page_address(page); memset(orphan_blk, 0, sizeof(*orphan_blk)); @@ -720,7 +765,10 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, size_t crc_offset = 0; __u32 crc = 0; - *cp_page = get_meta_page(sbi, cp_addr); + *cp_page = f2fs_get_meta_page(sbi, cp_addr); + if (IS_ERR(*cp_page)) + return PTR_ERR(*cp_page); + *cp_block = (struct f2fs_checkpoint *)page_address(*cp_page); crc_offset = le32_to_cpu((*cp_block)->checksum_offset); @@ -782,7 +830,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, return NULL; } -int get_valid_checkpoint(struct f2fs_sb_info *sbi) +int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) { struct f2fs_checkpoint *cp_block; struct f2fs_super_block *fsb = sbi->raw_super; @@ -794,7 +842,8 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) block_t cp_blk_no; int i; - sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL); + sbi->ckpt = f2fs_kzalloc(sbi, array_size(blk_size, cp_blks), + GFP_KERNEL); if (!sbi->ckpt) return -ENOMEM; /* @@ -831,7 +880,7 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) sbi->cur_cp_pack = 2; /* Sanity checking of checkpoint */ - if (sanity_check_ckpt(sbi)) + if (f2fs_sanity_check_ckpt(sbi)) goto free_fail_no_cp; if (cp_blks <= 1) @@ -845,7 +894,9 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) void *sit_bitmap_ptr; unsigned char *ckpt = (unsigned char *)sbi->ckpt; - cur_page = get_meta_page(sbi, cp_blk_no + i); + cur_page = f2fs_get_meta_page(sbi, cp_blk_no + i); + if (IS_ERR(cur_page)) + goto free_fail_no_cp; sit_bitmap_ptr = page_address(cur_page); memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size); f2fs_put_page(cur_page, 1); @@ -859,7 +910,7 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) f2fs_put_page(cp1, 1); f2fs_put_page(cp2, 1); fail_no_cp: - kfree(sbi->ckpt); + kvfree(sbi->ckpt); return -EINVAL; } @@ -890,7 +941,7 @@ static void __remove_dirty_inode(struct inode *inode, enum inode_type type) stat_dec_dirty_inode(F2FS_I_SB(inode), type); } -void update_dirty_page(struct inode *inode, struct page *page) +void f2fs_update_dirty_page(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE; @@ -909,7 +960,7 @@ void update_dirty_page(struct inode *inode, struct page *page) f2fs_trace_pid(page); } -void remove_dirty_inode(struct inode *inode) +void f2fs_remove_dirty_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE; @@ -926,7 +977,7 @@ void remove_dirty_inode(struct inode *inode) spin_unlock(&sbi->inode_lock[type]); } -int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) +int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) { struct list_head *head; struct inode *inode; @@ -967,12 +1018,10 @@ int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) iput(inode); /* We need to give cpu to another writers. */ - if (ino == cur_ino) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + if (ino == cur_ino) cond_resched(); - } else { + else ino = cur_ino; - } } else { /* * We should submit bio, since it exists several @@ -1009,10 +1058,10 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi) /* it's on eviction */ if (is_inode_flag_set(inode, FI_DIRTY_INODE)) - update_inode_page(inode); + f2fs_update_inode_page(inode); iput(inode); } - }; + } return 0; } @@ -1029,6 +1078,21 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi) ckpt->next_free_nid = cpu_to_le32(last_nid); } +static bool __need_flush_quota(struct f2fs_sb_info *sbi) +{ + if (!is_journalled_quota(sbi)) + return false; + if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) + return false; + if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) + return false; + if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH)) + return true; + if (get_pages(sbi, F2FS_DIRTY_QDATA)) + return true; + return false; +} + /* * Freeze all the FS-operations for checkpoint. */ @@ -1040,20 +1104,44 @@ static int block_operations(struct f2fs_sb_info *sbi) .for_reclaim = 0, }; struct blk_plug plug; - int err = 0; + int err = 0, cnt = 0; blk_start_plug(&plug); -retry_flush_dents: +retry_flush_quotas: + if (__need_flush_quota(sbi)) { + int locked; + + if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) { + set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH); + f2fs_lock_all(sbi); + goto retry_flush_dents; + } + clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); + + /* only failed during mount/umount/freeze/quotactl */ + locked = down_read_trylock(&sbi->sb->s_umount); + f2fs_quota_sync(sbi->sb, -1); + if (locked) + up_read(&sbi->sb->s_umount); + } + f2fs_lock_all(sbi); + if (__need_flush_quota(sbi)) { + f2fs_unlock_all(sbi); + cond_resched(); + goto retry_flush_quotas; + } + +retry_flush_dents: /* write all the dirty dentry pages */ if (get_pages(sbi, F2FS_DIRTY_DENTS)) { f2fs_unlock_all(sbi); - err = sync_dirty_inodes(sbi, DIR_INODE); + err = f2fs_sync_dirty_inodes(sbi, DIR_INODE); if (err) goto out; cond_resched(); - goto retry_flush_dents; + goto retry_flush_quotas; } /* @@ -1062,6 +1150,12 @@ static int block_operations(struct f2fs_sb_info *sbi) */ down_write(&sbi->node_change); + if (__need_flush_quota(sbi)) { + up_write(&sbi->node_change); + f2fs_unlock_all(sbi); + goto retry_flush_quotas; + } + if (get_pages(sbi, F2FS_DIRTY_IMETA)) { up_write(&sbi->node_change); f2fs_unlock_all(sbi); @@ -1069,7 +1163,7 @@ static int block_operations(struct f2fs_sb_info *sbi) if (err) goto out; cond_resched(); - goto retry_flush_dents; + goto retry_flush_quotas; } retry_flush_nodes: @@ -1077,7 +1171,9 @@ static int block_operations(struct f2fs_sb_info *sbi) if (get_pages(sbi, F2FS_DIRTY_NODES)) { up_write(&sbi->node_write); - err = sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO); + atomic_inc(&sbi->wb_sync_req[NODE]); + err = f2fs_sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO); + atomic_dec(&sbi->wb_sync_req[NODE]); if (err) { up_write(&sbi->node_change); f2fs_unlock_all(sbi); @@ -1104,7 +1200,7 @@ static void unblock_operations(struct f2fs_sb_info *sbi) f2fs_unlock_all(sbi); } -static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) +void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) { DEFINE_WAIT(wait); @@ -1114,6 +1210,9 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) if (!get_pages(sbi, F2FS_WB_CP_DATA)) break; + if (unlikely(f2fs_cp_error(sbi))) + break; + io_schedule_timeout(5*HZ); } finish_wait(&sbi->cp_wait, &wait); @@ -1155,12 +1254,63 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) __set_ckpt_flags(ckpt, CP_FSCK_FLAG); + if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) + __set_ckpt_flags(ckpt, CP_DISABLED_FLAG); + else + __clear_ckpt_flags(ckpt, CP_DISABLED_FLAG); + + if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) + __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); + else + __clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); + + if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) + __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); + /* set this flag to activate crc|cp_ver for recovery */ __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG); + __clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG); spin_unlock_irqrestore(&sbi->cp_lock, flags); } +static void commit_checkpoint(struct f2fs_sb_info *sbi, + void *src, block_t blk_addr) +{ + struct writeback_control wbc = { + .for_reclaim = 0, + }; + + /* + * pagevec_lookup_tag and lock_page again will take + * some extra time. Therefore, f2fs_update_meta_pages and + * f2fs_sync_meta_pages are combined in this function. + */ + struct page *page = f2fs_grab_meta_page(sbi, blk_addr); + int err; + + f2fs_wait_on_page_writeback(page, META, true, true); + + memcpy(page_address(page), src, PAGE_SIZE); + + set_page_dirty(page); + if (unlikely(!clear_page_dirty_for_io(page))) + f2fs_bug_on(sbi, 1); + + /* writeout cp pack 2 page */ + err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO); + if (unlikely(err && f2fs_cp_error(sbi))) { + f2fs_put_page(page, 1); + return; + } + + f2fs_bug_on(sbi, err); + f2fs_put_page(page, 0); + + /* submit checkpoint (with barrier if NOBARRIER is not set) */ + f2fs_submit_merged_write(sbi, META_FLUSH); +} + static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); @@ -1174,19 +1324,18 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct super_block *sb = sbi->sb; struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); u64 kbytes_written; + int err; /* Flush all the NAT/SIT pages */ - while (get_pages(sbi, F2FS_DIRTY_META)) { - sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); - if (unlikely(f2fs_cp_error(sbi))) - return -EIO; - } + f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); + f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) && + !f2fs_cp_error(sbi)); /* * modify checkpoint * version number is already updated */ - ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi)); + ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi, true)); ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { ckpt->cur_node_segno[i] = @@ -1206,7 +1355,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) } /* 2 cp + n data seg summary + orphan inode blocks */ - data_sum_blocks = npages_for_summary_flush(sbi, false); + data_sum_blocks = f2fs_npages_for_summary_flush(sbi, false); spin_lock_irqsave(&sbi->cp_lock, flags); if (data_sum_blocks < NR_CURSEG_DATA_TYPE) __set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); @@ -1251,27 +1400,15 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks; for (i = 0; i < nm_i->nat_bits_blocks; i++) - update_meta_page(sbi, nm_i->nat_bits + + f2fs_update_meta_page(sbi, nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS), blk + i); - - /* Flush all the NAT BITS pages */ - while (get_pages(sbi, F2FS_DIRTY_META)) { - sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); - if (unlikely(f2fs_cp_error(sbi))) - return -EIO; - } } - /* need to wait for end_io results */ - wait_on_all_pages_writeback(sbi); - if (unlikely(f2fs_cp_error(sbi))) - return -EIO; - /* write out checkpoint buffer at block 0 */ - update_meta_page(sbi, ckpt, start_blk++); + f2fs_update_meta_page(sbi, ckpt, start_blk++); for (i = 1; i < 1 + cp_payload_blks; i++) - update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE, + f2fs_update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE, start_blk++); if (orphan_num) { @@ -1279,7 +1416,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) start_blk += orphan_blocks; } - write_data_summaries(sbi, start_blk); + f2fs_write_data_summaries(sbi, start_blk); start_blk += data_sum_blocks; /* Record write statistics in the hot node summary */ @@ -1290,39 +1427,47 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written); if (__remain_node_summaries(cpc->reason)) { - write_node_summaries(sbi, start_blk); + f2fs_write_node_summaries(sbi, start_blk); start_blk += NR_CURSEG_NODE_TYPE; } - /* writeout checkpoint block */ - update_meta_page(sbi, ckpt, start_blk); - - /* wait for previous submitted node/meta pages writeback */ - wait_on_all_pages_writeback(sbi); - - if (unlikely(f2fs_cp_error(sbi))) - return -EIO; - - filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LLONG_MAX); - filemap_fdatawait_range(META_MAPPING(sbi), 0, LLONG_MAX); - /* update user_block_counts */ sbi->last_valid_block_count = sbi->total_valid_block_count; percpu_counter_set(&sbi->alloc_valid_block_count, 0); - /* Here, we only have one bio having CP pack */ - sync_meta_pages(sbi, META_FLUSH, LONG_MAX, FS_CP_META_IO); + /* Here, we have one bio having CP pack except cp pack 2 page */ + f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); + f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) && + !f2fs_cp_error(sbi)); /* wait for previous submitted meta pages writeback */ - wait_on_all_pages_writeback(sbi); + f2fs_wait_on_all_pages_writeback(sbi); - release_ino_entry(sbi, false); + /* flush all device cache */ + err = f2fs_flush_device_cache(sbi); + if (err) + return err; - if (unlikely(f2fs_cp_error(sbi))) - return -EIO; + /* barrier and flush checkpoint cp pack 2 page if it can */ + commit_checkpoint(sbi, ckpt, start_blk); + f2fs_wait_on_all_pages_writeback(sbi); + + /* + * invalidate intermediate page cache borrowed from meta inode + * which are used for migration of encrypted inode's blocks. + */ + if (f2fs_sb_has_encrypt(sbi)) + invalidate_mapping_pages(META_MAPPING(sbi), + MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1); + + f2fs_release_ino_entry(sbi, false); + + f2fs_reset_fsync_node_info(sbi); clear_sbi_flag(sbi, SBI_IS_DIRTY); clear_sbi_flag(sbi, SBI_NEED_CP); + clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH); + sbi->unusable_block_count = 0; __set_cp_next_pack(sbi); /* @@ -1335,18 +1480,24 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS)); - return 0; + return unlikely(f2fs_cp_error(sbi)) ? -EIO : 0; } /* * We guarantee that this checkpoint procedure will not fail. */ -int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) +int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); unsigned long long ckpt_ver; int err = 0; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + if (cpc->reason != CP_PAUSE) + return 0; + f2fs_msg(sbi->sb, KERN_WARNING, + "Start checkpoint disabled!"); + } mutex_lock(&sbi->cp_mutex); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && @@ -1374,7 +1525,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* this is the case of multiple fstrims without any changes */ if (cpc->reason & CP_DISCARD) { - if (!exist_trim_candidates(sbi, cpc)) { + if (!f2fs_exist_trim_candidates(sbi, cpc)) { unblock_operations(sbi); goto out; } @@ -1382,8 +1533,8 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (NM_I(sbi)->dirty_nat_cnt == 0 && SIT_I(sbi)->dirty_sentries == 0 && prefree_segments(sbi) == 0) { - flush_sit_entries(sbi, cpc); - clear_prefree_segments(sbi, cpc); + f2fs_flush_sit_entries(sbi, cpc); + f2fs_clear_prefree_segments(sbi, cpc); unblock_operations(sbi); goto out; } @@ -1398,16 +1549,19 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver); /* write cached NAT/SIT entries to NAT/SIT area */ - flush_nat_entries(sbi, cpc); - flush_sit_entries(sbi, cpc); + err = f2fs_flush_nat_entries(sbi, cpc); + if (err) + goto stop; + + f2fs_flush_sit_entries(sbi, cpc); /* unlock all the fs_lock[] in do_checkpoint() */ err = do_checkpoint(sbi, cpc); if (err) - release_discard_addrs(sbi); + f2fs_release_discard_addrs(sbi); else - clear_prefree_segments(sbi, cpc); - + f2fs_clear_prefree_segments(sbi, cpc); +stop: unblock_operations(sbi); stat_inc_cp_count(sbi->stat_info); @@ -1423,7 +1577,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) return err; } -void init_ino_entry_info(struct f2fs_sb_info *sbi) +void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi) { int i; @@ -1441,23 +1595,23 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) F2FS_ORPHANS_PER_BLOCK; } -int __init create_checkpoint_caches(void) +int __init f2fs_create_checkpoint_caches(void) { ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry", sizeof(struct ino_entry)); if (!ino_entry_slab) return -ENOMEM; - inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry", + f2fs_inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry", sizeof(struct inode_entry)); - if (!inode_entry_slab) { + if (!f2fs_inode_entry_slab) { kmem_cache_destroy(ino_entry_slab); return -ENOMEM; } return 0; } -void destroy_checkpoint_caches(void) +void f2fs_destroy_checkpoint_caches(void) { kmem_cache_destroy(ino_entry_slab); - kmem_cache_destroy(inode_entry_slab); + kmem_cache_destroy(f2fs_inode_entry_slab); }
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3d37124..5b22078 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c
@@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/data.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/f2fs_fs.h> @@ -19,8 +16,6 @@ #include <linux/bio.h> #include <linux/prefetch.h> #include <linux/uio.h> -#include <linux/mm.h> -#include <linux/memcontrol.h> #include <linux/cleancache.h> #include <linux/sched/signal.h> @@ -29,6 +24,12 @@ #include "segment.h" #include "trace.h" #include <trace/events/f2fs.h> +#include <trace/events/android_fs.h> + +#define NUM_PREALLOC_POST_READ_CTXS 128 + +static struct kmem_cache *bio_post_read_ctx_cache; +static mempool_t *bio_post_read_ctx_pool; static bool __is_cp_guaranteed(struct page *page) { @@ -45,53 +46,139 @@ static bool __is_cp_guaranteed(struct page *page) if (inode->i_ino == F2FS_META_INO(sbi) || inode->i_ino == F2FS_NODE_INO(sbi) || S_ISDIR(inode->i_mode) || + (S_ISREG(inode->i_mode) && + (f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) || is_cold_data(page)) return true; return false; } -static void f2fs_read_end_io(struct bio *bio) +static enum count_type __read_io_type(struct page *page) { - struct bio_vec *bvec; + struct address_space *mapping = page->mapping; + + if (mapping) { + struct inode *inode = mapping->host; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (inode->i_ino == F2FS_META_INO(sbi)) + return F2FS_RD_META; + + if (inode->i_ino == F2FS_NODE_INO(sbi)) + return F2FS_RD_NODE; + } + return F2FS_RD_DATA; +} + +/* postprocessing steps for read bios */ +enum bio_post_read_step { + STEP_INITIAL = 0, + STEP_DECRYPT, +}; + +struct bio_post_read_ctx { + struct bio *bio; + struct work_struct work; + unsigned int cur_step; + unsigned int enabled_steps; +}; + +static void __read_end_io(struct bio *bio) +{ + struct page *page; + struct bio_vec *bv; int i; -#ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) { - f2fs_show_injection_info(FAULT_IO); - bio->bi_status = BLK_STS_IOERR; - } -#endif + bio_for_each_segment_all(bv, bio, i) { + page = bv->bv_page; - if (f2fs_bio_encrypted(bio)) { - if (bio->bi_status) { - fscrypt_release_ctx(bio->bi_private); - } else { - fscrypt_decrypt_bio_pages(bio->bi_private, bio); - return; - } - } - - bio_for_each_segment_all(bvec, bio, i) { - struct page *page = bvec->bv_page; - - if (!bio->bi_status) { - if (!PageUptodate(page)) - SetPageUptodate(page); - } else { + /* PG_error was set if any post_read step failed */ + if (bio->bi_status || PageError(page)) { ClearPageUptodate(page); - SetPageError(page); + /* will re-read again later */ + ClearPageError(page); + } else { + SetPageUptodate(page); } + dec_page_count(F2FS_P_SB(page), __read_io_type(page)); unlock_page(page); } + if (bio->bi_private) + mempool_free(bio->bi_private, bio_post_read_ctx_pool); bio_put(bio); } +static void bio_post_read_processing(struct bio_post_read_ctx *ctx); + +static void decrypt_work(struct work_struct *work) +{ + struct bio_post_read_ctx *ctx = + container_of(work, struct bio_post_read_ctx, work); + + fscrypt_decrypt_bio(ctx->bio); + + bio_post_read_processing(ctx); +} + +static void bio_post_read_processing(struct bio_post_read_ctx *ctx) +{ + switch (++ctx->cur_step) { + case STEP_DECRYPT: + if (ctx->enabled_steps & (1 << STEP_DECRYPT)) { + INIT_WORK(&ctx->work, decrypt_work); + fscrypt_enqueue_decrypt_work(&ctx->work); + return; + } + ctx->cur_step++; + /* fall-through */ + default: + __read_end_io(ctx->bio); + } +} + +static bool f2fs_bio_post_read_required(struct bio *bio) +{ + return bio->bi_private && !bio->bi_status; +} + +static void f2fs_read_end_io(struct bio *bio) +{ + struct page *first_page = bio->bi_io_vec[0].bv_page; + + if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_READ_IO)) { + f2fs_show_injection_info(FAULT_READ_IO); + bio->bi_status = BLK_STS_IOERR; + } + + if (f2fs_bio_post_read_required(bio)) { + struct bio_post_read_ctx *ctx = bio->bi_private; + + ctx->cur_step = STEP_INITIAL; + bio_post_read_processing(ctx); + return; + } + + if (first_page != NULL && + __read_io_type(first_page) == F2FS_RD_DATA) { + trace_android_fs_dataread_end(first_page->mapping->host, + page_offset(first_page), + bio->bi_iter.bi_size); + } + + __read_end_io(bio); +} + static void f2fs_write_end_io(struct bio *bio) { struct f2fs_sb_info *sbi = bio->bi_private; struct bio_vec *bvec; int i; + if (time_to_inject(sbi, FAULT_WRITE_IO)) { + f2fs_show_injection_info(FAULT_WRITE_IO); + bio->bi_status = BLK_STS_IOERR; + } + bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; enum count_type type = WB_DATA_TYPE(page); @@ -111,9 +198,16 @@ static void f2fs_write_end_io(struct bio *bio) if (unlikely(bio->bi_status)) { mapping_set_error(page->mapping, -EIO); - f2fs_stop_checkpoint(sbi, true); + if (type == F2FS_WB_CP_DATA) + f2fs_stop_checkpoint(sbi, true); } + + f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) && + page->index != nid_of_node(page)); + dec_page_count(sbi, type); + if (f2fs_in_warm_node_list(sbi, page)) + f2fs_del_fsync_node_entry(sbi, page); clear_cold_data(page); end_page_writeback(page); } @@ -169,15 +263,25 @@ static bool __same_bdev(struct f2fs_sb_info *sbi, * Low-level block read/write IO operations. */ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, - int npages, bool is_read) + struct writeback_control *wbc, + int npages, bool is_read, + enum page_type type, enum temp_type temp) { struct bio *bio; - bio = f2fs_bio_alloc(npages); + bio = f2fs_bio_alloc(sbi, npages, true); f2fs_target_device(sbi, blk_addr, bio); - bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; - bio->bi_private = is_read ? NULL : sbi; + if (is_read) { + bio->bi_end_io = f2fs_read_end_io; + bio->bi_private = NULL; + } else { + bio->bi_end_io = f2fs_write_end_io; + bio->bi_private = sbi; + bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, type, temp); + } + if (wbc) + wbc_init_bio(wbc, bio); return bio; } @@ -188,13 +292,12 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, if (!is_read_io(bio_op(bio))) { unsigned int start; - if (f2fs_sb_mounted_blkzoned(sbi->sb) && - current->plug && (type == DATA || type == NODE)) - blk_finish_plug(current->plug); - if (type != DATA && type != NODE) goto submit_io; + if (test_opt(sbi, LFS) && current->plug) + blk_finish_plug(current->plug); + start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS; start %= F2FS_IO_SIZE(sbi); @@ -229,6 +332,32 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, submit_bio(bio); } +static void __f2fs_submit_read_bio(struct f2fs_sb_info *sbi, + struct bio *bio, enum page_type type) +{ + if (trace_android_fs_dataread_start_enabled() && (type == DATA)) { + struct page *first_page = bio->bi_io_vec[0].bv_page; + + if (first_page != NULL && + __read_io_type(first_page) == F2FS_RD_DATA) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + first_page->mapping->host); + + trace_android_fs_dataread_start( + first_page->mapping->host, + page_offset(first_page), + bio->bi_iter.bi_size, + current->pid, + path, + current->comm); + } + } + __submit_bio(sbi, bio, type); +} + static void __submit_merged_bio(struct f2fs_bio_info *io) { struct f2fs_io_info *fio = &io->fio; @@ -247,8 +376,8 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) io->bio = NULL; } -static bool __has_merged_page(struct f2fs_bio_info *io, - struct inode *inode, nid_t ino, pgoff_t idx) +static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, + struct page *page, nid_t ino) { struct bio_vec *bvec; struct page *target; @@ -257,7 +386,7 @@ static bool __has_merged_page(struct f2fs_bio_info *io, if (!io->bio) return false; - if (!inode && !ino) + if (!inode && !page && !ino) return true; bio_for_each_segment_all(bvec, io->bio, i) { @@ -267,11 +396,10 @@ static bool __has_merged_page(struct f2fs_bio_info *io, else target = fscrypt_control_page(bvec->bv_page); - if (idx != target->index) - continue; - if (inode && inode == target->mapping->host) return true; + if (page && page == target) + return true; if (ino && ino == ino_of_node(target)) return true; } @@ -279,28 +407,6 @@ static bool __has_merged_page(struct f2fs_bio_info *io, return false; } -static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode, - nid_t ino, pgoff_t idx, enum page_type type) -{ - enum page_type btype = PAGE_TYPE_OF_BIO(type); - enum temp_type temp; - struct f2fs_bio_info *io; - bool ret = false; - - for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { - io = sbi->write_io[btype] + temp; - - down_read(&io->io_rwsem); - ret = __has_merged_page(io, inode, ino, idx); - up_read(&io->io_rwsem); - - /* TODO: use HOT temp only for meta pages now. */ - if (ret || btype == META) - break; - } - return ret; -} - static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type, enum temp_type temp) { @@ -322,17 +428,23 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, } static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, nid_t ino, pgoff_t idx, - enum page_type type, bool force) + struct inode *inode, struct page *page, + nid_t ino, enum page_type type, bool force) { enum temp_type temp; - - if (!force && !has_merged_page(sbi, inode, ino, idx, type)) - return; + bool ret = true; for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { + if (!force) { + enum page_type btype = PAGE_TYPE_OF_BIO(type); + struct f2fs_bio_info *io = sbi->write_io[btype] + temp; - __f2fs_submit_merged_write(sbi, type, temp); + down_read(&io->io_rwsem); + ret = __has_merged_page(io, inode, page, ino); + up_read(&io->io_rwsem); + } + if (ret) + __f2fs_submit_merged_write(sbi, type, temp); /* TODO: use HOT temp only for meta pages now. */ if (type >= META) @@ -346,10 +458,10 @@ void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type) } void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, nid_t ino, pgoff_t idx, - enum page_type type) + struct inode *inode, struct page *page, + nid_t ino, enum page_type type) { - __submit_merged_write_cond(sbi, inode, ino, idx, type, false); + __submit_merged_write_cond(sbi, inode, page, ino, type, false); } void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi) @@ -377,28 +489,32 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) f2fs_trace_ios(fio, 0); /* Allocate a new bio */ - bio = __bio_alloc(fio->sbi, fio->new_blkaddr, 1, is_read_io(fio->op)); + bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc, + 1, is_read_io(fio->op), fio->type, fio->temp); if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); return -EFAULT; } + + if (fio->io_wbc && !is_read_io(fio->op)) + wbc_account_io(fio->io_wbc, page, PAGE_SIZE); + bio_set_op_attrs(bio, fio->op, fio->op_flags); - if (!is_read_io(fio->op)) - inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page)); + inc_page_count(fio->sbi, is_read_io(fio->op) ? + __read_io_type(page): WB_DATA_TYPE(fio->page)); - __submit_bio(fio->sbi, bio, fio->type); + __f2fs_submit_read_bio(fio->sbi, bio, fio->type); return 0; } -int f2fs_submit_page_write(struct f2fs_io_info *fio) +void f2fs_submit_page_write(struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp; struct page *bio_page; - int err = 0; f2fs_bug_on(sbi, is_read_io(fio->op)); @@ -408,7 +524,7 @@ int f2fs_submit_page_write(struct f2fs_io_info *fio) spin_lock(&io->io_lock); if (list_empty(&io->io_list)) { spin_unlock(&io->io_lock); - goto out_fail; + goto out; } fio = list_first_entry(&io->io_list, struct f2fs_io_info, list); @@ -422,8 +538,8 @@ int f2fs_submit_page_write(struct f2fs_io_info *fio) bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; - /* set submitted = 1 as a return value */ - fio->submitted = 1; + /* set submitted = true as a return value */ + fio->submitted = true; inc_page_count(sbi, WB_DATA_TYPE(bio_page)); @@ -435,12 +551,13 @@ int f2fs_submit_page_write(struct f2fs_io_info *fio) if (io->bio == NULL) { if ((fio->type == DATA || fio->type == NODE) && fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) { - err = -EAGAIN; dec_page_count(sbi, WB_DATA_TYPE(bio_page)); - goto out_fail; + fio->retry = true; + goto skip; } - io->bio = __bio_alloc(sbi, fio->new_blkaddr, - BIO_MAX_PAGES, false); + io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc, + BIO_MAX_PAGES, false, + fio->type, fio->temp); io->fio = *fio; } @@ -449,47 +566,53 @@ int f2fs_submit_page_write(struct f2fs_io_info *fio) goto alloc_new; } + if (fio->io_wbc) + wbc_account_io(fio->io_wbc, bio_page, PAGE_SIZE); + io->last_block_in_bio = fio->new_blkaddr; f2fs_trace_ios(fio, 0); trace_f2fs_submit_page_write(fio->page, fio); - +skip: if (fio->in_list) goto next; -out_fail: +out: + if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || + f2fs_is_checkpoint_ready(sbi)) + __submit_merged_bio(io); up_write(&io->io_rwsem); - return err; } static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, - unsigned nr_pages) + unsigned nr_pages, unsigned op_flag) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct fscrypt_ctx *ctx = NULL; struct bio *bio; + struct bio_post_read_ctx *ctx; + unsigned int post_read_steps = 0; if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) return ERR_PTR(-EFAULT); - if (f2fs_encrypted_file(inode)) { - ctx = fscrypt_get_ctx(inode, GFP_NOFS); - if (IS_ERR(ctx)) - return ERR_CAST(ctx); - - /* wait the page to be moved by cleaning */ - f2fs_wait_on_block_writeback(sbi, blkaddr); - } - - bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, BIO_MAX_PAGES)); - if (!bio) { - if (ctx) - fscrypt_release_ctx(ctx); + bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false); + if (!bio) return ERR_PTR(-ENOMEM); - } f2fs_target_device(sbi, blkaddr, bio); bio->bi_end_io = f2fs_read_end_io; - bio->bi_private = ctx; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio_set_op_attrs(bio, REQ_OP_READ, op_flag); + + if (f2fs_encrypted_file(inode)) + post_read_steps |= 1 << STEP_DECRYPT; + if (post_read_steps) { + ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS); + if (!ctx) { + bio_put(bio); + return ERR_PTR(-ENOMEM); + } + ctx->bio = bio; + ctx->enabled_steps = post_read_steps; + bio->bi_private = ctx; + } return bio; } @@ -498,16 +621,21 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, static int f2fs_submit_page_read(struct inode *inode, struct page *page, block_t blkaddr) { - struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1); + struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0); if (IS_ERR(bio)) return PTR_ERR(bio); + /* wait for GCed page writeback via META_MAPPING */ + f2fs_wait_on_block_writeback(inode, blkaddr); + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); return -EFAULT; } - __submit_bio(F2FS_I_SB(inode), bio, DATA); + ClearPageError(page); + inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); + __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); return 0; } @@ -531,9 +659,9 @@ static void __set_data_blkaddr(struct dnode_of_data *dn) * ->node_page * update block addresses in the node page */ -void set_data_blkaddr(struct dnode_of_data *dn) +void f2fs_set_data_blkaddr(struct dnode_of_data *dn) { - f2fs_wait_on_page_writeback(dn->node_page, NODE, true); + f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true); __set_data_blkaddr(dn); if (set_page_dirty(dn->node_page)) dn->node_changed = true; @@ -542,12 +670,12 @@ void set_data_blkaddr(struct dnode_of_data *dn) void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) { dn->data_blkaddr = blkaddr; - set_data_blkaddr(dn); + f2fs_set_data_blkaddr(dn); f2fs_update_extent_cache(dn); } /* dn->ofs_in_node will be returned with up-to-date last block pointer */ -int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) +int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); int err; @@ -563,7 +691,7 @@ int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) trace_f2fs_reserve_new_blocks(dn->inode, dn->nid, dn->ofs_in_node, count); - f2fs_wait_on_page_writeback(dn->node_page, NODE, true); + f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true); for (; count > 0; dn->ofs_in_node++) { block_t blkaddr = datablock_addr(dn->inode, @@ -581,12 +709,12 @@ int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) } /* Should keep dn->ofs_in_node unchanged */ -int reserve_new_block(struct dnode_of_data *dn) +int f2fs_reserve_new_block(struct dnode_of_data *dn) { unsigned int ofs_in_node = dn->ofs_in_node; int ret; - ret = reserve_new_blocks(dn, 1); + ret = f2fs_reserve_new_blocks(dn, 1); dn->ofs_in_node = ofs_in_node; return ret; } @@ -596,12 +724,12 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) bool need_put = dn->inode_page ? false : true; int err; - err = get_dnode_of_data(dn, index, ALLOC_NODE); + err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE); if (err) return err; if (dn->data_blkaddr == NULL_ADDR) - err = reserve_new_block(dn); + err = f2fs_reserve_new_block(dn); if (err || need_put) f2fs_put_dnode(dn); return err; @@ -620,7 +748,7 @@ int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index) return f2fs_reserve_block(dn, index); } -struct page *get_read_data_page(struct inode *inode, pgoff_t index, +struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, int op_flags, bool for_write) { struct address_space *mapping = inode->i_mapping; @@ -639,7 +767,7 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index, } set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, index, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err) goto put_err; f2fs_put_dnode(&dn); @@ -658,7 +786,8 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index, * A new dentry page is allocated but not able to be written, since its * new inode page couldn't be allocated due to -ENOSPC. * In such the case, its blkaddr can be remained as NEW_ADDR. - * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata. + * see, f2fs_add_link -> f2fs_get_new_data_page -> + * f2fs_init_inode_metadata. */ if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_SIZE); @@ -678,7 +807,7 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index, return ERR_PTR(err); } -struct page *find_data_page(struct inode *inode, pgoff_t index) +struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index) { struct address_space *mapping = inode->i_mapping; struct page *page; @@ -688,7 +817,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index) return page; f2fs_put_page(page, 0); - page = get_read_data_page(inode, index, 0, false); + page = f2fs_get_read_data_page(inode, index, 0, false); if (IS_ERR(page)) return page; @@ -708,13 +837,13 @@ struct page *find_data_page(struct inode *inode, pgoff_t index) * Because, the callers, functions in dir.c and GC, should be able to know * whether this page exists or not. */ -struct page *get_lock_data_page(struct inode *inode, pgoff_t index, +struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index, bool for_write) { struct address_space *mapping = inode->i_mapping; struct page *page; repeat: - page = get_read_data_page(inode, index, 0, for_write); + page = f2fs_get_read_data_page(inode, index, 0, for_write); if (IS_ERR(page)) return page; @@ -740,7 +869,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index, * Note that, ipage is set only by make_empty_dir, and if any error occur, * ipage should be released by this function. */ -struct page *get_new_data_page(struct inode *inode, +struct page *f2fs_get_new_data_page(struct inode *inode, struct page *ipage, pgoff_t index, bool new_i_size) { struct address_space *mapping = inode->i_mapping; @@ -779,7 +908,7 @@ struct page *get_new_data_page(struct inode *inode, /* if ipage exists, blkaddr should be NEW_ADDR */ f2fs_bug_on(F2FS_I_SB(inode), ipage); - page = get_lock_data_page(inode, index, true); + page = f2fs_get_lock_data_page(inode, index, true); if (IS_ERR(page)) return page; } @@ -790,55 +919,64 @@ struct page *get_new_data_page(struct inode *inode, return page; } -static int __allocate_data_block(struct dnode_of_data *dn) +static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_summary sum; struct node_info ni; - pgoff_t fofs; + block_t old_blkaddr; blkcnt_t count = 1; int err; if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; + err = f2fs_get_node_info(sbi, dn->nid, &ni); + if (err) + return err; + dn->data_blkaddr = datablock_addr(dn->inode, dn->node_page, dn->ofs_in_node); - if (dn->data_blkaddr == NEW_ADDR) + if (dn->data_blkaddr != NULL_ADDR) goto alloc; if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) return err; alloc: - get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); + old_blkaddr = dn->data_blkaddr; + f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr, + &sum, seg_type, NULL, false); + if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) + invalidate_mapping_pages(META_MAPPING(sbi), + old_blkaddr, old_blkaddr); + f2fs_set_data_blkaddr(dn); - allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr, - &sum, CURSEG_WARM_DATA, NULL, false); - set_data_blkaddr(dn); - - /* update i_size */ - fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + - dn->ofs_in_node; - if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT)) - f2fs_i_size_write(dn->inode, - ((loff_t)(fofs + 1) << PAGE_SHIFT)); + /* + * i_size will be updated by direct_IO. Otherwise, we'll get stale + * data from unwritten block via dio_read. + */ return 0; } -static inline bool __force_buffered_io(struct inode *inode, int rw) -{ - return (f2fs_encrypted_file(inode) || - (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) || - F2FS_I_SB(inode)->s_ndevs); -} - int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); struct f2fs_map_blocks map; + int flag; int err = 0; + bool direct_io = iocb->ki_flags & IOCB_DIRECT; + + /* convert inline data for Direct I/O*/ + if (direct_io) { + err = f2fs_convert_inline_inode(inode); + if (err) + return err; + } + + if (direct_io && allow_outplace_dio(inode, iocb, from)) + return 0; if (is_inode_flag_set(inode, FI_NO_PREALLOC)) return 0; @@ -851,27 +989,38 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) map.m_len = 0; map.m_next_pgofs = NULL; + map.m_next_extent = NULL; + map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = true; - if (iocb->ki_flags & IOCB_DIRECT) { - err = f2fs_convert_inline_inode(inode); - if (err) - return err; - return f2fs_map_blocks(inode, &map, 1, - __force_buffered_io(inode, WRITE) ? - F2FS_GET_BLOCK_PRE_AIO : - F2FS_GET_BLOCK_PRE_DIO); + if (direct_io) { + map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint); + flag = f2fs_force_buffered_io(inode, iocb, from) ? + F2FS_GET_BLOCK_PRE_AIO : + F2FS_GET_BLOCK_PRE_DIO; + goto map_blocks; } if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) { err = f2fs_convert_inline_inode(inode); if (err) return err; } - if (!f2fs_has_inline_data(inode)) - return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); + if (f2fs_has_inline_data(inode)) + return err; + + flag = F2FS_GET_BLOCK_PRE_AIO; + +map_blocks: + err = f2fs_map_blocks(inode, &map, 1, flag); + if (map.m_len > 0 && err == -ENOSPC) { + if (!direct_io) + set_inode_flag(inode, FI_NO_PREALLOC); + err = 0; + } return err; } -static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) { if (flag == F2FS_GET_BLOCK_PRE_AIO) { if (lock) @@ -901,13 +1050,14 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, unsigned int maxblocks = map->m_len; struct dnode_of_data dn; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - int mode = create ? ALLOC_NODE : LOOKUP_NODE; + int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE; pgoff_t pgofs, end_offset, end; int err = 0, ofs = 1; unsigned int ofs_in_node, last_ofs_in_node; blkcnt_t prealloc; struct extent_info ei = {0,0,0}; block_t blkaddr; + unsigned int start_pgofs; if (!maxblocks) return 0; @@ -920,19 +1070,30 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, end = pgofs + maxblocks; if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) { + if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO && + map->m_may_create) + goto next_dnode; + map->m_pblk = ei.blk + pgofs - ei.fofs; map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs); map->m_flags = F2FS_MAP_MAPPED; + if (map->m_next_extent) + *map->m_next_extent = pgofs + map->m_len; + + /* for hardware encryption, but to avoid potential issue in future */ + if (flag == F2FS_GET_BLOCK_DIO) + f2fs_wait_on_block_writeback_range(inode, + map->m_pblk, map->m_len); goto out; } next_dnode: - if (create) + if (map->m_may_create) __do_map_lock(sbi, flag, true); /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, pgofs, mode); + err = f2fs_get_dnode_of_data(&dn, pgofs, mode); if (err) { if (flag == F2FS_GET_BLOCK_BMAP) map->m_pblk = 0; @@ -940,11 +1101,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, err = 0; if (map->m_next_pgofs) *map->m_next_pgofs = - get_next_page_offset(&dn, pgofs); + f2fs_get_next_page_offset(&dn, pgofs); + if (map->m_next_extent) + *map->m_next_extent = + f2fs_get_next_page_offset(&dn, pgofs); } goto unlock_out; } + start_pgofs = pgofs; prealloc = 0; last_ofs_in_node = ofs_in_node = dn.ofs_in_node; end_offset = ADDRS_PER_PAGE(dn.node_page, inode); @@ -958,7 +1123,17 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, goto sync_out; } - if (!is_valid_data_blkaddr(sbi, blkaddr)) { + if (is_valid_data_blkaddr(sbi, blkaddr)) { + /* use out-place-update for driect IO under LFS mode */ + if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO && + map->m_may_create) { + err = __allocate_data_block(&dn, map->m_seg_type); + if (!err) { + blkaddr = dn.data_blkaddr; + set_inode_flag(inode, FI_APPEND_WRITE); + } + } + } else { if (create) { if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; @@ -970,7 +1145,10 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, last_ofs_in_node = dn.ofs_in_node; } } else { - err = __allocate_data_block(&dn); + WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO && + flag != F2FS_GET_BLOCK_DIO); + err = __allocate_data_block(&dn, + map->m_seg_type); if (!err) set_inode_flag(inode, FI_APPEND_WRITE); } @@ -983,14 +1161,20 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, map->m_pblk = 0; goto sync_out; } + if (flag == F2FS_GET_BLOCK_PRECACHE) + goto sync_out; if (flag == F2FS_GET_BLOCK_FIEMAP && blkaddr == NULL_ADDR) { if (map->m_next_pgofs) *map->m_next_pgofs = pgofs + 1; - } - if (flag != F2FS_GET_BLOCK_FIEMAP || - blkaddr != NEW_ADDR) goto sync_out; + } + if (flag != F2FS_GET_BLOCK_FIEMAP) { + /* for defragment case */ + if (map->m_next_pgofs) + *map->m_next_pgofs = pgofs + 1; + goto sync_out; + } } } @@ -1024,7 +1208,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, (pgofs == end || dn.ofs_in_node == end_offset)) { dn.ofs_in_node = ofs_in_node; - err = reserve_new_blocks(&dn, prealloc); + err = f2fs_reserve_new_blocks(&dn, prealloc); if (err) goto sync_out; @@ -1041,18 +1225,45 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, else if (dn.ofs_in_node < end_offset) goto next_block; + if (flag == F2FS_GET_BLOCK_PRECACHE) { + if (map->m_flags & F2FS_MAP_MAPPED) { + unsigned int ofs = start_pgofs - map->m_lblk; + + f2fs_update_extent_cache_range(&dn, + start_pgofs, map->m_pblk + ofs, + map->m_len - ofs); + } + } + f2fs_put_dnode(&dn); - if (create) { + if (map->m_may_create) { __do_map_lock(sbi, flag, false); f2fs_balance_fs(sbi, dn.node_changed); } goto next_dnode; sync_out: + + /* for hardware encryption, but to avoid potential issue in future */ + if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) + f2fs_wait_on_block_writeback_range(inode, + map->m_pblk, map->m_len); + + if (flag == F2FS_GET_BLOCK_PRECACHE) { + if (map->m_flags & F2FS_MAP_MAPPED) { + unsigned int ofs = start_pgofs - map->m_lblk; + + f2fs_update_extent_cache_range(&dn, + start_pgofs, map->m_pblk + ofs, + map->m_len - ofs); + } + if (map->m_next_extent) + *map->m_next_extent = pgofs + 1; + } f2fs_put_dnode(&dn); unlock_out: - if (create) { + if (map->m_may_create) { __do_map_lock(sbi, flag, false); f2fs_balance_fs(sbi, dn.node_changed); } @@ -1061,9 +1272,35 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, return err; } +bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len) +{ + struct f2fs_map_blocks map; + block_t last_lblk; + int err; + + if (pos + len > i_size_read(inode)) + return false; + + map.m_lblk = F2FS_BYTES_TO_BLK(pos); + map.m_next_pgofs = NULL; + map.m_next_extent = NULL; + map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = false; + last_lblk = F2FS_BLK_ALIGN(pos + len); + + while (map.m_lblk < last_lblk) { + map.m_len = last_lblk - map.m_lblk; + err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT); + if (err || map.m_len == 0) + return false; + map.m_lblk += map.m_len; + } + return true; +} + static int __get_data_block(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create, int flag, - pgoff_t *next_pgofs) + pgoff_t *next_pgofs, int seg_type, bool may_write) { struct f2fs_map_blocks map; int err; @@ -1071,6 +1308,9 @@ static int __get_data_block(struct inode *inode, sector_t iblock, map.m_lblk = iblock; map.m_len = bh->b_size >> inode->i_blkbits; map.m_next_pgofs = next_pgofs; + map.m_next_extent = NULL; + map.m_seg_type = seg_type; + map.m_may_create = may_write; err = f2fs_map_blocks(inode, &map, create, flag); if (!err) { @@ -1086,14 +1326,26 @@ static int get_data_block(struct inode *inode, sector_t iblock, pgoff_t *next_pgofs) { return __get_data_block(inode, iblock, bh_result, create, - flag, next_pgofs); + flag, next_pgofs, + NO_CHECK_TYPE, create); +} + +static int get_data_block_dio_write(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + return __get_data_block(inode, iblock, bh_result, create, + F2FS_GET_BLOCK_DIO, NULL, + f2fs_rw_hint_to_seg_type(inode->i_write_hint), + true); } static int get_data_block_dio(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { return __get_data_block(inode, iblock, bh_result, create, - F2FS_GET_BLOCK_DEFAULT, NULL); + F2FS_GET_BLOCK_DIO, NULL, + f2fs_rw_hint_to_seg_type(inode->i_write_hint), + false); } static int get_data_block_bmap(struct inode *inode, sector_t iblock, @@ -1104,7 +1356,8 @@ static int get_data_block_bmap(struct inode *inode, sector_t iblock, return -EFBIG; return __get_data_block(inode, iblock, bh_result, create, - F2FS_GET_BLOCK_BMAP, NULL); + F2FS_GET_BLOCK_BMAP, NULL, + NO_CHECK_TYPE, create); } static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) @@ -1117,6 +1370,76 @@ static inline loff_t blk_to_logical(struct inode *inode, sector_t blk) return (blk << inode->i_blkbits); } +static int f2fs_xattr_fiemap(struct inode *inode, + struct fiemap_extent_info *fieinfo) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct page *page; + struct node_info ni; + __u64 phys = 0, len; + __u32 flags; + nid_t xnid = F2FS_I(inode)->i_xattr_nid; + int err = 0; + + if (f2fs_has_inline_xattr(inode)) { + int offset; + + page = f2fs_grab_cache_page(NODE_MAPPING(sbi), + inode->i_ino, false); + if (!page) + return -ENOMEM; + + err = f2fs_get_node_info(sbi, inode->i_ino, &ni); + if (err) { + f2fs_put_page(page, 1); + return err; + } + + phys = (__u64)blk_to_logical(inode, ni.blk_addr); + offset = offsetof(struct f2fs_inode, i_addr) + + sizeof(__le32) * (DEF_ADDRS_PER_INODE - + get_inline_xattr_addrs(inode)); + + phys += offset; + len = inline_xattr_size(inode); + + f2fs_put_page(page, 1); + + flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED; + + if (!xnid) + flags |= FIEMAP_EXTENT_LAST; + + err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags); + if (err || err == 1) + return err; + } + + if (xnid) { + page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false); + if (!page) + return -ENOMEM; + + err = f2fs_get_node_info(sbi, xnid, &ni); + if (err) { + f2fs_put_page(page, 1); + return err; + } + + phys = (__u64)blk_to_logical(inode, ni.blk_addr); + len = inode->i_sb->s_blocksize; + + f2fs_put_page(page, 1); + + flags = FIEMAP_EXTENT_LAST; + } + + if (phys) + err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags); + + return (err < 0 ? err : 0); +} + int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { @@ -1127,18 +1450,29 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u32 flags = 0; int ret = 0; - ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); + if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) { + ret = f2fs_precache_extents(inode); + if (ret) + return ret; + } + + ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR); if (ret) return ret; + inode_lock(inode); + + if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { + ret = f2fs_xattr_fiemap(inode, fieinfo); + goto out; + } + if (f2fs_has_inline_data(inode)) { ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len); if (ret != -EAGAIN) - return ret; + goto out; } - inode_lock(inode); - if (logical_to_blk(inode, len) == 0) len = blk_to_logical(inode, 1); @@ -1202,13 +1536,17 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, /* * This function was originally taken from fs/mpage.c, and customized for f2fs. * Major change was from block_size == page_size in f2fs by default. + * + * Note that the aops->readpages() function is ONLY used for read-ahead. If + * this function ever deviates from doing just read-ahead, it should either + * use ->readpage() or do the necessary surgery to decouple ->readpages() + * from read-ahead. */ static int f2fs_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, - unsigned nr_pages) + unsigned nr_pages, bool is_readahead) { struct bio *bio = NULL; - unsigned page_idx; sector_t last_block_in_bio = 0; struct inode *inode = mapping->host; const unsigned blkbits = inode->i_blkbits; @@ -1224,9 +1562,11 @@ static int f2fs_mpage_readpages(struct address_space *mapping, map.m_len = 0; map.m_flags = 0; map.m_next_pgofs = NULL; + map.m_next_extent = NULL; + map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = false; - for (page_idx = 0; nr_pages; page_idx++, nr_pages--) { - + for (; nr_pages; nr_pages--) { if (pages) { page = list_last_entry(pages, struct page, lru); @@ -1295,20 +1635,29 @@ static int f2fs_mpage_readpages(struct address_space *mapping, if (bio && (last_block_in_bio != block_nr - 1 || !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) { submit_and_realloc: - __submit_bio(F2FS_I_SB(inode), bio, DATA); + __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); bio = NULL; } if (bio == NULL) { - bio = f2fs_grab_read_bio(inode, block_nr, nr_pages); + bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, + is_readahead ? REQ_RAHEAD : 0); if (IS_ERR(bio)) { bio = NULL; goto set_error_page; } } + /* + * If the page is under writeback, we need to wait for + * its completion to see the correct decrypted data. + */ + f2fs_wait_on_block_writeback(inode, block_nr); + if (bio_add_page(bio, page, blocksize, 0) < blocksize) goto submit_and_realloc; + inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); + ClearPageError(page); last_block_in_bio = block_nr; goto next_page; set_error_page: @@ -1318,7 +1667,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, goto next_page; confused: if (bio) { - __submit_bio(F2FS_I_SB(inode), bio, DATA); + __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); bio = NULL; } unlock_page(page); @@ -1328,7 +1677,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, } BUG_ON(pages && !list_empty(pages)); if (bio) - __submit_bio(F2FS_I_SB(inode), bio, DATA); + __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); return 0; } @@ -1343,7 +1692,7 @@ static int f2fs_read_data_page(struct file *file, struct page *page) if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); if (ret == -EAGAIN) - ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1); + ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1, false); return ret; } @@ -1351,7 +1700,7 @@ static int f2fs_read_data_pages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { - struct inode *inode = file->f_mapping->host; + struct inode *inode = mapping->host; struct page *page = list_last_entry(pages, struct page, lru); trace_f2fs_readpages(inode, page, nr_pages); @@ -1360,56 +1709,136 @@ static int f2fs_read_data_pages(struct file *file, if (f2fs_has_inline_data(inode)) return 0; - return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages); + return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true); } static int encrypt_one_page(struct f2fs_io_info *fio) { struct inode *inode = fio->page->mapping->host; + struct page *mpage; gfp_t gfp_flags = GFP_NOFS; if (!f2fs_encrypted_file(inode)) return 0; - /* wait for GCed encrypted page writeback */ - f2fs_wait_on_block_writeback(fio->sbi, fio->old_blkaddr); + /* wait for GCed page writeback via META_MAPPING */ + f2fs_wait_on_block_writeback(inode, fio->old_blkaddr); retry_encrypt: fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page, PAGE_SIZE, 0, fio->page->index, gfp_flags); - if (!IS_ERR(fio->encrypted_page)) - return 0; - - /* flush pending IOs and wait for a while in the ENOMEM case */ - if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { - f2fs_flush_merged_writes(fio->sbi); - congestion_wait(BLK_RW_ASYNC, HZ/50); - gfp_flags |= __GFP_NOFAIL; - goto retry_encrypt; + if (IS_ERR(fio->encrypted_page)) { + /* flush pending IOs and wait for a while in the ENOMEM case */ + if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { + f2fs_flush_merged_writes(fio->sbi); + congestion_wait(BLK_RW_ASYNC, HZ/50); + gfp_flags |= __GFP_NOFAIL; + goto retry_encrypt; + } + return PTR_ERR(fio->encrypted_page); } - return PTR_ERR(fio->encrypted_page); + + mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr); + if (mpage) { + if (PageUptodate(mpage)) + memcpy(page_address(mpage), + page_address(fio->encrypted_page), PAGE_SIZE); + f2fs_put_page(mpage, 1); + } + return 0; +} + +static inline bool check_inplace_update_policy(struct inode *inode, + struct f2fs_io_info *fio) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + unsigned int policy = SM_I(sbi)->ipu_policy; + + if (policy & (0x1 << F2FS_IPU_FORCE)) + return true; + if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi)) + return true; + if (policy & (0x1 << F2FS_IPU_UTIL) && + utilization(sbi) > SM_I(sbi)->min_ipu_util) + return true; + if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && f2fs_need_SSR(sbi) && + utilization(sbi) > SM_I(sbi)->min_ipu_util) + return true; + + /* + * IPU for rewrite async pages + */ + if (policy & (0x1 << F2FS_IPU_ASYNC) && + fio && fio->op == REQ_OP_WRITE && + !(fio->op_flags & REQ_SYNC) && + !f2fs_encrypted_inode(inode)) + return true; + + /* this is only set during fdatasync */ + if (policy & (0x1 << F2FS_IPU_FSYNC) && + is_inode_flag_set(inode, FI_NEED_IPU)) + return true; + + if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) && + !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr))) + return true; + + return false; +} + +bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio) +{ + if (f2fs_is_pinned_file(inode)) + return true; + + /* if this is cold file, we should overwrite to avoid fragmentation */ + if (file_is_cold(inode)) + return true; + + return check_inplace_update_policy(inode, fio); +} + +bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (test_opt(sbi, LFS)) + return true; + if (S_ISDIR(inode->i_mode)) + return true; + if (IS_NOQUOTA(inode)) + return true; + if (f2fs_is_atomic_file(inode)) + return true; + if (fio) { + if (is_cold_data(fio->page)) + return true; + if (IS_ATOMIC_WRITTEN_PAGE(fio->page)) + return true; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) && + f2fs_is_checkpointed_data(sbi, fio->old_blkaddr))) + return true; + } + return false; } static inline bool need_inplace_update(struct f2fs_io_info *fio) { struct inode *inode = fio->page->mapping->host; - if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode)) - return false; - if (is_cold_data(fio->page)) - return false; - if (IS_ATOMIC_WRITTEN_PAGE(fio->page)) + if (f2fs_should_update_outplace(inode, fio)) return false; - return need_inplace_update_policy(inode, fio); + return f2fs_should_update_inplace(inode, fio); } -int do_write_data_page(struct f2fs_io_info *fio) +int f2fs_do_write_data_page(struct f2fs_io_info *fio) { struct page *page = fio->page; struct inode *inode = page->mapping->host; struct dnode_of_data dn; struct extent_info ei = {0,0,0}; + struct node_info ni; bool ipu_force = false; int err = 0; @@ -1431,7 +1860,7 @@ int do_write_data_page(struct f2fs_io_info *fio) if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi)) return -EAGAIN; - err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); if (err) goto out; @@ -1440,6 +1869,7 @@ int do_write_data_page(struct f2fs_io_info *fio) /* This page is already truncated */ if (fio->old_blkaddr == NULL_ADDR) { ClearPageUptodate(page); + clear_cold_data(page); goto out_writepage; } got_it: @@ -1460,10 +1890,13 @@ int do_write_data_page(struct f2fs_io_info *fio) goto out_writepage; set_page_writeback(page); + ClearPageError(page); f2fs_put_dnode(&dn); if (fio->need_lock == LOCK_REQ) f2fs_unlock_op(fio->sbi); - err = rewrite_data_page(fio); + err = f2fs_inplace_write_data(fio); + if (err && PageWriteback(page)) + end_page_writeback(page); trace_f2fs_do_write_data_page(fio->page, IPU); set_inode_flag(inode, FI_UPDATE_WRITE); return err; @@ -1477,14 +1910,21 @@ int do_write_data_page(struct f2fs_io_info *fio) fio->need_lock = LOCK_REQ; } + err = f2fs_get_node_info(fio->sbi, dn.nid, &ni); + if (err) + goto out_writepage; + + fio->version = ni.version; + err = encrypt_one_page(fio); if (err) goto out_writepage; set_page_writeback(page); + ClearPageError(page); /* LFS mode write path */ - write_data_page(&dn, fio); + f2fs_outplace_write_data(&dn, fio); trace_f2fs_do_write_data_page(page, OPU); set_inode_flag(inode, FI_APPEND_WRITE); if (page->index == 0) @@ -1512,6 +1952,7 @@ static int __write_data_page(struct page *page, bool *submitted, int err = 0; struct f2fs_io_info fio = { .sbi = sbi, + .ino = inode->i_ino, .type = DATA, .op = REQ_OP_WRITE, .op_flags = wbc_to_write_flags(wbc), @@ -1521,10 +1962,23 @@ static int __write_data_page(struct page *page, bool *submitted, .submitted = false, .need_lock = LOCK_RETRY, .io_type = io_type, + .io_wbc = wbc, }; trace_f2fs_writepage(page, DATA); + /* we should bypass data pages to proceed the kworkder jobs */ + if (unlikely(f2fs_cp_error(sbi))) { + mapping_set_error(page->mapping, -EIO); + /* + * don't drop any dirty dentry pages for keeping lastest + * directory structure. + */ + if (S_ISDIR(inode->i_mode)) + goto redirty_out; + goto out; + } + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; @@ -1546,19 +2000,13 @@ static int __write_data_page(struct page *page, bool *submitted, /* we should not write 0'th page having journal header */ if (f2fs_is_volatile_file(inode) && (!page->index || (!wbc->for_reclaim && - available_free_memory(sbi, BASE_CHECK)))) + f2fs_available_free_memory(sbi, BASE_CHECK)))) goto redirty_out; - /* we should bypass data pages to proceed the kworkder jobs */ - if (unlikely(f2fs_cp_error(sbi))) { - mapping_set_error(page->mapping, -EIO); - goto out; - } - /* Dentry blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode)) { fio.need_lock = LOCK_DONE; - err = do_write_data_page(&fio); + err = f2fs_do_write_data_page(&fio); goto done; } @@ -1577,14 +2025,21 @@ static int __write_data_page(struct page *page, bool *submitted, } if (err == -EAGAIN) { - err = do_write_data_page(&fio); + err = f2fs_do_write_data_page(&fio); if (err == -EAGAIN) { fio.need_lock = LOCK_REQ; - err = do_write_data_page(&fio); + err = f2fs_do_write_data_page(&fio); } } - if (F2FS_I(inode)->last_disk_size < psize) - F2FS_I(inode)->last_disk_size = psize; + + if (err) { + file_set_keep_isize(inode); + } else { + down_write(&F2FS_I(inode)->i_sem); + if (F2FS_I(inode)->last_disk_size < psize) + F2FS_I(inode)->last_disk_size = psize; + up_write(&F2FS_I(inode)->i_sem); + } done: if (err && err != -ENOENT) @@ -1592,18 +2047,20 @@ static int __write_data_page(struct page *page, bool *submitted, out: inode_dec_dirty_pages(inode); - if (err) + if (err) { ClearPageUptodate(page); + clear_cold_data(page); + } if (wbc->for_reclaim) { - f2fs_submit_merged_write_cond(sbi, inode, 0, page->index, DATA); + f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA); clear_inode_flag(inode, FI_HOT_DATA); - remove_dirty_inode(inode); + f2fs_remove_dirty_inode(inode); submitted = NULL; } unlock_page(page); - if (!S_ISDIR(inode->i_mode)) + if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode)) f2fs_balance_fs(sbi, need_balance_fs); if (unlikely(f2fs_cp_error(sbi))) { @@ -1648,15 +2105,16 @@ static int f2fs_write_cache_pages(struct address_space *mapping, int ret = 0; int done = 0; struct pagevec pvec; + struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); int nr_pages; pgoff_t uninitialized_var(writeback_index); pgoff_t index; pgoff_t end; /* Inclusive */ pgoff_t done_index; - pgoff_t last_idx = ULONG_MAX; int cycled; int range_whole = 0; int tag; + int nwritten = 0; pagevec_init(&pvec, 0); @@ -1692,8 +2150,8 @@ static int f2fs_write_cache_pages(struct address_space *mapping, while (!done && (index <= end)) { int i; - nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, - min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1); + nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end, + tag); if (nr_pages == 0) break; @@ -1701,7 +2159,9 @@ static int f2fs_write_cache_pages(struct address_space *mapping, struct page *page = pvec.pages[i]; bool submitted = false; - if (page->index > end) { + /* give a priority to WB_SYNC threads */ + if (atomic_read(&sbi->wb_sync_req[DATA]) && + wbc->sync_mode == WB_SYNC_NONE) { done = 1; break; } @@ -1724,12 +2184,11 @@ static int f2fs_write_cache_pages(struct address_space *mapping, if (PageWriteback(page)) { if (wbc->sync_mode != WB_SYNC_NONE) f2fs_wait_on_page_writeback(page, - DATA, true); + DATA, true, true); else goto continue_unlock; } - BUG_ON(PageWriteback(page)); if (!clear_page_dirty_for_io(page)) goto continue_unlock; @@ -1757,12 +2216,10 @@ static int f2fs_write_cache_pages(struct address_space *mapping, done = 1; break; } else if (submitted) { - last_idx = page->index; + nwritten++; } - /* give a priority to WB_SYNC threads */ - if ((atomic_read(&F2FS_M_SB(mapping)->wb_sync_req) || - --wbc->nr_to_write <= 0) && + if (--wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) { done = 1; break; @@ -1781,14 +2238,28 @@ static int f2fs_write_cache_pages(struct address_space *mapping, if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = done_index; - if (last_idx != ULONG_MAX) + if (nwritten) f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host, - 0, last_idx, DATA); + NULL, 0, DATA); return ret; } -int __f2fs_write_data_pages(struct address_space *mapping, +static inline bool __should_serialize_io(struct inode *inode, + struct writeback_control *wbc) +{ + if (!S_ISREG(inode->i_mode)) + return false; + if (IS_NOQUOTA(inode)) + return false; + if (wbc->sync_mode != WB_SYNC_ALL) + return true; + if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks) + return true; + return false; +} + +static int __f2fs_write_data_pages(struct address_space *mapping, struct writeback_control *wbc, enum iostat_type io_type) { @@ -1796,6 +2267,7 @@ int __f2fs_write_data_pages(struct address_space *mapping, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct blk_plug plug; int ret; + bool locked = false; /* deal with chardevs and other special file */ if (!mapping->a_ops->writepage) @@ -1809,9 +2281,10 @@ int __f2fs_write_data_pages(struct address_space *mapping, if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto skip_write; - if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && + if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) && + wbc->sync_mode == WB_SYNC_NONE && get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) && - available_free_memory(sbi, DIRTY_DENTS)) + f2fs_available_free_memory(sbi, DIRTY_DENTS)) goto skip_write; /* skip writing during file defragment */ @@ -1822,22 +2295,30 @@ int __f2fs_write_data_pages(struct address_space *mapping, /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */ if (wbc->sync_mode == WB_SYNC_ALL) - atomic_inc(&sbi->wb_sync_req); - else if (atomic_read(&sbi->wb_sync_req)) + atomic_inc(&sbi->wb_sync_req[DATA]); + else if (atomic_read(&sbi->wb_sync_req[DATA])) goto skip_write; + if (__should_serialize_io(inode, wbc)) { + mutex_lock(&sbi->writepages); + locked = true; + } + blk_start_plug(&plug); ret = f2fs_write_cache_pages(mapping, wbc, io_type); blk_finish_plug(&plug); + if (locked) + mutex_unlock(&sbi->writepages); + if (wbc->sync_mode == WB_SYNC_ALL) - atomic_dec(&sbi->wb_sync_req); + atomic_dec(&sbi->wb_sync_req[DATA]); /* * if some pages were truncated, we cannot guarantee its mapping->host * to detect pending bios. */ - remove_dirty_inode(inode); + f2fs_remove_dirty_inode(inode); return ret; skip_write: @@ -1862,10 +2343,14 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) loff_t i_size = i_size_read(inode); if (to > i_size) { + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); down_write(&F2FS_I(inode)->i_mmap_sem); + truncate_pagecache(inode, i_size); - truncate_blocks(inode, i_size, true); + f2fs_truncate_blocks(inode, i_size, true, true); + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); } } @@ -1903,7 +2388,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, } restart: /* check inline_data */ - ipage = get_node_page(sbi, inode->i_ino); + ipage = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(ipage)) { err = PTR_ERR(ipage); goto unlock_out; @@ -1913,7 +2398,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, if (f2fs_has_inline_data(inode)) { if (pos + len <= MAX_INLINE_DATA(inode)) { - read_inline_data(page, ipage); + f2fs_do_read_inline_data(page, ipage); set_inode_flag(inode, FI_DATA_EXIST); if (inode->i_nlink) set_inline_node(ipage); @@ -1931,7 +2416,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, dn.data_blkaddr = ei.blk + index - ei.fofs; } else { /* hole case */ - err = get_dnode_of_data(&dn, index, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err || dn.data_blkaddr == NULL_ADDR) { f2fs_put_dnode(&dn); __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, @@ -1962,12 +2447,34 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *page = NULL; pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT; - bool need_balance = false; + bool need_balance = false, drop_atomic = false; block_t blkaddr = NULL_ADDR; int err = 0; + if (trace_android_fs_datawrite_start_enabled()) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + inode); + trace_android_fs_datawrite_start(inode, pos, len, + current->pid, path, + current->comm); + } trace_f2fs_write_begin(inode, pos, len, flags); + err = f2fs_is_checkpoint_ready(sbi); + if (err) + goto fail; + + if ((f2fs_is_atomic_file(inode) && + !f2fs_available_free_memory(sbi, INMEM_PAGES)) || + is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) { + err = -ENOMEM; + drop_atomic = true; + goto fail; + } + /* * We should check this at this moment to avoid deadlock on inode page * and #0 page. The locking rule for inline_data conversion should be: @@ -1983,7 +2490,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, * Do not use grab_cache_page_write_begin() to avoid deadlock due to * wait_for_stable_page. Will wait that below with our IO control. */ - page = pagecache_get_page(mapping, index, + page = f2fs_pagecache_get_page(mapping, index, FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS); if (!page) { err = -ENOMEM; @@ -1997,7 +2504,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, if (err) goto fail; - if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) { + if (need_balance && !IS_NOQUOTA(inode) && + has_not_enough_free_secs(sbi, 0, 0)) { unlock_page(page); f2fs_balance_fs(sbi, true); lock_page(page); @@ -2008,11 +2516,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, } } - f2fs_wait_on_page_writeback(page, DATA, false); - - /* wait for GCed encrypted page writeback */ - if (f2fs_encrypted_file(inode)) - f2fs_wait_on_block_writeback(sbi, blkaddr); + f2fs_wait_on_page_writeback(page, DATA, false, true); if (len == PAGE_SIZE || PageUptodate(page)) return 0; @@ -2045,6 +2549,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, fail: f2fs_put_page(page, 1); f2fs_write_failed(mapping, pos + len); + if (drop_atomic) + f2fs_drop_inmem_pages_all(sbi, false); return err; } @@ -2055,6 +2561,7 @@ static int f2fs_write_end(struct file *file, { struct inode *inode = page->mapping->host; + trace_android_fs_datawrite_end(inode, pos, len); trace_f2fs_write_end(inode, pos, len, copied); /* @@ -2084,48 +2591,167 @@ static int f2fs_write_end(struct file *file, static int check_direct_IO(struct inode *inode, struct iov_iter *iter, loff_t offset) { - unsigned blocksize_mask = inode->i_sb->s_blocksize - 1; + unsigned i_blkbits = READ_ONCE(inode->i_blkbits); + unsigned blkbits = i_blkbits; + unsigned blocksize_mask = (1 << blkbits) - 1; + unsigned long align = offset | iov_iter_alignment(iter); + struct block_device *bdev = inode->i_sb->s_bdev; - if (offset & blocksize_mask) - return -EINVAL; - - if (iov_iter_alignment(iter) & blocksize_mask) - return -EINVAL; - + if (align & blocksize_mask) { + if (bdev) + blkbits = blksize_bits(bdev_logical_block_size(bdev)); + blocksize_mask = (1 << blkbits) - 1; + if (align & blocksize_mask) + return -EINVAL; + return 1; + } return 0; } +static void f2fs_dio_end_io(struct bio *bio) +{ + struct f2fs_private_dio *dio = bio->bi_private; + + dec_page_count(F2FS_I_SB(dio->inode), + dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ); + + bio->bi_private = dio->orig_private; + bio->bi_end_io = dio->orig_end_io; + + kvfree(dio); + + bio_endio(bio); +} + +static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode, + loff_t file_offset) +{ + struct f2fs_private_dio *dio; + bool write = (bio_op(bio) == REQ_OP_WRITE); + int err; + + dio = f2fs_kzalloc(F2FS_I_SB(inode), + sizeof(struct f2fs_private_dio), GFP_NOFS); + if (!dio) { + err = -ENOMEM; + goto out; + } + + dio->inode = inode; + dio->orig_end_io = bio->bi_end_io; + dio->orig_private = bio->bi_private; + dio->write = write; + + bio->bi_end_io = f2fs_dio_end_io; + bio->bi_private = dio; + + inc_page_count(F2FS_I_SB(inode), + write ? F2FS_DIO_WRITE : F2FS_DIO_READ); + + submit_bio(bio); + return; +out: + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); +} + static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = mapping->host; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); size_t count = iov_iter_count(iter); loff_t offset = iocb->ki_pos; int rw = iov_iter_rw(iter); int err; + enum rw_hint hint = iocb->ki_hint; + int whint_mode = F2FS_OPTION(sbi).whint_mode; + bool do_opu; err = check_direct_IO(inode, iter, offset); if (err) - return err; + return err < 0 ? err : 0; - if (__force_buffered_io(inode, rw)) + if (f2fs_force_buffered_io(inode, iocb, iter)) return 0; + do_opu = allow_outplace_dio(inode, iocb, iter); + trace_f2fs_direct_IO_enter(inode, offset, count, rw); - down_read(&F2FS_I(inode)->dio_rwsem[rw]); - err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio); - up_read(&F2FS_I(inode)->dio_rwsem[rw]); + if (trace_android_fs_dataread_start_enabled() && + (rw == READ)) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + inode); + trace_android_fs_dataread_start(inode, offset, + count, current->pid, path, + current->comm); + } + if (trace_android_fs_datawrite_start_enabled() && + (rw == WRITE)) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + inode); + trace_android_fs_datawrite_start(inode, offset, count, + current->pid, path, + current->comm); + } + if (rw == WRITE && whint_mode == WHINT_MODE_OFF) + iocb->ki_hint = WRITE_LIFE_NOT_SET; + + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!down_read_trylock(&fi->i_gc_rwsem[rw])) { + iocb->ki_hint = hint; + err = -EAGAIN; + goto out; + } + if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) { + up_read(&fi->i_gc_rwsem[rw]); + iocb->ki_hint = hint; + err = -EAGAIN; + goto out; + } + } else { + down_read(&fi->i_gc_rwsem[rw]); + if (do_opu) + down_read(&fi->i_gc_rwsem[READ]); + } + + err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, + iter, rw == WRITE ? get_data_block_dio_write : + get_data_block_dio, NULL, f2fs_dio_submit_bio, + DIO_LOCKING | DIO_SKIP_HOLES); + + if (do_opu) + up_read(&fi->i_gc_rwsem[READ]); + + up_read(&fi->i_gc_rwsem[rw]); if (rw == WRITE) { + if (whint_mode == WHINT_MODE_OFF) + iocb->ki_hint = hint; if (err > 0) { f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO, err); - set_inode_flag(inode, FI_UPDATE_WRITE); + if (!do_opu) + set_inode_flag(inode, FI_UPDATE_WRITE); } else if (err < 0) { f2fs_write_failed(mapping, offset + count); } } +out: + if (trace_android_fs_dataread_start_enabled() && + (rw == READ)) + trace_android_fs_dataread_end(inode, offset, count); + if (trace_android_fs_datawrite_start_enabled() && + (rw == WRITE)) + trace_android_fs_datawrite_end(inode, offset, count); trace_f2fs_direct_IO_exit(inode, offset, count, rw, err); @@ -2149,13 +2775,15 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset, dec_page_count(sbi, F2FS_DIRTY_NODES); } else { inode_dec_dirty_pages(inode); - remove_dirty_inode(inode); + f2fs_remove_dirty_inode(inode); } } + clear_cold_data(page); + /* This is atomic written page, keep Private */ if (IS_ATOMIC_WRITTEN_PAGE(page)) - return drop_inmem_page(inode, page); + return f2fs_drop_inmem_page(inode, page); set_page_private(page, 0); ClearPagePrivate(page); @@ -2171,40 +2799,12 @@ int f2fs_release_page(struct page *page, gfp_t wait) if (IS_ATOMIC_WRITTEN_PAGE(page)) return 0; + clear_cold_data(page); set_page_private(page, 0); ClearPagePrivate(page); return 1; } -/* - * This was copied from __set_page_dirty_buffers which gives higher performance - * in very high speed storages. (e.g., pmem) - */ -void f2fs_set_page_dirty_nobuffers(struct page *page) -{ - struct address_space *mapping = page->mapping; - unsigned long flags; - - if (unlikely(!mapping)) - return; - - spin_lock(&mapping->private_lock); - lock_page_memcg(page); - SetPageDirty(page); - spin_unlock(&mapping->private_lock); - - spin_lock_irqsave(&mapping->tree_lock, flags); - WARN_ON_ONCE(!PageUptodate(page)); - account_page_dirtied(page, mapping); - radix_tree_tag_set(&mapping->page_tree, - page_index(page), PAGECACHE_TAG_DIRTY); - spin_unlock_irqrestore(&mapping->tree_lock, flags); - unlock_page_memcg(page); - - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - return; -} - static int f2fs_set_data_page_dirty(struct page *page) { struct address_space *mapping = page->mapping; @@ -2217,7 +2817,7 @@ static int f2fs_set_data_page_dirty(struct page *page) if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) { if (!IS_ATOMIC_WRITTEN_PAGE(page)) { - register_inmem_page(inode, page); + f2fs_register_inmem_page(inode, page); return 1; } /* @@ -2228,8 +2828,8 @@ static int f2fs_set_data_page_dirty(struct page *page) } if (!PageDirty(page)) { - f2fs_set_page_dirty_nobuffers(page); - update_dirty_page(inode, page); + __set_page_dirty_nobuffers(page); + f2fs_update_dirty_page(inode, page); return 1; } return 0; @@ -2324,3 +2924,38 @@ const struct address_space_operations f2fs_dblock_aops = { .migratepage = f2fs_migrate_page, #endif }; + +void f2fs_clear_radix_tree_dirty_tag(struct page *page) +{ + struct address_space *mapping = page_mapping(page); + unsigned long flags; + + spin_lock_irqsave(&mapping->tree_lock, flags); + radix_tree_tag_clear(&mapping->page_tree, page_index(page), + PAGECACHE_TAG_DIRTY); + spin_unlock_irqrestore(&mapping->tree_lock, flags); +} + +int __init f2fs_init_post_read_processing(void) +{ + bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, 0); + if (!bio_post_read_ctx_cache) + goto fail; + bio_post_read_ctx_pool = + mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS, + bio_post_read_ctx_cache); + if (!bio_post_read_ctx_pool) + goto fail_free_cache; + return 0; + +fail_free_cache: + kmem_cache_destroy(bio_post_read_ctx_cache); +fail: + return -ENOMEM; +} + +void __exit f2fs_destroy_post_read_processing(void) +{ + mempool_destroy(bio_post_read_ctx_pool); + kmem_cache_destroy(bio_post_read_ctx_cache); +}
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 87f4498..f05b37ef 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c
@@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs debugging statistics * @@ -5,10 +6,6 @@ * http://www.samsung.com/ * Copyright (c) 2012 Linux Foundation * Copyright (c) 2012 Greg Kroah-Hartman <gregkh@linuxfoundation.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> @@ -45,28 +42,37 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); si->ndirty_data = get_pages(sbi, F2FS_DIRTY_DATA); + si->ndirty_qdata = get_pages(sbi, F2FS_DIRTY_QDATA); si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA); si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE]; si->ndirty_files = sbi->ndirty_inode[FILE_INODE]; + si->nquota_files = sbi->nquota_files; si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); si->aw_cnt = atomic_read(&sbi->aw_cnt); si->vw_cnt = atomic_read(&sbi->vw_cnt); si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt); si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt); + si->nr_dio_read = get_pages(sbi, F2FS_DIO_READ); + si->nr_dio_write = get_pages(sbi, F2FS_DIO_WRITE); si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA); si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA); + si->nr_rd_data = get_pages(sbi, F2FS_RD_DATA); + si->nr_rd_node = get_pages(sbi, F2FS_RD_NODE); + si->nr_rd_meta = get_pages(sbi, F2FS_RD_META); if (SM_I(sbi) && SM_I(sbi)->fcc_info) { si->nr_flushed = atomic_read(&SM_I(sbi)->fcc_info->issued_flush); si->nr_flushing = - atomic_read(&SM_I(sbi)->fcc_info->issing_flush); + atomic_read(&SM_I(sbi)->fcc_info->queued_flush); + si->flush_list_empty = + llist_empty(&SM_I(sbi)->fcc_info->issue_list); } if (SM_I(sbi) && SM_I(sbi)->dcc_info) { si->nr_discarded = atomic_read(&SM_I(sbi)->dcc_info->issued_discard); si->nr_discarding = - atomic_read(&SM_I(sbi)->dcc_info->issing_discard); + atomic_read(&SM_I(sbi)->dcc_info->queued_discard); si->nr_discard_cmd = atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt); si->undiscard_blks = SM_I(sbi)->dcc_info->undiscard_blks; @@ -90,16 +96,22 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->free_secs = free_sections(sbi); si->prefree_count = prefree_segments(sbi); si->dirty_count = dirty_segments(sbi); - si->node_pages = NODE_MAPPING(sbi)->nrpages; - si->meta_pages = META_MAPPING(sbi)->nrpages; + if (sbi->node_inode) + si->node_pages = NODE_MAPPING(sbi)->nrpages; + if (sbi->meta_inode) + si->meta_pages = META_MAPPING(sbi)->nrpages; si->nats = NM_I(sbi)->nat_cnt; si->dirty_nats = NM_I(sbi)->dirty_nat_cnt; si->sits = MAIN_SEGS(sbi); si->dirty_sits = SIT_I(sbi)->dirty_sentries; - si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID_LIST]; + si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID]; si->avail_nids = NM_I(sbi)->available_nids; - si->alloc_nids = NM_I(sbi)->nid_cnt[ALLOC_NID_LIST]; + si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID]; si->bg_gc = sbi->bg_gc; + si->io_skip_bggc = sbi->io_skip_bggc; + si->other_skip_bggc = sbi->other_skip_bggc; + si->skipped_atomic_files[BG_GC] = sbi->skipped_atomic_files[BG_GC]; + si->skipped_atomic_files[FG_GC] = sbi->skipped_atomic_files[FG_GC]; si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) / 2; @@ -115,6 +127,9 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]); } + for (i = META_CP; i < META_MAX; i++) + si->meta_count[i] = atomic_read(&sbi->meta_count[i]); + for (i = 0; i < 2; i++) { si->segment_count[i] = sbi->segment_count[i]; si->block_count[i] = sbi->block_count[i]; @@ -162,7 +177,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi) static void update_mem_info(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); - unsigned npages; int i; if (si->base_mem) @@ -175,7 +189,6 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize; si->base_mem += 2 * sizeof(struct f2fs_inode_info); si->base_mem += sizeof(*sbi->ckpt); - si->base_mem += sizeof(struct percpu_counter) * NR_COUNT_TYPE; /* build sm */ si->base_mem += sizeof(struct f2fs_sm_info); @@ -185,10 +198,9 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry); si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi)); si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); - if (f2fs_discard_en(sbi)) - si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); + si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); si->base_mem += SIT_VBLOCK_MAP_SIZE; - if (sbi->segs_per_sec > 1) + if (__is_large_section(sbi)) si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry); si->base_mem += __bitmap_size(sbi, SIT_BITMAP); @@ -210,7 +222,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += sizeof(struct f2fs_nm_info); si->base_mem += __bitmap_size(sbi, NAT_BITMAP); si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS); - si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE; + si->base_mem += NM_I(sbi)->nat_blocks * + f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK); si->base_mem += NM_I(sbi)->nat_blocks / 8; si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short); @@ -231,14 +244,14 @@ static void update_mem_info(struct f2fs_sb_info *sbi) } /* free nids */ - si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] + - NM_I(sbi)->nid_cnt[ALLOC_NID_LIST]) * + si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID] + + NM_I(sbi)->nid_cnt[PREALLOC_NID]) * sizeof(struct free_nid); si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry); si->cache_mem += NM_I(sbi)->dirty_nat_cnt * sizeof(struct nat_entry_set); si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages); - for (i = 0; i <= ORPHAN_INO; i++) + for (i = 0; i < MAX_INO_ENTRY; i++) si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); si->cache_mem += atomic_read(&sbi->total_ext_tree) * sizeof(struct extent_tree); @@ -246,10 +259,14 @@ static void update_mem_info(struct f2fs_sb_info *sbi) sizeof(struct extent_node); si->page_mem = 0; - npages = NODE_MAPPING(sbi)->nrpages; - si->page_mem += (unsigned long long)npages << PAGE_SHIFT; - npages = META_MAPPING(sbi)->nrpages; - si->page_mem += (unsigned long long)npages << PAGE_SHIFT; + if (sbi->node_inode) { + unsigned npages = NODE_MAPPING(sbi)->nrpages; + si->page_mem += (unsigned long long)npages << PAGE_SHIFT; + } + if (sbi->meta_inode) { + unsigned npages = META_MAPPING(sbi)->nrpages; + si->page_mem += (unsigned long long)npages << PAGE_SHIFT; + } } static int stat_show(struct seq_file *s, void *v) @@ -262,9 +279,11 @@ static int stat_show(struct seq_file *s, void *v) list_for_each_entry(si, &f2fs_stat_list, stat_list) { update_general_status(si->sbi); - seq_printf(s, "\n=====[ partition info(%pg). #%d, %s]=====\n", + seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n", si->sbi->sb->s_bdev, i++, - f2fs_readonly(si->sbi->sb) ? "RO": "RW"); + f2fs_readonly(si->sbi->sb) ? "RO": "RW", + is_set_ckpt_flags(si->sbi, CP_DISABLED_FLAG) ? + "Disabled": (f2fs_cp_error(si->sbi) ? "Error": "Good")); seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ", si->sit_area_segs, si->nat_area_segs); seq_printf(s, "[SSA: %d] [MAIN: %d", @@ -326,6 +345,13 @@ static int stat_show(struct seq_file *s, void *v) si->prefree_count, si->free_segs, si->free_secs); seq_printf(s, "CP calls: %d (BG: %d)\n", si->cp_count, si->bg_cp_count); + seq_printf(s, " - cp blocks : %u\n", si->meta_count[META_CP]); + seq_printf(s, " - sit blocks : %u\n", + si->meta_count[META_SIT]); + seq_printf(s, " - nat blocks : %u\n", + si->meta_count[META_NAT]); + seq_printf(s, " - ssa blocks : %u\n", + si->meta_count[META_SSA]); seq_printf(s, "GC calls: %d (BG: %d)\n", si->call_count, si->bg_gc); seq_printf(s, " - data segments : %d (%d)\n", @@ -338,6 +364,12 @@ static int stat_show(struct seq_file *s, void *v) si->bg_data_blks); seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks, si->bg_node_blks); + seq_printf(s, "Skipped : atomic write %llu (%llu)\n", + si->skipped_atomic_files[BG_GC] + + si->skipped_atomic_files[FG_GC], + si->skipped_atomic_files[BG_GC]); + seq_printf(s, "BG skip : IO: %u, Other: %u\n", + si->io_skip_bggc, si->other_skip_bggc); seq_puts(s, "\nExtent Cache:\n"); seq_printf(s, " - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n", si->hit_largest, si->hit_cached, @@ -349,10 +381,15 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); - seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d), " + seq_printf(s, " - DIO (R: %4d, W: %4d)\n", + si->nr_dio_read, si->nr_dio_write); + seq_printf(s, " - IO_R (Data: %4d, Node: %4d, Meta: %4d\n", + si->nr_rd_data, si->nr_rd_node, si->nr_rd_meta); + seq_printf(s, " - IO_W (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), " "Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n", si->nr_wb_cp_data, si->nr_wb_data, si->nr_flushing, si->nr_flushed, + si->flush_list_empty, si->nr_discarding, si->nr_discarded, si->nr_discard_cmd, si->undiscard_blks); seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d), " @@ -365,6 +402,8 @@ static int stat_show(struct seq_file *s, void *v) si->ndirty_dent, si->ndirty_dirs, si->ndirty_all); seq_printf(s, " - datas: %4d in files:%4d\n", si->ndirty_data, si->ndirty_files); + seq_printf(s, " - quota datas: %4d in quota files:%4d\n", + si->ndirty_qdata, si->nquota_files); seq_printf(s, " - meta: %4d in %4d\n", si->ndirty_meta, si->meta_pages); seq_printf(s, " - imeta: %4d\n", @@ -431,8 +470,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_stat_info *si; + int i; - si = kzalloc(sizeof(struct f2fs_stat_info), GFP_KERNEL); + si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL); if (!si) return -ENOMEM; @@ -456,6 +496,8 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->inline_inode, 0); atomic_set(&sbi->inline_dir, 0); atomic_set(&sbi->inplace_count, 0); + for (i = META_CP; i < META_MAX; i++) + atomic_set(&sbi->meta_count[i], 0); atomic_set(&sbi->aw_cnt, 0); atomic_set(&sbi->vw_cnt, 0); @@ -477,7 +519,7 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi) list_del(&si->stat_list); mutex_unlock(&f2fs_stat_mutex); - kfree(si); + kvfree(si); } int __init f2fs_create_root_stats(void)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index c0c933ad..7ff9e99 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c
@@ -1,19 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/dir.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/f2fs_fs.h> +#include <linux/sched/signal.h> #include "f2fs.h" #include "node.h" #include "acl.h" #include "xattr.h" +#include <trace/events/f2fs.h> static unsigned long dir_blocks(struct inode *inode) { @@ -58,12 +57,12 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, }; -void set_de_type(struct f2fs_dir_entry *de, umode_t mode) +static void set_de_type(struct f2fs_dir_entry *de, umode_t mode) { de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; } -unsigned char get_de_type(struct f2fs_dir_entry *de) +unsigned char f2fs_get_de_type(struct f2fs_dir_entry *de) { if (de->file_type < F2FS_FT_MAX) return f2fs_filetype_table[de->file_type]; @@ -92,19 +91,17 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, struct f2fs_dir_entry *de; struct f2fs_dentry_ptr d; - dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page); + dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page); make_dentry_ptr_block(NULL, &d, dentry_blk); - de = find_target_dentry(fname, namehash, max_slots, &d); + de = f2fs_find_target_dentry(fname, namehash, max_slots, &d); if (de) *res_page = dentry_page; - else - kunmap(dentry_page); return de; } -struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname, +struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname, f2fs_hash_t namehash, int *max_slots, struct f2fs_dentry_ptr *d) { @@ -171,7 +168,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, for (; bidx < end_block; bidx++) { /* no need to allocate new dentry pages to all the indices */ - dentry_page = find_data_page(dir, bidx); + dentry_page = f2fs_find_data_page(dir, bidx); if (IS_ERR(dentry_page)) { if (PTR_ERR(dentry_page) == -ENOENT) { room = true; @@ -210,7 +207,7 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, if (f2fs_has_inline_dentry(dir)) { *res_page = NULL; - de = find_in_inline_dir(dir, fname, res_page); + de = f2fs_find_in_inline_dir(dir, fname, res_page); goto out; } @@ -285,7 +282,6 @@ ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr, de = f2fs_find_entry(dir, qstr, page); if (de) { res = le32_to_cpu(de->ino); - f2fs_dentry_kunmap(dir, *page); f2fs_put_page(*page, 0); } @@ -297,10 +293,9 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, { enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA; lock_page(page); - f2fs_wait_on_page_writeback(page, type, true); + f2fs_wait_on_page_writeback(page, type, true, true); de->ino = cpu_to_le32(inode->i_ino); set_de_type(de, inode->i_mode); - f2fs_dentry_kunmap(dir, page); set_page_dirty(page); dir->i_mtime = dir->i_ctime = current_time(dir); @@ -312,7 +307,7 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) { struct f2fs_inode *ri; - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); /* copy name info. to this inode page */ ri = F2FS_INODE(ipage); @@ -321,7 +316,7 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) set_page_dirty(ipage); } -void do_make_empty_dir(struct inode *inode, struct inode *parent, +void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, struct f2fs_dentry_ptr *d) { struct qstr dot = QSTR_INIT(".", 1); @@ -342,33 +337,32 @@ static int make_empty_dir(struct inode *inode, struct f2fs_dentry_ptr d; if (f2fs_has_inline_dentry(inode)) - return make_empty_inline_dir(inode, parent, page); + return f2fs_make_empty_inline_dir(inode, parent, page); - dentry_page = get_new_data_page(inode, page, 0, true); + dentry_page = f2fs_get_new_data_page(inode, page, 0, true); if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); - dentry_blk = kmap_atomic(dentry_page); + dentry_blk = page_address(dentry_page); make_dentry_ptr_block(NULL, &d, dentry_blk); - do_make_empty_dir(inode, parent, &d); - - kunmap_atomic(dentry_blk); + f2fs_do_make_empty_dir(inode, parent, &d); set_page_dirty(dentry_page); f2fs_put_page(dentry_page, 1); return 0; } -struct page *init_inode_metadata(struct inode *inode, struct inode *dir, +struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, const struct qstr *new_name, const struct qstr *orig_name, struct page *dpage) { struct page *page; + int dummy_encrypt = DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(dir)); int err; if (is_inode_flag_set(inode, FI_NEW_INODE)) { - page = new_inode_page(inode); + page = f2fs_new_inode_page(inode); if (IS_ERR(page)) return page; @@ -391,17 +385,16 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, if (err) goto put_error; - if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) { + if ((f2fs_encrypted_inode(dir) || dummy_encrypt) && + f2fs_may_encrypt(inode)) { err = fscrypt_inherit_context(dir, inode, page, false); if (err) goto put_error; } } else { - page = get_node_page(F2FS_I_SB(dir), inode->i_ino); + page = f2fs_get_node_page(F2FS_I_SB(dir), inode->i_ino); if (IS_ERR(page)) return page; - - set_cold_node(inode, page); } if (new_name) { @@ -422,19 +415,19 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, * we should remove this inode from orphan list. */ if (inode->i_nlink == 0) - remove_orphan_inode(F2FS_I_SB(dir), inode->i_ino); + f2fs_remove_orphan_inode(F2FS_I_SB(dir), inode->i_ino); f2fs_i_links_write(inode, true); } return page; put_error: clear_nlink(inode); - update_inode(inode, page); + f2fs_update_inode(inode, page); f2fs_put_page(page, 1); return ERR_PTR(err); } -void update_parent_metadata(struct inode *dir, struct inode *inode, +void f2fs_update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth) { if (inode && is_inode_flag_set(inode, FI_NEW_INODE)) { @@ -452,7 +445,7 @@ void update_parent_metadata(struct inode *dir, struct inode *inode, clear_inode_flag(inode, FI_INC_LINK); } -int room_for_filename(const void *bitmap, int slots, int max_slots) +int f2fs_room_for_filename(const void *bitmap, int slots, int max_slots) { int bit_start = 0; int zero_start, zero_end; @@ -521,12 +514,11 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, } start: -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) { f2fs_show_injection_info(FAULT_DIR_DEPTH); return -ENOSPC; } -#endif + if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) return -ENOSPC; @@ -541,17 +533,16 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, (le32_to_cpu(dentry_hash) % nbucket)); for (block = bidx; block <= (bidx + nblock - 1); block++) { - dentry_page = get_new_data_page(dir, NULL, block, true); + dentry_page = f2fs_get_new_data_page(dir, NULL, block, true); if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); - dentry_blk = kmap(dentry_page); - bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, + dentry_blk = page_address(dentry_page); + bit_pos = f2fs_room_for_filename(&dentry_blk->dentry_bitmap, slots, NR_DENTRY_IN_BLOCK); if (bit_pos < NR_DENTRY_IN_BLOCK) goto add_dentry; - kunmap(dentry_page); f2fs_put_page(dentry_page, 1); } @@ -559,11 +550,11 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, ++level; goto start; add_dentry: - f2fs_wait_on_page_writeback(dentry_page, DATA, true); + f2fs_wait_on_page_writeback(dentry_page, DATA, true, true); if (inode) { down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, new_name, + page = f2fs_init_inode_metadata(inode, dir, new_name, orig_name, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); @@ -581,18 +572,17 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, f2fs_put_page(page, 1); } - update_parent_metadata(dir, inode, current_depth); + f2fs_update_parent_metadata(dir, inode, current_depth); fail: if (inode) up_write(&F2FS_I(inode)->i_sem); - kunmap(dentry_page); f2fs_put_page(dentry_page, 1); return err; } -int __f2fs_do_add_link(struct inode *dir, struct fscrypt_name *fname, +int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname, struct inode *inode, nid_t ino, umode_t mode) { struct qstr new_name; @@ -616,7 +606,7 @@ int __f2fs_do_add_link(struct inode *dir, struct fscrypt_name *fname, * Caller should grab and release a rwsem by calling f2fs_lock_op() and * f2fs_unlock_op(). */ -int __f2fs_add_link(struct inode *dir, const struct qstr *name, +int f2fs_do_add_link(struct inode *dir, const struct qstr *name, struct inode *inode, nid_t ino, umode_t mode) { struct fscrypt_name fname; @@ -640,13 +630,12 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, F2FS_I(dir)->task = NULL; } if (de) { - f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); err = -EEXIST; } else if (IS_ERR(page)) { err = PTR_ERR(page); } else { - err = __f2fs_do_add_link(dir, &fname, inode, ino, mode); + err = f2fs_add_dentry(dir, &fname, inode, ino, mode); } fscrypt_free_filename(&fname); return err; @@ -658,7 +647,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) int err = 0; down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, NULL, NULL, NULL); + page = f2fs_init_inode_metadata(inode, dir, NULL, NULL, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; @@ -666,9 +655,9 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) f2fs_put_page(page, 1); clear_inode_flag(inode, FI_NEW_INODE); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); fail: up_write(&F2FS_I(inode)->i_sem); - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return err; } @@ -690,9 +679,9 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode) up_write(&F2FS_I(inode)->i_sem); if (inode->i_nlink == 0) - add_orphan_inode(inode); + f2fs_add_orphan_inode(inode); else - release_orphan_inode(sbi); + f2fs_release_orphan_inode(sbi); } /* @@ -705,17 +694,18 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, struct f2fs_dentry_block *dentry_blk; unsigned int bit_pos; int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); - struct address_space *mapping = page_mapping(page); - unsigned long flags; int i; f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); + if (F2FS_OPTION(F2FS_I_SB(dir)).fsync_mode == FSYNC_MODE_STRICT) + f2fs_add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO); + if (f2fs_has_inline_dentry(dir)) return f2fs_delete_inline_entry(dentry, page, dir, inode); lock_page(page); - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); dentry_blk = page_address(page); bit_pos = dentry - dentry_blk->dentry; @@ -726,7 +716,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, NR_DENTRY_IN_BLOCK, 0); - kunmap(page); /* kunmap - pair of f2fs_find_entry */ set_page_dirty(page); dir->i_ctime = dir->i_mtime = current_time(dir); @@ -736,17 +725,14 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, f2fs_drop_nlink(dir, inode); if (bit_pos == NR_DENTRY_IN_BLOCK && - !truncate_hole(dir, page->index, page->index + 1)) { - spin_lock_irqsave(&mapping->tree_lock, flags); - radix_tree_tag_clear(&mapping->page_tree, page_index(page), - PAGECACHE_TAG_DIRTY); - spin_unlock_irqrestore(&mapping->tree_lock, flags); - + !f2fs_truncate_hole(dir, page->index, page->index + 1)) { + f2fs_clear_radix_tree_dirty_tag(page); clear_page_dirty_for_io(page); ClearPagePrivate(page); ClearPageUptodate(page); + clear_cold_data(page); inode_dec_dirty_pages(dir); - remove_dirty_inode(dir); + f2fs_remove_dirty_inode(dir); } f2fs_put_page(page, 1); } @@ -763,7 +749,7 @@ bool f2fs_empty_dir(struct inode *dir) return f2fs_empty_inline_dir(dir); for (bidx = 0; bidx < nblock; bidx++) { - dentry_page = get_lock_data_page(dir, bidx, false); + dentry_page = f2fs_get_lock_data_page(dir, bidx, false); if (IS_ERR(dentry_page)) { if (PTR_ERR(dentry_page) == -ENOENT) continue; @@ -771,7 +757,7 @@ bool f2fs_empty_dir(struct inode *dir) return false; } - dentry_blk = kmap_atomic(dentry_page); + dentry_blk = page_address(dentry_page); if (bidx == 0) bit_pos = 2; else @@ -779,7 +765,6 @@ bool f2fs_empty_dir(struct inode *dir) bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, NR_DENTRY_IN_BLOCK, bit_pos); - kunmap_atomic(dentry_blk); f2fs_put_page(dentry_page, 1); @@ -796,9 +781,16 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, unsigned int bit_pos; struct f2fs_dir_entry *de = NULL; struct fscrypt_str de_name = FSTR_INIT(NULL, 0); + struct f2fs_sb_info *sbi = F2FS_I_SB(d->inode); + struct blk_plug plug; + bool readdir_ra = sbi->readdir_ra == 1; + int err = 0; bit_pos = ((unsigned long)ctx->pos % d->max); + if (readdir_ra) + blk_start_plug(&plug); + while (bit_pos < d->max) { bit_pos = find_next_bit_le(d->bitmap, d->max, bit_pos); if (bit_pos >= d->max) @@ -811,33 +803,50 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, continue; } - d_type = get_de_type(de); + d_type = f2fs_get_de_type(de); de_name.name = d->filename[bit_pos]; de_name.len = le16_to_cpu(de->name_len); + /* check memory boundary before moving forward */ + bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + if (unlikely(bit_pos > d->max)) { + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: corrupted namelen=%d, run fsck to fix.", + __func__, le16_to_cpu(de->name_len)); + set_sbi_flag(sbi, SBI_NEED_FSCK); + err = -EINVAL; + goto out; + } + if (f2fs_encrypted_inode(d->inode)) { int save_len = fstr->len; - int err; err = fscrypt_fname_disk_to_usr(d->inode, (u32)de->hash_code, 0, &de_name, fstr); if (err) - return err; + goto out; de_name = *fstr; fstr->len = save_len; } if (!dir_emit(ctx, de_name.name, de_name.len, - le32_to_cpu(de->ino), d_type)) - return 1; + le32_to_cpu(de->ino), d_type)) { + err = 1; + goto out; + } - bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + if (readdir_ra) + f2fs_ra_node_page(sbi, le32_to_cpu(de->ino)); + ctx->pos = start_pos + bit_pos; } - return 0; +out: + if (readdir_ra) + blk_finish_plug(&plug); + return err; } static int f2fs_readdir(struct file *file, struct dir_context *ctx) @@ -847,6 +856,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) struct f2fs_dentry_block *dentry_blk = NULL; struct page *dentry_page = NULL; struct file_ra_state *ra = &file->f_ra; + loff_t start_pos = ctx->pos; unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); struct f2fs_dentry_ptr d; struct fscrypt_str fstr = FSTR_INIT(NULL, 0); @@ -855,53 +865,60 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) if (f2fs_encrypted_inode(inode)) { err = fscrypt_get_encryption_info(inode); if (err && err != -ENOKEY) - return err; + goto out; err = fscrypt_fname_alloc_buffer(inode, F2FS_NAME_LEN, &fstr); if (err < 0) - return err; + goto out; } if (f2fs_has_inline_dentry(inode)) { err = f2fs_read_inline_dir(file, ctx, &fstr); - goto out; + goto out_free; } - /* readahead for multi pages of dir */ - if (npages - n > 1 && !ra_has_index(ra, n)) - page_cache_sync_readahead(inode->i_mapping, ra, file, n, + for (; n < npages; n++, ctx->pos = n * NR_DENTRY_IN_BLOCK) { + + /* allow readdir() to be interrupted */ + if (fatal_signal_pending(current)) { + err = -ERESTARTSYS; + goto out_free; + } + cond_resched(); + + /* readahead for multi pages of dir */ + if (npages - n > 1 && !ra_has_index(ra, n)) + page_cache_sync_readahead(inode->i_mapping, ra, file, n, min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES)); - for (; n < npages; n++) { - dentry_page = get_lock_data_page(inode, n, false); + dentry_page = f2fs_get_lock_data_page(inode, n, false); if (IS_ERR(dentry_page)) { err = PTR_ERR(dentry_page); if (err == -ENOENT) { err = 0; continue; } else { - goto out; + goto out_free; } } - dentry_blk = kmap(dentry_page); + dentry_blk = page_address(dentry_page); make_dentry_ptr_block(inode, &d, dentry_blk); err = f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr); if (err) { - kunmap(dentry_page); f2fs_put_page(dentry_page, 1); break; } - ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK; - kunmap(dentry_page); f2fs_put_page(dentry_page, 1); } -out: +out_free: fscrypt_fname_free_buffer(&fstr); +out: + trace_f2fs_readdir(inode, start_pos, ctx->pos, err); return err < 0 ? err : 0; }
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index aff6c2e..1cb0fcc 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c
@@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs extent cache support * @@ -5,10 +6,6 @@ * Copyright (c) 2015 Samsung Electronics * Authors: Jaegeuk Kim <jaegeuk@kernel.org> * Chao Yu <chao2.yu@samsung.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> @@ -30,10 +27,10 @@ static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re, return NULL; } -static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root, +static struct rb_entry *__lookup_rb_tree_slow(struct rb_root_cached *root, unsigned int ofs) { - struct rb_node *node = root->rb_node; + struct rb_node *node = root->rb_root.rb_node; struct rb_entry *re; while (node) { @@ -49,7 +46,7 @@ static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root, return NULL; } -struct rb_entry *__lookup_rb_tree(struct rb_root *root, +struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root, struct rb_entry *cached_re, unsigned int ofs) { struct rb_entry *re; @@ -61,23 +58,26 @@ struct rb_entry *__lookup_rb_tree(struct rb_root *root, return re; } -struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, - struct rb_root *root, struct rb_node **parent, - unsigned int ofs) +struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, + struct rb_root_cached *root, + struct rb_node **parent, + unsigned int ofs, bool *leftmost) { - struct rb_node **p = &root->rb_node; + struct rb_node **p = &root->rb_root.rb_node; struct rb_entry *re; while (*p) { *parent = *p; re = rb_entry(*parent, struct rb_entry, rb_node); - if (ofs < re->ofs) + if (ofs < re->ofs) { p = &(*p)->rb_left; - else if (ofs >= re->ofs + re->len) + } else if (ofs >= re->ofs + re->len) { p = &(*p)->rb_right; - else + *leftmost = false; + } else { f2fs_bug_on(sbi, 1); + } } return p; @@ -92,16 +92,16 @@ struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, * in order to simpfy the insertion after. * tree must stay unchanged between lookup and insertion. */ -struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root, +struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root, struct rb_entry *cached_re, unsigned int ofs, struct rb_entry **prev_entry, struct rb_entry **next_entry, struct rb_node ***insert_p, struct rb_node **insert_parent, - bool force) + bool force, bool *leftmost) { - struct rb_node **pnode = &root->rb_node; + struct rb_node **pnode = &root->rb_root.rb_node; struct rb_node *parent = NULL, *tmp_node; struct rb_entry *re = cached_re; @@ -110,7 +110,7 @@ struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root, *prev_entry = NULL; *next_entry = NULL; - if (RB_EMPTY_ROOT(root)) + if (RB_EMPTY_ROOT(&root->rb_root)) return NULL; if (re) { @@ -118,16 +118,22 @@ struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root, goto lookup_neighbors; } + if (leftmost) + *leftmost = true; + while (*pnode) { parent = *pnode; re = rb_entry(*pnode, struct rb_entry, rb_node); - if (ofs < re->ofs) + if (ofs < re->ofs) { pnode = &(*pnode)->rb_left; - else if (ofs >= re->ofs + re->len) + } else if (ofs >= re->ofs + re->len) { pnode = &(*pnode)->rb_right; - else + if (leftmost) + *leftmost = false; + } else { goto lookup_neighbors; + } } *insert_p = pnode; @@ -159,11 +165,11 @@ struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root, return re; } -bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi, - struct rb_root *root) +bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi, + struct rb_root_cached *root) { #ifdef CONFIG_F2FS_CHECK_FS - struct rb_node *cur = rb_first(root), *next; + struct rb_node *cur = rb_first_cached(root), *next; struct rb_entry *cur_re, *next_re; if (!cur) @@ -196,7 +202,8 @@ static struct kmem_cache *extent_node_slab; static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_info *ei, - struct rb_node *parent, struct rb_node **p) + struct rb_node *parent, struct rb_node **p, + bool leftmost) { struct extent_node *en; @@ -209,7 +216,7 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, en->et = et; rb_link_node(&en->rb_node, parent, p); - rb_insert_color(&en->rb_node, &et->root); + rb_insert_color_cached(&en->rb_node, &et->root, leftmost); atomic_inc(&et->node_cnt); atomic_inc(&sbi->total_ext_node); return en; @@ -218,7 +225,7 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, static void __detach_extent_node(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_node *en) { - rb_erase(&en->rb_node, &et->root); + rb_erase_cached(&en->rb_node, &et->root); atomic_dec(&et->node_cnt); atomic_dec(&sbi->total_ext_node); @@ -257,7 +264,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode) f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et); memset(et, 0, sizeof(struct extent_tree)); et->ino = ino; - et->root = RB_ROOT; + et->root = RB_ROOT_CACHED; et->cached_en = NULL; rwlock_init(&et->lock); INIT_LIST_HEAD(&et->list); @@ -278,10 +285,10 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode) static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_info *ei) { - struct rb_node **p = &et->root.rb_node; + struct rb_node **p = &et->root.rb_root.rb_node; struct extent_node *en; - en = __attach_extent_node(sbi, et, ei, NULL, p); + en = __attach_extent_node(sbi, et, ei, NULL, p, true); if (!en) return NULL; @@ -297,7 +304,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, struct extent_node *en; unsigned int count = atomic_read(&et->node_cnt); - node = rb_first(&et->root); + node = rb_first_cached(&et->root); while (node) { next = rb_next(node); en = rb_entry(node, struct extent_node, rb_node); @@ -308,14 +315,13 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, return count - atomic_read(&et->node_cnt); } -static void __drop_largest_extent(struct inode *inode, +static void __drop_largest_extent(struct extent_tree *et, pgoff_t fofs, unsigned int len) { - struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest; - - if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) { - largest->len = 0; - f2fs_mark_inode_dirty_sync(inode, true); + if (fofs < et->largest.fofs + et->largest.len && + fofs + len > et->largest.fofs) { + et->largest.len = 0; + et->largest_updated = true; } } @@ -390,7 +396,7 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, goto out; } - en = (struct extent_node *)__lookup_rb_tree(&et->root, + en = (struct extent_node *)f2fs_lookup_rb_tree(&et->root, (struct rb_entry *)et->cached_en, pgofs); if (!en) goto out; @@ -416,12 +422,11 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, return ret; } -static struct extent_node *__try_merge_extent_node(struct inode *inode, +static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_info *ei, struct extent_node *prev_ex, struct extent_node *next_ex) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_node *en = NULL; if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) { @@ -443,7 +448,7 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode, if (!en) return NULL; - __try_update_largest_extent(inode, et, en); + __try_update_largest_extent(et, en); spin_lock(&sbi->extent_lock); if (!list_empty(&en->list)) { @@ -454,13 +459,13 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode, return en; } -static struct extent_node *__insert_extent_tree(struct inode *inode, +static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_info *ei, struct rb_node **insert_p, - struct rb_node *insert_parent) + struct rb_node *insert_parent, + bool leftmost) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct rb_node **p = &et->root.rb_node; + struct rb_node **p; struct rb_node *parent = NULL; struct extent_node *en = NULL; @@ -470,13 +475,16 @@ static struct extent_node *__insert_extent_tree(struct inode *inode, goto do_insert; } - p = __lookup_rb_tree_for_insert(sbi, &et->root, &parent, ei->fofs); + leftmost = true; + + p = f2fs_lookup_rb_tree_for_insert(sbi, &et->root, &parent, + ei->fofs, &leftmost); do_insert: - en = __attach_extent_node(sbi, et, ei, parent, p); + en = __attach_extent_node(sbi, et, ei, parent, p, leftmost); if (!en) return NULL; - __try_update_largest_extent(inode, et, en); + __try_update_largest_extent(et, en); /* update in global extent list */ spin_lock(&sbi->extent_lock); @@ -497,6 +505,8 @@ static void f2fs_update_extent_tree_range(struct inode *inode, struct rb_node **insert_p = NULL, *insert_parent = NULL; unsigned int end = fofs + len; unsigned int pos = (unsigned int)fofs; + bool updated = false; + bool leftmost; if (!et) return; @@ -517,14 +527,15 @@ static void f2fs_update_extent_tree_range(struct inode *inode, * drop largest extent before lookup, in case it's already * been shrunk from extent tree */ - __drop_largest_extent(inode, fofs, len); + __drop_largest_extent(et, fofs, len); /* 1. lookup first extent node in range [fofs, fofs + len - 1] */ - en = (struct extent_node *)__lookup_rb_tree_ret(&et->root, + en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root, (struct rb_entry *)et->cached_en, fofs, (struct rb_entry **)&prev_en, (struct rb_entry **)&next_en, - &insert_p, &insert_parent, false); + &insert_p, &insert_parent, false, + &leftmost); if (!en) en = next_en; @@ -550,8 +561,8 @@ static void f2fs_update_extent_tree_range(struct inode *inode, set_extent_info(&ei, end, end - dei.fofs + dei.blk, org_end - end); - en1 = __insert_extent_tree(inode, et, &ei, - NULL, NULL); + en1 = __insert_extent_tree(sbi, et, &ei, + NULL, NULL, true); next_en = en1; } else { en->ei.fofs = end; @@ -570,7 +581,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, } if (parts) - __try_update_largest_extent(inode, et, en); + __try_update_largest_extent(et, en); else __release_extent_node(sbi, et, en); @@ -590,15 +601,16 @@ static void f2fs_update_extent_tree_range(struct inode *inode, if (blkaddr) { set_extent_info(&ei, fofs, blkaddr, len); - if (!__try_merge_extent_node(inode, et, &ei, prev_en, next_en)) - __insert_extent_tree(inode, et, &ei, - insert_p, insert_parent); + if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) + __insert_extent_tree(sbi, et, &ei, + insert_p, insert_parent, leftmost); /* give up extent_cache, if split and small updates happen */ if (dei.len >= 1 && prev.len < F2FS_MIN_EXTENT_LEN && et->largest.len < F2FS_MIN_EXTENT_LEN) { - __drop_largest_extent(inode, 0, UINT_MAX); + et->largest.len = 0; + et->largest_updated = true; set_inode_flag(inode, FI_NO_EXTENT); } } @@ -606,7 +618,15 @@ static void f2fs_update_extent_tree_range(struct inode *inode, if (is_inode_flag_set(inode, FI_NO_EXTENT)) __free_extent_tree(sbi, et); + if (et->largest_updated) { + et->largest_updated = false; + updated = true; + } + write_unlock(&et->lock); + + if (updated) + f2fs_mark_inode_dirty_sync(inode, true); } unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) @@ -705,6 +725,7 @@ void f2fs_drop_extent_tree(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et = F2FS_I(inode)->extent_tree; + bool updated = false; if (!f2fs_may_extent_tree(inode)) return; @@ -713,8 +734,13 @@ void f2fs_drop_extent_tree(struct inode *inode) write_lock(&et->lock); __free_extent_tree(sbi, et); - __drop_largest_extent(inode, 0, UINT_MAX); + if (et->largest.len) { + et->largest.len = 0; + updated = true; + } write_unlock(&et->lock); + if (updated) + f2fs_mark_inode_dirty_sync(inode, true); } void f2fs_destroy_extent_tree(struct inode *inode) @@ -773,7 +799,7 @@ void f2fs_update_extent_cache(struct dnode_of_data *dn) else blkaddr = dn->data_blkaddr; - fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + + fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + dn->ofs_in_node; f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, 1); } @@ -788,7 +814,7 @@ void f2fs_update_extent_cache_range(struct dnode_of_data *dn, f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len); } -void init_extent_cache_info(struct f2fs_sb_info *sbi) +void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi) { INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO); mutex_init(&sbi->extent_tree_lock); @@ -800,7 +826,7 @@ void init_extent_cache_info(struct f2fs_sb_info *sbi) atomic_set(&sbi->total_ext_node, 0); } -int __init create_extent_cache(void) +int __init f2fs_create_extent_cache(void) { extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree", sizeof(struct extent_tree)); @@ -815,7 +841,7 @@ int __init create_extent_cache(void) return 0; } -void destroy_extent_cache(void) +void f2fs_destroy_extent_cache(void) { kmem_cache_destroy(extent_node_slab); kmem_cache_destroy(extent_tree_slab);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 634165f..ecaa01a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h
@@ -1,16 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/f2fs.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef _LINUX_F2FS_H #define _LINUX_F2FS_H +#include <linux/uio.h> #include <linux/types.h> #include <linux/page-flags.h> #include <linux/buffer_head.h> @@ -19,16 +17,16 @@ #include <linux/magic.h> #include <linux/kobject.h> #include <linux/sched.h> +#include <linux/cred.h> #include <linux/vmalloc.h> #include <linux/bio.h> #include <linux/blkdev.h> #include <linux/quotaops.h> -#ifdef CONFIG_F2FS_FS_ENCRYPTION -#include <linux/fscrypt_supp.h> -#else -#include <linux/fscrypt_notsupp.h> -#endif #include <crypto/hash.h> +#include <linux/overflow.h> + +#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_F2FS_FS_ENCRYPTION) +#include <linux/fscrypt.h> #ifdef CONFIG_F2FS_CHECK_FS #define f2fs_bug_on(sbi, condition) BUG_ON(condition) @@ -42,28 +40,35 @@ } while (0) #endif -#ifdef CONFIG_F2FS_FAULT_INJECTION enum { FAULT_KMALLOC, + FAULT_KVMALLOC, FAULT_PAGE_ALLOC, + FAULT_PAGE_GET, + FAULT_ALLOC_BIO, FAULT_ALLOC_NID, FAULT_ORPHAN, FAULT_BLOCK, FAULT_DIR_DEPTH, FAULT_EVICT_INODE, FAULT_TRUNCATE, - FAULT_IO, + FAULT_READ_IO, FAULT_CHECKPOINT, + FAULT_DISCARD, + FAULT_WRITE_IO, FAULT_MAX, }; +#ifdef CONFIG_F2FS_FAULT_INJECTION +#define F2FS_ALL_FAULT_TYPE ((1 << FAULT_MAX) - 1) + struct f2fs_fault_info { atomic_t inject_ops; unsigned int inject_rate; unsigned int inject_type; }; -extern char *fault_name[FAULT_MAX]; +extern const char *f2fs_fault_name[FAULT_MAX]; #define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type))) #endif @@ -93,10 +98,14 @@ extern char *fault_name[FAULT_MAX]; #define F2FS_MOUNT_GRPQUOTA 0x00100000 #define F2FS_MOUNT_PRJQUOTA 0x00200000 #define F2FS_MOUNT_QUOTA 0x00400000 +#define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000 +#define F2FS_MOUNT_RESERVE_ROOT 0x01000000 +#define F2FS_MOUNT_DISABLE_CHECKPOINT 0x02000000 -#define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option) -#define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option) -#define test_opt(sbi, option) ((sbi)->mount_opt.opt & F2FS_MOUNT_##option) +#define F2FS_OPTION(sbi) ((sbi)->mount_opt) +#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) +#define set_opt(sbi, option) (F2FS_OPTION(sbi).opt |= F2FS_MOUNT_##option) +#define test_opt(sbi, option) (F2FS_OPTION(sbi).opt & F2FS_MOUNT_##option) #define ver_after(a, b) (typecheck(unsigned long long, a) && \ typecheck(unsigned long long, b) && \ @@ -109,7 +118,26 @@ typedef u32 block_t; /* typedef u32 nid_t; struct f2fs_mount_info { - unsigned int opt; + unsigned int opt; + int write_io_size_bits; /* Write IO size bits */ + block_t root_reserved_blocks; /* root reserved blocks */ + kuid_t s_resuid; /* reserved blocks for uid */ + kgid_t s_resgid; /* reserved blocks for gid */ + int active_logs; /* # of active logs */ + int inline_xattr_size; /* inline xattr size */ +#ifdef CONFIG_F2FS_FAULT_INJECTION + struct f2fs_fault_info fault_info; /* For fault injection */ +#endif +#ifdef CONFIG_QUOTA + /* Names of quota files with journalled quota */ + char *s_qf_names[MAXQUOTAS]; + int s_jquota_fmt; /* Format of quota to use */ +#endif + /* For which write hints are passed down to block layer */ + int whint_mode; + int alloc_mode; /* segment allocation policy */ + int fsync_mode; /* fsync policy */ + bool test_dummy_encryption; /* test dummy encryption */ }; #define F2FS_FEATURE_ENCRYPT 0x0001 @@ -118,13 +146,26 @@ struct f2fs_mount_info { #define F2FS_FEATURE_EXTRA_ATTR 0x0008 #define F2FS_FEATURE_PRJQUOTA 0x0010 #define F2FS_FEATURE_INODE_CHKSUM 0x0020 +#define F2FS_FEATURE_FLEXIBLE_INLINE_XATTR 0x0040 +#define F2FS_FEATURE_QUOTA_INO 0x0080 +#define F2FS_FEATURE_INODE_CRTIME 0x0100 +#define F2FS_FEATURE_LOST_FOUND 0x0200 +#define F2FS_FEATURE_VERITY 0x0400 /* reserved */ +#define F2FS_FEATURE_SB_CHKSUM 0x0800 -#define F2FS_HAS_FEATURE(sb, mask) \ - ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) -#define F2FS_SET_FEATURE(sb, mask) \ - (F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask)) -#define F2FS_CLEAR_FEATURE(sb, mask) \ - (F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask)) +#define __F2FS_HAS_FEATURE(raw_super, mask) \ + ((raw_super->feature & cpu_to_le32(mask)) != 0) +#define F2FS_HAS_FEATURE(sbi, mask) __F2FS_HAS_FEATURE(sbi->raw_super, mask) +#define F2FS_SET_FEATURE(sbi, mask) \ + (sbi->raw_super->feature |= cpu_to_le32(mask)) +#define F2FS_CLEAR_FEATURE(sbi, mask) \ + (sbi->raw_super->feature &= ~cpu_to_le32(mask)) + +/* + * Default values for user and/or group using reserved blocks + */ +#define F2FS_DEF_RESUID 0 +#define F2FS_DEF_RESGID 0 /* * For checkpoint manager @@ -140,25 +181,23 @@ enum { #define CP_RECOVERY 0x00000008 #define CP_DISCARD 0x00000010 #define CP_TRIMMED 0x00000020 +#define CP_PAUSE 0x00000040 -#define DEF_BATCHED_TRIM_SECTIONS 2048 -#define BATCHED_TRIM_SEGMENTS(sbi) \ - (GET_SEG_FROM_SEC(sbi, SM_I(sbi)->trim_sections)) -#define BATCHED_TRIM_BLOCKS(sbi) \ - (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) #define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) -#define DISCARD_ISSUE_RATE 8 +#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */ #define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */ +#define DEF_MID_DISCARD_ISSUE_TIME 500 /* 500 ms, if device busy */ #define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */ +#define DEF_DISCARD_URGENT_UTIL 80 /* do more discard over 80% */ #define DEF_CP_INTERVAL 60 /* 60 secs */ #define DEF_IDLE_INTERVAL 5 /* 5 secs */ +#define DEF_DISABLE_INTERVAL 5 /* 5 secs */ struct cp_control { int reason; __u64 trim_start; __u64 trim_end; __u64 trim_minlen; - __u64 trimmed; }; /* @@ -169,6 +208,7 @@ enum { META_NAT, META_SIT, META_SSA, + META_MAX, META_POR, DATA_GENERIC, META_GENERIC, @@ -179,12 +219,15 @@ enum { ORPHAN_INO, /* for orphan ino list */ APPEND_INO, /* for append ino list */ UPDATE_INO, /* for update ino list */ + TRANS_DIR_INO, /* for trasactions dir ino list */ + FLUSH_INO, /* for multiple device flushing */ MAX_INO_ENTRY, /* max. list */ }; struct ino_entry { - struct list_head list; /* list head */ - nid_t ino; /* inode number */ + struct list_head list; /* list head */ + nid_t ino; /* inode number */ + unsigned int dirty_device; /* dirty device bitmap */ }; /* for the list of inodes to be GCed */ @@ -193,6 +236,12 @@ struct inode_entry { struct inode *inode; /* vfs inode pointer */ }; +struct fsync_node_entry { + struct list_head list; /* list head */ + struct page *page; /* warm node page pointer */ + unsigned int seq_id; /* sequence id */ +}; + /* for the bitmap indicate blocks to be discarded */ struct discard_entry { struct list_head list; /* list head */ @@ -208,14 +257,11 @@ struct discard_entry { #define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \ (MAX_PLIST_NUM - 1) : (blk_num - 1)) -#define P_ACTIVE 0x01 -#define P_TRIM 0x02 -#define plist_issue(tag) (((tag) & P_ACTIVE) || ((tag) & P_TRIM)) - enum { - D_PREP, - D_SUBMIT, - D_DONE, + D_PREP, /* initial */ + D_PARTIAL, /* partially submitted */ + D_SUBMIT, /* all submitted */ + D_DONE, /* finished */ }; struct discard_info { @@ -240,15 +286,39 @@ struct discard_cmd { struct block_device *bdev; /* bdev */ unsigned short ref; /* reference count */ unsigned char state; /* state */ + unsigned char queued; /* queued discard */ int error; /* bio error */ + spinlock_t lock; /* for state/bio_ref updating */ + unsigned short bio_ref; /* bio reference count */ +}; + +enum { + DPOLICY_BG, + DPOLICY_FORCE, + DPOLICY_FSTRIM, + DPOLICY_UMOUNT, + MAX_DPOLICY, +}; + +struct discard_policy { + int type; /* type of discard */ + unsigned int min_interval; /* used for candidates exist */ + unsigned int mid_interval; /* used for device busy */ + unsigned int max_interval; /* used for candidates not exist */ + unsigned int max_requests; /* # of discards issued per round */ + unsigned int io_aware_gran; /* minimum granularity discard not be aware of I/O */ + bool io_aware; /* issue discard in idle time */ + bool sync; /* submit discard with REQ_SYNC flag */ + bool ordered; /* issue discard by lba order */ + unsigned int granularity; /* discard granularity */ }; struct discard_cmd_control { struct task_struct *f2fs_issue_discard; /* discard thread */ struct list_head entry_list; /* 4KB discard entry list */ struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */ - unsigned char pend_list_tag[MAX_PLIST_NUM];/* tag for pending entries */ struct list_head wait_list; /* store on-flushing entries */ + struct list_head fstrim_list; /* in-flight discard from fstrim */ wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */ unsigned int discard_wake; /* to wake up discard thread */ struct mutex cmd_lock; @@ -256,10 +326,12 @@ struct discard_cmd_control { unsigned int max_discards; /* max. discards to be issued */ unsigned int discard_granularity; /* discard granularity */ unsigned int undiscard_blks; /* # of undiscard blocks */ + unsigned int next_pos; /* next discard position */ atomic_t issued_discard; /* # of issued discard */ - atomic_t issing_discard; /* # of issing discard */ + atomic_t queued_discard; /* # of queued discard */ atomic_t discard_cmd_cnt; /* # of cached cmd count */ - struct rb_root root; /* root of discard rb-tree */ + struct rb_root_cached root; /* root of discard rb-tree */ + bool rbtree_check; /* config for consistence check */ }; /* for the list of fsync inodes, used only during recovery */ @@ -329,6 +401,9 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11, \ struct f2fs_gc_range) #define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, __u32) +#define F2FS_IOC_SET_PIN_FILE _IOW(F2FS_IOCTL_MAGIC, 13, __u32) +#define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, __u32) +#define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15) #define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY #define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY @@ -343,6 +418,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */ #define F2FS_GOING_DOWN_NOSYNC 0x2 /* going down */ #define F2FS_GOING_DOWN_METAFLUSH 0x3 /* going down with meta flush */ +#define F2FS_GOING_DOWN_NEED_FSCK 0x4 /* going down to trigger fsck */ #if defined(__KERNEL__) && defined(CONFIG_COMPAT) /* @@ -381,11 +457,13 @@ struct f2fs_flush_device { /* for inline stuff */ #define DEF_INLINE_RESERVED_SIZE 1 +#define DEF_MIN_INLINE_SIZE 1 static inline int get_extra_isize(struct inode *inode); -#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \ - (CUR_ADDRS_PER_INODE(inode) - \ - DEF_INLINE_RESERVED_SIZE - \ - F2FS_INLINE_XATTR_ADDRS)) +static inline int get_inline_xattr_addrs(struct inode *inode); +#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \ + (CUR_ADDRS_PER_INODE(inode) - \ + get_inline_xattr_addrs(inode) - \ + DEF_INLINE_RESERVED_SIZE)) /* for inline dir */ #define NR_INLINE_DENTRY(inode) (MAX_INLINE_DATA(inode) * BITS_PER_BYTE / \ @@ -417,7 +495,7 @@ static inline void make_dentry_ptr_block(struct inode *inode, d->inode = inode; d->max = NR_DENTRY_IN_BLOCK; d->nr_bitmap = SIZE_OF_DENTRY_BITMAP; - d->bitmap = &t->dentry_bitmap; + d->bitmap = t->dentry_bitmap; d->dentry = t->dentry; d->filename = t->filename; } @@ -454,13 +532,15 @@ enum { */ }; +#define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */ + +/* maximum retry quota flush count */ +#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT 8 + #define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */ #define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ -/* vector size for gang look-up from extent cache that consists of radix tree */ -#define EXT_TREE_VEC_SIZE 64 - /* for in-memory extent cache entry */ #define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */ @@ -480,28 +560,21 @@ struct extent_info { }; struct extent_node { - struct rb_node rb_node; - union { - struct { - unsigned int fofs; - unsigned int len; - u32 blk; - }; - struct extent_info ei; /* extent info */ - - }; + struct rb_node rb_node; /* rb node located in rb-tree */ + struct extent_info ei; /* extent info */ struct list_head list; /* node in global extent list of sbi */ struct extent_tree *et; /* extent tree pointer */ }; struct extent_tree { nid_t ino; /* inode number */ - struct rb_root root; /* root of extent info rb-tree */ + struct rb_root_cached root; /* root of extent info rb-tree */ struct extent_node *cached_en; /* recently accessed extent node */ struct extent_info largest; /* largested extent info */ struct list_head list; /* to be used by sbi->zombie_list */ rwlock_t lock; /* protect extent info rb-tree */ atomic_t node_cnt; /* # of extent node in rb-tree*/ + bool largest_updated; /* largest extent updated */ }; /* @@ -521,6 +594,9 @@ struct f2fs_map_blocks { unsigned int m_len; unsigned int m_flags; pgoff_t *m_next_pgofs; /* point next possible non-hole pgofs */ + pgoff_t *m_next_extent; /* point to next possible extent */ + int m_seg_type; + bool m_may_create; /* indicate it is from write path */ }; /* for flag in get_data_block */ @@ -528,8 +604,10 @@ enum { F2FS_GET_BLOCK_DEFAULT, F2FS_GET_BLOCK_FIEMAP, F2FS_GET_BLOCK_BMAP, + F2FS_GET_BLOCK_DIO, F2FS_GET_BLOCK_PRE_DIO, F2FS_GET_BLOCK_PRE_AIO, + F2FS_GET_BLOCK_PRECACHE, }; /* @@ -540,6 +618,10 @@ enum { #define FADVISE_ENCRYPT_BIT 0x04 #define FADVISE_ENC_NAME_BIT 0x08 #define FADVISE_KEEP_SIZE_BIT 0x10 +#define FADVISE_HOT_BIT 0x20 +#define FADVISE_VERITY_BIT 0x40 /* reserved */ + +#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT) #define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) #define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) @@ -554,15 +636,26 @@ enum { #define file_set_enc_name(inode) set_file(inode, FADVISE_ENC_NAME_BIT) #define file_keep_isize(inode) is_file(inode, FADVISE_KEEP_SIZE_BIT) #define file_set_keep_isize(inode) set_file(inode, FADVISE_KEEP_SIZE_BIT) +#define file_is_hot(inode) is_file(inode, FADVISE_HOT_BIT) +#define file_set_hot(inode) set_file(inode, FADVISE_HOT_BIT) +#define file_clear_hot(inode) clear_file(inode, FADVISE_HOT_BIT) #define DEF_DIR_LEVEL 0 +enum { + GC_FAILURE_PIN, + GC_FAILURE_ATOMIC, + MAX_GC_FAILURE +}; + struct f2fs_inode_info { struct inode vfs_inode; /* serve a vfs inode */ unsigned long i_flags; /* keep an inode flags for ioctl */ unsigned char i_advise; /* use to give file attribute hints */ unsigned char i_dir_level; /* use for dentry level for large dir */ - unsigned int i_current_depth; /* use only in directory structure */ + unsigned int i_current_depth; /* only for directory depth */ + /* for gc failure statistic */ + unsigned int i_gc_failures[MAX_GC_FAILURE]; unsigned int i_pino; /* parent inode number */ umode_t i_acl_mode; /* keep file acl mode temporarily */ @@ -585,16 +678,22 @@ struct f2fs_inode_info { #endif struct list_head dirty_list; /* dirty list for dirs and files */ struct list_head gdirty_list; /* linked in global dirty list */ + struct list_head inmem_ilist; /* list for inmem inodes */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct task_struct *inmem_task; /* store inmemory task */ struct mutex inmem_lock; /* lock for inmemory pages */ struct extent_tree *extent_tree; /* cached extent_tree entry */ - struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */ + + /* avoid racing between foreground op and gc */ + struct rw_semaphore i_gc_rwsem[2]; struct rw_semaphore i_mmap_sem; struct rw_semaphore i_xattr_sem; /* avoid racing between reading and changing EAs */ int i_extra_isize; /* size of extra space located in i_addr */ kprojid_t i_projid; /* id for project quota */ + int i_inline_xattr_size; /* inline xattr size */ + struct timespec i_crtime; /* inode creation time */ + struct timespec i_disk_time[4]; /* inode disk times */ }; static inline void get_extent_info(struct extent_info *ext, @@ -622,21 +721,22 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs, } static inline bool __is_discard_mergeable(struct discard_info *back, - struct discard_info *front) + struct discard_info *front, unsigned int max_len) { - return back->lstart + back->len == front->lstart; + return (back->lstart + back->len == front->lstart) && + (back->len + front->len <= max_len); } static inline bool __is_discard_back_mergeable(struct discard_info *cur, - struct discard_info *back) + struct discard_info *back, unsigned int max_len) { - return __is_discard_mergeable(back, cur); + return __is_discard_mergeable(back, cur, max_len); } static inline bool __is_discard_front_mergeable(struct discard_info *cur, - struct discard_info *front) + struct discard_info *front, unsigned int max_len) { - return __is_discard_mergeable(cur, front); + return __is_discard_mergeable(cur, front, max_len); } static inline bool __is_extent_mergeable(struct extent_info *back, @@ -659,19 +759,22 @@ static inline bool __is_front_mergeable(struct extent_info *cur, } extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync); -static inline void __try_update_largest_extent(struct inode *inode, - struct extent_tree *et, struct extent_node *en) +static inline void __try_update_largest_extent(struct extent_tree *et, + struct extent_node *en) { if (en->ei.len > et->largest.len) { et->largest = en->ei; - f2fs_mark_inode_dirty_sync(inode, true); + et->largest_updated = true; } } -enum nid_list { - FREE_NID_LIST, - ALLOC_NID_LIST, - MAX_NID_LIST, +/* + * For free nid management + */ +enum nid_state { + FREE_NID, /* newly added to free nid list */ + PREALLOC_NID, /* it is preallocated */ + MAX_NID_STATE, }; struct f2fs_nm_info { @@ -688,17 +791,18 @@ struct f2fs_nm_info { struct radix_tree_root nat_set_root;/* root of the nat set cache */ struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ struct list_head nat_entries; /* cached nat entry list (clean) */ + spinlock_t nat_list_lock; /* protect clean nat entry list */ unsigned int nat_cnt; /* the # of cached nat entries */ unsigned int dirty_nat_cnt; /* total num of nat entries in set */ unsigned int nat_blocks; /* # of nat blocks */ /* free node ids management */ struct radix_tree_root free_nid_root;/* root of the free_nid cache */ - struct list_head nid_list[MAX_NID_LIST];/* lists for free nids */ - unsigned int nid_cnt[MAX_NID_LIST]; /* the number of free node id */ + struct list_head free_nid_list; /* list for free nids excluding preallocated nids */ + unsigned int nid_cnt[MAX_NID_STATE]; /* the number of free node id */ spinlock_t nid_list_lock; /* protect nid lists ops */ struct mutex build_lock; /* lock for build free nids */ - unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE]; + unsigned char **free_nid_bitmap; unsigned char *nat_block_bitmap; unsigned short *free_nid_count; /* free nid count of NAT block */ @@ -773,6 +877,7 @@ enum { struct flush_cmd { struct completion wait; struct llist_node llnode; + nid_t ino; int ret; }; @@ -780,7 +885,7 @@ struct flush_cmd_control { struct task_struct *f2fs_issue_flush; /* flush thread */ wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ atomic_t issued_flush; /* # of issued flushes */ - atomic_t issing_flush; /* # of issing flushes */ + atomic_t queued_flush; /* # of queued flushes */ struct llist_head issue_list; /* list for command issue */ struct llist_node *dispatch_list; /* list for command dispatch */ }; @@ -791,6 +896,8 @@ struct f2fs_sm_info { struct dirty_seglist_info *dirty_info; /* dirty segment information */ struct curseg_info *curseg_array; /* active segment information */ + struct rw_semaphore curseg_lock; /* for preventing curseg change */ + block_t seg0_blkaddr; /* block address of 0'th segment */ block_t main_blkaddr; /* start block address of main area */ block_t ssa_blkaddr; /* start block address of SSA area */ @@ -811,7 +918,9 @@ struct f2fs_sm_info { unsigned int ipu_policy; /* in-place-update policy */ unsigned int min_ipu_util; /* in-place-update threshold */ unsigned int min_fsync_blocks; /* threshold for fsync */ + unsigned int min_seq_blocks; /* threshold for sequential blocks */ unsigned int min_hot_blocks; /* threshold for hot block allocation */ + unsigned int min_ssr_sections; /* threshold to trigger SSR allocation */ /* for flush command control */ struct flush_cmd_control *fcc_info; @@ -833,12 +942,18 @@ struct f2fs_sm_info { enum count_type { F2FS_DIRTY_DENTS, F2FS_DIRTY_DATA, + F2FS_DIRTY_QDATA, F2FS_DIRTY_NODES, F2FS_DIRTY_META, F2FS_INMEM_PAGES, F2FS_DIRTY_IMETA, F2FS_WB_CP_DATA, F2FS_WB_DATA, + F2FS_RD_DATA, + F2FS_RD_NODE, + F2FS_RD_META, + F2FS_DIO_WRITE, + F2FS_DIO_READ, NR_COUNT_TYPE, }; @@ -881,6 +996,19 @@ enum need_lock_type { LOCK_RETRY, }; +enum cp_reason_type { + CP_NO_NEEDED, + CP_NON_REGULAR, + CP_HARDLINK, + CP_SB_NEED_CP, + CP_WRONG_PINO, + CP_NO_SPC_ROLL, + CP_NODE_NEED_CP, + CP_FASTBOOT_MODE, + CP_SPEC_LOG_NUM, + CP_RECOVER_DIR, +}; + enum iostat_type { APP_DIRECT_IO, /* app direct IOs */ APP_BUFFERED_IO, /* app buffered IOs */ @@ -900,6 +1028,7 @@ enum iostat_type { struct f2fs_io_info { struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ + nid_t ino; /* inode number */ enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ enum temp_type temp; /* contains HOT/WARM/COLD */ int op; /* contains REQ_OP_ */ @@ -913,7 +1042,10 @@ struct f2fs_io_info { int need_lock; /* indicate we need to lock cp_rwsem */ bool in_list; /* indicate fio is in io_list */ bool is_meta; /* indicate borrow meta inode mapping or not */ + bool retry; /* need to reallocate block address */ enum iostat_type io_type; /* io type */ + struct writeback_control *io_wbc; /* writeback control */ + unsigned char version; /* version of the node */ }; #define is_read_io(rw) ((rw) == READ) @@ -945,6 +1077,7 @@ enum inode_type { DIR_INODE, /* for dirty dir inode */ FILE_INODE, /* for dirty regular/symlink inode */ DIRTY_META, /* for all dirtied inode metadata */ + ATOMIC_FILE, /* for all atomic files */ NR_INODE_TYPE, }; @@ -964,20 +1097,62 @@ enum { SBI_POR_DOING, /* recovery is doing or not */ SBI_NEED_SB_WRITE, /* need to recover superblock */ SBI_NEED_CP, /* need to checkpoint */ + SBI_IS_SHUTDOWN, /* shutdown by ioctl */ + SBI_IS_RECOVERED, /* recovered orphan/data */ + SBI_CP_DISABLED, /* CP was disabled last mount */ + SBI_QUOTA_NEED_FLUSH, /* need to flush quota info in CP */ + SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */ + SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */ }; enum { CP_TIME, REQ_TIME, + DISCARD_TIME, + GC_TIME, + DISABLE_TIME, MAX_TIME, }; +enum { + GC_NORMAL, + GC_IDLE_CB, + GC_IDLE_GREEDY, + GC_URGENT, +}; + +enum { + WHINT_MODE_OFF, /* not pass down write hints */ + WHINT_MODE_USER, /* try to pass down hints given by users */ + WHINT_MODE_FS, /* pass down hints with F2FS policy */ +}; + +enum { + ALLOC_MODE_DEFAULT, /* stay default */ + ALLOC_MODE_REUSE, /* reuse segments as much as possible */ +}; + +enum fsync_mode { + FSYNC_MODE_POSIX, /* fsync follows posix semantics */ + FSYNC_MODE_STRICT, /* fsync behaves in line with ext4 */ + FSYNC_MODE_NOBARRIER, /* fsync behaves nobarrier based on posix */ +}; + +#ifdef CONFIG_F2FS_FS_ENCRYPTION +#define DUMMY_ENCRYPTION_ENABLED(sbi) \ + (unlikely(F2FS_OPTION(sbi).test_dummy_encryption)) +#else +#define DUMMY_ENCRYPTION_ENABLED(sbi) (0) +#endif + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ struct f2fs_super_block *raw_super; /* raw super block pointer */ + struct rw_semaphore sb_lock; /* lock for raw super block */ int valid_super_block; /* valid super block no */ unsigned long s_flag; /* flags for sbi */ + struct mutex writepages; /* mutex for writepages() */ #ifdef CONFIG_BLK_DEV_ZONED unsigned int blocks_per_blkz; /* F2FS blocks per zone */ @@ -993,9 +1168,8 @@ struct f2fs_sb_info { /* for bio operations */ struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */ - struct mutex wio_mutex[NR_PAGE_TYPE - 1][NR_TEMP_TYPE]; - /* bio ordering for NODE/DATA */ - int write_io_size_bits; /* Write IO size bits */ + /* keep migration IO order for LFS mode */ + struct rw_semaphore io_order_lock; mempool_t *write_io_dummy; /* Dummy pages */ /* for checkpoint */ @@ -1013,6 +1187,11 @@ struct f2fs_sb_info { struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ + spinlock_t fsync_node_lock; /* for node entry lock */ + struct list_head fsync_node_list; /* node list head */ + unsigned int fsync_seg_id; /* sequence id */ + unsigned int fsync_node_num; /* number of node entries */ + /* for orphan inode, use 0'th array */ unsigned int max_orphans; /* max orphan inodes */ @@ -1045,14 +1224,20 @@ struct f2fs_sb_info { unsigned int total_node_count; /* total node block count */ unsigned int total_valid_node_count; /* valid node block count */ loff_t max_file_blocks; /* max block index of file */ - int active_logs; /* # of active logs */ int dir_level; /* directory level */ + int readdir_ra; /* readahead inode in readdir */ block_t user_block_count; /* # of user blocks */ block_t total_valid_block_count; /* # of valid blocks */ block_t discard_blks; /* discard command candidats */ block_t last_valid_block_count; /* for recovery */ block_t reserved_blocks; /* configurable reserved blocks */ + block_t current_reserved_blocks; /* current reserved blocks */ + + /* Additional tracking for no checkpoint mode */ + block_t unusable_block_count; /* # of blocks saved by last cp */ + + unsigned int nquota_files; /* # of quota sysfile */ u32 s_next_generation; /* for NFS support */ @@ -1062,7 +1247,7 @@ struct f2fs_sb_info { struct percpu_counter alloc_valid_block_count; /* writeback control */ - atomic_t wb_sync_req; /* count # of WB_SYNC threads */ + atomic_t wb_sync_req[META]; /* count # of WB_SYNC threads */ /* valid inode count */ struct percpu_counter total_valid_inode_count; @@ -1073,12 +1258,19 @@ struct f2fs_sb_info { struct mutex gc_mutex; /* mutex for GC */ struct f2fs_gc_kthread *gc_thread; /* GC thread */ unsigned int cur_victim_sec; /* current victim section num */ + unsigned int gc_mode; /* current GC state */ + unsigned int next_victim_seg[2]; /* next segment in victim section */ + /* for skip statistic */ + unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */ + unsigned long long skipped_gc_rwsem; /* FG_GC only */ - /* threshold for converting bg victims for fg */ - u64 fggc_threshold; + /* threshold for gc trials on pinned files */ + u64 gc_pin_file_threshold; /* maximum # of trials to find a victim segment for SSR and GC */ unsigned int max_victim_search; + /* migration granularity of garbage collection, unit: segment */ + unsigned int migration_granularity; /* * for stat information. @@ -1086,6 +1278,7 @@ struct f2fs_sb_info { */ #ifdef CONFIG_F2FS_STAT_FS struct f2fs_stat_info *stat_info; /* FS status information */ + atomic_t meta_count[META_MAX]; /* # of meta blocks */ unsigned int segment_count[2]; /* # of allocated segments */ unsigned int block_count[2]; /* # of allocated blocks */ atomic_t inplace_count; /* # of inplace update */ @@ -1101,6 +1294,8 @@ struct f2fs_sb_info { atomic_t max_aw_cnt; /* max # of atomic writes */ atomic_t max_vw_cnt; /* max # of volatile writes */ int bg_gc; /* background gc calls */ + unsigned int io_skip_bggc; /* skip background gc for in-flight IO */ + unsigned int other_skip_bggc; /* skip background gc for other reasons */ unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */ #endif spinlock_t stat_lock; /* lock for stat operations */ @@ -1118,6 +1313,8 @@ struct f2fs_sb_info { struct list_head s_list; int s_ndevs; /* number of devices */ struct f2fs_dev_info *devs; /* for device list */ + unsigned int dirty_device; /* for checkpoint data flush */ + spinlock_t dev_lock; /* protect dirty_device */ struct mutex umount_mutex; unsigned int shrinker_run_no; @@ -1130,27 +1327,23 @@ struct f2fs_sb_info { /* Precomputed FS UUID checksum for seeding other checksums */ __u32 s_chksum_seed; +}; - /* For fault injection */ -#ifdef CONFIG_F2FS_FAULT_INJECTION - struct f2fs_fault_info fault_info; -#endif - -#ifdef CONFIG_QUOTA - /* Names of quota files with journalled quota */ - char *s_qf_names[MAXQUOTAS]; - int s_jquota_fmt; /* Format of quota to use */ -#endif +struct f2fs_private_dio { + struct inode *inode; + void *orig_private; + bio_end_io_t *orig_end_io; + bool write; }; #ifdef CONFIG_F2FS_FAULT_INJECTION -#define f2fs_show_injection_info(type) \ - printk("%sF2FS-fs : inject %s in %s of %pF\n", \ - KERN_INFO, fault_name[type], \ +#define f2fs_show_injection_info(type) \ + printk_ratelimited("%sF2FS-fs : inject %s in %s of %pF\n", \ + KERN_INFO, f2fs_fault_name[type], \ __func__, __builtin_return_address(0)) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) { - struct f2fs_fault_info *ffi = &sbi->fault_info; + struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info; if (!ffi->inject_rate) return false; @@ -1165,6 +1358,12 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) } return false; } +#else +#define f2fs_show_injection_info(type) do { } while (0) +static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) +{ + return false; +} #endif /* For write statistics. Suppose sector size is 512 bytes, @@ -1176,59 +1375,42 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type) { - sbi->last_time[type] = jiffies; + unsigned long now = jiffies; + + sbi->last_time[type] = now; + + /* DISCARD_TIME and GC_TIME are based on REQ_TIME */ + if (type == REQ_TIME) { + sbi->last_time[DISCARD_TIME] = now; + sbi->last_time[GC_TIME] = now; + } } static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type) { - struct timespec ts = {sbi->interval_time[type], 0}; - unsigned long interval = timespec_to_jiffies(&ts); + unsigned long interval = sbi->interval_time[type] * HZ; return time_after(jiffies, sbi->last_time[type] + interval); } -static inline bool is_idle(struct f2fs_sb_info *sbi) +static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi, + int type) { - struct block_device *bdev = sbi->sb->s_bdev; - struct request_queue *q = bdev_get_queue(bdev); - struct request_list *rl = &q->root_rl; + unsigned long interval = sbi->interval_time[type] * HZ; + unsigned int wait_ms = 0; + long delta; - if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC]) - return 0; + delta = (sbi->last_time[type] + interval) - jiffies; + if (delta > 0) + wait_ms = jiffies_to_msecs(delta); - return f2fs_time_over(sbi, REQ_TIME); + return wait_ms; } /* * Inline functions */ -static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address, - unsigned int length) -{ - SHASH_DESC_ON_STACK(shash, sbi->s_chksum_driver); - u32 *ctx = (u32 *)shash_desc_ctx(shash); - u32 retval; - int err; - - shash->tfm = sbi->s_chksum_driver; - shash->flags = 0; - *ctx = F2FS_SUPER_MAGIC; - - err = crypto_shash_update(shash, address, length); - BUG_ON(err); - - retval = *ctx; - barrier_data(ctx); - return retval; -} - -static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc, - void *buf, size_t buf_size) -{ - return f2fs_crc32(sbi, buf, buf_size) == blk_crc; -} - -static inline u32 f2fs_chksum(struct f2fs_sb_info *sbi, u32 crc, +static inline u32 __f2fs_crc32(struct f2fs_sb_info *sbi, u32 crc, const void *address, unsigned int length) { struct { @@ -1249,6 +1431,24 @@ static inline u32 f2fs_chksum(struct f2fs_sb_info *sbi, u32 crc, return *(u32 *)desc.ctx; } +static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address, + unsigned int length) +{ + return __f2fs_crc32(sbi, F2FS_SUPER_MAGIC, address, length); +} + +static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc, + void *buf, size_t buf_size) +{ + return f2fs_crc32(sbi, buf, buf_size) == blk_crc; +} + +static inline u32 f2fs_chksum(struct f2fs_sb_info *sbi, u32 crc, + const void *address, unsigned int length) +{ + return __f2fs_crc32(sbi, crc, address, length); +} + static inline struct f2fs_inode_info *F2FS_I(struct inode *inode) { return container_of(inode, struct f2fs_inode_info, vfs_inode); @@ -1349,6 +1549,13 @@ static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp) return le64_to_cpu(cp->checkpoint_ver); } +static inline unsigned long f2fs_qf_ino(struct super_block *sb, int type) +{ + if (type < F2FS_MAX_QUOTAS) + return le32_to_cpu(F2FS_SB(sb)->raw_super->qf_ino[type]); + return 0; +} + static inline __u64 cur_cp_crc(struct f2fs_checkpoint *cp) { size_t crc_offset = le32_to_cpu(cp->checksum_offset); @@ -1407,12 +1614,16 @@ static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock) { unsigned long flags; - set_sbi_flag(sbi, SBI_NEED_FSCK); + /* + * In order to re-enable nat_bits we need to call fsck.f2fs by + * set_sbi_flag(sbi, SBI_NEED_FSCK). But it may give huge cost, + * so let's rely on regular fsck or unclean shutdown. + */ if (lock) spin_lock_irqsave(&sbi->cp_lock, flags); __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG); - kfree(NM_I(sbi)->nat_bits); + kvfree(NM_I(sbi)->nat_bits); NM_I(sbi)->nat_bits = NULL; if (lock) spin_unlock_irqrestore(&sbi->cp_lock, flags); @@ -1488,6 +1699,25 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs) return ofs == XATTR_NODE_OFFSET; } +static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi, + struct inode *inode, bool cap) +{ + if (!inode) + return true; + if (!test_opt(sbi, RESERVE_ROOT)) + return false; + if (IS_NOQUOTA(inode)) + return true; + if (uid_eq(F2FS_OPTION(sbi).s_resuid, current_fsuid())) + return true; + if (!gid_eq(F2FS_OPTION(sbi).s_resgid, GLOBAL_ROOT_GID) && + in_group_p(F2FS_OPTION(sbi).s_resgid)) + return true; + if (cap && capable(CAP_SYS_RESOURCE)) + return true; + return false; +} + static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool); static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, blkcnt_t *count) @@ -1500,13 +1730,12 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, if (ret) return ret; -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_BLOCK)) { f2fs_show_injection_info(FAULT_BLOCK); release = *count; goto enospc; } -#endif + /* * let's increase this in prior to actual block count change in order * for f2fs_sync_file to avoid data races when deciding checkpoint. @@ -1515,26 +1744,36 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, spin_lock(&sbi->stat_lock); sbi->total_valid_block_count += (block_t)(*count); - avail_user_block_count = sbi->user_block_count - sbi->reserved_blocks; + avail_user_block_count = sbi->user_block_count - + sbi->current_reserved_blocks; + + if (!__allow_reserved_blocks(sbi, inode, true)) + avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + avail_user_block_count -= sbi->unusable_block_count; if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { diff = sbi->total_valid_block_count - avail_user_block_count; + if (diff > *count) + diff = *count; *count -= diff; release = diff; - sbi->total_valid_block_count = avail_user_block_count; + sbi->total_valid_block_count -= diff; if (!*count) { spin_unlock(&sbi->stat_lock); - percpu_counter_sub(&sbi->alloc_valid_block_count, diff); goto enospc; } } spin_unlock(&sbi->stat_lock); - if (release) + if (unlikely(release)) { + percpu_counter_sub(&sbi->alloc_valid_block_count, release); dquot_release_reservation_block(inode, release); + } f2fs_i_blocks_write(inode, *count, true, true); return 0; enospc: + percpu_counter_sub(&sbi->alloc_valid_block_count, release); dquot_release_reservation_block(inode, release); return -ENOSPC; } @@ -1549,6 +1788,10 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count); f2fs_bug_on(sbi, inode->i_blocks < sectors); sbi->total_valid_block_count -= (block_t)count; + if (sbi->reserved_blocks && + sbi->current_reserved_blocks < sbi->reserved_blocks) + sbi->current_reserved_blocks = min(sbi->reserved_blocks, + sbi->current_reserved_blocks + count); spin_unlock(&sbi->stat_lock); f2fs_i_blocks_write(inode, count, false, true); } @@ -1558,7 +1801,9 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) atomic_inc(&sbi->nr_pages[count_type]); if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES || - count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA) + count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA || + count_type == F2FS_RD_DATA || count_type == F2FS_RD_NODE || + count_type == F2FS_RD_META) return; set_sbi_flag(sbi, SBI_IS_DIRTY); @@ -1569,6 +1814,8 @@ static inline void inode_inc_dirty_pages(struct inode *inode) atomic_inc(&F2FS_I(inode)->dirty_pages); inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); + if (IS_NOQUOTA(inode)) + inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_QDATA); } static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) @@ -1585,6 +1832,8 @@ static inline void inode_dec_dirty_pages(struct inode *inode) atomic_dec(&F2FS_I(inode)->dirty_pages); dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); + if (IS_NOQUOTA(inode)) + dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_QDATA); } static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type) @@ -1639,6 +1888,12 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); int offset; + if (is_set_ckpt_flags(sbi, CP_LARGE_NAT_BITMAP_FLAG)) { + offset = (flag == SIT_BITMAP) ? + le32_to_cpu(ckpt->nat_ver_bitmap_bytesize) : 0; + return &ckpt->sit_nat_version_bitmap + offset; + } + if (__cp_payload(sbi) > 0) { if (flag == NAT_BITMAP) return &ckpt->sit_nat_version_bitmap; @@ -1684,19 +1939,36 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, { block_t valid_block_count; unsigned int valid_node_count; - bool quota = inode && !is_inode; + int err; - if (quota) { - int ret = dquot_reserve_block(inode, 1); - if (ret) - return ret; + if (is_inode) { + if (inode) { + err = dquot_alloc_inode(inode); + if (err) + return err; + } + } else { + err = dquot_reserve_block(inode, 1); + if (err) + return err; + } + + if (time_to_inject(sbi, FAULT_BLOCK)) { + f2fs_show_injection_info(FAULT_BLOCK); + goto enospc; } spin_lock(&sbi->stat_lock); - valid_block_count = sbi->total_valid_block_count + 1; - if (unlikely(valid_block_count + sbi->reserved_blocks > - sbi->user_block_count)) { + valid_block_count = sbi->total_valid_block_count + + sbi->current_reserved_blocks + 1; + + if (!__allow_reserved_blocks(sbi, inode, false)) + valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + valid_block_count += sbi->unusable_block_count; + + if (unlikely(valid_block_count > sbi->user_block_count)) { spin_unlock(&sbi->stat_lock); goto enospc; } @@ -1722,8 +1994,12 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, return 0; enospc: - if (quota) + if (is_inode) { + if (inode) + dquot_free_inode(inode); + } else { dquot_release_reservation_block(inode, 1); + } return -ENOSPC; } @@ -1738,10 +2014,15 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, sbi->total_valid_node_count--; sbi->total_valid_block_count--; + if (sbi->reserved_blocks && + sbi->current_reserved_blocks < sbi->reserved_blocks) + sbi->current_reserved_blocks++; spin_unlock(&sbi->stat_lock); - if (!is_inode) + if (is_inode) + dquot_free_inode(inode); + else f2fs_i_blocks_write(inode, 1, false, true); } @@ -1768,27 +2049,40 @@ static inline s64 valid_inode_count(struct f2fs_sb_info *sbi) static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, pgoff_t index, bool for_write) { -#ifdef CONFIG_F2FS_FAULT_INJECTION struct page *page; - if (!for_write) - page = find_get_page_flags(mapping, index, - FGP_LOCK | FGP_ACCESSED); - else - page = find_lock_page(mapping, index); - if (page) - return page; + if (IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)) { + if (!for_write) + page = find_get_page_flags(mapping, index, + FGP_LOCK | FGP_ACCESSED); + else + page = find_lock_page(mapping, index); + if (page) + return page; - if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) { - f2fs_show_injection_info(FAULT_PAGE_ALLOC); - return NULL; + if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) { + f2fs_show_injection_info(FAULT_PAGE_ALLOC); + return NULL; + } } -#endif + if (!for_write) return grab_cache_page(mapping, index); return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); } +static inline struct page *f2fs_pagecache_get_page( + struct address_space *mapping, pgoff_t index, + int fgp_flags, gfp_t gfp_mask) +{ + if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) { + f2fs_show_injection_info(FAULT_PAGE_GET); + return NULL; + } + + return pagecache_get_page(mapping, index, fgp_flags, gfp_mask); +} + static inline void f2fs_copy_page(struct page *src, struct page *dst) { char *src_kaddr = kmap(src); @@ -1838,15 +2132,37 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, return entry; } -static inline struct bio *f2fs_bio_alloc(int npages) +static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, + int npages, bool no_fail) { struct bio *bio; - /* No failure on bio allocation */ - bio = bio_alloc(GFP_NOIO, npages); - if (!bio) - bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages); - return bio; + if (no_fail) { + /* No failure on bio allocation */ + bio = bio_alloc(GFP_NOIO, npages); + if (!bio) + bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages); + return bio; + } + if (time_to_inject(sbi, FAULT_ALLOC_BIO)) { + f2fs_show_injection_info(FAULT_ALLOC_BIO); + return NULL; + } + + return bio_alloc(GFP_KERNEL, npages); +} + +static inline bool is_idle(struct f2fs_sb_info *sbi, int type) +{ + if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) || + get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) || + get_pages(sbi, F2FS_WB_CP_DATA) || + get_pages(sbi, F2FS_DIO_READ) || + get_pages(sbi, F2FS_DIO_WRITE) || + atomic_read(&SM_I(sbi)->dcc_info->queued_discard) || + atomic_read(&SM_I(sbi)->fcc_info->queued_flush)) + return false; + return f2fs_time_over(sbi, type); } static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, @@ -1888,11 +2204,11 @@ static inline block_t datablock_addr(struct inode *inode, raw_node = F2FS_NODE(node_page); /* from GC path only */ - if (!inode) { - if (is_inode) + if (is_inode) { + if (!inode) base = offset_in_addr(&raw_node->i); - } else if (f2fs_has_extra_attr(inode) && is_inode) { - base = get_extra_isize(inode); + else if (f2fs_has_extra_attr(inode)) + base = get_extra_isize(inode); } addr_array = blkaddr_in_node(raw_node); @@ -1959,9 +2275,60 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) *addr ^= mask; } -#define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) -#define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) -#define F2FS_FL_INHERITED (FS_PROJINHERIT_FL) +/* + * Inode flags + */ +#define F2FS_SECRM_FL 0x00000001 /* Secure deletion */ +#define F2FS_UNRM_FL 0x00000002 /* Undelete */ +#define F2FS_COMPR_FL 0x00000004 /* Compress file */ +#define F2FS_SYNC_FL 0x00000008 /* Synchronous updates */ +#define F2FS_IMMUTABLE_FL 0x00000010 /* Immutable file */ +#define F2FS_APPEND_FL 0x00000020 /* writes to file may only append */ +#define F2FS_NODUMP_FL 0x00000040 /* do not dump file */ +#define F2FS_NOATIME_FL 0x00000080 /* do not update atime */ +/* Reserved for compression usage... */ +#define F2FS_DIRTY_FL 0x00000100 +#define F2FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ +#define F2FS_NOCOMPR_FL 0x00000400 /* Don't compress */ +#define F2FS_ENCRYPT_FL 0x00000800 /* encrypted file */ +/* End compression flags --- maybe not all used */ +#define F2FS_INDEX_FL 0x00001000 /* hash-indexed directory */ +#define F2FS_IMAGIC_FL 0x00002000 /* AFS directory */ +#define F2FS_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ +#define F2FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ +#define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ +#define F2FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define F2FS_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ +#define F2FS_EXTENTS_FL 0x00080000 /* Inode uses extents */ +#define F2FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */ +#define F2FS_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ +#define F2FS_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */ +#define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ +#define F2FS_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ + +#define F2FS_FL_USER_VISIBLE 0x304BDFFF /* User visible flags */ +#define F2FS_FL_USER_MODIFIABLE 0x204BC0FF /* User modifiable flags */ + +/* Flags we can manipulate with through F2FS_IOC_FSSETXATTR */ +#define F2FS_FL_XFLAG_VISIBLE (F2FS_SYNC_FL | \ + F2FS_IMMUTABLE_FL | \ + F2FS_APPEND_FL | \ + F2FS_NODUMP_FL | \ + F2FS_NOATIME_FL | \ + F2FS_PROJINHERIT_FL) + +/* Flags that should be inherited by new inodes from their parent. */ +#define F2FS_FL_INHERITED (F2FS_SECRM_FL | F2FS_UNRM_FL | F2FS_COMPR_FL |\ + F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL |\ + F2FS_NOCOMPR_FL | F2FS_JOURNAL_DATA_FL |\ + F2FS_NOTAIL_FL | F2FS_DIRSYNC_FL |\ + F2FS_PROJINHERIT_FL) + +/* Flags that are appropriate for regular files (all but dir-specific ones). */ +#define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_TOPDIR_FL)) + +/* Flags that are appropriate for non-directories/regular files. */ +#define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL) static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) { @@ -2003,6 +2370,8 @@ enum { FI_HOT_DATA, /* indicate file is hot */ FI_EXTRA_ATTR, /* indicate file has extra attribute */ FI_PROJ_INHERIT, /* indicate file inherits projectid */ + FI_PIN_FILE, /* indicate file should not be gced */ + FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */ }; static inline void __mark_inode_dirty_flag(struct inode *inode, @@ -2012,10 +2381,13 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, case FI_INLINE_XATTR: case FI_INLINE_DATA: case FI_INLINE_DENTRY: + case FI_NEW_INODE: if (set) return; + /* fall through */ case FI_DATA_EXIST: case FI_INLINE_DOTS: + case FI_PIN_FILE: f2fs_mark_inode_dirty_sync(inode, true); } } @@ -2096,6 +2468,13 @@ static inline void f2fs_i_depth_write(struct inode *inode, unsigned int depth) f2fs_mark_inode_dirty_sync(inode, true); } +static inline void f2fs_i_gc_failures_write(struct inode *inode, + unsigned int count) +{ + F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = count; + f2fs_mark_inode_dirty_sync(inode, true); +} + static inline void f2fs_i_xnid_write(struct inode *inode, nid_t xnid) { F2FS_I(inode)->i_xattr_nid = xnid; @@ -2124,6 +2503,8 @@ static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) set_bit(FI_INLINE_DOTS, &fi->flags); if (ri->i_inline & F2FS_EXTRA_ATTR) set_bit(FI_EXTRA_ATTR, &fi->flags); + if (ri->i_inline & F2FS_PIN_FILE) + set_bit(FI_PIN_FILE, &fi->flags); } static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) @@ -2142,6 +2523,8 @@ static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) ri->i_inline |= F2FS_INLINE_DOTS; if (is_inode_flag_set(inode, FI_EXTRA_ATTR)) ri->i_inline |= F2FS_EXTRA_ATTR; + if (is_inode_flag_set(inode, FI_PIN_FILE)) + ri->i_inline |= F2FS_PIN_FILE; } static inline int f2fs_has_extra_attr(struct inode *inode) @@ -2156,25 +2539,20 @@ static inline int f2fs_has_inline_xattr(struct inode *inode) static inline unsigned int addrs_per_inode(struct inode *inode) { - if (f2fs_has_inline_xattr(inode)) - return CUR_ADDRS_PER_INODE(inode) - F2FS_INLINE_XATTR_ADDRS; - return CUR_ADDRS_PER_INODE(inode); + return CUR_ADDRS_PER_INODE(inode) - get_inline_xattr_addrs(inode); } -static inline void *inline_xattr_addr(struct page *page) +static inline void *inline_xattr_addr(struct inode *inode, struct page *page) { struct f2fs_inode *ri = F2FS_INODE(page); return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - - F2FS_INLINE_XATTR_ADDRS]); + get_inline_xattr_addrs(inode)]); } static inline int inline_xattr_size(struct inode *inode) { - if (f2fs_has_inline_xattr(inode)) - return F2FS_INLINE_XATTR_ADDRS << 2; - else - return 0; + return get_inline_xattr_addrs(inode) * sizeof(__le32); } static inline int f2fs_has_inline_data(struct inode *inode) @@ -2192,6 +2570,11 @@ static inline int f2fs_has_inline_dots(struct inode *inode) return is_inode_flag_set(inode, FI_INLINE_DOTS); } +static inline bool f2fs_is_pinned_file(struct inode *inode) +{ + return is_inode_flag_set(inode, FI_PIN_FILE); +} + static inline bool f2fs_is_atomic_file(struct inode *inode) { return is_inode_flag_set(inode, FI_ATOMIC_FILE); @@ -2230,12 +2613,6 @@ static inline int f2fs_has_inline_dentry(struct inode *inode) return is_inode_flag_set(inode, FI_INLINE_DENTRY); } -static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page) -{ - if (!f2fs_has_inline_dentry(dir)) - kunmap(page); -} - static inline int is_file(struct inode *inode, int type) { return F2FS_I(inode)->i_advise & type; @@ -2255,9 +2632,10 @@ static inline void clear_file(struct inode *inode, int type) static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) { + bool ret; + if (dsync) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - bool ret; spin_lock(&sbi->inode_lock[DIRTY_META]); ret = list_empty(&F2FS_I(inode)->gdirty_list); @@ -2266,14 +2644,29 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) } if (!is_inode_flag_set(inode, FI_AUTO_RECOVER) || file_keep_isize(inode) || - i_size_read(inode) & PAGE_MASK) + i_size_read(inode) & ~PAGE_MASK) return false; - return F2FS_I(inode)->last_disk_size == i_size_read(inode); + + if (!timespec_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime)) + return false; + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime)) + return false; + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime)) + return false; + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 3, + &F2FS_I(inode)->i_crtime)) + return false; + + down_read(&F2FS_I(inode)->i_sem); + ret = F2FS_I(inode)->last_disk_size == i_size_read(inode); + up_read(&F2FS_I(inode)->i_sem); + + return ret; } -static inline int f2fs_readonly(struct super_block *sb) +static inline bool f2fs_readonly(struct super_block *sb) { - return sb->s_flags & MS_RDONLY; + return sb_rdonly(sb); } static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi) @@ -2313,13 +2706,41 @@ static inline bool f2fs_may_extent_tree(struct inode *inode) static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { -#ifdef CONFIG_F2FS_FAULT_INJECTION + void *ret; + if (time_to_inject(sbi, FAULT_KMALLOC)) { f2fs_show_injection_info(FAULT_KMALLOC); return NULL; } -#endif - return kmalloc(size, flags); + + ret = kmalloc(size, flags); + if (ret) + return ret; + + return kvmalloc(size, flags); +} + +static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi, + size_t size, gfp_t flags) +{ + return f2fs_kmalloc(sbi, size, flags | __GFP_ZERO); +} + +static inline void *f2fs_kvmalloc(struct f2fs_sb_info *sbi, + size_t size, gfp_t flags) +{ + if (time_to_inject(sbi, FAULT_KVMALLOC)) { + f2fs_show_injection_info(FAULT_KVMALLOC); + return NULL; + } + + return kvmalloc(size, flags); +} + +static inline void *f2fs_kvzalloc(struct f2fs_sb_info *sbi, + size_t size, gfp_t flags) +{ + return f2fs_kvmalloc(sbi, size, flags | __GFP_ZERO); } static inline int get_extra_isize(struct inode *inode) @@ -2327,7 +2748,12 @@ static inline int get_extra_isize(struct inode *inode) return F2FS_I(inode)->i_extra_isize / sizeof(__le32); } -#define get_inode_mode(i) \ +static inline int get_inline_xattr_addrs(struct inode *inode) +{ + return F2FS_I(inode)->i_inline_xattr_size; +} + +#define f2fs_get_inode_mode(i) \ ((is_inode_flag_set(i, FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) @@ -2366,6 +2792,8 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, spin_unlock(&sbi->iostat_lock); } +#define __is_large_section(sbi) ((sbi)->segs_per_sec > 1) + #define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \ (!is_read_io(fio->op) || fio->is_meta)) @@ -2403,16 +2831,20 @@ static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi, * file.c */ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); -void truncate_data_blocks(struct dnode_of_data *dn); -int truncate_blocks(struct inode *inode, u64 from, bool lock); +void f2fs_truncate_data_blocks(struct dnode_of_data *dn); +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock, + bool buf_write); int f2fs_truncate(struct inode *inode); int f2fs_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags); int f2fs_setattr(struct dentry *dentry, struct iattr *attr); -int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); -int truncate_data_blocks_range(struct dnode_of_data *dn, int count); +int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); +void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count); +int f2fs_precache_extents(struct inode *inode); long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid); +int f2fs_pin_file_control(struct inode *inode, bool inc); /* * inode.c @@ -2422,36 +2854,37 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page); void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page); struct inode *f2fs_iget(struct super_block *sb, unsigned long ino); struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino); -int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink); -int update_inode(struct inode *inode, struct page *node_page); -int update_inode_page(struct inode *inode); +int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink); +void f2fs_update_inode(struct inode *inode, struct page *node_page); +void f2fs_update_inode_page(struct inode *inode); int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc); void f2fs_evict_inode(struct inode *inode); -void handle_failed_inode(struct inode *inode); +void f2fs_handle_failed_inode(struct inode *inode); /* * namei.c */ +int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, + bool hot, bool set); struct dentry *f2fs_get_parent(struct dentry *child); /* * dir.c */ -void set_de_type(struct f2fs_dir_entry *de, umode_t mode); -unsigned char get_de_type(struct f2fs_dir_entry *de); -struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname, +unsigned char f2fs_get_de_type(struct f2fs_dir_entry *de); +struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname, f2fs_hash_t namehash, int *max_slots, struct f2fs_dentry_ptr *d); int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, unsigned int start_pos, struct fscrypt_str *fstr); -void do_make_empty_dir(struct inode *inode, struct inode *parent, +void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, struct f2fs_dentry_ptr *d); -struct page *init_inode_metadata(struct inode *inode, struct inode *dir, +struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, const struct qstr *new_name, const struct qstr *orig_name, struct page *dpage); -void update_parent_metadata(struct inode *dir, struct inode *inode, +void f2fs_update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth); -int room_for_filename(const void *bitmap, int slots, int max_slots); +int f2fs_room_for_filename(const void *bitmap, int slots, int max_slots); void f2fs_drop_nlink(struct inode *dir, struct inode *inode); struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, struct fscrypt_name *fname, struct page **res_page); @@ -2468,9 +2901,9 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, const struct qstr *orig_name, struct inode *inode, nid_t ino, umode_t mode); -int __f2fs_do_add_link(struct inode *dir, struct fscrypt_name *fname, +int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname, struct inode *inode, nid_t ino, umode_t mode); -int __f2fs_add_link(struct inode *dir, const struct qstr *name, +int f2fs_do_add_link(struct inode *dir, const struct qstr *name, struct inode *inode, nid_t ino, umode_t mode); void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, struct inode *dir, struct inode *inode); @@ -2479,7 +2912,7 @@ bool f2fs_empty_dir(struct inode *dir); static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) { - return __f2fs_add_link(d_inode(dentry->d_parent), &dentry->d_name, + return f2fs_do_add_link(d_inode(dentry->d_parent), &dentry->d_name, inode, inode->i_ino, inode->i_mode); } @@ -2488,13 +2921,14 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) */ int f2fs_inode_dirtied(struct inode *inode, bool sync); void f2fs_inode_synced(struct inode *inode); -void f2fs_enable_quota_files(struct f2fs_sb_info *sbi); +int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly); +int f2fs_quota_sync(struct super_block *sb, int type); void f2fs_quota_off_umount(struct super_block *sb); int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); int f2fs_sync_fs(struct super_block *sb, int sync); extern __printf(3, 4) void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); -int sanity_check_ckpt(struct f2fs_sb_info *sbi); +int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi); /* * hash.c @@ -2508,169 +2942,196 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info, struct dnode_of_data; struct node_info; -int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid); -bool available_free_memory(struct f2fs_sb_info *sbi, int type); -int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid); -bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid); -bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino); -void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni); -pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs); -int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode); -int truncate_inode_blocks(struct inode *inode, pgoff_t from); -int truncate_xattr_node(struct inode *inode, struct page *page); -int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino); -int remove_inode_page(struct inode *inode); -struct page *new_inode_page(struct inode *inode); -struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs); -void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid); -struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid); -struct page *get_node_page_ra(struct page *parent, int start); -void move_node_page(struct page *node_page, int gc_type); -int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, - struct writeback_control *wbc, bool atomic); -int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, +int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid); +bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type); +bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page); +void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi); +void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page); +void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi); +int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid); +bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid); +bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino); +int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, + struct node_info *ni); +pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs); +int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode); +int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from); +int f2fs_truncate_xattr_node(struct inode *inode); +int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, + unsigned int seq_id); +int f2fs_remove_inode_page(struct inode *inode); +struct page *f2fs_new_inode_page(struct inode *inode); +struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs); +void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid); +struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid); +struct page *f2fs_get_node_page_ra(struct page *parent, int start); +int f2fs_move_node_page(struct page *node_page, int gc_type); +int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, + struct writeback_control *wbc, bool atomic, + unsigned int *seq_id); +int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, + struct writeback_control *wbc, bool do_balance, enum iostat_type io_type); -void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount); -bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid); -void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid); -void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid); -int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink); -void recover_inline_xattr(struct inode *inode, struct page *page); -int recover_xattr_data(struct inode *inode, struct page *page, - block_t blkaddr); -int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); -int restore_node_summary(struct f2fs_sb_info *sbi, +int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount); +bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid); +void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid); +void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid); +int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink); +void f2fs_recover_inline_xattr(struct inode *inode, struct page *page); +int f2fs_recover_xattr_data(struct inode *inode, struct page *page); +int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); +int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum); -void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); -int build_node_manager(struct f2fs_sb_info *sbi); -void destroy_node_manager(struct f2fs_sb_info *sbi); -int __init create_node_manager_caches(void); -void destroy_node_manager_caches(void); +int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); +int f2fs_build_node_manager(struct f2fs_sb_info *sbi); +void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi); +int __init f2fs_create_node_manager_caches(void); +void f2fs_destroy_node_manager_caches(void); /* * segment.c */ -bool need_SSR(struct f2fs_sb_info *sbi); -void register_inmem_page(struct inode *inode, struct page *page); -void drop_inmem_pages(struct inode *inode); -void drop_inmem_page(struct inode *inode, struct page *page); -int commit_inmem_pages(struct inode *inode); +bool f2fs_need_SSR(struct f2fs_sb_info *sbi); +void f2fs_register_inmem_page(struct inode *inode, struct page *page); +void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure); +void f2fs_drop_inmem_pages(struct inode *inode); +void f2fs_drop_inmem_page(struct inode *inode, struct page *page); +int f2fs_commit_inmem_pages(struct inode *inode); void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need); void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi); -int f2fs_issue_flush(struct f2fs_sb_info *sbi); -int create_flush_cmd_control(struct f2fs_sb_info *sbi); -void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free); -void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); -bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); -void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new); -void stop_discard_thread(struct f2fs_sb_info *sbi); -void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount); -void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); -void release_discard_addrs(struct f2fs_sb_info *sbi); -int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); -void allocate_new_segments(struct f2fs_sb_info *sbi); +int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino); +int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi); +int f2fs_flush_device_cache(struct f2fs_sb_info *sbi); +void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free); +void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); +bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); +void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi); +void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi); +bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi); +void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, + struct cp_control *cpc); +void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi); +int f2fs_disable_cp_again(struct f2fs_sb_info *sbi); +void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); +int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); +void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); -bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc); -struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno); -void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr); -void write_meta_page(struct f2fs_sb_info *sbi, struct page *page, +bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, + struct cp_control *cpc); +struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno); +void f2fs_update_meta_page(struct f2fs_sb_info *sbi, void *src, + block_t blk_addr); +void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, enum iostat_type io_type); -void write_node_page(unsigned int nid, struct f2fs_io_info *fio); -void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio); -int rewrite_data_page(struct f2fs_io_info *fio); -void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, +void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio); +void f2fs_outplace_write_data(struct dnode_of_data *dn, + struct f2fs_io_info *fio); +int f2fs_inplace_write_data(struct f2fs_io_info *fio); +void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, block_t old_blkaddr, block_t new_blkaddr, bool recover_curseg, bool recover_newaddr); void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, block_t old_addr, block_t new_addr, unsigned char version, bool recover_curseg, bool recover_newaddr); -void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, +void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio, bool add_list); void f2fs_wait_on_page_writeback(struct page *page, - enum page_type type, bool ordered); -void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr); -void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk); -void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk); -int lookup_journal_in_cursum(struct f2fs_journal *journal, int type, + enum page_type type, bool ordered, bool locked); +void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr); +void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, + block_t len); +void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk); +void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk); +int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, unsigned int val, int alloc); -void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); -int build_segment_manager(struct f2fs_sb_info *sbi); -void destroy_segment_manager(struct f2fs_sb_info *sbi); -int __init create_segment_manager_caches(void); -void destroy_segment_manager_caches(void); +void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); +int f2fs_build_segment_manager(struct f2fs_sb_info *sbi); +void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi); +int __init f2fs_create_segment_manager_caches(void); +void f2fs_destroy_segment_manager_caches(void); +int f2fs_rw_hint_to_seg_type(enum rw_hint hint); +enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, + enum page_type type, enum temp_type temp); /* * checkpoint.c */ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io); -struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); -struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); -struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index); +struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); +struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); +struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index); +struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index); bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); -int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, +int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync); -void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index); -long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, +void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index); +long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, long nr_to_write, enum iostat_type io_type); -void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); -void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); -void release_ino_entry(struct f2fs_sb_info *sbi, bool all); -bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode); +void f2fs_add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); +void f2fs_remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); +void f2fs_release_ino_entry(struct f2fs_sb_info *sbi, bool all); +bool f2fs_exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode); +void f2fs_set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, + unsigned int devidx, int type); +bool f2fs_is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, + unsigned int devidx, int type); int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi); -int acquire_orphan_inode(struct f2fs_sb_info *sbi); -void release_orphan_inode(struct f2fs_sb_info *sbi); -void add_orphan_inode(struct inode *inode); -void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino); -int recover_orphan_inodes(struct f2fs_sb_info *sbi); -int get_valid_checkpoint(struct f2fs_sb_info *sbi); -void update_dirty_page(struct inode *inode, struct page *page); -void remove_dirty_inode(struct inode *inode); -int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type); -int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc); -void init_ino_entry_info(struct f2fs_sb_info *sbi); -int __init create_checkpoint_caches(void); -void destroy_checkpoint_caches(void); +int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi); +void f2fs_release_orphan_inode(struct f2fs_sb_info *sbi); +void f2fs_add_orphan_inode(struct inode *inode); +void f2fs_remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino); +int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi); +int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi); +void f2fs_update_dirty_page(struct inode *inode, struct page *page); +void f2fs_remove_dirty_inode(struct inode *inode); +int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type); +void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi); +int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc); +void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi); +int __init f2fs_create_checkpoint_caches(void); +void f2fs_destroy_checkpoint_caches(void); /* * data.c */ +int f2fs_init_post_read_processing(void); +void f2fs_destroy_post_read_processing(void); void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type); void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, nid_t ino, pgoff_t idx, - enum page_type type); + struct inode *inode, struct page *page, + nid_t ino, enum page_type type); void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi); int f2fs_submit_page_bio(struct f2fs_io_info *fio); -int f2fs_submit_page_write(struct f2fs_io_info *fio); +void f2fs_submit_page_write(struct f2fs_io_info *fio); struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, block_t blk_addr, struct bio *bio); int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr); -void set_data_blkaddr(struct dnode_of_data *dn); +void f2fs_set_data_blkaddr(struct dnode_of_data *dn); void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr); -int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count); -int reserve_new_block(struct dnode_of_data *dn); +int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count); +int f2fs_reserve_new_block(struct dnode_of_data *dn); int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index); int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from); int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index); -struct page *get_read_data_page(struct inode *inode, pgoff_t index, +struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, int op_flags, bool for_write); -struct page *find_data_page(struct inode *inode, pgoff_t index); -struct page *get_lock_data_page(struct inode *inode, pgoff_t index, +struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index); +struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index, bool for_write); -struct page *get_new_data_page(struct inode *inode, +struct page *f2fs_get_new_data_page(struct inode *inode, struct page *ipage, pgoff_t index, bool new_i_size); -int do_write_data_page(struct f2fs_io_info *fio); +int f2fs_do_write_data_page(struct f2fs_io_info *fio); +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock); int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int create, int flag); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); -void f2fs_set_page_dirty_nobuffers(struct page *page); -int __f2fs_write_data_pages(struct address_space *mapping, - struct writeback_control *wbc, - enum iostat_type io_type); +bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio); +bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio); void f2fs_invalidate_page(struct page *page, unsigned int offset, unsigned int length); int f2fs_release_page(struct page *page, gfp_t wait); @@ -2678,22 +3139,24 @@ int f2fs_release_page(struct page *page, gfp_t wait); int f2fs_migrate_page(struct address_space *mapping, struct page *newpage, struct page *page, enum migrate_mode mode); #endif +bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len); +void f2fs_clear_radix_tree_dirty_tag(struct page *page); /* * gc.c */ -int start_gc_thread(struct f2fs_sb_info *sbi); -void stop_gc_thread(struct f2fs_sb_info *sbi); -block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode); +int f2fs_start_gc_thread(struct f2fs_sb_info *sbi); +void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi); +block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode); int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, unsigned int segno); -void build_gc_manager(struct f2fs_sb_info *sbi); +void f2fs_build_gc_manager(struct f2fs_sb_info *sbi); /* * recovery.c */ -int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only); -bool space_for_roll_forward(struct f2fs_sb_info *sbi); +int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only); +bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi); /* * debug.c @@ -2707,14 +3170,19 @@ struct f2fs_stat_info { unsigned long long hit_largest, hit_cached, hit_rbtree; unsigned long long hit_total, total_ext; int ext_tree, zombie_tree, ext_node; - int ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta; + int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta; + int ndirty_data, ndirty_qdata; int inmem_pages; - unsigned int ndirty_dirs, ndirty_files, ndirty_all; + unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all; int nats, dirty_nats, sits, dirty_sits; int free_nids, avail_nids, alloc_nids; int total_count, utilization; int bg_gc, nr_wb_cp_data, nr_wb_data; - int nr_flushing, nr_flushed, nr_discarding, nr_discarded; + int nr_rd_data, nr_rd_node, nr_rd_meta; + int nr_dio_read, nr_dio_write; + unsigned int io_skip_bggc, other_skip_bggc; + int nr_flushing, nr_flushed, flush_list_empty; + int nr_discarding, nr_discarded; int nr_discard_cmd; unsigned int undiscard_blks; int inline_xattr, inline_inode, inline_dir, append, update, orphans; @@ -2729,10 +3197,12 @@ struct f2fs_stat_info { int bg_node_segs, bg_data_segs; int tot_blks, data_blks, node_blks; int bg_data_blks, bg_node_blks; + unsigned long long skipped_atomic_files[2]; int curseg[NR_CURSEG_TYPE]; int cursec[NR_CURSEG_TYPE]; int curzone[NR_CURSEG_TYPE]; + unsigned int meta_count[META_MAX]; unsigned int segment_count[2]; unsigned int block_count[2]; unsigned int inplace_count; @@ -2748,6 +3218,8 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_bg_cp_count(si) ((si)->bg_cp_count++) #define stat_inc_call_count(si) ((si)->call_count++) #define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) +#define stat_io_skip_bggc_count(sbi) ((sbi)->io_skip_bggc++) +#define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++) #define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++) #define stat_dec_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]--) #define stat_inc_total_hit(sbi) (atomic64_inc(&(sbi)->total_hit_ext)) @@ -2784,6 +3256,17 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) if (f2fs_has_inline_dentry(inode)) \ (atomic_dec(&F2FS_I_SB(inode)->inline_dir)); \ } while (0) +#define stat_inc_meta_count(sbi, blkaddr) \ + do { \ + if (blkaddr < SIT_I(sbi)->sit_base_addr) \ + atomic_inc(&(sbi)->meta_count[META_CP]); \ + else if (blkaddr < NM_I(sbi)->nat_blkaddr) \ + atomic_inc(&(sbi)->meta_count[META_SIT]); \ + else if (blkaddr < SM_I(sbi)->ssa_blkaddr) \ + atomic_inc(&(sbi)->meta_count[META_NAT]); \ + else if (blkaddr < SM_I(sbi)->main_blkaddr) \ + atomic_inc(&(sbi)->meta_count[META_SSA]); \ + } while (0) #define stat_inc_seg_type(sbi, curseg) \ ((sbi)->segment_count[(curseg)->alloc_type]++) #define stat_inc_block_count(sbi, curseg) \ @@ -2853,6 +3336,8 @@ void f2fs_destroy_root_stats(void); #define stat_inc_bg_cp_count(si) do { } while (0) #define stat_inc_call_count(si) do { } while (0) #define stat_inc_bggc_count(si) do { } while (0) +#define stat_io_skip_bggc_count(sbi) do { } while (0) +#define stat_other_skip_bggc_count(sbi) do { } while (0) #define stat_inc_dirty_inode(sbi, type) do { } while (0) #define stat_dec_dirty_inode(sbi, type) do { } while (0) #define stat_inc_total_hit(sb) do { } while (0) @@ -2871,6 +3356,7 @@ void f2fs_destroy_root_stats(void); #define stat_inc_volatile_write(inode) do { } while (0) #define stat_dec_volatile_write(inode) do { } while (0) #define stat_update_max_volatile_write(inode) do { } while (0) +#define stat_inc_meta_count(sbi, blkaddr) do { } while (0) #define stat_inc_seg_type(sbi, curseg) do { } while (0) #define stat_inc_block_count(sbi, curseg) do { } while (0) #define stat_inc_inplace_blocks(sbi) do { } while (0) @@ -2895,29 +3381,31 @@ extern const struct inode_operations f2fs_dir_inode_operations; extern const struct inode_operations f2fs_symlink_inode_operations; extern const struct inode_operations f2fs_encrypted_symlink_inode_operations; extern const struct inode_operations f2fs_special_inode_operations; -extern struct kmem_cache *inode_entry_slab; +extern struct kmem_cache *f2fs_inode_entry_slab; /* * inline.c */ bool f2fs_may_inline_data(struct inode *inode); bool f2fs_may_inline_dentry(struct inode *inode); -void read_inline_data(struct page *page, struct page *ipage); -void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from); +void f2fs_do_read_inline_data(struct page *page, struct page *ipage); +void f2fs_truncate_inline_inode(struct inode *inode, + struct page *ipage, u64 from); int f2fs_read_inline_data(struct inode *inode, struct page *page); int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page); int f2fs_convert_inline_inode(struct inode *inode); int f2fs_write_inline_data(struct inode *inode, struct page *page); -bool recover_inline_data(struct inode *inode, struct page *npage); -struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, +bool f2fs_recover_inline_data(struct inode *inode, struct page *npage); +struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, struct fscrypt_name *fname, struct page **res_page); -int make_empty_inline_dir(struct inode *inode, struct inode *parent, +int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, struct page *ipage); int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, const struct qstr *orig_name, struct inode *inode, nid_t ino, umode_t mode); -void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, - struct inode *dir, struct inode *inode); +void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, + struct page *page, struct inode *dir, + struct inode *inode); bool f2fs_empty_inline_dir(struct inode *dir); int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, struct fscrypt_str *fstr); @@ -2938,18 +3426,19 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi); /* * extent_cache.c */ -struct rb_entry *__lookup_rb_tree(struct rb_root *root, +struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root, struct rb_entry *cached_re, unsigned int ofs); -struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, - struct rb_root *root, struct rb_node **parent, - unsigned int ofs); -struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root, +struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, + struct rb_root_cached *root, + struct rb_node **parent, + unsigned int ofs, bool *leftmost); +struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root, struct rb_entry *cached_re, unsigned int ofs, struct rb_entry **prev_entry, struct rb_entry **next_entry, struct rb_node ***insert_p, struct rb_node **insert_parent, - bool force); -bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi, - struct rb_root *root); + bool force, bool *leftmost); +bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi, + struct rb_root_cached *root); unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink); bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext); void f2fs_drop_extent_tree(struct inode *inode); @@ -2960,9 +3449,9 @@ bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, void f2fs_update_extent_cache(struct dnode_of_data *dn); void f2fs_update_extent_cache_range(struct dnode_of_data *dn, pgoff_t fofs, block_t blkaddr, unsigned int len); -void init_extent_cache_info(struct f2fs_sb_info *sbi); -int __init create_extent_cache(void); -void destroy_extent_cache(void); +void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi); +int __init f2fs_create_extent_cache(void); +void f2fs_destroy_extent_cache(void); /* * sysfs.c @@ -2989,38 +3478,35 @@ static inline void f2fs_set_encrypted_inode(struct inode *inode) { #ifdef CONFIG_F2FS_FS_ENCRYPTION file_set_encrypt(inode); + f2fs_set_inode_flags(inode); #endif } -static inline bool f2fs_bio_encrypted(struct bio *bio) +/* + * Returns true if the reads of the inode's data need to undergo some + * postprocessing step, like decryption or authenticity verification. + */ +static inline bool f2fs_post_read_required(struct inode *inode) { - return bio->bi_private != NULL; + return f2fs_encrypted_file(inode); } -static inline int f2fs_sb_has_crypto(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_ENCRYPT); +#define F2FS_FEATURE_FUNCS(name, flagname) \ +static inline int f2fs_sb_has_##name(struct f2fs_sb_info *sbi) \ +{ \ + return F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_##flagname); \ } -static inline int f2fs_sb_mounted_blkzoned(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_BLKZONED); -} - -static inline int f2fs_sb_has_extra_attr(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_EXTRA_ATTR); -} - -static inline int f2fs_sb_has_project_quota(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_PRJQUOTA); -} - -static inline int f2fs_sb_has_inode_chksum(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_INODE_CHKSUM); -} +F2FS_FEATURE_FUNCS(encrypt, ENCRYPT); +F2FS_FEATURE_FUNCS(blkzoned, BLKZONED); +F2FS_FEATURE_FUNCS(extra_attr, EXTRA_ATTR); +F2FS_FEATURE_FUNCS(project_quota, PRJQUOTA); +F2FS_FEATURE_FUNCS(inode_chksum, INODE_CHKSUM); +F2FS_FEATURE_FUNCS(flexible_inline_xattr, FLEXIBLE_INLINE_XATTR); +F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO); +F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME); +F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND); +F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); #ifdef CONFIG_BLK_DEV_ZONED static inline int get_blkz_type(struct f2fs_sb_info *sbi, @@ -3036,11 +3522,20 @@ static inline int get_blkz_type(struct f2fs_sb_info *sbi, } #endif -static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi) +static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi) { - struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev); + return f2fs_sb_has_blkzoned(sbi); +} - return blk_queue_discard(q) || f2fs_sb_mounted_blkzoned(sbi->sb); +static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi) +{ + return blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev)); +} + +static inline bool f2fs_realtime_discard_enable(struct f2fs_sb_info *sbi) +{ + return (test_opt(sbi, DISCARD) && f2fs_hw_support_discard(sbi)) || + f2fs_hw_should_discard(sbi); } static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt) @@ -3065,8 +3560,74 @@ static inline bool f2fs_may_encrypt(struct inode *inode) return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)); #else - return 0; + return false; #endif } +static inline int block_unaligned_IO(struct inode *inode, + struct kiocb *iocb, struct iov_iter *iter) +{ + unsigned int i_blkbits = READ_ONCE(inode->i_blkbits); + unsigned int blocksize_mask = (1 << i_blkbits) - 1; + loff_t offset = iocb->ki_pos; + unsigned long align = offset | iov_iter_alignment(iter); + + return align & blocksize_mask; +} + +static inline int allow_outplace_dio(struct inode *inode, + struct kiocb *iocb, struct iov_iter *iter) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + int rw = iov_iter_rw(iter); + + return (test_opt(sbi, LFS) && (rw == WRITE) && + !block_unaligned_IO(inode, iocb, iter)); +} + +static inline bool f2fs_force_buffered_io(struct inode *inode, + struct kiocb *iocb, struct iov_iter *iter) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + int rw = iov_iter_rw(iter); + + if (f2fs_post_read_required(inode)) + return true; + if (sbi->s_ndevs) + return true; + /* + * for blkzoned device, fallback direct IO to buffered IO, so + * all IOs can be serialized by log-structured write. + */ + if (f2fs_sb_has_blkzoned(sbi)) + return true; + if (test_opt(sbi, LFS) && (rw == WRITE) && + block_unaligned_IO(inode, iocb, iter)) + return true; + if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED)) + return true; + + return false; +} + +#ifdef CONFIG_F2FS_FAULT_INJECTION +extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, + unsigned int type); +#else +#define f2fs_build_fault_attr(sbi, rate, type) do { } while (0) #endif + +#endif + +static inline bool is_journalled_quota(struct f2fs_sb_info *sbi) +{ +#ifdef CONFIG_QUOTA + if (f2fs_sb_has_quota_ino(sbi)) + return true; + if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] || + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) + return true; +#endif + return false; +}
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5f549bc..dfd2647 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c
@@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/file.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/f2fs_fs.h> @@ -50,26 +47,18 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) struct page *page = vmf->page; struct inode *inode = file_inode(vmf->vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dnode_of_data dn; + struct dnode_of_data dn = { .node_changed = false }; int err; + if (unlikely(f2fs_cp_error(sbi))) { + err = -EIO; + goto err; + } + sb_start_pagefault(inode->i_sb); f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); - /* block allocation */ - f2fs_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_reserve_block(&dn, page->index); - if (err) { - f2fs_unlock_op(sbi); - goto out; - } - f2fs_put_dnode(&dn); - f2fs_unlock_op(sbi); - - f2fs_balance_fs(sbi, dn.node_changed); - file_update_time(vmf->vma->vm_file); down_read(&F2FS_I(inode)->i_mmap_sem); lock_page(page); @@ -81,16 +70,34 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) goto out_sem; } + /* block allocation */ + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = f2fs_get_block(&dn, page->index); + f2fs_put_dnode(&dn); + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); + if (err) { + unlock_page(page); + goto out_sem; + } + + /* fill the page */ + f2fs_wait_on_page_writeback(page, DATA, false, true); + + /* wait for GCed page writeback via META_MAPPING */ + f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); + /* * check to see if the page is mapped already (no holes) */ if (PageMappedToDisk(page)) - goto mapped; + goto out_sem; /* page is wholly or partially inside EOF */ if (((loff_t)(page->index + 1) << PAGE_SHIFT) > i_size_read(inode)) { - unsigned offset; + loff_t offset; + offset = i_size_read(inode) & ~PAGE_MASK; zero_user_segment(page, offset, PAGE_SIZE); } @@ -99,21 +106,16 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) SetPageUptodate(page); f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE); + f2fs_update_time(sbi, REQ_TIME); trace_f2fs_vm_page_mkwrite(page, DATA); -mapped: - /* fill the page */ - f2fs_wait_on_page_writeback(page, DATA, false); - - /* wait for GCed encrypted page writeback */ - if (f2fs_encrypted_file(inode)) - f2fs_wait_on_block_writeback(sbi, dn.data_blkaddr); - out_sem: up_read(&F2FS_I(inode)->i_mmap_sem); -out: + + f2fs_balance_fs(sbi, dn.node_changed); + sb_end_pagefault(inode->i_sb); - f2fs_update_time(sbi, REQ_TIME); +err: return block_page_mkwrite_return(err); } @@ -138,27 +140,34 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) return 1; } -static inline bool need_do_checkpoint(struct inode *inode) +static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - bool need_cp = false; + enum cp_reason_type cp_reason = CP_NO_NEEDED; - if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) - need_cp = true; + if (!S_ISREG(inode->i_mode)) + cp_reason = CP_NON_REGULAR; + else if (inode->i_nlink != 1) + cp_reason = CP_HARDLINK; else if (is_sbi_flag_set(sbi, SBI_NEED_CP)) - need_cp = true; + cp_reason = CP_SB_NEED_CP; else if (file_wrong_pino(inode)) - need_cp = true; - else if (!space_for_roll_forward(sbi)) - need_cp = true; - else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) - need_cp = true; + cp_reason = CP_WRONG_PINO; + else if (!f2fs_space_for_roll_forward(sbi)) + cp_reason = CP_NO_SPC_ROLL; + else if (!f2fs_is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) + cp_reason = CP_NODE_NEED_CP; else if (test_opt(sbi, FASTBOOT)) - need_cp = true; - else if (sbi->active_logs == 2) - need_cp = true; + cp_reason = CP_FASTBOOT_MODE; + else if (F2FS_OPTION(sbi).active_logs == 2) + cp_reason = CP_SPEC_LOG_NUM; + else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT && + f2fs_need_dentry_mark(sbi, inode->i_ino) && + f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, + TRANS_DIR_INO)) + cp_reason = CP_RECOVER_DIR; - return need_cp; + return cp_reason; } static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) @@ -166,7 +175,7 @@ static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) struct page *i = find_get_page(NODE_MAPPING(sbi), ino); bool ret = false; /* But we need to avoid that there are some inode updates */ - if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) + if ((i && PageDirty(i)) || f2fs_need_inode_block_update(sbi, ino)) ret = true; f2fs_put_page(i, 0); return ret; @@ -193,14 +202,16 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t ino = inode->i_ino; int ret = 0; - bool need_cp = false; + enum cp_reason_type cp_reason = 0; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, .for_reclaim = 0, }; + unsigned int seq_id = 0; - if (unlikely(f2fs_readonly(inode->i_sb))) + if (unlikely(f2fs_readonly(inode->i_sb) || + is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return 0; trace_f2fs_sync_file_enter(inode); @@ -215,7 +226,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, clear_inode_flag(inode, FI_NEED_IPU); if (ret) { - trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); + trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); return ret; } @@ -229,14 +240,14 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, * if there is no written data, don't waste time to write recovery info. */ if (!is_inode_flag_set(inode, FI_APPEND_WRITE) && - !exist_written_data(sbi, ino, APPEND_INO)) { + !f2fs_exist_written_data(sbi, ino, APPEND_INO)) { /* it may call write_inode just prior to fsync */ if (need_inode_page_update(sbi, ino)) goto go_write; if (is_inode_flag_set(inode, FI_UPDATE_WRITE) || - exist_written_data(sbi, ino, UPDATE_INO)) + f2fs_exist_written_data(sbi, ino, UPDATE_INO)) goto flush_out; goto out; } @@ -246,10 +257,10 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, * sudden-power-off. */ down_read(&F2FS_I(inode)->i_sem); - need_cp = need_do_checkpoint(inode); + cp_reason = need_do_checkpoint(inode); up_read(&F2FS_I(inode)->i_sem); - if (need_cp) { + if (cp_reason) { /* all the dirty node pages should be flushed for POR */ ret = f2fs_sync_fs(inode->i_sb, 1); @@ -263,7 +274,9 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, goto out; } sync_nodes: - ret = fsync_node_pages(sbi, inode, &wbc, atomic); + atomic_inc(&sbi->wb_sync_req[NODE]); + ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id); + atomic_dec(&sbi->wb_sync_req[NODE]); if (ret) goto out; @@ -273,7 +286,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, goto out; } - if (need_inode_block_update(sbi, ino)) { + if (f2fs_need_inode_block_update(sbi, ino)) { f2fs_mark_inode_dirty_sync(inode, true); f2fs_write_inode(inode, NULL); goto sync_nodes; @@ -288,46 +301,52 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, * given fsync mark. */ if (!atomic) { - ret = wait_on_node_pages_writeback(sbi, ino); + ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id); if (ret) goto out; } /* once recovery info is written, don't need to tack this */ - remove_ino_entry(sbi, ino, APPEND_INO); + f2fs_remove_ino_entry(sbi, ino, APPEND_INO); clear_inode_flag(inode, FI_APPEND_WRITE); flush_out: - remove_ino_entry(sbi, ino, UPDATE_INO); - clear_inode_flag(inode, FI_UPDATE_WRITE); - if (!atomic) - ret = f2fs_issue_flush(sbi); + if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) + ret = f2fs_issue_flush(sbi, inode->i_ino); + if (!ret) { + f2fs_remove_ino_entry(sbi, ino, UPDATE_INO); + clear_inode_flag(inode, FI_UPDATE_WRITE); + f2fs_remove_ino_entry(sbi, ino, FLUSH_INO); + } f2fs_update_time(sbi, REQ_TIME); out: - trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); + trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); f2fs_trace_ios(NULL, 1); return ret; } int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file))))) + return -EIO; return f2fs_do_sync_file(file, start, end, datasync, false); } static pgoff_t __get_first_dirty_index(struct address_space *mapping, pgoff_t pgofs, int whence) { - struct pagevec pvec; + struct page *page; int nr_pages; if (whence != SEEK_DATA) return 0; /* find first dirty page index */ - pagevec_init(&pvec, 0); - nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, - PAGECACHE_TAG_DIRTY, 1); - pgofs = nr_pages ? pvec.pages[0]->index : ULONG_MAX; - pagevec_release(&pvec); + nr_pages = find_get_pages_tag(mapping, &pgofs, PAGECACHE_TAG_DIRTY, + 1, &page); + if (!nr_pages) + return ULONG_MAX; + pgofs = page->index; + put_page(page); return pgofs; } @@ -377,13 +396,13 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) { set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE); if (err && err != -ENOENT) { goto fail; } else if (err == -ENOENT) { /* direct node does not exists */ if (whence == SEEK_DATA) { - pgofs = get_next_page_offset(&dn, pgofs); + pgofs = f2fs_get_next_page_offset(&dn, pgofs); continue; } else { goto found; @@ -397,6 +416,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) dn.ofs_in_node++, pgofs++, data_ofs = (loff_t)pgofs << PAGE_SHIFT) { block_t blkaddr; + blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); @@ -454,6 +474,9 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) struct inode *inode = file_inode(file); int err; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + /* we don't need to use inline_data strictly */ err = f2fs_convert_inline_inode(inode); if (err) @@ -466,26 +489,17 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) static int f2fs_file_open(struct inode *inode, struct file *filp) { - struct dentry *dir; + int err = fscrypt_file_open(inode, filp); - if (f2fs_encrypted_inode(inode)) { - int ret = fscrypt_get_encryption_info(inode); - if (ret) - return -EACCES; - if (!fscrypt_has_encryption_key(inode)) - return -ENOKEY; - } - dir = dget_parent(file_dentry(filp)); - if (f2fs_encrypted_inode(d_inode(dir)) && - !fscrypt_has_permitted_context(d_inode(dir), inode)) { - dput(dir); - return -EPERM; - } - dput(dir); + if (err) + return err; + + filp->f_mode |= FMODE_NOWAIT; + return dquot_file_open(inode, filp); } -int truncate_data_blocks_range(struct dnode_of_data *dn, int count) +void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_node *raw_node; @@ -501,17 +515,18 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) for (; count > 0; count--, addr++, dn->ofs_in_node++) { block_t blkaddr = le32_to_cpu(*addr); + if (blkaddr == NULL_ADDR) continue; dn->data_blkaddr = NULL_ADDR; - set_data_blkaddr(dn); + f2fs_set_data_blkaddr(dn); if (__is_valid_data_blkaddr(blkaddr) && !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) continue; - invalidate_blocks(sbi, blkaddr); + f2fs_invalidate_blocks(sbi, blkaddr); if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN); nr_free++; @@ -523,7 +538,7 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) * once we invalidate valid blkaddr in range [ofs, ofs + count], * we will invalidate all blkaddr in the whole range. */ - fofs = start_bidx_of_node(ofs_of_node(dn->node_page), + fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + ofs; f2fs_update_extent_cache_range(dn, fofs, 0, len); dec_valid_block_count(sbi, dn->inode, nr_free); @@ -533,18 +548,17 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) f2fs_update_time(sbi, REQ_TIME); trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid, dn->ofs_in_node, nr_free); - return nr_free; } -void truncate_data_blocks(struct dnode_of_data *dn) +void f2fs_truncate_data_blocks(struct dnode_of_data *dn) { - truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); + f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); } static int truncate_partial_data_page(struct inode *inode, u64 from, bool cache_only) { - unsigned offset = from & (PAGE_SIZE - 1); + loff_t offset = from & (PAGE_SIZE - 1); pgoff_t index = from >> PAGE_SHIFT; struct address_space *mapping = inode->i_mapping; struct page *page; @@ -560,11 +574,11 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, return 0; } - page = get_lock_data_page(inode, index, true); + page = f2fs_get_lock_data_page(inode, index, true); if (IS_ERR(page)) return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page); truncate_out: - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); zero_user(page, offset, PAGE_SIZE - offset); /* An encrypted inode should have a key and truncate the last page. */ @@ -575,41 +589,42 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, return 0; } -int truncate_blocks(struct inode *inode, u64 from, bool lock) +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock, + bool buf_write) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - unsigned int blocksize = inode->i_sb->s_blocksize; struct dnode_of_data dn; pgoff_t free_from; int count = 0, err = 0; struct page *ipage; bool truncate_page = false; + int flag = buf_write ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO; trace_f2fs_truncate_blocks_enter(inode, from); - free_from = (pgoff_t)F2FS_BYTES_TO_BLK(from + blocksize - 1); + free_from = (pgoff_t)F2FS_BLK_ALIGN(from); if (free_from >= sbi->max_file_blocks) goto free_partial; if (lock) - f2fs_lock_op(sbi); + __do_map_lock(sbi, flag, true); - ipage = get_node_page(sbi, inode->i_ino); + ipage = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(ipage)) { err = PTR_ERR(ipage); goto out; } if (f2fs_has_inline_data(inode)) { - truncate_inline_inode(inode, ipage, from); + f2fs_truncate_inline_inode(inode, ipage, from); f2fs_put_page(ipage, 1); truncate_page = true; goto out; } set_new_dnode(&dn, inode, ipage, NULL, 0); - err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA); + err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA); if (err) { if (err == -ENOENT) goto free_next; @@ -622,16 +637,16 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) f2fs_bug_on(sbi, count < 0); if (dn.ofs_in_node || IS_INODE(dn.node_page)) { - truncate_data_blocks_range(&dn, count); + f2fs_truncate_data_blocks_range(&dn, count); free_from += count; } f2fs_put_dnode(&dn); free_next: - err = truncate_inode_blocks(inode, free_from); + err = f2fs_truncate_inode_blocks(inode, free_from); out: if (lock) - f2fs_unlock_op(sbi); + __do_map_lock(sbi, flag, false); free_partial: /* lastly zero out the first data page */ if (!err) @@ -645,18 +660,20 @@ int f2fs_truncate(struct inode *inode) { int err; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) return 0; trace_f2fs_truncate(inode); -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) { f2fs_show_injection_info(FAULT_TRUNCATE); return -EIO; } -#endif + /* we should check inline_data size */ if (!f2fs_may_inline_data(inode)) { err = f2fs_convert_inline_inode(inode); @@ -664,7 +681,7 @@ int f2fs_truncate(struct inode *inode) return err; } - err = truncate_blocks(inode, i_size_read(inode), true); + err = f2fs_truncate_blocks(inode, i_size_read(inode), true, false); if (err) return err; @@ -678,18 +695,27 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, { struct inode *inode = d_inode(path->dentry); struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_inode *ri; unsigned int flags; - flags = fi->i_flags & (FS_FL_USER_VISIBLE | FS_PROJINHERIT_FL); - if (flags & FS_APPEND_FL) + if (f2fs_has_extra_attr(inode) && + f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) && + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { + stat->result_mask |= STATX_BTIME; + stat->btime.tv_sec = fi->i_crtime.tv_sec; + stat->btime.tv_nsec = fi->i_crtime.tv_nsec; + } + + flags = fi->i_flags & F2FS_FL_USER_VISIBLE; + if (flags & F2FS_APPEND_FL) stat->attributes |= STATX_ATTR_APPEND; - if (flags & FS_COMPR_FL) + if (flags & F2FS_COMPR_FL) stat->attributes |= STATX_ATTR_COMPRESSED; if (f2fs_encrypted_inode(inode)) stat->attributes |= STATX_ATTR_ENCRYPTED; - if (flags & FS_IMMUTABLE_FL) + if (flags & F2FS_IMMUTABLE_FL) stat->attributes |= STATX_ATTR_IMMUTABLE; - if (flags & FS_NODUMP_FL) + if (flags & F2FS_NODUMP_FL) stat->attributes |= STATX_ATTR_NODUMP; stat->attributes_mask |= (STATX_ATTR_APPEND | @@ -744,10 +770,17 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) int err; bool size_changed = false; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + err = setattr_prepare(dentry, attr); if (err) return err; + err = fscrypt_prepare_setattr(dentry, attr); + if (err) + return err; + if (is_quota_modification(inode, attr)) { err = dquot_initialize(inode); if (err) @@ -757,36 +790,47 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) !uid_eq(attr->ia_uid, inode->i_uid)) || (attr->ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) { + f2fs_lock_op(F2FS_I_SB(inode)); err = dquot_transfer(inode, attr); - if (err) + if (err) { + set_sbi_flag(F2FS_I_SB(inode), + SBI_QUOTA_NEED_REPAIR); + f2fs_unlock_op(F2FS_I_SB(inode)); return err; + } + /* + * update uid/gid under lock_op(), so that dquot and inode can + * be updated atomically. + */ + if (attr->ia_valid & ATTR_UID) + inode->i_uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; + f2fs_mark_inode_dirty_sync(inode, true); + f2fs_unlock_op(F2FS_I_SB(inode)); } if (attr->ia_valid & ATTR_SIZE) { - if (f2fs_encrypted_inode(inode)) { - err = fscrypt_get_encryption_info(inode); - if (err) - return err; - if (!fscrypt_has_encryption_key(inode)) - return -ENOKEY; - } + bool to_smaller = (attr->ia_size <= i_size_read(inode)); - if (attr->ia_size <= i_size_read(inode)) { - down_write(&F2FS_I(inode)->i_mmap_sem); - truncate_setsize(inode, attr->ia_size); + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); + + truncate_setsize(inode, attr->ia_size); + + if (to_smaller) err = f2fs_truncate(inode); - up_write(&F2FS_I(inode)->i_mmap_sem); - if (err) - return err; - } else { - /* - * do not trim all blocks after i_size if target size is - * larger than i_size. - */ - down_write(&F2FS_I(inode)->i_mmap_sem); - truncate_setsize(inode, attr->ia_size); - up_write(&F2FS_I(inode)->i_mmap_sem); + /* + * do not trim all blocks after i_size if target size is + * larger than i_size. + */ + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + if (err) + return err; + + if (!to_smaller) { /* should convert inline inode here */ if (!f2fs_may_inline_data(inode)) { err = f2fs_convert_inline_inode(inode); @@ -796,13 +840,17 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) inode->i_mtime = inode->i_ctime = current_time(inode); } + down_write(&F2FS_I(inode)->i_sem); + F2FS_I(inode)->last_disk_size = i_size_read(inode); + up_write(&F2FS_I(inode)->i_sem); + size_changed = true; } __setattr_copy(inode, attr); if (attr->ia_valid & ATTR_MODE) { - err = posix_acl_chmod(inode, get_inode_mode(inode)); + err = posix_acl_chmod(inode, f2fs_get_inode_mode(inode)); if (err || is_inode_flag_set(inode, FI_ACL_MODE)) { inode->i_mode = F2FS_I(inode)->i_acl_mode; clear_inode_flag(inode, FI_ACL_MODE); @@ -841,20 +889,20 @@ static int fill_zero(struct inode *inode, pgoff_t index, f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); - page = get_new_data_page(inode, NULL, index, false); + page = f2fs_get_new_data_page(inode, NULL, index, false); f2fs_unlock_op(sbi); if (IS_ERR(page)) return PTR_ERR(page); - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); zero_user(page, start, len); set_page_dirty(page); f2fs_put_page(page, 1); return 0; } -int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) +int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) { int err; @@ -863,10 +911,11 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) pgoff_t end_offset, count; set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, pg_start, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, pg_start, LOOKUP_NODE); if (err) { if (err == -ENOENT) { - pg_start++; + pg_start = f2fs_get_next_page_offset(&dn, + pg_start); continue; } return err; @@ -877,7 +926,7 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset); - truncate_data_blocks_range(&dn, count); + f2fs_truncate_data_blocks_range(&dn, count); f2fs_put_dnode(&dn); pg_start += count; @@ -928,14 +977,19 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) blk_start = (loff_t)pg_start << PAGE_SHIFT; blk_end = (loff_t)pg_end << PAGE_SHIFT; + + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); down_write(&F2FS_I(inode)->i_mmap_sem); + truncate_inode_pages_range(mapping, blk_start, blk_end - 1); f2fs_lock_op(sbi); - ret = truncate_hole(inode, pg_start, pg_end); + ret = f2fs_truncate_hole(inode, pg_start, pg_end); f2fs_unlock_op(sbi); + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); } } @@ -951,7 +1005,7 @@ static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr, next_dnode: set_new_dnode(&dn, inode, NULL, NULL, 0); - ret = get_dnode_of_data(&dn, off, LOOKUP_NODE_RA); + ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA); if (ret && ret != -ENOENT) { return ret; } else if (ret == -ENOENT) { @@ -968,7 +1022,7 @@ static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr, for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { *blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); - if (!is_checkpointed_data(sbi, *blkaddr)) { + if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) { if (test_opt(sbi, LFS)) { f2fs_put_dnode(&dn); @@ -1001,10 +1055,10 @@ static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr, continue; set_new_dnode(&dn, inode, NULL, NULL, 0); - ret = get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA); + ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA); if (ret) { dec_valid_block_count(sbi, inode, 1); - invalidate_blocks(sbi, *blkaddr); + f2fs_invalidate_blocks(sbi, *blkaddr); } else { f2fs_update_data_blkaddr(&dn, *blkaddr); } @@ -1034,18 +1088,23 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, pgoff_t ilen; set_new_dnode(&dn, dst_inode, NULL, NULL, 0); - ret = get_dnode_of_data(&dn, dst + i, ALLOC_NODE); + ret = f2fs_get_dnode_of_data(&dn, dst + i, ALLOC_NODE); if (ret) return ret; - get_node_info(sbi, dn.nid, &ni); + ret = f2fs_get_node_info(sbi, dn.nid, &ni); + if (ret) { + f2fs_put_dnode(&dn); + return ret; + } + ilen = min((pgoff_t) ADDRS_PER_PAGE(dn.node_page, dst_inode) - dn.ofs_in_node, len - i); do { dn.data_blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); - truncate_data_blocks_range(&dn, 1); + f2fs_truncate_data_blocks_range(&dn, 1); if (do_replace[i]) { f2fs_i_blocks_write(src_inode, @@ -1068,10 +1127,11 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, } else { struct page *psrc, *pdst; - psrc = get_lock_data_page(src_inode, src + i, true); + psrc = f2fs_get_lock_data_page(src_inode, + src + i, true); if (IS_ERR(psrc)) return PTR_ERR(psrc); - pdst = get_new_data_page(dst_inode, NULL, dst + i, + pdst = f2fs_get_new_data_page(dst_inode, NULL, dst + i, true); if (IS_ERR(pdst)) { f2fs_put_page(psrc, 1); @@ -1082,7 +1142,8 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, f2fs_put_page(pdst, 1); f2fs_put_page(psrc, 1); - ret = truncate_hole(src_inode, src + i, src + i + 1); + ret = f2fs_truncate_hole(src_inode, + src + i, src + i + 1); if (ret) return ret; i++; @@ -1103,11 +1164,15 @@ static int __exchange_data_block(struct inode *src_inode, while (len) { olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len); - src_blkaddr = kvzalloc(sizeof(block_t) * olen, GFP_KERNEL); + src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode), + array_size(olen, sizeof(block_t)), + GFP_KERNEL); if (!src_blkaddr) return -ENOMEM; - do_replace = kvzalloc(sizeof(int) * olen, GFP_KERNEL); + do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode), + array_size(olen, sizeof(int)), + GFP_KERNEL); if (!do_replace) { kvfree(src_blkaddr); return -ENOMEM; @@ -1133,31 +1198,39 @@ static int __exchange_data_block(struct inode *src_inode, return 0; roll_back: - __roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, len); + __roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, olen); kvfree(src_blkaddr); kvfree(do_replace); return ret; } -static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end) +static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + pgoff_t start = offset >> PAGE_SHIFT; + pgoff_t end = (offset + len) >> PAGE_SHIFT; int ret; f2fs_balance_fs(sbi, true); + + /* avoid gc operation during block exchange */ + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_lock_op(sbi); - f2fs_drop_extent_tree(inode); - + truncate_pagecache(inode, offset); ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true); f2fs_unlock_op(sbi); + + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); return ret; } static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) { - pgoff_t pg_start, pg_end; loff_t new_size; int ret; @@ -1172,34 +1245,27 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) if (ret) return ret; - pg_start = offset >> PAGE_SHIFT; - pg_end = (offset + len) >> PAGE_SHIFT; - - down_write(&F2FS_I(inode)->i_mmap_sem); /* write out all dirty pages from offset */ ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); if (ret) - goto out; + return ret; - truncate_pagecache(inode, offset); - - ret = f2fs_do_collapse(inode, pg_start, pg_end); + ret = f2fs_do_collapse(inode, offset, len); if (ret) - goto out; + return ret; /* write out all moved pages, if possible */ + down_write(&F2FS_I(inode)->i_mmap_sem); filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); truncate_pagecache(inode, offset); new_size = i_size_read(inode) - len; truncate_pagecache(inode, new_size); - ret = truncate_blocks(inode, new_size, true); + ret = f2fs_truncate_blocks(inode, new_size, true, false); + up_write(&F2FS_I(inode)->i_mmap_sem); if (!ret) f2fs_i_size_write(inode, new_size); - -out: - up_write(&F2FS_I(inode)->i_mmap_sem); return ret; } @@ -1219,7 +1285,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, } dn->ofs_in_node = ofs_in_node; - ret = reserve_new_blocks(dn, count); + ret = f2fs_reserve_new_blocks(dn, count); if (ret) return ret; @@ -1228,7 +1294,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, dn->data_blkaddr = datablock_addr(dn->inode, dn->node_page, dn->ofs_in_node); /* - * reserve_new_blocks will not guarantee entire block + * f2fs_reserve_new_blocks will not guarantee entire block * allocation. */ if (dn->data_blkaddr == NULL_ADDR) { @@ -1236,9 +1302,9 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, break; } if (dn->data_blkaddr != NEW_ADDR) { - invalidate_blocks(sbi, dn->data_blkaddr); + f2fs_invalidate_blocks(sbi, dn->data_blkaddr); dn->data_blkaddr = NEW_ADDR; - set_data_blkaddr(dn); + f2fs_set_data_blkaddr(dn); } } @@ -1265,12 +1331,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, if (ret) return ret; - down_write(&F2FS_I(inode)->i_mmap_sem); ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1); if (ret) - goto out_sem; - - truncate_pagecache_range(inode, offset, offset + len - 1); + return ret; pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; @@ -1282,7 +1345,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ret = fill_zero(inode, pg_start, off_start, off_end - off_start); if (ret) - goto out_sem; + return ret; new_size = max_t(loff_t, new_size, offset + len); } else { @@ -1290,7 +1353,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ret = fill_zero(inode, pg_start++, off_start, PAGE_SIZE - off_start); if (ret) - goto out_sem; + return ret; new_size = max_t(loff_t, new_size, (loff_t)pg_start << PAGE_SHIFT); @@ -1301,12 +1364,21 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, unsigned int end_offset; pgoff_t end; + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); + + truncate_pagecache_range(inode, + (loff_t)index << PAGE_SHIFT, + ((loff_t)pg_end << PAGE_SHIFT) - 1); + f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); - ret = get_dnode_of_data(&dn, index, ALLOC_NODE); + ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE); if (ret) { f2fs_unlock_op(sbi); + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); goto out; } @@ -1315,7 +1387,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ret = f2fs_do_zero_range(&dn, index, end); f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); f2fs_balance_fs(sbi, dn.node_changed); @@ -1343,9 +1418,6 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, else f2fs_i_size_write(inode, new_size); } -out_sem: - up_write(&F2FS_I(inode)->i_mmap_sem); - return ret; } @@ -1375,22 +1447,26 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) f2fs_balance_fs(sbi, true); down_write(&F2FS_I(inode)->i_mmap_sem); - ret = truncate_blocks(inode, i_size_read(inode), true); + ret = f2fs_truncate_blocks(inode, i_size_read(inode), true, false); + up_write(&F2FS_I(inode)->i_mmap_sem); if (ret) - goto out; + return ret; /* write out all dirty pages from offset */ ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); if (ret) - goto out; - - truncate_pagecache(inode, offset); + return ret; pg_start = offset >> PAGE_SHIFT; pg_end = (offset + len) >> PAGE_SHIFT; delta = pg_end - pg_start; idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + /* avoid gc operation during block exchange */ + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); + truncate_pagecache(inode, offset); + while (!ret && idx > pg_start) { nr = idx - pg_start; if (nr > delta) @@ -1404,15 +1480,17 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) idx + delta, nr, false); f2fs_unlock_op(sbi); } + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); /* write out all moved pages, if possible */ + down_write(&F2FS_I(inode)->i_mmap_sem); filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); truncate_pagecache(inode, offset); + up_write(&F2FS_I(inode)->i_mmap_sem); if (!ret) f2fs_i_size_write(inode, new_size); -out: - up_write(&F2FS_I(inode)->i_mmap_sem); return ret; } @@ -1420,7 +1498,9 @@ static int expand_inode_data(struct inode *inode, loff_t offset, loff_t len, int mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_map_blocks map = { .m_next_pgofs = NULL }; + struct f2fs_map_blocks map = { .m_next_pgofs = NULL, + .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE, + .m_may_create = true }; pgoff_t pg_end; loff_t new_size = i_size_read(inode); loff_t off_end; @@ -1454,14 +1534,18 @@ static int expand_inode_data(struct inode *inode, loff_t offset, last_off = map.m_lblk + map.m_len - 1; /* update new size to the failed position */ - new_size = (last_off == pg_end) ? offset + len: + new_size = (last_off == pg_end) ? offset + len : (loff_t)(last_off + 1) << PAGE_SHIFT; } else { new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end; } - if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) - f2fs_i_size_write(inode, new_size); + if (new_size > i_size_read(inode)) { + if (mode & FALLOC_FL_KEEP_SIZE) + file_set_keep_isize(inode); + else + f2fs_i_size_write(inode, new_size); + } return err; } @@ -1472,6 +1556,9 @@ static long f2fs_fallocate(struct file *file, int mode, struct inode *inode = file_inode(file); long ret = 0; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + /* f2fs only support ->fallocate for regular file */ if (!S_ISREG(inode->i_mode)) return -EINVAL; @@ -1505,8 +1592,6 @@ static long f2fs_fallocate(struct file *file, int mode, if (!ret) { inode->i_mtime = inode->i_ctime = current_time(inode); f2fs_mark_inode_dirty_sync(inode, false); - if (mode & FALLOC_FL_KEEP_SIZE) - file_set_keep_isize(inode); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); } @@ -1529,13 +1614,13 @@ static int f2fs_release_file(struct inode *inode, struct file *filp) /* some remained atomic pages should discarded */ if (f2fs_is_atomic_file(inode)) - drop_inmem_pages(inode); + f2fs_drop_inmem_pages(inode); if (f2fs_is_volatile_file(inode)) { - clear_inode_flag(inode, FI_VOLATILE_FILE); - stat_dec_volatile_write(inode); set_inode_flag(inode, FI_DROP_CACHE); filemap_fdatawrite(inode->i_mapping); clear_inode_flag(inode, FI_DROP_CACHE); + clear_inode_flag(inode, FI_VOLATILE_FILE); + stat_dec_volatile_write(inode); } return 0; } @@ -1552,7 +1637,7 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id) */ if (f2fs_is_atomic_file(inode) && F2FS_I(inode)->inmem_task == current) - drop_inmem_pages(inode); + f2fs_drop_inmem_pages(inode); return 0; } @@ -1560,8 +1645,15 @@ static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); struct f2fs_inode_info *fi = F2FS_I(inode); - unsigned int flags = fi->i_flags & - (FS_FL_USER_VISIBLE | FS_PROJINHERIT_FL); + unsigned int flags = fi->i_flags; + + if (f2fs_encrypted_inode(inode)) + flags |= F2FS_ENCRYPT_FL; + if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) + flags |= F2FS_INLINE_DATA_FL; + + flags &= F2FS_FL_USER_VISIBLE; + return put_user(flags, (int __user *)arg); } @@ -1578,22 +1670,22 @@ static int __f2fs_ioc_setflags(struct inode *inode, unsigned int flags) oldflags = fi->i_flags; - if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) + if ((flags ^ oldflags) & (F2FS_APPEND_FL | F2FS_IMMUTABLE_FL)) if (!capable(CAP_LINUX_IMMUTABLE)) return -EPERM; - flags = flags & (FS_FL_USER_MODIFIABLE | FS_PROJINHERIT_FL); - flags |= oldflags & ~(FS_FL_USER_MODIFIABLE | FS_PROJINHERIT_FL); + flags = flags & F2FS_FL_USER_MODIFIABLE; + flags |= oldflags & ~F2FS_FL_USER_MODIFIABLE; fi->i_flags = flags; - if (fi->i_flags & FS_PROJINHERIT_FL) + if (fi->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); else clear_inode_flag(inode, FI_PROJ_INHERIT); inode->i_ctime = current_time(inode); f2fs_set_inode_flags(inode); - f2fs_mark_inode_dirty_sync(inode, false); + f2fs_mark_inode_dirty_sync(inode, true); return 0; } @@ -1646,38 +1738,41 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) inode_lock(inode); - down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); - - if (f2fs_is_atomic_file(inode)) + if (f2fs_is_atomic_file(inode)) { + if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) + ret = -EINVAL; goto out; + } ret = f2fs_convert_inline_inode(inode); if (ret) goto out; - set_inode_flag(inode, FI_ATOMIC_FILE); - set_inode_flag(inode, FI_HOT_DATA); - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - if (!get_dirty_pages(inode)) - goto inc_stat; - - f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, + /* + * Should wait end_io to count F2FS_WB_CP_DATA correctly by + * f2fs_is_atomic_file. + */ + if (get_dirty_pages(inode)) + f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, "Unexpected flush for atomic writes: ino=%lu, npages=%u", inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) { - clear_inode_flag(inode, FI_ATOMIC_FILE); - clear_inode_flag(inode, FI_HOT_DATA); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); goto out; } -inc_stat: + set_inode_flag(inode, FI_ATOMIC_FILE); + clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); F2FS_I(inode)->inmem_task = current; stat_inc_atomic_write(inode); stat_update_max_atomic_write(inode); out: - up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); inode_unlock(inode); mnt_drop_write_file(filp); return ret; @@ -1695,26 +1790,34 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) if (ret) return ret; + f2fs_balance_fs(F2FS_I_SB(inode), true); + inode_lock(inode); - if (f2fs_is_volatile_file(inode)) + if (f2fs_is_volatile_file(inode)) { + ret = -EINVAL; goto err_out; + } if (f2fs_is_atomic_file(inode)) { - ret = commit_inmem_pages(inode); + ret = f2fs_commit_inmem_pages(inode); if (ret) goto err_out; ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); if (!ret) { clear_inode_flag(inode, FI_ATOMIC_FILE); - clear_inode_flag(inode, FI_HOT_DATA); + F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC] = 0; stat_dec_atomic_write(inode); } } else { ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false); } err_out: + if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) { + clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); + ret = -EINVAL; + } inode_unlock(inode); mnt_drop_write_file(filp); return ret; @@ -1799,13 +1902,15 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) inode_lock(inode); if (f2fs_is_atomic_file(inode)) - drop_inmem_pages(inode); + f2fs_drop_inmem_pages(inode); if (f2fs_is_volatile_file(inode)) { clear_inode_flag(inode, FI_VOLATILE_FILE); stat_dec_volatile_write(inode); ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); } + clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); + inode_unlock(inode); mnt_drop_write_file(filp); @@ -1836,27 +1941,51 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) switch (in) { case F2FS_GOING_DOWN_FULLSYNC: sb = freeze_bdev(sb->s_bdev); - if (sb && !IS_ERR(sb)) { + if (IS_ERR(sb)) { + ret = PTR_ERR(sb); + goto out; + } + if (sb) { f2fs_stop_checkpoint(sbi, false); + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); thaw_bdev(sb->s_bdev, sb); } break; case F2FS_GOING_DOWN_METASYNC: /* do checkpoint only */ - f2fs_sync_fs(sb, 1); + ret = f2fs_sync_fs(sb, 1); + if (ret) + goto out; f2fs_stop_checkpoint(sbi, false); + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); break; case F2FS_GOING_DOWN_NOSYNC: f2fs_stop_checkpoint(sbi, false); + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); break; case F2FS_GOING_DOWN_METAFLUSH: - sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO); + f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO); f2fs_stop_checkpoint(sbi, false); + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); + break; + case F2FS_GOING_DOWN_NEED_FSCK: + set_sbi_flag(sbi, SBI_NEED_FSCK); + /* do checkpoint only */ + ret = f2fs_sync_fs(sb, 1); + if (ret) + goto out; break; default: ret = -EINVAL; goto out; } + + f2fs_stop_gc_thread(sbi); + f2fs_stop_discard_thread(sbi); + + f2fs_drop_discard_cmd(sbi); + clear_opt(sbi, DISCARD); + f2fs_update_time(sbi, REQ_TIME); out: if (in != F2FS_GOING_DOWN_FULLSYNC) @@ -1875,7 +2004,7 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!blk_queue_discard(q)) + if (!f2fs_hw_support_discard(F2FS_SB(sb))) return -EOPNOTSUPP; if (copy_from_user(&range, (struct fstrim_range __user *)arg, @@ -1914,6 +2043,9 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); + if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode))) + return -EOPNOTSUPP; + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return fscrypt_ioctl_set_policy(filp, (const void __user *)arg); @@ -1921,6 +2053,8 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) { + if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) + return -EOPNOTSUPP; return fscrypt_ioctl_get_policy(filp, (void __user *)arg); } @@ -1930,16 +2064,18 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err; - if (!f2fs_sb_has_crypto(inode->i_sb)) + if (!f2fs_sb_has_encrypt(sbi)) return -EOPNOTSUPP; - if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) - goto got_it; - err = mnt_want_write_file(filp); if (err) return err; + down_write(&sbi->sb_lock); + + if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) + goto got_it; + /* update superblock with uuid */ generate_random_uuid(sbi->raw_super->encrypt_pw_salt); @@ -1947,15 +2083,16 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) if (err) { /* undo new data */ memset(sbi->raw_super->encrypt_pw_salt, 0, 16); - mnt_drop_write_file(filp); - return err; + goto out_err; } - mnt_drop_write_file(filp); got_it: if (copy_to_user((__u8 __user *)arg, sbi->raw_super->encrypt_pw_salt, 16)) - return -EFAULT; - return 0; + err = -EFAULT; +out_err: + up_write(&sbi->sb_lock); + mnt_drop_write_file(filp); + return err; } static int f2fs_ioc_gc(struct file *filp, unsigned long arg) @@ -2011,13 +2148,15 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) if (f2fs_readonly(sbi->sb)) return -EROFS; + end = range.start + range.len; + if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) { + return -EINVAL; + } + ret = mnt_want_write_file(filp); if (ret) return ret; - end = range.start + range.len; - if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) - return -EINVAL; do_more: if (!range.sync) { if (!mutex_trylock(&sbi->gc_mutex)) { @@ -2029,7 +2168,7 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) } ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start)); - range.start += sbi->blocks_per_seg; + range.start += BLKS_PER_SEC(sbi); if (range.start <= end) goto do_more; out: @@ -2049,6 +2188,12 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) if (f2fs_readonly(sbi->sb)) return -EROFS; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + f2fs_msg(sbi->sb, KERN_INFO, + "Skipping Checkpoint. Checkpoints currently disabled."); + return -EINVAL; + } + ret = mnt_want_write_file(filp); if (ret) return ret; @@ -2064,9 +2209,11 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, struct f2fs_defragment *range) { struct inode *inode = file_inode(filp); - struct f2fs_map_blocks map = { .m_next_pgofs = NULL }; - struct extent_info ei = {0,0,0}; - pgoff_t pg_start, pg_end; + struct f2fs_map_blocks map = { .m_next_extent = NULL, + .m_seg_type = NO_CHECK_TYPE , + .m_may_create = false }; + struct extent_info ei = {0, 0, 0}; + pgoff_t pg_start, pg_end, next_pgofs; unsigned int blk_per_seg = sbi->blocks_per_seg; unsigned int total = 0, sec_num; block_t blk_end = 0; @@ -2074,7 +2221,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, int err; /* if in-place-update policy is enabled, don't waste time here */ - if (need_inplace_update_policy(inode, NULL)) + if (f2fs_should_update_inplace(inode, NULL)) return -EINVAL; pg_start = range->start >> PAGE_SHIFT; @@ -2100,6 +2247,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, } map.m_lblk = pg_start; + map.m_next_pgofs = &next_pgofs; /* * lookup mapping info in dnode page cache, skip defragmenting if all @@ -2113,14 +2261,16 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, goto out; if (!(map.m_flags & F2FS_MAP_FLAGS)) { - map.m_lblk++; + map.m_lblk = next_pgofs; continue; } - if (blk_end && blk_end != map.m_pblk) { + if (blk_end && blk_end != map.m_pblk) fragmented = true; - break; - } + + /* record total count of block that we're going to move */ + total += map.m_len; + blk_end = map.m_pblk + map.m_len; map.m_lblk += map.m_len; @@ -2129,10 +2279,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, if (!fragmented) goto out; - map.m_lblk = pg_start; - map.m_len = pg_end - pg_start; - - sec_num = (map.m_len + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi); + sec_num = (total + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi); /* * make sure there are enough free section for LFS allocation, this can @@ -2144,6 +2291,10 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, goto out; } + map.m_lblk = pg_start; + map.m_len = pg_end - pg_start; + total = 0; + while (map.m_lblk < pg_end) { pgoff_t idx; int cnt = 0; @@ -2155,7 +2306,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, goto clear_out; if (!(map.m_flags & F2FS_MAP_FLAGS)) { - map.m_lblk++; + map.m_lblk = next_pgofs; continue; } @@ -2165,7 +2316,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) { struct page *page; - page = get_lock_data_page(inode, idx, true); + page = f2fs_get_lock_data_page(inode, idx, true); if (IS_ERR(page)) { err = PTR_ERR(page); goto clear_out; @@ -2277,10 +2428,9 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, inode_lock(src); if (src != dst) { - if (!inode_trylock(dst)) { - ret = -EBUSY; + ret = -EBUSY; + if (!inode_trylock(dst)) goto out; - } } ret = -EINVAL; @@ -2325,6 +2475,14 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, goto out_unlock; f2fs_balance_fs(sbi, true); + + down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); + if (src != dst) { + ret = -EBUSY; + if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE])) + goto out_src; + } + f2fs_lock_op(sbi); ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS, pos_out >> F2FS_BLKSIZE_BITS, @@ -2337,6 +2495,11 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, f2fs_i_size_write(dst, dst_osize); } f2fs_unlock_op(sbi); + + if (src != dst) + up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]); +out_src: + up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); out_unlock: if (src != dst) inode_unlock(dst); @@ -2403,12 +2566,15 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) if (f2fs_readonly(sbi->sb)) return -EROFS; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return -EINVAL; + if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg, sizeof(range))) return -EFAULT; if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num || - sbi->segs_per_sec != 1) { + __is_large_section(sbi)) { f2fs_msg(sbi->sb, KERN_WARNING, "Can't flush %u in %d for segs_per_sec %u != 1\n", range.dev_num, sbi->s_ndevs, @@ -2461,18 +2627,33 @@ static int f2fs_ioc_get_features(struct file *filp, unsigned long arg) } #ifdef CONFIG_QUOTA +int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid) +{ + struct dquot *transfer_to[MAXQUOTAS] = {}; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct super_block *sb = sbi->sb; + int err = 0; + + transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid)); + if (!IS_ERR(transfer_to[PRJQUOTA])) { + err = __dquot_transfer(inode, transfer_to); + if (err) + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + dqput(transfer_to[PRJQUOTA]); + } + return err; +} + static int f2fs_ioc_setproject(struct file *filp, __u32 projid) { struct inode *inode = file_inode(filp); struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct super_block *sb = sbi->sb; - struct dquot *transfer_to[MAXQUOTAS] = {}; struct page *ipage; kprojid_t kprojid; int err; - if (!f2fs_sb_has_project_quota(sb)) { + if (!f2fs_sb_has_project_quota(sbi)) { if (projid != F2FS_DEF_PROJID) return -EOPNOTSUPP; else @@ -2487,53 +2668,45 @@ static int f2fs_ioc_setproject(struct file *filp, __u32 projid) if (projid_eq(kprojid, F2FS_I(inode)->i_projid)) return 0; - err = mnt_want_write_file(filp); - if (err) - return err; - err = -EPERM; - inode_lock(inode); - /* Is it quota file? Do not allow user to mess with it */ if (IS_NOQUOTA(inode)) - goto out_unlock; + return err; - ipage = get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) { - err = PTR_ERR(ipage); - goto out_unlock; - } + ipage = f2fs_get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); if (!F2FS_FITS_IN_INODE(F2FS_INODE(ipage), fi->i_extra_isize, i_projid)) { err = -EOVERFLOW; f2fs_put_page(ipage, 1); - goto out_unlock; + return err; } f2fs_put_page(ipage, 1); err = dquot_initialize(inode); if (err) - goto out_unlock; + return err; - transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid)); - if (!IS_ERR(transfer_to[PRJQUOTA])) { - err = __dquot_transfer(inode, transfer_to); - dqput(transfer_to[PRJQUOTA]); - if (err) - goto out_dirty; - } + f2fs_lock_op(sbi); + err = f2fs_transfer_project_quota(inode, kprojid); + if (err) + goto out_unlock; F2FS_I(inode)->i_projid = kprojid; inode->i_ctime = current_time(inode); -out_dirty: f2fs_mark_inode_dirty_sync(inode, true); out_unlock: - inode_unlock(inode); - mnt_drop_write_file(filp); + f2fs_unlock_op(sbi); return err; } #else +int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid) +{ + return 0; +} + static int f2fs_ioc_setproject(struct file *filp, __u32 projid) { if (projid != F2FS_DEF_PROJID) @@ -2547,17 +2720,17 @@ static inline __u32 f2fs_iflags_to_xflags(unsigned long iflags) { __u32 xflags = 0; - if (iflags & FS_SYNC_FL) + if (iflags & F2FS_SYNC_FL) xflags |= FS_XFLAG_SYNC; - if (iflags & FS_IMMUTABLE_FL) + if (iflags & F2FS_IMMUTABLE_FL) xflags |= FS_XFLAG_IMMUTABLE; - if (iflags & FS_APPEND_FL) + if (iflags & F2FS_APPEND_FL) xflags |= FS_XFLAG_APPEND; - if (iflags & FS_NODUMP_FL) + if (iflags & F2FS_NODUMP_FL) xflags |= FS_XFLAG_NODUMP; - if (iflags & FS_NOATIME_FL) + if (iflags & F2FS_NOATIME_FL) xflags |= FS_XFLAG_NOATIME; - if (iflags & FS_PROJINHERIT_FL) + if (iflags & F2FS_PROJINHERIT_FL) xflags |= FS_XFLAG_PROJINHERIT; return xflags; } @@ -2566,31 +2739,23 @@ static inline __u32 f2fs_iflags_to_xflags(unsigned long iflags) FS_XFLAG_APPEND | FS_XFLAG_NODUMP | \ FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT) -/* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */ -#define F2FS_FL_XFLAG_VISIBLE (FS_SYNC_FL | \ - FS_IMMUTABLE_FL | \ - FS_APPEND_FL | \ - FS_NODUMP_FL | \ - FS_NOATIME_FL | \ - FS_PROJINHERIT_FL) - /* Transfer xflags flags to internal */ static inline unsigned long f2fs_xflags_to_iflags(__u32 xflags) { unsigned long iflags = 0; if (xflags & FS_XFLAG_SYNC) - iflags |= FS_SYNC_FL; + iflags |= F2FS_SYNC_FL; if (xflags & FS_XFLAG_IMMUTABLE) - iflags |= FS_IMMUTABLE_FL; + iflags |= F2FS_IMMUTABLE_FL; if (xflags & FS_XFLAG_APPEND) - iflags |= FS_APPEND_FL; + iflags |= F2FS_APPEND_FL; if (xflags & FS_XFLAG_NODUMP) - iflags |= FS_NODUMP_FL; + iflags |= F2FS_NODUMP_FL; if (xflags & FS_XFLAG_NOATIME) - iflags |= FS_NOATIME_FL; + iflags |= F2FS_NOATIME_FL; if (xflags & FS_XFLAG_PROJINHERIT) - iflags |= FS_PROJINHERIT_FL; + iflags |= F2FS_PROJINHERIT_FL; return iflags; } @@ -2603,9 +2768,9 @@ static int f2fs_ioc_fsgetxattr(struct file *filp, unsigned long arg) memset(&fa, 0, sizeof(struct fsxattr)); fa.fsx_xflags = f2fs_iflags_to_xflags(fi->i_flags & - (FS_FL_USER_VISIBLE | FS_PROJINHERIT_FL)); + F2FS_FL_USER_VISIBLE); - if (f2fs_sb_has_project_quota(inode->i_sb)) + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode))) fa.fsx_projid = (__u32)from_kprojid(&init_user_ns, fi->i_projid); @@ -2614,6 +2779,30 @@ static int f2fs_ioc_fsgetxattr(struct file *filp, unsigned long arg) return 0; } +static int f2fs_ioctl_check_project(struct inode *inode, struct fsxattr *fa) +{ + /* + * Project Quota ID state is only allowed to change from within the init + * namespace. Enforce that restriction only if we are trying to change + * the quota ID state. Everything else is allowed in user namespaces. + */ + if (current_user_ns() == &init_user_ns) + return 0; + + if (__kprojid_val(F2FS_I(inode)->i_projid) != fa->fsx_projid) + return -EINVAL; + + if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) { + if (!(fa->fsx_xflags & FS_XFLAG_PROJINHERIT)) + return -EINVAL; + } else { + if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT) + return -EINVAL; + } + + return 0; +} + static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -2641,23 +2830,149 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) return err; inode_lock(inode); + err = f2fs_ioctl_check_project(inode, &fa); + if (err) + goto out; flags = (fi->i_flags & ~F2FS_FL_XFLAG_VISIBLE) | (flags & F2FS_FL_XFLAG_VISIBLE); err = __f2fs_ioc_setflags(inode, flags); - inode_unlock(inode); - mnt_drop_write_file(filp); if (err) - return err; + goto out; err = f2fs_ioc_setproject(filp, fa.fsx_projid); - if (err) - return err; +out: + inode_unlock(inode); + mnt_drop_write_file(filp); + return err; +} +int f2fs_pin_file_control(struct inode *inode, bool inc) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + /* Use i_gc_failures for normal file as a risk signal. */ + if (inc) + f2fs_i_gc_failures_write(inode, + fi->i_gc_failures[GC_FAILURE_PIN] + 1); + + if (fi->i_gc_failures[GC_FAILURE_PIN] > sbi->gc_pin_file_threshold) { + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: Enable GC = ino %lx after %x GC trials\n", + __func__, inode->i_ino, + fi->i_gc_failures[GC_FAILURE_PIN]); + clear_inode_flag(inode, FI_PIN_FILE); + return -EAGAIN; + } return 0; } +static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + __u32 pin; + int ret = 0; + + if (!inode_owner_or_capable(inode)) + return -EACCES; + + if (get_user(pin, (__u32 __user *)arg)) + return -EFAULT; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + if (f2fs_readonly(F2FS_I_SB(inode)->sb)) + return -EROFS; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + inode_lock(inode); + + if (f2fs_should_update_outplace(inode, NULL)) { + ret = -EINVAL; + goto out; + } + + if (!pin) { + clear_inode_flag(inode, FI_PIN_FILE); + f2fs_i_gc_failures_write(inode, 0); + goto done; + } + + if (f2fs_pin_file_control(inode, false)) { + ret = -EAGAIN; + goto out; + } + ret = f2fs_convert_inline_inode(inode); + if (ret) + goto out; + + set_inode_flag(inode, FI_PIN_FILE); + ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; +done: + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); +out: + inode_unlock(inode); + mnt_drop_write_file(filp); + return ret; +} + +static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + __u32 pin = 0; + + if (is_inode_flag_set(inode, FI_PIN_FILE)) + pin = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; + return put_user(pin, (u32 __user *)arg); +} + +int f2fs_precache_extents(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_map_blocks map; + pgoff_t m_next_extent; + loff_t end; + int err; + + if (is_inode_flag_set(inode, FI_NO_EXTENT)) + return -EOPNOTSUPP; + + map.m_lblk = 0; + map.m_next_pgofs = NULL; + map.m_next_extent = &m_next_extent; + map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = false; + end = F2FS_I_SB(inode)->max_file_blocks; + + while (map.m_lblk < end) { + map.m_len = end - map.m_lblk; + + down_write(&fi->i_gc_rwsem[WRITE]); + err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_PRECACHE); + up_write(&fi->i_gc_rwsem[WRITE]); + if (err) + return err; + + map.m_lblk = m_next_extent; + } + + return err; +} + +static int f2fs_ioc_precache_extents(struct file *filp, unsigned long arg) +{ + return f2fs_precache_extents(file_inode(filp)); +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) + return -EIO; + switch (cmd) { case F2FS_IOC_GETFLAGS: return f2fs_ioc_getflags(filp, arg); @@ -2703,6 +3018,12 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_fsgetxattr(filp, arg); case F2FS_IOC_FSSETXATTR: return f2fs_ioc_fssetxattr(filp, arg); + case F2FS_IOC_GET_PIN_FILE: + return f2fs_ioc_get_pin_file(filp, arg); + case F2FS_IOC_SET_PIN_FILE: + return f2fs_ioc_set_pin_file(filp, arg); + case F2FS_IOC_PRECACHE_EXTENTS: + return f2fs_ioc_precache_extents(filp, arg); default: return -ENOTTY; } @@ -2712,10 +3033,20 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); - struct blk_plug plug; ssize_t ret; - inode_lock(inode); + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + + if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) + return -EINVAL; + + if (!inode_trylock(inode)) { + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + inode_lock(inode); + } + ret = generic_write_checks(iocb, from); if (ret > 0) { bool preallocated = false; @@ -2725,18 +3056,31 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (iov_iter_fault_in_readable(from, iov_iter_count(from))) set_inode_flag(inode, FI_NO_PREALLOC); - preallocated = true; - target_size = iocb->ki_pos + iov_iter_count(from); + if ((iocb->ki_flags & IOCB_NOWAIT) && + (iocb->ki_flags & IOCB_DIRECT)) { + if (!f2fs_overwrite_io(inode, iocb->ki_pos, + iov_iter_count(from)) || + f2fs_has_inline_data(inode) || + f2fs_force_buffered_io(inode, + iocb, from)) { + clear_inode_flag(inode, + FI_NO_PREALLOC); + inode_unlock(inode); + return -EAGAIN; + } - err = f2fs_preallocate_blocks(iocb, from); - if (err) { - clear_inode_flag(inode, FI_NO_PREALLOC); - inode_unlock(inode); - return err; + } else { + preallocated = true; + target_size = iocb->ki_pos + iov_iter_count(from); + + err = f2fs_preallocate_blocks(iocb, from); + if (err) { + clear_inode_flag(inode, FI_NO_PREALLOC); + inode_unlock(inode); + return err; + } } - blk_start_plug(&plug); ret = __generic_file_write_iter(iocb, from); - blk_finish_plug(&plug); clear_inode_flag(inode, FI_NO_PREALLOC); /* if we couldn't write data, we should deallocate blocks. */ @@ -2784,6 +3128,9 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_GET_FEATURES: case F2FS_IOC_FSGETXATTR: case F2FS_IOC_FSSETXATTR: + case F2FS_IOC_GET_PIN_FILE: + case F2FS_IOC_SET_PIN_FILE: + case F2FS_IOC_PRECACHE_EXTENTS: break; default: return -ENOIOCTLCMD;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index f228844..d421720 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c
@@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/gc.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/module.h> @@ -43,25 +40,28 @@ static int gc_thread_func(void *data) if (gc_th->gc_wake) gc_th->gc_wake = 0; - if (try_to_freeze()) + if (try_to_freeze()) { + stat_other_skip_bggc_count(sbi); continue; + } if (kthread_should_stop()) break; if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) { increase_sleep_time(gc_th, &wait_ms); + stat_other_skip_bggc_count(sbi); continue; } -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_CHECKPOINT)) { f2fs_show_injection_info(FAULT_CHECKPOINT); f2fs_stop_checkpoint(sbi, false); } -#endif - if (!sb_start_write_trylock(sbi->sb)) + if (!sb_start_write_trylock(sbi->sb)) { + stat_other_skip_bggc_count(sbi); continue; + } /* * [GC triggering condition] @@ -76,17 +76,21 @@ static int gc_thread_func(void *data) * invalidated soon after by user update or deletion. * So, I'd like to wait some time to collect dirty segments. */ - if (!mutex_trylock(&sbi->gc_mutex)) - goto next; - - if (gc_th->gc_urgent) { + if (sbi->gc_mode == GC_URGENT) { wait_ms = gc_th->urgent_sleep_time; + mutex_lock(&sbi->gc_mutex); goto do_gc; } - if (!is_idle(sbi)) { + if (!mutex_trylock(&sbi->gc_mutex)) { + stat_other_skip_bggc_count(sbi); + goto next; + } + + if (!is_idle(sbi, GC_TIME)) { increase_sleep_time(gc_th, &wait_ms); mutex_unlock(&sbi->gc_mutex); + stat_io_skip_bggc_count(sbi); goto next; } @@ -113,7 +117,7 @@ static int gc_thread_func(void *data) return 0; } -int start_gc_thread(struct f2fs_sb_info *sbi) +int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) { struct f2fs_gc_kthread *gc_th; dev_t dev = sbi->sb->s_bdev->bd_dev; @@ -130,8 +134,6 @@ int start_gc_thread(struct f2fs_sb_info *sbi) gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; - gc_th->gc_idle = 0; - gc_th->gc_urgent = 0; gc_th->gc_wake= 0; sbi->gc_thread = gc_th; @@ -140,32 +142,35 @@ int start_gc_thread(struct f2fs_sb_info *sbi) "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(gc_th->f2fs_gc_task)) { err = PTR_ERR(gc_th->f2fs_gc_task); - kfree(gc_th); + kvfree(gc_th); sbi->gc_thread = NULL; } out: return err; } -void stop_gc_thread(struct f2fs_sb_info *sbi) +void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi) { struct f2fs_gc_kthread *gc_th = sbi->gc_thread; if (!gc_th) return; kthread_stop(gc_th->f2fs_gc_task); - kfree(gc_th); + kvfree(gc_th); sbi->gc_thread = NULL; } -static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type) +static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type) { int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY; - if (gc_th && gc_th->gc_idle) { - if (gc_th->gc_idle == 1) - gc_mode = GC_CB; - else if (gc_th->gc_idle == 2) - gc_mode = GC_GREEDY; + switch (sbi->gc_mode) { + case GC_IDLE_CB: + gc_mode = GC_CB; + break; + case GC_IDLE_GREEDY: + case GC_URGENT: + gc_mode = GC_GREEDY; + break; } return gc_mode; } @@ -181,14 +186,16 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, p->max_search = dirty_i->nr_dirty[type]; p->ofs_unit = 1; } else { - p->gc_mode = select_gc_type(sbi->gc_thread, gc_type); + p->gc_mode = select_gc_type(sbi, gc_type); p->dirty_segmap = dirty_i->dirty_segmap[DIRTY]; p->max_search = dirty_i->nr_dirty[DIRTY]; p->ofs_unit = sbi->segs_per_sec; } /* we need to check every dirty segments in the FG_GC case */ - if (gc_type != FG_GC && p->max_search > sbi->max_victim_search) + if (gc_type != FG_GC && + (sbi->gc_mode != GC_URGENT) && + p->max_search > sbi->max_victim_search) p->max_search = sbi->max_victim_search; /* let's select beginning hot/small space first in no_heap mode*/ @@ -226,10 +233,6 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) { if (sec_usage_check(sbi, secno)) continue; - - if (no_fggc_candidate(sbi, secno)) - continue; - clear_bit(secno, dirty_i->victim_secmap); return GET_SEG_FROM_SEC(sbi, secno); } @@ -268,16 +271,6 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) return UINT_MAX - ((100 * (100 - u) * age) / (100 + u)); } -static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi, - unsigned int segno) -{ - unsigned int valid_blocks = - get_valid_blocks(sbi, segno, true); - - return IS_DATASEG(get_seg_entry(sbi, segno)->type) ? - valid_blocks * 2 : valid_blocks; -} - static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno, struct victim_sel_policy *p) { @@ -286,7 +279,7 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, /* alloc_mode == LFS */ if (p->gc_mode == GC_GREEDY) - return get_greedy_cost(sbi, segno); + return get_valid_blocks(sbi, segno, true); else return get_cb_cost(sbi, segno); } @@ -330,8 +323,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, p.min_cost = get_max_cost(sbi, &p); if (*result != NULL_SEGNO) { - if (IS_DATASEG(get_seg_entry(sbi, *result)->type) && - get_valid_blocks(sbi, *result, false) && + if (get_valid_blocks(sbi, *result, false) && !sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result))) p.min_segno = *result; goto out; @@ -340,6 +332,22 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, if (p.max_search == 0) goto out; + if (__is_large_section(sbi) && p.alloc_mode == LFS) { + if (sbi->next_victim_seg[BG_GC] != NULL_SEGNO) { + p.min_segno = sbi->next_victim_seg[BG_GC]; + *result = p.min_segno; + sbi->next_victim_seg[BG_GC] = NULL_SEGNO; + goto got_result; + } + if (gc_type == FG_GC && + sbi->next_victim_seg[FG_GC] != NULL_SEGNO) { + p.min_segno = sbi->next_victim_seg[FG_GC]; + *result = p.min_segno; + sbi->next_victim_seg[FG_GC] = NULL_SEGNO; + goto got_result; + } + } + last_victim = sm->last_victim[p.gc_mode]; if (p.alloc_mode == LFS && gc_type == FG_GC) { p.min_segno = check_bg_victims(sbi); @@ -377,10 +385,11 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, if (sec_usage_check(sbi, secno)) goto next; - if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) + /* Don't touch checkpointed data */ + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) && + get_ckpt_valid_blocks(sbi, segno))) goto next; - if (gc_type == FG_GC && p.alloc_mode == LFS && - no_fggc_candidate(sbi, secno)) + if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) goto next; cost = get_gc_cost(sbi, segno, &p); @@ -401,6 +410,8 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, } if (p.min_segno != NULL_SEGNO) { got_it: + *result = (p.min_segno / p.ofs_unit) * p.ofs_unit; +got_result: if (p.alloc_mode == LFS) { secno = GET_SEC_FROM_SEG(sbi, p.min_segno); if (gc_type == FG_GC) @@ -408,13 +419,13 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, else set_bit(secno, dirty_i->victim_secmap); } - *result = (p.min_segno / p.ofs_unit) * p.ofs_unit; + } +out: + if (p.min_segno != NULL_SEGNO) trace_f2fs_get_victim(sbi->sb, type, gc_type, &p, sbi->cur_victim_sec, prefree_segments(sbi), free_segments(sbi)); - } -out: mutex_unlock(&dirty_i->seglist_lock); return (p.min_segno == NULL_SEGNO) ? 0 : 1; @@ -442,7 +453,7 @@ static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode) iput(inode); return; } - new_ie = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); + new_ie = f2fs_kmem_cache_alloc(f2fs_inode_entry_slab, GFP_NOFS); new_ie->inode = inode; f2fs_radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie); @@ -456,7 +467,7 @@ static void put_gc_inode(struct gc_inode_list *gc_list) radix_tree_delete(&gc_list->iroot, ie->inode->i_ino); iput(ie->inode); list_del(&ie->list); - kmem_cache_free(inode_entry_slab, ie); + kmem_cache_free(f2fs_inode_entry_slab, ie); } } @@ -467,10 +478,10 @@ static int check_valid_map(struct f2fs_sb_info *sbi, struct seg_entry *sentry; int ret; - mutex_lock(&sit_i->sentry_lock); + down_read(&sit_i->sentry_lock); sentry = get_seg_entry(sbi, segno); ret = f2fs_test_bit(offset, sentry->cur_valid_map); - mutex_unlock(&sit_i->sentry_lock); + up_read(&sit_i->sentry_lock); return ret; } @@ -479,65 +490,81 @@ static int check_valid_map(struct f2fs_sb_info *sbi, * On validity, copy that node with cold status, otherwise (invalid node) * ignore that. */ -static void gc_node_segment(struct f2fs_sb_info *sbi, +static int gc_node_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, unsigned int segno, int gc_type) { struct f2fs_summary *entry; block_t start_addr; int off; int phase = 0; + bool fggc = (gc_type == FG_GC); + int submitted = 0; start_addr = START_BLOCK(sbi, segno); next_step: entry = sum; + if (fggc && phase == 2) + atomic_inc(&sbi->wb_sync_req[NODE]); + for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { nid_t nid = le32_to_cpu(entry->nid); struct page *node_page; struct node_info ni; + int err; /* stop BG_GC if there is not enough free sections. */ if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) - return; + return submitted; if (check_valid_map(sbi, segno, off) == 0) continue; if (phase == 0) { - ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1, + f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1, META_NAT, true); continue; } if (phase == 1) { - ra_node_page(sbi, nid); + f2fs_ra_node_page(sbi, nid); continue; } /* phase == 2 */ - node_page = get_node_page(sbi, nid); + node_page = f2fs_get_node_page(sbi, nid); if (IS_ERR(node_page)) continue; - /* block may become invalid during get_node_page */ + /* block may become invalid during f2fs_get_node_page */ if (check_valid_map(sbi, segno, off) == 0) { f2fs_put_page(node_page, 1); continue; } - get_node_info(sbi, nid, &ni); + if (f2fs_get_node_info(sbi, nid, &ni)) { + f2fs_put_page(node_page, 1); + continue; + } + if (ni.blk_addr != start_addr + off) { f2fs_put_page(node_page, 1); continue; } - move_node_page(node_page, gc_type); + err = f2fs_move_node_page(node_page, gc_type); + if (!err && gc_type == FG_GC) + submitted++; stat_inc_node_blk_count(sbi, 1, gc_type); } if (++phase < 3) goto next_step; + + if (fggc) + atomic_dec(&sbi->wb_sync_req[NODE]); + return submitted; } /* @@ -547,7 +574,7 @@ static void gc_node_segment(struct f2fs_sb_info *sbi, * as indirect or double indirect node blocks, are given, it must be a caller's * bug. */ -block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode) +block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode) { unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4; unsigned int bidx; @@ -578,11 +605,14 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, nid = le32_to_cpu(sum->nid); ofs_in_node = le16_to_cpu(sum->ofs_in_node); - node_page = get_node_page(sbi, nid); + node_page = f2fs_get_node_page(sbi, nid); if (IS_ERR(node_page)) return false; - get_node_info(sbi, nid, dni); + if (f2fs_get_node_info(sbi, nid, dni)) { + f2fs_put_page(node_page, 1); + return false; + } if (sum->version != dni->version) { f2fs_msg(sbi->sb, KERN_WARNING, @@ -600,47 +630,137 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, return true; } -/* - * Move data block via META_MAPPING while keeping locked data page. - * This can be used to move blocks, aka LBAs, directly on disk. - */ -static void move_data_block(struct inode *inode, block_t bidx, - unsigned int segno, int off) +static int ra_data_block(struct inode *inode, pgoff_t index) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct address_space *mapping = inode->i_mapping; + struct dnode_of_data dn; + struct page *page; + struct extent_info ei = {0, 0, 0}; struct f2fs_io_info fio = { - .sbi = F2FS_I_SB(inode), + .sbi = sbi, + .ino = inode->i_ino, .type = DATA, .temp = COLD, .op = REQ_OP_READ, .op_flags = 0, .encrypted_page = NULL, .in_list = false, + .retry = false, + }; + int err; + + page = f2fs_grab_cache_page(mapping, index, true); + if (!page) + return -ENOMEM; + + if (f2fs_lookup_extent_cache(inode, index, &ei)) { + dn.data_blkaddr = ei.blk + index - ei.fofs; + goto got_it; + } + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); + if (err) + goto put_page; + f2fs_put_dnode(&dn); + + if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, + DATA_GENERIC))) { + err = -EFAULT; + goto put_page; + } +got_it: + /* read page */ + fio.page = page; + fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; + + /* + * don't cache encrypted data into meta inode until previous dirty + * data were writebacked to avoid racing between GC and flush. + */ + f2fs_wait_on_page_writeback(page, DATA, true, true); + + f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); + + fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(sbi), + dn.data_blkaddr, + FGP_LOCK | FGP_CREAT, GFP_NOFS); + if (!fio.encrypted_page) { + err = -ENOMEM; + goto put_page; + } + + err = f2fs_submit_page_bio(&fio); + if (err) + goto put_encrypted_page; + f2fs_put_page(fio.encrypted_page, 0); + f2fs_put_page(page, 1); + return 0; +put_encrypted_page: + f2fs_put_page(fio.encrypted_page, 1); +put_page: + f2fs_put_page(page, 1); + return err; +} + +/* + * Move data block via META_MAPPING while keeping locked data page. + * This can be used to move blocks, aka LBAs, directly on disk. + */ +static int move_data_block(struct inode *inode, block_t bidx, + int gc_type, unsigned int segno, int off) +{ + struct f2fs_io_info fio = { + .sbi = F2FS_I_SB(inode), + .ino = inode->i_ino, + .type = DATA, + .temp = COLD, + .op = REQ_OP_READ, + .op_flags = 0, + .encrypted_page = NULL, + .in_list = false, + .retry = false, }; struct dnode_of_data dn; struct f2fs_summary sum; struct node_info ni; - struct page *page; + struct page *page, *mpage; block_t newaddr; - int err; + int err = 0; + bool lfs_mode = test_opt(fio.sbi, LFS); /* do not read out */ page = f2fs_grab_cache_page(inode->i_mapping, bidx, false); if (!page) - return; + return -ENOMEM; - if (!check_valid_map(F2FS_I_SB(inode), segno, off)) + if (!check_valid_map(F2FS_I_SB(inode), segno, off)) { + err = -ENOENT; goto out; + } - if (f2fs_is_atomic_file(inode)) + if (f2fs_is_atomic_file(inode)) { + F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++; + F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++; + err = -EAGAIN; goto out; + } + + if (f2fs_is_pinned_file(inode)) { + f2fs_pin_file_control(inode, true); + err = -EAGAIN; + goto out; + } set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, bidx, LOOKUP_NODE); if (err) goto out; if (unlikely(dn.data_blkaddr == NULL_ADDR)) { ClearPageUptodate(page); + err = -ENOENT; goto put_out; } @@ -648,25 +768,50 @@ static void move_data_block(struct inode *inode, block_t bidx, * don't cache encrypted data into meta inode until previous dirty * data were writebacked to avoid racing between GC and flush. */ - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); - get_node_info(fio.sbi, dn.nid, &ni); + f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); + + err = f2fs_get_node_info(fio.sbi, dn.nid, &ni); + if (err) + goto put_out; + set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); /* read page */ fio.page = page; fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; - allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, + if (lfs_mode) + down_write(&fio.sbi->io_order_lock); + + f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, &sum, CURSEG_COLD_DATA, NULL, false); - fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi), newaddr, - FGP_LOCK | FGP_CREAT, GFP_NOFS); + fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi), + newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); if (!fio.encrypted_page) { err = -ENOMEM; goto recover_block; } + mpage = f2fs_pagecache_get_page(META_MAPPING(fio.sbi), + fio.old_blkaddr, FGP_LOCK, GFP_NOFS); + if (mpage) { + bool updated = false; + + if (PageUptodate(mpage)) { + memcpy(page_address(fio.encrypted_page), + page_address(mpage), PAGE_SIZE); + updated = true; + } + f2fs_put_page(mpage, 1); + invalidate_mapping_pages(META_MAPPING(fio.sbi), + fio.old_blkaddr, fio.old_blkaddr); + if (updated) + goto write_page; + } + err = f2fs_submit_page_bio(&fio); if (err) goto put_page_out; @@ -683,21 +828,24 @@ static void move_data_block(struct inode *inode, block_t bidx, goto put_page_out; } +write_page: + f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true, true); set_page_dirty(fio.encrypted_page); - f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true); if (clear_page_dirty_for_io(fio.encrypted_page)) dec_page_count(fio.sbi, F2FS_DIRTY_META); set_page_writeback(fio.encrypted_page); + ClearPageError(page); /* allocate block address */ - f2fs_wait_on_page_writeback(dn.node_page, NODE, true); + f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true); fio.op = REQ_OP_WRITE; fio.op_flags = REQ_SYNC; fio.new_blkaddr = newaddr; - err = f2fs_submit_page_write(&fio); - if (err) { + f2fs_submit_page_write(&fio); + if (fio.retry) { + err = -EAGAIN; if (PageWriteback(fio.encrypted_page)) end_page_writeback(fio.encrypted_page); goto put_page_out; @@ -712,38 +860,57 @@ static void move_data_block(struct inode *inode, block_t bidx, put_page_out: f2fs_put_page(fio.encrypted_page, 1); recover_block: + if (lfs_mode) + up_write(&fio.sbi->io_order_lock); if (err) - __f2fs_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr, + f2fs_do_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr, true, true); put_out: f2fs_put_dnode(&dn); out: f2fs_put_page(page, 1); + return err; } -static void move_data_page(struct inode *inode, block_t bidx, int gc_type, +static int move_data_page(struct inode *inode, block_t bidx, int gc_type, unsigned int segno, int off) { struct page *page; + int err = 0; - page = get_lock_data_page(inode, bidx, true); + page = f2fs_get_lock_data_page(inode, bidx, true); if (IS_ERR(page)) - return; + return PTR_ERR(page); - if (!check_valid_map(F2FS_I_SB(inode), segno, off)) + if (!check_valid_map(F2FS_I_SB(inode), segno, off)) { + err = -ENOENT; goto out; + } - if (f2fs_is_atomic_file(inode)) + if (f2fs_is_atomic_file(inode)) { + F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++; + F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++; + err = -EAGAIN; goto out; + } + if (f2fs_is_pinned_file(inode)) { + if (gc_type == FG_GC) + f2fs_pin_file_control(inode, true); + err = -EAGAIN; + goto out; + } if (gc_type == BG_GC) { - if (PageWriteback(page)) + if (PageWriteback(page)) { + err = -EAGAIN; goto out; + } set_page_dirty(page); set_cold_data(page); } else { struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), + .ino = inode->i_ino, .type = DATA, .temp = COLD, .op = REQ_OP_WRITE, @@ -755,19 +922,19 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type, .io_type = FS_GC_DATA_IO, }; bool is_dirty = PageDirty(page); - int err; retry: + f2fs_wait_on_page_writeback(page, DATA, true, true); + set_page_dirty(page); - f2fs_wait_on_page_writeback(page, DATA, true); if (clear_page_dirty_for_io(page)) { inode_dec_dirty_pages(inode); - remove_dirty_inode(inode); + f2fs_remove_dirty_inode(inode); } set_cold_data(page); - err = do_write_data_page(&fio); + err = f2fs_do_write_data_page(&fio); if (err) { clear_cold_data(page); if (err == -ENOMEM) { @@ -780,6 +947,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type, } out: f2fs_put_page(page, 1); + return err; } /* @@ -789,7 +957,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type, * If the parent node is not valid or the data block address is different, * the victim data block is ignored. */ -static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, +static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, struct gc_inode_list *gc_list, unsigned int segno, int gc_type) { struct super_block *sb = sbi->sb; @@ -797,6 +965,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, block_t start_addr; int off; int phase = 0; + int submitted = 0; start_addr = START_BLOCK(sbi, segno); @@ -813,19 +982,19 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, /* stop BG_GC if there is not enough free sections. */ if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) - return; + return submitted; if (check_valid_map(sbi, segno, off) == 0) continue; if (phase == 0) { - ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1, + f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1, META_NAT, true); continue; } if (phase == 1) { - ra_node_page(sbi, nid); + f2fs_ra_node_page(sbi, nid); continue; } @@ -834,7 +1003,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, continue; if (phase == 2) { - ra_node_page(sbi, dni.ino); + f2fs_ra_node_page(sbi, dni.ino); continue; } @@ -845,16 +1014,31 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (IS_ERR(inode) || is_bad_inode(inode)) continue; - /* if encrypted inode, let's go phase 3 */ - if (f2fs_encrypted_file(inode)) { + if (!down_write_trylock( + &F2FS_I(inode)->i_gc_rwsem[WRITE])) { + iput(inode); + sbi->skipped_gc_rwsem++; + continue; + } + + start_bidx = f2fs_start_bidx_of_node(nofs, inode) + + ofs_in_node; + + if (f2fs_post_read_required(inode)) { + int err = ra_data_block(inode, start_bidx); + + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + if (err) { + iput(inode); + continue; + } add_gc_inode(gc_list, inode); continue; } - start_bidx = start_bidx_of_node(nofs, inode); - data_page = get_read_data_page(inode, - start_bidx + ofs_in_node, REQ_RAHEAD, - true); + data_page = f2fs_get_read_data_page(inode, + start_bidx, REQ_RAHEAD, true); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); if (IS_ERR(data_page)) { iput(inode); continue; @@ -870,13 +1054,15 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (inode) { struct f2fs_inode_info *fi = F2FS_I(inode); bool locked = false; + int err; if (S_ISREG(inode->i_mode)) { - if (!down_write_trylock(&fi->dio_rwsem[READ])) + if (!down_write_trylock(&fi->i_gc_rwsem[READ])) continue; if (!down_write_trylock( - &fi->dio_rwsem[WRITE])) { - up_write(&fi->dio_rwsem[READ]); + &fi->i_gc_rwsem[WRITE])) { + sbi->skipped_gc_rwsem++; + up_write(&fi->i_gc_rwsem[READ]); continue; } locked = true; @@ -885,17 +1071,22 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, inode_dio_wait(inode); } - start_bidx = start_bidx_of_node(nofs, inode) + start_bidx = f2fs_start_bidx_of_node(nofs, inode) + ofs_in_node; - if (f2fs_encrypted_file(inode)) - move_data_block(inode, start_bidx, segno, off); + if (f2fs_post_read_required(inode)) + err = move_data_block(inode, start_bidx, + gc_type, segno, off); else - move_data_page(inode, start_bidx, gc_type, + err = move_data_page(inode, start_bidx, gc_type, segno, off); + if (!err && (gc_type == FG_GC || + f2fs_post_read_required(inode))) + submitted++; + if (locked) { - up_write(&fi->dio_rwsem[WRITE]); - up_write(&fi->dio_rwsem[READ]); + up_write(&fi->i_gc_rwsem[WRITE]); + up_write(&fi->i_gc_rwsem[READ]); } stat_inc_data_blk_count(sbi, 1, gc_type); @@ -904,6 +1095,8 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (++phase < 5) goto next_step; + + return submitted; } static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, @@ -912,10 +1105,10 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, struct sit_info *sit_i = SIT_I(sbi); int ret; - mutex_lock(&sit_i->sentry_lock); + down_write(&sit_i->sentry_lock); ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, NO_CHECK_TYPE, LFS); - mutex_unlock(&sit_i->sentry_lock); + up_write(&sit_i->sentry_lock); return ret; } @@ -928,18 +1121,34 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, struct blk_plug plug; unsigned int segno = start_segno; unsigned int end_segno = start_segno + sbi->segs_per_sec; - int seg_freed = 0; + int seg_freed = 0, migrated = 0; unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ? SUM_TYPE_DATA : SUM_TYPE_NODE; + int submitted = 0; + + if (__is_large_section(sbi)) + end_segno = rounddown(end_segno, sbi->segs_per_sec); /* readahead multi ssa blocks those have contiguous address */ - if (sbi->segs_per_sec > 1) - ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), - sbi->segs_per_sec, META_SSA, true); + if (__is_large_section(sbi)) + f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), + end_segno - segno, META_SSA, true); /* reference all summary page */ while (segno < end_segno) { - sum_page = get_sum_page(sbi, segno++); + sum_page = f2fs_get_sum_page(sbi, segno++); + if (IS_ERR(sum_page)) { + int err = PTR_ERR(sum_page); + + end_segno = segno - 1; + for (segno = start_segno; segno < end_segno; segno++) { + sum_page = find_get_page(META_MAPPING(sbi), + GET_SUM_BLOCK(sbi, segno)); + f2fs_put_page(sum_page, 0); + f2fs_put_page(sum_page, 0); + } + return err; + } unlock_page(sum_page); } @@ -952,10 +1161,13 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, GET_SUM_BLOCK(sbi, segno)); f2fs_put_page(sum_page, 0); - if (get_valid_blocks(sbi, segno, false) == 0 || - !PageUptodate(sum_page) || - unlikely(f2fs_cp_error(sbi))) - goto next; + if (get_valid_blocks(sbi, segno, false) == 0) + goto freed; + if (__is_large_section(sbi) && + migrated >= sbi->migration_granularity) + goto skip; + if (!PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi))) + goto skip; sum = page_address(sum_page); if (type != GET_SUM_TYPE((&sum->footer))) { @@ -963,32 +1175,38 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, "type [%d, %d] in SSA and SIT", segno, type, GET_SUM_TYPE((&sum->footer))); set_sbi_flag(sbi, SBI_NEED_FSCK); - goto next; + goto skip; } /* * this is to avoid deadlock: * - lock_page(sum_page) - f2fs_replace_block - * - check_valid_map() - mutex_lock(sentry_lock) - * - mutex_lock(sentry_lock) - change_curseg() + * - check_valid_map() - down_write(sentry_lock) + * - down_read(sentry_lock) - change_curseg() * - lock_page(sum_page) */ if (type == SUM_TYPE_NODE) - gc_node_segment(sbi, sum->entries, segno, gc_type); - else - gc_data_segment(sbi, sum->entries, gc_list, segno, + submitted += gc_node_segment(sbi, sum->entries, segno, gc_type); + else + submitted += gc_data_segment(sbi, sum->entries, gc_list, + segno, gc_type); stat_inc_seg_count(sbi, type, gc_type); +freed: if (gc_type == FG_GC && get_valid_blocks(sbi, segno, false) == 0) seg_freed++; -next: + migrated++; + + if (__is_large_section(sbi) && segno + 1 < end_segno) + sbi->next_victim_seg[gc_type] = segno + 1; +skip: f2fs_put_page(sum_page, 0); } - if (gc_type == FG_GC) + if (submitted) f2fs_submit_merged_write(sbi, (type == SUM_TYPE_NODE) ? NODE : DATA); @@ -1011,6 +1229,9 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, .ilist = LIST_HEAD_INIT(gc_list.ilist), .iroot = RADIX_TREE_INIT(GFP_NOFS), }; + unsigned long long last_skipped = sbi->skipped_atomic_files[FG_GC]; + unsigned long long first_skipped; + unsigned int skipped_round = 0, round = 0; trace_f2fs_gc_begin(sbi->sb, sync, background, get_pages(sbi, F2FS_DIRTY_NODES), @@ -1022,6 +1243,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, prefree_segments(sbi)); cpc.reason = __get_cp_reason(sbi); + sbi->skipped_gc_rwsem = 0; + first_skipped = last_skipped; gc_more: if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) { ret = -EINVAL; @@ -1038,8 +1261,9 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, * threshold, we can make them free by checkpoint. Then, we * secure free segments which doesn't need fggc any more. */ - if (prefree_segments(sbi)) { - ret = write_checkpoint(sbi, &cpc); + if (prefree_segments(sbi) && + !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { + ret = f2fs_write_checkpoint(sbi, &cpc); if (ret) goto stop; } @@ -1062,17 +1286,36 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, sec_freed++; total_freed += seg_freed; + if (gc_type == FG_GC) { + if (sbi->skipped_atomic_files[FG_GC] > last_skipped || + sbi->skipped_gc_rwsem) + skipped_round++; + last_skipped = sbi->skipped_atomic_files[FG_GC]; + round++; + } + if (gc_type == FG_GC) sbi->cur_victim_sec = NULL_SEGNO; - if (!sync) { - if (has_not_enough_free_secs(sbi, sec_freed, 0)) { + if (sync) + goto stop; + + if (has_not_enough_free_secs(sbi, sec_freed, 0)) { + if (skipped_round <= MAX_SKIP_GC_COUNT || + skipped_round * 2 < round) { segno = NULL_SEGNO; goto gc_more; } - if (gc_type == FG_GC) - ret = write_checkpoint(sbi, &cpc); + if (first_skipped < last_skipped && + (last_skipped - first_skipped) > + sbi->skipped_gc_rwsem) { + f2fs_drop_inmem_pages_all(sbi, true); + segno = NULL_SEGNO; + goto gc_more; + } + if (gc_type == FG_GC && !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) + ret = f2fs_write_checkpoint(sbi, &cpc); } stop: SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0; @@ -1091,27 +1334,19 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, put_gc_inode(&gc_list); - if (sync) + if (sync && !ret) ret = sec_freed ? 0 : -EAGAIN; return ret; } -void build_gc_manager(struct f2fs_sb_info *sbi) +void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) { - u64 main_count, resv_count, ovp_count; - DIRTY_I(sbi)->v_ops = &default_v_ops; - /* threshold of # of valid blocks in a section for victims of FG_GC */ - main_count = SM_I(sbi)->main_segments << sbi->log_blocks_per_seg; - resv_count = SM_I(sbi)->reserved_segments << sbi->log_blocks_per_seg; - ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg; - - sbi->fggc_threshold = div64_u64((main_count - ovp_count) * - BLKS_PER_SEC(sbi), (main_count - resv_count)); + sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES; /* give warm/cold data area from slower device */ - if (sbi->s_ndevs && sbi->segs_per_sec == 1) + if (sbi->s_ndevs && !__is_large_section(sbi)) SIT_I(sbi)->last_victim[ALLOC_NEXT] = GET_SEGNO(sbi, FDEV(0).end_blk) + 1; }
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 9325191..bbac9d3 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h
@@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/gc.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #define GC_THREAD_MIN_WB_PAGES 1 /* * a threshold to determine @@ -20,6 +17,8 @@ #define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ #define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ +#define DEF_GC_FAILED_PINNED_FILES 2048 + /* Search max. number of dirty segments to select a victim segment */ #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */ @@ -34,8 +33,6 @@ struct f2fs_gc_kthread { unsigned int no_gc_sleep_time; /* for changing gc mode */ - unsigned int gc_idle; - unsigned int gc_urgent; unsigned int gc_wake; };
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index eb2e031..cc82f14 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c
@@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/hash.c * @@ -7,10 +8,6 @@ * Portions of this code from linux/fs/ext3/hash.c * * Copyright (C) 2002 by Theodore Ts'o - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/types.h> #include <linux/fs.h>
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 888a9dc..b8676a2 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c
@@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/inline.c * Copyright (c) 2013, Intel Corporation * Authors: Huajun Li <huajun.li@intel.com> * Haicheng Li <haicheng.li@intel.com> - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> @@ -13,6 +11,7 @@ #include "f2fs.h" #include "node.h" +#include <trace/events/android_fs.h> bool f2fs_may_inline_data(struct inode *inode) { @@ -25,7 +24,7 @@ bool f2fs_may_inline_data(struct inode *inode) if (i_size_read(inode) > MAX_INLINE_DATA(inode)) return false; - if (f2fs_encrypted_file(inode)) + if (f2fs_post_read_required(inode)) return false; return true; @@ -42,7 +41,7 @@ bool f2fs_may_inline_dentry(struct inode *inode) return true; } -void read_inline_data(struct page *page, struct page *ipage) +void f2fs_do_read_inline_data(struct page *page, struct page *ipage) { struct inode *inode = page->mapping->host; void *src_addr, *dst_addr; @@ -64,7 +63,8 @@ void read_inline_data(struct page *page, struct page *ipage) SetPageUptodate(page); } -void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from) +void f2fs_truncate_inline_inode(struct inode *inode, + struct page *ipage, u64 from) { void *addr; @@ -73,7 +73,7 @@ void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from) addr = inline_data_addr(inode, ipage); - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); memset(addr + from, 0, MAX_INLINE_DATA(inode) - from); set_page_dirty(ipage); @@ -85,25 +85,42 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) { struct page *ipage; - ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); + if (trace_android_fs_dataread_start_enabled()) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + inode); + trace_android_fs_dataread_start(inode, page_offset(page), + PAGE_SIZE, current->pid, + path, current->comm); + } + + ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(ipage)) { + trace_android_fs_dataread_end(inode, page_offset(page), + PAGE_SIZE); unlock_page(page); return PTR_ERR(ipage); } if (!f2fs_has_inline_data(inode)) { f2fs_put_page(ipage, 1); + trace_android_fs_dataread_end(inode, page_offset(page), + PAGE_SIZE); return -EAGAIN; } if (page->index) zero_user_segment(page, 0, PAGE_SIZE); else - read_inline_data(page, ipage); + f2fs_do_read_inline_data(page, ipage); if (!PageUptodate(page)) SetPageUptodate(page); f2fs_put_page(ipage, 1); + trace_android_fs_dataread_end(inode, page_offset(page), + PAGE_SIZE); unlock_page(page); return 0; } @@ -112,6 +129,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) { struct f2fs_io_info fio = { .sbi = F2FS_I_SB(dn->inode), + .ino = dn->inode->i_ino, .type = DATA, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_PRIO, @@ -119,6 +137,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) .encrypted_page = NULL, .io_type = FS_DATA_IO, }; + struct node_info ni; int dirty, err; if (!f2fs_exist_data(dn->inode)) @@ -128,6 +147,14 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) if (err) return err; + err = f2fs_get_node_info(fio.sbi, dn->nid, &ni); + if (err) { + f2fs_put_dnode(dn); + return err; + } + + fio.version = ni.version; + if (unlikely(dn->data_blkaddr != NEW_ADDR)) { f2fs_put_dnode(dn); set_sbi_flag(fio.sbi, SBI_NEED_FSCK); @@ -140,7 +167,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page)); - read_inline_data(page, dn->inode_page); + f2fs_do_read_inline_data(page, dn->inode_page); set_page_dirty(page); /* clear dirty state */ @@ -148,20 +175,21 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) /* write data page to try to make data consistent */ set_page_writeback(page); + ClearPageError(page); fio.old_blkaddr = dn->data_blkaddr; set_inode_flag(dn->inode, FI_HOT_DATA); - write_data_page(dn, &fio); - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_outplace_write_data(dn, &fio); + f2fs_wait_on_page_writeback(page, DATA, true, true); if (dirty) { inode_dec_dirty_pages(dn->inode); - remove_dirty_inode(dn->inode); + f2fs_remove_dirty_inode(dn->inode); } /* this converted inline_data should be recovered. */ set_inode_flag(dn->inode, FI_APPEND_WRITE); /* clear inline data and flag after data writeback */ - truncate_inline_inode(dn->inode, dn->inode_page, 0); + f2fs_truncate_inline_inode(dn->inode, dn->inode_page, 0); clear_inline_node(dn->inode_page); clear_out: stat_dec_inline_inode(dn->inode); @@ -186,7 +214,7 @@ int f2fs_convert_inline_inode(struct inode *inode) f2fs_lock_op(sbi); - ipage = get_node_page(sbi, inode->i_ino); + ipage = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(ipage)) { err = PTR_ERR(ipage); goto out; @@ -212,12 +240,10 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) { void *src_addr, *dst_addr; struct dnode_of_data dn; - struct address_space *mapping = page_mapping(page); - unsigned long flags; int err; set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, 0, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, 0, LOOKUP_NODE); if (err) return err; @@ -228,17 +254,14 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) f2fs_bug_on(F2FS_I_SB(inode), page->index); - f2fs_wait_on_page_writeback(dn.inode_page, NODE, true); + f2fs_wait_on_page_writeback(dn.inode_page, NODE, true, true); src_addr = kmap_atomic(page); dst_addr = inline_data_addr(inode, dn.inode_page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode)); kunmap_atomic(src_addr); set_page_dirty(dn.inode_page); - spin_lock_irqsave(&mapping->tree_lock, flags); - radix_tree_tag_clear(&mapping->page_tree, page_index(page), - PAGECACHE_TAG_DIRTY); - spin_unlock_irqrestore(&mapping->tree_lock, flags); + f2fs_clear_radix_tree_dirty_tag(page); set_inode_flag(inode, FI_APPEND_WRITE); set_inode_flag(inode, FI_DATA_EXIST); @@ -248,7 +271,7 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) return 0; } -bool recover_inline_data(struct inode *inode, struct page *npage) +bool f2fs_recover_inline_data(struct inode *inode, struct page *npage) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode *ri = NULL; @@ -269,10 +292,10 @@ bool recover_inline_data(struct inode *inode, struct page *npage) if (f2fs_has_inline_data(inode) && ri && (ri->i_inline & F2FS_INLINE_DATA)) { process_inline: - ipage = get_node_page(sbi, inode->i_ino); + ipage = f2fs_get_node_page(sbi, inode->i_ino); f2fs_bug_on(sbi, IS_ERR(ipage)); - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); src_addr = inline_data_addr(inode, npage); dst_addr = inline_data_addr(inode, ipage); @@ -287,20 +310,20 @@ bool recover_inline_data(struct inode *inode, struct page *npage) } if (f2fs_has_inline_data(inode)) { - ipage = get_node_page(sbi, inode->i_ino); + ipage = f2fs_get_node_page(sbi, inode->i_ino); f2fs_bug_on(sbi, IS_ERR(ipage)); - truncate_inline_inode(inode, ipage, 0); + f2fs_truncate_inline_inode(inode, ipage, 0); clear_inode_flag(inode, FI_INLINE_DATA); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { - if (truncate_blocks(inode, 0, false)) + if (f2fs_truncate_blocks(inode, 0, false, false)) return false; goto process_inline; } return false; } -struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, +struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, struct fscrypt_name *fname, struct page **res_page) { struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); @@ -311,7 +334,7 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, void *inline_dentry; f2fs_hash_t namehash; - ipage = get_node_page(sbi, dir->i_ino); + ipage = f2fs_get_node_page(sbi, dir->i_ino); if (IS_ERR(ipage)) { *res_page = ipage; return NULL; @@ -322,7 +345,7 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, inline_dentry = inline_data_addr(dir, ipage); make_dentry_ptr_inline(dir, &d, inline_dentry); - de = find_target_dentry(fname, namehash, NULL, &d); + de = f2fs_find_target_dentry(fname, namehash, NULL, &d); unlock_page(ipage); if (de) *res_page = ipage; @@ -332,7 +355,7 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, return de; } -int make_empty_inline_dir(struct inode *inode, struct inode *parent, +int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, struct page *ipage) { struct f2fs_dentry_ptr d; @@ -341,7 +364,7 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent, inline_dentry = inline_data_addr(inode, ipage); make_dentry_ptr_inline(inode, &d, inline_dentry); - do_make_empty_dir(inode, parent, &d); + f2fs_do_make_empty_dir(inode, parent, &d); set_page_dirty(ipage); @@ -386,10 +409,9 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, goto out; } - f2fs_wait_on_page_writeback(page, DATA, true); - zero_user_segment(page, MAX_INLINE_DATA(dir), PAGE_SIZE); + f2fs_wait_on_page_writeback(page, DATA, true, true); - dentry_blk = kmap_atomic(page); + dentry_blk = page_address(page); make_dentry_ptr_inline(dir, &src, inline_dentry); make_dentry_ptr_block(dir, &dst, dentry_blk); @@ -406,13 +428,12 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, memcpy(dst.dentry, src.dentry, SIZE_OF_DIR_ENTRY * src.max); memcpy(dst.filename, src.filename, src.max * F2FS_SLOT_LEN); - kunmap_atomic(dentry_blk); if (!PageUptodate(page)) SetPageUptodate(page); set_page_dirty(page); /* clear inline dir and flag after data writeback */ - truncate_inline_inode(dir, ipage, 0); + f2fs_truncate_inline_inode(dir, ipage, 0); stat_dec_inline_dir(dir); clear_inode_flag(dir, FI_INLINE_DENTRY); @@ -455,7 +476,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry) new_name.len = le16_to_cpu(de->name_len); ino = le32_to_cpu(de->ino); - fake_mode = get_de_type(de) << S_SHIFT; + fake_mode = f2fs_get_de_type(de) << S_SHIFT; err = f2fs_add_regular_entry(dir, &new_name, NULL, NULL, ino, fake_mode); @@ -467,8 +488,8 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry) return 0; punch_dentry_pages: truncate_inode_pages(&dir->i_data, 0); - truncate_blocks(dir, 0, false); - remove_dirty_inode(dir); + f2fs_truncate_blocks(dir, 0, false, false); + f2fs_remove_dirty_inode(dir); return err; } @@ -486,7 +507,7 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, } memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA(dir)); - truncate_inline_inode(dir, ipage, 0); + f2fs_truncate_inline_inode(dir, ipage, 0); unlock_page(ipage); @@ -498,18 +519,18 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, stat_dec_inline_dir(dir); clear_inode_flag(dir, FI_INLINE_DENTRY); - kfree(backup_dentry); + kvfree(backup_dentry); return 0; recover: lock_page(ipage); - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir)); f2fs_i_depth_write(dir, 0); f2fs_i_size_write(dir, MAX_INLINE_DATA(dir)); set_page_dirty(ipage); f2fs_put_page(ipage, 1); - kfree(backup_dentry); + kvfree(backup_dentry); return err; } @@ -536,14 +557,14 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, struct page *page = NULL; int err = 0; - ipage = get_node_page(sbi, dir->i_ino); + ipage = f2fs_get_node_page(sbi, dir->i_ino); if (IS_ERR(ipage)) return PTR_ERR(ipage); inline_dentry = inline_data_addr(dir, ipage); make_dentry_ptr_inline(dir, &d, inline_dentry); - bit_pos = room_for_filename(d.bitmap, slots, d.max); + bit_pos = f2fs_room_for_filename(d.bitmap, slots, d.max); if (bit_pos >= d.max) { err = f2fs_convert_inline_dir(dir, ipage, inline_dentry); if (err) @@ -554,7 +575,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, if (inode) { down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, new_name, + page = f2fs_init_inode_metadata(inode, dir, new_name, orig_name, ipage); if (IS_ERR(page)) { err = PTR_ERR(page); @@ -562,7 +583,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, } } - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); name_hash = f2fs_dentry_hash(new_name, NULL); f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos); @@ -575,7 +596,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, f2fs_put_page(page, 1); } - update_parent_metadata(dir, inode, 0); + f2fs_update_parent_metadata(dir, inode, 0); fail: if (inode) up_write(&F2FS_I(inode)->i_sem); @@ -594,7 +615,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, int i; lock_page(page); - f2fs_wait_on_page_writeback(page, NODE, true); + f2fs_wait_on_page_writeback(page, NODE, true, true); inline_dentry = inline_data_addr(dir, page); make_dentry_ptr_inline(dir, &d, inline_dentry); @@ -621,7 +642,7 @@ bool f2fs_empty_inline_dir(struct inode *dir) void *inline_dentry; struct f2fs_dentry_ptr d; - ipage = get_node_page(sbi, dir->i_ino); + ipage = f2fs_get_node_page(sbi, dir->i_ino); if (IS_ERR(ipage)) return false; @@ -652,7 +673,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, if (ctx->pos == d.max) return 0; - ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); + ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(ipage)) return PTR_ERR(ipage); @@ -678,7 +699,7 @@ int f2fs_inline_data_fiemap(struct inode *inode, struct page *ipage; int err = 0; - ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); + ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(ipage)) return PTR_ERR(ipage); @@ -694,7 +715,10 @@ int f2fs_inline_data_fiemap(struct inode *inode, ilen = start + len; ilen -= start; - get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni); + err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni); + if (err) + goto out; + byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits; byteaddr += (char *)inline_data_addr(inode, ipage) - (char *)F2FS_INODE(ipage);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 50818b51..bec5296 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c
@@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/inode.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/f2fs_fs.h> @@ -22,6 +19,9 @@ void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync) { + if (is_inode_flag_set(inode, FI_NEW_INODE)) + return; + if (f2fs_inode_dirtied(inode, sync)) return; @@ -33,18 +33,21 @@ void f2fs_set_inode_flags(struct inode *inode) unsigned int flags = F2FS_I(inode)->i_flags; unsigned int new_fl = 0; - if (flags & FS_SYNC_FL) + if (flags & F2FS_SYNC_FL) new_fl |= S_SYNC; - if (flags & FS_APPEND_FL) + if (flags & F2FS_APPEND_FL) new_fl |= S_APPEND; - if (flags & FS_IMMUTABLE_FL) + if (flags & F2FS_IMMUTABLE_FL) new_fl |= S_IMMUTABLE; - if (flags & FS_NOATIME_FL) + if (flags & F2FS_NOATIME_FL) new_fl |= S_NOATIME; - if (flags & FS_DIRSYNC_FL) + if (flags & F2FS_DIRSYNC_FL) new_fl |= S_DIRSYNC; + if (f2fs_encrypted_inode(inode)) + new_fl |= S_ENCRYPTED; inode_set_flags(inode, new_fl, - S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC| + S_ENCRYPTED); } static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) @@ -100,7 +103,7 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage) while (start < end) { if (*start++) { - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); set_inode_flag(inode, FI_DATA_EXIST); set_raw_inline(inode, F2FS_INODE(ipage)); @@ -114,15 +117,15 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage) static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page) { struct f2fs_inode *ri = &F2FS_NODE(page)->i; - int extra_isize = le32_to_cpu(ri->i_extra_isize); - if (!f2fs_sb_has_inode_chksum(sbi->sb)) + if (!f2fs_sb_has_inode_chksum(sbi)) return false; - if (!RAW_IS_INODE(F2FS_NODE(page)) || !(ri->i_inline & F2FS_EXTRA_ATTR)) + if (!IS_INODE(page) || !(ri->i_inline & F2FS_EXTRA_ATTR)) return false; - if (!F2FS_FITS_IN_INODE(ri, extra_isize, i_inode_checksum)) + if (!F2FS_FITS_IN_INODE(ri, le16_to_cpu(ri->i_extra_isize), + i_inode_checksum)) return false; return true; @@ -156,8 +159,15 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page) struct f2fs_inode *ri; __u32 provided, calculated; + if (unlikely(is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))) + return true; + +#ifdef CONFIG_F2FS_CHECK_FS + if (!f2fs_enable_inode_chksum(sbi, page)) +#else if (!f2fs_enable_inode_chksum(sbi, page) || PageDirty(page) || PageWriteback(page)) +#endif return true; ri = &F2FS_NODE(page)->i; @@ -208,8 +218,17 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } + if (f2fs_sb_has_flexible_inline_xattr(sbi) + && !f2fs_has_extra_attr(inode)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: corrupted inode ino=%lx, run fsck to fix.", + __func__, inode->i_ino); + return false; + } + if (f2fs_has_extra_attr(inode) && - !f2fs_sb_has_extra_attr(sbi->sb)) { + !f2fs_sb_has_extra_attr(sbi)) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_msg(sbi->sb, KERN_WARNING, "%s: inode (ino=%lx) is with extra_attr, " @@ -245,6 +264,26 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } } + + if (f2fs_has_inline_data(inode) && + (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: inode (ino=%lx, mode=%u) should not have " + "inline_data, run fsck to fix", + __func__, inode->i_ino, inode->i_mode); + return false; + } + + if (f2fs_has_inline_dentry(inode) && !S_ISDIR(inode->i_mode)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: inode (ino=%lx, mode=%u) should not have " + "inline_dentry, run fsck to fix", + __func__, inode->i_ino, inode->i_mode); + return false; + } + return true; } @@ -258,10 +297,10 @@ static int do_read_inode(struct inode *inode) int err; /* Check if ino is within scope */ - if (check_nid_range(sbi, inode->i_ino)) + if (f2fs_check_nid_range(sbi, inode->i_ino)) return -EINVAL; - node_page = get_node_page(sbi, inode->i_ino); + node_page = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(node_page)) return PTR_ERR(node_page); @@ -281,8 +320,11 @@ static int do_read_inode(struct inode *inode) inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec); inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec); inode->i_generation = le32_to_cpu(ri->i_generation); - - fi->i_current_depth = le32_to_cpu(ri->i_current_depth); + if (S_ISDIR(inode->i_mode)) + fi->i_current_depth = le32_to_cpu(ri->i_current_depth); + else if (S_ISREG(inode->i_mode)) + fi->i_gc_failures[GC_FAILURE_PIN] = + le16_to_cpu(ri->i_gc_failures); fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); fi->i_flags = le32_to_cpu(ri->i_flags); fi->flags = 0; @@ -298,6 +340,22 @@ static int do_read_inode(struct inode *inode) fi->i_extra_isize = f2fs_has_extra_attr(inode) ? le16_to_cpu(ri->i_extra_isize) : 0; + if (f2fs_sb_has_flexible_inline_xattr(sbi)) { + fi->i_inline_xattr_size = le16_to_cpu(ri->i_inline_xattr_size); + } else if (f2fs_has_inline_xattr(inode) || + f2fs_has_inline_dentry(inode)) { + fi->i_inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; + } else { + + /* + * Previous inline data or directory always reserved 200 bytes + * in inode layout, even if inline_xattr is disabled. In order + * to keep inline_dentry's structure for backward compatibility, + * we get the space back only from inline_data. + */ + fi->i_inline_xattr_size = 0; + } + if (!sanity_check_inode(inode, node_page)) { f2fs_put_page(node_page, 1); return -EINVAL; @@ -307,30 +365,48 @@ static int do_read_inode(struct inode *inode) if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) __recover_inline_status(inode, node_page); + /* try to recover cold bit for non-dir inode */ + if (!S_ISDIR(inode->i_mode) && !is_cold_node(node_page)) { + set_cold_node(node_page, false); + set_page_dirty(node_page); + } + /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); - err = __written_first_block(sbi, ri); - if (err < 0) { - f2fs_put_page(node_page, 1); - return err; + if (S_ISREG(inode->i_mode)) { + err = __written_first_block(sbi, ri); + if (err < 0) { + f2fs_put_page(node_page, 1); + return err; + } + if (!err) + set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); } - if (!err) - set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); - if (!need_inode_block_update(sbi, inode->i_ino)) + if (!f2fs_need_inode_block_update(sbi, inode->i_ino)) fi->last_disk_size = inode->i_size; - if (fi->i_flags & FS_PROJINHERIT_FL) + if (fi->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); - if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi->sb) && + if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi) && F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid)) i_projid = (projid_t)le32_to_cpu(ri->i_projid); else i_projid = F2FS_DEF_PROJID; fi->i_projid = make_kprojid(&init_user_ns, i_projid); + if (f2fs_has_extra_attr(inode) && f2fs_sb_has_inode_crtime(sbi) && + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { + fi->i_crtime.tv_sec = le64_to_cpu(ri->i_crtime); + fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec); + } + + F2FS_I(inode)->i_disk_time[0] = inode->i_atime; + F2FS_I(inode)->i_disk_time[1] = inode->i_ctime; + F2FS_I(inode)->i_disk_time[2] = inode->i_mtime; + F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime; f2fs_put_page(node_page, 1); stat_inc_inline_xattr(inode); @@ -363,10 +439,10 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) make_now: if (ino == F2FS_NODE_INO(sbi)) { inode->i_mapping->a_ops = &f2fs_node_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); } else if (ino == F2FS_META_INO(sbi)) { inode->i_mapping->a_ops = &f2fs_meta_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); } else if (S_ISREG(inode->i_mode)) { inode->i_op = &f2fs_file_inode_operations; inode->i_fop = &f2fs_file_operations; @@ -375,7 +451,7 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); + inode_nohighmem(inode); } else if (S_ISLNK(inode->i_mode)) { if (f2fs_encrypted_inode(inode)) inode->i_op = &f2fs_encrypted_symlink_inode_operations; @@ -416,14 +492,15 @@ struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino) return inode; } -int update_inode(struct inode *inode, struct page *node_page) +void f2fs_update_inode(struct inode *inode, struct page *node_page) { struct f2fs_inode *ri; struct extent_tree *et = F2FS_I(inode)->extent_tree; - f2fs_inode_synced(inode); + f2fs_wait_on_page_writeback(node_page, NODE, true, true); + set_page_dirty(node_page); - f2fs_wait_on_page_writeback(node_page, NODE, true); + f2fs_inode_synced(inode); ri = F2FS_INODE(node_page); @@ -450,7 +527,12 @@ int update_inode(struct inode *inode, struct page *node_page) ri->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); ri->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); ri->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); - ri->i_current_depth = cpu_to_le32(F2FS_I(inode)->i_current_depth); + if (S_ISDIR(inode->i_mode)) + ri->i_current_depth = + cpu_to_le32(F2FS_I(inode)->i_current_depth); + else if (S_ISREG(inode->i_mode)) + ri->i_gc_failures = + cpu_to_le16(F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]); ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid); ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); @@ -460,7 +542,11 @@ int update_inode(struct inode *inode, struct page *node_page) if (f2fs_has_extra_attr(inode)) { ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize); - if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) && + if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode))) + ri->i_inline_xattr_size = + cpu_to_le16(F2FS_I(inode)->i_inline_xattr_size); + + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) && F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, i_projid)) { projid_t i_projid; @@ -469,25 +555,39 @@ int update_inode(struct inode *inode, struct page *node_page) F2FS_I(inode)->i_projid); ri->i_projid = cpu_to_le32(i_projid); } + + if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) && + F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, + i_crtime)) { + ri->i_crtime = + cpu_to_le64(F2FS_I(inode)->i_crtime.tv_sec); + ri->i_crtime_nsec = + cpu_to_le32(F2FS_I(inode)->i_crtime.tv_nsec); + } } __set_inode_rdev(inode, ri); - set_cold_node(inode, node_page); /* deleted inode */ if (inode->i_nlink == 0) clear_inline_node(node_page); - return set_page_dirty(node_page); + F2FS_I(inode)->i_disk_time[0] = inode->i_atime; + F2FS_I(inode)->i_disk_time[1] = inode->i_ctime; + F2FS_I(inode)->i_disk_time[2] = inode->i_mtime; + F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime; + +#ifdef CONFIG_F2FS_CHECK_FS + f2fs_inode_chksum_set(F2FS_I_SB(inode), node_page); +#endif } -int update_inode_page(struct inode *inode) +void f2fs_update_inode_page(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *node_page; - int ret = 0; retry: - node_page = get_node_page(sbi, inode->i_ino); + node_page = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(node_page)) { int err = PTR_ERR(node_page); if (err == -ENOMEM) { @@ -496,11 +596,10 @@ int update_inode_page(struct inode *inode) } else if (err != -ENOENT) { f2fs_stop_checkpoint(sbi, false); } - return 0; + return; } - ret = update_inode(inode, node_page); + f2fs_update_inode(inode, node_page); f2fs_put_page(node_page, 1); - return ret; } int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) @@ -514,11 +613,14 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) return 0; + if (f2fs_is_checkpoint_ready(sbi)) + return -ENOSPC; + /* * We need to balance fs here to prevent from producing dirty node pages * during the urgent cleaning time when runing out of free sections. */ - update_inode_page(inode); + f2fs_update_inode_page(inode); if (wbc && wbc->nr_to_write) f2fs_balance_fs(sbi, true); return 0; @@ -535,7 +637,7 @@ void f2fs_evict_inode(struct inode *inode) /* some remained atomic pages should discarded */ if (f2fs_is_atomic_file(inode)) - drop_inmem_pages(inode); + f2fs_drop_inmem_pages(inode); trace_f2fs_evict_inode(inode); truncate_inode_pages_final(&inode->i_data); @@ -545,17 +647,22 @@ void f2fs_evict_inode(struct inode *inode) goto out_clear; f2fs_bug_on(sbi, get_dirty_pages(inode)); - remove_dirty_inode(inode); + f2fs_remove_dirty_inode(inode); f2fs_destroy_extent_tree(inode); if (inode->i_nlink || is_bad_inode(inode)) goto no_delete; - dquot_initialize(inode); + err = dquot_initialize(inode); + if (err) { + err = 0; + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + } - remove_ino_entry(sbi, inode->i_ino, APPEND_INO); - remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); + f2fs_remove_ino_entry(sbi, inode->i_ino, APPEND_INO); + f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); + f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO); sb_start_intwrite(inode->i_sb); set_inode_flag(inode, FI_NO_ALLOC); @@ -564,15 +671,14 @@ void f2fs_evict_inode(struct inode *inode) if (F2FS_HAS_BLOCKS(inode)) err = f2fs_truncate(inode); -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_EVICT_INODE)) { f2fs_show_injection_info(FAULT_EVICT_INODE); err = -EIO; } -#endif + if (!err) { f2fs_lock_op(sbi); - err = remove_inode_page(inode); + err = f2fs_remove_inode_page(inode); f2fs_unlock_op(sbi); if (err == -ENOENT) err = 0; @@ -584,9 +690,10 @@ void f2fs_evict_inode(struct inode *inode) goto retry; } - if (err) - update_inode_page(inode); - dquot_free_inode(inode); + if (err) { + f2fs_update_inode_page(inode); + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + } sb_end_intwrite(inode->i_sb); no_delete: dquot_drop(inode); @@ -595,8 +702,11 @@ void f2fs_evict_inode(struct inode *inode) stat_dec_inline_dir(inode); stat_dec_inline_inode(inode); - if (!is_set_ckpt_flags(sbi, CP_ERROR_FLAG)) + if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG) && + !is_sbi_flag_set(sbi, SBI_CP_DISABLED))) f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE)); + else + f2fs_inode_synced(inode); /* ino == 0, if f2fs_new_inode() was failed t*/ if (inode->i_ino) @@ -606,30 +716,31 @@ void f2fs_evict_inode(struct inode *inode) invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); if (inode->i_nlink) { if (is_inode_flag_set(inode, FI_APPEND_WRITE)) - add_ino_entry(sbi, inode->i_ino, APPEND_INO); + f2fs_add_ino_entry(sbi, inode->i_ino, APPEND_INO); if (is_inode_flag_set(inode, FI_UPDATE_WRITE)) - add_ino_entry(sbi, inode->i_ino, UPDATE_INO); + f2fs_add_ino_entry(sbi, inode->i_ino, UPDATE_INO); } if (is_inode_flag_set(inode, FI_FREE_NID)) { - alloc_nid_failed(sbi, inode->i_ino); + f2fs_alloc_nid_failed(sbi, inode->i_ino); clear_inode_flag(inode, FI_FREE_NID); } else { /* * If xattr nid is corrupted, we can reach out error condition, - * err & !exist_written_data(sbi, inode->i_ino, ORPHAN_INO)). - * In that case, check_nid_range() is enough to give a clue. + * err & !f2fs_exist_written_data(sbi, inode->i_ino, ORPHAN_INO)). + * In that case, f2fs_check_nid_range() is enough to give a clue. */ } out_clear: - fscrypt_put_encryption_info(inode, NULL); + fscrypt_put_encryption_info(inode); clear_inode(inode); } /* caller should call f2fs_lock_op() */ -void handle_failed_inode(struct inode *inode) +void f2fs_handle_failed_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct node_info ni; + int err; /* * clear nlink of inode in order to release resource of inode @@ -641,7 +752,7 @@ void handle_failed_inode(struct inode *inode) * we must call this to avoid inode being remained as dirty, resulting * in a panic when flushing dirty inodes in gdirty_list. */ - update_inode_page(inode); + f2fs_update_inode_page(inode); f2fs_inode_synced(inode); /* don't make bad inode, since it becomes a regular file. */ @@ -652,22 +763,29 @@ void handle_failed_inode(struct inode *inode) * so we can prevent losing this orphan when encoutering checkpoint * and following suddenly power-off. */ - get_node_info(sbi, inode->i_ino, &ni); + err = f2fs_get_node_info(sbi, inode->i_ino, &ni); + if (err) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "May loss orphan inode, run fsck to fix."); + goto out; + } if (ni.blk_addr != NULL_ADDR) { - int err = acquire_orphan_inode(sbi); + err = f2fs_acquire_orphan_inode(sbi); if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_msg(sbi->sb, KERN_WARNING, "Too many orphan inodes, run fsck to fix."); } else { - add_orphan_inode(inode); + f2fs_add_orphan_inode(inode); } - alloc_nid_done(sbi, inode->i_ino); + f2fs_alloc_nid_done(sbi, inode->i_ino); } else { set_inode_flag(inode, FI_FREE_NID); } +out: f2fs_unlock_op(sbi); /* iput will drop the inode object */
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index b80e7db..31601e4 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c
@@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/namei.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/f2fs_fs.h> @@ -19,6 +16,7 @@ #include "f2fs.h" #include "node.h" +#include "segment.h" #include "xattr.h" #include "acl.h" #include <trace/events/f2fs.h> @@ -29,6 +27,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) nid_t ino; struct inode *inode; bool nid_free = false; + int xattr_size = 0; int err; inode = new_inode(dir->i_sb); @@ -36,7 +35,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) return ERR_PTR(-ENOMEM); f2fs_lock_op(sbi); - if (!alloc_nid(sbi, &ino)) { + if (!f2fs_alloc_nid(sbi, &ino)) { f2fs_unlock_op(sbi); err = -ENOSPC; goto fail; @@ -49,17 +48,21 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) inode->i_ino = ino; inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode->i_ctime = + F2FS_I(inode)->i_crtime = current_time(inode); inode->i_generation = sbi->s_next_generation++; + if (S_ISDIR(inode->i_mode)) + F2FS_I(inode)->i_current_depth = 1; + err = insert_inode_locked(inode); if (err) { err = -EINVAL; goto fail; } - if (f2fs_sb_has_project_quota(sbi->sb) && - (F2FS_I(dir)->i_flags & FS_PROJINHERIT_FL)) + if (f2fs_sb_has_project_quota(sbi) && + (F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL)) F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid; else F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns, @@ -69,28 +72,37 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (err) goto fail_drop; - err = dquot_alloc_inode(inode); - if (err) - goto fail_drop; - - /* If the directory encrypted, then we should encrypt the inode. */ - if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) - f2fs_set_encrypted_inode(inode); - set_inode_flag(inode, FI_NEW_INODE); - if (f2fs_sb_has_extra_attr(sbi->sb)) { + /* If the directory encrypted, then we should encrypt the inode. */ + if ((f2fs_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) && + f2fs_may_encrypt(inode)) + f2fs_set_encrypted_inode(inode); + + if (f2fs_sb_has_extra_attr(sbi)) { set_inode_flag(inode, FI_EXTRA_ATTR); F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE; } if (test_opt(sbi, INLINE_XATTR)) set_inode_flag(inode, FI_INLINE_XATTR); + if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode)) set_inode_flag(inode, FI_INLINE_DATA); if (f2fs_may_inline_dentry(inode)) set_inode_flag(inode, FI_INLINE_DENTRY); + if (f2fs_sb_has_flexible_inline_xattr(sbi)) { + f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode)); + if (f2fs_has_inline_xattr(inode)) + xattr_size = F2FS_OPTION(sbi).inline_xattr_size; + /* Otherwise, will be 0 */ + } else if (f2fs_has_inline_xattr(inode) || + f2fs_has_inline_dentry(inode)) { + xattr_size = DEFAULT_INLINE_XATTR_ADDRS; + } + F2FS_I(inode)->i_inline_xattr_size = xattr_size; + f2fs_init_extent_tree(inode, NULL); stat_inc_inline_xattr(inode); @@ -101,11 +113,13 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED); if (S_ISDIR(inode->i_mode)) - F2FS_I(inode)->i_flags |= FS_INDEX_FL; + F2FS_I(inode)->i_flags |= F2FS_INDEX_FL; - if (F2FS_I(inode)->i_flags & FS_PROJINHERIT_FL) + if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); + f2fs_set_inode_flags(inode); + trace_f2fs_new_inode(inode, 0); return inode; @@ -128,7 +142,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) return ERR_PTR(err); } -static int is_multimedia_file(const unsigned char *s, const char *sub) +static int is_extension_exist(const unsigned char *s, const char *sub) { size_t slen = strlen(s); size_t sublen = strlen(sub); @@ -154,19 +168,97 @@ static int is_multimedia_file(const unsigned char *s, const char *sub) /* * Set multimedia files as cold files for hot/cold data separation */ -static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode, +static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode, const unsigned char *name) { - int i; - __u8 (*extlist)[8] = sbi->raw_super->extension_list; + __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; + int i, cold_count, hot_count; - int count = le32_to_cpu(sbi->raw_super->extension_count); - for (i = 0; i < count; i++) { - if (is_multimedia_file(name, extlist[i])) { - file_set_cold(inode); + down_read(&sbi->sb_lock); + + cold_count = le32_to_cpu(sbi->raw_super->extension_count); + hot_count = sbi->raw_super->hot_ext_count; + + for (i = 0; i < cold_count + hot_count; i++) { + if (is_extension_exist(name, extlist[i])) break; - } } + + up_read(&sbi->sb_lock); + + if (i == cold_count + hot_count) + return; + + if (i < cold_count) + file_set_cold(inode); + else + file_set_hot(inode); +} + +int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, + bool hot, bool set) +{ + __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; + int cold_count = le32_to_cpu(sbi->raw_super->extension_count); + int hot_count = sbi->raw_super->hot_ext_count; + int total_count = cold_count + hot_count; + int start, count; + int i; + + if (set) { + if (total_count == F2FS_MAX_EXTENSION) + return -EINVAL; + } else { + if (!hot && !cold_count) + return -EINVAL; + if (hot && !hot_count) + return -EINVAL; + } + + if (hot) { + start = cold_count; + count = total_count; + } else { + start = 0; + count = cold_count; + } + + for (i = start; i < count; i++) { + if (strcmp(name, extlist[i])) + continue; + + if (set) + return -EINVAL; + + memcpy(extlist[i], extlist[i + 1], + F2FS_EXTENSION_LEN * (total_count - i - 1)); + memset(extlist[total_count - 1], 0, F2FS_EXTENSION_LEN); + if (hot) + sbi->raw_super->hot_ext_count = hot_count - 1; + else + sbi->raw_super->extension_count = + cpu_to_le32(cold_count - 1); + return 0; + } + + if (!set) + return -EINVAL; + + if (hot) { + memcpy(extlist[count], name, strlen(name)); + sbi->raw_super->hot_ext_count = hot_count + 1; + } else { + char buf[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN]; + + memcpy(buf, &extlist[cold_count], + F2FS_EXTENSION_LEN * hot_count); + memset(extlist[cold_count], 0, F2FS_EXTENSION_LEN); + memcpy(extlist[cold_count], name, strlen(name)); + memcpy(&extlist[cold_count + 1], buf, + F2FS_EXTENSION_LEN * hot_count); + sbi->raw_super->extension_count = cpu_to_le32(cold_count + 1); + } + return 0; } static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, @@ -177,6 +269,12 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, nid_t ino = 0; int err; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; + err = dquot_initialize(dir); if (err) return err; @@ -186,7 +284,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, return PTR_ERR(inode); if (!test_opt(sbi, DISABLE_EXT_IDENTIFY)) - set_cold_files(sbi, inode, dentry->d_name.name); + set_file_temperature(sbi, inode, dentry->d_name.name); inode->i_op = &f2fs_file_inode_operations; inode->i_fop = &f2fs_file_operations; @@ -199,7 +297,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, goto out; f2fs_unlock_op(sbi); - alloc_nid_done(sbi, ino); + f2fs_alloc_nid_done(sbi, ino); d_instantiate_new(dentry, inode); @@ -209,7 +307,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, f2fs_balance_fs(sbi, true); return 0; out: - handle_failed_inode(inode); + f2fs_handle_failed_inode(inode); return err; } @@ -220,9 +318,15 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, struct f2fs_sb_info *sbi = F2FS_I_SB(dir); int err; - if (f2fs_encrypted_inode(dir) && - !fscrypt_has_permitted_context(dir, inode)) - return -EPERM; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; + + err = fscrypt_prepare_link(old_dentry, dir, dentry); + if (err) + return err; if (is_inode_flag_set(dir, FI_PROJ_INHERIT) && (!projid_eq(F2FS_I(dir)->i_projid, @@ -296,26 +400,23 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) de = f2fs_find_entry(dir, &dot, &page); if (de) { - f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); } else if (IS_ERR(page)) { err = PTR_ERR(page); goto out; } else { - err = __f2fs_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR); + err = f2fs_do_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR); if (err) goto out; } de = f2fs_find_entry(dir, &dotdot, &page); - if (de) { - f2fs_dentry_kunmap(dir, page); + if (de) f2fs_put_page(page, 0); - } else if (IS_ERR(page)) { + else if (IS_ERR(page)) err = PTR_ERR(page); - } else { - err = __f2fs_add_link(dir, &dotdot, NULL, pino, S_IFDIR); - } + else + err = f2fs_do_add_link(dir, &dotdot, NULL, pino, S_IFDIR); out: if (!err) clear_inode_flag(dir, FI_INLINE_DOTS); @@ -330,53 +431,50 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, struct inode *inode = NULL; struct f2fs_dir_entry *de; struct page *page; - nid_t ino; + struct dentry *new; + nid_t ino = -1; int err = 0; unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir)); - if (f2fs_encrypted_inode(dir)) { - int res = fscrypt_get_encryption_info(dir); + trace_f2fs_lookup_start(dir, dentry, flags); - /* - * DCACHE_ENCRYPTED_WITH_KEY is set if the dentry is - * created while the directory was encrypted and we - * don't have access to the key. - */ - if (fscrypt_has_encryption_key(dir)) - fscrypt_set_encrypted_dentry(dentry); - fscrypt_set_d_op(dentry); - if (res && res != -ENOKEY) - return ERR_PTR(res); + err = fscrypt_prepare_lookup(dir, dentry, flags); + if (err) + goto out; + + if (dentry->d_name.len > F2FS_NAME_LEN) { + err = -ENAMETOOLONG; + goto out; } - if (dentry->d_name.len > F2FS_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); - de = f2fs_find_entry(dir, &dentry->d_name, &page); if (!de) { - if (IS_ERR(page)) - return (struct dentry *)page; - return d_splice_alias(inode, dentry); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto out; + } + goto out_splice; } ino = le32_to_cpu(de->ino); - f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); inode = f2fs_iget(dir->i_sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } if ((dir->i_ino == root_ino) && f2fs_has_inline_dots(dir)) { err = __recover_dot_dentries(dir, root_ino); if (err) - goto err_out; + goto out_iput; } if (f2fs_has_inline_dots(inode)) { err = __recover_dot_dentries(inode, dir->i_ino); if (err) - goto err_out; + goto out_iput; } if (f2fs_encrypted_inode(dir) && (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && @@ -385,12 +483,18 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, "Inconsistent encryption contexts: %lu/%lu", dir->i_ino, inode->i_ino); err = -EPERM; - goto err_out; + goto out_iput; } - return d_splice_alias(inode, dentry); - -err_out: +out_splice: + new = d_splice_alias(inode, dentry); + if (IS_ERR(new)) + err = PTR_ERR(new); + trace_f2fs_lookup_end(dir, dentry, ino, err); + return new; +out_iput: iput(inode); +out: + trace_f2fs_lookup_end(dir, dentry, ino, err); return ERR_PTR(err); } @@ -404,9 +508,15 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) trace_f2fs_unlink_enter(dir, dentry); + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + err = dquot_initialize(dir); if (err) return err; + err = dquot_initialize(inode); + if (err) + return err; de = f2fs_find_entry(dir, &dentry->d_name, &page); if (!de) { @@ -418,10 +528,9 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); - err = acquire_orphan_inode(sbi); + err = f2fs_acquire_orphan_inode(sbi); if (err) { f2fs_unlock_op(sbi); - f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); goto fail; } @@ -455,24 +564,19 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; size_t len = strlen(symname); - struct fscrypt_str disk_link = FSTR_INIT((char *)symname, len + 1); - struct fscrypt_symlink_data *sd = NULL; + struct fscrypt_str disk_link; int err; - if (f2fs_encrypted_inode(dir)) { - err = fscrypt_get_encryption_info(dir); - if (err) - return err; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; - if (!fscrypt_has_encryption_key(dir)) - return -ENOKEY; - - disk_link.len = (fscrypt_fname_encrypted_size(dir, len) + - sizeof(struct fscrypt_symlink_data)); - } - - if (disk_link.len > dir->i_sb->s_blocksize) - return -ENAMETOOLONG; + err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize, + &disk_link); + if (err) + return err; err = dquot_initialize(dir); if (err) @@ -482,7 +586,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) return PTR_ERR(inode); - if (f2fs_encrypted_inode(inode)) + if (IS_ENCRYPTED(inode)) inode->i_op = &f2fs_encrypted_symlink_inode_operations; else inode->i_op = &f2fs_symlink_inode_operations; @@ -492,38 +596,13 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) - goto out; + goto out_f2fs_handle_failed_inode; f2fs_unlock_op(sbi); - alloc_nid_done(sbi, inode->i_ino); + f2fs_alloc_nid_done(sbi, inode->i_ino); - if (f2fs_encrypted_inode(inode)) { - struct qstr istr = QSTR_INIT(symname, len); - struct fscrypt_str ostr; - - sd = kzalloc(disk_link.len, GFP_NOFS); - if (!sd) { - err = -ENOMEM; - goto err_out; - } - - err = fscrypt_get_encryption_info(inode); - if (err) - goto err_out; - - if (!fscrypt_has_encryption_key(inode)) { - err = -ENOKEY; - goto err_out; - } - - ostr.name = sd->encrypted_path; - ostr.len = disk_link.len; - err = fscrypt_fname_usr_to_disk(inode, &istr, &ostr); - if (err) - goto err_out; - - sd->len = cpu_to_le16(ostr.len); - disk_link.name = (char *)sd; - } + err = fscrypt_encrypt_symlink(inode, symname, len, &disk_link); + if (err) + goto err_out; err = page_symlink(inode, disk_link.name, disk_link.len); @@ -549,12 +628,14 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, f2fs_unlink(dir, dentry); } - kfree(sd); - f2fs_balance_fs(sbi, true); - return err; -out: - handle_failed_inode(inode); + goto out_free_encrypted_link; + +out_f2fs_handle_failed_inode: + f2fs_handle_failed_inode(inode); +out_free_encrypted_link: + if (disk_link.name != (unsigned char *)symname) + kvfree(disk_link.name); return err; } @@ -564,6 +645,9 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct inode *inode; int err; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + err = dquot_initialize(dir); if (err) return err; @@ -575,7 +659,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); + inode_nohighmem(inode); set_inode_flag(inode, FI_INC_LINK); f2fs_lock_op(sbi); @@ -584,7 +668,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto out_fail; f2fs_unlock_op(sbi); - alloc_nid_done(sbi, inode->i_ino); + f2fs_alloc_nid_done(sbi, inode->i_ino); d_instantiate_new(dentry, inode); @@ -596,7 +680,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) out_fail: clear_inode_flag(inode, FI_INC_LINK); - handle_failed_inode(inode); + f2fs_handle_failed_inode(inode); return err; } @@ -615,6 +699,12 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, struct inode *inode; int err = 0; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; + err = dquot_initialize(dir); if (err) return err; @@ -632,7 +722,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, goto out; f2fs_unlock_op(sbi); - alloc_nid_done(sbi, inode->i_ino); + f2fs_alloc_nid_done(sbi, inode->i_ino); d_instantiate_new(dentry, inode); @@ -642,7 +732,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, f2fs_balance_fs(sbi, true); return 0; out: - handle_failed_inode(inode); + f2fs_handle_failed_inode(inode); return err; } @@ -671,7 +761,7 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, } f2fs_lock_op(sbi); - err = acquire_orphan_inode(sbi); + err = f2fs_acquire_orphan_inode(sbi); if (err) goto out; @@ -683,8 +773,8 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, * add this non-linked tmpfile to orphan list, in this way we could * remove all unused data of tmpfile after abnormal power-off. */ - add_orphan_inode(inode); - alloc_nid_done(sbi, inode->i_ino); + f2fs_add_orphan_inode(inode); + f2fs_alloc_nid_done(sbi, inode->i_ino); if (whiteout) { f2fs_i_links_write(inode, false); @@ -700,15 +790,20 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, return 0; release_out: - release_orphan_inode(sbi); + f2fs_release_orphan_inode(sbi); out: - handle_failed_inode(inode); + f2fs_handle_failed_inode(inode); return err; } static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) { - if (f2fs_encrypted_inode(dir)) { + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + + if (f2fs_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) { int err = fscrypt_get_encryption_info(dir); if (err) return err; @@ -719,6 +814,9 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) static int f2fs_create_whiteout(struct inode *dir, struct inode **whiteout) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(dir)))) + return -EIO; + return __f2fs_tmpfile(dir, NULL, S_IFCHR | WHITEOUT_MODE, whiteout); } @@ -736,19 +834,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, struct f2fs_dir_entry *old_entry; struct f2fs_dir_entry *new_entry; bool is_old_inline = f2fs_has_inline_dentry(old_dir); - int err = -ENOENT; + int err; - if ((f2fs_encrypted_inode(old_dir) && - !fscrypt_has_encryption_key(old_dir)) || - (f2fs_encrypted_inode(new_dir) && - !fscrypt_has_encryption_key(new_dir))) - return -ENOKEY; - - if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) && - !fscrypt_has_permitted_context(new_dir, old_inode)) { - err = -EPERM; - goto out; - } + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && (!projid_eq(F2FS_I(new_dir)->i_projid, @@ -763,6 +855,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (err) goto out; + if (new_inode) { + err = dquot_initialize(new_inode); + if (err) + goto out; + } + + err = -ENOENT; old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) @@ -804,7 +903,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_lock_op(sbi); - err = acquire_orphan_inode(sbi); + err = f2fs_acquire_orphan_inode(sbi); if (err) goto put_out_dir; @@ -818,9 +917,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, up_write(&F2FS_I(new_inode)->i_sem); if (!new_inode->i_nlink) - add_orphan_inode(new_inode); + f2fs_add_orphan_inode(new_inode); else - release_orphan_inode(sbi); + f2fs_release_orphan_inode(sbi); } else { f2fs_balance_fs(sbi, true); @@ -881,38 +980,39 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } if (old_dir_entry) { - if (old_dir != new_dir && !whiteout) { + if (old_dir != new_dir && !whiteout) f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); - } else { - f2fs_dentry_kunmap(old_inode, old_dir_page); + else f2fs_put_page(old_dir_page, 0); - } f2fs_i_links_write(old_dir, false); } + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) { + f2fs_add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); + if (S_ISDIR(old_inode->i_mode)) + f2fs_add_ino_entry(sbi, old_inode->i_ino, + TRANS_DIR_INO); + } f2fs_unlock_op(sbi); if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) f2fs_sync_fs(sbi->sb, 1); + + f2fs_update_time(sbi, REQ_TIME); return 0; put_out_dir: f2fs_unlock_op(sbi); - if (new_page) { - f2fs_dentry_kunmap(new_dir, new_page); + if (new_page) f2fs_put_page(new_page, 0); - } out_whiteout: if (whiteout) iput(whiteout); out_dir: - if (old_dir_entry) { - f2fs_dentry_kunmap(old_inode, old_dir_page); + if (old_dir_entry) f2fs_put_page(old_dir_page, 0); - } out_old: - f2fs_dentry_kunmap(old_dir, old_page); f2fs_put_page(old_page, 0); out: return err; @@ -929,19 +1029,13 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL; struct f2fs_dir_entry *old_entry, *new_entry; int old_nlink = 0, new_nlink = 0; - int err = -ENOENT; + int err; - if ((f2fs_encrypted_inode(old_dir) && - !fscrypt_has_encryption_key(old_dir)) || - (f2fs_encrypted_inode(new_dir) && - !fscrypt_has_encryption_key(new_dir))) - return -ENOKEY; - - if ((f2fs_encrypted_inode(old_dir) || f2fs_encrypted_inode(new_dir)) && - (old_dir != new_dir) && - (!fscrypt_has_permitted_context(new_dir, old_inode) || - !fscrypt_has_permitted_context(old_dir, new_inode))) - return -EPERM; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && !projid_eq(F2FS_I(new_dir)->i_projid, @@ -959,6 +1053,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if (err) goto out; + err = -ENOENT; old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) @@ -1053,26 +1148,29 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, } f2fs_mark_inode_dirty_sync(new_dir, false); + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) { + f2fs_add_ino_entry(sbi, old_dir->i_ino, TRANS_DIR_INO); + f2fs_add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); + } + f2fs_unlock_op(sbi); if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) f2fs_sync_fs(sbi->sb, 1); + + f2fs_update_time(sbi, REQ_TIME); return 0; out_new_dir: if (new_dir_entry) { - f2fs_dentry_kunmap(new_inode, new_dir_page); f2fs_put_page(new_dir_page, 0); } out_old_dir: if (old_dir_entry) { - f2fs_dentry_kunmap(old_inode, old_dir_page); f2fs_put_page(old_dir_page, 0); } out_new: - f2fs_dentry_kunmap(new_dir, new_page); f2fs_put_page(new_page, 0); out_old: - f2fs_dentry_kunmap(old_dir, old_page); f2fs_put_page(old_page, 0); out: return err; @@ -1082,9 +1180,16 @@ static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { + int err; + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; + err = fscrypt_prepare_rename(old_dir, old_dentry, new_dir, new_dentry, + flags); + if (err) + return err; + if (flags & RENAME_EXCHANGE) { return f2fs_cross_rename(old_dir, old_dentry, new_dir, new_dentry); @@ -1100,68 +1205,20 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { - struct page *cpage = NULL; - char *caddr, *paddr = NULL; - struct fscrypt_str cstr = FSTR_INIT(NULL, 0); - struct fscrypt_str pstr = FSTR_INIT(NULL, 0); - struct fscrypt_symlink_data *sd; - u32 max_size = inode->i_sb->s_blocksize; - int res; + struct page *page; + const char *target; if (!dentry) return ERR_PTR(-ECHILD); - res = fscrypt_get_encryption_info(inode); - if (res) - return ERR_PTR(res); + page = read_mapping_page(inode->i_mapping, 0, NULL); + if (IS_ERR(page)) + return ERR_CAST(page); - cpage = read_mapping_page(inode->i_mapping, 0, NULL); - if (IS_ERR(cpage)) - return ERR_CAST(cpage); - caddr = page_address(cpage); - - /* Symlink is encrypted */ - sd = (struct fscrypt_symlink_data *)caddr; - cstr.name = sd->encrypted_path; - cstr.len = le16_to_cpu(sd->len); - - /* this is broken symlink case */ - if (unlikely(cstr.len == 0)) { - res = -ENOENT; - goto errout; - } - - if ((cstr.len + sizeof(struct fscrypt_symlink_data) - 1) > max_size) { - /* Symlink data on the disk is corrupted */ - res = -EIO; - goto errout; - } - res = fscrypt_fname_alloc_buffer(inode, cstr.len, &pstr); - if (res) - goto errout; - - res = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr); - if (res) - goto errout; - - /* this is broken symlink case */ - if (unlikely(pstr.name[0] == 0)) { - res = -ENOENT; - goto errout; - } - - paddr = pstr.name; - - /* Null-terminate the name */ - paddr[pstr.len] = '\0'; - - put_page(cpage); - set_delayed_call(done, kfree_link, paddr); - return paddr; -errout: - fscrypt_fname_free_buffer(&pstr); - put_page(cpage); - return ERR_PTR(res); + target = fscrypt_get_symlink(inode, page_address(page), + inode->i_sb->s_blocksize, done); + put_page(page); + return target; } const struct inode_operations f2fs_encrypted_symlink_inode_operations = {
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 12060fb..46fdaeb 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c
@@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/node.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/f2fs_fs.h> @@ -23,16 +20,17 @@ #include "trace.h" #include <trace/events/f2fs.h> -#define on_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock) +#define on_f2fs_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock) static struct kmem_cache *nat_entry_slab; static struct kmem_cache *free_nid_slab; static struct kmem_cache *nat_entry_set_slab; +static struct kmem_cache *fsync_node_entry_slab; /* * Check whether the given nid is within node id range. */ -int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) +int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) { if (unlikely(nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid)) { set_sbi_flag(sbi, SBI_NEED_FSCK); @@ -44,7 +42,7 @@ int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) return 0; } -bool available_free_memory(struct f2fs_sb_info *sbi, int type) +bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct sysinfo val; @@ -61,7 +59,7 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) * give 25%, 25%, 50%, 50%, 50% memory for each components respectively */ if (type == FREE_NIDS) { - mem_size = (nm_i->nid_cnt[FREE_NID_LIST] * + mem_size = (nm_i->nid_cnt[FREE_NID] * sizeof(struct free_nid)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == NAT_ENTRIES) { @@ -78,7 +76,7 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) } else if (type == INO_ENTRIES) { int i; - for (i = 0; i <= UPDATE_INO; i++) + for (i = 0; i < MAX_INO_ENTRY; i++) mem_size += sbi->im[i].ino_num * sizeof(struct ino_entry); mem_size >>= PAGE_SHIFT; @@ -89,6 +87,10 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) atomic_read(&sbi->total_ext_node) * sizeof(struct extent_node)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); + } else if (type == INMEM_PAGES) { + /* it allows 20% / total_ram for inmemory pages */ + mem_size = get_pages(sbi, F2FS_INMEM_PAGES); + res = mem_size < (val.totalram / 5); } else { if (!sbi->sb->s_bdi->wb.dirty_exceeded) return true; @@ -98,44 +100,35 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) static void clear_node_page_dirty(struct page *page) { - struct address_space *mapping = page->mapping; - unsigned int long flags; - if (PageDirty(page)) { - spin_lock_irqsave(&mapping->tree_lock, flags); - radix_tree_tag_clear(&mapping->page_tree, - page_index(page), - PAGECACHE_TAG_DIRTY); - spin_unlock_irqrestore(&mapping->tree_lock, flags); - + f2fs_clear_radix_tree_dirty_tag(page); clear_page_dirty_for_io(page); - dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES); + dec_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); } ClearPageUptodate(page); } static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid) { - pgoff_t index = current_nat_addr(sbi, nid); - return get_meta_page(sbi, index); + return f2fs_get_meta_page_nofail(sbi, current_nat_addr(sbi, nid)); } static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) { struct page *src_page; struct page *dst_page; - pgoff_t src_off; pgoff_t dst_off; void *src_addr; void *dst_addr; struct f2fs_nm_info *nm_i = NM_I(sbi); - src_off = current_nat_addr(sbi, nid); - dst_off = next_nat_addr(sbi, src_off); + dst_off = next_nat_addr(sbi, current_nat_addr(sbi, nid)); /* get current nat block page with lock */ - src_page = get_meta_page(sbi, src_off); - dst_page = grab_meta_page(sbi, dst_off); + src_page = get_current_nat_page(sbi, nid); + if (IS_ERR(src_page)) + return src_page; + dst_page = f2fs_grab_meta_page(sbi, dst_off); f2fs_bug_on(sbi, PageDirty(src_page)); src_addr = page_address(src_page); @@ -149,9 +142,61 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) return dst_page; } +static struct nat_entry *__alloc_nat_entry(nid_t nid, bool no_fail) +{ + struct nat_entry *new; + + if (no_fail) + new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_F2FS_ZERO); + else + new = kmem_cache_alloc(nat_entry_slab, GFP_F2FS_ZERO); + if (new) { + nat_set_nid(new, nid); + nat_reset_flag(new); + } + return new; +} + +static void __free_nat_entry(struct nat_entry *e) +{ + kmem_cache_free(nat_entry_slab, e); +} + +/* must be locked by nat_tree_lock */ +static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i, + struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail) +{ + if (no_fail) + f2fs_radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne); + else if (radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne)) + return NULL; + + if (raw_ne) + node_info_from_raw_nat(&ne->ni, raw_ne); + + spin_lock(&nm_i->nat_list_lock); + list_add_tail(&ne->list, &nm_i->nat_entries); + spin_unlock(&nm_i->nat_list_lock); + + nm_i->nat_cnt++; + return ne; +} + static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) { - return radix_tree_lookup(&nm_i->nat_root, n); + struct nat_entry *ne; + + ne = radix_tree_lookup(&nm_i->nat_root, n); + + /* for recent accessed nat entry, move it to tail of lru list */ + if (ne && !get_nat_flag(ne, IS_DIRTY)) { + spin_lock(&nm_i->nat_list_lock); + if (!list_empty(&ne->list)) + list_move_tail(&ne->list, &nm_i->nat_entries); + spin_unlock(&nm_i->nat_list_lock); + } + + return ne; } static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i, @@ -162,14 +207,13 @@ static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i, static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) { - list_del(&e->list); radix_tree_delete(&nm_i->nat_root, nat_get_nid(e)); nm_i->nat_cnt--; - kmem_cache_free(nat_entry_slab, e); + __free_nat_entry(e); } -static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, - struct nat_entry *ne) +static struct nat_entry_set *__grab_nat_entry_set(struct f2fs_nm_info *nm_i, + struct nat_entry *ne) { nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); struct nat_entry_set *head; @@ -184,24 +228,50 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, head->entry_cnt = 0; f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head); } + return head; +} + +static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, + struct nat_entry *ne) +{ + struct nat_entry_set *head; + bool new_ne = nat_get_blkaddr(ne) == NEW_ADDR; + + if (!new_ne) + head = __grab_nat_entry_set(nm_i, ne); + + /* + * update entry_cnt in below condition: + * 1. update NEW_ADDR to valid block address; + * 2. update old block address to new one; + */ + if (!new_ne && (get_nat_flag(ne, IS_PREALLOC) || + !get_nat_flag(ne, IS_DIRTY))) + head->entry_cnt++; + + set_nat_flag(ne, IS_PREALLOC, new_ne); if (get_nat_flag(ne, IS_DIRTY)) goto refresh_list; nm_i->dirty_nat_cnt++; - head->entry_cnt++; set_nat_flag(ne, IS_DIRTY, true); refresh_list: - if (nat_get_blkaddr(ne) == NEW_ADDR) + spin_lock(&nm_i->nat_list_lock); + if (new_ne) list_del_init(&ne->list); else list_move_tail(&ne->list, &head->entry_list); + spin_unlock(&nm_i->nat_list_lock); } static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, struct nat_entry_set *set, struct nat_entry *ne) { + spin_lock(&nm_i->nat_list_lock); list_move_tail(&ne->list, &nm_i->nat_entries); + spin_unlock(&nm_i->nat_list_lock); + set_nat_flag(ne, IS_DIRTY, false); set->entry_cnt--; nm_i->dirty_nat_cnt--; @@ -214,7 +284,73 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, start, nr); } -int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) +bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page) +{ + return NODE_MAPPING(sbi) == page->mapping && + IS_DNODE(page) && is_cold_node(page); +} + +void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi) +{ + spin_lock_init(&sbi->fsync_node_lock); + INIT_LIST_HEAD(&sbi->fsync_node_list); + sbi->fsync_seg_id = 0; + sbi->fsync_node_num = 0; +} + +static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi, + struct page *page) +{ + struct fsync_node_entry *fn; + unsigned long flags; + unsigned int seq_id; + + fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab, GFP_NOFS); + + get_page(page); + fn->page = page; + INIT_LIST_HEAD(&fn->list); + + spin_lock_irqsave(&sbi->fsync_node_lock, flags); + list_add_tail(&fn->list, &sbi->fsync_node_list); + fn->seq_id = sbi->fsync_seg_id++; + seq_id = fn->seq_id; + sbi->fsync_node_num++; + spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); + + return seq_id; +} + +void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page) +{ + struct fsync_node_entry *fn; + unsigned long flags; + + spin_lock_irqsave(&sbi->fsync_node_lock, flags); + list_for_each_entry(fn, &sbi->fsync_node_list, list) { + if (fn->page == page) { + list_del(&fn->list); + sbi->fsync_node_num--; + spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); + kmem_cache_free(fsync_node_entry_slab, fn); + put_page(page); + return; + } + } + spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); + f2fs_bug_on(sbi, 1); +} + +void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi) +{ + unsigned long flags; + + spin_lock_irqsave(&sbi->fsync_node_lock, flags); + sbi->fsync_seg_id = 0; + spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); +} + +int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; @@ -231,7 +367,7 @@ int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) return need; } -bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) +bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; @@ -245,7 +381,7 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) return is_cp; } -bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) +bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; @@ -261,49 +397,29 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) return need_update; } -static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid, - bool no_fail) -{ - struct nat_entry *new; - - if (no_fail) { - new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS); - f2fs_radix_tree_insert(&nm_i->nat_root, nid, new); - } else { - new = kmem_cache_alloc(nat_entry_slab, GFP_NOFS); - if (!new) - return NULL; - if (radix_tree_insert(&nm_i->nat_root, nid, new)) { - kmem_cache_free(nat_entry_slab, new); - return NULL; - } - } - - memset(new, 0, sizeof(struct nat_entry)); - nat_set_nid(new, nid); - nat_reset_flag(new); - list_add_tail(&new->list, &nm_i->nat_entries); - nm_i->nat_cnt++; - return new; -} - +/* must be locked by nat_tree_lock */ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, struct f2fs_nat_entry *ne) { struct f2fs_nm_info *nm_i = NM_I(sbi); - struct nat_entry *e; + struct nat_entry *new, *e; + new = __alloc_nat_entry(nid, false); + if (!new) + return; + + down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); - if (!e) { - e = grab_nat_entry(nm_i, nid, false); - if (e) - node_info_from_raw_nat(&e->ni, ne); - } else { + if (!e) + e = __init_nat_entry(nm_i, new, ne, false); + else f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) || nat_get_blkaddr(e) != le32_to_cpu(ne->block_addr) || nat_get_version(e) != ne->version); - } + up_write(&nm_i->nat_tree_lock); + if (e != new) + __free_nat_entry(new); } static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, @@ -311,11 +427,12 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; + struct nat_entry *new = __alloc_nat_entry(ni->nid, true); down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { - e = grab_nat_entry(nm_i, ni->nid, true); + e = __init_nat_entry(nm_i, new, NULL, true); copy_node_info(&e->ni, ni); f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); } else if (new_blkaddr == NEW_ADDR) { @@ -327,6 +444,9 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, copy_node_info(&e->ni, ni); f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR); } + /* let's free early to reduce memory consumption */ + if (e != new) + __free_nat_entry(new); /* sanity check */ f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr); @@ -341,10 +461,6 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { unsigned char version = nat_get_version(e); nat_set_version(e, inc_node_version(version)); - - /* in order to reuse the nid */ - if (nm_i->next_scan_nid > ni->nid) - nm_i->next_scan_nid = ni->nid; } /* change address */ @@ -364,7 +480,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, up_write(&nm_i->nat_tree_lock); } -int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) +int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) { struct f2fs_nm_info *nm_i = NM_I(sbi); int nr = nr_shrink; @@ -372,13 +488,25 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) if (!down_write_trylock(&nm_i->nat_tree_lock)) return 0; - while (nr_shrink && !list_empty(&nm_i->nat_entries)) { + spin_lock(&nm_i->nat_list_lock); + while (nr_shrink) { struct nat_entry *ne; + + if (list_empty(&nm_i->nat_entries)) + break; + ne = list_first_entry(&nm_i->nat_entries, struct nat_entry, list); + list_del(&ne->list); + spin_unlock(&nm_i->nat_list_lock); + __del_from_nat_cache(nm_i, ne); nr_shrink--; + + spin_lock(&nm_i->nat_list_lock); } + spin_unlock(&nm_i->nat_list_lock); + up_write(&nm_i->nat_tree_lock); return nr - nr_shrink; } @@ -386,7 +514,8 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) /* * This function always returns success */ -void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) +int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, + struct node_info *ni) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -409,14 +538,14 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) ni->blk_addr = nat_get_blkaddr(e); ni->version = nat_get_version(e); up_read(&nm_i->nat_tree_lock); - return; + return 0; } memset(&ne, 0, sizeof(struct f2fs_nat_entry)); /* Check current segment summary */ down_read(&curseg->journal_rwsem); - i = lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0); + i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0); if (i >= 0) { ne = nat_in_journal(journal, i); node_info_from_raw_nat(ni, &ne); @@ -431,22 +560,24 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) index = current_nat_addr(sbi, nid); up_read(&nm_i->nat_tree_lock); - page = get_meta_page(sbi, index); + page = f2fs_get_meta_page(sbi, index); + if (IS_ERR(page)) + return PTR_ERR(page); + nat_blk = (struct f2fs_nat_block *)page_address(page); ne = nat_blk->entries[nid - start_nid]; node_info_from_raw_nat(ni, &ne); f2fs_put_page(page, 1); cache: /* cache nat entry */ - down_write(&nm_i->nat_tree_lock); cache_nat_entry(sbi, nid, &ne); - up_write(&nm_i->nat_tree_lock); + return 0; } /* * readahead MAX_RA_NODE number of node pages. */ -static void ra_node_pages(struct page *parent, int start, int n) +static void f2fs_ra_node_pages(struct page *parent, int start, int n) { struct f2fs_sb_info *sbi = F2FS_P_SB(parent); struct blk_plug plug; @@ -460,13 +591,13 @@ static void ra_node_pages(struct page *parent, int start, int n) end = min(end, NIDS_PER_BLOCK); for (i = start; i < end; i++) { nid = get_nid(parent, i, false); - ra_node_page(sbi, nid); + f2fs_ra_node_page(sbi, nid); } blk_finish_plug(&plug); } -pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs) +pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs) { const long direct_index = ADDRS_PER_INODE(dn->inode); const long direct_blks = ADDRS_PER_BLOCK; @@ -581,7 +712,7 @@ static int get_node_path(struct inode *inode, long block, * f2fs_unlock_op() only if ro is not set RDONLY_NODE. * In the case of RDONLY_NODE, we don't need to care about mutex. */ -int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) +int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct page *npage[4]; @@ -600,7 +731,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) npage[0] = dn->inode_page; if (!npage[0]) { - npage[0] = get_node_page(sbi, nids[0]); + npage[0] = f2fs_get_node_page(sbi, nids[0]); if (IS_ERR(npage[0])) return PTR_ERR(npage[0]); } @@ -624,24 +755,24 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) if (!nids[i] && mode == ALLOC_NODE) { /* alloc new node */ - if (!alloc_nid(sbi, &(nids[i]))) { + if (!f2fs_alloc_nid(sbi, &(nids[i]))) { err = -ENOSPC; goto release_pages; } dn->nid = nids[i]; - npage[i] = new_node_page(dn, noffset[i]); + npage[i] = f2fs_new_node_page(dn, noffset[i]); if (IS_ERR(npage[i])) { - alloc_nid_failed(sbi, nids[i]); + f2fs_alloc_nid_failed(sbi, nids[i]); err = PTR_ERR(npage[i]); goto release_pages; } set_nid(parent, offset[i - 1], nids[i], i == 1); - alloc_nid_done(sbi, nids[i]); + f2fs_alloc_nid_done(sbi, nids[i]); done = true; } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) { - npage[i] = get_node_page_ra(parent, offset[i - 1]); + npage[i] = f2fs_get_node_page_ra(parent, offset[i - 1]); if (IS_ERR(npage[i])) { err = PTR_ERR(npage[i]); goto release_pages; @@ -656,7 +787,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) } if (!done) { - npage[i] = get_node_page(sbi, nids[i]); + npage[i] = f2fs_get_node_page(sbi, nids[i]); if (IS_ERR(npage[i])) { err = PTR_ERR(npage[i]); f2fs_put_page(npage[0], 0); @@ -690,22 +821,24 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) return err; } -static void truncate_node(struct dnode_of_data *dn) +static int truncate_node(struct dnode_of_data *dn) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct node_info ni; + int err; pgoff_t index; - get_node_info(sbi, dn->nid, &ni); - f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); + err = f2fs_get_node_info(sbi, dn->nid, &ni); + if (err) + return err; /* Deallocate node address */ - invalidate_blocks(sbi, ni.blk_addr); + f2fs_invalidate_blocks(sbi, ni.blk_addr); dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino); set_node_addr(sbi, &ni, NULL_ADDR, false); if (dn->nid == dn->inode->i_ino) { - remove_orphan_inode(sbi, dn->nid); + f2fs_remove_orphan_inode(sbi, dn->nid); dec_valid_inode_count(sbi); f2fs_inode_synced(dn->inode); } @@ -721,17 +854,20 @@ static void truncate_node(struct dnode_of_data *dn) dn->node_page = NULL; trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); + + return 0; } static int truncate_dnode(struct dnode_of_data *dn) { struct page *page; + int err; if (dn->nid == 0) return 1; /* get direct node */ - page = get_node_page(F2FS_I_SB(dn->inode), dn->nid); + page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid); if (IS_ERR(page) && PTR_ERR(page) == -ENOENT) return 1; else if (IS_ERR(page)) @@ -740,8 +876,11 @@ static int truncate_dnode(struct dnode_of_data *dn) /* Make dnode_of_data for parameter */ dn->node_page = page; dn->ofs_in_node = 0; - truncate_data_blocks(dn); - truncate_node(dn); + f2fs_truncate_data_blocks(dn); + err = truncate_node(dn); + if (err) + return err; + return 1; } @@ -761,13 +900,13 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr); - page = get_node_page(F2FS_I_SB(dn->inode), dn->nid); + page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid); if (IS_ERR(page)) { trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page)); return PTR_ERR(page); } - ra_node_pages(page, ofs, NIDS_PER_BLOCK); + f2fs_ra_node_pages(page, ofs, NIDS_PER_BLOCK); rn = F2FS_NODE(page); if (depth < 3) { @@ -806,7 +945,9 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, if (!ofs) { /* remove current indirect node */ dn->node_page = page; - truncate_node(dn); + ret = truncate_node(dn); + if (ret) + goto out_err; freed++; } else { f2fs_put_page(page, 1); @@ -837,7 +978,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, /* get indirect nodes in the path */ for (i = 0; i < idx + 1; i++) { /* reference count'll be increased */ - pages[i] = get_node_page(F2FS_I_SB(dn->inode), nid[i]); + pages[i] = f2fs_get_node_page(F2FS_I_SB(dn->inode), nid[i]); if (IS_ERR(pages[i])) { err = PTR_ERR(pages[i]); idx = i - 1; @@ -846,7 +987,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, nid[i + 1] = get_nid(pages[i], offset[i + 1], false); } - ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK); + f2fs_ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK); /* free direct nodes linked to a partial indirect node */ for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) { @@ -864,7 +1005,9 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, if (offset[idx + 1] == 0) { dn->node_page = pages[idx]; dn->nid = nid[idx]; - truncate_node(dn); + err = truncate_node(dn); + if (err) + goto fail; } else { f2fs_put_page(pages[idx], 1); } @@ -883,7 +1026,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, /* * All the block addresses of data and nodes should be nullified. */ -int truncate_inode_blocks(struct inode *inode, pgoff_t from) +int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err = 0, cont = 1; @@ -899,7 +1042,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from) if (level < 0) return level; - page = get_node_page(sbi, inode->i_ino); + page = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(page)) { trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page)); return PTR_ERR(page); @@ -963,7 +1106,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from) ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) { lock_page(page); BUG_ON(page->mapping != NODE_MAPPING(sbi)); - f2fs_wait_on_page_writeback(page, NODE, true); + f2fs_wait_on_page_writeback(page, NODE, true, true); ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; set_page_dirty(page); unlock_page(page); @@ -978,27 +1121,31 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from) return err > 0 ? 0 : err; } -int truncate_xattr_node(struct inode *inode, struct page *page) +/* caller must lock inode page */ +int f2fs_truncate_xattr_node(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t nid = F2FS_I(inode)->i_xattr_nid; struct dnode_of_data dn; struct page *npage; + int err; if (!nid) return 0; - npage = get_node_page(sbi, nid); + npage = f2fs_get_node_page(sbi, nid); if (IS_ERR(npage)) return PTR_ERR(npage); + set_new_dnode(&dn, inode, NULL, npage, nid); + err = truncate_node(&dn); + if (err) { + f2fs_put_page(npage, 1); + return err; + } + f2fs_i_xnid_write(inode, 0); - set_new_dnode(&dn, inode, page, npage, nid); - - if (page) - dn.inode_page_locked = true; - truncate_node(&dn); return 0; } @@ -1006,17 +1153,17 @@ int truncate_xattr_node(struct inode *inode, struct page *page) * Caller should grab and release a rwsem by calling f2fs_lock_op() and * f2fs_unlock_op(). */ -int remove_inode_page(struct inode *inode) +int f2fs_remove_inode_page(struct inode *inode) { struct dnode_of_data dn; int err; set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); - err = get_dnode_of_data(&dn, 0, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, 0, LOOKUP_NODE); if (err) return err; - err = truncate_xattr_node(inode, dn.inode_page); + err = f2fs_truncate_xattr_node(inode); if (err) { f2fs_put_dnode(&dn); return err; @@ -1025,18 +1172,26 @@ int remove_inode_page(struct inode *inode) /* remove potential inline_data blocks */ if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - truncate_data_blocks_range(&dn, 1); + f2fs_truncate_data_blocks_range(&dn, 1); /* 0 is possible, after f2fs_new_inode() has failed */ + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { + f2fs_put_dnode(&dn); + return -EIO; + } f2fs_bug_on(F2FS_I_SB(inode), inode->i_blocks != 0 && inode->i_blocks != 8); /* will put inode & node pages */ - truncate_node(&dn); + err = truncate_node(&dn); + if (err) { + f2fs_put_dnode(&dn); + return err; + } return 0; } -struct page *new_inode_page(struct inode *inode) +struct page *f2fs_new_inode_page(struct inode *inode) { struct dnode_of_data dn; @@ -1044,10 +1199,10 @@ struct page *new_inode_page(struct inode *inode) set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); /* caller should f2fs_put_page(page, 1); */ - return new_node_page(&dn, 0); + return f2fs_new_node_page(&dn, 0); } -struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) +struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct node_info new_ni; @@ -1065,7 +1220,11 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) goto fail; #ifdef CONFIG_F2FS_CHECK_FS - get_node_info(sbi, dn->nid, &new_ni); + err = f2fs_get_node_info(sbi, dn->nid, &new_ni); + if (err) { + dec_valid_node_count(sbi, dn->inode, !ofs); + goto fail; + } f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR); #endif new_ni.nid = dn->nid; @@ -1075,9 +1234,9 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) new_ni.version = 0; set_node_addr(sbi, &new_ni, NEW_ADDR, false); - f2fs_wait_on_page_writeback(page, NODE, true); + f2fs_wait_on_page_writeback(page, NODE, true, true); fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); - set_cold_node(dn->inode, page); + set_cold_node(page, S_ISDIR(dn->inode->i_mode)); if (!PageUptodate(page)) SetPageUptodate(page); if (set_page_dirty(page)) @@ -1113,13 +1272,21 @@ static int read_node_page(struct page *page, int op_flags) .page = page, .encrypted_page = NULL, }; + int err; - if (PageUptodate(page)) + if (PageUptodate(page)) { +#ifdef CONFIG_F2FS_CHECK_FS + f2fs_bug_on(sbi, !f2fs_inode_chksum_verify(sbi, page)); +#endif return LOCKED_PAGE; + } - get_node_info(sbi, page->index, &ni); + err = f2fs_get_node_info(sbi, page->index, &ni); + if (err) + return err; - if (unlikely(ni.blk_addr == NULL_ADDR)) { + if (unlikely(ni.blk_addr == NULL_ADDR) || + is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) { ClearPageUptodate(page); return -ENOENT; } @@ -1131,14 +1298,14 @@ static int read_node_page(struct page *page, int op_flags) /* * Readahead a node page */ -void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) +void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) { struct page *apage; int err; if (!nid) return; - if (check_nid_range(sbi, nid)) + if (f2fs_check_nid_range(sbi, nid)) return; rcu_read_lock(); @@ -1163,7 +1330,7 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid, if (!nid) return ERR_PTR(-ENOENT); - if (check_nid_range(sbi, nid)) + if (f2fs_check_nid_range(sbi, nid)) return ERR_PTR(-EINVAL); repeat: page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false); @@ -1180,7 +1347,7 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid, } if (parent) - ra_node_pages(parent, start + 1, MAX_RA_NODE); + f2fs_ra_node_pages(parent, start + 1, MAX_RA_NODE); lock_page(page); @@ -1214,12 +1381,12 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid, return page; } -struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) +struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) { return __get_node_page(sbi, nid, NULL, 0); } -struct page *get_node_page_ra(struct page *parent, int start) +struct page *f2fs_get_node_page_ra(struct page *parent, int start) { struct f2fs_sb_info *sbi = F2FS_P_SB(parent); nid_t nid = get_nid(parent, start, false); @@ -1238,7 +1405,8 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) if (!inode) return; - page = pagecache_get_page(inode->i_mapping, 0, FGP_LOCK|FGP_NOWAIT, 0); + page = f2fs_pagecache_get_page(inode->i_mapping, 0, + FGP_LOCK|FGP_NOWAIT, 0); if (!page) goto iput_out; @@ -1253,7 +1421,7 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) ret = f2fs_write_inline_data(inode, page); inode_dec_dirty_pages(inode); - remove_dirty_inode(inode); + f2fs_remove_dirty_inode(inode); if (ret) set_page_dirty(page); page_out: @@ -1262,54 +1430,19 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) iput(inode); } -void move_node_page(struct page *node_page, int gc_type) -{ - if (gc_type == FG_GC) { - struct f2fs_sb_info *sbi = F2FS_P_SB(node_page); - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = 1, - .for_reclaim = 0, - }; - - set_page_dirty(node_page); - f2fs_wait_on_page_writeback(node_page, NODE, true); - - f2fs_bug_on(sbi, PageWriteback(node_page)); - if (!clear_page_dirty_for_io(node_page)) - goto out_page; - - if (NODE_MAPPING(sbi)->a_ops->writepage(node_page, &wbc)) - unlock_page(node_page); - goto release_page; - } else { - /* set page dirty and write it */ - if (!PageWriteback(node_page)) - set_page_dirty(node_page); - } -out_page: - unlock_page(node_page); -release_page: - f2fs_put_page(node_page, 0); -} - static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) { - pgoff_t index, end; + pgoff_t index; struct pagevec pvec; struct page *last_page = NULL; + int nr_pages; pagevec_init(&pvec, 0); index = 0; - end = ULONG_MAX; - while (index <= end) { - int i, nr_pages; - nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); - if (nr_pages == 0) - break; + while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, + PAGECACHE_TAG_DIRTY))) { + int i; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; @@ -1355,13 +1488,14 @@ static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) static int __write_node_page(struct page *page, bool atomic, bool *submitted, struct writeback_control *wbc, bool do_balance, - enum iostat_type io_type) + enum iostat_type io_type, unsigned int *seq_id) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); nid_t nid; struct node_info ni; struct f2fs_io_info fio = { .sbi = sbi, + .ino = ino_of_node(page), .type = NODE, .op = REQ_OP_WRITE, .op_flags = wbc_to_write_flags(wbc), @@ -1369,19 +1503,29 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, .encrypted_page = NULL, .submitted = false, .io_type = io_type, + .io_wbc = wbc, }; + unsigned int seq; trace_f2fs_writepage(page, NODE); + if (unlikely(f2fs_cp_error(sbi))) + goto redirty_out; + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; - if (unlikely(f2fs_cp_error(sbi))) + + if (wbc->sync_mode == WB_SYNC_NONE && + IS_DNODE(page) && is_cold_node(page)) goto redirty_out; /* get old block addr of this node page */ nid = nid_of_node(page); f2fs_bug_on(sbi, page->index != nid); + if (f2fs_get_node_info(sbi, nid, &ni)) + goto redirty_out; + if (wbc->for_reclaim) { if (!down_read_trylock(&sbi->node_write)) goto redirty_out; @@ -1389,8 +1533,6 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, down_read(&sbi->node_write); } - get_node_info(sbi, nid, &ni); - /* This page is already truncated */ if (unlikely(ni.blk_addr == NULL_ADDR)) { ClearPageUptodate(page); @@ -1410,15 +1552,22 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, fio.op_flags |= REQ_PREFLUSH | REQ_FUA; set_page_writeback(page); + ClearPageError(page); + + if (f2fs_in_warm_node_list(sbi, page)) { + seq = f2fs_add_fsync_node_entry(sbi, page); + if (seq_id) + *seq_id = seq; + } + fio.old_blkaddr = ni.blk_addr; - write_node_page(nid, &fio); + f2fs_do_write_node_page(nid, &fio); set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page)); dec_page_count(sbi, F2FS_DIRTY_NODES); up_read(&sbi->node_write); if (wbc->for_reclaim) { - f2fs_submit_merged_write_cond(sbi, page->mapping->host, 0, - page->index, NODE); + f2fs_submit_merged_write_cond(sbi, NULL, page, 0, NODE); submitted = NULL; } @@ -1440,22 +1589,63 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, return AOP_WRITEPAGE_ACTIVATE; } +int f2fs_move_node_page(struct page *node_page, int gc_type) +{ + int err = 0; + + if (gc_type == FG_GC) { + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = 1, + .for_reclaim = 0, + }; + + f2fs_wait_on_page_writeback(node_page, NODE, true, true); + + set_page_dirty(node_page); + + if (!clear_page_dirty_for_io(node_page)) { + err = -EAGAIN; + goto out_page; + } + + if (__write_node_page(node_page, false, NULL, + &wbc, false, FS_GC_NODE_IO, NULL)) { + err = -EAGAIN; + unlock_page(node_page); + } + goto release_page; + } else { + /* set page dirty and write it */ + if (!PageWriteback(node_page)) + set_page_dirty(node_page); + } +out_page: + unlock_page(node_page); +release_page: + f2fs_put_page(node_page, 0); + return err; +} + static int f2fs_write_node_page(struct page *page, struct writeback_control *wbc) { - return __write_node_page(page, false, NULL, wbc, false, FS_NODE_IO); + return __write_node_page(page, false, NULL, wbc, false, + FS_NODE_IO, NULL); } -int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, - struct writeback_control *wbc, bool atomic) +int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, + struct writeback_control *wbc, bool atomic, + unsigned int *seq_id) { - pgoff_t index, end; - pgoff_t last_idx = ULONG_MAX; + pgoff_t index; struct pagevec pvec; int ret = 0; struct page *last_page = NULL; bool marked = false; nid_t ino = inode->i_ino; + int nr_pages; + int nwritten = 0; if (atomic) { last_page = last_fsync_dnode(sbi, ino); @@ -1465,15 +1655,10 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, retry: pagevec_init(&pvec, 0); index = 0; - end = ULONG_MAX; - while (index <= end) { - int i, nr_pages; - nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); - if (nr_pages == 0) - break; + while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, + PAGECACHE_TAG_DIRTY))) { + int i; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; @@ -1506,8 +1691,7 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, goto continue_unlock; } - f2fs_wait_on_page_writeback(page, NODE, true); - BUG_ON(PageWriteback(page)); + f2fs_wait_on_page_writeback(page, NODE, true, true); set_fsync_mark(page, 0); set_dentry_mark(page, 0); @@ -1517,9 +1701,9 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, if (IS_INODE(page)) { if (is_inode_flag_set(inode, FI_DIRTY_INODE)) - update_inode(inode, page); + f2fs_update_inode(inode, page); set_dentry_mark(page, - need_dentry_mark(sbi, ino)); + f2fs_need_dentry_mark(sbi, ino)); } /* may be written by other thread */ if (!PageDirty(page)) @@ -1532,13 +1716,13 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, ret = __write_node_page(page, atomic && page == last_page, &submitted, wbc, true, - FS_NODE_IO); + FS_NODE_IO, seq_id); if (ret) { unlock_page(page); f2fs_put_page(last_page, 0); break; } else if (submitted) { - last_idx = page->index; + nwritten++; } if (page == last_page) { @@ -1558,48 +1742,46 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, "Retry to write fsync mark: ino=%u, idx=%lx", ino, last_page->index); lock_page(last_page); - f2fs_wait_on_page_writeback(last_page, NODE, true); + f2fs_wait_on_page_writeback(last_page, NODE, true, true); set_page_dirty(last_page); unlock_page(last_page); goto retry; } out: - if (last_idx != ULONG_MAX) - f2fs_submit_merged_write_cond(sbi, NULL, ino, last_idx, NODE); + if (nwritten) + f2fs_submit_merged_write_cond(sbi, NULL, NULL, ino, NODE); return ret ? -EIO: 0; } -int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, +int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, + struct writeback_control *wbc, bool do_balance, enum iostat_type io_type) { - pgoff_t index, end; + pgoff_t index; struct pagevec pvec; int step = 0; int nwritten = 0; int ret = 0; + int nr_pages, done = 0; pagevec_init(&pvec, 0); next_step: index = 0; - end = ULONG_MAX; - while (index <= end) { - int i, nr_pages; - nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); - if (nr_pages == 0) - break; + while (!done && (nr_pages = pagevec_lookup_tag(&pvec, + NODE_MAPPING(sbi), &index, PAGECACHE_TAG_DIRTY))) { + int i; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; bool submitted = false; - if (unlikely(f2fs_cp_error(sbi))) { - pagevec_release(&pvec); - ret = -EIO; - goto out; + /* give a priority to WB_SYNC threads */ + if (atomic_read(&sbi->wb_sync_req[NODE]) && + wbc->sync_mode == WB_SYNC_NONE) { + done = 1; + break; } /* @@ -1641,9 +1823,8 @@ int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, goto lock_node; } - f2fs_wait_on_page_writeback(page, NODE, true); + f2fs_wait_on_page_writeback(page, NODE, true, true); - BUG_ON(PageWriteback(page)); if (!clear_page_dirty_for_io(page)) goto continue_unlock; @@ -1651,7 +1832,7 @@ int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, set_dentry_mark(page, 0); ret = __write_node_page(page, false, &submitted, - wbc, do_balance, io_type); + wbc, do_balance, io_type, NULL); if (ret) unlock_page(page); else if (submitted) @@ -1670,51 +1851,60 @@ int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, } if (step < 2) { + if (wbc->sync_mode == WB_SYNC_NONE && step == 1) + goto out; step++; goto next_step; } out: if (nwritten) f2fs_submit_merged_write(sbi, NODE); + + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; return ret; } -int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) +int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, + unsigned int seq_id) { - pgoff_t index = 0, end = ULONG_MAX; - struct pagevec pvec; + struct fsync_node_entry *fn; + struct page *page; + struct list_head *head = &sbi->fsync_node_list; + unsigned long flags; + unsigned int cur_seq_id = 0; int ret2, ret = 0; - pagevec_init(&pvec, 0); - - while (index <= end) { - int i, nr_pages; - nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, - PAGECACHE_TAG_WRITEBACK, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); - if (nr_pages == 0) + while (seq_id && cur_seq_id < seq_id) { + spin_lock_irqsave(&sbi->fsync_node_lock, flags); + if (list_empty(head)) { + spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); break; - - for (i = 0; i < nr_pages; i++) { - struct page *page = pvec.pages[i]; - - /* until radix tree lookup accepts end_index */ - if (unlikely(page->index > end)) - continue; - - if (ino && ino_of_node(page) == ino) { - f2fs_wait_on_page_writeback(page, NODE, true); - if (TestClearPageError(page)) - ret = -EIO; - } } - pagevec_release(&pvec); - cond_resched(); + fn = list_first_entry(head, struct fsync_node_entry, list); + if (fn->seq_id > seq_id) { + spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); + break; + } + cur_seq_id = fn->seq_id; + page = fn->page; + get_page(page); + spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); + + f2fs_wait_on_page_writeback(page, NODE, true, false); + if (TestClearPageError(page)) + ret = -EIO; + + put_page(page); + + if (ret) + break; } ret2 = filemap_check_errors(NODE_MAPPING(sbi)); if (!ret) ret = ret2; + return ret; } @@ -1735,14 +1925,21 @@ static int f2fs_write_node_pages(struct address_space *mapping, if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE)) goto skip_write; + if (wbc->sync_mode == WB_SYNC_ALL) + atomic_inc(&sbi->wb_sync_req[NODE]); + else if (atomic_read(&sbi->wb_sync_req[NODE])) + goto skip_write; + trace_f2fs_writepages(mapping->host, wbc, NODE); diff = nr_pages_to_write(sbi, NODE, wbc); - wbc->sync_mode = WB_SYNC_NONE; blk_start_plug(&plug); - sync_node_pages(sbi, wbc, true, FS_NODE_IO); + f2fs_sync_node_pages(sbi, wbc, true, FS_NODE_IO); blk_finish_plug(&plug); wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); + + if (wbc->sync_mode == WB_SYNC_ALL) + atomic_dec(&sbi->wb_sync_req[NODE]); return 0; skip_write: @@ -1757,8 +1954,12 @@ static int f2fs_set_node_page_dirty(struct page *page) if (!PageUptodate(page)) SetPageUptodate(page); +#ifdef CONFIG_F2FS_CHECK_FS + if (IS_INODE(page)) + f2fs_inode_chksum_set(F2FS_P_SB(page), page); +#endif if (!PageDirty(page)) { - f2fs_set_page_dirty_nobuffers(page); + __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); SetPagePrivate(page); f2fs_trace_pid(page); @@ -1787,39 +1988,83 @@ static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i, return radix_tree_lookup(&nm_i->free_nid_root, n); } -static int __insert_nid_to_list(struct f2fs_sb_info *sbi, - struct free_nid *i, enum nid_list list, bool new) +static int __insert_free_nid(struct f2fs_sb_info *sbi, + struct free_nid *i, enum nid_state state) { struct f2fs_nm_info *nm_i = NM_I(sbi); - if (new) { - int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i); - if (err) - return err; - } + int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i); + if (err) + return err; - f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW : - i->state != NID_ALLOC); - nm_i->nid_cnt[list]++; - list_add_tail(&i->list, &nm_i->nid_list[list]); + f2fs_bug_on(sbi, state != i->state); + nm_i->nid_cnt[state]++; + if (state == FREE_NID) + list_add_tail(&i->list, &nm_i->free_nid_list); return 0; } -static void __remove_nid_from_list(struct f2fs_sb_info *sbi, - struct free_nid *i, enum nid_list list, bool reuse) +static void __remove_free_nid(struct f2fs_sb_info *sbi, + struct free_nid *i, enum nid_state state) { struct f2fs_nm_info *nm_i = NM_I(sbi); - f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW : - i->state != NID_ALLOC); - nm_i->nid_cnt[list]--; - list_del(&i->list); - if (!reuse) - radix_tree_delete(&nm_i->free_nid_root, i->nid); + f2fs_bug_on(sbi, state != i->state); + nm_i->nid_cnt[state]--; + if (state == FREE_NID) + list_del(&i->list); + radix_tree_delete(&nm_i->free_nid_root, i->nid); +} + +static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i, + enum nid_state org_state, enum nid_state dst_state) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + + f2fs_bug_on(sbi, org_state != i->state); + i->state = dst_state; + nm_i->nid_cnt[org_state]--; + nm_i->nid_cnt[dst_state]++; + + switch (dst_state) { + case PREALLOC_NID: + list_del(&i->list); + break; + case FREE_NID: + list_add_tail(&i->list, &nm_i->free_nid_list); + break; + default: + BUG_ON(1); + } +} + +static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, + bool set, bool build) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid); + unsigned int nid_ofs = nid - START_NID(nid); + + if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap)) + return; + + if (set) { + if (test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs])) + return; + __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); + nm_i->free_nid_count[nat_ofs]++; + } else { + if (!test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs])) + return; + __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); + if (!build) + nm_i->free_nid_count[nat_ofs]--; + } } /* return if the nid is recognized as free */ -static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) +static bool add_free_nid(struct f2fs_sb_info *sbi, + nid_t nid, bool build, bool update) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i, *e; @@ -1833,10 +2078,9 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS); i->nid = nid; - i->state = NID_NEW; + i->state = FREE_NID; - if (radix_tree_preload(GFP_NOFS)) - goto err; + radix_tree_preload(GFP_NOFS | __GFP_NOFAIL); spin_lock(&nm_i->nid_list_lock); @@ -1845,22 +2089,22 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) * Thread A Thread B * - f2fs_create * - f2fs_new_inode - * - alloc_nid - * - __insert_nid_to_list(ALLOC_NID_LIST) + * - f2fs_alloc_nid + * - __insert_nid_to_list(PREALLOC_NID) * - f2fs_balance_fs_bg - * - build_free_nids - * - __build_free_nids + * - f2fs_build_free_nids + * - __f2fs_build_free_nids * - scan_nat_page * - add_free_nid * - __lookup_nat_cache * - f2fs_add_link - * - init_inode_metadata - * - new_inode_page - * - new_node_page + * - f2fs_init_inode_metadata + * - f2fs_new_inode_page + * - f2fs_new_node_page * - set_node_addr - * - alloc_nid_done - * - __remove_nid_from_list(ALLOC_NID_LIST) - * - __insert_nid_to_list(FREE_NID_LIST) + * - f2fs_alloc_nid_done + * - __remove_nid_from_list(PREALLOC_NID) + * - __insert_nid_to_list(FREE_NID) */ ne = __lookup_nat_cache(nm_i, nid); if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || @@ -1869,17 +2113,22 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) e = __lookup_free_nid_list(nm_i, nid); if (e) { - if (e->state == NID_NEW) + if (e->state == FREE_NID) ret = true; goto err_out; } } ret = true; - err = __insert_nid_to_list(sbi, i, FREE_NID_LIST, true); + err = __insert_free_nid(sbi, i, FREE_NID); err_out: + if (update) { + update_free_nid_bitmap(sbi, nid, ret, build); + if (!build) + nm_i->available_nids++; + } spin_unlock(&nm_i->nid_list_lock); radix_tree_preload_end(); -err: + if (err) kmem_cache_free(free_nid_slab, i); return ret; @@ -1893,8 +2142,8 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) spin_lock(&nm_i->nid_list_lock); i = __lookup_free_nid_list(nm_i, nid); - if (i && i->state == NID_NEW) { - __remove_nid_from_list(sbi, i, FREE_NID_LIST, false); + if (i && i->state == FREE_NID) { + __remove_free_nid(sbi, i, FREE_NID); need_free = true; } spin_unlock(&nm_i->nid_list_lock); @@ -1903,28 +2152,7 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) kmem_cache_free(free_nid_slab, i); } -static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, - bool set, bool build) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid); - unsigned int nid_ofs = nid - START_NID(nid); - - if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap)) - return; - - if (set) - __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); - else - __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); - - if (set) - nm_i->free_nid_count[nat_ofs]++; - else if (!build) - nm_i->free_nid_count[nat_ofs]--; -} - -static void scan_nat_page(struct f2fs_sb_info *sbi, +static int scan_nat_page(struct f2fs_sb_info *sbi, struct page *nat_page, nid_t start_nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); @@ -1933,35 +2161,57 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid); int i; - if (test_bit_le(nat_ofs, nm_i->nat_block_bitmap)) - return; - __set_bit_le(nat_ofs, nm_i->nat_block_bitmap); i = start_nid % NAT_ENTRY_PER_BLOCK; for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { - bool freed = false; - if (unlikely(start_nid >= nm_i->max_nid)) break; blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); - f2fs_bug_on(sbi, blk_addr == NEW_ADDR); - if (blk_addr == NULL_ADDR) - freed = add_free_nid(sbi, start_nid, true); - spin_lock(&NM_I(sbi)->nid_list_lock); - update_free_nid_bitmap(sbi, start_nid, freed, true); - spin_unlock(&NM_I(sbi)->nid_list_lock); + + if (blk_addr == NEW_ADDR) + return -EINVAL; + + if (blk_addr == NULL_ADDR) { + add_free_nid(sbi, start_nid, true, true); + } else { + spin_lock(&NM_I(sbi)->nid_list_lock); + update_free_nid_bitmap(sbi, start_nid, false, true); + spin_unlock(&NM_I(sbi)->nid_list_lock); + } } + + return 0; +} + +static void scan_curseg_cache(struct f2fs_sb_info *sbi) +{ + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); + struct f2fs_journal *journal = curseg->journal; + int i; + + down_read(&curseg->journal_rwsem); + for (i = 0; i < nats_in_cursum(journal); i++) { + block_t addr; + nid_t nid; + + addr = le32_to_cpu(nat_in_journal(journal, i).block_addr); + nid = le32_to_cpu(nid_in_journal(journal, i)); + if (addr == NULL_ADDR) + add_free_nid(sbi, nid, true, false); + else + remove_free_nid(sbi, nid); + } + up_read(&curseg->journal_rwsem); } static void scan_free_nid_bits(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); - struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); - struct f2fs_journal *journal = curseg->journal; unsigned int i, idx; + nid_t nid; down_read(&nm_i->nat_tree_lock); @@ -1971,72 +2221,75 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi) if (!nm_i->free_nid_count[i]) continue; for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) { - nid_t nid; - - if (!test_bit_le(idx, nm_i->free_nid_bitmap[i])) - continue; + idx = find_next_bit_le(nm_i->free_nid_bitmap[i], + NAT_ENTRY_PER_BLOCK, idx); + if (idx >= NAT_ENTRY_PER_BLOCK) + break; nid = i * NAT_ENTRY_PER_BLOCK + idx; - add_free_nid(sbi, nid, true); + add_free_nid(sbi, nid, true, false); - if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS) + if (nm_i->nid_cnt[FREE_NID] >= MAX_FREE_NIDS) goto out; } } out: - down_read(&curseg->journal_rwsem); - for (i = 0; i < nats_in_cursum(journal); i++) { - block_t addr; - nid_t nid; + scan_curseg_cache(sbi); - addr = le32_to_cpu(nat_in_journal(journal, i).block_addr); - nid = le32_to_cpu(nid_in_journal(journal, i)); - if (addr == NULL_ADDR) - add_free_nid(sbi, nid, true); - else - remove_free_nid(sbi, nid); - } - up_read(&curseg->journal_rwsem); up_read(&nm_i->nat_tree_lock); } -static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) +static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, + bool sync, bool mount) { struct f2fs_nm_info *nm_i = NM_I(sbi); - struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); - struct f2fs_journal *journal = curseg->journal; - int i = 0; + int i = 0, ret; nid_t nid = nm_i->next_scan_nid; if (unlikely(nid >= nm_i->max_nid)) nid = 0; /* Enough entries */ - if (nm_i->nid_cnt[FREE_NID_LIST] >= NAT_ENTRY_PER_BLOCK) - return; + if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK) + return 0; - if (!sync && !available_free_memory(sbi, FREE_NIDS)) - return; + if (!sync && !f2fs_available_free_memory(sbi, FREE_NIDS)) + return 0; if (!mount) { /* try to find free nids in free_nid_bitmap */ scan_free_nid_bits(sbi); - if (nm_i->nid_cnt[FREE_NID_LIST]) - return; + if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK) + return 0; } /* readahead nat pages to be scanned */ - ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, + f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT, true); down_read(&nm_i->nat_tree_lock); while (1) { - struct page *page = get_current_nat_page(sbi, nid); + if (!test_bit_le(NAT_BLOCK_OFFSET(nid), + nm_i->nat_block_bitmap)) { + struct page *page = get_current_nat_page(sbi, nid); - scan_nat_page(sbi, page, nid); - f2fs_put_page(page, 1); + if (IS_ERR(page)) { + ret = PTR_ERR(page); + } else { + ret = scan_nat_page(sbi, page, nid); + f2fs_put_page(page, 1); + } + + if (ret) { + up_read(&nm_i->nat_tree_lock); + f2fs_bug_on(sbi, !mount); + f2fs_msg(sbi->sb, KERN_ERR, + "NAT is corrupt, run fsck to fix it"); + return ret; + } + } nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); if (unlikely(nid >= nm_i->max_nid)) @@ -2050,29 +2303,25 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) nm_i->next_scan_nid = nid; /* find free nids from current sum_pages */ - down_read(&curseg->journal_rwsem); - for (i = 0; i < nats_in_cursum(journal); i++) { - block_t addr; + scan_curseg_cache(sbi); - addr = le32_to_cpu(nat_in_journal(journal, i).block_addr); - nid = le32_to_cpu(nid_in_journal(journal, i)); - if (addr == NULL_ADDR) - add_free_nid(sbi, nid, true); - else - remove_free_nid(sbi, nid); - } - up_read(&curseg->journal_rwsem); up_read(&nm_i->nat_tree_lock); - ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), + f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), nm_i->ra_nid_pages, META_NAT, false); + + return 0; } -void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) +int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) { + int ret; + mutex_lock(&NM_I(sbi)->build_lock); - __build_free_nids(sbi, sync, mount); + ret = __f2fs_build_free_nids(sbi, sync, mount); mutex_unlock(&NM_I(sbi)->build_lock); + + return ret; } /* @@ -2080,17 +2329,16 @@ void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) * from second parameter of this function. * The returned nid could be used ino as well as nid when inode is created. */ -bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) +bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i = NULL; retry: -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_ALLOC_NID)) { f2fs_show_injection_info(FAULT_ALLOC_NID); return false; } -#endif + spin_lock(&nm_i->nid_list_lock); if (unlikely(nm_i->available_nids == 0)) { @@ -2098,16 +2346,14 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) return false; } - /* We should not use stale free nids created by build_free_nids */ - if (nm_i->nid_cnt[FREE_NID_LIST] && !on_build_free_nids(nm_i)) { - f2fs_bug_on(sbi, list_empty(&nm_i->nid_list[FREE_NID_LIST])); - i = list_first_entry(&nm_i->nid_list[FREE_NID_LIST], + /* We should not use stale free nids created by f2fs_build_free_nids */ + if (nm_i->nid_cnt[FREE_NID] && !on_f2fs_build_free_nids(nm_i)) { + f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list)); + i = list_first_entry(&nm_i->free_nid_list, struct free_nid, list); *nid = i->nid; - __remove_nid_from_list(sbi, i, FREE_NID_LIST, true); - i->state = NID_ALLOC; - __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false); + __move_free_nid(sbi, i, FREE_NID, PREALLOC_NID); nm_i->available_nids--; update_free_nid_bitmap(sbi, *nid, false, false); @@ -2118,14 +2364,15 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) spin_unlock(&nm_i->nid_list_lock); /* Let's scan nat pages and its caches to get free nids */ - build_free_nids(sbi, true, false); - goto retry; + if (!f2fs_build_free_nids(sbi, true, false)) + goto retry; + return false; } /* - * alloc_nid() should be called prior to this function. + * f2fs_alloc_nid() should be called prior to this function. */ -void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) +void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i; @@ -2133,16 +2380,16 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) spin_lock(&nm_i->nid_list_lock); i = __lookup_free_nid_list(nm_i, nid); f2fs_bug_on(sbi, !i); - __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false); + __remove_free_nid(sbi, i, PREALLOC_NID); spin_unlock(&nm_i->nid_list_lock); kmem_cache_free(free_nid_slab, i); } /* - * alloc_nid() should be called prior to this function. + * f2fs_alloc_nid() should be called prior to this function. */ -void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) +void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i; @@ -2155,13 +2402,11 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) i = __lookup_free_nid_list(nm_i, nid); f2fs_bug_on(sbi, !i); - if (!available_free_memory(sbi, FREE_NIDS)) { - __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false); + if (!f2fs_available_free_memory(sbi, FREE_NIDS)) { + __remove_free_nid(sbi, i, PREALLOC_NID); need_free = true; } else { - __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, true); - i->state = NID_NEW; - __insert_nid_to_list(sbi, i, FREE_NID_LIST, false); + __move_free_nid(sbi, i, PREALLOC_NID, FREE_NID); } nm_i->available_nids++; @@ -2174,26 +2419,25 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) kmem_cache_free(free_nid_slab, i); } -int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) +int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i, *next; int nr = nr_shrink; - if (nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS) + if (nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS) return 0; if (!mutex_trylock(&nm_i->build_lock)) return 0; spin_lock(&nm_i->nid_list_lock); - list_for_each_entry_safe(i, next, &nm_i->nid_list[FREE_NID_LIST], - list) { + list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) { if (nr_shrink <= 0 || - nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS) + nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS) break; - __remove_nid_from_list(sbi, i, FREE_NID_LIST, false); + __remove_free_nid(sbi, i, FREE_NID); kmem_cache_free(free_nid_slab, i); nr_shrink--; } @@ -2203,34 +2447,36 @@ int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) return nr - nr_shrink; } -void recover_inline_xattr(struct inode *inode, struct page *page) +void f2fs_recover_inline_xattr(struct inode *inode, struct page *page) { void *src_addr, *dst_addr; size_t inline_size; struct page *ipage; struct f2fs_inode *ri; - ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); + ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage)); ri = F2FS_INODE(page); - if (!(ri->i_inline & F2FS_INLINE_XATTR)) { + if (ri->i_inline & F2FS_INLINE_XATTR) { + set_inode_flag(inode, FI_INLINE_XATTR); + } else { clear_inode_flag(inode, FI_INLINE_XATTR); goto update_inode; } - dst_addr = inline_xattr_addr(ipage); - src_addr = inline_xattr_addr(page); + dst_addr = inline_xattr_addr(inode, ipage); + src_addr = inline_xattr_addr(inode, page); inline_size = inline_xattr_size(inode); - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); memcpy(dst_addr, src_addr, inline_size); update_inode: - update_inode(inode, ipage); + f2fs_update_inode(inode, ipage); f2fs_put_page(ipage, 1); } -int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) +int f2fs_recover_xattr_data(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; @@ -2238,31 +2484,34 @@ int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) struct dnode_of_data dn; struct node_info ni; struct page *xpage; + int err; if (!prev_xnid) goto recover_xnid; /* 1: invalidate the previous xattr nid */ - get_node_info(sbi, prev_xnid, &ni); - f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); - invalidate_blocks(sbi, ni.blk_addr); + err = f2fs_get_node_info(sbi, prev_xnid, &ni); + if (err) + return err; + + f2fs_invalidate_blocks(sbi, ni.blk_addr); dec_valid_node_count(sbi, inode, false); set_node_addr(sbi, &ni, NULL_ADDR, false); recover_xnid: /* 2: update xattr nid in inode */ - if (!alloc_nid(sbi, &new_xnid)) + if (!f2fs_alloc_nid(sbi, &new_xnid)) return -ENOSPC; set_new_dnode(&dn, inode, NULL, NULL, new_xnid); - xpage = new_node_page(&dn, XATTR_NODE_OFFSET); + xpage = f2fs_new_node_page(&dn, XATTR_NODE_OFFSET); if (IS_ERR(xpage)) { - alloc_nid_failed(sbi, new_xnid); + f2fs_alloc_nid_failed(sbi, new_xnid); return PTR_ERR(xpage); } - alloc_nid_done(sbi, new_xnid); - update_inode_page(inode); + f2fs_alloc_nid_done(sbi, new_xnid); + f2fs_update_inode_page(inode); /* 3: update and set xattr node page dirty */ memcpy(F2FS_NODE(xpage), F2FS_NODE(page), VALID_XATTR_BLOCK_SIZE); @@ -2273,14 +2522,17 @@ int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) return 0; } -int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) +int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) { struct f2fs_inode *src, *dst; nid_t ino = ino_of_node(page); struct node_info old_ni, new_ni; struct page *ipage; + int err; - get_node_info(sbi, ino, &old_ni); + err = f2fs_get_node_info(sbi, ino, &old_ni); + if (err) + return err; if (unlikely(old_ni.blk_addr != NULL_ADDR)) return -EINVAL; @@ -2297,6 +2549,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) if (!PageUptodate(ipage)) SetPageUptodate(ipage); fill_node_footer(ipage, ino, ino, 0, true); + set_cold_node(ipage, false); src = F2FS_INODE(page); dst = F2FS_INODE(ipage); @@ -2309,10 +2562,23 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) dst->i_inline = src->i_inline & (F2FS_INLINE_XATTR | F2FS_EXTRA_ATTR); if (dst->i_inline & F2FS_EXTRA_ATTR) { dst->i_extra_isize = src->i_extra_isize; - if (f2fs_sb_has_project_quota(sbi->sb) && + + if (f2fs_sb_has_flexible_inline_xattr(sbi) && + F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), + i_inline_xattr_size)) + dst->i_inline_xattr_size = src->i_inline_xattr_size; + + if (f2fs_sb_has_project_quota(sbi) && F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), i_projid)) dst->i_projid = src->i_projid; + + if (f2fs_sb_has_inode_crtime(sbi) && + F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), + i_crtime_nsec)) { + dst->i_crtime = src->i_crtime; + dst->i_crtime_nsec = src->i_crtime_nsec; + } } new_ni = old_ni; @@ -2327,7 +2593,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) return 0; } -int restore_node_summary(struct f2fs_sb_info *sbi, +int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum) { struct f2fs_node *rn; @@ -2344,10 +2610,13 @@ int restore_node_summary(struct f2fs_sb_info *sbi, nrpages = min(last_offset - i, BIO_MAX_PAGES); /* readahead node pages */ - ra_meta_pages(sbi, addr, nrpages, META_POR, true); + f2fs_ra_meta_pages(sbi, addr, nrpages, META_POR, true); for (idx = addr; idx < addr + nrpages; idx++) { - struct page *page = get_tmp_page(sbi, idx); + struct page *page = f2fs_get_tmp_page(sbi, idx); + + if (IS_ERR(page)) + return PTR_ERR(page); rn = F2FS_NODE(page); sum_entry->nid = rn->footer.nid; @@ -2380,8 +2649,8 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) ne = __lookup_nat_cache(nm_i, nid); if (!ne) { - ne = grab_nat_entry(nm_i, nid, true); - node_info_from_raw_nat(&ne->ni, &raw_ne); + ne = __alloc_nat_entry(nid, true); + __init_nat_entry(nm_i, ne, &raw_ne, true); } /* @@ -2427,15 +2696,17 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK; struct f2fs_nat_block *nat_blk = page_address(page); int valid = 0; - int i; + int i = 0; if (!enabled_nat_bits(sbi, NULL)) return; - for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) { - if (start_nid == 0 && i == 0) - valid++; - if (nat_blk->entries[i].block_addr) + if (nat_index == 0) { + valid = 1; + i = 1; + } + for (; i < NAT_ENTRY_PER_BLOCK; i++) { + if (nat_blk->entries[i].block_addr != NULL_ADDR) valid++; } if (valid == 0) { @@ -2451,7 +2722,7 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, __clear_bit_le(nat_index, nm_i->full_nat_bits); } -static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, +static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, struct nat_entry_set *set, struct cp_control *cpc) { struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -2475,6 +2746,9 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, down_write(&curseg->journal_rwsem); } else { page = get_next_nat_page(sbi, start_nid); + if (IS_ERR(page)) + return PTR_ERR(page); + nat_blk = page_address(page); f2fs_bug_on(sbi, !nat_blk); } @@ -2488,7 +2762,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, nat_get_blkaddr(ne) == NEW_ADDR); if (to_journal) { - offset = lookup_journal_in_cursum(journal, + offset = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 1); f2fs_bug_on(sbi, offset < 0); raw_ne = &nat_in_journal(journal, offset); @@ -2500,11 +2774,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, nat_reset_flag(ne); __clear_nat_cache_dirty(NM_I(sbi), set, ne); if (nat_get_blkaddr(ne) == NULL_ADDR) { - add_free_nid(sbi, nid, false); - spin_lock(&NM_I(sbi)->nid_list_lock); - NM_I(sbi)->available_nids++; - update_free_nid_bitmap(sbi, nid, true, false); - spin_unlock(&NM_I(sbi)->nid_list_lock); + add_free_nid(sbi, nid, false, true); } else { spin_lock(&NM_I(sbi)->nid_list_lock); update_free_nid_bitmap(sbi, nid, false, false); @@ -2524,12 +2794,13 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); kmem_cache_free(nat_entry_set_slab, set); } + return 0; } /* * This function is called during the checkpointing process. */ -void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) +int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -2539,9 +2810,17 @@ void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) unsigned int found; nid_t set_idx = 0; LIST_HEAD(sets); + int err = 0; + + /* during unmount, let's flush nat_bits before checking dirty_nat_cnt */ + if (enabled_nat_bits(sbi, cpc)) { + down_write(&nm_i->nat_tree_lock); + remove_nats_in_journal(sbi); + up_write(&nm_i->nat_tree_lock); + } if (!nm_i->dirty_nat_cnt) - return; + return 0; down_write(&nm_i->nat_tree_lock); @@ -2564,11 +2843,16 @@ void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) } /* flush dirty nats in nat entry set */ - list_for_each_entry_safe(set, tmp, &sets, set_list) - __flush_nat_entry_set(sbi, set, cpc); + list_for_each_entry_safe(set, tmp, &sets, set_list) { + err = __flush_nat_entry_set(sbi, set, cpc); + if (err) + break; + } up_write(&nm_i->nat_tree_lock); /* Allow dirty nats by node block allocation in write_begin */ + + return err; } static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) @@ -2583,17 +2867,20 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) if (!enabled_nat_bits(sbi, NULL)) return 0; - nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 + - F2FS_BLKSIZE - 1); - nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, - GFP_KERNEL); + nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8); + nm_i->nat_bits = f2fs_kzalloc(sbi, + nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL); if (!nm_i->nat_bits) return -ENOMEM; nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg - nm_i->nat_bits_blocks; for (i = 0; i < nm_i->nat_bits_blocks; i++) { - struct page *page = get_meta_page(sbi, nat_bits_addr++); + struct page *page; + + page = f2fs_get_meta_page(sbi, nat_bits_addr++); + if (IS_ERR(page)) + return PTR_ERR(page); memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS), page_address(page), F2FS_BLKSIZE); @@ -2630,7 +2917,7 @@ static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi) __set_bit_le(i, nm_i->nat_block_bitmap); nid = i * NAT_ENTRY_PER_BLOCK; - last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK; + last_nid = nid + NAT_ENTRY_PER_BLOCK; spin_lock(&NM_I(sbi)->nid_list_lock); for (; nid < last_nid; nid++) @@ -2664,20 +2951,20 @@ static int init_node_manager(struct f2fs_sb_info *sbi) /* not used nids: 0, node, meta, (and root counted as valid node) */ nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count - - F2FS_RESERVED_NODE_NUM; - nm_i->nid_cnt[FREE_NID_LIST] = 0; - nm_i->nid_cnt[ALLOC_NID_LIST] = 0; + sbi->nquota_files - F2FS_RESERVED_NODE_NUM; + nm_i->nid_cnt[FREE_NID] = 0; + nm_i->nid_cnt[PREALLOC_NID] = 0; nm_i->nat_cnt = 0; nm_i->ram_thresh = DEF_RAM_THRESHOLD; nm_i->ra_nid_pages = DEF_RA_NID_PAGES; nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD; INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); - INIT_LIST_HEAD(&nm_i->nid_list[FREE_NID_LIST]); - INIT_LIST_HEAD(&nm_i->nid_list[ALLOC_NID_LIST]); + INIT_LIST_HEAD(&nm_i->free_nid_list); INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO); INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO); INIT_LIST_HEAD(&nm_i->nat_entries); + spin_lock_init(&nm_i->nat_list_lock); mutex_init(&nm_i->build_lock); spin_lock_init(&nm_i->nid_list_lock); @@ -2711,29 +2998,42 @@ static int init_node_manager(struct f2fs_sb_info *sbi) static int init_free_nid_cache(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); + int i; - nm_i->free_nid_bitmap = kvzalloc(nm_i->nat_blocks * - NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL); + nm_i->free_nid_bitmap = + f2fs_kzalloc(sbi, array_size(sizeof(unsigned char *), + nm_i->nat_blocks), + GFP_KERNEL); if (!nm_i->free_nid_bitmap) return -ENOMEM; - nm_i->nat_block_bitmap = kvzalloc(nm_i->nat_blocks / 8, + for (i = 0; i < nm_i->nat_blocks; i++) { + nm_i->free_nid_bitmap[i] = f2fs_kvzalloc(sbi, + f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK), GFP_KERNEL); + if (!nm_i->free_nid_bitmap[i]) + return -ENOMEM; + } + + nm_i->nat_block_bitmap = f2fs_kvzalloc(sbi, nm_i->nat_blocks / 8, GFP_KERNEL); if (!nm_i->nat_block_bitmap) return -ENOMEM; - nm_i->free_nid_count = kvzalloc(nm_i->nat_blocks * - sizeof(unsigned short), GFP_KERNEL); + nm_i->free_nid_count = + f2fs_kvzalloc(sbi, array_size(sizeof(unsigned short), + nm_i->nat_blocks), + GFP_KERNEL); if (!nm_i->free_nid_count) return -ENOMEM; return 0; } -int build_node_manager(struct f2fs_sb_info *sbi) +int f2fs_build_node_manager(struct f2fs_sb_info *sbi) { int err; - sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL); + sbi->nm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_nm_info), + GFP_KERNEL); if (!sbi->nm_info) return -ENOMEM; @@ -2748,11 +3048,10 @@ int build_node_manager(struct f2fs_sb_info *sbi) /* load free nid status from nat_bits table */ load_free_nid_bitmap(sbi); - build_free_nids(sbi, true, true); - return 0; + return f2fs_build_free_nids(sbi, true, true); } -void destroy_node_manager(struct f2fs_sb_info *sbi) +void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i, *next_i; @@ -2766,16 +3065,15 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) /* destroy free nid list */ spin_lock(&nm_i->nid_list_lock); - list_for_each_entry_safe(i, next_i, &nm_i->nid_list[FREE_NID_LIST], - list) { - __remove_nid_from_list(sbi, i, FREE_NID_LIST, false); + list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { + __remove_free_nid(sbi, i, FREE_NID); spin_unlock(&nm_i->nid_list_lock); kmem_cache_free(free_nid_slab, i); spin_lock(&nm_i->nid_list_lock); } - f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID_LIST]); - f2fs_bug_on(sbi, nm_i->nid_cnt[ALLOC_NID_LIST]); - f2fs_bug_on(sbi, !list_empty(&nm_i->nid_list[ALLOC_NID_LIST])); + f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID]); + f2fs_bug_on(sbi, nm_i->nid_cnt[PREALLOC_NID]); + f2fs_bug_on(sbi, !list_empty(&nm_i->free_nid_list)); spin_unlock(&nm_i->nid_list_lock); /* destroy nat cache */ @@ -2785,8 +3083,13 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) unsigned idx; nid = nat_get_nid(natvec[found - 1]) + 1; - for (idx = 0; idx < found; idx++) + for (idx = 0; idx < found; idx++) { + spin_lock(&nm_i->nat_list_lock); + list_del(&natvec[idx]->list); + spin_unlock(&nm_i->nat_list_lock); + __del_from_nat_cache(nm_i, natvec[idx]); + } } f2fs_bug_on(sbi, nm_i->nat_cnt); @@ -2807,19 +3110,25 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) up_write(&nm_i->nat_tree_lock); kvfree(nm_i->nat_block_bitmap); - kvfree(nm_i->free_nid_bitmap); + if (nm_i->free_nid_bitmap) { + int i; + + for (i = 0; i < nm_i->nat_blocks; i++) + kvfree(nm_i->free_nid_bitmap[i]); + kvfree(nm_i->free_nid_bitmap); + } kvfree(nm_i->free_nid_count); - kfree(nm_i->nat_bitmap); - kfree(nm_i->nat_bits); + kvfree(nm_i->nat_bitmap); + kvfree(nm_i->nat_bits); #ifdef CONFIG_F2FS_CHECK_FS - kfree(nm_i->nat_bitmap_mir); + kvfree(nm_i->nat_bitmap_mir); #endif sbi->nm_info = NULL; - kfree(nm_i); + kvfree(nm_i); } -int __init create_node_manager_caches(void) +int __init f2fs_create_node_manager_caches(void) { nat_entry_slab = f2fs_kmem_cache_create("nat_entry", sizeof(struct nat_entry)); @@ -2835,8 +3144,15 @@ int __init create_node_manager_caches(void) sizeof(struct nat_entry_set)); if (!nat_entry_set_slab) goto destroy_free_nid; + + fsync_node_entry_slab = f2fs_kmem_cache_create("fsync_node_entry", + sizeof(struct fsync_node_entry)); + if (!fsync_node_entry_slab) + goto destroy_nat_entry_set; return 0; +destroy_nat_entry_set: + kmem_cache_destroy(nat_entry_set_slab); destroy_free_nid: kmem_cache_destroy(free_nid_slab); destroy_nat_entry: @@ -2845,8 +3161,9 @@ int __init create_node_manager_caches(void) return -ENOMEM; } -void destroy_node_manager_caches(void) +void f2fs_destroy_node_manager_caches(void) { + kmem_cache_destroy(fsync_node_entry_slab); kmem_cache_destroy(nat_entry_set_slab); kmem_cache_destroy(free_nid_slab); kmem_cache_destroy(nat_entry_slab);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index bb53e99..e05af5d 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h
@@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/node.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ /* start node id of a node block dedicated to the given node id */ #define START_NID(nid) (((nid) / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK) @@ -44,6 +41,7 @@ enum { HAS_FSYNCED_INODE, /* is the inode fsynced before? */ HAS_LAST_FSYNC, /* has the latest node fsync mark? */ IS_DIRTY, /* this nat entry is dirty? */ + IS_PREALLOC, /* nat entry is preallocated */ }; /* @@ -134,12 +132,18 @@ static inline bool excess_cached_nats(struct f2fs_sb_info *sbi) return NM_I(sbi)->nat_cnt >= DEF_NAT_CACHE_THRESHOLD; } +static inline bool excess_dirty_nodes(struct f2fs_sb_info *sbi) +{ + return get_pages(sbi, F2FS_DIRTY_NODES) >= sbi->blocks_per_seg * 8; +} + enum mem_type { FREE_NIDS, /* indicates the free nid list */ NAT_ENTRIES, /* indicates the cached nat entry */ DIRTY_DENTS, /* indicates dirty dentry pages */ INO_ENTRIES, /* indicates inode entries */ EXTENT_CACHE, /* indicates extent cache */ + INMEM_PAGES, /* indicates inmemory pages */ BASE_CHECK, /* check kernel status */ }; @@ -150,18 +154,10 @@ struct nat_entry_set { unsigned int entry_cnt; /* the # of nat entries in set */ }; -/* - * For free nid mangement - */ -enum nid_state { - NID_NEW, /* newly added to free nid list */ - NID_ALLOC /* it is allocated */ -}; - struct free_nid { struct list_head list; /* for free node id list */ nid_t nid; /* node id */ - int state; /* in use or not: NID_NEW or NID_ALLOC */ + int state; /* in use or not: FREE_NID or PREALLOC_NID */ }; static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) @@ -170,12 +166,11 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) struct free_nid *fnid; spin_lock(&nm_i->nid_list_lock); - if (nm_i->nid_cnt[FREE_NID_LIST] <= 0) { + if (nm_i->nid_cnt[FREE_NID] <= 0) { spin_unlock(&nm_i->nid_list_lock); return; } - fnid = list_first_entry(&nm_i->nid_list[FREE_NID_LIST], - struct free_nid, list); + fnid = list_first_entry(&nm_i->free_nid_list, struct free_nid, list); *nid = fnid->nid; spin_unlock(&nm_i->nid_list_lock); } @@ -313,6 +308,10 @@ static inline bool is_recoverable_dnode(struct page *page) struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); __u64 cp_ver = cur_cp_version(ckpt); + /* Don't care crc part, if fsck.f2fs sets it. */ + if (__is_set_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG)) + return (cp_ver << 32) == (cpver_of_node(page) << 32); + if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) cp_ver |= (cur_cp_crc(ckpt) << 32); @@ -362,7 +361,7 @@ static inline int set_nid(struct page *p, int off, nid_t nid, bool i) { struct f2fs_node *rn = F2FS_NODE(p); - f2fs_wait_on_page_writeback(p, NODE, true); + f2fs_wait_on_page_writeback(p, NODE, true, true); if (i) rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); @@ -426,12 +425,12 @@ static inline void clear_inline_node(struct page *page) ClearPageChecked(page); } -static inline void set_cold_node(struct inode *inode, struct page *page) +static inline void set_cold_node(struct page *page, bool is_dir) { struct f2fs_node *rn = F2FS_NODE(page); unsigned int flag = le32_to_cpu(rn->footer.flag); - if (S_ISDIR(inode->i_mode)) + if (is_dir) flag &= ~(0x1 << COLD_BIT_SHIFT); else flag |= (0x1 << COLD_BIT_SHIFT); @@ -447,6 +446,10 @@ static inline void set_mark(struct page *page, int mark, int type) else flag &= ~(0x1 << type); rn->footer.flag = cpu_to_le32(flag); + +#ifdef CONFIG_F2FS_CHECK_FS + f2fs_inode_chksum_set(F2FS_P_SB(page), page); +#endif } #define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT) #define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 6ea4453..047b7d5 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c
@@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/recovery.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/f2fs_fs.h> @@ -47,7 +44,7 @@ static struct kmem_cache *fsync_entry_slab; -bool space_for_roll_forward(struct f2fs_sb_info *sbi) +bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi) { s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count); @@ -99,8 +96,12 @@ static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi, return ERR_PTR(err); } -static void del_fsync_inode(struct fsync_inode_entry *entry) +static void del_fsync_inode(struct fsync_inode_entry *entry, int drop) { + if (drop) { + /* inode should not be recovered, drop it */ + f2fs_inode_synced(entry->inode); + } iput(entry->inode); list_del(&entry->list); kmem_cache_free(fsync_entry_slab, entry); @@ -144,7 +145,7 @@ static int recover_dentry(struct inode *inode, struct page *ipage, retry: de = __f2fs_find_entry(dir, &fname, &page); if (de && inode->i_ino == le32_to_cpu(de->ino)) - goto out_unmap_put; + goto out_put; if (de) { einode = f2fs_iget_retry(inode->i_sb, le32_to_cpu(de->ino)); @@ -153,19 +154,19 @@ static int recover_dentry(struct inode *inode, struct page *ipage, err = PTR_ERR(einode); if (err == -ENOENT) err = -EEXIST; - goto out_unmap_put; + goto out_put; } err = dquot_initialize(einode); if (err) { iput(einode); - goto out_unmap_put; + goto out_put; } - err = acquire_orphan_inode(F2FS_I_SB(inode)); + err = f2fs_acquire_orphan_inode(F2FS_I_SB(inode)); if (err) { iput(einode); - goto out_unmap_put; + goto out_put; } f2fs_delete_entry(de, page, dir, einode); iput(einode); @@ -173,15 +174,14 @@ static int recover_dentry(struct inode *inode, struct page *ipage, } else if (IS_ERR(page)) { err = PTR_ERR(page); } else { - err = __f2fs_do_add_link(dir, &fname, inode, + err = f2fs_add_dentry(dir, &fname, inode, inode->i_ino, inode->i_mode); } if (err == -ENOMEM) goto retry; goto out; -out_unmap_put: - f2fs_dentry_kunmap(dir, page); +out_put: f2fs_put_page(page, 0); out: if (file_enc_name(inode)) @@ -195,12 +195,80 @@ static int recover_dentry(struct inode *inode, struct page *ipage, return err; } -static void recover_inode(struct inode *inode, struct page *page) +static int recover_quota_data(struct inode *inode, struct page *page) +{ + struct f2fs_inode *raw = F2FS_INODE(page); + struct iattr attr; + uid_t i_uid = le32_to_cpu(raw->i_uid); + gid_t i_gid = le32_to_cpu(raw->i_gid); + int err; + + memset(&attr, 0, sizeof(attr)); + + attr.ia_uid = make_kuid(inode->i_sb->s_user_ns, i_uid); + attr.ia_gid = make_kgid(inode->i_sb->s_user_ns, i_gid); + + if (!uid_eq(attr.ia_uid, inode->i_uid)) + attr.ia_valid |= ATTR_UID; + if (!gid_eq(attr.ia_gid, inode->i_gid)) + attr.ia_valid |= ATTR_GID; + + if (!attr.ia_valid) + return 0; + + err = dquot_transfer(inode, &attr); + if (err) + set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR); + return err; +} + +static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri) +{ + if (ri->i_inline & F2FS_PIN_FILE) + set_inode_flag(inode, FI_PIN_FILE); + else + clear_inode_flag(inode, FI_PIN_FILE); + if (ri->i_inline & F2FS_DATA_EXIST) + set_inode_flag(inode, FI_DATA_EXIST); + else + clear_inode_flag(inode, FI_DATA_EXIST); +} + +static int recover_inode(struct inode *inode, struct page *page) { struct f2fs_inode *raw = F2FS_INODE(page); char *name; + int err; inode->i_mode = le16_to_cpu(raw->i_mode); + + err = recover_quota_data(inode, page); + if (err) + return err; + + i_uid_write(inode, le32_to_cpu(raw->i_uid)); + i_gid_write(inode, le32_to_cpu(raw->i_gid)); + + if (raw->i_inline & F2FS_EXTRA_ATTR) { + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) && + F2FS_FITS_IN_INODE(raw, le16_to_cpu(raw->i_extra_isize), + i_projid)) { + projid_t i_projid; + kprojid_t kprojid; + + i_projid = (projid_t)le32_to_cpu(raw->i_projid); + kprojid = make_kprojid(&init_user_ns, i_projid); + + if (!projid_eq(kprojid, F2FS_I(inode)->i_projid)) { + err = f2fs_transfer_project_quota(inode, + kprojid); + if (err) + return err; + F2FS_I(inode)->i_projid = kprojid; + } + } + } + f2fs_i_size_write(inode, le64_to_cpu(raw->i_size)); inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime); inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); @@ -211,14 +279,23 @@ static void recover_inode(struct inode *inode, struct page *page) F2FS_I(inode)->i_advise = raw->i_advise; F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags); + f2fs_set_inode_flags(inode); + F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = + le16_to_cpu(raw->i_gc_failures); + + recover_inline_flags(inode, raw); + + f2fs_mark_inode_dirty_sync(inode, true); if (file_enc_name(inode)) name = "<encrypted>"; else name = F2FS_INODE(page)->i_name; - f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s", - ino_of_node(page), name); + f2fs_msg(inode->i_sb, KERN_NOTICE, + "recover_inode: ino = %x, name = %s, inline = %x", + ino_of_node(page), name, raw->i_inline); + return 0; } static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, @@ -227,6 +304,9 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, struct curseg_info *curseg; struct page *page = NULL; block_t blkaddr; + unsigned int loop_cnt = 0; + unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg - + valid_user_blocks(sbi); int err = 0; /* get node pages in the current segment */ @@ -239,7 +319,11 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR)) return 0; - page = get_tmp_page(sbi, blkaddr); + page = f2fs_get_tmp_page(sbi, blkaddr); + if (IS_ERR(page)) { + err = PTR_ERR(page); + break; + } if (!is_recoverable_dnode(page)) break; @@ -253,7 +337,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, if (!check_only && IS_INODE(page) && is_dent_dnode(page)) { - err = recover_inode_page(sbi, page); + err = f2fs_recover_inode_page(sbi, page); if (err) break; quota_inode = true; @@ -279,22 +363,33 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, if (IS_INODE(page) && is_dent_dnode(page)) entry->last_dentry = blkaddr; next: + /* sanity check in order to detect looped node chain */ + if (++loop_cnt >= free_blocks || + blkaddr == next_blkaddr_of_node(page)) { + f2fs_msg(sbi->sb, KERN_NOTICE, + "%s: detect looped node chain, " + "blkaddr:%u, next:%u", + __func__, blkaddr, next_blkaddr_of_node(page)); + err = -EINVAL; + break; + } + /* check next segment */ blkaddr = next_blkaddr_of_node(page); f2fs_put_page(page, 1); - ra_meta_pages_cond(sbi, blkaddr); + f2fs_ra_meta_pages_cond(sbi, blkaddr); } f2fs_put_page(page, 1); return err; } -static void destroy_fsync_dnodes(struct list_head *head) +static void destroy_fsync_dnodes(struct list_head *head, int drop) { struct fsync_inode_entry *entry, *tmp; list_for_each_entry_safe(entry, tmp, head, list) - del_fsync_inode(entry); + del_fsync_inode(entry, drop); } static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, @@ -326,7 +421,9 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, } } - sum_page = get_sum_page(sbi, segno); + sum_page = f2fs_get_sum_page(sbi, segno); + if (IS_ERR(sum_page)) + return PTR_ERR(sum_page); sum_node = (struct f2fs_summary_block *)page_address(sum_page); sum = sum_node->entries[blkoff]; f2fs_put_page(sum_page, 1); @@ -346,7 +443,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, } /* Get the node page */ - node_page = get_node_page(sbi, nid); + node_page = f2fs_get_node_page(sbi, nid); if (IS_ERR(node_page)) return PTR_ERR(node_page); @@ -371,7 +468,8 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, inode = dn->inode; } - bidx = start_bidx_of_node(offset, inode) + le16_to_cpu(sum.ofs_in_node); + bidx = f2fs_start_bidx_of_node(offset, inode) + + le16_to_cpu(sum.ofs_in_node); /* * if inode page is locked, unlock temporarily, but its reference @@ -381,11 +479,11 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, unlock_page(dn->inode_page); set_new_dnode(&tdn, inode, NULL, NULL, 0); - if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) + if (f2fs_get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) goto out; if (tdn.data_blkaddr == blkaddr) - truncate_data_blocks_range(&tdn, 1); + f2fs_truncate_data_blocks_range(&tdn, 1); f2fs_put_dnode(&tdn); out: @@ -398,14 +496,14 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, truncate_out: if (datablock_addr(tdn.inode, tdn.node_page, tdn.ofs_in_node) == blkaddr) - truncate_data_blocks_range(&tdn, 1); + f2fs_truncate_data_blocks_range(&tdn, 1); if (dn->inode->i_ino == nid && !dn->inode_page_locked) unlock_page(dn->inode_page); return 0; } static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, - struct page *page, block_t blkaddr) + struct page *page) { struct dnode_of_data dn; struct node_info ni; @@ -414,25 +512,25 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, /* step 1: recover xattr */ if (IS_INODE(page)) { - recover_inline_xattr(inode, page); + f2fs_recover_inline_xattr(inode, page); } else if (f2fs_has_xattr_block(ofs_of_node(page))) { - err = recover_xattr_data(inode, page, blkaddr); + err = f2fs_recover_xattr_data(inode, page); if (!err) recovered++; goto out; } /* step 2: recover inline data */ - if (recover_inline_data(inode, page)) + if (f2fs_recover_inline_data(inode, page)) goto out; /* step 3: recover data indices */ - start = start_bidx_of_node(ofs_of_node(page), inode); + start = f2fs_start_bidx_of_node(ofs_of_node(page), inode); end = start + ADDRS_PER_PAGE(page, inode); set_new_dnode(&dn, inode, NULL, NULL, 0); retry_dn: - err = get_dnode_of_data(&dn, start, ALLOC_NODE); + err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE); if (err) { if (err == -ENOMEM) { congestion_wait(BLK_RW_ASYNC, HZ/50); @@ -441,9 +539,12 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, goto out; } - f2fs_wait_on_page_writeback(dn.node_page, NODE, true); + f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true); - get_node_info(sbi, dn.nid, &ni); + err = f2fs_get_node_info(sbi, dn.nid, &ni); + if (err) + goto err; + f2fs_bug_on(sbi, ni.ino != ino_of_node(page)); f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page)); @@ -459,7 +560,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, /* dest is invalid, just invalidate src block */ if (dest == NULL_ADDR) { - truncate_data_blocks_range(&dn, 1); + f2fs_truncate_data_blocks_range(&dn, 1); continue; } @@ -473,8 +574,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, * and then reserve one new block in dnode page. */ if (dest == NEW_ADDR) { - truncate_data_blocks_range(&dn, 1); - reserve_new_block(&dn); + f2fs_truncate_data_blocks_range(&dn, 1); + f2fs_reserve_new_block(&dn); continue; } @@ -482,11 +583,10 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { if (src == NULL_ADDR) { - err = reserve_new_block(&dn); -#ifdef CONFIG_F2FS_FAULT_INJECTION - while (err) - err = reserve_new_block(&dn); -#endif + err = f2fs_reserve_new_block(&dn); + while (err && + IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)) + err = f2fs_reserve_new_block(&dn); /* We should not get -ENOSPC */ f2fs_bug_on(sbi, err); if (err) @@ -526,7 +626,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, - struct list_head *dir_list) + struct list_head *tmp_inode_list, struct list_head *dir_list) { struct curseg_info *curseg; struct page *page = NULL; @@ -543,9 +643,13 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR)) break; - ra_meta_pages_cond(sbi, blkaddr); + f2fs_ra_meta_pages_cond(sbi, blkaddr); - page = get_tmp_page(sbi, blkaddr); + page = f2fs_get_tmp_page(sbi, blkaddr); + if (IS_ERR(page)) { + err = PTR_ERR(page); + break; + } if (!is_recoverable_dnode(page)) { f2fs_put_page(page, 1); @@ -560,8 +664,11 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, * In this case, we can lose the latest inode(x). * So, call recover_inode for the inode update. */ - if (IS_INODE(page)) - recover_inode(entry->inode, page); + if (IS_INODE(page)) { + err = recover_inode(entry->inode, page); + if (err) + break; + } if (entry->last_dentry == blkaddr) { err = recover_dentry(entry->inode, page, dir_list); if (err) { @@ -569,35 +676,39 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, break; } } - err = do_recover_data(sbi, entry->inode, page, blkaddr); + err = do_recover_data(sbi, entry->inode, page); if (err) { f2fs_put_page(page, 1); break; } if (entry->blkaddr == blkaddr) - del_fsync_inode(entry); + list_move_tail(&entry->list, tmp_inode_list); next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); f2fs_put_page(page, 1); } if (!err) - allocate_new_segments(sbi); + f2fs_allocate_new_segments(sbi); return err; } -int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) +int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) { - struct list_head inode_list; + struct list_head inode_list, tmp_inode_list; struct list_head dir_list; int err; int ret = 0; unsigned long s_flags = sbi->sb->s_flags; bool need_writecp = false; +#ifdef CONFIG_QUOTA + int quota_enabled; +#endif if (s_flags & MS_RDONLY) { - f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); + f2fs_msg(sbi->sb, KERN_INFO, + "recover fsync data on readonly fs"); sbi->sb->s_flags &= ~MS_RDONLY; } @@ -605,7 +716,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) /* Needed for iput() to work correctly and not trash data */ sbi->sb->s_flags |= MS_ACTIVE; /* Turn on quotas so that they are updated correctly */ - f2fs_enable_quota_files(sbi); + quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY); #endif fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", @@ -616,6 +727,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) } INIT_LIST_HEAD(&inode_list); + INIT_LIST_HEAD(&tmp_inode_list); INIT_LIST_HEAD(&dir_list); /* prevent checkpoint */ @@ -634,11 +746,16 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) need_writecp = true; /* step #2: recover data */ - err = recover_data(sbi, &inode_list, &dir_list); + err = recover_data(sbi, &inode_list, &tmp_inode_list, &dir_list); if (!err) f2fs_bug_on(sbi, !list_empty(&inode_list)); + else { + /* restore s_flags to let iput() trash data */ + sbi->sb->s_flags = s_flags; + } skip: - destroy_fsync_dnodes(&inode_list); + destroy_fsync_dnodes(&inode_list, err); + destroy_fsync_dnodes(&tmp_inode_list, err); /* truncate meta pages to be used by the recovery */ truncate_inode_pages_range(META_MAPPING(sbi), @@ -647,26 +764,31 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) if (err) { truncate_inode_pages_final(NODE_MAPPING(sbi)); truncate_inode_pages_final(META_MAPPING(sbi)); + } else { + clear_sbi_flag(sbi, SBI_POR_DOING); } - - clear_sbi_flag(sbi, SBI_POR_DOING); mutex_unlock(&sbi->cp_mutex); /* let's drop all the directory inodes for clean checkpoint */ - destroy_fsync_dnodes(&dir_list); + destroy_fsync_dnodes(&dir_list, err); - if (!err && need_writecp) { - struct cp_control cpc = { - .reason = CP_RECOVERY, - }; - err = write_checkpoint(sbi, &cpc); + if (need_writecp) { + set_sbi_flag(sbi, SBI_IS_RECOVERED); + + if (!err) { + struct cp_control cpc = { + .reason = CP_RECOVERY, + }; + err = f2fs_write_checkpoint(sbi, &cpc); + } } kmem_cache_destroy(fsync_entry_slab); out: #ifdef CONFIG_QUOTA /* Turn quotas off */ - f2fs_quota_off_umount(sbi->sb); + if (quota_enabled) + f2fs_quota_off_umount(sbi->sb); #endif sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5c698757..9b79056 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c
@@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/segment.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/f2fs_fs.h> @@ -169,7 +166,7 @@ static unsigned long __find_rev_next_zero_bit(const unsigned long *addr, return result - size + __reverse_ffz(tmp); } -bool need_SSR(struct f2fs_sb_info *sbi) +bool f2fs_need_SSR(struct f2fs_sb_info *sbi) { int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); @@ -177,15 +174,18 @@ bool need_SSR(struct f2fs_sb_info *sbi) if (test_opt(sbi, LFS)) return false; - if (sbi->gc_thread && sbi->gc_thread->gc_urgent) + if (sbi->gc_mode == GC_URGENT) + return true; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return true; return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs + - 2 * reserved_sections(sbi)); + SM_I(sbi)->min_ssr_sections + reserved_sections(sbi)); } -void register_inmem_page(struct inode *inode, struct page *page) +void f2fs_register_inmem_page(struct inode *inode, struct page *page) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); struct inmem_pages *new; @@ -204,6 +204,10 @@ void register_inmem_page(struct inode *inode, struct page *page) mutex_lock(&fi->inmem_lock); get_page(page); list_add_tail(&new->list, &fi->inmem_pages); + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (list_empty(&fi->inmem_ilist)) + list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); mutex_unlock(&fi->inmem_lock); @@ -225,7 +229,7 @@ static int __revoke_inmem_pages(struct inode *inode, lock_page(page); - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); if (recover) { struct dnode_of_data dn; @@ -234,7 +238,8 @@ static int __revoke_inmem_pages(struct inode *inode, trace_f2fs_commit_inmem_page(page, INMEM_REVOKE); retry: set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, page->index, + LOOKUP_NODE); if (err) { if (err == -ENOMEM) { congestion_wait(BLK_RW_ASYNC, HZ/50); @@ -244,15 +249,27 @@ static int __revoke_inmem_pages(struct inode *inode, err = -EAGAIN; goto next; } - get_node_info(sbi, dn.nid, &ni); - f2fs_replace_block(sbi, &dn, dn.data_blkaddr, + + err = f2fs_get_node_info(sbi, dn.nid, &ni); + if (err) { + f2fs_put_dnode(&dn); + return err; + } + + if (cur->old_addr == NEW_ADDR) { + f2fs_invalidate_blocks(sbi, dn.data_blkaddr); + f2fs_update_data_blkaddr(&dn, NEW_ADDR); + } else + f2fs_replace_block(sbi, &dn, dn.data_blkaddr, cur->old_addr, ni.version, true, true); f2fs_put_dnode(&dn); } next: /* we don't need to invalidate this in the sccessful status */ - if (drop || recover) + if (drop || recover) { ClearPageUptodate(page); + clear_cold_data(page); + } set_page_private(page, 0); ClearPagePrivate(page); f2fs_put_page(page, 1); @@ -264,20 +281,57 @@ static int __revoke_inmem_pages(struct inode *inode, return err; } -void drop_inmem_pages(struct inode *inode) +void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure) { + struct list_head *head = &sbi->inode_list[ATOMIC_FILE]; + struct inode *inode; + struct f2fs_inode_info *fi; +next: + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (list_empty(head)) { + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + return; + } + fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist); + inode = igrab(&fi->vfs_inode); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + + if (inode) { + if (gc_failure) { + if (fi->i_gc_failures[GC_FAILURE_ATOMIC]) + goto drop; + goto skip; + } +drop: + set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); + f2fs_drop_inmem_pages(inode); + iput(inode); + } +skip: + congestion_wait(BLK_RW_ASYNC, HZ/50); + cond_resched(); + goto next; +} + +void f2fs_drop_inmem_pages(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); mutex_lock(&fi->inmem_lock); __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (!list_empty(&fi->inmem_ilist)) + list_del_init(&fi->inmem_ilist); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); mutex_unlock(&fi->inmem_lock); clear_inode_flag(inode, FI_ATOMIC_FILE); - clear_inode_flag(inode, FI_HOT_DATA); + fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0; stat_dec_atomic_write(inode); } -void drop_inmem_page(struct inode *inode, struct page *page) +void f2fs_drop_inmem_page(struct inode *inode, struct page *page) { struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -292,7 +346,7 @@ void drop_inmem_page(struct inode *inode, struct page *page) break; } - f2fs_bug_on(sbi, !cur || cur->page != page); + f2fs_bug_on(sbi, list_empty(head) || cur->page != page); list_del(&cur->list); mutex_unlock(&fi->inmem_lock); @@ -307,22 +361,25 @@ void drop_inmem_page(struct inode *inode, struct page *page) trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE); } -static int __commit_inmem_pages(struct inode *inode, - struct list_head *revoke_list) +static int __f2fs_commit_inmem_pages(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); struct inmem_pages *cur, *tmp; struct f2fs_io_info fio = { .sbi = sbi, + .ino = inode->i_ino, .type = DATA, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_PRIO, .io_type = FS_DATA_IO, }; - pgoff_t last_idx = ULONG_MAX; + struct list_head revoke_list; + bool submit_bio = false; int err = 0; + INIT_LIST_HEAD(&revoke_list); + list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { struct page *page = cur->page; @@ -330,18 +387,19 @@ static int __commit_inmem_pages(struct inode *inode, if (page->mapping == inode->i_mapping) { trace_f2fs_commit_inmem_page(page, INMEM); + f2fs_wait_on_page_writeback(page, DATA, true, true); + set_page_dirty(page); - f2fs_wait_on_page_writeback(page, DATA, true); if (clear_page_dirty_for_io(page)) { inode_dec_dirty_pages(inode); - remove_dirty_inode(inode); + f2fs_remove_dirty_inode(inode); } retry: fio.page = page; fio.old_blkaddr = NULL_ADDR; fio.encrypted_page = NULL; fio.need_lock = LOCK_DONE; - err = do_write_data_page(&fio); + err = f2fs_do_write_data_page(&fio); if (err) { if (err == -ENOMEM) { congestion_wait(BLK_RW_ASYNC, HZ/50); @@ -353,38 +411,16 @@ static int __commit_inmem_pages(struct inode *inode, } /* record old blkaddr for revoking */ cur->old_addr = fio.old_blkaddr; - last_idx = page->index; + submit_bio = true; } unlock_page(page); - list_move_tail(&cur->list, revoke_list); + list_move_tail(&cur->list, &revoke_list); } - if (last_idx != ULONG_MAX) - f2fs_submit_merged_write_cond(sbi, inode, 0, last_idx, DATA); + if (submit_bio) + f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA); - if (!err) - __revoke_inmem_pages(inode, revoke_list, false, false); - - return err; -} - -int commit_inmem_pages(struct inode *inode) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_inode_info *fi = F2FS_I(inode); - struct list_head revoke_list; - int err; - - INIT_LIST_HEAD(&revoke_list); - f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); - - set_inode_flag(inode, FI_ATOMIC_COMMIT); - - mutex_lock(&fi->inmem_lock); - err = __commit_inmem_pages(inode, &revoke_list); if (err) { - int ret; /* * try to revoke all committed pages, but still we could fail * due to no memory or other reason, if that happened, EAGAIN @@ -393,18 +429,44 @@ int commit_inmem_pages(struct inode *inode) * recovery or rewrite & commit last transaction. For other * error number, revoking was done by filesystem itself. */ - ret = __revoke_inmem_pages(inode, &revoke_list, false, true); - if (ret) - err = ret; + err = __revoke_inmem_pages(inode, &revoke_list, false, true); /* drop all uncommitted pages */ __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); + } else { + __revoke_inmem_pages(inode, &revoke_list, false, false); } + + return err; +} + +int f2fs_commit_inmem_pages(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); + int err; + + f2fs_balance_fs(sbi, true); + + down_write(&fi->i_gc_rwsem[WRITE]); + + f2fs_lock_op(sbi); + set_inode_flag(inode, FI_ATOMIC_COMMIT); + + mutex_lock(&fi->inmem_lock); + err = __f2fs_commit_inmem_pages(inode); + + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (!list_empty(&fi->inmem_ilist)) + list_del_init(&fi->inmem_ilist); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); mutex_unlock(&fi->inmem_lock); clear_inode_flag(inode, FI_ATOMIC_COMMIT); f2fs_unlock_op(sbi); + up_write(&fi->i_gc_rwsem[WRITE]); + return err; } @@ -414,17 +476,18 @@ int commit_inmem_pages(struct inode *inode) */ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_CHECKPOINT)) { f2fs_show_injection_info(FAULT_CHECKPOINT); f2fs_stop_checkpoint(sbi, false); } -#endif /* balance_fs_bg is able to be pending */ if (need && excess_cached_nats(sbi)) f2fs_balance_fs_bg(sbi); + if (f2fs_is_checkpoint_ready(sbi)) + return; + /* * We should do GC or end up with checkpoint, if there are so many dirty * dir/node pages without enough free segments. @@ -441,32 +504,34 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) return; /* try to shrink extent cache when there is no enough memory */ - if (!available_free_memory(sbi, EXTENT_CACHE)) + if (!f2fs_available_free_memory(sbi, EXTENT_CACHE)) f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER); /* check the # of cached NAT entries */ - if (!available_free_memory(sbi, NAT_ENTRIES)) - try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK); + if (!f2fs_available_free_memory(sbi, NAT_ENTRIES)) + f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK); - if (!available_free_memory(sbi, FREE_NIDS)) - try_to_free_nids(sbi, MAX_FREE_NIDS); + if (!f2fs_available_free_memory(sbi, FREE_NIDS)) + f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS); else - build_free_nids(sbi, false, false); + f2fs_build_free_nids(sbi, false, false); - if (!is_idle(sbi) && !excess_dirty_nats(sbi)) + if (!is_idle(sbi, REQ_TIME) && + (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi))) return; /* checkpoint is the only way to shrink partial cached entries */ - if (!available_free_memory(sbi, NAT_ENTRIES) || - !available_free_memory(sbi, INO_ENTRIES) || + if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) || + !f2fs_available_free_memory(sbi, INO_ENTRIES) || excess_prefree_segs(sbi) || excess_dirty_nats(sbi) || + excess_dirty_nodes(sbi) || f2fs_time_over(sbi, CP_TIME)) { if (test_opt(sbi, DATA_FLUSH)) { struct blk_plug plug; blk_start_plug(&plug); - sync_dirty_inodes(sbi, FILE_INODE); + f2fs_sync_dirty_inodes(sbi, FILE_INODE); blk_finish_plug(&plug); } f2fs_sync_fs(sbi->sb, true); @@ -477,7 +542,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) static int __submit_flush_wait(struct f2fs_sb_info *sbi, struct block_device *bdev) { - struct bio *bio = f2fs_bio_alloc(0); + struct bio *bio = f2fs_bio_alloc(sbi, 0, true); int ret; bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; @@ -490,15 +555,17 @@ static int __submit_flush_wait(struct f2fs_sb_info *sbi, return ret; } -static int submit_flush_wait(struct f2fs_sb_info *sbi) +static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino) { - int ret = __submit_flush_wait(sbi, sbi->sb->s_bdev); + int ret = 0; int i; - if (!sbi->s_ndevs || ret) - return ret; + if (!sbi->s_ndevs) + return __submit_flush_wait(sbi, sbi->sb->s_bdev); - for (i = 1; i < sbi->s_ndevs; i++) { + for (i = 0; i < sbi->s_ndevs; i++) { + if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO)) + continue; ret = __submit_flush_wait(sbi, FDEV(i).bdev); if (ret) break; @@ -524,7 +591,9 @@ static int issue_flush_thread(void *data) fcc->dispatch_list = llist_del_all(&fcc->issue_list); fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); - ret = submit_flush_wait(sbi); + cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode); + + ret = submit_flush_wait(sbi, cmd->ino); atomic_inc(&fcc->issued_flush); llist_for_each_entry_safe(cmd, next, @@ -542,7 +611,7 @@ static int issue_flush_thread(void *data) goto repeat; } -int f2fs_issue_flush(struct f2fs_sb_info *sbi) +int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino) { struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; struct flush_cmd cmd; @@ -552,19 +621,22 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) return 0; if (!test_opt(sbi, FLUSH_MERGE)) { - ret = submit_flush_wait(sbi); + atomic_inc(&fcc->queued_flush); + ret = submit_flush_wait(sbi, ino); + atomic_dec(&fcc->queued_flush); atomic_inc(&fcc->issued_flush); return ret; } - if (atomic_inc_return(&fcc->issing_flush) == 1) { - ret = submit_flush_wait(sbi); - atomic_dec(&fcc->issing_flush); + if (atomic_inc_return(&fcc->queued_flush) == 1 || sbi->s_ndevs > 1) { + ret = submit_flush_wait(sbi, ino); + atomic_dec(&fcc->queued_flush); atomic_inc(&fcc->issued_flush); return ret; } + cmd.ino = ino; init_completion(&cmd.wait); llist_add(&cmd.llnode, &fcc->issue_list); @@ -577,23 +649,23 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) if (fcc->f2fs_issue_flush) { wait_for_completion(&cmd.wait); - atomic_dec(&fcc->issing_flush); + atomic_dec(&fcc->queued_flush); } else { struct llist_node *list; list = llist_del_all(&fcc->issue_list); if (!list) { wait_for_completion(&cmd.wait); - atomic_dec(&fcc->issing_flush); + atomic_dec(&fcc->queued_flush); } else { struct flush_cmd *tmp, *next; - ret = submit_flush_wait(sbi); + ret = submit_flush_wait(sbi, ino); llist_for_each_entry_safe(tmp, next, list, llnode) { if (tmp == &cmd) { cmd.ret = ret; - atomic_dec(&fcc->issing_flush); + atomic_dec(&fcc->queued_flush); continue; } tmp->ret = ret; @@ -605,7 +677,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) return cmd.ret; } -int create_flush_cmd_control(struct f2fs_sb_info *sbi) +int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi) { dev_t dev = sbi->sb->s_bdev->bd_dev; struct flush_cmd_control *fcc; @@ -618,11 +690,11 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) goto init_thread; } - fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); + fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL); if (!fcc) return -ENOMEM; atomic_set(&fcc->issued_flush, 0); - atomic_set(&fcc->issing_flush, 0); + atomic_set(&fcc->queued_flush, 0); init_waitqueue_head(&fcc->flush_wait_queue); init_llist_head(&fcc->issue_list); SM_I(sbi)->fcc_info = fcc; @@ -634,7 +706,7 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(fcc->f2fs_issue_flush)) { err = PTR_ERR(fcc->f2fs_issue_flush); - kfree(fcc); + kvfree(fcc); SM_I(sbi)->fcc_info = NULL; return err; } @@ -642,7 +714,7 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) return err; } -void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) +void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) { struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; @@ -653,11 +725,33 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) kthread_stop(flush_thread); } if (free) { - kfree(fcc); + kvfree(fcc); SM_I(sbi)->fcc_info = NULL; } } +int f2fs_flush_device_cache(struct f2fs_sb_info *sbi) +{ + int ret = 0, i; + + if (!sbi->s_ndevs) + return 0; + + for (i = 1; i < sbi->s_ndevs; i++) { + if (!f2fs_test_bit(i, (char *)&sbi->dirty_device)) + continue; + ret = __submit_flush_wait(sbi, FDEV(i).bdev); + if (ret) + break; + + spin_lock(&sbi->dev_lock); + f2fs_clear_bit(i, (char *)&sbi->dirty_device); + spin_unlock(&sbi->dev_lock); + } + + return ret; +} + static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, enum dirty_type dirty_type) { @@ -712,7 +806,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned short valid_blocks; + unsigned short valid_blocks, ckpt_valid_blocks; if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno)) return; @@ -720,8 +814,10 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_lock(&dirty_i->seglist_lock); valid_blocks = get_valid_blocks(sbi, segno, false); + ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno); - if (valid_blocks == 0) { + if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) || + ckpt_valid_blocks == sbi->blocks_per_seg)) { __locate_dirty_segment(sbi, segno, PRE); __remove_dirty_segment(sbi, segno, DIRTY); } else if (valid_blocks < sbi->blocks_per_seg) { @@ -734,6 +830,66 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_unlock(&dirty_i->seglist_lock); } +/* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */ +void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int segno; + + mutex_lock(&dirty_i->seglist_lock); + for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) { + if (get_valid_blocks(sbi, segno, false)) + continue; + if (IS_CURSEG(sbi, segno)) + continue; + __locate_dirty_segment(sbi, segno, PRE); + __remove_dirty_segment(sbi, segno, DIRTY); + } + mutex_unlock(&dirty_i->seglist_lock); +} + +int f2fs_disable_cp_again(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + block_t ovp = overprovision_segments(sbi) << sbi->log_blocks_per_seg; + block_t holes[2] = {0, 0}; /* DATA and NODE */ + struct seg_entry *se; + unsigned int segno; + + mutex_lock(&dirty_i->seglist_lock); + for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) { + se = get_seg_entry(sbi, segno); + if (IS_NODESEG(se->type)) + holes[NODE] += sbi->blocks_per_seg - se->valid_blocks; + else + holes[DATA] += sbi->blocks_per_seg - se->valid_blocks; + } + mutex_unlock(&dirty_i->seglist_lock); + + if (holes[DATA] > ovp || holes[NODE] > ovp) + return -EAGAIN; + return 0; +} + +/* This is only used by SBI_CP_DISABLED */ +static unsigned int get_free_segment(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int segno = 0; + + mutex_lock(&dirty_i->seglist_lock); + for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) { + if (get_valid_blocks(sbi, segno, false)) + continue; + if (get_ckpt_valid_blocks(sbi, segno)) + continue; + mutex_unlock(&dirty_i->seglist_lock); + return segno; + } + mutex_unlock(&dirty_i->seglist_lock); + return NULL_SEGNO; +} + static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t lstart, block_t start, block_t len) @@ -754,9 +910,12 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, dc->len = len; dc->ref = 0; dc->state = D_PREP; + dc->queued = 0; dc->error = 0; init_completion(&dc->wait); list_add_tail(&dc->list, pend_list); + spin_lock_init(&dc->lock); + dc->bio_ref = 0; atomic_inc(&dcc->discard_cmd_cnt); dcc->undiscard_blks += len; @@ -766,7 +925,8 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t lstart, block_t start, block_t len, - struct rb_node *parent, struct rb_node **p) + struct rb_node *parent, struct rb_node **p, + bool leftmost) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_cmd *dc; @@ -774,7 +934,7 @@ static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi, dc = __create_discard_cmd(sbi, bdev, lstart, start, len); rb_link_node(&dc->rb_node, parent, p); - rb_insert_color(&dc->rb_node, &dcc->root); + rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost); return dc; } @@ -783,10 +943,10 @@ static void __detach_discard_cmd(struct discard_cmd_control *dcc, struct discard_cmd *dc) { if (dc->state == D_DONE) - atomic_dec(&dcc->issing_discard); + atomic_sub(dc->queued, &dcc->queued_discard); list_del(&dc->list); - rb_erase(&dc->rb_node, &dcc->root); + rb_erase_cached(&dc->rb_node, &dcc->root); dcc->undiscard_blks -= dc->len; kmem_cache_free(discard_cmd_slab, dc); @@ -798,6 +958,16 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + unsigned long flags; + + trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len); + + spin_lock_irqsave(&dc->lock, flags); + if (dc->bio_ref) { + spin_unlock_irqrestore(&dc->lock, flags); + return; + } + spin_unlock_irqrestore(&dc->lock, flags); f2fs_bug_on(sbi, dc->ref); @@ -805,23 +975,30 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, dc->error = 0; if (dc->error) - f2fs_msg(sbi->sb, KERN_INFO, - "Issue discard(%u, %u, %u) failed, ret: %d", - dc->lstart, dc->start, dc->len, dc->error); + printk_ratelimited( + "%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d", + KERN_INFO, dc->lstart, dc->start, dc->len, dc->error); __detach_discard_cmd(dcc, dc); } static void f2fs_submit_discard_endio(struct bio *bio) { struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; + unsigned long flags; dc->error = blk_status_to_errno(bio->bi_status); - dc->state = D_DONE; - complete_all(&dc->wait); + + spin_lock_irqsave(&dc->lock, flags); + dc->bio_ref--; + if (!dc->bio_ref && dc->state == D_SUBMIT) { + dc->state = D_DONE; + complete_all(&dc->wait); + } + spin_unlock_irqrestore(&dc->lock, flags); bio_put(bio); } -void __check_sit_bitmap(struct f2fs_sb_info *sbi, +static void __check_sit_bitmap(struct f2fs_sb_info *sbi, block_t start, block_t end) { #ifdef CONFIG_F2FS_CHECK_FS @@ -848,40 +1025,152 @@ void __check_sit_bitmap(struct f2fs_sb_info *sbi, #endif } -/* this function is copied from blkdev_issue_discard from block/blk-lib.c */ -static void __submit_discard_cmd(struct f2fs_sb_info *sbi, - struct discard_cmd *dc) +static void __init_discard_policy(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy, + int discard_type, unsigned int granularity) { + /* common policy */ + dpolicy->type = discard_type; + dpolicy->sync = true; + dpolicy->ordered = false; + dpolicy->granularity = granularity; + + dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; + dpolicy->io_aware_gran = MAX_PLIST_NUM; + + if (discard_type == DPOLICY_BG) { + dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; + dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME; + dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; + dpolicy->io_aware = true; + dpolicy->sync = false; + dpolicy->ordered = true; + if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) { + dpolicy->granularity = 1; + dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME; + } + } else if (discard_type == DPOLICY_FORCE) { + dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; + dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME; + dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; + dpolicy->io_aware = false; + } else if (discard_type == DPOLICY_FSTRIM) { + dpolicy->io_aware = false; + } else if (discard_type == DPOLICY_UMOUNT) { + dpolicy->max_requests = UINT_MAX; + dpolicy->io_aware = false; + } +} + +static void __update_discard_tree_range(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t lstart, + block_t start, block_t len); +/* this function is copied from blkdev_issue_discard from block/blk-lib.c */ +static int __submit_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy, + struct discard_cmd *dc, + unsigned int *issued) +{ + struct block_device *bdev = dc->bdev; + struct request_queue *q = bdev_get_queue(bdev); + unsigned int max_discard_blocks = + SECTOR_TO_BLOCK(q->limits.max_discard_sectors); struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct bio *bio = NULL; + struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ? + &(dcc->fstrim_list) : &(dcc->wait_list); + int flag = dpolicy->sync ? REQ_SYNC : 0; + block_t lstart, start, len, total_len; + int err = 0; if (dc->state != D_PREP) - return; + return 0; - trace_f2fs_issue_discard(dc->bdev, dc->start, dc->len); + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) + return 0; - dc->error = __blkdev_issue_discard(dc->bdev, - SECTOR_FROM_BLOCK(dc->start), - SECTOR_FROM_BLOCK(dc->len), - GFP_NOFS, 0, &bio); - if (!dc->error) { - /* should keep before submission to avoid D_DONE right away */ - dc->state = D_SUBMIT; - atomic_inc(&dcc->issued_discard); - atomic_inc(&dcc->issing_discard); - if (bio) { - bio->bi_private = dc; - bio->bi_end_io = f2fs_submit_discard_endio; - bio->bi_opf |= REQ_SYNC; - submit_bio(bio); - list_move_tail(&dc->list, &dcc->wait_list); - __check_sit_bitmap(sbi, dc->start, dc->start + dc->len); + trace_f2fs_issue_discard(bdev, dc->start, dc->len); - f2fs_update_iostat(sbi, FS_DISCARD, 1); + lstart = dc->lstart; + start = dc->start; + len = dc->len; + total_len = len; + + dc->len = 0; + + while (total_len && *issued < dpolicy->max_requests && !err) { + struct bio *bio = NULL; + unsigned long flags; + bool last = true; + + if (len > max_discard_blocks) { + len = max_discard_blocks; + last = false; } - } else { - __remove_discard_cmd(sbi, dc); + + (*issued)++; + if (*issued == dpolicy->max_requests) + last = true; + + dc->len += len; + + if (time_to_inject(sbi, FAULT_DISCARD)) { + f2fs_show_injection_info(FAULT_DISCARD); + err = -EIO; + goto submit; + } + err = __blkdev_issue_discard(bdev, + SECTOR_FROM_BLOCK(start), + SECTOR_FROM_BLOCK(len), + GFP_NOFS, 0, &bio); +submit: + if (err) { + spin_lock_irqsave(&dc->lock, flags); + if (dc->state == D_PARTIAL) + dc->state = D_SUBMIT; + spin_unlock_irqrestore(&dc->lock, flags); + + break; + } + + f2fs_bug_on(sbi, !bio); + + /* + * should keep before submission to avoid D_DONE + * right away + */ + spin_lock_irqsave(&dc->lock, flags); + if (last) + dc->state = D_SUBMIT; + else + dc->state = D_PARTIAL; + dc->bio_ref++; + spin_unlock_irqrestore(&dc->lock, flags); + + atomic_inc(&dcc->queued_discard); + dc->queued++; + list_move_tail(&dc->list, wait_list); + + /* sanity check on discard range */ + __check_sit_bitmap(sbi, lstart, lstart + len); + + bio->bi_private = dc; + bio->bi_end_io = f2fs_submit_discard_endio; + bio->bi_opf |= flag; + submit_bio(bio); + + atomic_inc(&dcc->issued_discard); + + f2fs_update_iostat(sbi, FS_DISCARD, 1); + + lstart += len; + start += len; + total_len -= len; + len = total_len; } + + if (!err && len) + __update_discard_tree_range(sbi, bdev, lstart, start, len); + return err; } static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, @@ -891,9 +1180,10 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, struct rb_node *insert_parent) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct rb_node **p = &dcc->root.rb_node; + struct rb_node **p; struct rb_node *parent = NULL; struct discard_cmd *dc = NULL; + bool leftmost = true; if (insert_p && insert_parent) { parent = insert_parent; @@ -901,9 +1191,11 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, goto do_insert; } - p = __lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart); + p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, + lstart, &leftmost); do_insert: - dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p); + dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, + p, leftmost); if (!dc) return NULL; @@ -962,15 +1254,16 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, struct discard_cmd *dc; struct discard_info di = {0}; struct rb_node **insert_p = NULL, *insert_parent = NULL; + struct request_queue *q = bdev_get_queue(bdev); + unsigned int max_discard_blocks = + SECTOR_TO_BLOCK(q->limits.max_discard_sectors); block_t end = lstart + len; - mutex_lock(&dcc->cmd_lock); - - dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root, + dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root, NULL, lstart, (struct rb_entry **)&prev_dc, (struct rb_entry **)&next_dc, - &insert_p, &insert_parent, true); + &insert_p, &insert_parent, true, NULL); if (dc) prev_dc = dc; @@ -1005,7 +1298,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, if (prev_dc && prev_dc->state == D_PREP && prev_dc->bdev == bdev && - __is_discard_back_mergeable(&di, &prev_dc->di)) { + __is_discard_back_mergeable(&di, &prev_dc->di, + max_discard_blocks)) { prev_dc->di.len += di.len; dcc->undiscard_blks += di.len; __relocate_discard_cmd(dcc, prev_dc); @@ -1016,7 +1310,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, if (next_dc && next_dc->state == D_PREP && next_dc->bdev == bdev && - __is_discard_front_mergeable(&di, &next_dc->di)) { + __is_discard_front_mergeable(&di, &next_dc->di, + max_discard_blocks)) { next_dc->di.lstart = di.lstart; next_dc->di.len += di.len; next_dc->di.start = di.start; @@ -1039,8 +1334,6 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, node = rb_next(&prev_dc->rb_node); next_dc = rb_entry_safe(node, struct discard_cmd, rb_node); } - - mutex_unlock(&dcc->cmd_lock); } static int __queue_discard_cmd(struct f2fs_sb_info *sbi, @@ -1055,61 +1348,64 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, blkstart -= FDEV(devi).start_blk; } + mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock); __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen); + mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock); return 0; } -static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) +static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *pend_list; - struct discard_cmd *dc, *tmp; + struct discard_cmd *prev_dc = NULL, *next_dc = NULL; + struct rb_node **insert_p = NULL, *insert_parent = NULL; + struct discard_cmd *dc; struct blk_plug plug; - int iter = 0, issued = 0; - int i; + unsigned int pos = dcc->next_pos; + unsigned int issued = 0; bool io_interrupted = false; mutex_lock(&dcc->cmd_lock); - f2fs_bug_on(sbi, - !__check_rb_tree_consistence(sbi, &dcc->root)); + dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root, + NULL, pos, + (struct rb_entry **)&prev_dc, + (struct rb_entry **)&next_dc, + &insert_p, &insert_parent, true, NULL); + if (!dc) + dc = next_dc; + blk_start_plug(&plug); - for (i = MAX_PLIST_NUM - 1; - i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) { - pend_list = &dcc->pend_list[i]; - list_for_each_entry_safe(dc, tmp, pend_list, list) { - f2fs_bug_on(sbi, dc->state != D_PREP); - /* Hurry up to finish fstrim */ - if (dcc->pend_list_tag[i] & P_TRIM) { - __submit_discard_cmd(sbi, dc); - issued++; + while (dc) { + struct rb_node *node; + int err = 0; - if (fatal_signal_pending(current)) - break; - continue; - } + if (dc->state != D_PREP) + goto next; - if (!issue_cond) { - __submit_discard_cmd(sbi, dc); - issued++; - continue; - } - - if (is_idle(sbi)) { - __submit_discard_cmd(sbi, dc); - issued++; - } else { - io_interrupted = true; - } - - if (++iter >= DISCARD_ISSUE_RATE) - goto out; + if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) { + io_interrupted = true; + break; } - if (list_empty(pend_list) && dcc->pend_list_tag[i] & P_TRIM) - dcc->pend_list_tag[i] &= (~P_TRIM); + + dcc->next_pos = dc->lstart + dc->len; + err = __submit_discard_cmd(sbi, dpolicy, dc, &issued); + + if (issued >= dpolicy->max_requests) + break; +next: + node = rb_next(&dc->rb_node); + if (err) + __remove_discard_cmd(sbi, dc); + dc = rb_entry_safe(node, struct discard_cmd, rb_node); } -out: + blk_finish_plug(&plug); + + if (!dc) + dcc->next_pos = 0; + mutex_unlock(&dcc->cmd_lock); if (!issued && io_interrupted) @@ -1118,12 +1414,67 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) return issued; } -static void __drop_discard_cmd(struct f2fs_sb_info *sbi) +static int __issue_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *pend_list; + struct discard_cmd *dc, *tmp; + struct blk_plug plug; + int i, issued = 0; + bool io_interrupted = false; + + for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { + if (i + 1 < dpolicy->granularity) + break; + + if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered) + return __issue_discard_cmd_orderly(sbi, dpolicy); + + pend_list = &dcc->pend_list[i]; + + mutex_lock(&dcc->cmd_lock); + if (list_empty(pend_list)) + goto next; + if (unlikely(dcc->rbtree_check)) + f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, + &dcc->root)); + blk_start_plug(&plug); + list_for_each_entry_safe(dc, tmp, pend_list, list) { + f2fs_bug_on(sbi, dc->state != D_PREP); + + if (dpolicy->io_aware && i < dpolicy->io_aware_gran && + !is_idle(sbi, DISCARD_TIME)) { + io_interrupted = true; + break; + } + + __submit_discard_cmd(sbi, dpolicy, dc, &issued); + + if (issued >= dpolicy->max_requests) + break; + } + blk_finish_plug(&plug); +next: + mutex_unlock(&dcc->cmd_lock); + + if (issued >= dpolicy->max_requests || io_interrupted) + break; + } + + if (!issued && io_interrupted) + issued = -1; + + return issued; +} + +static bool __drop_discard_cmd(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *pend_list; struct discard_cmd *dc, *tmp; int i; + bool dropped = false; mutex_lock(&dcc->cmd_lock); for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { @@ -1131,39 +1482,63 @@ static void __drop_discard_cmd(struct f2fs_sb_info *sbi) list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); __remove_discard_cmd(sbi, dc); + dropped = true; } } mutex_unlock(&dcc->cmd_lock); + + return dropped; } -static void __wait_one_discard_bio(struct f2fs_sb_info *sbi, +void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi) +{ + __drop_discard_cmd(sbi); +} + +static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi, struct discard_cmd *dc) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + unsigned int len = 0; wait_for_completion_io(&dc->wait); mutex_lock(&dcc->cmd_lock); f2fs_bug_on(sbi, dc->state != D_DONE); dc->ref--; - if (!dc->ref) + if (!dc->ref) { + if (!dc->error) + len = dc->len; __remove_discard_cmd(sbi, dc); + } mutex_unlock(&dcc->cmd_lock); + + return len; } -static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) +static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy, + block_t start, block_t end) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *wait_list = &(dcc->wait_list); + struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ? + &(dcc->fstrim_list) : &(dcc->wait_list); struct discard_cmd *dc, *tmp; bool need_wait; + unsigned int trimmed = 0; next: need_wait = false; mutex_lock(&dcc->cmd_lock); list_for_each_entry_safe(dc, tmp, wait_list, list) { - if (!wait_cond || (dc->state == D_DONE && !dc->ref)) { + if (dc->lstart + dc->len <= start || end <= dc->lstart) + continue; + if (dc->len < dpolicy->granularity) + continue; + if (dc->state == D_DONE && !dc->ref) { wait_for_completion_io(&dc->wait); + if (!dc->error) + trimmed += dc->len; __remove_discard_cmd(sbi, dc); } else { dc->ref++; @@ -1174,20 +1549,41 @@ static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) mutex_unlock(&dcc->cmd_lock); if (need_wait) { - __wait_one_discard_bio(sbi, dc); + trimmed += __wait_one_discard_bio(sbi, dc); goto next; } + + return trimmed; +} + +static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy) +{ + struct discard_policy dp; + unsigned int discard_blks; + + if (dpolicy) + return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX); + + /* wait all */ + __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1); + discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX); + __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1); + discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX); + + return discard_blks; } /* This should be covered by global mutex, &sit_i->sentry_lock */ -void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) +static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_cmd *dc; bool need_wait = false; mutex_lock(&dcc->cmd_lock); - dc = (struct discard_cmd *)__lookup_rb_tree(&dcc->root, NULL, blkaddr); + dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root, + NULL, blkaddr); if (dc) { if (dc->state == D_PREP) { __punch_discard_cmd(sbi, dc, blkaddr); @@ -1202,7 +1598,7 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) __wait_one_discard_bio(sbi, dc); } -void stop_discard_thread(struct f2fs_sb_info *sbi) +void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; @@ -1214,23 +1610,23 @@ void stop_discard_thread(struct f2fs_sb_info *sbi) } } -/* This comes from f2fs_put_super and f2fs_trim_fs */ -void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount) -{ - __issue_discard_cmd(sbi, false); - __drop_discard_cmd(sbi); - __wait_discard_cmd(sbi, !umount); -} - -static void mark_discard_range_all(struct f2fs_sb_info *sbi) +/* This comes from f2fs_put_super */ +bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - int i; + struct discard_policy dpolicy; + bool dropped; - mutex_lock(&dcc->cmd_lock); - for (i = 0; i < MAX_PLIST_NUM; i++) - dcc->pend_list_tag[i] |= P_TRIM; - mutex_unlock(&dcc->cmd_lock); + __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT, + dcc->discard_granularity); + __issue_discard_cmd(sbi, &dpolicy); + dropped = __drop_discard_cmd(sbi); + + /* just to make sure there is no pending discard commands */ + __wait_all_discard_cmd(sbi, NULL); + + f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt)); + return dropped; } static int issue_discard_thread(void *data) @@ -1238,35 +1634,54 @@ static int issue_discard_thread(void *data) struct f2fs_sb_info *sbi = data; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; + struct discard_policy dpolicy; unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; int issued; set_freezable(); do { + __init_discard_policy(sbi, &dpolicy, DPOLICY_BG, + dcc->discard_granularity); + wait_event_interruptible_timeout(*q, kthread_should_stop() || freezing(current) || dcc->discard_wake, msecs_to_jiffies(wait_ms)); + + if (dcc->discard_wake) + dcc->discard_wake = 0; + + /* clean up pending candidates before going to sleep */ + if (atomic_read(&dcc->queued_discard)) + __wait_all_discard_cmd(sbi, NULL); + if (try_to_freeze()) continue; + if (f2fs_readonly(sbi->sb)) + continue; if (kthread_should_stop()) return 0; - - if (dcc->discard_wake) { - dcc->discard_wake = 0; - if (sbi->gc_thread && sbi->gc_thread->gc_urgent) - mark_discard_range_all(sbi); + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { + wait_ms = dpolicy.max_interval; + continue; } + if (sbi->gc_mode == GC_URGENT) + __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1); + sb_start_intwrite(sbi->sb); - issued = __issue_discard_cmd(sbi, true); - if (issued) { - __wait_discard_cmd(sbi, true); - wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; + issued = __issue_discard_cmd(sbi, &dpolicy); + if (issued > 0) { + __wait_all_discard_cmd(sbi, &dpolicy); + wait_ms = dpolicy.min_interval; + } else if (issued == -1){ + wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME); + if (!wait_ms) + wait_ms = dpolicy.mid_interval; } else { - wait_ms = DEF_MAX_DISCARD_ISSUE_TIME; + wait_ms = dpolicy.max_interval; } sb_end_intwrite(sbi->sb); @@ -1326,7 +1741,7 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { #ifdef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_mounted_blkzoned(sbi->sb) && + if (f2fs_sb_has_blkzoned(sbi) && bdev_zoned_model(bdev) != BLK_ZONED_NONE) return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen); #endif @@ -1389,11 +1804,11 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, struct list_head *head = &SM_I(sbi)->dcc_info->entry_list; int i; - if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi)) + if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi)) return false; if (!force) { - if (!test_opt(sbi, DISCARD) || !se->valid_blocks || + if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks || SM_I(sbi)->dcc_info->nr_discards >= SM_I(sbi)->dcc_info->max_discards) return false; @@ -1433,20 +1848,24 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, return false; } -void release_discard_addrs(struct f2fs_sb_info *sbi) +static void release_discard_addr(struct discard_entry *entry) +{ + list_del(&entry->list); + kmem_cache_free(discard_entry_slab, entry); +} + +void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi) { struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list); struct discard_entry *entry, *this; /* drop caches */ - list_for_each_entry_safe(entry, this, head, list) { - list_del(&entry->list); - kmem_cache_free(discard_entry_slab, entry); - } + list_for_each_entry_safe(entry, this, head, list) + release_discard_addr(entry); } /* - * Should call clear_prefree_segments after checkpoint is done. + * Should call f2fs_clear_prefree_segments after checkpoint is done. */ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) { @@ -1459,7 +1878,8 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) mutex_unlock(&dirty_i->seglist_lock); } -void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) +void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, + struct cp_control *cpc) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *head = &dcc->entry_list; @@ -1469,30 +1889,39 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) unsigned int start = 0, end = -1; unsigned int secno, start_segno; bool force = (cpc->reason & CP_DISCARD); + bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi); mutex_lock(&dirty_i->seglist_lock); while (1) { int i; + + if (need_align && end != -1) + end--; start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1); if (start >= MAIN_SEGS(sbi)) break; end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi), start + 1); - for (i = start; i < end; i++) - clear_bit(i, prefree_map); + if (need_align) { + start = rounddown(start, sbi->segs_per_sec); + end = roundup(end, sbi->segs_per_sec); + } - dirty_i->nr_dirty[PRE] -= end - start; + for (i = start; i < end; i++) { + if (test_and_clear_bit(i, prefree_map)) + dirty_i->nr_dirty[PRE]--; + } - if (!test_opt(sbi, DISCARD)) + if (!f2fs_realtime_discard_enable(sbi)) continue; if (force && start >= cpc->trim_start && (end - 1) <= cpc->trim_end) continue; - if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) { + if (!test_opt(sbi, LFS) || !__is_large_section(sbi)) { f2fs_issue_discard(sbi, START_BLOCK(sbi, start), (end - start) << sbi->log_blocks_per_seg); continue; @@ -1524,13 +1953,12 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) sbi->blocks_per_seg, cur_pos); len = next_pos - cur_pos; - if (f2fs_sb_mounted_blkzoned(sbi->sb) || + if (f2fs_sb_has_blkzoned(sbi) || (force && len < cpc->trim_minlen)) goto skip; f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos, len); - cpc->trimmed += len; total_len += len; } else { next_pos = find_next_bit_le(entry->discard_map, @@ -1543,9 +1971,8 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (cur_pos < sbi->blocks_per_seg) goto find_next; - list_del(&entry->list); + release_discard_addr(entry); dcc->nr_discards -= total_len; - kmem_cache_free(discard_entry_slab, entry); } wake_up_discard_thread(sbi, false); @@ -1562,26 +1989,26 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) goto init_thread; } - dcc = kzalloc(sizeof(struct discard_cmd_control), GFP_KERNEL); + dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL); if (!dcc) return -ENOMEM; dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY; INIT_LIST_HEAD(&dcc->entry_list); - for (i = 0; i < MAX_PLIST_NUM; i++) { + for (i = 0; i < MAX_PLIST_NUM; i++) INIT_LIST_HEAD(&dcc->pend_list[i]); - if (i >= dcc->discard_granularity - 1) - dcc->pend_list_tag[i] |= P_ACTIVE; - } INIT_LIST_HEAD(&dcc->wait_list); + INIT_LIST_HEAD(&dcc->fstrim_list); mutex_init(&dcc->cmd_lock); atomic_set(&dcc->issued_discard, 0); - atomic_set(&dcc->issing_discard, 0); + atomic_set(&dcc->queued_discard, 0); atomic_set(&dcc->discard_cmd_cnt, 0); dcc->nr_discards = 0; dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg; dcc->undiscard_blks = 0; - dcc->root = RB_ROOT; + dcc->next_pos = 0; + dcc->root = RB_ROOT_CACHED; + dcc->rbtree_check = false; init_waitqueue_head(&dcc->discard_wait_queue); SM_I(sbi)->dcc_info = dcc; @@ -1590,7 +2017,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(dcc->f2fs_issue_discard)) { err = PTR_ERR(dcc->f2fs_issue_discard); - kfree(dcc); + kvfree(dcc); SM_I(sbi)->dcc_info = NULL; return err; } @@ -1605,9 +2032,9 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi) if (!dcc) return; - stop_discard_thread(sbi); + f2fs_stop_discard_thread(sbi); - kfree(dcc); + kvfree(dcc); SM_I(sbi)->dcc_info = NULL; } @@ -1652,8 +2079,9 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) (new_vblocks > sbi->blocks_per_seg))); se->valid_blocks = new_vblocks; - se->mtime = get_mtime(sbi); - SIT_I(sbi)->max_mtime = se->mtime; + se->mtime = get_mtime(sbi, false); + if (se->mtime > SIT_I(sbi)->max_mtime) + SIT_I(sbi)->max_mtime = se->mtime; /* Update valid block bitmap */ if (del > 0) { @@ -1676,12 +2104,12 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) del = 0; } - if (f2fs_discard_en(sbi) && - !f2fs_test_and_set_bit(offset, se->discard_map)) + if (!f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; /* don't overwrite by SSR to keep node chain */ - if (se->type == CURSEG_WARM_NODE) { + if (IS_NODESEG(se->type) && + !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) se->ckpt_valid_blocks++; } @@ -1703,10 +2131,18 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) f2fs_bug_on(sbi, 1); se->valid_blocks++; del = 0; + } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + /* + * If checkpoints are off, we must not reuse data that + * was used in the previous checkpoint. If it was used + * before, we must track that to know how much space we + * really have. + */ + if (f2fs_test_bit(offset, se->ckpt_valid_map)) + sbi->unusable_block_count++; } - if (f2fs_discard_en(sbi) && - f2fs_test_and_clear_bit(offset, se->discard_map)) + if (f2fs_test_and_clear_bit(offset, se->discard_map)) sbi->discard_blks++; } if (!f2fs_test_bit(offset, se->ckpt_valid_map)) @@ -1717,21 +2153,11 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) /* update total number of valid blocks to be written in ckpt area */ SIT_I(sbi)->written_valid_blocks += del; - if (sbi->segs_per_sec > 1) + if (__is_large_section(sbi)) get_sec_entry(sbi, segno)->valid_blocks += del; } -void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new) -{ - update_sit_entry(sbi, new, 1); - if (GET_SEGNO(sbi, old) != NULL_SEGNO) - update_sit_entry(sbi, old, -1); - - locate_dirty_segment(sbi, GET_SEGNO(sbi, old)); - locate_dirty_segment(sbi, GET_SEGNO(sbi, new)); -} - -void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) +void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) { unsigned int segno = GET_SEGNO(sbi, addr); struct sit_info *sit_i = SIT_I(sbi); @@ -1740,18 +2166,20 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) if (addr == NEW_ADDR) return; + invalidate_mapping_pages(META_MAPPING(sbi), addr, addr); + /* add it into sit main buffer */ - mutex_lock(&sit_i->sentry_lock); + down_write(&sit_i->sentry_lock); update_sit_entry(sbi, addr, -1); /* add it into dirty seglist */ locate_dirty_segment(sbi, segno); - mutex_unlock(&sit_i->sentry_lock); + up_write(&sit_i->sentry_lock); } -bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) +bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) { struct sit_info *sit_i = SIT_I(sbi); unsigned int segno, offset; @@ -1761,7 +2189,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) if (!is_valid_data_blkaddr(sbi, blkaddr)) return true; - mutex_lock(&sit_i->sentry_lock); + down_read(&sit_i->sentry_lock); segno = GET_SEGNO(sbi, blkaddr); se = get_seg_entry(sbi, segno); @@ -1770,7 +2198,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) if (f2fs_test_bit(offset, se->ckpt_valid_map)) is_cp = true; - mutex_unlock(&sit_i->sentry_lock); + up_read(&sit_i->sentry_lock); return is_cp; } @@ -1790,7 +2218,7 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, /* * Calculate the number of current summary pages for writing */ -int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) +int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) { int valid_sum_count = 0; int i, sum_in_page; @@ -1820,20 +2248,17 @@ int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) /* * Caller should put this summary page */ -struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) +struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) { - return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno)); + return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno)); } -void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr) +void f2fs_update_meta_page(struct f2fs_sb_info *sbi, + void *src, block_t blk_addr) { - struct page *page = grab_meta_page(sbi, blk_addr); - void *dst = page_address(page); + struct page *page = f2fs_grab_meta_page(sbi, blk_addr); - if (src) - memcpy(dst, src, PAGE_SIZE); - else - memset(dst, 0, PAGE_SIZE); + memcpy(page_address(page), src, PAGE_SIZE); set_page_dirty(page); f2fs_put_page(page, 1); } @@ -1841,18 +2266,19 @@ void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr) static void write_sum_page(struct f2fs_sb_info *sbi, struct f2fs_summary_block *sum_blk, block_t blk_addr) { - update_meta_page(sbi, (void *)sum_blk, blk_addr); + f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr); } static void write_current_sum_page(struct f2fs_sb_info *sbi, int type, block_t blk_addr) { struct curseg_info *curseg = CURSEG_I(sbi, type); - struct page *page = grab_meta_page(sbi, blk_addr); + struct page *page = f2fs_grab_meta_page(sbi, blk_addr); struct f2fs_summary_block *src = curseg->sum_blk; struct f2fs_summary_block *dst; dst = (struct f2fs_summary_block *)page_address(page); + memset(dst, 0, PAGE_SIZE); mutex_lock(&curseg->curseg_mutex); @@ -1932,7 +2358,6 @@ static void get_new_segment(struct f2fs_sb_info *sbi, } secno = left_start; skip_left: - hint = secno; segno = GET_SEG_FROM_SEC(sbi, secno); zoneno = GET_ZONE_FROM_SEC(sbi, secno); @@ -1994,15 +2419,23 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) { /* if segs_per_sec is large than 1, we need to keep original policy. */ - if (sbi->segs_per_sec != 1) + if (__is_large_section(sbi)) return CURSEG_I(sbi, type)->segno; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return 0; + if (test_opt(sbi, NOHEAP) && (type == CURSEG_HOT_DATA || IS_NODESEG(type))) return 0; if (SIT_I(sbi)->last_victim[ALLOC_NEXT]) return SIT_I(sbi)->last_victim[ALLOC_NEXT]; + + /* find segments from 0 to reuse freed segments */ + if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE) + return 0; + return CURSEG_I(sbi, type)->segno; } @@ -2088,7 +2521,8 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type) curseg->alloc_type = SSR; __next_free_blkoff(sbi, curseg, 0); - sum_page = get_sum_page(sbi, new_segno); + sum_page = f2fs_get_sum_page(sbi, new_segno); + f2fs_bug_on(sbi, IS_ERR(sum_page)); sum_node = (struct f2fs_summary_block *)page_address(sum_page); memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); f2fs_put_page(sum_page, 1); @@ -2102,7 +2536,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) int i, cnt; bool reversed = false; - /* need_SSR() already forces to do this */ + /* f2fs_need_SSR() already forces to do this */ if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) { curseg->next_segno = segno; return 1; @@ -2135,6 +2569,15 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) return 1; } } + + /* find valid_blocks=0 in dirty list */ + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + segno = get_free_segment(sbi); + if (segno != NULL_SEGNO) { + curseg->next_segno = segno; + return 1; + } + } return 0; } @@ -2152,9 +2595,10 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && type == CURSEG_WARM_NODE) new_curseg(sbi, type, false); - else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) + else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) && + likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) new_curseg(sbi, type, false); - else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) + else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type)) change_curseg(sbi, type); else new_curseg(sbi, type, false); @@ -2162,97 +2606,187 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, stat_inc_seg_type(sbi, curseg); } -void allocate_new_segments(struct f2fs_sb_info *sbi) +void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) { struct curseg_info *curseg; unsigned int old_segno; int i; + down_write(&SIT_I(sbi)->sentry_lock); + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { curseg = CURSEG_I(sbi, i); old_segno = curseg->segno; SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); locate_dirty_segment(sbi, old_segno); } + + up_write(&SIT_I(sbi)->sentry_lock); } static const struct segment_allocation default_salloc_ops = { .allocate_segment = allocate_segment_by_default, }; -bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc) +bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, + struct cp_control *cpc) { __u64 trim_start = cpc->trim_start; bool has_candidate = false; - mutex_lock(&SIT_I(sbi)->sentry_lock); + down_write(&SIT_I(sbi)->sentry_lock); for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) { if (add_discard_addrs(sbi, cpc, true)) { has_candidate = true; break; } } - mutex_unlock(&SIT_I(sbi)->sentry_lock); + up_write(&SIT_I(sbi)->sentry_lock); cpc->trim_start = trim_start; return has_candidate; } +static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy, + unsigned int start, unsigned int end) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct discard_cmd *prev_dc = NULL, *next_dc = NULL; + struct rb_node **insert_p = NULL, *insert_parent = NULL; + struct discard_cmd *dc; + struct blk_plug plug; + int issued; + unsigned int trimmed = 0; + +next: + issued = 0; + + mutex_lock(&dcc->cmd_lock); + if (unlikely(dcc->rbtree_check)) + f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, + &dcc->root)); + + dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root, + NULL, start, + (struct rb_entry **)&prev_dc, + (struct rb_entry **)&next_dc, + &insert_p, &insert_parent, true, NULL); + if (!dc) + dc = next_dc; + + blk_start_plug(&plug); + + while (dc && dc->lstart <= end) { + struct rb_node *node; + int err = 0; + + if (dc->len < dpolicy->granularity) + goto skip; + + if (dc->state != D_PREP) { + list_move_tail(&dc->list, &dcc->fstrim_list); + goto skip; + } + + err = __submit_discard_cmd(sbi, dpolicy, dc, &issued); + + if (issued >= dpolicy->max_requests) { + start = dc->lstart + dc->len; + + if (err) + __remove_discard_cmd(sbi, dc); + + blk_finish_plug(&plug); + mutex_unlock(&dcc->cmd_lock); + trimmed += __wait_all_discard_cmd(sbi, NULL); + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto next; + } +skip: + node = rb_next(&dc->rb_node); + if (err) + __remove_discard_cmd(sbi, dc); + dc = rb_entry_safe(node, struct discard_cmd, rb_node); + + if (fatal_signal_pending(current)) + break; + } + + blk_finish_plug(&plug); + mutex_unlock(&dcc->cmd_lock); + + return trimmed; +} + int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) { __u64 start = F2FS_BYTES_TO_BLK(range->start); __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1; unsigned int start_segno, end_segno; + block_t start_block, end_block; struct cp_control cpc; + struct discard_policy dpolicy; + unsigned long long trimmed = 0; int err = 0; + bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi); if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) return -EINVAL; - cpc.trimmed = 0; - if (end <= MAIN_BLKADDR(sbi)) + if (end < MAIN_BLKADDR(sbi)) goto out; if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { f2fs_msg(sbi->sb, KERN_WARNING, "Found FS corruption, run fsck to fix."); - goto out; + return -EIO; } /* start/end segment number in main_area */ start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start); end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : GET_SEGNO(sbi, end); + if (need_align) { + start_segno = rounddown(start_segno, sbi->segs_per_sec); + end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1; + } + cpc.reason = CP_DISCARD; cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen)); + cpc.trim_start = start_segno; + cpc.trim_end = end_segno; - /* do checkpoint to issue discard commands safely */ - for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) { - cpc.trim_start = start_segno; + if (sbi->discard_blks == 0) + goto out; - if (sbi->discard_blks == 0) - break; - else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi)) - cpc.trim_end = end_segno; - else - cpc.trim_end = min_t(unsigned int, - rounddown(start_segno + - BATCHED_TRIM_SEGMENTS(sbi), - sbi->segs_per_sec) - 1, end_segno); + mutex_lock(&sbi->gc_mutex); + err = f2fs_write_checkpoint(sbi, &cpc); + mutex_unlock(&sbi->gc_mutex); + if (err) + goto out; - mutex_lock(&sbi->gc_mutex); - err = write_checkpoint(sbi, &cpc); - mutex_unlock(&sbi->gc_mutex); - if (err) - break; + /* + * We filed discard candidates, but actually we don't need to wait for + * all of them, since they'll be issued in idle time along with runtime + * discard option. User configuration looks like using runtime discard + * or periodic fstrim instead of it. + */ + if (f2fs_realtime_discard_enable(sbi)) + goto out; - schedule(); - } - /* It's time to issue all the filed discards */ - mark_discard_range_all(sbi); - f2fs_wait_discard_bios(sbi, false); + start_block = START_BLOCK(sbi, start_segno); + end_block = START_BLOCK(sbi, end_segno + 1); + + __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen); + trimmed = __issue_discard_cmd_range(sbi, &dpolicy, + start_block, end_block); + + trimmed += __wait_discard_cmd_range(sbi, &dpolicy, + start_block, end_block); out: - range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); + if (!err) + range->len = F2FS_BLK_TO_BYTES(trimmed); return err; } @@ -2264,6 +2798,113 @@ static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) return false; } +int f2fs_rw_hint_to_seg_type(enum rw_hint hint) +{ + switch (hint) { + case WRITE_LIFE_SHORT: + return CURSEG_HOT_DATA; + case WRITE_LIFE_EXTREME: + return CURSEG_COLD_DATA; + default: + return CURSEG_WARM_DATA; + } +} + +/* This returns write hints for each segment type. This hints will be + * passed down to block layer. There are mapping tables which depend on + * the mount option 'whint_mode'. + * + * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET. + * + * 2) whint_mode=user-based. F2FS tries to pass down hints given by users. + * + * User F2FS Block + * ---- ---- ----- + * META WRITE_LIFE_NOT_SET + * HOT_NODE " + * WARM_NODE " + * COLD_NODE " + * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME + * extension list " " + * + * -- buffered io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET + * WRITE_LIFE_NONE " " + * WRITE_LIFE_MEDIUM " " + * WRITE_LIFE_LONG " " + * + * -- direct io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET + * WRITE_LIFE_NONE " WRITE_LIFE_NONE + * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM + * WRITE_LIFE_LONG " WRITE_LIFE_LONG + * + * 3) whint_mode=fs-based. F2FS passes down hints with its policy. + * + * User F2FS Block + * ---- ---- ----- + * META WRITE_LIFE_MEDIUM; + * HOT_NODE WRITE_LIFE_NOT_SET + * WARM_NODE " + * COLD_NODE WRITE_LIFE_NONE + * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME + * extension list " " + * + * -- buffered io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG + * WRITE_LIFE_NONE " " + * WRITE_LIFE_MEDIUM " " + * WRITE_LIFE_LONG " " + * + * -- direct io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET + * WRITE_LIFE_NONE " WRITE_LIFE_NONE + * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM + * WRITE_LIFE_LONG " WRITE_LIFE_LONG + */ + +enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, + enum page_type type, enum temp_type temp) +{ + if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) { + if (type == DATA) { + if (temp == WARM) + return WRITE_LIFE_NOT_SET; + else if (temp == HOT) + return WRITE_LIFE_SHORT; + else if (temp == COLD) + return WRITE_LIFE_EXTREME; + } else { + return WRITE_LIFE_NOT_SET; + } + } else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) { + if (type == DATA) { + if (temp == WARM) + return WRITE_LIFE_LONG; + else if (temp == HOT) + return WRITE_LIFE_SHORT; + else if (temp == COLD) + return WRITE_LIFE_EXTREME; + } else if (type == NODE) { + if (temp == WARM || temp == HOT) + return WRITE_LIFE_NOT_SET; + else if (temp == COLD) + return WRITE_LIFE_NONE; + } else if (type == META) { + return WRITE_LIFE_MEDIUM; + } + } + return WRITE_LIFE_NOT_SET; +} + static int __get_segment_type_2(struct f2fs_io_info *fio) { if (fio->type == DATA) @@ -2296,9 +2937,12 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (is_cold_data(fio->page) || file_is_cold(inode)) return CURSEG_COLD_DATA; - if (is_inode_flag_set(inode, FI_HOT_DATA)) + if (file_is_hot(inode) || + is_inode_flag_set(inode, FI_HOT_DATA) || + f2fs_is_atomic_file(inode) || + f2fs_is_volatile_file(inode)) return CURSEG_HOT_DATA; - return CURSEG_WARM_DATA; + return f2fs_rw_hint_to_seg_type(inode->i_write_hint); } else { if (IS_DNODE(fio->page)) return is_cold_node(fio->page) ? CURSEG_WARM_NODE : @@ -2311,7 +2955,7 @@ static int __get_segment_type(struct f2fs_io_info *fio) { int type = 0; - switch (fio->sbi->active_logs) { + switch (F2FS_OPTION(fio->sbi).active_logs) { case 2: type = __get_segment_type_2(fio); break; @@ -2334,7 +2978,7 @@ static int __get_segment_type(struct f2fs_io_info *fio) return type; } -void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, +void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio, bool add_list) @@ -2342,8 +2986,10 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); + down_read(&SM_I(sbi)->curseg_lock); + mutex_lock(&curseg->curseg_mutex); - mutex_lock(&sit_i->sentry_lock); + down_write(&sit_i->sentry_lock); *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); @@ -2360,15 +3006,26 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, stat_inc_block_count(sbi, curseg); + /* + * SIT information should be updated before segment allocation, + * since SSR needs latest valid block information. + */ + update_sit_entry(sbi, *new_blkaddr, 1); + if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) + update_sit_entry(sbi, old_blkaddr, -1); + if (!__has_curseg_space(sbi, type)) sit_i->s_ops->allocate_segment(sbi, type, false); - /* - * SIT information should be updated after segment allocation, - * since we need to keep dirty segments precisely under SSR. - */ - refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); - mutex_unlock(&sit_i->sentry_lock); + /* + * segment dirty status should be updated after segment allocation, + * so we just need to update status only one time after previous + * segment being closed. + */ + locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); + locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr)); + + up_write(&sit_i->sentry_lock); if (page && IS_NODESEG(type)) { fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); @@ -2381,6 +3038,7 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, INIT_LIST_HEAD(&fio->list); fio->in_list = true; + fio->retry = false; io = sbi->write_io[fio->type] + fio->temp; spin_lock(&io->io_lock); list_add_tail(&fio->list, &io->io_list); @@ -2388,31 +3046,65 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, } mutex_unlock(&curseg->curseg_mutex); + + up_read(&SM_I(sbi)->curseg_lock); +} + +static void update_device_state(struct f2fs_io_info *fio) +{ + struct f2fs_sb_info *sbi = fio->sbi; + unsigned int devidx; + + if (!sbi->s_ndevs) + return; + + devidx = f2fs_target_device_index(sbi, fio->new_blkaddr); + + /* update device state for fsync */ + f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO); + + /* update device state for checkpoint */ + if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) { + spin_lock(&sbi->dev_lock); + f2fs_set_bit(devidx, (char *)&sbi->dirty_device); + spin_unlock(&sbi->dev_lock); + } } static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { int type = __get_segment_type(fio); - int err; + bool keep_order = (test_opt(fio->sbi, LFS) && type == CURSEG_COLD_DATA); + if (keep_order) + down_read(&fio->sbi->io_order_lock); reallocate: - allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, + f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, &fio->new_blkaddr, sum, type, fio, true); + if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) + invalidate_mapping_pages(META_MAPPING(fio->sbi), + fio->old_blkaddr, fio->old_blkaddr); /* writeout dirty page into bdev */ - err = f2fs_submit_page_write(fio); - if (err == -EAGAIN) { + f2fs_submit_page_write(fio); + if (fio->retry) { fio->old_blkaddr = fio->new_blkaddr; goto reallocate; } + + update_device_state(fio); + + if (keep_order) + up_read(&fio->sbi->io_order_lock); } -void write_meta_page(struct f2fs_sb_info *sbi, struct page *page, +void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, enum iostat_type io_type) { struct f2fs_io_info fio = { .sbi = sbi, .type = META, + .temp = HOT, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_META | REQ_PRIO, .old_blkaddr = page->index, @@ -2426,12 +3118,14 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page, fio.op_flags &= ~REQ_META; set_page_writeback(page); + ClearPageError(page); f2fs_submit_page_write(&fio); + stat_inc_meta_count(sbi, page->index); f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE); } -void write_node_page(unsigned int nid, struct f2fs_io_info *fio) +void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio) { struct f2fs_summary sum; @@ -2441,36 +3135,56 @@ void write_node_page(unsigned int nid, struct f2fs_io_info *fio) f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); } -void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) +void f2fs_outplace_write_data(struct dnode_of_data *dn, + struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; struct f2fs_summary sum; - struct node_info ni; f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); - get_node_info(sbi, dn->nid, &ni); - set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); + set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version); do_write_page(&sum, fio); f2fs_update_data_blkaddr(dn, fio->new_blkaddr); f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE); } -int rewrite_data_page(struct f2fs_io_info *fio) +int f2fs_inplace_write_data(struct f2fs_io_info *fio) { int err; + struct f2fs_sb_info *sbi = fio->sbi; fio->new_blkaddr = fio->old_blkaddr; + /* i/o temperature is needed for passing down write hints */ + __get_segment_type(fio); + + f2fs_bug_on(sbi, !IS_DATASEG(get_seg_entry(sbi, + GET_SEGNO(sbi, fio->new_blkaddr))->type)); + stat_inc_inplace_blocks(fio->sbi); err = f2fs_submit_page_bio(fio); + if (!err) + update_device_state(fio); f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); return err; } -void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, +static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + int i; + + for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) { + if (CURSEG_I(sbi, i)->segno == segno) + break; + } + return i; +} + +void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, block_t old_blkaddr, block_t new_blkaddr, bool recover_curseg, bool recover_newaddr) { @@ -2485,6 +3199,8 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, se = get_seg_entry(sbi, segno); type = se->type; + down_write(&SM_I(sbi)->curseg_lock); + if (!recover_curseg) { /* for recovery flow */ if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { @@ -2494,14 +3210,20 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, type = CURSEG_WARM_DATA; } } else { - if (!IS_CURSEG(sbi, segno)) + if (IS_CURSEG(sbi, segno)) { + /* se->type is volatile as SSR allocation */ + type = __f2fs_get_curseg(sbi, segno); + f2fs_bug_on(sbi, type == NO_CHECK_TYPE); + } else { type = CURSEG_WARM_DATA; + } } + f2fs_bug_on(sbi, !IS_DATASEG(type)); curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); - mutex_lock(&sit_i->sentry_lock); + down_write(&sit_i->sentry_lock); old_cursegno = curseg->segno; old_blkoff = curseg->next_blkoff; @@ -2517,8 +3239,11 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (!recover_curseg || recover_newaddr) update_sit_entry(sbi, new_blkaddr, 1); - if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) + if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) { + invalidate_mapping_pages(META_MAPPING(sbi), + old_blkaddr, old_blkaddr); update_sit_entry(sbi, old_blkaddr, -1); + } locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr)); @@ -2533,8 +3258,9 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, curseg->next_blkoff = old_blkoff; } - mutex_unlock(&sit_i->sentry_lock); + up_write(&sit_i->sentry_lock); mutex_unlock(&curseg->curseg_mutex); + up_write(&SM_I(sbi)->curseg_lock); } void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, @@ -2546,41 +3272,55 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, set_summary(&sum, dn->nid, dn->ofs_in_node, version); - __f2fs_replace_block(sbi, &sum, old_addr, new_addr, + f2fs_do_replace_block(sbi, &sum, old_addr, new_addr, recover_curseg, recover_newaddr); f2fs_update_data_blkaddr(dn, new_addr); } void f2fs_wait_on_page_writeback(struct page *page, - enum page_type type, bool ordered) + enum page_type type, bool ordered, bool locked) { if (PageWriteback(page)) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); - f2fs_submit_merged_write_cond(sbi, page->mapping->host, - 0, page->index, type); - if (ordered) + f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type); + if (ordered) { wait_on_page_writeback(page); - else + f2fs_bug_on(sbi, locked && PageWriteback(page)); + } else { wait_for_stable_page(page); + } } } -void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr) +void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *cpage; + if (!f2fs_post_read_required(inode)) + return; + if (!is_valid_data_blkaddr(sbi, blkaddr)) return; cpage = find_lock_page(META_MAPPING(sbi), blkaddr); if (cpage) { - f2fs_wait_on_page_writeback(cpage, DATA, true); + f2fs_wait_on_page_writeback(cpage, DATA, true, true); f2fs_put_page(cpage, 1); } } +void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, + block_t len) +{ + block_t i; + + for (i = 0; i < len; i++) + f2fs_wait_on_block_writeback(inode, blkaddr + i); +} + static int read_compacted_summaries(struct f2fs_sb_info *sbi) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); @@ -2592,7 +3332,9 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi) start = start_sum_block(sbi); - page = get_meta_page(sbi, start++); + page = f2fs_get_meta_page(sbi, start++); + if (IS_ERR(page)) + return PTR_ERR(page); kaddr = (unsigned char *)page_address(page); /* Step 1: restore nat cache */ @@ -2632,7 +3374,9 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi) f2fs_put_page(page, 1); page = NULL; - page = get_meta_page(sbi, start++); + page = f2fs_get_meta_page(sbi, start++); + if (IS_ERR(page)) + return PTR_ERR(page); kaddr = (unsigned char *)page_address(page); offset = 0; } @@ -2650,6 +3394,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) unsigned short blk_off; unsigned int segno = 0; block_t blk_addr = 0; + int err = 0; /* get segment number and block addr */ if (IS_DATASEG(type)) { @@ -2672,7 +3417,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) blk_addr = GET_SUM_BLOCK(sbi, segno); } - new = get_meta_page(sbi, blk_addr); + new = f2fs_get_meta_page(sbi, blk_addr); + if (IS_ERR(new)) + return PTR_ERR(new); sum = (struct f2fs_summary_block *)page_address(new); if (IS_NODESEG(type)) { @@ -2684,13 +3431,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) ns->ofs_in_node = 0; } } else { - int err; - - err = restore_node_summary(sbi, segno, sum); - if (err) { - f2fs_put_page(new, 1); - return err; - } + err = f2fs_restore_node_summary(sbi, segno, sum); + if (err) + goto out; } } @@ -2710,8 +3453,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) curseg->alloc_type = ckpt->alloc_type[type]; curseg->next_blkoff = blk_off; mutex_unlock(&curseg->curseg_mutex); +out: f2fs_put_page(new, 1); - return 0; + return err; } static int restore_curseg_summaries(struct f2fs_sb_info *sbi) @@ -2722,20 +3466,21 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) int err; if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) { - int npages = npages_for_summary_flush(sbi, true); + int npages = f2fs_npages_for_summary_flush(sbi, true); if (npages >= 2) - ra_meta_pages(sbi, start_sum_block(sbi), npages, + f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages, META_CP, true); /* restore for compacted data summary */ - if (read_compacted_summaries(sbi)) - return -EINVAL; + err = read_compacted_summaries(sbi); + if (err) + return err; type = CURSEG_HOT_NODE; } if (__exist_node_summaries(sbi)) - ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type), + f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type), NR_CURSEG_TYPE - type, META_CP, true); for (; type <= CURSEG_COLD_NODE; type++) { @@ -2761,8 +3506,9 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) int written_size = 0; int i, j; - page = grab_meta_page(sbi, blkaddr++); + page = f2fs_grab_meta_page(sbi, blkaddr++); kaddr = (unsigned char *)page_address(page); + memset(kaddr, 0, PAGE_SIZE); /* Step 1: write nat cache */ seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -2785,8 +3531,9 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) for (j = 0; j < blkoff; j++) { if (!page) { - page = grab_meta_page(sbi, blkaddr++); + page = f2fs_grab_meta_page(sbi, blkaddr++); kaddr = (unsigned char *)page_address(page); + memset(kaddr, 0, PAGE_SIZE); written_size = 0; } summary = (struct f2fs_summary *)(kaddr + written_size); @@ -2821,7 +3568,7 @@ static void write_normal_summaries(struct f2fs_sb_info *sbi, write_current_sum_page(sbi, i, blkaddr + (i - type)); } -void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) +void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) { if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) write_compacted_summaries(sbi, start_blk); @@ -2829,12 +3576,12 @@ void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA); } -void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) +void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) { write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); } -int lookup_journal_in_cursum(struct f2fs_journal *journal, int type, +int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, unsigned int val, int alloc) { int i; @@ -2859,35 +3606,26 @@ int lookup_journal_in_cursum(struct f2fs_journal *journal, int type, static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, unsigned int segno) { - return get_meta_page(sbi, current_sit_addr(sbi, segno)); + return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno)); } static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, unsigned int start) { struct sit_info *sit_i = SIT_I(sbi); - struct page *src_page, *dst_page; + struct page *page; pgoff_t src_off, dst_off; - void *src_addr, *dst_addr; src_off = current_sit_addr(sbi, start); dst_off = next_sit_addr(sbi, src_off); - /* get current sit block page without lock */ - src_page = get_meta_page(sbi, src_off); - dst_page = grab_meta_page(sbi, dst_off); - f2fs_bug_on(sbi, PageDirty(src_page)); + page = f2fs_grab_meta_page(sbi, dst_off); + seg_info_to_sit_page(sbi, page, start); - src_addr = page_address(src_page); - dst_addr = page_address(dst_page); - memcpy(dst_addr, src_addr, PAGE_SIZE); - - set_page_dirty(dst_page); - f2fs_put_page(src_page, 1); - + set_page_dirty(page); set_to_next_sit(sit_i, start); - return dst_page; + return page; } static struct sit_entry_set *grab_sit_entry_set(void) @@ -2977,7 +3715,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi) * CP calls this function, which flushes SIT entries including sit_journal, * and moves prefree segs to free segs. */ -void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) +void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct sit_info *sit_i = SIT_I(sbi); unsigned long *bitmap = sit_i->dirty_sentries_bitmap; @@ -2988,7 +3726,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) bool to_journal = true; struct seg_entry *se; - mutex_lock(&sit_i->sentry_lock); + down_write(&sit_i->sentry_lock); if (!sit_i->dirty_sentries) goto out; @@ -3036,6 +3774,11 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) int offset, sit_offset; se = get_seg_entry(sbi, segno); +#ifdef CONFIG_F2FS_CHECK_FS + if (memcmp(se->cur_valid_map, se->cur_valid_map_mir, + SIT_VBLOCK_MAP_SIZE)) + f2fs_bug_on(sbi, 1); +#endif /* add discard candidates */ if (!(cpc->reason & CP_DISCARD)) { @@ -3044,17 +3787,21 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) } if (to_journal) { - offset = lookup_journal_in_cursum(journal, + offset = f2fs_lookup_journal_in_cursum(journal, SIT_JOURNAL, segno, 1); f2fs_bug_on(sbi, offset < 0); segno_in_journal(journal, offset) = cpu_to_le32(segno); seg_info_to_raw_sit(se, &sit_in_journal(journal, offset)); + check_block_count(sbi, segno, + &sit_in_journal(journal, offset)); } else { sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]); + check_block_count(sbi, segno, + &raw_sit->entries[sit_offset]); } __clear_bit(segno, bitmap); @@ -3082,7 +3829,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) cpc->trim_start = trim_start; } - mutex_unlock(&sit_i->sentry_lock); + up_write(&sit_i->sentry_lock); set_prefree_as_free_segments(sbi); } @@ -3096,53 +3843,57 @@ static int build_sit_info(struct f2fs_sb_info *sbi) unsigned int bitmap_size; /* allocate memory for SIT information */ - sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL); + sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL); if (!sit_i) return -ENOMEM; SM_I(sbi)->sit_info = sit_i; - sit_i->sentries = kvzalloc(MAIN_SEGS(sbi) * - sizeof(struct seg_entry), GFP_KERNEL); + sit_i->sentries = + f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry), + MAIN_SEGS(sbi)), + GFP_KERNEL); if (!sit_i->sentries) return -ENOMEM; bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); - sit_i->dirty_sentries_bitmap = kvzalloc(bitmap_size, GFP_KERNEL); + sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, bitmap_size, + GFP_KERNEL); if (!sit_i->dirty_sentries_bitmap) return -ENOMEM; for (start = 0; start < MAIN_SEGS(sbi); start++) { sit_i->sentries[start].cur_valid_map - = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); sit_i->sentries[start].ckpt_valid_map - = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); if (!sit_i->sentries[start].cur_valid_map || !sit_i->sentries[start].ckpt_valid_map) return -ENOMEM; #ifdef CONFIG_F2FS_CHECK_FS sit_i->sentries[start].cur_valid_map_mir - = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); if (!sit_i->sentries[start].cur_valid_map_mir) return -ENOMEM; #endif - if (f2fs_discard_en(sbi)) { - sit_i->sentries[start].discard_map - = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); - if (!sit_i->sentries[start].discard_map) - return -ENOMEM; - } + sit_i->sentries[start].discard_map + = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, + GFP_KERNEL); + if (!sit_i->sentries[start].discard_map) + return -ENOMEM; } - sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); if (!sit_i->tmp_map) return -ENOMEM; - if (sbi->segs_per_sec > 1) { - sit_i->sec_entries = kvzalloc(MAIN_SECS(sbi) * - sizeof(struct sec_entry), GFP_KERNEL); + if (__is_large_section(sbi)) { + sit_i->sec_entries = + f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry), + MAIN_SECS(sbi)), + GFP_KERNEL); if (!sit_i->sec_entries) return -ENOMEM; } @@ -3175,7 +3926,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time); sit_i->mounted_time = ktime_get_real_seconds(); - mutex_init(&sit_i->sentry_lock); + init_rwsem(&sit_i->sentry_lock); return 0; } @@ -3185,19 +3936,19 @@ static int build_free_segmap(struct f2fs_sb_info *sbi) unsigned int bitmap_size, sec_bitmap_size; /* allocate memory for free segmap information */ - free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL); + free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL); if (!free_i) return -ENOMEM; SM_I(sbi)->free_info = free_i; bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); - free_i->free_segmap = kvmalloc(bitmap_size, GFP_KERNEL); + free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL); if (!free_i->free_segmap) return -ENOMEM; sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); - free_i->free_secmap = kvmalloc(sec_bitmap_size, GFP_KERNEL); + free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL); if (!free_i->free_secmap) return -ENOMEM; @@ -3218,7 +3969,8 @@ static int build_curseg(struct f2fs_sb_info *sbi) struct curseg_info *array; int i; - array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL); + array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)), + GFP_KERNEL); if (!array) return -ENOMEM; @@ -3226,12 +3978,12 @@ static int build_curseg(struct f2fs_sb_info *sbi) for (i = 0; i < NR_CURSEG_TYPE; i++) { mutex_init(&array[i].curseg_mutex); - array[i].sum_blk = kzalloc(PAGE_SIZE, GFP_KERNEL); + array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL); if (!array[i].sum_blk) return -ENOMEM; init_rwsem(&array[i].journal_rwsem); - array[i].journal = kzalloc(sizeof(struct f2fs_journal), - GFP_KERNEL); + array[i].journal = f2fs_kzalloc(sbi, + sizeof(struct f2fs_journal), GFP_KERNEL); if (!array[i].journal) return -ENOMEM; array[i].segno = NULL_SEGNO; @@ -3254,7 +4006,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) block_t total_node_blocks = 0; do { - readed = ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES, + readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES, META_SIT, true); start = start_blk * sit_i->sents_per_block; @@ -3266,6 +4018,8 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) se = &sit_i->sentries[start]; page = get_current_sit_page(sbi, start); + if (IS_ERR(page)) + return PTR_ERR(page); sit_blk = (struct f2fs_sit_block *)page_address(page); sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; f2fs_put_page(page, 1); @@ -3278,21 +4032,19 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) total_node_blocks += se->valid_blocks; /* build discard map only one time */ - if (f2fs_discard_en(sbi)) { - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { - memset(se->discard_map, 0xff, - SIT_VBLOCK_MAP_SIZE); - } else { - memcpy(se->discard_map, - se->cur_valid_map, - SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += - sbi->blocks_per_seg - - se->valid_blocks; - } + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { + memset(se->discard_map, 0xff, + SIT_VBLOCK_MAP_SIZE); + } else { + memcpy(se->discard_map, + se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += + sbi->blocks_per_seg - + se->valid_blocks; } - if (sbi->segs_per_sec > 1) + if (__is_large_section(sbi)) get_sec_entry(sbi, start)->valid_blocks += se->valid_blocks; } @@ -3327,21 +4079,21 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) if (IS_NODESEG(se->type)) total_node_blocks += se->valid_blocks; - if (f2fs_discard_en(sbi)) { - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { - memset(se->discard_map, 0xff, - SIT_VBLOCK_MAP_SIZE); - } else { - memcpy(se->discard_map, se->cur_valid_map, - SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += old_valid_blocks - - se->valid_blocks; - } + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { + memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE); + } else { + memcpy(se->discard_map, se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += old_valid_blocks; + sbi->discard_blks -= se->valid_blocks; } - if (sbi->segs_per_sec > 1) + if (__is_large_section(sbi)) { get_sec_entry(sbi, start)->valid_blocks += - se->valid_blocks - old_valid_blocks; + se->valid_blocks; + get_sec_entry(sbi, start)->valid_blocks -= + old_valid_blocks; + } } up_read(&curseg->journal_rwsem); @@ -3408,7 +4160,7 @@ static int init_victim_secmap(struct f2fs_sb_info *sbi) struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); - dirty_i->victim_secmap = kvzalloc(bitmap_size, GFP_KERNEL); + dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL); if (!dirty_i->victim_secmap) return -ENOMEM; return 0; @@ -3420,7 +4172,8 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi) unsigned int bitmap_size, i; /* allocate memory for dirty segments list information */ - dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL); + dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info), + GFP_KERNEL); if (!dirty_i) return -ENOMEM; @@ -3430,7 +4183,8 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi) bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); for (i = 0; i < NR_DIRTY_TYPE; i++) { - dirty_i->dirty_segmap[i] = kvzalloc(bitmap_size, GFP_KERNEL); + dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size, + GFP_KERNEL); if (!dirty_i->dirty_segmap[i]) return -ENOMEM; } @@ -3447,9 +4201,9 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi) struct sit_info *sit_i = SIT_I(sbi); unsigned int segno; - mutex_lock(&sit_i->sentry_lock); + down_write(&sit_i->sentry_lock); - sit_i->min_mtime = LLONG_MAX; + sit_i->min_mtime = ULLONG_MAX; for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { unsigned int i; @@ -3463,18 +4217,18 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi) if (sit_i->min_mtime > mtime) sit_i->min_mtime = mtime; } - sit_i->max_mtime = get_mtime(sbi); - mutex_unlock(&sit_i->sentry_lock); + sit_i->max_mtime = get_mtime(sbi, false); + up_write(&sit_i->sentry_lock); } -int build_segment_manager(struct f2fs_sb_info *sbi) +int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct f2fs_sm_info *sm_info; int err; - sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL); + sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL); if (!sm_info) return -ENOMEM; @@ -3496,14 +4250,16 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; + sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec; sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS; - - sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; + sm_info->min_ssr_sections = reserved_sections(sbi); INIT_LIST_HEAD(&sm_info->sit_entry_set); + init_rwsem(&sm_info->curseg_lock); + if (!f2fs_readonly(sbi->sb)) { - err = create_flush_cmd_control(sbi); + err = f2fs_create_flush_cmd_control(sbi); if (err) return err; } @@ -3567,7 +4323,7 @@ static void destroy_dirty_segmap(struct f2fs_sb_info *sbi) destroy_victim_secmap(sbi); SM_I(sbi)->dirty_info = NULL; - kfree(dirty_i); + kvfree(dirty_i); } static void destroy_curseg(struct f2fs_sb_info *sbi) @@ -3579,10 +4335,10 @@ static void destroy_curseg(struct f2fs_sb_info *sbi) return; SM_I(sbi)->curseg_array = NULL; for (i = 0; i < NR_CURSEG_TYPE; i++) { - kfree(array[i].sum_blk); - kfree(array[i].journal); + kvfree(array[i].sum_blk); + kvfree(array[i].journal); } - kfree(array); + kvfree(array); } static void destroy_free_segmap(struct f2fs_sb_info *sbi) @@ -3593,7 +4349,7 @@ static void destroy_free_segmap(struct f2fs_sb_info *sbi) SM_I(sbi)->free_info = NULL; kvfree(free_i->free_segmap); kvfree(free_i->free_secmap); - kfree(free_i); + kvfree(free_i); } static void destroy_sit_info(struct f2fs_sb_info *sbi) @@ -3606,45 +4362,45 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) if (sit_i->sentries) { for (start = 0; start < MAIN_SEGS(sbi); start++) { - kfree(sit_i->sentries[start].cur_valid_map); + kvfree(sit_i->sentries[start].cur_valid_map); #ifdef CONFIG_F2FS_CHECK_FS - kfree(sit_i->sentries[start].cur_valid_map_mir); + kvfree(sit_i->sentries[start].cur_valid_map_mir); #endif - kfree(sit_i->sentries[start].ckpt_valid_map); - kfree(sit_i->sentries[start].discard_map); + kvfree(sit_i->sentries[start].ckpt_valid_map); + kvfree(sit_i->sentries[start].discard_map); } } - kfree(sit_i->tmp_map); + kvfree(sit_i->tmp_map); kvfree(sit_i->sentries); kvfree(sit_i->sec_entries); kvfree(sit_i->dirty_sentries_bitmap); SM_I(sbi)->sit_info = NULL; - kfree(sit_i->sit_bitmap); + kvfree(sit_i->sit_bitmap); #ifdef CONFIG_F2FS_CHECK_FS - kfree(sit_i->sit_bitmap_mir); + kvfree(sit_i->sit_bitmap_mir); #endif - kfree(sit_i); + kvfree(sit_i); } -void destroy_segment_manager(struct f2fs_sb_info *sbi) +void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi) { struct f2fs_sm_info *sm_info = SM_I(sbi); if (!sm_info) return; - destroy_flush_cmd_control(sbi, true); + f2fs_destroy_flush_cmd_control(sbi, true); destroy_discard_cmd_control(sbi); destroy_dirty_segmap(sbi); destroy_curseg(sbi); destroy_free_segmap(sbi); destroy_sit_info(sbi); sbi->sm_info = NULL; - kfree(sm_info); + kvfree(sm_info); } -int __init create_segment_manager_caches(void) +int __init f2fs_create_segment_manager_caches(void) { discard_entry_slab = f2fs_kmem_cache_create("discard_entry", sizeof(struct discard_entry)); @@ -3677,7 +4433,7 @@ int __init create_segment_manager_caches(void) return -ENOMEM; } -void destroy_segment_manager_caches(void) +void f2fs_destroy_segment_manager_caches(void) { kmem_cache_destroy(sit_entry_set_slab); kmem_cache_destroy(discard_cmd_slab);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 47348d98..a77f76f 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h
@@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/segment.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/blkdev.h> #include <linux/backing-dev.h> @@ -215,6 +212,8 @@ struct segment_allocation { #define IS_DUMMY_WRITTEN_PAGE(page) \ (page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE) +#define MAX_SKIP_GC_COUNT 16 + struct inmem_pages { struct list_head list; struct page *page; @@ -237,7 +236,7 @@ struct sit_info { unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */ unsigned int dirty_sentries; /* # of dirty sentries */ unsigned int sents_per_block; /* # of SIT entries per block */ - struct mutex sentry_lock; /* to protect SIT cache */ + struct rw_semaphore sentry_lock; /* to protect SIT cache */ struct seg_entry *sentries; /* SIT segment-level cache */ struct sec_entry *sec_entries; /* SIT section-level cache */ @@ -334,12 +333,18 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, * In order to get # of valid blocks in a section instantly from many * segments, f2fs manages two counting structures separately. */ - if (use_section && sbi->segs_per_sec > 1) + if (use_section && __is_large_section(sbi)) return get_sec_entry(sbi, segno)->valid_blocks; else return get_seg_entry(sbi, segno)->valid_blocks; } +static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + return get_seg_entry(sbi, segno)->ckpt_valid_blocks; +} + static inline void seg_info_from_raw_sit(struct seg_entry *se, struct f2fs_sit_entry *rs) { @@ -354,16 +359,42 @@ static inline void seg_info_from_raw_sit(struct seg_entry *se, se->mtime = le64_to_cpu(rs->mtime); } -static inline void seg_info_to_raw_sit(struct seg_entry *se, +static inline void __seg_info_to_raw_sit(struct seg_entry *se, struct f2fs_sit_entry *rs) { unsigned short raw_vblocks = (se->type << SIT_VBLOCKS_SHIFT) | se->valid_blocks; rs->vblocks = cpu_to_le16(raw_vblocks); memcpy(rs->valid_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE); + rs->mtime = cpu_to_le64(se->mtime); +} + +static inline void seg_info_to_sit_page(struct f2fs_sb_info *sbi, + struct page *page, unsigned int start) +{ + struct f2fs_sit_block *raw_sit; + struct seg_entry *se; + struct f2fs_sit_entry *rs; + unsigned int end = min(start + SIT_ENTRY_PER_BLOCK, + (unsigned long)MAIN_SEGS(sbi)); + int i; + + raw_sit = (struct f2fs_sit_block *)page_address(page); + memset(raw_sit, 0, PAGE_SIZE); + for (i = 0; i < end - start; i++) { + rs = &raw_sit->entries[i]; + se = get_seg_entry(sbi, start + i); + __seg_info_to_raw_sit(se, rs); + } +} + +static inline void seg_info_to_raw_sit(struct seg_entry *se, + struct f2fs_sit_entry *rs) +{ + __seg_info_to_raw_sit(se, rs); + memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); se->ckpt_valid_blocks = se->valid_blocks; - rs->mtime = cpu_to_le64(se->mtime); } static inline unsigned int find_next_inuse(struct free_segmap_info *free_i, @@ -506,6 +537,33 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi) return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi)); } +static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi) +{ + unsigned int node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) + + get_pages(sbi, F2FS_DIRTY_DENTS); + unsigned int dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS); + unsigned int segno, left_blocks; + int i; + + /* check current node segment */ + for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) { + segno = CURSEG_I(sbi, i)->segno; + left_blocks = sbi->blocks_per_seg - + get_seg_entry(sbi, segno)->ckpt_valid_blocks; + + if (node_blocks > left_blocks) + return false; + } + + /* check current data segment */ + segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno; + left_blocks = sbi->blocks_per_seg - + get_seg_entry(sbi, segno)->ckpt_valid_blocks; + if (dent_blocks > left_blocks) + return false; + return true; +} + static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed, int needed) { @@ -516,11 +574,23 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) return false; + if (free_sections(sbi) + freed == reserved_sections(sbi) + needed && + has_curseg_enough_space(sbi)) + return false; return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + imeta_secs + reserved_sections(sbi) + needed); } +static inline int f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi) +{ + if (likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return 0; + if (likely(!has_not_enough_free_secs(sbi, 0, 0))) + return 0; + return -ENOSPC; +} + static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi) { return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments; @@ -550,6 +620,8 @@ static inline int utilization(struct f2fs_sb_info *sbi) #define DEF_MIN_FSYNC_BLOCKS 8 #define DEF_MIN_HOT_BLOCKS 16 +#define SMALL_VOLUME_SEGMENTS (16 * 512) /* 16GB */ + enum { F2FS_IPU_FORCE, F2FS_IPU_SSR, @@ -559,47 +631,6 @@ enum { F2FS_IPU_ASYNC, }; -static inline bool need_inplace_update_policy(struct inode *inode, - struct f2fs_io_info *fio) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - unsigned int policy = SM_I(sbi)->ipu_policy; - - if (test_opt(sbi, LFS)) - return false; - - /* if this is cold file, we should overwrite to avoid fragmentation */ - if (file_is_cold(inode)) - return true; - - if (policy & (0x1 << F2FS_IPU_FORCE)) - return true; - if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi)) - return true; - if (policy & (0x1 << F2FS_IPU_UTIL) && - utilization(sbi) > SM_I(sbi)->min_ipu_util) - return true; - if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && need_SSR(sbi) && - utilization(sbi) > SM_I(sbi)->min_ipu_util) - return true; - - /* - * IPU for rewrite async pages - */ - if (policy & (0x1 << F2FS_IPU_ASYNC) && - fio && fio->op == REQ_OP_WRITE && - !(fio->op_flags & REQ_SYNC) && - !f2fs_encrypted_inode(inode)) - return true; - - /* this is only set during fdatasync */ - if (policy & (0x1 << F2FS_IPU_FSYNC) && - is_inode_flag_set(inode, FI_NEED_IPU)) - return true; - - return false; -} - static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi, int type) { @@ -726,12 +757,23 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) #endif } -static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi) +static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi, + bool base_time) { struct sit_info *sit_i = SIT_I(sbi); - time64_t now = ktime_get_real_seconds(); + time64_t diff, now = ktime_get_real_seconds(); - return sit_i->elapsed_time + now - sit_i->mounted_time; + if (now >= sit_i->mounted_time) + return sit_i->elapsed_time + now - sit_i->mounted_time; + + /* system time is set to the past */ + if (!base_time) { + diff = sit_i->mounted_time - now; + if (sit_i->elapsed_time >= diff) + return sit_i->elapsed_time - diff; + return 0; + } + return sit_i->elapsed_time; } static inline void set_summary(struct f2fs_summary *sum, nid_t nid, @@ -755,15 +797,6 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) - (base + 1) + type; } -static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi, - unsigned int secno) -{ - if (get_valid_blocks(sbi, GET_SEG_FROM_SEC(sbi, secno), true) >= - sbi->fggc_threshold) - return true; - return false; -} - static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno) { if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno)) @@ -823,8 +856,9 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force) goto wake_up; mutex_lock(&dcc->cmd_lock); - for (i = MAX_PLIST_NUM - 1; - i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) { + for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { + if (i + 1 < dcc->discard_granularity) + break; if (!list_empty(&dcc->pend_list[i])) { wakeup = true; break;
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index ec71d2e..a467aca 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c
@@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs shrinker support * the basic infra was copied from fs/ubifs/shrinker.c * * Copyright (c) 2015 Motorola Mobility * Copyright (c) 2015 Jaegeuk Kim <jaegeuk@kernel.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/f2fs_fs.h> @@ -28,7 +25,7 @@ static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi) static unsigned long __count_free_nids(struct f2fs_sb_info *sbi) { - long count = NM_I(sbi)->nid_cnt[FREE_NID_LIST] - MAX_FREE_NIDS; + long count = NM_I(sbi)->nid_cnt[FREE_NID] - MAX_FREE_NIDS; return count > 0 ? count : 0; } @@ -109,11 +106,11 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink, /* shrink clean nat cache entries */ if (freed < nr) - freed += try_to_free_nats(sbi, nr - freed); + freed += f2fs_try_to_free_nats(sbi, nr - freed); /* shrink free nids cache entries */ if (freed < nr) - freed += try_to_free_nids(sbi, nr - freed); + freed += f2fs_try_to_free_nids(sbi, nr - freed); spin_lock(&f2fs_list_lock); p = p->next;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index fc5c412..76a791f8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c
@@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/super.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/module.h> #include <linux/init.h> @@ -41,31 +38,39 @@ static struct kmem_cache *f2fs_inode_cachep; #ifdef CONFIG_F2FS_FAULT_INJECTION -char *fault_name[FAULT_MAX] = { +const char *f2fs_fault_name[FAULT_MAX] = { [FAULT_KMALLOC] = "kmalloc", + [FAULT_KVMALLOC] = "kvmalloc", [FAULT_PAGE_ALLOC] = "page alloc", + [FAULT_PAGE_GET] = "page get", + [FAULT_ALLOC_BIO] = "alloc bio", [FAULT_ALLOC_NID] = "alloc nid", [FAULT_ORPHAN] = "orphan", [FAULT_BLOCK] = "no more block", [FAULT_DIR_DEPTH] = "too big dir depth", [FAULT_EVICT_INODE] = "evict_inode fail", [FAULT_TRUNCATE] = "truncate fail", - [FAULT_IO] = "IO error", + [FAULT_READ_IO] = "read IO error", [FAULT_CHECKPOINT] = "checkpoint error", + [FAULT_DISCARD] = "discard error", + [FAULT_WRITE_IO] = "write IO error", }; -static void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, - unsigned int rate) +void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, + unsigned int type) { - struct f2fs_fault_info *ffi = &sbi->fault_info; + struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info; if (rate) { atomic_set(&ffi->inject_ops, 0); ffi->inject_rate = rate; - ffi->inject_type = (1 << FAULT_MAX) - 1; - } else { - memset(ffi, 0, sizeof(struct f2fs_fault_info)); } + + if (type) + ffi->inject_type = type; + + if (!rate && !type) + memset(ffi, 0, sizeof(struct f2fs_fault_info)); } #endif @@ -92,6 +97,7 @@ enum { Opt_disable_ext_identify, Opt_inline_xattr, Opt_noinline_xattr, + Opt_inline_xattr_size, Opt_inline_data, Opt_inline_dentry, Opt_noinline_dentry, @@ -103,9 +109,13 @@ enum { Opt_noextent_cache, Opt_noinline_data, Opt_data_flush, + Opt_reserve_root, + Opt_resgid, + Opt_resuid, Opt_mode, Opt_io_size_bits, Opt_fault_injection, + Opt_fault_type, Opt_lazytime, Opt_nolazytime, Opt_quota, @@ -122,6 +132,11 @@ enum { Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, + Opt_whint, + Opt_alloc, + Opt_fsync, + Opt_test_dummy_encryption, + Opt_checkpoint, Opt_err, }; @@ -141,6 +156,7 @@ static match_table_t f2fs_tokens = { {Opt_disable_ext_identify, "disable_ext_identify"}, {Opt_inline_xattr, "inline_xattr"}, {Opt_noinline_xattr, "noinline_xattr"}, + {Opt_inline_xattr_size, "inline_xattr_size=%u"}, {Opt_inline_data, "inline_data"}, {Opt_inline_dentry, "inline_dentry"}, {Opt_noinline_dentry, "noinline_dentry"}, @@ -152,9 +168,13 @@ static match_table_t f2fs_tokens = { {Opt_noextent_cache, "noextent_cache"}, {Opt_noinline_data, "noinline_data"}, {Opt_data_flush, "data_flush"}, + {Opt_reserve_root, "reserve_root=%u"}, + {Opt_resgid, "resgid=%u"}, + {Opt_resuid, "resuid=%u"}, {Opt_mode, "mode=%s"}, {Opt_io_size_bits, "io_bits=%u"}, {Opt_fault_injection, "fault_injection=%u"}, + {Opt_fault_type, "fault_type=%u"}, {Opt_lazytime, "lazytime"}, {Opt_nolazytime, "nolazytime"}, {Opt_quota, "quota"}, @@ -171,6 +191,11 @@ static match_table_t f2fs_tokens = { {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, + {Opt_whint, "whint_mode=%s"}, + {Opt_alloc, "alloc_mode=%s"}, + {Opt_fsync, "fsync_mode=%s"}, + {Opt_test_dummy_encryption, "test_dummy_encryption"}, + {Opt_checkpoint, "checkpoint=%s"}, {Opt_err, NULL}, }; @@ -182,10 +207,35 @@ void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; - printk_ratelimited("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf); + printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf); va_end(args); } +static inline void limit_reserve_root(struct f2fs_sb_info *sbi) +{ + block_t limit = (sbi->user_block_count << 1) / 1000; + + /* limit is 0.2% */ + if (test_opt(sbi, RESERVE_ROOT) && + F2FS_OPTION(sbi).root_reserved_blocks > limit) { + F2FS_OPTION(sbi).root_reserved_blocks = limit; + f2fs_msg(sbi->sb, KERN_INFO, + "Reduce reserved blocks for root = %u", + F2FS_OPTION(sbi).root_reserved_blocks); + } + if (!test_opt(sbi, RESERVE_ROOT) && + (!uid_eq(F2FS_OPTION(sbi).s_resuid, + make_kuid(&init_user_ns, F2FS_DEF_RESUID)) || + !gid_eq(F2FS_OPTION(sbi).s_resgid, + make_kgid(&init_user_ns, F2FS_DEF_RESGID)))) + f2fs_msg(sbi->sb, KERN_INFO, + "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root", + from_kuid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resuid), + from_kgid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resgid)); +} + static void init_once(void *foo) { struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo; @@ -203,20 +253,26 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, char *qname; int ret = -EINVAL; - if (sb_any_quota_loaded(sb) && !sbi->s_qf_names[qtype]) { + if (sb_any_quota_loaded(sb) && !F2FS_OPTION(sbi).s_qf_names[qtype]) { f2fs_msg(sb, KERN_ERR, "Cannot change journaled " "quota options when quota turned on"); return -EINVAL; } + if (f2fs_sb_has_quota_ino(sbi)) { + f2fs_msg(sb, KERN_INFO, + "QUOTA feature is enabled, so ignore qf_name"); + return 0; + } + qname = match_strdup(args); if (!qname) { f2fs_msg(sb, KERN_ERR, "Not enough memory for storing quotafile name"); return -EINVAL; } - if (sbi->s_qf_names[qtype]) { - if (strcmp(sbi->s_qf_names[qtype], qname) == 0) + if (F2FS_OPTION(sbi).s_qf_names[qtype]) { + if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0) ret = 0; else f2fs_msg(sb, KERN_ERR, @@ -229,11 +285,11 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, "quotafile must be on filesystem root"); goto errout; } - sbi->s_qf_names[qtype] = qname; + F2FS_OPTION(sbi).s_qf_names[qtype] = qname; set_opt(sbi, QUOTA); return 0; errout: - kfree(qname); + kvfree(qname); return ret; } @@ -241,13 +297,13 @@ static int f2fs_clear_qf_name(struct super_block *sb, int qtype) { struct f2fs_sb_info *sbi = F2FS_SB(sb); - if (sb_any_quota_loaded(sb) && sbi->s_qf_names[qtype]) { + if (sb_any_quota_loaded(sb) && F2FS_OPTION(sbi).s_qf_names[qtype]) { f2fs_msg(sb, KERN_ERR, "Cannot change journaled quota options" " when quota turned on"); return -EINVAL; } - kfree(sbi->s_qf_names[qtype]); - sbi->s_qf_names[qtype] = NULL; + kvfree(F2FS_OPTION(sbi).s_qf_names[qtype]); + F2FS_OPTION(sbi).s_qf_names[qtype] = NULL; return 0; } @@ -258,20 +314,24 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) * 'grpquota' mount options are allowed even without quota feature * to support legacy quotas in quota files. */ - if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi->sb)) { + if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi)) { f2fs_msg(sbi->sb, KERN_ERR, "Project quota feature not enabled. " "Cannot enable project quota enforcement."); return -1; } - if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA] || - sbi->s_qf_names[PRJQUOTA]) { - if (test_opt(sbi, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) + if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] || + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) { + if (test_opt(sbi, USRQUOTA) && + F2FS_OPTION(sbi).s_qf_names[USRQUOTA]) clear_opt(sbi, USRQUOTA); - if (test_opt(sbi, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) + if (test_opt(sbi, GRPQUOTA) && + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA]) clear_opt(sbi, GRPQUOTA); - if (test_opt(sbi, PRJQUOTA) && sbi->s_qf_names[PRJQUOTA]) + if (test_opt(sbi, PRJQUOTA) && + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) clear_opt(sbi, PRJQUOTA); if (test_opt(sbi, GRPQUOTA) || test_opt(sbi, USRQUOTA) || @@ -281,12 +341,18 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) return -1; } - if (!sbi->s_jquota_fmt) { + if (!F2FS_OPTION(sbi).s_jquota_fmt) { f2fs_msg(sbi->sb, KERN_ERR, "journaled quota format " "not specified"); return -1; } } + + if (f2fs_sb_has_quota_ino(sbi) && F2FS_OPTION(sbi).s_jquota_fmt) { + f2fs_msg(sbi->sb, KERN_INFO, + "QUOTA feature is enabled, so ignore jquota_fmt"); + F2FS_OPTION(sbi).s_jquota_fmt = 0; + } return 0; } #endif @@ -294,10 +360,11 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) static int parse_options(struct super_block *sb, char *options) { struct f2fs_sb_info *sbi = F2FS_SB(sb); - struct request_queue *q; substring_t args[MAX_OPT_ARGS]; char *p, *name; int arg = 0; + kuid_t uid; + kgid_t gid; #ifdef CONFIG_QUOTA int ret; #endif @@ -332,10 +399,10 @@ static int parse_options(struct super_block *sb, char *options) set_opt(sbi, BG_GC); set_opt(sbi, FORCE_FG_GC); } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; case Opt_disable_roll_forward: set_opt(sbi, DISABLE_ROLL_FORWARD); @@ -347,17 +414,10 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; break; case Opt_discard: - q = bdev_get_queue(sb->s_bdev); - if (blk_queue_discard(q)) { - set_opt(sbi, DISCARD); - } else if (!f2fs_sb_mounted_blkzoned(sb)) { - f2fs_msg(sb, KERN_WARNING, - "mounting with \"discard\" option, but " - "the device does not support discard"); - } + set_opt(sbi, DISCARD); break; case Opt_nodiscard: - if (f2fs_sb_mounted_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sbi)) { f2fs_msg(sb, KERN_WARNING, "discard is required for zoned block devices"); return -EINVAL; @@ -383,6 +443,12 @@ static int parse_options(struct super_block *sb, char *options) case Opt_noinline_xattr: clear_opt(sbi, INLINE_XATTR); break; + case Opt_inline_xattr_size: + if (args->from && match_int(args, &arg)) + return -EINVAL; + set_opt(sbi, INLINE_XATTR_SIZE); + F2FS_OPTION(sbi).inline_xattr_size = arg; + break; #else case Opt_user_xattr: f2fs_msg(sb, KERN_INFO, @@ -421,7 +487,7 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE) return -EINVAL; - sbi->active_logs = arg; + F2FS_OPTION(sbi).active_logs = arg; break; case Opt_disable_ext_identify: set_opt(sbi, DISABLE_EXT_IDENTIFY); @@ -459,6 +525,40 @@ static int parse_options(struct super_block *sb, char *options) case Opt_data_flush: set_opt(sbi, DATA_FLUSH); break; + case Opt_reserve_root: + if (args->from && match_int(args, &arg)) + return -EINVAL; + if (test_opt(sbi, RESERVE_ROOT)) { + f2fs_msg(sb, KERN_INFO, + "Preserve previous reserve_root=%u", + F2FS_OPTION(sbi).root_reserved_blocks); + } else { + F2FS_OPTION(sbi).root_reserved_blocks = arg; + set_opt(sbi, RESERVE_ROOT); + } + break; + case Opt_resuid: + if (args->from && match_int(args, &arg)) + return -EINVAL; + uid = make_kuid(current_user_ns(), arg); + if (!uid_valid(uid)) { + f2fs_msg(sb, KERN_ERR, + "Invalid uid value %d", arg); + return -EINVAL; + } + F2FS_OPTION(sbi).s_resuid = uid; + break; + case Opt_resgid: + if (args->from && match_int(args, &arg)) + return -EINVAL; + gid = make_kgid(current_user_ns(), arg); + if (!gid_valid(gid)) { + f2fs_msg(sb, KERN_ERR, + "Invalid gid value %d", arg); + return -EINVAL; + } + F2FS_OPTION(sbi).s_resgid = gid; + break; case Opt_mode: name = match_strdup(&args[0]); @@ -466,11 +566,11 @@ static int parse_options(struct super_block *sb, char *options) return -ENOMEM; if (strlen(name) == 8 && !strncmp(name, "adaptive", 8)) { - if (f2fs_sb_mounted_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sbi)) { f2fs_msg(sb, KERN_WARNING, "adaptive mode is not allowed with " "zoned block device feature"); - kfree(name); + kvfree(name); return -EINVAL; } set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); @@ -478,10 +578,10 @@ static int parse_options(struct super_block *sb, char *options) !strncmp(name, "lfs", 3)) { set_opt_mode(sbi, F2FS_MOUNT_LFS); } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; case Opt_io_size_bits: if (args->from && match_int(args, &arg)) @@ -492,19 +592,33 @@ static int parse_options(struct super_block *sb, char *options) 1 << arg, BIO_MAX_PAGES); return -EINVAL; } - sbi->write_io_size_bits = arg; + F2FS_OPTION(sbi).write_io_size_bits = arg; break; +#ifdef CONFIG_F2FS_FAULT_INJECTION case Opt_fault_injection: if (args->from && match_int(args, &arg)) return -EINVAL; -#ifdef CONFIG_F2FS_FAULT_INJECTION - f2fs_build_fault_attr(sbi, arg); + f2fs_build_fault_attr(sbi, arg, F2FS_ALL_FAULT_TYPE); set_opt(sbi, FAULT_INJECTION); -#else - f2fs_msg(sb, KERN_INFO, - "FAULT_INJECTION was not selected"); -#endif break; + + case Opt_fault_type: + if (args->from && match_int(args, &arg)) + return -EINVAL; + f2fs_build_fault_attr(sbi, 0, arg); + set_opt(sbi, FAULT_INJECTION); + break; +#else + case Opt_fault_injection: + f2fs_msg(sb, KERN_INFO, + "fault_injection options not supported"); + break; + + case Opt_fault_type: + f2fs_msg(sb, KERN_INFO, + "fault_type options not supported"); + break; +#endif case Opt_lazytime: sb->s_flags |= MS_LAZYTIME; break; @@ -553,13 +667,13 @@ static int parse_options(struct super_block *sb, char *options) return ret; break; case Opt_jqfmt_vfsold: - sbi->s_jquota_fmt = QFMT_VFS_OLD; + F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_OLD; break; case Opt_jqfmt_vfsv0: - sbi->s_jquota_fmt = QFMT_VFS_V0; + F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V0; break; case Opt_jqfmt_vfsv1: - sbi->s_jquota_fmt = QFMT_VFS_V1; + F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V1; break; case Opt_noquota: clear_opt(sbi, QUOTA); @@ -586,6 +700,94 @@ static int parse_options(struct super_block *sb, char *options) "quota operations not supported"); break; #endif + case Opt_whint: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + if (strlen(name) == 10 && + !strncmp(name, "user-based", 10)) { + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_USER; + } else if (strlen(name) == 3 && + !strncmp(name, "off", 3)) { + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; + } else if (strlen(name) == 8 && + !strncmp(name, "fs-based", 8)) { + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS; + } else { + kvfree(name); + return -EINVAL; + } + kvfree(name); + break; + case Opt_alloc: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + + if (strlen(name) == 7 && + !strncmp(name, "default", 7)) { + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; + } else if (strlen(name) == 5 && + !strncmp(name, "reuse", 5)) { + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; + } else { + kvfree(name); + return -EINVAL; + } + kvfree(name); + break; + case Opt_fsync: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + if (strlen(name) == 5 && + !strncmp(name, "posix", 5)) { + F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; + } else if (strlen(name) == 6 && + !strncmp(name, "strict", 6)) { + F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_STRICT; + } else if (strlen(name) == 9 && + !strncmp(name, "nobarrier", 9)) { + F2FS_OPTION(sbi).fsync_mode = + FSYNC_MODE_NOBARRIER; + } else { + kvfree(name); + return -EINVAL; + } + kvfree(name); + break; + case Opt_test_dummy_encryption: +#ifdef CONFIG_F2FS_FS_ENCRYPTION + if (!f2fs_sb_has_encrypt(sbi)) { + f2fs_msg(sb, KERN_ERR, "Encrypt feature is off"); + return -EINVAL; + } + + F2FS_OPTION(sbi).test_dummy_encryption = true; + f2fs_msg(sb, KERN_INFO, + "Test dummy encryption mode enabled"); +#else + f2fs_msg(sb, KERN_INFO, + "Test dummy encryption mount option ignored"); +#endif + break; + case Opt_checkpoint: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + + if (strlen(name) == 6 && + !strncmp(name, "enable", 6)) { + clear_opt(sbi, DISABLE_CHECKPOINT); + } else if (strlen(name) == 7 && + !strncmp(name, "disable", 7)) { + set_opt(sbi, DISABLE_CHECKPOINT); + } else { + kvfree(name); + return -EINVAL; + } + kvfree(name); + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -596,6 +798,19 @@ static int parse_options(struct super_block *sb, char *options) #ifdef CONFIG_QUOTA if (f2fs_check_quota_options(sbi)) return -EINVAL; +#else + if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) { + f2fs_msg(sbi->sb, KERN_INFO, + "Filesystem with quota feature cannot be mounted RDWR " + "without CONFIG_QUOTA"); + return -EINVAL; + } + if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) { + f2fs_msg(sb, KERN_ERR, + "Filesystem with project quota feature cannot be " + "mounted RDWR without CONFIG_QUOTA"); + return -EINVAL; + } #endif if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) { @@ -604,6 +819,44 @@ static int parse_options(struct super_block *sb, char *options) F2FS_IO_SIZE_KB(sbi)); return -EINVAL; } + + if (test_opt(sbi, INLINE_XATTR_SIZE)) { + if (!f2fs_sb_has_extra_attr(sbi) || + !f2fs_sb_has_flexible_inline_xattr(sbi)) { + f2fs_msg(sb, KERN_ERR, + "extra_attr or flexible_inline_xattr " + "feature is off"); + return -EINVAL; + } + if (!test_opt(sbi, INLINE_XATTR)) { + f2fs_msg(sb, KERN_ERR, + "inline_xattr_size option should be " + "set with inline_xattr option"); + return -EINVAL; + } + if (!F2FS_OPTION(sbi).inline_xattr_size || + F2FS_OPTION(sbi).inline_xattr_size >= + DEF_ADDRS_PER_INODE - + F2FS_TOTAL_EXTRA_ATTR_SIZE - + DEF_INLINE_RESERVED_SIZE - + DEF_MIN_INLINE_SIZE) { + f2fs_msg(sb, KERN_ERR, + "inline xattr size is out of range"); + return -EINVAL; + } + } + + if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) { + f2fs_msg(sb, KERN_ERR, + "LFS not compatible with checkpoint=disable\n"); + return -EINVAL; + } + + /* Not pass down write hints if the number of active logs is lesser + * than NR_CURSEG_TYPE. + */ + if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE) + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; return 0; } @@ -618,24 +871,18 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) init_once((void *) fi); /* Initialize f2fs-specific inode info */ - fi->vfs_inode.i_version = 1; atomic_set(&fi->dirty_pages, 0); - fi->i_current_depth = 1; - fi->i_advise = 0; init_rwsem(&fi->i_sem); INIT_LIST_HEAD(&fi->dirty_list); INIT_LIST_HEAD(&fi->gdirty_list); + INIT_LIST_HEAD(&fi->inmem_ilist); INIT_LIST_HEAD(&fi->inmem_pages); mutex_init(&fi->inmem_lock); - init_rwsem(&fi->dio_rwsem[READ]); - init_rwsem(&fi->dio_rwsem[WRITE]); + init_rwsem(&fi->i_gc_rwsem[READ]); + init_rwsem(&fi->i_gc_rwsem[WRITE]); init_rwsem(&fi->i_mmap_sem); init_rwsem(&fi->i_xattr_sem); -#ifdef CONFIG_QUOTA - memset(&fi->i_dquot, 0, sizeof(fi->i_dquot)); - fi->i_reserved_quota = 0; -#endif /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; @@ -660,7 +907,7 @@ static int f2fs_drop_inode(struct inode *inode) /* some remained atomic pages should discarded */ if (f2fs_is_atomic_file(inode)) - drop_inmem_pages(inode); + f2fs_drop_inmem_pages(inode); /* should remain fi->extent_tree for writepage */ f2fs_destroy_extent_node(inode); @@ -673,7 +920,6 @@ static int f2fs_drop_inode(struct inode *inode) sb_end_intwrite(inode->i_sb); - fscrypt_put_encryption_info(inode, NULL); spin_lock(&inode->i_lock); atomic_dec(&inode->i_count); } @@ -771,16 +1017,17 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) for (i = 0; i < sbi->s_ndevs; i++) { blkdev_put(FDEV(i).bdev, FMODE_EXCL); #ifdef CONFIG_BLK_DEV_ZONED - kfree(FDEV(i).blkz_type); + kvfree(FDEV(i).blkz_type); #endif } - kfree(sbi->devs); + kvfree(sbi->devs); } static void f2fs_put_super(struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); int i; + bool dropped; f2fs_quota_off_umount(sb); @@ -792,32 +1039,30 @@ static void f2fs_put_super(struct super_block *sb) * But, the previous checkpoint was not done by umount, it needs to do * clean checkpoint again. */ - if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) || - !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { + if ((is_sbi_flag_set(sbi, SBI_IS_DIRTY) || + !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG))) { struct cp_control cpc = { .reason = CP_UMOUNT, }; - write_checkpoint(sbi, &cpc); + f2fs_write_checkpoint(sbi, &cpc); } /* be sure to wait for any on-going discard commands */ - f2fs_wait_discard_bios(sbi, true); + dropped = f2fs_wait_discard_bios(sbi); - if (f2fs_discard_en(sbi) && !sbi->discard_blks) { + if ((f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) && + !sbi->discard_blks && !dropped) { struct cp_control cpc = { .reason = CP_UMOUNT | CP_TRIMMED, }; - write_checkpoint(sbi, &cpc); + f2fs_write_checkpoint(sbi, &cpc); } - /* write_checkpoint can update stat informaion */ - f2fs_destroy_stats(sbi); - /* * normally superblock is clean, so we need to release this. * In addition, EIO will skip do checkpoint, we need this as well. */ - release_ino_entry(sbi, true); + f2fs_release_ino_entry(sbi, true); f2fs_leave_shrinker(sbi); mutex_unlock(&sbi->umount_mutex); @@ -825,32 +1070,45 @@ static void f2fs_put_super(struct super_block *sb) /* our cp_error case, we can wait for any writeback page */ f2fs_flush_merged_writes(sbi); + f2fs_wait_on_all_pages_writeback(sbi); + + f2fs_bug_on(sbi, sbi->fsync_node_num); + iput(sbi->node_inode); + sbi->node_inode = NULL; + iput(sbi->meta_inode); + sbi->meta_inode = NULL; + + /* + * iput() can update stat information, if f2fs_write_checkpoint() + * above failed with error. + */ + f2fs_destroy_stats(sbi); /* destroy f2fs internal modules */ - destroy_node_manager(sbi); - destroy_segment_manager(sbi); + f2fs_destroy_node_manager(sbi); + f2fs_destroy_segment_manager(sbi); - kfree(sbi->ckpt); + kvfree(sbi->ckpt); f2fs_unregister_sysfs(sbi); sb->s_fs_info = NULL; if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); - kfree(sbi->raw_super); + kvfree(sbi->raw_super); destroy_device_list(sbi); mempool_destroy(sbi->write_io_dummy); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) - kfree(sbi->s_qf_names[i]); + kvfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif destroy_percpu_info(sbi); for (i = 0; i < NR_PAGE_TYPE; i++) - kfree(sbi->write_io[i]); - kfree(sbi); + kvfree(sbi->write_io[i]); + kvfree(sbi); } int f2fs_sync_fs(struct super_block *sb, int sync) @@ -858,6 +1116,11 @@ int f2fs_sync_fs(struct super_block *sb, int sync) struct f2fs_sb_info *sbi = F2FS_SB(sb); int err = 0; + if (unlikely(f2fs_cp_error(sbi))) + return 0; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return 0; + trace_f2fs_sync_fs(sb, sync); if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) @@ -869,7 +1132,7 @@ int f2fs_sync_fs(struct super_block *sb, int sync) cpc.reason = __get_cp_reason(sbi); mutex_lock(&sbi->gc_mutex); - err = write_checkpoint(sbi, &cpc); + err = f2fs_write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); } f2fs_trace_ios(NULL, 1); @@ -910,7 +1173,7 @@ static int f2fs_statfs_project(struct super_block *sb, dquot = dqget(sb, qid); if (IS_ERR(dquot)) return PTR_ERR(dquot); - spin_lock(&dq_data_lock); + spin_lock(&dquot->dq_dqb_lock); limit = (dquot->dq_dqb.dqb_bsoftlimit ? dquot->dq_dqb.dqb_bsoftlimit : @@ -933,7 +1196,7 @@ static int f2fs_statfs_project(struct super_block *sb, (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0; } - spin_unlock(&dq_data_lock); + spin_unlock(&dquot->dq_dqb_lock); dqput(dquot); return 0; } @@ -944,22 +1207,31 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) struct super_block *sb = dentry->d_sb; struct f2fs_sb_info *sbi = F2FS_SB(sb); u64 id = huge_encode_dev(sb->s_bdev->bd_dev); - block_t total_count, user_block_count, start_count, ovp_count; + block_t total_count, user_block_count, start_count; u64 avail_node_count; total_count = le64_to_cpu(sbi->raw_super->block_count); user_block_count = sbi->user_block_count; start_count = le32_to_cpu(sbi->raw_super->segment0_blkaddr); - ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg; buf->f_type = F2FS_SUPER_MAGIC; buf->f_bsize = sbi->blocksize; buf->f_blocks = total_count - start_count; - buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count; - buf->f_bavail = user_block_count - valid_user_blocks(sbi) - - sbi->reserved_blocks; + buf->f_bfree = user_block_count - valid_user_blocks(sbi) - + sbi->current_reserved_blocks; + if (unlikely(buf->f_bfree <= sbi->unusable_block_count)) + buf->f_bfree = 0; + else + buf->f_bfree -= sbi->unusable_block_count; - avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; + if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks) + buf->f_bavail = buf->f_bfree - + F2FS_OPTION(sbi).root_reserved_blocks; + else + buf->f_bavail = 0; + + avail_node_count = sbi->total_node_count - sbi->nquota_files - + F2FS_RESERVED_NODE_NUM; if (avail_node_count > user_block_count) { buf->f_files = user_block_count; @@ -989,10 +1261,10 @@ static inline void f2fs_show_quota_options(struct seq_file *seq, #ifdef CONFIG_QUOTA struct f2fs_sb_info *sbi = F2FS_SB(sb); - if (sbi->s_jquota_fmt) { + if (F2FS_OPTION(sbi).s_jquota_fmt) { char *fmtname = ""; - switch (sbi->s_jquota_fmt) { + switch (F2FS_OPTION(sbi).s_jquota_fmt) { case QFMT_VFS_OLD: fmtname = "vfsold"; break; @@ -1006,14 +1278,17 @@ static inline void f2fs_show_quota_options(struct seq_file *seq, seq_printf(seq, ",jqfmt=%s", fmtname); } - if (sbi->s_qf_names[USRQUOTA]) - seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]); + if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA]) + seq_show_option(seq, "usrjquota", + F2FS_OPTION(sbi).s_qf_names[USRQUOTA]); - if (sbi->s_qf_names[GRPQUOTA]) - seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]); + if (F2FS_OPTION(sbi).s_qf_names[GRPQUOTA]) + seq_show_option(seq, "grpjquota", + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA]); - if (sbi->s_qf_names[PRJQUOTA]) - seq_show_option(seq, "prjjquota", sbi->s_qf_names[PRJQUOTA]); + if (F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) + seq_show_option(seq, "prjjquota", + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]); #endif } @@ -1046,6 +1321,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",inline_xattr"); else seq_puts(seq, ",noinline_xattr"); + if (test_opt(sbi, INLINE_XATTR_SIZE)) + seq_printf(seq, ",inline_xattr_size=%u", + F2FS_OPTION(sbi).inline_xattr_size); #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL if (test_opt(sbi, POSIX_ACL)) @@ -1081,13 +1359,24 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, "adaptive"); else if (test_opt(sbi, LFS)) seq_puts(seq, "lfs"); - seq_printf(seq, ",active_logs=%u", sbi->active_logs); + seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs); + if (test_opt(sbi, RESERVE_ROOT)) + seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u", + F2FS_OPTION(sbi).root_reserved_blocks, + from_kuid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resuid), + from_kgid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resgid)); if (F2FS_IO_SIZE_BITS(sbi)) - seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi)); + seq_printf(seq, ",io_bits=%u", + F2FS_OPTION(sbi).write_io_size_bits); #ifdef CONFIG_F2FS_FAULT_INJECTION - if (test_opt(sbi, FAULT_INJECTION)) + if (test_opt(sbi, FAULT_INJECTION)) { seq_printf(seq, ",fault_injection=%u", - sbi->fault_info.inject_rate); + F2FS_OPTION(sbi).fault_info.inject_rate); + seq_printf(seq, ",fault_type=%u", + F2FS_OPTION(sbi).fault_info.inject_type); + } #endif #ifdef CONFIG_QUOTA if (test_opt(sbi, QUOTA)) @@ -1100,14 +1389,43 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",prjquota"); #endif f2fs_show_quota_options(seq, sbi->sb); + if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) + seq_printf(seq, ",whint_mode=%s", "user-based"); + else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) + seq_printf(seq, ",whint_mode=%s", "fs-based"); +#ifdef CONFIG_F2FS_FS_ENCRYPTION + if (F2FS_OPTION(sbi).test_dummy_encryption) + seq_puts(seq, ",test_dummy_encryption"); +#endif + if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_DEFAULT) + seq_printf(seq, ",alloc_mode=%s", "default"); + else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE) + seq_printf(seq, ",alloc_mode=%s", "reuse"); + + if (test_opt(sbi, DISABLE_CHECKPOINT)) + seq_puts(seq, ",checkpoint=disable"); + + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX) + seq_printf(seq, ",fsync_mode=%s", "posix"); + else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) + seq_printf(seq, ",fsync_mode=%s", "strict"); + else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_NOBARRIER) + seq_printf(seq, ",fsync_mode=%s", "nobarrier"); return 0; } static void default_options(struct f2fs_sb_info *sbi) { /* init some FS parameters */ - sbi->active_logs = NR_CURSEG_TYPE; + F2FS_OPTION(sbi).active_logs = NR_CURSEG_TYPE; + F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; + F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; + F2FS_OPTION(sbi).test_dummy_encryption = false; + F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); + F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); set_opt(sbi, BG_GC); set_opt(sbi, INLINE_XATTR); @@ -1116,13 +1434,13 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, EXTENT_CACHE); set_opt(sbi, NOHEAP); sbi->sb->s_flags |= MS_LAZYTIME; + clear_opt(sbi, DISABLE_CHECKPOINT); set_opt(sbi, FLUSH_MERGE); - if (f2fs_sb_mounted_blkzoned(sbi->sb)) { + set_opt(sbi, DISCARD); + if (f2fs_sb_has_blkzoned(sbi)) set_opt_mode(sbi, F2FS_MOUNT_LFS); - set_opt(sbi, DISCARD); - } else { + else set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); - } #ifdef CONFIG_F2FS_FS_XATTR set_opt(sbi, XATTR_USER); @@ -1131,9 +1449,58 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, POSIX_ACL); #endif -#ifdef CONFIG_F2FS_FAULT_INJECTION - f2fs_build_fault_attr(sbi, 0); + f2fs_build_fault_attr(sbi, 0, 0); +} + +#ifdef CONFIG_QUOTA +static int f2fs_enable_quotas(struct super_block *sb); #endif + +static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) +{ + struct cp_control cpc; + int err; + + sbi->sb->s_flags |= SB_ACTIVE; + + f2fs_update_time(sbi, DISABLE_TIME); + + while (!f2fs_time_over(sbi, DISABLE_TIME)) { + mutex_lock(&sbi->gc_mutex); + err = f2fs_gc(sbi, true, false, NULL_SEGNO); + if (err == -ENODATA) + break; + if (err && err != -EAGAIN) + return err; + } + + err = sync_filesystem(sbi->sb); + if (err) + return err; + + if (f2fs_disable_cp_again(sbi)) + return -EAGAIN; + + mutex_lock(&sbi->gc_mutex); + cpc.reason = CP_PAUSE; + set_sbi_flag(sbi, SBI_CP_DISABLED); + f2fs_write_checkpoint(sbi, &cpc); + + sbi->unusable_block_count = 0; + mutex_unlock(&sbi->gc_mutex); + return 0; +} + +static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) +{ + mutex_lock(&sbi->gc_mutex); + f2fs_dirty_to_prefree(sbi); + + clear_sbi_flag(sbi, SBI_CP_DISABLED); + set_sbi_flag(sbi, SBI_IS_DIRTY); + mutex_unlock(&sbi->gc_mutex); + + f2fs_sync_fs(sbi->sb, 1); } static int f2fs_remount(struct super_block *sb, int *flags, char *data) @@ -1141,16 +1508,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) struct f2fs_sb_info *sbi = F2FS_SB(sb); struct f2fs_mount_info org_mount_opt; unsigned long old_sb_flags; - int err, active_logs; + int err; bool need_restart_gc = false; bool need_stop_gc = false; bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE); -#ifdef CONFIG_F2FS_FAULT_INJECTION - struct f2fs_fault_info ffi = sbi->fault_info; -#endif + bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT); + bool checkpoint_changed; #ifdef CONFIG_QUOTA - int s_jquota_fmt; - char *s_qf_names[MAXQUOTAS]; int i, j; #endif @@ -1160,21 +1524,21 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) */ org_mount_opt = sbi->mount_opt; old_sb_flags = sb->s_flags; - active_logs = sbi->active_logs; #ifdef CONFIG_QUOTA - s_jquota_fmt = sbi->s_jquota_fmt; + org_mount_opt.s_jquota_fmt = F2FS_OPTION(sbi).s_jquota_fmt; for (i = 0; i < MAXQUOTAS; i++) { - if (sbi->s_qf_names[i]) { - s_qf_names[i] = kstrdup(sbi->s_qf_names[i], - GFP_KERNEL); - if (!s_qf_names[i]) { + if (F2FS_OPTION(sbi).s_qf_names[i]) { + org_mount_opt.s_qf_names[i] = + kstrdup(F2FS_OPTION(sbi).s_qf_names[i], + GFP_KERNEL); + if (!org_mount_opt.s_qf_names[i]) { for (j = 0; j < i; j++) - kfree(s_qf_names[j]); + kvfree(org_mount_opt.s_qf_names[j]); return -ENOMEM; } } else { - s_qf_names[i] = NULL; + org_mount_opt.s_qf_names[i] = NULL; } } #endif @@ -1194,6 +1558,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) err = parse_options(sb, data); if (err) goto restore_opts; + checkpoint_changed = + disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT); /* * Previous and new state of filesystem is RO, @@ -1202,16 +1568,23 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) if (f2fs_readonly(sb) && (*flags & MS_RDONLY)) goto skip; +#ifdef CONFIG_QUOTA if (!f2fs_readonly(sb) && (*flags & MS_RDONLY)) { err = dquot_suspend(sb, -1); if (err < 0) goto restore_opts; - } else { + } else if (f2fs_readonly(sb) && !(*flags & SB_RDONLY)) { /* dquot_resume needs RW */ sb->s_flags &= ~MS_RDONLY; - dquot_resume(sb, -1); + if (sb_any_quota_suspended(sb)) { + dquot_resume(sb, -1); + } else if (f2fs_sb_has_quota_ino(sbi)) { + err = f2fs_enable_quotas(sb); + if (err) + goto restore_opts; + } } - +#endif /* disallow enable/disable extent_cache dynamically */ if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) { err = -EINVAL; @@ -1220,6 +1593,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } + if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) { + err = -EINVAL; + f2fs_msg(sbi->sb, KERN_WARNING, + "disabling checkpoint not compatible with read-only"); + goto restore_opts; + } + /* * We stop the GC thread if FS is mounted as RO * or if background_gc = off is passed in mount @@ -1227,17 +1607,18 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) */ if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) { if (sbi->gc_thread) { - stop_gc_thread(sbi); + f2fs_stop_gc_thread(sbi); need_restart_gc = true; } } else if (!sbi->gc_thread) { - err = start_gc_thread(sbi); + err = f2fs_start_gc_thread(sbi); if (err) goto restore_opts; need_stop_gc = true; } - if (*flags & MS_RDONLY) { + if (*flags & MS_RDONLY || + F2FS_OPTION(sbi).whint_mode != org_mount_opt.whint_mode) { writeback_inodes_sb(sb, WB_REASON_SYNC); sync_inodes_sb(sb); @@ -1247,15 +1628,25 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) clear_sbi_flag(sbi, SBI_IS_CLOSE); } + if (checkpoint_changed) { + if (test_opt(sbi, DISABLE_CHECKPOINT)) { + err = f2fs_disable_checkpoint(sbi); + if (err) + goto restore_gc; + } else { + f2fs_enable_checkpoint(sbi); + } + } + /* * We stop issue flush thread if FS is mounted as RO * or if flush_merge is not passed in mount option. */ if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { clear_opt(sbi, FLUSH_MERGE); - destroy_flush_cmd_control(sbi, false); + f2fs_destroy_flush_cmd_control(sbi, false); } else { - err = create_flush_cmd_control(sbi); + err = f2fs_create_flush_cmd_control(sbi); if (err) goto restore_gc; } @@ -1263,35 +1654,33 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) #ifdef CONFIG_QUOTA /* Release old quota file names */ for (i = 0; i < MAXQUOTAS; i++) - kfree(s_qf_names[i]); + kvfree(org_mount_opt.s_qf_names[i]); #endif /* Update the POSIXACL Flag */ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); + limit_reserve_root(sbi); + *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME); return 0; restore_gc: if (need_restart_gc) { - if (start_gc_thread(sbi)) + if (f2fs_start_gc_thread(sbi)) f2fs_msg(sbi->sb, KERN_WARNING, "background gc thread has stopped"); } else if (need_stop_gc) { - stop_gc_thread(sbi); + f2fs_stop_gc_thread(sbi); } restore_opts: #ifdef CONFIG_QUOTA - sbi->s_jquota_fmt = s_jquota_fmt; + F2FS_OPTION(sbi).s_jquota_fmt = org_mount_opt.s_jquota_fmt; for (i = 0; i < MAXQUOTAS; i++) { - kfree(sbi->s_qf_names[i]); - sbi->s_qf_names[i] = s_qf_names[i]; + kvfree(F2FS_OPTION(sbi).s_qf_names[i]); + F2FS_OPTION(sbi).s_qf_names[i] = org_mount_opt.s_qf_names[i]; } #endif sbi->mount_opt = org_mount_opt; - sbi->active_logs = active_logs; sb->s_flags = old_sb_flags; -#ifdef CONFIG_F2FS_FAULT_INJECTION - sbi->fault_info = ffi; -#endif return err; } @@ -1319,9 +1708,15 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data, while (toread > 0) { tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread); repeat: - page = read_mapping_page(mapping, blkidx, NULL); - if (IS_ERR(page)) + page = read_cache_page_gfp(mapping, blkidx, GFP_NOFS); + if (IS_ERR(page)) { + if (PTR_ERR(page) == -ENOMEM) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto repeat; + } + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); return PTR_ERR(page); + } lock_page(page); @@ -1331,6 +1726,7 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data, } if (unlikely(!PageUptodate(page))) { f2fs_put_page(page, 1); + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); return -EIO; } @@ -1364,11 +1760,17 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, while (towrite > 0) { tocopy = min_t(unsigned long, sb->s_blocksize - offset, towrite); - +retry: err = a_ops->write_begin(NULL, mapping, off, tocopy, 0, &page, NULL); - if (unlikely(err)) + if (unlikely(err)) { + if (err == -ENOMEM) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; + } + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); break; + } kaddr = kmap_atomic(page); memcpy(kaddr + offset, data, tocopy); @@ -1385,8 +1787,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, } if (len == towrite) - return 0; - inode->i_version++; + return err; inode->i_mtime = inode->i_ctime = current_time(inode); f2fs_mark_inode_dirty_sync(inode, false); return len - towrite; @@ -1404,54 +1805,157 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode) static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type) { - return dquot_quota_on_mount(sbi->sb, sbi->s_qf_names[type], - sbi->s_jquota_fmt, type); + if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) { + f2fs_msg(sbi->sb, KERN_ERR, + "quota sysfile may be corrupted, skip loading it"); + return 0; + } + + return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type], + F2FS_OPTION(sbi).s_jquota_fmt, type); } -void f2fs_enable_quota_files(struct f2fs_sb_info *sbi) +int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly) { - int i, ret; + int enabled = 0; + int i, err; + + if (f2fs_sb_has_quota_ino(sbi) && rdonly) { + err = f2fs_enable_quotas(sbi->sb); + if (err) { + f2fs_msg(sbi->sb, KERN_ERR, + "Cannot turn on quota_ino: %d", err); + return 0; + } + return 1; + } for (i = 0; i < MAXQUOTAS; i++) { - if (sbi->s_qf_names[i]) { - ret = f2fs_quota_on_mount(sbi, i); - if (ret < 0) - f2fs_msg(sbi->sb, KERN_ERR, - "Cannot turn on journaled " - "quota: error %d", ret); + if (F2FS_OPTION(sbi).s_qf_names[i]) { + err = f2fs_quota_on_mount(sbi, i); + if (!err) { + enabled = 1; + continue; + } + f2fs_msg(sbi->sb, KERN_ERR, + "Cannot turn on quotas: %d on %d", err, i); } } + return enabled; } -static int f2fs_quota_sync(struct super_block *sb, int type) +static int f2fs_quota_enable(struct super_block *sb, int type, int format_id, + unsigned int flags) { + struct inode *qf_inode; + unsigned long qf_inum; + int err; + + BUG_ON(!f2fs_sb_has_quota_ino(F2FS_SB(sb))); + + qf_inum = f2fs_qf_ino(sb, type); + if (!qf_inum) + return -EPERM; + + qf_inode = f2fs_iget(sb, qf_inum); + if (IS_ERR(qf_inode)) { + f2fs_msg(sb, KERN_ERR, + "Bad quota inode %u:%lu", type, qf_inum); + return PTR_ERR(qf_inode); + } + + /* Don't account quota for quota files to avoid recursion */ + qf_inode->i_flags |= S_NOQUOTA; + err = dquot_enable(qf_inode, type, format_id, flags); + iput(qf_inode); + return err; +} + +static int f2fs_enable_quotas(struct super_block *sb) +{ + int type, err = 0; + unsigned long qf_inum; + bool quota_mopt[MAXQUOTAS] = { + test_opt(F2FS_SB(sb), USRQUOTA), + test_opt(F2FS_SB(sb), GRPQUOTA), + test_opt(F2FS_SB(sb), PRJQUOTA), + }; + + if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) { + f2fs_msg(sb, KERN_ERR, + "quota file may be corrupted, skip loading it"); + return 0; + } + + sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; + + for (type = 0; type < MAXQUOTAS; type++) { + qf_inum = f2fs_qf_ino(sb, type); + if (qf_inum) { + err = f2fs_quota_enable(sb, type, QFMT_VFS_V1, + DQUOT_USAGE_ENABLED | + (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0)); + if (err) { + f2fs_msg(sb, KERN_ERR, + "Failed to enable quota tracking " + "(type=%d, err=%d). Please run " + "fsck to fix.", type, err); + for (type--; type >= 0; type--) + dquot_quota_off(sb, type); + set_sbi_flag(F2FS_SB(sb), + SBI_QUOTA_NEED_REPAIR); + return err; + } + } + } + return 0; +} + +int f2fs_quota_sync(struct super_block *sb, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); struct quota_info *dqopt = sb_dqopt(sb); int cnt; int ret; ret = dquot_writeback_dquots(sb, type); if (ret) - return ret; + goto out; /* * Now when everything is written we can discard the pagecache so * that userspace sees the changes. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + struct address_space *mapping; + if (type != -1 && cnt != type) continue; if (!sb_has_quota_active(sb, cnt)) continue; - ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping); + mapping = dqopt->files[cnt]->i_mapping; + + ret = filemap_fdatawrite(mapping); if (ret) - return ret; + goto out; + + /* if we are using journalled quota */ + if (is_journalled_quota(sbi)) + continue; + + ret = filemap_fdatawait(mapping); + if (ret) + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); inode_lock(dqopt->files[cnt]); truncate_inode_pages(&dqopt->files[cnt]->i_data, 0); inode_unlock(dqopt->files[cnt]); } - return 0; +out: + if (ret) + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); + return ret; } static int f2fs_quota_on(struct super_block *sb, int type, int format_id, @@ -1471,9 +1975,8 @@ static int f2fs_quota_on(struct super_block *sb, int type, int format_id, inode = d_inode(path->dentry); inode_lock(inode); - F2FS_I(inode)->i_flags |= FS_NOATIME_FL | FS_IMMUTABLE_FL; - inode_set_flags(inode, S_NOATIME | S_IMMUTABLE, - S_NOATIME | S_IMMUTABLE); + F2FS_I(inode)->i_flags |= F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL; + f2fs_set_inode_flags(inode); inode_unlock(inode); f2fs_mark_inode_dirty_sync(inode, false); @@ -1493,12 +1996,12 @@ static int f2fs_quota_off(struct super_block *sb, int type) goto out_put; err = dquot_quota_off(sb, type); - if (err) + if (err || f2fs_sb_has_quota_ino(F2FS_SB(sb))) goto out_put; inode_lock(inode); - F2FS_I(inode)->i_flags &= ~(FS_NOATIME_FL | FS_IMMUTABLE_FL); - inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE); + F2FS_I(inode)->i_flags &= ~(F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL); + f2fs_set_inode_flags(inode); inode_unlock(inode); f2fs_mark_inode_dirty_sync(inode, false); out_put: @@ -1520,12 +2023,80 @@ void f2fs_quota_off_umount(struct super_block *sb) "Fail to turn off disk quota " "(type: %d, err: %d, ret:%d), Please " "run fsck to fix it.", type, err, ret); - set_sbi_flag(F2FS_SB(sb), SBI_NEED_FSCK); + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); } } } -int f2fs_get_projid(struct inode *inode, kprojid_t *projid) +static void f2fs_truncate_quota_inode_pages(struct super_block *sb) +{ + struct quota_info *dqopt = sb_dqopt(sb); + int type; + + for (type = 0; type < MAXQUOTAS; type++) { + if (!dqopt->files[type]) + continue; + f2fs_inode_synced(dqopt->files[type]); + } +} + +static int f2fs_dquot_commit(struct dquot *dquot) +{ + int ret; + + ret = dquot_commit(dquot); + if (ret < 0) + set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR); + return ret; +} + +static int f2fs_dquot_acquire(struct dquot *dquot) +{ + int ret; + + ret = dquot_acquire(dquot); + if (ret < 0) + set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR); + + return ret; +} + +static int f2fs_dquot_release(struct dquot *dquot) +{ + int ret; + + ret = dquot_release(dquot); + if (ret < 0) + set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR); + return ret; +} + +static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot) +{ + struct super_block *sb = dquot->dq_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int ret; + + ret = dquot_mark_dquot_dirty(dquot); + + /* if we are using journalled quota */ + if (is_journalled_quota(sbi)) + set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); + + return ret; +} + +static int f2fs_dquot_commit_info(struct super_block *sb, int type) +{ + int ret; + + ret = dquot_commit_info(sb, type); + if (ret < 0) + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); + return ret; +} + +static int f2fs_get_projid(struct inode *inode, kprojid_t *projid) { *projid = F2FS_I(inode)->i_projid; return 0; @@ -1533,11 +2104,11 @@ int f2fs_get_projid(struct inode *inode, kprojid_t *projid) static const struct dquot_operations f2fs_quota_operations = { .get_reserved_space = f2fs_get_reserved_space, - .write_dquot = dquot_commit, - .acquire_dquot = dquot_acquire, - .release_dquot = dquot_release, - .mark_dirty = dquot_mark_dquot_dirty, - .write_info = dquot_commit_info, + .write_dquot = f2fs_dquot_commit, + .acquire_dquot = f2fs_dquot_acquire, + .release_dquot = f2fs_dquot_release, + .mark_dirty = f2fs_dquot_mark_dquot_dirty, + .write_info = f2fs_dquot_commit_info, .alloc_dquot = dquot_alloc, .destroy_dquot = dquot_destroy, .get_projid = f2fs_get_projid, @@ -1555,6 +2126,11 @@ static const struct quotactl_ops f2fs_quotactl_ops = { .get_nextdqblk = dquot_get_next_dqblk, }; #else +int f2fs_quota_sync(struct super_block *sb, int type) +{ + return 0; +} + void f2fs_quota_off_umount(struct super_block *sb) { } @@ -1592,28 +2168,35 @@ static int f2fs_get_context(struct inode *inode, void *ctx, size_t len) static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len, void *fs_data) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + /* + * Encrypting the root directory is not allowed because fsck + * expects lost+found directory to exist and remain unencrypted + * if LOST_FOUND feature is enabled. + * + */ + if (f2fs_sb_has_lost_found(sbi) && + inode->i_ino == F2FS_ROOT_INO(sbi)) + return -EPERM; + return f2fs_setxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len, fs_data, XATTR_CREATE); } -static unsigned f2fs_max_namelen(struct inode *inode) +static bool f2fs_dummy_context(struct inode *inode) { - return S_ISLNK(inode->i_mode) ? - inode->i_sb->s_blocksize : F2FS_NAME_LEN; + return DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(inode)); } static const struct fscrypt_operations f2fs_cryptops = { .key_prefix = "f2fs:", .get_context = f2fs_get_context, .set_context = f2fs_set_context, - .is_encrypted = f2fs_encrypted_inode, + .dummy_context = f2fs_dummy_context, .empty_dir = f2fs_empty_dir, - .max_namelen = f2fs_max_namelen, -}; -#else -static const struct fscrypt_operations f2fs_cryptops = { - .is_encrypted = f2fs_encrypted_inode, + .max_namelen = F2FS_NAME_LEN, }; #endif @@ -1623,7 +2206,7 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb, struct f2fs_sb_info *sbi = F2FS_SB(sb); struct inode *inode; - if (check_nid_range(sbi, ino)) + if (f2fs_check_nid_range(sbi, ino)) return ERR_PTR(-ESTALE); /* @@ -1669,7 +2252,7 @@ static loff_t max_file_blocks(void) /* * note: previously, result is equal to (DEF_ADDRS_PER_INODE - - * F2FS_INLINE_XATTR_ADDRS), but now f2fs try to reserve more + * DEFAULT_INLINE_XATTR_ADDRS), but now f2fs try to reserve more * space in inode.i_addr, it will be more safe to reassign * result as zero. */ @@ -1694,7 +2277,6 @@ static int __f2fs_commit_super(struct buffer_head *bh, lock_buffer(bh); if (super) memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super)); - set_buffer_uptodate(bh); set_buffer_dirty(bh); unlock_buffer(bh); @@ -1813,6 +2395,26 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, (bh->b_data + F2FS_SUPER_OFFSET); struct super_block *sb = sbi->sb; unsigned int blocksize; + size_t crc_offset = 0; + __u32 crc = 0; + + /* Check checksum_offset and crc in superblock */ + if (__F2FS_HAS_FEATURE(raw_super, F2FS_FEATURE_SB_CHKSUM)) { + crc_offset = le32_to_cpu(raw_super->checksum_offset); + if (crc_offset != + offsetof(struct f2fs_super_block, crc)) { + f2fs_msg(sb, KERN_INFO, + "Invalid SB checksum offset: %zu", + crc_offset); + return 1; + } + crc = le32_to_cpu(raw_super->crc); + if (!f2fs_crc_valid(sbi, crc, raw_super, crc_offset)) { + f2fs_msg(sb, KERN_INFO, + "Invalid SB checksum value: %u", crc); + return 1; + } + } if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) { f2fs_msg(sb, KERN_INFO, @@ -1910,10 +2512,14 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, secs_per_zone, total_sections); return 1; } - if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION) { + if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION || + raw_super->hot_ext_count > F2FS_MAX_EXTENSION || + (le32_to_cpu(raw_super->extension_count) + + raw_super->hot_ext_count) > F2FS_MAX_EXTENSION) { f2fs_msg(sb, KERN_INFO, - "Corrupted extension count (%u > %u)", + "Corrupted extension count (%u + %u > %u)", le32_to_cpu(raw_super->extension_count), + raw_super->hot_ext_count, F2FS_MAX_EXTENSION); return 1; } @@ -1946,7 +2552,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return 0; } -int sanity_check_ckpt(struct f2fs_sb_info *sbi) +int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) { unsigned int total, fsmeta; struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); @@ -1959,7 +2565,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) unsigned int segment_count_main; unsigned int cp_pack_start_sum, cp_payload; block_t user_block_count; - int i; + int i, j; total = le32_to_cpu(raw_super->segment_count); fsmeta = le32_to_cpu(raw_super->segment_count_ckpt); @@ -2000,11 +2606,43 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs || le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg) return 1; + for (j = i + 1; j < NR_CURSEG_NODE_TYPE; j++) { + if (le32_to_cpu(ckpt->cur_node_segno[i]) == + le32_to_cpu(ckpt->cur_node_segno[j])) { + f2fs_msg(sbi->sb, KERN_ERR, + "Node segment (%u, %u) has the same " + "segno: %u", i, j, + le32_to_cpu(ckpt->cur_node_segno[i])); + return 1; + } + } } for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) { if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs || le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg) return 1; + for (j = i + 1; j < NR_CURSEG_DATA_TYPE; j++) { + if (le32_to_cpu(ckpt->cur_data_segno[i]) == + le32_to_cpu(ckpt->cur_data_segno[j])) { + f2fs_msg(sbi->sb, KERN_ERR, + "Data segment (%u, %u) has the same " + "segno: %u", i, j, + le32_to_cpu(ckpt->cur_data_segno[i])); + return 1; + } + } + } + for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { + for (j = i; j < NR_CURSEG_DATA_TYPE; j++) { + if (le32_to_cpu(ckpt->cur_node_segno[i]) == + le32_to_cpu(ckpt->cur_data_segno[j])) { + f2fs_msg(sbi->sb, KERN_ERR, + "Data segment (%u) and Data segment (%u)" + " has the same segno: %u", i, j, + le32_to_cpu(ckpt->cur_node_segno[i])); + return 1; + } + } } sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize); @@ -2039,7 +2677,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) static void init_sb_info(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = sbi->raw_super; - int i, j; + int i; sbi->log_sectors_per_block = le32_to_cpu(raw_super->log_sectors_per_block); @@ -2057,24 +2695,34 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->node_ino_num = le32_to_cpu(raw_super->node_ino); sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino); sbi->cur_victim_sec = NULL_SECNO; + sbi->next_victim_seg[BG_GC] = NULL_SEGNO; + sbi->next_victim_seg[FG_GC] = NULL_SEGNO; sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; + sbi->migration_granularity = sbi->segs_per_sec; sbi->dir_level = DEF_DIR_LEVEL; sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL; sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL; + sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL; + sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL; + sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL; clear_sbi_flag(sbi, SBI_NEED_FSCK); for (i = 0; i < NR_COUNT_TYPE; i++) atomic_set(&sbi->nr_pages[i], 0); - atomic_set(&sbi->wb_sync_req, 0); + for (i = 0; i < META; i++) + atomic_set(&sbi->wb_sync_req[i], 0); INIT_LIST_HEAD(&sbi->s_list); mutex_init(&sbi->umount_mutex); - for (i = 0; i < NR_PAGE_TYPE - 1; i++) - for (j = HOT; j < NR_TEMP_TYPE; j++) - mutex_init(&sbi->wio_mutex[i][j]); + init_rwsem(&sbi->io_order_lock); spin_lock_init(&sbi->cp_lock); + + sbi->dirty_device = 0; + spin_lock_init(&sbi->dev_lock); + + init_rwsem(&sbi->sb_lock); } static int init_percpu_info(struct f2fs_sb_info *sbi) @@ -2085,8 +2733,12 @@ static int init_percpu_info(struct f2fs_sb_info *sbi) if (err) return err; - return percpu_counter_init(&sbi->total_valid_inode_count, 0, + err = percpu_counter_init(&sbi->total_valid_inode_count, 0, GFP_KERNEL); + if (err) + percpu_counter_destroy(&sbi->alloc_valid_block_count); + + return err; } #ifdef CONFIG_BLK_DEV_ZONED @@ -2100,7 +2752,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) unsigned int n = 0; int err = -EIO; - if (!f2fs_sb_mounted_blkzoned(sbi->sb)) + if (!f2fs_sb_has_blkzoned(sbi)) return 0; if (sbi->blocks_per_blkz && sbi->blocks_per_blkz != @@ -2116,14 +2768,17 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) if (nr_sectors & (bdev_zone_sectors(bdev) - 1)) FDEV(devi).nr_blkz++; - FDEV(devi).blkz_type = kmalloc(FDEV(devi).nr_blkz, GFP_KERNEL); + FDEV(devi).blkz_type = f2fs_kmalloc(sbi, FDEV(devi).nr_blkz, + GFP_KERNEL); if (!FDEV(devi).blkz_type) return -ENOMEM; #define F2FS_REPORT_NR_ZONES 4096 - zones = kcalloc(F2FS_REPORT_NR_ZONES, sizeof(struct blk_zone), - GFP_KERNEL); + zones = f2fs_kzalloc(sbi, + array_size(F2FS_REPORT_NR_ZONES, + sizeof(struct blk_zone)), + GFP_KERNEL); if (!zones) return -ENOMEM; @@ -2148,7 +2803,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) } } - kfree(zones); + kvfree(zones); return err; } @@ -2208,7 +2863,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, /* No valid superblock */ if (!*raw_super) - kfree(super); + kvfree(super); else err = 0; @@ -2218,6 +2873,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) { struct buffer_head *bh; + __u32 crc = 0; int err; if ((recover && f2fs_readonly(sbi->sb)) || @@ -2226,8 +2882,15 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) return -EROFS; } + /* we should update superblock crc here */ + if (!recover && f2fs_sb_has_sb_chksum(sbi)) { + crc = f2fs_crc32(sbi, F2FS_RAW_SUPER(sbi), + offsetof(struct f2fs_super_block, crc)); + F2FS_RAW_SUPER(sbi)->crc = cpu_to_le32(crc); + } + /* write back-up superblock first */ - bh = sb_getblk(sbi->sb, sbi->valid_super_block ? 0: 1); + bh = sb_bread(sbi->sb, sbi->valid_super_block ? 0 : 1); if (!bh) return -EIO; err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi)); @@ -2238,7 +2901,7 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) return err; /* write current valid superblock */ - bh = sb_getblk(sbi->sb, sbi->valid_super_block); + bh = sb_bread(sbi->sb, sbi->valid_super_block); if (!bh) return -EIO; err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi)); @@ -2263,8 +2926,10 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) * Initialize multiple devices information, or single * zoned block device information. */ - sbi->devs = kcalloc(max_devices, sizeof(struct f2fs_dev_info), - GFP_KERNEL); + sbi->devs = f2fs_kzalloc(sbi, + array_size(max_devices, + sizeof(struct f2fs_dev_info)), + GFP_KERNEL); if (!sbi->devs) return -ENOMEM; @@ -2306,7 +2971,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) #ifdef CONFIG_BLK_DEV_ZONED if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM && - !f2fs_sb_mounted_blkzoned(sbi->sb)) { + !f2fs_sb_has_blkzoned(sbi)) { f2fs_msg(sbi->sb, KERN_ERR, "Zoned block device feature not enabled\n"); return -EINVAL; @@ -2340,6 +3005,20 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) return 0; } +static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) +{ + struct f2fs_sm_info *sm_i = SM_I(sbi); + + /* adjust parameters according to the volume size */ + if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) { + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; + sm_i->dcc_info->discard_granularity = 1; + sm_i->ipu_policy = 1 << F2FS_IPU_FORCE; + } + + sbi->readdir_ra = 1; +} + static int f2fs_fill_super(struct super_block *sb, void *data, int silent) { struct f2fs_sb_info *sbi; @@ -2388,7 +3067,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sbi->raw_super = raw_super; /* precompute checksum seed for metadata */ - if (f2fs_sb_has_inode_chksum(sb)) + if (f2fs_sb_has_inode_chksum(sbi)) sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid, sizeof(raw_super->uuid)); @@ -2398,7 +3077,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) * devices, but mandatory for host-managed zoned block devices. */ #ifndef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_mounted_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sbi)) { f2fs_msg(sb, KERN_ERR, "Zoned block device support is not enabled\n"); err = -EOPNOTSUPP; @@ -2425,12 +3104,24 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) #ifdef CONFIG_QUOTA sb->dq_op = &f2fs_quota_operations; - sb->s_qcop = &f2fs_quotactl_ops; + if (f2fs_sb_has_quota_ino(sbi)) + sb->s_qcop = &dquot_quotactl_sysfile_ops; + else + sb->s_qcop = &f2fs_quotactl_ops; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; + + if (f2fs_sb_has_quota_ino(sbi)) { + for (i = 0; i < MAXQUOTAS; i++) { + if (f2fs_qf_ino(sbi->sb, i)) + sbi->nquota_files++; + } + } #endif sb->s_op = &f2fs_sops; +#ifdef CONFIG_F2FS_FS_ENCRYPTION sb->s_cop = &f2fs_cryptops; +#endif sb->s_xattr = f2fs_xattr_handlers; sb->s_export_op = &f2fs_export_ops; sb->s_magic = F2FS_SUPER_MAGIC; @@ -2438,10 +3129,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid)); + sb->s_iflags |= SB_I_CGROUPWB; /* init f2fs-specific super block info */ sbi->valid_super_block = valid_super_block; mutex_init(&sbi->gc_mutex); + mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); init_rwsem(&sbi->node_write); init_rwsem(&sbi->node_change); @@ -2458,11 +3151,14 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) int n = (i == META) ? 1: NR_TEMP_TYPE; int j; - sbi->write_io[i] = kmalloc(n * sizeof(struct f2fs_bio_info), - GFP_KERNEL); + sbi->write_io[i] = + f2fs_kmalloc(sbi, + array_size(n, + sizeof(struct f2fs_bio_info)), + GFP_KERNEL); if (!sbi->write_io[i]) { err = -ENOMEM; - goto free_options; + goto free_bio_info; } for (j = HOT; j < n; j++) { @@ -2480,14 +3176,14 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) err = init_percpu_info(sbi); if (err) - goto free_options; + goto free_bio_info; if (F2FS_IO_SIZE(sbi) > 1) { sbi->write_io_dummy = mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0); if (!sbi->write_io_dummy) { err = -ENOMEM; - goto free_options; + goto free_percpu; } } @@ -2499,12 +3195,15 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto free_io_dummy; } - err = get_valid_checkpoint(sbi); + err = f2fs_get_valid_checkpoint(sbi); if (err) { f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint"); goto free_meta_inode; } + if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG)) + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + /* Initialize device list */ err = f2fs_scan_devices(sbi); if (err) { @@ -2521,24 +3220,28 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) le64_to_cpu(sbi->ckpt->valid_block_count); sbi->last_valid_block_count = sbi->total_valid_block_count; sbi->reserved_blocks = 0; + sbi->current_reserved_blocks = 0; + limit_reserve_root(sbi); for (i = 0; i < NR_INODE_TYPE; i++) { INIT_LIST_HEAD(&sbi->inode_list[i]); spin_lock_init(&sbi->inode_lock[i]); } - init_extent_cache_info(sbi); + f2fs_init_extent_cache_info(sbi); - init_ino_entry_info(sbi); + f2fs_init_ino_entry_info(sbi); + + f2fs_init_fsync_node_info(sbi); /* setup f2fs internal modules */ - err = build_segment_manager(sbi); + err = f2fs_build_segment_manager(sbi); if (err) { f2fs_msg(sb, KERN_ERR, "Failed to initialize F2FS segment manager"); goto free_sm; } - err = build_node_manager(sbi); + err = f2fs_build_node_manager(sbi); if (err) { f2fs_msg(sb, KERN_ERR, "Failed to initialize F2FS node manager"); @@ -2556,22 +3259,20 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sbi->kbytes_written = le64_to_cpu(seg_i->journal->info.kbytes_written); - build_gc_manager(sbi); + f2fs_build_gc_manager(sbi); + + err = f2fs_build_stats(sbi); + if (err) + goto free_nm; /* get an inode for node space */ sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi)); if (IS_ERR(sbi->node_inode)) { f2fs_msg(sb, KERN_ERR, "Failed to read node inode"); err = PTR_ERR(sbi->node_inode); - goto free_nm; + goto free_stats; } - f2fs_join_shrinker(sbi); - - err = f2fs_build_stats(sbi); - if (err) - goto free_nm; - /* read root inode and dentry */ root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); if (IS_ERR(root)) { @@ -2579,7 +3280,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) err = PTR_ERR(root); goto free_node_inode; } - if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { + if (!S_ISDIR(root->i_mode) || !root->i_blocks || + !root->i_size || !root->i_nlink) { iput(root); err = -EINVAL; goto free_node_inode; @@ -2595,10 +3297,22 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto free_root_inode; +#ifdef CONFIG_QUOTA + /* Enable quota usage during mount */ + if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) { + err = f2fs_enable_quotas(sb); + if (err) + f2fs_msg(sb, KERN_ERR, + "Cannot turn on quotas: error %d", err); + } +#endif /* if there are nt orphan nodes free them */ - err = recover_orphan_inodes(sbi); + err = f2fs_recover_orphan_inodes(sbi); if (err) - goto free_sysfs; + goto free_meta; + + if (unlikely(is_set_ckpt_flags(sbi, CP_DISABLED_FLAG))) + goto skip_recovery; /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { @@ -2618,7 +3332,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (!retry) goto skip_recovery; - err = recover_fsync_data(sbi, false); + err = f2fs_recover_fsync_data(sbi, false); if (err < 0) { need_fsck = true; f2fs_msg(sb, KERN_ERR, @@ -2626,30 +3340,38 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto free_meta; } } else { - err = recover_fsync_data(sbi, true); + err = f2fs_recover_fsync_data(sbi, true); if (!f2fs_readonly(sb) && err > 0) { err = -EINVAL; f2fs_msg(sb, KERN_ERR, "Need to recover fsync data"); - goto free_sysfs; + goto free_meta; } } skip_recovery: - /* recover_fsync_data() cleared this already */ + /* f2fs_recover_fsync_data() cleared this already */ clear_sbi_flag(sbi, SBI_POR_DOING); + if (test_opt(sbi, DISABLE_CHECKPOINT)) { + err = f2fs_disable_checkpoint(sbi); + if (err) + goto free_meta; + } else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) { + f2fs_enable_checkpoint(sbi); + } + /* * If filesystem is not mounted as read-only then * do start the gc_thread. */ if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) { /* After POR, we can run background GC thread.*/ - err = start_gc_thread(sbi); + err = f2fs_start_gc_thread(sbi); if (err) goto free_meta; } - kfree(options); + kvfree(options); /* recover broken superblock */ if (recovery) { @@ -2659,6 +3381,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sbi->valid_super_block ? 1 : 2, err); } + f2fs_join_shrinker(sbi); + + f2fs_tuning_parameters(sbi); + f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx", cur_cp_version(F2FS_CKPT(sbi))); f2fs_update_time(sbi, CP_TIME); @@ -2666,54 +3392,59 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) return 0; free_meta: - f2fs_sync_inode_meta(sbi); +#ifdef CONFIG_QUOTA + f2fs_truncate_quota_inode_pages(sb); + if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) + f2fs_quota_off_umount(sbi->sb); +#endif /* - * Some dirty meta pages can be produced by recover_orphan_inodes() + * Some dirty meta pages can be produced by f2fs_recover_orphan_inodes() * failed by EIO. Then, iput(node_inode) can trigger balance_fs_bg() - * followed by write_checkpoint() through f2fs_write_node_pages(), which - * falls into an infinite loop in sync_meta_pages(). + * followed by f2fs_write_checkpoint() through f2fs_write_node_pages(), which + * falls into an infinite loop in f2fs_sync_meta_pages(). */ truncate_inode_pages_final(META_MAPPING(sbi)); -free_sysfs: f2fs_unregister_sysfs(sbi); free_root_inode: dput(sb->s_root); sb->s_root = NULL; free_node_inode: + f2fs_release_ino_entry(sbi, true); truncate_inode_pages_final(NODE_MAPPING(sbi)); - mutex_lock(&sbi->umount_mutex); - release_ino_entry(sbi, true); - f2fs_leave_shrinker(sbi); iput(sbi->node_inode); - mutex_unlock(&sbi->umount_mutex); + sbi->node_inode = NULL; +free_stats: f2fs_destroy_stats(sbi); free_nm: - destroy_node_manager(sbi); + f2fs_destroy_node_manager(sbi); free_sm: - destroy_segment_manager(sbi); + f2fs_destroy_segment_manager(sbi); free_devices: destroy_device_list(sbi); - kfree(sbi->ckpt); + kvfree(sbi->ckpt); free_meta_inode: make_bad_inode(sbi->meta_inode); iput(sbi->meta_inode); + sbi->meta_inode = NULL; free_io_dummy: mempool_destroy(sbi->write_io_dummy); -free_options: - for (i = 0; i < NR_PAGE_TYPE; i++) - kfree(sbi->write_io[i]); +free_percpu: destroy_percpu_info(sbi); +free_bio_info: + for (i = 0; i < NR_PAGE_TYPE; i++) + kvfree(sbi->write_io[i]); +free_options: #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) - kfree(sbi->s_qf_names[i]); + kvfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif - kfree(options); + kvfree(options); free_sb_buf: - kfree(raw_super); + kvfree(raw_super); free_sbi: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); - kfree(sbi); + kvfree(sbi); /* give only one another chance */ if (retry) { @@ -2733,9 +3464,22 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, static void kill_f2fs_super(struct super_block *sb) { if (sb->s_root) { - set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE); - stop_gc_thread(F2FS_SB(sb)); - stop_discard_thread(F2FS_SB(sb)); + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + set_sbi_flag(sbi, SBI_IS_CLOSE); + f2fs_stop_gc_thread(sbi); + f2fs_stop_discard_thread(sbi); + + if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) || + !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { + struct cp_control cpc = { + .reason = CP_UMOUNT, + }; + f2fs_write_checkpoint(sbi, &cpc); + } + + if (is_sbi_flag_set(sbi, SBI_IS_RECOVERED) && f2fs_readonly(sb)) + sb->s_flags &= ~SB_RDONLY; } kill_block_super(sb); } @@ -2784,16 +3528,16 @@ static int __init init_f2fs_fs(void) err = init_inodecache(); if (err) goto fail; - err = create_node_manager_caches(); + err = f2fs_create_node_manager_caches(); if (err) goto free_inodecache; - err = create_segment_manager_caches(); + err = f2fs_create_segment_manager_caches(); if (err) goto free_node_manager_caches; - err = create_checkpoint_caches(); + err = f2fs_create_checkpoint_caches(); if (err) goto free_segment_manager_caches; - err = create_extent_cache(); + err = f2fs_create_extent_cache(); if (err) goto free_checkpoint_caches; err = f2fs_init_sysfs(); @@ -2808,8 +3552,13 @@ static int __init init_f2fs_fs(void) err = f2fs_create_root_stats(); if (err) goto free_filesystem; + err = f2fs_init_post_read_processing(); + if (err) + goto free_root_stats; return 0; +free_root_stats: + f2fs_destroy_root_stats(); free_filesystem: unregister_filesystem(&f2fs_fs_type); free_shrinker: @@ -2817,13 +3566,13 @@ static int __init init_f2fs_fs(void) free_sysfs: f2fs_exit_sysfs(); free_extent_cache: - destroy_extent_cache(); + f2fs_destroy_extent_cache(); free_checkpoint_caches: - destroy_checkpoint_caches(); + f2fs_destroy_checkpoint_caches(); free_segment_manager_caches: - destroy_segment_manager_caches(); + f2fs_destroy_segment_manager_caches(); free_node_manager_caches: - destroy_node_manager_caches(); + f2fs_destroy_node_manager_caches(); free_inodecache: destroy_inodecache(); fail: @@ -2832,14 +3581,15 @@ static int __init init_f2fs_fs(void) static void __exit exit_f2fs_fs(void) { + f2fs_destroy_post_read_processing(); f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); unregister_shrinker(&f2fs_shrinker_info); f2fs_exit_sysfs(); - destroy_extent_cache(); - destroy_checkpoint_caches(); - destroy_segment_manager_caches(); - destroy_node_manager_caches(); + f2fs_destroy_extent_cache(); + f2fs_destroy_checkpoint_caches(); + f2fs_destroy_segment_manager_caches(); + f2fs_destroy_node_manager_caches(); destroy_inodecache(); f2fs_destroy_trace_ios(); }
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 93af9d7..948c1a2 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c
@@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs sysfs interface * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ * Copyright (c) 2017 Chao Yu <chao@kernel.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/compiler.h> #include <linux/proc_fs.h> @@ -31,7 +28,7 @@ enum { FAULT_INFO_RATE, /* struct f2fs_fault_info */ FAULT_INFO_TYPE, /* struct f2fs_fault_info */ #endif - RESERVED_BLOCKS, + RESERVED_BLOCKS, /* struct f2fs_sb_info */ }; struct f2fs_attr { @@ -59,11 +56,18 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) #ifdef CONFIG_F2FS_FAULT_INJECTION else if (struct_type == FAULT_INFO_RATE || struct_type == FAULT_INFO_TYPE) - return (unsigned char *)&sbi->fault_info; + return (unsigned char *)&F2FS_OPTION(sbi).fault_info; #endif return NULL; } +static ssize_t dirty_segments_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)(dirty_segments(sbi))); +} + static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -86,25 +90,46 @@ static ssize_t features_show(struct f2fs_attr *a, if (!sb->s_bdev->bd_part) return snprintf(buf, PAGE_SIZE, "0\n"); - if (f2fs_sb_has_crypto(sb)) + if (f2fs_sb_has_encrypt(sbi)) len += snprintf(buf, PAGE_SIZE - len, "%s", "encryption"); - if (f2fs_sb_mounted_blkzoned(sb)) + if (f2fs_sb_has_blkzoned(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "blkzoned"); - if (f2fs_sb_has_extra_attr(sb)) + if (f2fs_sb_has_extra_attr(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "extra_attr"); - if (f2fs_sb_has_project_quota(sb)) + if (f2fs_sb_has_project_quota(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "projquota"); - if (f2fs_sb_has_inode_chksum(sb)) + if (f2fs_sb_has_inode_chksum(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "inode_checksum"); + if (f2fs_sb_has_flexible_inline_xattr(sbi)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "flexible_inline_xattr"); + if (f2fs_sb_has_quota_ino(sbi)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "quota_ino"); + if (f2fs_sb_has_inode_crtime(sbi)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "inode_crtime"); + if (f2fs_sb_has_lost_found(sbi)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "lost_found"); + if (f2fs_sb_has_sb_chksum(sbi)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "sb_checksum"); len += snprintf(buf + len, PAGE_SIZE - len, "\n"); return len; } +static ssize_t current_reserved_blocks_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", sbi->current_reserved_blocks); +} + static ssize_t f2fs_sbi_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -115,12 +140,33 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, if (!ptr) return -EINVAL; + if (!strcmp(a->attr.name, "extension_list")) { + __u8 (*extlist)[F2FS_EXTENSION_LEN] = + sbi->raw_super->extension_list; + int cold_count = le32_to_cpu(sbi->raw_super->extension_count); + int hot_count = sbi->raw_super->hot_ext_count; + int len = 0, i; + + len += snprintf(buf + len, PAGE_SIZE - len, + "cold file extension:\n"); + for (i = 0; i < cold_count; i++) + len += snprintf(buf + len, PAGE_SIZE - len, "%s\n", + extlist[i]); + + len += snprintf(buf + len, PAGE_SIZE - len, + "hot file extension:\n"); + for (i = cold_count; i < cold_count + hot_count; i++) + len += snprintf(buf + len, PAGE_SIZE - len, "%s\n", + extlist[i]); + return len; + } + ui = (unsigned int *)(ptr + a->offset); return snprintf(buf, PAGE_SIZE, "%u\n", *ui); } -static ssize_t f2fs_sbi_store(struct f2fs_attr *a, +static ssize_t __sbi_store(struct f2fs_attr *a, struct f2fs_sb_info *sbi, const char *buf, size_t count) { @@ -133,6 +179,41 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, if (!ptr) return -EINVAL; + if (!strcmp(a->attr.name, "extension_list")) { + const char *name = strim((char *)buf); + bool set = true, hot; + + if (!strncmp(name, "[h]", 3)) + hot = true; + else if (!strncmp(name, "[c]", 3)) + hot = false; + else + return -EINVAL; + + name += 3; + + if (*name == '!') { + name++; + set = false; + } + + if (strlen(name) >= F2FS_EXTENSION_LEN) + return -EINVAL; + + down_write(&sbi->sb_lock); + + ret = f2fs_update_extension_list(sbi, name, hot, set); + if (ret) + goto out; + + ret = f2fs_commit_super(sbi, false); + if (ret) + f2fs_update_extension_list(sbi, name, hot, !set); +out: + up_write(&sbi->sb_lock); + return ret ? ret : count; + } + ui = (unsigned int *)(ptr + a->offset); ret = kstrtoul(skip_spaces(buf), 0, &t); @@ -144,51 +225,85 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, #endif if (a->struct_type == RESERVED_BLOCKS) { spin_lock(&sbi->stat_lock); - if ((unsigned long)sbi->total_valid_block_count + t > - (unsigned long)sbi->user_block_count) { + if (t > (unsigned long)(sbi->user_block_count - + F2FS_OPTION(sbi).root_reserved_blocks)) { spin_unlock(&sbi->stat_lock); return -EINVAL; } *ui = t; + sbi->current_reserved_blocks = min(sbi->reserved_blocks, + sbi->user_block_count - valid_user_blocks(sbi)); spin_unlock(&sbi->stat_lock); return count; } if (!strcmp(a->attr.name, "discard_granularity")) { - struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - int i; - if (t == 0 || t > MAX_PLIST_NUM) return -EINVAL; if (t == *ui) return count; - - mutex_lock(&dcc->cmd_lock); - for (i = 0; i < MAX_PLIST_NUM; i++) { - if (i >= t - 1) - dcc->pend_list_tag[i] |= P_ACTIVE; - else - dcc->pend_list_tag[i] &= (~P_ACTIVE); - } - mutex_unlock(&dcc->cmd_lock); - *ui = t; return count; } + if (!strcmp(a->attr.name, "migration_granularity")) { + if (t == 0 || t > sbi->segs_per_sec) + return -EINVAL; + } + + if (!strcmp(a->attr.name, "trim_sections")) + return -EINVAL; + + if (!strcmp(a->attr.name, "gc_urgent")) { + if (t >= 1) { + sbi->gc_mode = GC_URGENT; + if (sbi->gc_thread) { + sbi->gc_thread->gc_wake = 1; + wake_up_interruptible_all( + &sbi->gc_thread->gc_wait_queue_head); + wake_up_discard_thread(sbi, true); + } + } else { + sbi->gc_mode = GC_NORMAL; + } + return count; + } + if (!strcmp(a->attr.name, "gc_idle")) { + if (t == GC_IDLE_CB) + sbi->gc_mode = GC_IDLE_CB; + else if (t == GC_IDLE_GREEDY) + sbi->gc_mode = GC_IDLE_GREEDY; + else + sbi->gc_mode = GC_NORMAL; + return count; + } + *ui = t; if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0) f2fs_reset_iostat(sbi); - if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) { - sbi->gc_thread->gc_wake = 1; - wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head); - wake_up_discard_thread(sbi, true); - } - return count; } +static ssize_t f2fs_sbi_store(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, + const char *buf, size_t count) +{ + ssize_t ret; + bool gc_entry = (!strcmp(a->attr.name, "gc_urgent") || + a->struct_type == GC_THREAD); + + if (gc_entry) { + if (!down_read_trylock(&sbi->sb->s_umount)) + return -EAGAIN; + } + ret = __sbi_store(a, sbi, buf, count); + if (gc_entry) + up_read(&sbi->sb->s_umount); + + return ret; +} + static ssize_t f2fs_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -223,6 +338,11 @@ enum feat_id { FEAT_EXTRA_ATTR, FEAT_PROJECT_QUOTA, FEAT_INODE_CHECKSUM, + FEAT_FLEXIBLE_INLINE_XATTR, + FEAT_QUOTA_INO, + FEAT_INODE_CRTIME, + FEAT_LOST_FOUND, + FEAT_SB_CHECKSUM, }; static ssize_t f2fs_feature_show(struct f2fs_attr *a, @@ -235,6 +355,11 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a, case FEAT_EXTRA_ATTR: case FEAT_PROJECT_QUOTA: case FEAT_INODE_CHECKSUM: + case FEAT_FLEXIBLE_INLINE_XATTR: + case FEAT_QUOTA_INO: + case FEAT_INODE_CRTIME: + case FEAT_LOST_FOUND: + case FEAT_SB_CHECKSUM: return snprintf(buf, PAGE_SIZE, "supported\n"); } return 0; @@ -269,8 +394,8 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent_sleep_time, F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); -F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); -F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent, gc_urgent); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle, gc_mode); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity); @@ -279,21 +404,32 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, migration_granularity, migration_granularity); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, discard_idle_interval, + interval_time[DISCARD_TIME]); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold); +F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list); #ifdef CONFIG_F2FS_FAULT_INJECTION F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); #endif +F2FS_GENERAL_RO_ATTR(dirty_segments); F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); F2FS_GENERAL_RO_ATTR(features); +F2FS_GENERAL_RO_ATTR(current_reserved_blocks); #ifdef CONFIG_F2FS_FS_ENCRYPTION F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO); @@ -305,6 +441,11 @@ F2FS_FEATURE_RO_ATTR(atomic_write, FEAT_ATOMIC_WRITE); F2FS_FEATURE_RO_ATTR(extra_attr, FEAT_EXTRA_ATTR); F2FS_FEATURE_RO_ATTR(project_quota, FEAT_PROJECT_QUOTA); F2FS_FEATURE_RO_ATTR(inode_checksum, FEAT_INODE_CHECKSUM); +F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR); +F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO); +F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME); +F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND); +F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -321,22 +462,32 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(ipu_policy), ATTR_LIST(min_ipu_util), ATTR_LIST(min_fsync_blocks), + ATTR_LIST(min_seq_blocks), ATTR_LIST(min_hot_blocks), + ATTR_LIST(min_ssr_sections), ATTR_LIST(max_victim_search), + ATTR_LIST(migration_granularity), ATTR_LIST(dir_level), ATTR_LIST(ram_thresh), ATTR_LIST(ra_nid_pages), ATTR_LIST(dirty_nats_ratio), ATTR_LIST(cp_interval), ATTR_LIST(idle_interval), + ATTR_LIST(discard_idle_interval), + ATTR_LIST(gc_idle_interval), ATTR_LIST(iostat_enable), + ATTR_LIST(readdir_ra), + ATTR_LIST(gc_pin_file_thresh), + ATTR_LIST(extension_list), #ifdef CONFIG_F2FS_FAULT_INJECTION ATTR_LIST(inject_rate), ATTR_LIST(inject_type), #endif + ATTR_LIST(dirty_segments), ATTR_LIST(lifetime_write_kbytes), ATTR_LIST(features), ATTR_LIST(reserved_blocks), + ATTR_LIST(current_reserved_blocks), NULL, }; @@ -351,6 +502,11 @@ static struct attribute *f2fs_feat_attrs[] = { ATTR_LIST(extra_attr), ATTR_LIST(project_quota), ATTR_LIST(inode_checksum), + ATTR_LIST(flexible_inline_xattr), + ATTR_LIST(quota_ino), + ATTR_LIST(inode_crtime), + ATTR_LIST(lost_found), + ATTR_LIST(sb_checksum), NULL, }; @@ -478,6 +634,28 @@ static int __maybe_unused iostat_info_seq_show(struct seq_file *seq, return 0; } +static int __maybe_unused victim_bits_seq_show(struct seq_file *seq, + void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + int i; + + seq_puts(seq, "format: victim_secmap bitmaps\n"); + + for (i = 0; i < MAIN_SECS(sbi); i++) { + if ((i % 10) == 0) + seq_printf(seq, "%-10d", i); + seq_printf(seq, "%d", test_bit(i, dirty_i->victim_secmap) ? 1 : 0); + if ((i % 10) == 9 || i == (MAIN_SECS(sbi) - 1)) + seq_putc(seq, '\n'); + else + seq_putc(seq, ' '); + } + return 0; +} + #define F2FS_PROC_FILE_DEF(_name) \ static int _name##_open_fs(struct inode *inode, struct file *file) \ { \ @@ -494,6 +672,7 @@ static const struct file_operations f2fs_seq_##_name##_fops = { \ F2FS_PROC_FILE_DEF(segment_info); F2FS_PROC_FILE_DEF(segment_bits); F2FS_PROC_FILE_DEF(iostat_info); +F2FS_PROC_FILE_DEF(victim_bits); int __init f2fs_init_sysfs(void) { @@ -544,6 +723,8 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) &f2fs_seq_segment_bits_fops, sb); proc_create_data("iostat_info", S_IRUGO, sbi->s_proc, &f2fs_seq_iostat_info_fops, sb); + proc_create_data("victim_bits", S_IRUGO, sbi->s_proc, + &f2fs_seq_victim_bits_fops, sb); } return 0; } @@ -554,6 +735,7 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi) remove_proc_entry("iostat_info", sbi->s_proc); remove_proc_entry("segment_info", sbi->s_proc); remove_proc_entry("segment_bits", sbi->s_proc); + remove_proc_entry("victim_bits", sbi->s_proc); remove_proc_entry(sbi->sb->s_id, f2fs_proc_root); } kobject_del(&sbi->s_kobj);
diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index bccbbf2..ce2a5eb 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c
@@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs IO tracer * * Copyright (c) 2014 Motorola Mobility * Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/f2fs_fs.h> @@ -17,7 +14,7 @@ #include "trace.h" static RADIX_TREE(pids, GFP_ATOMIC); -static spinlock_t pids_lock; +static struct mutex pids_lock; static struct last_io_info last_io; static inline void __print_last_io(void) @@ -64,7 +61,7 @@ void f2fs_trace_pid(struct page *page) if (radix_tree_preload(GFP_NOFS)) return; - spin_lock(&pids_lock); + mutex_lock(&pids_lock); p = radix_tree_lookup(&pids, pid); if (p == current) goto out; @@ -77,7 +74,7 @@ void f2fs_trace_pid(struct page *page) MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), pid, current->comm); out: - spin_unlock(&pids_lock); + mutex_unlock(&pids_lock); radix_tree_preload_end(); } @@ -122,7 +119,7 @@ void f2fs_trace_ios(struct f2fs_io_info *fio, int flush) void f2fs_build_trace_ios(void) { - spin_lock_init(&pids_lock); + mutex_init(&pids_lock); } #define PIDVEC_SIZE 128 @@ -150,7 +147,7 @@ void f2fs_destroy_trace_ios(void) pid_t next_pid = 0; unsigned int found; - spin_lock(&pids_lock); + mutex_lock(&pids_lock); while ((found = gang_lookup_pids(pid, next_pid, PIDVEC_SIZE))) { unsigned idx; @@ -158,5 +155,5 @@ void f2fs_destroy_trace_ios(void) for (idx = 0; idx < found; idx++) radix_tree_delete(&pids, pid[idx]); } - spin_unlock(&pids_lock); + mutex_unlock(&pids_lock); }
diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h index 67db24a..e8075fc 100644 --- a/fs/f2fs/trace.h +++ b/fs/f2fs/trace.h
@@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs IO tracer * * Copyright (c) 2014 Motorola Mobility * Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef __F2FS_TRACE_H__ #define __F2FS_TRACE_H__
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 7c65540..18d5ffb 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c
@@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/xattr.c * @@ -13,10 +14,6 @@ * suggestion of Luka Renko <luka.renko@hermes.si>. * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>, * Red Hat Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/rwsem.h> #include <linux/f2fs_fs.h> @@ -37,9 +34,6 @@ static int f2fs_xattr_generic_get(const struct xattr_handler *handler, return -EOPNOTSUPP; break; case F2FS_XATTR_INDEX_TRUSTED: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - break; case F2FS_XATTR_INDEX_SECURITY: break; default: @@ -62,9 +56,6 @@ static int f2fs_xattr_generic_set(const struct xattr_handler *handler, return -EOPNOTSUPP; break; case F2FS_XATTR_INDEX_TRUSTED: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - break; case F2FS_XATTR_INDEX_SECURITY: break; default: @@ -100,12 +91,22 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler, const char *name, const void *value, size_t size, int flags) { + unsigned char old_advise = F2FS_I(inode)->i_advise; + unsigned char new_advise; + if (!inode_owner_or_capable(inode)) return -EPERM; if (value == NULL) return -EINVAL; - F2FS_I(inode)->i_advise |= *(char *)value; + new_advise = *(char *)value; + if (new_advise & ~FADVISE_MODIFIABLE_BITS) + return -EINVAL; + + new_advise = new_advise & FADVISE_MODIFIABLE_BITS; + new_advise |= old_advise & ~FADVISE_MODIFIABLE_BITS; + + F2FS_I(inode)->i_advise = new_advise; f2fs_mark_inode_dirty_sync(inode, true); return 0; } @@ -217,12 +218,12 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index, return entry; } -static struct f2fs_xattr_entry *__find_inline_xattr(void *base_addr, - void **last_addr, int index, - size_t len, const char *name) +static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode, + void *base_addr, void **last_addr, int index, + size_t len, const char *name) { struct f2fs_xattr_entry *entry; - unsigned int inline_size = F2FS_INLINE_XATTR_ADDRS << 2; + unsigned int inline_size = inline_xattr_size(inode); list_for_each_xattr(entry, base_addr) { if ((void *)entry + sizeof(__u32) > base_addr + inline_size || @@ -241,12 +242,54 @@ static struct f2fs_xattr_entry *__find_inline_xattr(void *base_addr, return entry; } +static int read_inline_xattr(struct inode *inode, struct page *ipage, + void *txattr_addr) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + unsigned int inline_size = inline_xattr_size(inode); + struct page *page = NULL; + void *inline_addr; + + if (ipage) { + inline_addr = inline_xattr_addr(inode, ipage); + } else { + page = f2fs_get_node_page(sbi, inode->i_ino); + if (IS_ERR(page)) + return PTR_ERR(page); + + inline_addr = inline_xattr_addr(inode, page); + } + memcpy(txattr_addr, inline_addr, inline_size); + f2fs_put_page(page, 1); + + return 0; +} + +static int read_xattr_block(struct inode *inode, void *txattr_addr) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + nid_t xnid = F2FS_I(inode)->i_xattr_nid; + unsigned int inline_size = inline_xattr_size(inode); + struct page *xpage; + void *xattr_addr; + + /* The inode already has an extended attribute block. */ + xpage = f2fs_get_node_page(sbi, xnid); + if (IS_ERR(xpage)) + return PTR_ERR(xpage); + + xattr_addr = page_address(xpage); + memcpy(txattr_addr + inline_size, xattr_addr, VALID_XATTR_BLOCK_SIZE); + f2fs_put_page(xpage, 1); + + return 0; +} + static int lookup_all_xattrs(struct inode *inode, struct page *ipage, unsigned int index, unsigned int len, const char *name, struct f2fs_xattr_entry **xe, - void **base_addr) + void **base_addr, int *base_size) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); void *cur_addr, *txattr_addr, *last_addr = NULL; nid_t xnid = F2FS_I(inode)->i_xattr_nid; unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0; @@ -256,50 +299,30 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, if (!size && !inline_size) return -ENODATA; - txattr_addr = kzalloc(inline_size + size + XATTR_PADDING_SIZE, - GFP_F2FS_ZERO); + *base_size = inline_size + size + XATTR_PADDING_SIZE; + txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), *base_size, GFP_NOFS); if (!txattr_addr) return -ENOMEM; /* read from inline xattr */ if (inline_size) { - struct page *page = NULL; - void *inline_addr; + err = read_inline_xattr(inode, ipage, txattr_addr); + if (err) + goto out; - if (ipage) { - inline_addr = inline_xattr_addr(ipage); - } else { - page = get_node_page(sbi, inode->i_ino); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto out; - } - inline_addr = inline_xattr_addr(page); - } - memcpy(txattr_addr, inline_addr, inline_size); - f2fs_put_page(page, 1); - - *xe = __find_inline_xattr(txattr_addr, &last_addr, + *xe = __find_inline_xattr(inode, txattr_addr, &last_addr, index, len, name); - if (*xe) + if (*xe) { + *base_size = inline_size; goto check; + } } /* read from xattr node block */ if (xnid) { - struct page *xpage; - void *xattr_addr; - - /* The inode already has an extended attribute block. */ - xpage = get_node_page(sbi, xnid); - if (IS_ERR(xpage)) { - err = PTR_ERR(xpage); + err = read_xattr_block(inode, txattr_addr); + if (err) goto out; - } - - xattr_addr = page_address(xpage); - memcpy(txattr_addr + inline_size, xattr_addr, size); - f2fs_put_page(xpage, 1); } if (last_addr) @@ -324,7 +347,6 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, static int read_all_xattrs(struct inode *inode, struct page *ipage, void **base_addr) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_xattr_header *header; nid_t xnid = F2FS_I(inode)->i_xattr_nid; unsigned int size = VALID_XATTR_BLOCK_SIZE; @@ -332,45 +354,23 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage, void *txattr_addr; int err; - txattr_addr = kzalloc(inline_size + size + XATTR_PADDING_SIZE, - GFP_F2FS_ZERO); + txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), + inline_size + size + XATTR_PADDING_SIZE, GFP_NOFS); if (!txattr_addr) return -ENOMEM; /* read from inline xattr */ if (inline_size) { - struct page *page = NULL; - void *inline_addr; - - if (ipage) { - inline_addr = inline_xattr_addr(ipage); - } else { - page = get_node_page(sbi, inode->i_ino); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto fail; - } - inline_addr = inline_xattr_addr(page); - } - memcpy(txattr_addr, inline_addr, inline_size); - f2fs_put_page(page, 1); + err = read_inline_xattr(inode, ipage, txattr_addr); + if (err) + goto fail; } /* read from xattr node block */ if (xnid) { - struct page *xpage; - void *xattr_addr; - - /* The inode already has an extended attribute block. */ - xpage = get_node_page(sbi, xnid); - if (IS_ERR(xpage)) { - err = PTR_ERR(xpage); + err = read_xattr_block(inode, txattr_addr); + if (err) goto fail; - } - - xattr_addr = page_address(xpage); - memcpy(txattr_addr + inline_size, xattr_addr, size); - f2fs_put_page(xpage, 1); } header = XATTR_HDR(txattr_addr); @@ -392,70 +392,81 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); size_t inline_size = inline_xattr_size(inode); + struct page *in_page = NULL; void *xattr_addr; + void *inline_addr = NULL; struct page *xpage; nid_t new_nid = 0; - int err; + int err = 0; if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid) - if (!alloc_nid(sbi, &new_nid)) + if (!f2fs_alloc_nid(sbi, &new_nid)) return -ENOSPC; /* write to inline xattr */ if (inline_size) { - struct page *page = NULL; - void *inline_addr; - if (ipage) { - inline_addr = inline_xattr_addr(ipage); - f2fs_wait_on_page_writeback(ipage, NODE, true); - set_page_dirty(ipage); + inline_addr = inline_xattr_addr(inode, ipage); } else { - page = get_node_page(sbi, inode->i_ino); - if (IS_ERR(page)) { - alloc_nid_failed(sbi, new_nid); - return PTR_ERR(page); + in_page = f2fs_get_node_page(sbi, inode->i_ino); + if (IS_ERR(in_page)) { + f2fs_alloc_nid_failed(sbi, new_nid); + return PTR_ERR(in_page); } - inline_addr = inline_xattr_addr(page); - f2fs_wait_on_page_writeback(page, NODE, true); + inline_addr = inline_xattr_addr(inode, in_page); } - memcpy(inline_addr, txattr_addr, inline_size); - f2fs_put_page(page, 1); + f2fs_wait_on_page_writeback(ipage ? ipage : in_page, + NODE, true, true); /* no need to use xattr node block */ if (hsize <= inline_size) { - err = truncate_xattr_node(inode, ipage); - alloc_nid_failed(sbi, new_nid); - return err; + err = f2fs_truncate_xattr_node(inode); + f2fs_alloc_nid_failed(sbi, new_nid); + if (err) { + f2fs_put_page(in_page, 1); + return err; + } + memcpy(inline_addr, txattr_addr, inline_size); + set_page_dirty(ipage ? ipage : in_page); + goto in_page_out; } } /* write to xattr node block */ if (F2FS_I(inode)->i_xattr_nid) { - xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); + xpage = f2fs_get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); if (IS_ERR(xpage)) { - alloc_nid_failed(sbi, new_nid); - return PTR_ERR(xpage); + err = PTR_ERR(xpage); + f2fs_alloc_nid_failed(sbi, new_nid); + goto in_page_out; } f2fs_bug_on(sbi, new_nid); - f2fs_wait_on_page_writeback(xpage, NODE, true); + f2fs_wait_on_page_writeback(xpage, NODE, true, true); } else { struct dnode_of_data dn; set_new_dnode(&dn, inode, NULL, NULL, new_nid); - xpage = new_node_page(&dn, XATTR_NODE_OFFSET); + xpage = f2fs_new_node_page(&dn, XATTR_NODE_OFFSET); if (IS_ERR(xpage)) { - alloc_nid_failed(sbi, new_nid); - return PTR_ERR(xpage); + err = PTR_ERR(xpage); + f2fs_alloc_nid_failed(sbi, new_nid); + goto in_page_out; } - alloc_nid_done(sbi, new_nid); + f2fs_alloc_nid_done(sbi, new_nid); } - xattr_addr = page_address(xpage); - memcpy(xattr_addr, txattr_addr + inline_size, VALID_XATTR_BLOCK_SIZE); - set_page_dirty(xpage); - f2fs_put_page(xpage, 1); - return 0; + if (inline_size) + memcpy(inline_addr, txattr_addr, inline_size); + memcpy(xattr_addr, txattr_addr + inline_size, VALID_XATTR_BLOCK_SIZE); + + if (inline_size) + set_page_dirty(ipage ? ipage : in_page); + set_page_dirty(xpage); + + f2fs_put_page(xpage, 1); +in_page_out: + f2fs_put_page(in_page, 1); + return err; } int f2fs_getxattr(struct inode *inode, int index, const char *name, @@ -465,6 +476,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, int error = 0; unsigned int size, len; void *base_addr = NULL; + int base_size; if (name == NULL) return -EINVAL; @@ -475,7 +487,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, down_read(&F2FS_I(inode)->i_xattr_sem); error = lookup_all_xattrs(inode, ipage, index, len, name, - &entry, &base_addr); + &entry, &base_addr, &base_size); up_read(&F2FS_I(inode)->i_xattr_sem); if (error) return error; @@ -489,6 +501,11 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, if (buffer) { char *pval = entry->e_name + entry->e_name_len; + + if (base_size - (pval - (char *)base_addr) < size) { + error = -ERANGE; + goto out; + } memcpy(buffer, pval, size); } error = size; @@ -592,7 +609,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, goto exit; } - if (f2fs_xattr_value_same(here, value, size)) + if (value && f2fs_xattr_value_same(here, value, size)) goto exit; } else if ((flags & XATTR_REPLACE)) { error = -ENODATA; @@ -681,7 +698,11 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err; - /* this case is only from init_inode_metadata */ + err = dquot_initialize(inode); + if (err) + return err; + + /* this case is only from f2fs_init_inode_metadata */ if (ipage) return __f2fs_setxattr(inode, index, name, value, size, ipage, flags);
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index dbcd1d1..67db134 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h
@@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/xattr.h * @@ -9,10 +10,6 @@ * On-disk format of extended attributes for the ext2 filesystem. * * (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef __F2FS_XATTR_H__ #define __F2FS_XATTR_H__
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 3244932f..8e1b9b1 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c
@@ -2112,7 +2112,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) (dirtytime && (inode->i_state & I_DIRTY_INODE))) return; - if (unlikely(block_dump)) + if (unlikely(block_dump > 1)) block_dump___mark_inode_dirty(inode); spin_lock(&inode->i_lock);
diff --git a/fs/fs_struct.c b/fs/fs_struct.c index be02507..987c95b 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c
@@ -45,6 +45,7 @@ void set_fs_pwd(struct fs_struct *fs, const struct path *path) if (old_pwd.dentry) path_put(&old_pwd); } +EXPORT_SYMBOL(set_fs_pwd); static inline int replace_path(struct path *p, const struct path *old, const struct path *new) { @@ -90,6 +91,7 @@ void free_fs_struct(struct fs_struct *fs) path_put(&fs->pwd); kmem_cache_free(fs_cachep, fs); } +EXPORT_SYMBOL(free_fs_struct); void exit_fs(struct task_struct *tsk) { @@ -128,6 +130,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) } return fs; } +EXPORT_SYMBOL_GPL(copy_fs_struct); int unshare_fs_struct(void) {
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 63fd333..9fdb027 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c
@@ -14,6 +14,7 @@ #include <linux/sched/signal.h> #include <linux/uio.h> #include <linux/miscdevice.h> +#include <linux/namei.h> #include <linux/pagemap.h> #include <linux/file.h> #include <linux/slab.h> @@ -21,6 +22,7 @@ #include <linux/swap.h> #include <linux/splice.h> #include <linux/sched.h> +#include <linux/freezer.h> MODULE_ALIAS_MISCDEV(FUSE_MINOR); MODULE_ALIAS("devname:fuse"); @@ -471,7 +473,9 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) * Either request is already in userspace, or it was forced. * Wait it out. */ - wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags)); + while (!test_bit(FR_FINISHED, &req->flags)) + wait_event_freezable(req->waitq, + test_bit(FR_FINISHED, &req->flags)); } static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) @@ -1912,6 +1916,12 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, cs->move_pages = 0; err = copy_out_args(cs, &req->out, nbytes); + if (req->in.h.opcode == FUSE_CANONICAL_PATH) { + char *path = (char *)req->out.args[0].value; + + path[req->out.args[0].size - 1] = 0; + req->out.h.error = kern_path(path, 0, req->canonical_path); + } fuse_copy_finish(cs); spin_lock(&fpq->lock);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index d933ecb..b77eb19 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c
@@ -262,6 +262,50 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) goto out; } +/* + * Get the canonical path. Since we must translate to a path, this must be done + * in the context of the userspace daemon, however, the userspace daemon cannot + * look up paths on its own. Instead, we handle the lookup as a special case + * inside of the write request. + */ +static void fuse_dentry_canonical_path(const struct path *path, struct path *canonical_path) { + struct inode *inode = path->dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req; + int err; + char *path_name; + + req = fuse_get_req(fc, 1); + err = PTR_ERR(req); + if (IS_ERR(req)) + goto default_path; + + path_name = (char*)__get_free_page(GFP_KERNEL); + if (!path_name) { + fuse_put_request(fc, req); + goto default_path; + } + + req->in.h.opcode = FUSE_CANONICAL_PATH; + req->in.h.nodeid = get_node_id(inode); + req->in.numargs = 0; + req->out.numargs = 1; + req->out.args[0].size = PATH_MAX; + req->out.args[0].value = path_name; + req->canonical_path = canonical_path; + req->out.argvar = 1; + fuse_request_send(fc, req); + err = req->out.h.error; + fuse_put_request(fc, req); + free_page((unsigned long)path_name); + if (!err) + return; +default_path: + canonical_path->dentry = path->dentry; + canonical_path->mnt = path->mnt; + path_get(canonical_path); +} + static int invalid_nodeid(u64 nodeid) { return !nodeid || nodeid == FUSE_ROOT_ID; @@ -284,11 +328,13 @@ const struct dentry_operations fuse_dentry_operations = { .d_revalidate = fuse_dentry_revalidate, .d_init = fuse_dentry_init, .d_release = fuse_dentry_release, + .d_canonical_path = fuse_dentry_canonical_path, }; const struct dentry_operations fuse_root_dentry_operations = { .d_init = fuse_dentry_init, .d_release = fuse_dentry_release, + .d_canonical_path = fuse_dentry_canonical_path, }; int fuse_valid_type(int m)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 19ea122..d4eb4a2 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c
@@ -838,9 +838,9 @@ struct fuse_fill_data { unsigned nr_pages; }; -static int fuse_readpages_fill(void *_data, struct page *page) +static int fuse_readpages_fill(struct file *_data, struct page *page) { - struct fuse_fill_data *data = _data; + struct fuse_fill_data *data = (struct fuse_fill_data *)_data; struct fuse_req *req = data->req; struct inode *inode = data->inode; struct fuse_conn *fc = get_fuse_conn(inode);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index e682f2e..42977d8 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h
@@ -370,6 +370,9 @@ struct fuse_req { /** Inode used in the request or NULL */ struct inode *inode; + /** Path used for completing d_canonical_path */ + struct path *canonical_path; + /** AIO control block */ struct fuse_io_priv *io;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index ffb6178..9713898 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c
@@ -31,7 +31,7 @@ static struct kmem_cache *fuse_inode_cachep; struct list_head fuse_conn_list; DEFINE_MUTEX(fuse_mutex); -static int set_global_limit(const char *val, struct kernel_param *kp); +static int set_global_limit(const char *val, const struct kernel_param *kp); unsigned max_user_bgreq; module_param_call(max_user_bgreq, set_global_limit, param_get_uint, @@ -826,7 +826,7 @@ static void sanitize_global_limit(unsigned *limit) *limit = (1 << 16) - 1; } -static int set_global_limit(const char *val, struct kernel_param *kp) +static int set_global_limit(const char *val, const struct kernel_param *kp) { int rv;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 68ed069..ec3147e 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c
@@ -280,22 +280,6 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping, for(i = 0; i < nr_pages; i++) { struct page *page = pvec->pages[i]; - /* - * At this point, the page may be truncated or - * invalidated (changing page->mapping to NULL), or - * even swizzled back from swapper_space to tmpfs file - * mapping. However, page->index will not change - * because we have a reference on the page. - */ - if (page->index > end) { - /* - * can't be range_cyclic (1st pass) because - * end == -1 in that case. - */ - ret = 1; - break; - } - *done_index = page->index; lock_page(page); @@ -413,8 +397,8 @@ static int gfs2_write_cache_jdata(struct address_space *mapping, tag_pages_for_writeback(mapping, index, end); done_index = index; while (!done && (index <= end)) { - nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); + nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end, + tag); if (nr_pages == 0) break; @@ -523,7 +507,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) * */ -static int __gfs2_readpage(void *file, struct page *page) +static int __gfs2_readpage(struct file *file, struct page *page) { struct gfs2_inode *ip = GFS2_I(page->mapping->host); struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
diff --git a/fs/inode.c b/fs/inode.c index cfc36d1..602e28f 100644 --- a/fs/inode.c +++ b/fs/inode.c
@@ -1794,7 +1794,7 @@ int dentry_needs_remove_privs(struct dentry *dentry) return mask; } -static int __remove_privs(struct dentry *dentry, int kill) +static int __remove_privs(struct vfsmount *mnt, struct dentry *dentry, int kill) { struct iattr newattrs; @@ -1803,7 +1803,7 @@ static int __remove_privs(struct dentry *dentry, int kill) * Note we call this on write, so notify_change will not * encounter any conflicting delegations: */ - return notify_change(dentry, &newattrs, NULL); + return notify_change2(mnt, dentry, &newattrs, NULL); } /* @@ -1825,7 +1825,7 @@ int file_remove_privs(struct file *file) if (kill < 0) return kill; if (kill) - error = __remove_privs(dentry, kill); + error = __remove_privs(file->f_path.mnt, dentry, kill); if (!error) inode_has_no_xattr(inode);
diff --git a/fs/internal.h b/fs/internal.h index 48cee21..b433845 100644 --- a/fs/internal.h +++ b/fs/internal.h
@@ -90,9 +90,11 @@ extern struct file *get_empty_filp(void); * super.c */ extern int do_remount_sb(struct super_block *, int, void *, int); +extern int do_remount_sb2(struct vfsmount *, struct super_block *, int, + void *, int); extern bool trylock_super(struct super_block *sb); extern struct dentry *mount_fs(struct file_system_type *, - int, const char *, void *); + int, const char *, struct vfsmount *, void *); extern struct super_block *user_get_super(dev_t); /*
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 809cbccb..da2825a 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c
@@ -614,7 +614,7 @@ static struct ctl_table nlm_sysctl_root[] = { */ #define param_set_min_max(name, type, which_strtol, min, max) \ -static int param_set_##name(const char *val, struct kernel_param *kp) \ +static int param_set_##name(const char *val, const struct kernel_param *kp) \ { \ char *endp; \ __typeof__(type) num = which_strtol(val, &endp, 0); \
diff --git a/fs/mpage.c b/fs/mpage.c index b7e7f57..93e3cf7 100644 --- a/fs/mpage.c +++ b/fs/mpage.c
@@ -32,6 +32,14 @@ #include <linux/cleancache.h> #include "internal.h" +#define CREATE_TRACE_POINTS +#include <trace/events/android_fs.h> + +EXPORT_TRACEPOINT_SYMBOL(android_fs_datawrite_start); +EXPORT_TRACEPOINT_SYMBOL(android_fs_datawrite_end); +EXPORT_TRACEPOINT_SYMBOL(android_fs_dataread_start); +EXPORT_TRACEPOINT_SYMBOL(android_fs_dataread_end); + /* * I/O completion handler for multipage BIOs. * @@ -49,6 +57,16 @@ static void mpage_end_io(struct bio *bio) struct bio_vec *bv; int i; + if (trace_android_fs_dataread_end_enabled() && + (bio_data_dir(bio) == READ)) { + struct page *first_page = bio->bi_io_vec[0].bv_page; + + if (first_page != NULL) + trace_android_fs_dataread_end(first_page->mapping->host, + page_offset(first_page), + bio->bi_iter.bi_size); + } + bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; page_endio(page, op_is_write(bio_op(bio)), @@ -60,6 +78,24 @@ static void mpage_end_io(struct bio *bio) static struct bio *mpage_bio_submit(int op, int op_flags, struct bio *bio) { + if (trace_android_fs_dataread_start_enabled() && (op == REQ_OP_READ)) { + struct page *first_page = bio->bi_io_vec[0].bv_page; + + if (first_page != NULL) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + first_page->mapping->host); + trace_android_fs_dataread_start( + first_page->mapping->host, + page_offset(first_page), + bio->bi_iter.bi_size, + current->pid, + path, + current->comm); + } + } bio->bi_end_io = mpage_end_io; bio_set_op_attrs(bio, op, op_flags); guard_bio_eod(op, bio);
diff --git a/fs/namei.c b/fs/namei.c index d1e467b..d33064a 100644 --- a/fs/namei.c +++ b/fs/namei.c
@@ -377,9 +377,11 @@ EXPORT_SYMBOL(generic_permission); * flag in inode->i_opflags, that says "this has not special * permission function, use the fast case". */ -static inline int do_inode_permission(struct inode *inode, int mask) +static inline int do_inode_permission(struct vfsmount *mnt, struct inode *inode, int mask) { if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) { + if (likely(mnt && inode->i_op->permission2)) + return inode->i_op->permission2(mnt, inode, mask); if (likely(inode->i_op->permission)) return inode->i_op->permission(inode, mask); @@ -403,7 +405,7 @@ static inline int do_inode_permission(struct inode *inode, int mask) * This does not check for a read-only file system. You probably want * inode_permission(). */ -int __inode_permission(struct inode *inode, int mask) +int __inode_permission2(struct vfsmount *mnt, struct inode *inode, int mask) { int retval; @@ -423,7 +425,7 @@ int __inode_permission(struct inode *inode, int mask) return -EACCES; } - retval = do_inode_permission(inode, mask); + retval = do_inode_permission(mnt, inode, mask); if (retval) return retval; @@ -431,7 +433,14 @@ int __inode_permission(struct inode *inode, int mask) if (retval) return retval; - return security_inode_permission(inode, mask); + retval = security_inode_permission(inode, mask); + return retval; +} +EXPORT_SYMBOL(__inode_permission2); + +int __inode_permission(struct inode *inode, int mask) +{ + return __inode_permission2(NULL, inode, mask); } EXPORT_SYMBOL(__inode_permission); @@ -466,14 +475,20 @@ static int sb_permission(struct super_block *sb, struct inode *inode, int mask) * * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask. */ -int inode_permission(struct inode *inode, int mask) +int inode_permission2(struct vfsmount *mnt, struct inode *inode, int mask) { int retval; retval = sb_permission(inode->i_sb, inode, mask); if (retval) return retval; - return __inode_permission(inode, mask); + return __inode_permission2(mnt, inode, mask); +} +EXPORT_SYMBOL(inode_permission2); + +int inode_permission(struct inode *inode, int mask) +{ + return inode_permission2(NULL, inode, mask); } EXPORT_SYMBOL(inode_permission); @@ -1707,13 +1722,13 @@ static struct dentry *lookup_slow(const struct qstr *name, static inline int may_lookup(struct nameidata *nd) { if (nd->flags & LOOKUP_RCU) { - int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK); + int err = inode_permission2(nd->path.mnt, nd->inode, MAY_EXEC|MAY_NOT_BLOCK); if (err != -ECHILD) return err; if (unlazy_walk(nd)) return -ECHILD; } - return inode_permission(nd->inode, MAY_EXEC); + return inode_permission2(nd->path.mnt, nd->inode, MAY_EXEC); } static inline int handle_dots(struct nameidata *nd, int type) @@ -2489,6 +2504,7 @@ EXPORT_SYMBOL(vfs_path_lookup); /** * lookup_one_len - filesystem helper to lookup single pathname component * @name: pathname component to lookup + * @mnt: mount we are looking up on * @base: base directory to lookup from * @len: maximum length @len should be interpreted to * @@ -2497,7 +2513,7 @@ EXPORT_SYMBOL(vfs_path_lookup); * * The caller must hold base->i_mutex. */ -struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) +struct dentry *lookup_one_len2(const char *name, struct vfsmount *mnt, struct dentry *base, int len) { struct qstr this; unsigned int c; @@ -2531,12 +2547,18 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) return ERR_PTR(err); } - err = inode_permission(base->d_inode, MAY_EXEC); + err = inode_permission2(mnt, base->d_inode, MAY_EXEC); if (err) return ERR_PTR(err); return __lookup_hash(&this, base, 0); } +EXPORT_SYMBOL(lookup_one_len2); + +struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) +{ + return lookup_one_len2(name, NULL, base, len); +} EXPORT_SYMBOL(lookup_one_len); /** @@ -2814,7 +2836,7 @@ EXPORT_SYMBOL(__check_sticky); * 11. We don't allow removal of NFS sillyrenamed files; it's handled by * nfs_async_unlink(). */ -static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) +static int may_delete(struct vfsmount *mnt, struct inode *dir, struct dentry *victim, bool isdir) { struct inode *inode = d_backing_inode(victim); int error; @@ -2826,7 +2848,7 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) BUG_ON(victim->d_parent->d_inode != dir); audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); - error = inode_permission(dir, MAY_WRITE | MAY_EXEC); + error = inode_permission2(mnt, dir, MAY_WRITE | MAY_EXEC); if (error) return error; if (IS_APPEND(dir)) @@ -2858,7 +2880,7 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) * 4. We should have write and exec permissions on dir * 5. We can't do it if dir is immutable (done in permission()) */ -static inline int may_create(struct inode *dir, struct dentry *child) +static inline int may_create(struct vfsmount *mnt, struct inode *dir, struct dentry *child) { struct user_namespace *s_user_ns; audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE); @@ -2870,7 +2892,7 @@ static inline int may_create(struct inode *dir, struct dentry *child) if (!kuid_has_mapping(s_user_ns, current_fsuid()) || !kgid_has_mapping(s_user_ns, current_fsgid())) return -EOVERFLOW; - return inode_permission(dir, MAY_WRITE | MAY_EXEC); + return inode_permission2(mnt, dir, MAY_WRITE | MAY_EXEC); } /* @@ -2917,10 +2939,10 @@ void unlock_rename(struct dentry *p1, struct dentry *p2) } EXPORT_SYMBOL(unlock_rename); -int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - bool want_excl) +int vfs_create2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, + umode_t mode, bool want_excl) { - int error = may_create(dir, dentry); + int error = may_create(mnt, dir, dentry); if (error) return error; @@ -2936,6 +2958,13 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, fsnotify_create(dir, dentry); return error; } +EXPORT_SYMBOL(vfs_create2); + +int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool want_excl) +{ + return vfs_create2(NULL, dir, dentry, mode, want_excl); +} EXPORT_SYMBOL(vfs_create); bool may_open_dev(const struct path *path) @@ -2947,6 +2976,7 @@ bool may_open_dev(const struct path *path) static int may_open(const struct path *path, int acc_mode, int flag) { struct dentry *dentry = path->dentry; + struct vfsmount *mnt = path->mnt; struct inode *inode = dentry->d_inode; int error; @@ -2971,7 +3001,7 @@ static int may_open(const struct path *path, int acc_mode, int flag) break; } - error = inode_permission(inode, MAY_OPEN | acc_mode); + error = inode_permission2(mnt, inode, MAY_OPEN | acc_mode); if (error) return error; @@ -3006,7 +3036,7 @@ static int handle_truncate(struct file *filp) if (!error) error = security_path_truncate(path); if (!error) { - error = do_truncate(path->dentry, 0, + error = do_truncate2(path->mnt, path->dentry, 0, ATTR_MTIME|ATTR_CTIME|ATTR_OPEN, filp); } @@ -3033,7 +3063,7 @@ static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t m !kgid_has_mapping(s_user_ns, current_fsgid())) return -EOVERFLOW; - error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC); + error = inode_permission2(dir->mnt, dir->dentry->d_inode, MAY_WRITE | MAY_EXEC); if (error) return error; @@ -3452,7 +3482,8 @@ struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag) int error; /* we want directory to be writable */ - error = inode_permission(dir, MAY_WRITE | MAY_EXEC); + error = inode_permission2(ERR_PTR(-EOPNOTSUPP), dir, + MAY_WRITE | MAY_EXEC); if (error) goto out_err; error = -EOPNOTSUPP; @@ -3730,9 +3761,9 @@ inline struct dentry *user_path_create(int dfd, const char __user *pathname, } EXPORT_SYMBOL(user_path_create); -int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) +int vfs_mknod2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { - int error = may_create(dir, dentry); + int error = may_create(mnt, dir, dentry); if (error) return error; @@ -3756,6 +3787,12 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) fsnotify_create(dir, dentry); return error; } +EXPORT_SYMBOL(vfs_mknod2); + +int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) +{ + return vfs_mknod2(NULL, dir, dentry, mode, dev); +} EXPORT_SYMBOL(vfs_mknod); static int may_mknod(umode_t mode) @@ -3798,12 +3835,12 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, goto out; switch (mode & S_IFMT) { case 0: case S_IFREG: - error = vfs_create(path.dentry->d_inode,dentry,mode,true); + error = vfs_create2(path.mnt, path.dentry->d_inode,dentry,mode,true); if (!error) ima_post_path_mknod(dentry); break; case S_IFCHR: case S_IFBLK: - error = vfs_mknod(path.dentry->d_inode,dentry,mode, + error = vfs_mknod2(path.mnt, path.dentry->d_inode,dentry,mode, new_decode_dev(dev)); break; case S_IFIFO: case S_IFSOCK: @@ -3824,9 +3861,9 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d return sys_mknodat(AT_FDCWD, filename, mode, dev); } -int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +int vfs_mkdir2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, umode_t mode) { - int error = may_create(dir, dentry); + int error = may_create(mnt, dir, dentry); unsigned max_links = dir->i_sb->s_max_links; if (error) @@ -3848,6 +3885,12 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) fsnotify_mkdir(dir, dentry); return error; } +EXPORT_SYMBOL(vfs_mkdir2); + +int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + return vfs_mkdir2(NULL, dir, dentry, mode); +} EXPORT_SYMBOL(vfs_mkdir); SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) @@ -3866,7 +3909,7 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) mode &= ~current_umask(); error = security_path_mkdir(&path, dentry, mode); if (!error) - error = vfs_mkdir(path.dentry->d_inode, dentry, mode); + error = vfs_mkdir2(path.mnt, path.dentry->d_inode, dentry, mode); done_path_create(&path, dentry); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; @@ -3880,9 +3923,9 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode) return sys_mkdirat(AT_FDCWD, pathname, mode); } -int vfs_rmdir(struct inode *dir, struct dentry *dentry) +int vfs_rmdir2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry) { - int error = may_delete(dir, dentry, 1); + int error = may_delete(mnt, dir, dentry, 1); if (error) return error; @@ -3917,6 +3960,12 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) d_delete(dentry); return error; } +EXPORT_SYMBOL(vfs_rmdir2); + +int vfs_rmdir(struct inode *dir, struct dentry *dentry) +{ + return vfs_rmdir2(NULL, dir, dentry); +} EXPORT_SYMBOL(vfs_rmdir); static long do_rmdir(int dfd, const char __user *pathname) @@ -3962,7 +4011,7 @@ static long do_rmdir(int dfd, const char __user *pathname) error = security_path_rmdir(&path, dentry); if (error) goto exit3; - error = vfs_rmdir(path.dentry->d_inode, dentry); + error = vfs_rmdir2(path.mnt, path.dentry->d_inode, dentry); exit3: dput(dentry); exit2: @@ -4001,10 +4050,10 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname) * be appropriate for callers that expect the underlying filesystem not * to be NFS exported. */ -int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode) +int vfs_unlink2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, struct inode **delegated_inode) { struct inode *target = dentry->d_inode; - int error = may_delete(dir, dentry, 0); + int error = may_delete(mnt, dir, dentry, 0); if (error) return error; @@ -4039,6 +4088,12 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate return error; } +EXPORT_SYMBOL(vfs_unlink2); + +int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode) +{ + return vfs_unlink2(NULL, dir, dentry, delegated_inode); +} EXPORT_SYMBOL(vfs_unlink); /* @@ -4086,7 +4141,7 @@ static long do_unlinkat(int dfd, const char __user *pathname) error = security_path_unlink(&path, dentry); if (error) goto exit2; - error = vfs_unlink(path.dentry->d_inode, dentry, &delegated_inode); + error = vfs_unlink2(path.mnt, path.dentry->d_inode, dentry, &delegated_inode); exit2: dput(dentry); } @@ -4136,9 +4191,9 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname) return do_unlinkat(AT_FDCWD, pathname); } -int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) +int vfs_symlink2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, const char *oldname) { - int error = may_create(dir, dentry); + int error = may_create(mnt, dir, dentry); if (error) return error; @@ -4155,6 +4210,12 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) fsnotify_create(dir, dentry); return error; } +EXPORT_SYMBOL(vfs_symlink2); + +int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) +{ + return vfs_symlink2(NULL, dir, dentry, oldname); +} EXPORT_SYMBOL(vfs_symlink); SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, @@ -4177,7 +4238,7 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, error = security_path_symlink(&path, dentry, from->name); if (!error) - error = vfs_symlink(path.dentry->d_inode, dentry, from->name); + error = vfs_symlink2(path.mnt, path.dentry->d_inode, dentry, from->name); done_path_create(&path, dentry); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; @@ -4212,7 +4273,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn * be appropriate for callers that expect the underlying filesystem not * to be NFS exported. */ -int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode) +int vfs_link2(struct vfsmount *mnt, struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode) { struct inode *inode = old_dentry->d_inode; unsigned max_links = dir->i_sb->s_max_links; @@ -4221,7 +4282,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de if (!inode) return -ENOENT; - error = may_create(dir, new_dentry); + error = may_create(mnt, dir, new_dentry); if (error) return error; @@ -4271,6 +4332,12 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de fsnotify_link(dir, inode, new_dentry); return error; } +EXPORT_SYMBOL(vfs_link2); + +int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode) +{ + return vfs_link2(NULL, old_dentry, dir, new_dentry, delegated_inode); +} EXPORT_SYMBOL(vfs_link); /* @@ -4326,7 +4393,7 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, error = security_path_link(old_path.dentry, &new_path, new_dentry); if (error) goto out_dput; - error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode); + error = vfs_link2(old_path.mnt, old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode); out_dput: done_path_create(&new_path, new_dentry); if (delegated_inode) { @@ -4402,7 +4469,8 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname * ->i_mutex on parents, which works but leads to some truly excessive * locking]. */ -int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, +int vfs_rename2(struct vfsmount *mnt, + struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, struct inode **delegated_inode, unsigned int flags) { @@ -4417,19 +4485,19 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (source == target) return 0; - error = may_delete(old_dir, old_dentry, is_dir); + error = may_delete(mnt, old_dir, old_dentry, is_dir); if (error) return error; if (!target) { - error = may_create(new_dir, new_dentry); + error = may_create(mnt, new_dir, new_dentry); } else { new_is_dir = d_is_dir(new_dentry); if (!(flags & RENAME_EXCHANGE)) - error = may_delete(new_dir, new_dentry, is_dir); + error = may_delete(mnt, new_dir, new_dentry, is_dir); else - error = may_delete(new_dir, new_dentry, new_is_dir); + error = may_delete(mnt, new_dir, new_dentry, new_is_dir); } if (error) return error; @@ -4443,12 +4511,12 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, */ if (new_dir != old_dir) { if (is_dir) { - error = inode_permission(source, MAY_WRITE); + error = inode_permission2(mnt, source, MAY_WRITE); if (error) return error; } if ((flags & RENAME_EXCHANGE) && new_is_dir) { - error = inode_permission(target, MAY_WRITE); + error = inode_permission2(mnt, target, MAY_WRITE); if (error) return error; } @@ -4525,6 +4593,14 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, return error; } +EXPORT_SYMBOL(vfs_rename2); + +int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + struct inode **delegated_inode, unsigned int flags) +{ + return vfs_rename2(NULL, old_dir, old_dentry, new_dir, new_dentry, delegated_inode, flags); +} EXPORT_SYMBOL(vfs_rename); SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, @@ -4638,7 +4714,7 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, &new_path, new_dentry, flags); if (error) goto exit5; - error = vfs_rename(old_path.dentry->d_inode, old_dentry, + error = vfs_rename2(old_path.mnt, old_path.dentry->d_inode, old_dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode, flags); exit5: @@ -4683,7 +4759,7 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna int vfs_whiteout(struct inode *dir, struct dentry *dentry) { - int error = may_create(dir, dentry); + int error = may_create(NULL, dir, dentry); if (error) return error;
diff --git a/fs/namespace.c b/fs/namespace.c index e9c13ee..4851e33 100644 --- a/fs/namespace.c +++ b/fs/namespace.c
@@ -226,6 +226,7 @@ static struct mount *alloc_vfsmnt(const char *name) mnt->mnt_count = 1; mnt->mnt_writers = 0; #endif + mnt->mnt.data = NULL; INIT_HLIST_NODE(&mnt->mnt_hash); INIT_LIST_HEAD(&mnt->mnt_child); @@ -637,6 +638,7 @@ int sb_prepare_remount_readonly(struct super_block *sb) static void free_vfsmnt(struct mount *mnt) { + kfree(mnt->mnt.data); kfree_const(mnt->mnt_devname); #ifdef CONFIG_SMP free_percpu(mnt->mnt_pcp); @@ -1040,10 +1042,18 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void if (!mnt) return ERR_PTR(-ENOMEM); + if (type->alloc_mnt_data) { + mnt->mnt.data = type->alloc_mnt_data(); + if (!mnt->mnt.data) { + mnt_free_id(mnt); + free_vfsmnt(mnt); + return ERR_PTR(-ENOMEM); + } + } if (flags & SB_KERNMOUNT) mnt->mnt.mnt_flags = MNT_INTERNAL; - root = mount_fs(type, flags, name, data); + root = mount_fs(type, flags, name, &mnt->mnt, data); if (IS_ERR(root)) { mnt_free_id(mnt); free_vfsmnt(mnt); @@ -1087,6 +1097,14 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, if (!mnt) return ERR_PTR(-ENOMEM); + if (sb->s_op->clone_mnt_data) { + mnt->mnt.data = sb->s_op->clone_mnt_data(old->mnt.data); + if (!mnt->mnt.data) { + err = -ENOMEM; + goto out_free; + } + } + if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE)) mnt->mnt_group_id = 0; /* not a peer of original */ else @@ -2366,8 +2384,14 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags, err = change_mount_flags(path->mnt, ms_flags); else if (!capable(CAP_SYS_ADMIN)) err = -EPERM; - else - err = do_remount_sb(sb, sb_flags, data, 0); + else { + err = do_remount_sb2(path->mnt, sb, sb_flags, data, 0); + namespace_lock(); + lock_mount_hash(); + propagate_remount(mnt); + unlock_mount_hash(); + namespace_unlock(); + } if (!err) { lock_mount_hash(); mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index bf2c436..64dc8ea 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c
@@ -671,8 +671,9 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, * We only need to convert from xdr once so future lookups are much simpler */ static -int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page) +int nfs_readdir_filler(struct file *file, struct page* page) { + nfs_readdir_descriptor_t *desc = (nfs_readdir_descriptor_t *)file; struct inode *inode = file_inode(desc->file); int ret; @@ -705,7 +706,7 @@ static struct page *get_cache_page(nfs_readdir_descriptor_t *desc) { return read_cache_page(desc->file->f_mapping, - desc->page_index, (filler_t *)nfs_readdir_filler, desc); + desc->page_index, nfs_readdir_filler, desc); } /*
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 48d7277..42dbf4f 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c
@@ -354,7 +354,7 @@ struct nfs_readdesc { }; static int -readpage_async_filler(void *data, struct page *page) +readpage_async_filler(struct file *data, struct page *page) { struct nfs_readdesc *desc = (struct nfs_readdesc *)data; struct nfs_page *new;
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 06eb44b..220d5ba 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c
@@ -26,8 +26,9 @@ * and straight-forward than readdir caching. */ -static int nfs_symlink_filler(struct inode *inode, struct page *page) +static int nfs_symlink_filler(struct file *file, struct page *page) { + struct inode *inode = (struct inode *)file; int error; error = NFS_PROTO(inode)->readlink(inode, page, 0, PAGE_SIZE); @@ -66,7 +67,7 @@ static const char *nfs_get_link(struct dentry *dentry, if (err) return err; page = read_cache_page(&inode->i_data, 0, - (filler_t *)nfs_symlink_filler, inode); + nfs_symlink_filler, inode); if (IS_ERR(page)) return ERR_CAST(page); }
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 06ffa13..35989c7 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c
@@ -2158,8 +2158,8 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, pagevec_init(&pvec, 0); - while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY, - PAGEVEC_SIZE)) { + while (pagevec_lookup_tag(&pvec, btcache, &index, + PAGECACHE_TAG_DIRTY)) { for (i = 0; i < pagevec_count(&pvec); i++) { bh = head = page_buffers(pvec.pages[i]); do {
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 8616c46..1c1672691 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c
@@ -257,8 +257,7 @@ int nilfs_copy_dirty_pages(struct address_space *dmap, pagevec_init(&pvec, 0); repeat: - if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY, - PAGEVEC_SIZE)) + if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY)) return 0; for (i = 0; i < pagevec_count(&pvec); i++) { @@ -376,8 +375,8 @@ void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) pagevec_init(&pvec, 0); - while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, - PAGEVEC_SIZE)) { + while (pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY)) { for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i];
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 50e1295..5f229f5 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c
@@ -711,18 +711,14 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, pagevec_init(&pvec, 0); repeat: if (unlikely(index > last) || - !pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, - min_t(pgoff_t, last - index, - PAGEVEC_SIZE - 1) + 1)) + !pagevec_lookup_range_tag(&pvec, mapping, &index, last, + PAGECACHE_TAG_DIRTY)) return ndirties; for (i = 0; i < pagevec_count(&pvec); i++) { struct buffer_head *bh, *head; struct page *page = pvec.pages[i]; - if (unlikely(page->index > last)) - break; - lock_page(page); if (!page_has_buffers(page)) create_empty_buffers(page, i_blocksize(inode), 0); @@ -759,8 +755,8 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode, pagevec_init(&pvec, 0); - while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, - PAGEVEC_SIZE)) { + while (pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY)) { for (i = 0; i < pagevec_count(&pvec); i++) { bh = head = page_buffers(pvec.pages[i]); do {
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 9752e72..685e39e7 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c
@@ -495,7 +495,7 @@ static int fanotify_find_path(int dfd, const char __user *filename, } /* you can only watch an inode if you have read permissions on it */ - ret = inode_permission(path->dentry->d_inode, MAY_READ); + ret = inode_permission2(path->mnt, path->dentry->d_inode, MAY_READ); if (ret) path_put(path); out:
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 7cc7d3f..5c3caea 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c
@@ -335,7 +335,7 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns if (error) return error; /* you can only watch an inode if you have read permissions on it */ - error = inode_permission(path->dentry->d_inode, MAY_READ); + error = inode_permission2(path->mnt, path->dentry->d_inode, MAY_READ); if (error) path_put(path); return error; @@ -671,6 +671,8 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, struct fsnotify_group *group; struct inode *inode; struct path path; + struct path alteredpath; + struct path *canonical_path = &path; struct fd f; int ret; unsigned flags = 0; @@ -710,13 +712,22 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, if (ret) goto fput_and_out; + /* support stacked filesystems */ + if(path.dentry && path.dentry->d_op) { + if (path.dentry->d_op->d_canonical_path) { + path.dentry->d_op->d_canonical_path(&path, &alteredpath); + canonical_path = &alteredpath; + path_put(&path); + } + } + /* inode held in place by reference to path; group by fget on fd */ - inode = path.dentry->d_inode; + inode = canonical_path->dentry->d_inode; group = f.file->private_data; /* create/update an inode mark */ ret = inotify_update_watch(group, inode, mask); - path_put(&path); + path_put(canonical_path); fput_and_out: fdput(f); return ret;
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 9ab9e18..988137d 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -88,13 +88,13 @@ struct workqueue_struct *user_dlm_worker; */ #define DLMFS_CAPABILITIES "bast stackglue" static int param_set_dlmfs_capabilities(const char *val, - struct kernel_param *kp) + const struct kernel_param *kp) { printk(KERN_ERR "%s: readonly parameter\n", kp->name); return -EINVAL; } static int param_get_dlmfs_capabilities(char *buffer, - struct kernel_param *kp) + const struct kernel_param *kp) { return strlcpy(buffer, DLMFS_CAPABILITIES, strlen(DLMFS_CAPABILITIES) + 1);
diff --git a/fs/open.c b/fs/open.c index 7ea1184..39c24f1 100644 --- a/fs/open.c +++ b/fs/open.c
@@ -34,8 +34,8 @@ #include "internal.h" -int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, - struct file *filp) +int do_truncate2(struct vfsmount *mnt, struct dentry *dentry, loff_t length, + unsigned int time_attrs, struct file *filp) { int ret; struct iattr newattrs; @@ -60,18 +60,25 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, inode_lock(dentry->d_inode); /* Note any delegations or leases have already been broken: */ - ret = notify_change(dentry, &newattrs, NULL); + ret = notify_change2(mnt, dentry, &newattrs, NULL); inode_unlock(dentry->d_inode); return ret; } +int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, + struct file *filp) +{ + return do_truncate2(NULL, dentry, length, time_attrs, filp); +} long vfs_truncate(const struct path *path, loff_t length) { struct inode *inode; + struct vfsmount *mnt; struct dentry *upperdentry; long error; inode = path->dentry->d_inode; + mnt = path->mnt; /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ if (S_ISDIR(inode->i_mode)) @@ -83,7 +90,7 @@ long vfs_truncate(const struct path *path, loff_t length) if (error) goto out; - error = inode_permission(inode, MAY_WRITE); + error = inode_permission2(mnt, inode, MAY_WRITE); if (error) goto mnt_drop_write_and_out; @@ -117,7 +124,7 @@ long vfs_truncate(const struct path *path, loff_t length) if (!error) error = security_path_truncate(path); if (!error) - error = do_truncate(path->dentry, length, 0, NULL); + error = do_truncate2(mnt, path->dentry, length, 0, NULL); put_write_and_out: put_write_access(upperdentry->d_inode); @@ -166,6 +173,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) { struct inode *inode; struct dentry *dentry; + struct vfsmount *mnt; struct fd f; int error; @@ -182,6 +190,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) small = 0; dentry = f.file->f_path.dentry; + mnt = f.file->f_path.mnt; inode = dentry->d_inode; error = -EINVAL; if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE)) @@ -202,7 +211,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) if (!error) error = security_path_truncate(&f.file->f_path); if (!error) - error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file); + error = do_truncate2(mnt, dentry, length, ATTR_MTIME|ATTR_CTIME, f.file); sb_end_write(inode->i_sb); out_putf: fdput(f); @@ -356,6 +365,7 @@ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) struct cred *override_cred; struct path path; struct inode *inode; + struct vfsmount *mnt; int res; unsigned int lookup_flags = LOOKUP_FOLLOW; @@ -386,6 +396,7 @@ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) goto out; inode = d_backing_inode(path.dentry); + mnt = path.mnt; if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) { /* @@ -397,7 +408,7 @@ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) goto out_path_release; } - res = inode_permission(inode, mode | MAY_ACCESS); + res = inode_permission2(mnt, inode, mode | MAY_ACCESS); /* SuS v2 requires we report a read only fs too */ if (res || !(mode & S_IWOTH) || special_file(inode->i_mode)) goto out_path_release; @@ -441,7 +452,7 @@ SYSCALL_DEFINE1(chdir, const char __user *, filename) if (error) goto out; - error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); + error = inode_permission2(path.mnt, path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); if (error) goto dput_and_out; @@ -470,7 +481,8 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) if (!d_can_lookup(f.file->f_path.dentry)) goto out_putf; - error = inode_permission(file_inode(f.file), MAY_EXEC | MAY_CHDIR); + error = inode_permission2(f.file->f_path.mnt, file_inode(f.file), + MAY_EXEC | MAY_CHDIR); if (!error) set_fs_pwd(current->fs, &f.file->f_path); out_putf: @@ -489,7 +501,7 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename) if (error) goto out; - error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); + error = inode_permission2(path.mnt, path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); if (error) goto dput_and_out; @@ -529,7 +541,7 @@ static int chmod_common(const struct path *path, umode_t mode) goto out_unlock; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - error = notify_change(path->dentry, &newattrs, &delegated_inode); + error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode); out_unlock: inode_unlock(inode); if (delegated_inode) { @@ -609,7 +621,7 @@ static int chown_common(const struct path *path, uid_t user, gid_t group) inode_lock(inode); error = security_path_chown(path, uid, gid); if (!error) - error = notify_change(path->dentry, &newattrs, &delegated_inode); + error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode); inode_unlock(inode); if (delegated_inode) { error = break_deleg_wait(&delegated_inode);
diff --git a/fs/pnode.c b/fs/pnode.c index 53d411a..681916df 100644 --- a/fs/pnode.c +++ b/fs/pnode.c
@@ -607,3 +607,19 @@ int propagate_umount(struct list_head *list) return 0; } + +void propagate_remount(struct mount *mnt) +{ + struct mount *parent = mnt->mnt_parent; + struct mount *p = mnt, *m; + struct super_block *sb = mnt->mnt.mnt_sb; + + if (!sb->s_op->copy_mnt_data) + return; + for (p = propagation_next(parent, parent); p; + p = propagation_next(p, parent)) { + m = __lookup_mnt(&p->mnt, mnt->mnt_mountpoint); + if (m) + sb->s_op->copy_mnt_data(m->mnt.data, mnt->mnt.data); + } +}
diff --git a/fs/pnode.h b/fs/pnode.h index dc87e65..a9a6576 100644 --- a/fs/pnode.h +++ b/fs/pnode.h
@@ -44,6 +44,7 @@ int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, int propagate_umount(struct list_head *); int propagate_mount_busy(struct mount *, int); void propagate_mount_unlock(struct mount *); +void propagate_remount(struct mount *); void mnt_release_group_id(struct mount *); int get_dominating_id(struct mount *mnt, const struct path *root); unsigned int mnt_get_count(struct mount *mnt);
diff --git a/fs/posix_acl.c b/fs/posix_acl.c index eebf5f6..2fd0fde 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c
@@ -43,7 +43,7 @@ struct posix_acl *get_cached_acl(struct inode *inode, int type) rcu_read_lock(); acl = rcu_dereference(*p); if (!acl || is_uncached_acl(acl) || - atomic_inc_not_zero(&acl->a_refcount)) + refcount_inc_not_zero(&acl->a_refcount)) break; rcu_read_unlock(); cpu_relax(); @@ -164,7 +164,7 @@ EXPORT_SYMBOL(get_acl); void posix_acl_init(struct posix_acl *acl, int count) { - atomic_set(&acl->a_refcount, 1); + refcount_set(&acl->a_refcount, 1); acl->a_count = count; } EXPORT_SYMBOL(posix_acl_init); @@ -197,7 +197,7 @@ posix_acl_clone(const struct posix_acl *acl, gfp_t flags) sizeof(struct posix_acl_entry); clone = kmemdup(acl, size, flags); if (clone) - atomic_set(&clone->a_refcount, 1); + refcount_set(&clone->a_refcount, 1); } return clone; }
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 1ade120..08dce22 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig
@@ -81,3 +81,10 @@ Say Y if you are running any user-space software which takes benefit from this interface. For example, rkt is such a piece of software. + +config PROC_UID + bool "Include /proc/uid/ files" + default y + depends on PROC_FS && RT_MUTEXES + help + Provides aggregated per-uid information under /proc/uid.
diff --git a/fs/proc/Makefile b/fs/proc/Makefile index f7456c4..d8dcb18 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile
@@ -26,6 +26,7 @@ proc-y += namespaces.o proc-y += self.o proc-y += thread_self.o +proc-$(CONFIG_PROC_UID) += uid.o proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o proc-$(CONFIG_NET) += proc_net.o proc-$(CONFIG_PROC_KCORE) += kcore.o
diff --git a/fs/proc/base.c b/fs/proc/base.c index 9063738..6335aa6 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c
@@ -93,6 +93,7 @@ #include <linux/sched/stat.h> #include <linux/flex_array.h> #include <linux/posix-timers.h> +#include <linux/cpufreq_times.h> #ifdef CONFIG_HARDWALL #include <asm/hardwall.h> #endif @@ -3036,6 +3037,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_LIVEPATCH ONE("patch_state", S_IRUSR, proc_pid_patch_state), #endif +#ifdef CONFIG_CPU_FREQ_TIMES + ONE("time_in_state", 0444, proc_time_in_state_show), +#endif }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) @@ -3423,6 +3427,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_LIVEPATCH ONE("patch_state", S_IRUSR, proc_pid_patch_state), #endif +#ifdef CONFIG_CPU_FREQ_TIMES + ONE("time_in_state", 0444, proc_time_in_state_show), +#endif }; static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 225f541..c5f154b 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c
@@ -472,17 +472,12 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) return inode; } -int proc_fill_super(struct super_block *s, void *data, int silent) +int proc_fill_super(struct super_block *s) { - struct pid_namespace *ns = get_pid_ns(s->s_fs_info); struct inode *root_inode; int ret; - if (!proc_parse_options(data, ns)) - return -EINVAL; - - /* User space would break if executables or devices appear on proc */ - s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; + s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NODEV; s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; s->s_blocksize = 1024; s->s_blocksize_bits = 10;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index a34195e..02e0d9e 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h
@@ -203,7 +203,7 @@ extern const struct inode_operations proc_pid_link_inode_operations; extern void proc_init_inodecache(void); void set_proc_pid_nlink(void); extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); -extern int proc_fill_super(struct super_block *, void *data, int flags); +extern int proc_fill_super(struct super_block *); extern void proc_entry_rundown(struct proc_dir_entry *); /* @@ -249,6 +249,15 @@ static inline void proc_sys_evict_inode(struct inode *inode, #endif /* + * uid.c + */ +#ifdef CONFIG_PROC_UID +extern int proc_uid_init(void); +#else +static inline void proc_uid_init(void) { } +#endif + +/* * proc_tty.c */ #ifdef CONFIG_TTY @@ -261,7 +270,6 @@ static inline void proc_tty_init(void) {} * root.c */ extern struct proc_dir_entry proc_root; -extern int proc_parse_options(char *options, struct pid_namespace *pid); extern void proc_self_init(void); extern int proc_remount(struct super_block *, int *, char *);
diff --git a/fs/proc/root.c b/fs/proc/root.c index 4e42aba..ef6840c 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c
@@ -26,6 +26,21 @@ #include "internal.h" +static int proc_test_super(struct super_block *sb, void *data) +{ + return sb->s_fs_info == data; +} + +static int proc_set_super(struct super_block *sb, void *data) +{ + int err = set_anon_super(sb, NULL); + if (!err) { + struct pid_namespace *ns = (struct pid_namespace *)data; + sb->s_fs_info = get_pid_ns(ns); + } + return err; +} + enum { Opt_gid, Opt_hidepid, Opt_err, }; @@ -36,7 +51,7 @@ static const match_table_t tokens = { {Opt_err, NULL}, }; -int proc_parse_options(char *options, struct pid_namespace *pid) +static int proc_parse_options(char *options, struct pid_namespace *pid) { char *p; substring_t args[MAX_OPT_ARGS]; @@ -89,16 +104,45 @@ int proc_remount(struct super_block *sb, int *flags, char *data) static struct dentry *proc_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { + int err; + struct super_block *sb; struct pid_namespace *ns; + char *options; if (flags & MS_KERNMOUNT) { - ns = data; - data = NULL; + ns = (struct pid_namespace *)data; + options = NULL; } else { ns = task_active_pid_ns(current); + options = data; + + /* Does the mounter have privilege over the pid namespace? */ + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); } - return mount_ns(fs_type, flags, data, ns, ns->user_ns, proc_fill_super); + sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns); + if (IS_ERR(sb)) + return ERR_CAST(sb); + + if (!proc_parse_options(options, ns)) { + deactivate_locked_super(sb); + return ERR_PTR(-EINVAL); + } + + if (!sb->s_root) { + err = proc_fill_super(sb); + if (err) { + deactivate_locked_super(sb); + return ERR_PTR(err); + } + + sb->s_flags |= MS_ACTIVE; + /* User space would break if executables appear on proc */ + sb->s_iflags |= SB_I_NOEXEC; + } + + return dget(sb->s_root); } static void proc_kill_sb(struct super_block *sb) @@ -136,7 +180,7 @@ void __init proc_root_init(void) proc_symlink("mounts", NULL, "self/mounts"); proc_net_init(); - + proc_uid_init(); #ifdef CONFIG_SYSVIPC proc_mkdir("sysvipc", NULL); #endif
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 2b47757..4d1da9b 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c
@@ -130,6 +130,56 @@ static void release_task_mempolicy(struct proc_maps_private *priv) } #endif +static void seq_print_vma_name(struct seq_file *m, struct vm_area_struct *vma) +{ + const char __user *name = vma_get_anon_name(vma); + struct mm_struct *mm = vma->vm_mm; + + unsigned long page_start_vaddr; + unsigned long page_offset; + unsigned long num_pages; + unsigned long max_len = NAME_MAX; + int i; + + page_start_vaddr = (unsigned long)name & PAGE_MASK; + page_offset = (unsigned long)name - page_start_vaddr; + num_pages = DIV_ROUND_UP(page_offset + max_len, PAGE_SIZE); + + seq_puts(m, "[anon:"); + + for (i = 0; i < num_pages; i++) { + int len; + int write_len; + const char *kaddr; + long pages_pinned; + struct page *page; + + pages_pinned = get_user_pages_remote(current, mm, + page_start_vaddr, 1, 0, &page, NULL, NULL); + if (pages_pinned < 1) { + seq_puts(m, "<fault>]"); + return; + } + + kaddr = (const char *)kmap(page); + len = min(max_len, PAGE_SIZE - page_offset); + write_len = strnlen(kaddr + page_offset, len); + seq_write(m, kaddr + page_offset, write_len); + kunmap(page); + put_page(page); + + /* if strnlen hit a null terminator then we're done */ + if (write_len != len) + break; + + max_len -= len; + page_offset = 0; + page_start_vaddr += PAGE_SIZE; + } + + seq_putc(m, ']'); +} + static void vma_stop(struct proc_maps_private *priv) { struct mm_struct *mm = priv->mm; @@ -349,8 +399,15 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) goto done; } - if (is_stack(vma)) + if (is_stack(vma)) { name = "[stack]"; + goto done; + } + + if (vma_get_anon_name(vma)) { + seq_pad(m, ' '); + seq_print_vma_name(m, vma); + } } done: @@ -800,6 +857,11 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) if (!rollup_mode) { show_map_vma(m, vma, is_pid); + if (vma_get_anon_name(vma)) { + seq_puts(m, "Name: "); + seq_print_vma_name(m, vma); + seq_putc(m, '\n'); + } } else if (last_vma) { show_vma_header_prefix( m, mss->first_vma_start, vma->vm_end, 0, 0, 0, 0);
diff --git a/fs/proc/uid.c b/fs/proc/uid.c new file mode 100644 index 0000000..6a096d2 --- /dev/null +++ b/fs/proc/uid.c
@@ -0,0 +1,295 @@ +/* + * /proc/uid support + */ + +#include <linux/cpufreq_times.h> +#include <linux/fs.h> +#include <linux/hashtable.h> +#include <linux/init.h> +#include <linux/proc_fs.h> +#include <linux/rtmutex.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include "internal.h" + +static struct proc_dir_entry *proc_uid; + +#define UID_HASH_BITS 10 + +static DECLARE_HASHTABLE(proc_uid_hash_table, UID_HASH_BITS); + +/* + * use rt_mutex here to avoid priority inversion between high-priority readers + * of these files and tasks calling proc_register_uid(). + */ +static DEFINE_RT_MUTEX(proc_uid_lock); /* proc_uid_hash_table */ + +struct uid_hash_entry { + uid_t uid; + struct hlist_node hash; +}; + +/* Caller must hold proc_uid_lock */ +static bool uid_hash_entry_exists_locked(uid_t uid) +{ + struct uid_hash_entry *entry; + + hash_for_each_possible(proc_uid_hash_table, entry, hash, uid) { + if (entry->uid == uid) + return true; + } + return false; +} + +void proc_register_uid(kuid_t kuid) +{ + struct uid_hash_entry *entry; + bool exists; + uid_t uid = from_kuid_munged(current_user_ns(), kuid); + + rt_mutex_lock(&proc_uid_lock); + exists = uid_hash_entry_exists_locked(uid); + rt_mutex_unlock(&proc_uid_lock); + if (exists) + return; + + entry = kzalloc(sizeof(struct uid_hash_entry), GFP_KERNEL); + if (!entry) + return; + entry->uid = uid; + + rt_mutex_lock(&proc_uid_lock); + if (uid_hash_entry_exists_locked(uid)) + kfree(entry); + else + hash_add(proc_uid_hash_table, &entry->hash, uid); + rt_mutex_unlock(&proc_uid_lock); +} + +struct uid_entry { + const char *name; + int len; + umode_t mode; + const struct inode_operations *iop; + const struct file_operations *fop; +}; + +#define NOD(NAME, MODE, IOP, FOP) { \ + .name = (NAME), \ + .len = sizeof(NAME) - 1, \ + .mode = MODE, \ + .iop = IOP, \ + .fop = FOP, \ +} + +#ifdef CONFIG_CPU_FREQ_TIMES +static const struct file_operations proc_uid_time_in_state_operations = { + .open = single_uid_time_in_state_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif + +static const struct uid_entry uid_base_stuff[] = { +#ifdef CONFIG_CPU_FREQ_TIMES + NOD("time_in_state", 0444, NULL, &proc_uid_time_in_state_operations), +#endif +}; + +static const struct inode_operations proc_uid_def_inode_operations = { + .setattr = proc_setattr, +}; + +static struct inode *proc_uid_make_inode(struct super_block *sb, kuid_t kuid) +{ + struct inode *inode; + + inode = new_inode(sb); + if (!inode) + return NULL; + + inode->i_ino = get_next_ino(); + inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_op = &proc_uid_def_inode_operations; + inode->i_uid = kuid; + + return inode; +} + +static int proc_uident_instantiate(struct inode *dir, struct dentry *dentry, + struct task_struct *unused, const void *ptr) +{ + const struct uid_entry *u = ptr; + struct inode *inode; + + inode = proc_uid_make_inode(dir->i_sb, dir->i_uid); + if (!inode) + return -ENOENT; + + inode->i_mode = u->mode; + if (S_ISDIR(inode->i_mode)) + set_nlink(inode, 2); + if (u->iop) + inode->i_op = u->iop; + if (u->fop) + inode->i_fop = u->fop; + d_add(dentry, inode); + return 0; +} + +static struct dentry *proc_uid_base_lookup(struct inode *dir, + struct dentry *dentry, + unsigned int flags) +{ + const struct uid_entry *u, *last; + unsigned int nents = ARRAY_SIZE(uid_base_stuff); + + if (nents == 0) + return ERR_PTR(-ENOENT); + + last = &uid_base_stuff[nents - 1]; + for (u = uid_base_stuff; u <= last; u++) { + if (u->len != dentry->d_name.len) + continue; + if (!memcmp(dentry->d_name.name, u->name, u->len)) + break; + } + if (u > last) + return ERR_PTR(-ENOENT); + + return ERR_PTR(proc_uident_instantiate(dir, dentry, NULL, u)); +} + +static int proc_uid_base_readdir(struct file *file, struct dir_context *ctx) +{ + unsigned int nents = ARRAY_SIZE(uid_base_stuff); + const struct uid_entry *u; + + if (!dir_emit_dots(file, ctx)) + return 0; + + if (ctx->pos >= nents + 2) + return 0; + + for (u = uid_base_stuff + (ctx->pos - 2); + u < uid_base_stuff + nents; u++) { + if (!proc_fill_cache(file, ctx, u->name, u->len, + proc_uident_instantiate, NULL, u)) + break; + ctx->pos++; + } + + return 0; +} + +static const struct inode_operations proc_uid_base_inode_operations = { + .lookup = proc_uid_base_lookup, + .setattr = proc_setattr, +}; + +static const struct file_operations proc_uid_base_operations = { + .read = generic_read_dir, + .iterate = proc_uid_base_readdir, + .llseek = default_llseek, +}; + +static int proc_uid_instantiate(struct inode *dir, struct dentry *dentry, + struct task_struct *unused, const void *ptr) +{ + unsigned int i, len; + nlink_t nlinks; + kuid_t *kuid = (kuid_t *)ptr; + struct inode *inode = proc_uid_make_inode(dir->i_sb, *kuid); + + if (!inode) + return -ENOENT; + + inode->i_mode = S_IFDIR | 0555; + inode->i_op = &proc_uid_base_inode_operations; + inode->i_fop = &proc_uid_base_operations; + inode->i_flags |= S_IMMUTABLE; + + nlinks = 2; + len = ARRAY_SIZE(uid_base_stuff); + for (i = 0; i < len; ++i) { + if (S_ISDIR(uid_base_stuff[i].mode)) + ++nlinks; + } + set_nlink(inode, nlinks); + + d_add(dentry, inode); + + return 0; +} + +static int proc_uid_readdir(struct file *file, struct dir_context *ctx) +{ + int last_shown, i; + unsigned long bkt; + struct uid_hash_entry *entry; + + if (!dir_emit_dots(file, ctx)) + return 0; + + i = 0; + last_shown = ctx->pos - 2; + rt_mutex_lock(&proc_uid_lock); + hash_for_each(proc_uid_hash_table, bkt, entry, hash) { + int len; + char buf[PROC_NUMBUF]; + + if (i < last_shown) + continue; + len = snprintf(buf, sizeof(buf), "%u", entry->uid); + if (!proc_fill_cache(file, ctx, buf, len, + proc_uid_instantiate, NULL, &entry->uid)) + break; + i++; + ctx->pos++; + } + rt_mutex_unlock(&proc_uid_lock); + return 0; +} + +static struct dentry *proc_uid_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + int result = -ENOENT; + + uid_t uid = name_to_int(&dentry->d_name); + bool uid_exists; + + rt_mutex_lock(&proc_uid_lock); + uid_exists = uid_hash_entry_exists_locked(uid); + rt_mutex_unlock(&proc_uid_lock); + if (uid_exists) { + kuid_t kuid = make_kuid(current_user_ns(), uid); + + result = proc_uid_instantiate(dir, dentry, NULL, &kuid); + } + return ERR_PTR(result); +} + +static const struct file_operations proc_uid_operations = { + .read = generic_read_dir, + .iterate = proc_uid_readdir, + .llseek = default_llseek, +}; + +static const struct inode_operations proc_uid_inode_operations = { + .lookup = proc_uid_lookup, + .setattr = proc_setattr, +}; + +int __init proc_uid_init(void) +{ + proc_uid = proc_mkdir("uid", NULL); + if (!proc_uid) + return -ENOMEM; + proc_uid->proc_iops = &proc_uid_inode_operations; + proc_uid->proc_fops = &proc_uid_operations; + + return 0; +}
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 7626ee1..b859aae 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c
@@ -121,7 +121,9 @@ static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt) if (err) goto out; show_mnt_opts(m, mnt); - if (sb->s_op->show_options) + if (sb->s_op->show_options2) + err = sb->s_op->show_options2(mnt, m, mnt_path.dentry); + else if (sb->s_op->show_options) err = sb->s_op->show_options(m, mnt_path.dentry); seq_puts(m, " 0 0\n"); out: @@ -183,7 +185,9 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt) err = show_sb_opts(m, sb); if (err) goto out; - if (sb->s_op->show_options) + if (sb->s_op->show_options2) { + err = sb->s_op->show_options2(mnt, m, mnt->mnt_root); + } else if (sb->s_op->show_options) err = sb->s_op->show_options(m, mnt->mnt_root); seq_putc(m, '\n'); out:
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index f371e03..0db2b69 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c
@@ -707,6 +707,12 @@ static int ramoops_parse_dt(struct platform_device *pdev, return 0; } +void notrace ramoops_console_write_buf(const char *buf, size_t size) +{ + struct ramoops_context *cxt = &oops_cxt; + persistent_ram_write(cxt->cprz, buf, size); +} + static int ramoops_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev;
diff --git a/fs/read_write.c b/fs/read_write.c index 57a00ef..fd69e51 100644 --- a/fs/read_write.c +++ b/fs/read_write.c
@@ -455,6 +455,8 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) return ret; } +EXPORT_SYMBOL(vfs_read); + static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) { struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; @@ -553,6 +555,8 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ return ret; } +EXPORT_SYMBOL(vfs_write); + static inline loff_t file_pos_read(struct file *file) { return file->f_pos;
diff --git a/fs/sdcardfs/Kconfig b/fs/sdcardfs/Kconfig new file mode 100644 index 0000000..a1c1033 --- /dev/null +++ b/fs/sdcardfs/Kconfig
@@ -0,0 +1,13 @@ +config SDCARD_FS + tristate "sdcard file system" + depends on CONFIGFS_FS + default n + help + Sdcardfs is based on Wrapfs file system. + +config SDCARD_FS_FADV_NOACTIVE + bool "sdcardfs fadvise noactive support" + depends on FADV_NOACTIVE + default y + help + Sdcardfs supports fadvise noactive mode.
diff --git a/fs/sdcardfs/Makefile b/fs/sdcardfs/Makefile new file mode 100644 index 0000000..b84fbb2 --- /dev/null +++ b/fs/sdcardfs/Makefile
@@ -0,0 +1,7 @@ +SDCARDFS_VERSION="0.1" + +EXTRA_CFLAGS += -DSDCARDFS_VERSION=\"$(SDCARDFS_VERSION)\" + +obj-$(CONFIG_SDCARD_FS) += sdcardfs.o + +sdcardfs-y := dentry.o file.o inode.o main.o super.o lookup.o mmap.o packagelist.o derived_perm.o
diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c new file mode 100644 index 0000000..cb573f1 --- /dev/null +++ b/fs/sdcardfs/dentry.c
@@ -0,0 +1,196 @@ +/* + * fs/sdcardfs/dentry.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" +#include "linux/ctype.h" + +/* + * returns: -ERRNO if error (returned to user) + * 0: tell VFS to invalidate dentry + * 1: dentry is valid + */ +static int sdcardfs_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + int err = 1; + struct path parent_lower_path, lower_path; + struct dentry *parent_dentry = NULL; + struct dentry *parent_lower_dentry = NULL; + struct dentry *lower_cur_parent_dentry = NULL; + struct dentry *lower_dentry = NULL; + struct inode *inode; + struct sdcardfs_inode_data *data; + + if (flags & LOOKUP_RCU) + return -ECHILD; + + spin_lock(&dentry->d_lock); + if (IS_ROOT(dentry)) { + spin_unlock(&dentry->d_lock); + return 1; + } + spin_unlock(&dentry->d_lock); + + /* check uninitialized obb_dentry and + * whether the base obbpath has been changed or not + */ + if (is_obbpath_invalid(dentry)) { + return 0; + } + + parent_dentry = dget_parent(dentry); + sdcardfs_get_lower_path(parent_dentry, &parent_lower_path); + sdcardfs_get_real_lower(dentry, &lower_path); + parent_lower_dentry = parent_lower_path.dentry; + lower_dentry = lower_path.dentry; + lower_cur_parent_dentry = dget_parent(lower_dentry); + + if ((lower_dentry->d_flags & DCACHE_OP_REVALIDATE)) { + err = lower_dentry->d_op->d_revalidate(lower_dentry, flags); + if (err == 0) { + goto out; + } + } + + spin_lock(&lower_dentry->d_lock); + if (d_unhashed(lower_dentry)) { + spin_unlock(&lower_dentry->d_lock); + err = 0; + goto out; + } + spin_unlock(&lower_dentry->d_lock); + + if (parent_lower_dentry != lower_cur_parent_dentry) { + err = 0; + goto out; + } + + if (dentry < lower_dentry) { + spin_lock(&dentry->d_lock); + spin_lock_nested(&lower_dentry->d_lock, DENTRY_D_LOCK_NESTED); + } else { + spin_lock(&lower_dentry->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + } + + if (!qstr_case_eq(&dentry->d_name, &lower_dentry->d_name)) { + err = 0; + } + + if (dentry < lower_dentry) { + spin_unlock(&lower_dentry->d_lock); + spin_unlock(&dentry->d_lock); + } else { + spin_unlock(&dentry->d_lock); + spin_unlock(&lower_dentry->d_lock); + } + if (!err) + goto out; + + /* If our top's inode is gone, we may be out of date */ + inode = igrab(d_inode(dentry)); + if (inode) { + data = top_data_get(SDCARDFS_I(inode)); + if (!data || data->abandoned) { + err = 0; + } + if (data) + data_put(data); + iput(inode); + } + +out: + dput(parent_dentry); + dput(lower_cur_parent_dentry); + sdcardfs_put_lower_path(parent_dentry, &parent_lower_path); + sdcardfs_put_real_lower(dentry, &lower_path); + return err; +} + +/* 1 = delete, 0 = cache */ +static int sdcardfs_d_delete(const struct dentry *d) +{ + return SDCARDFS_SB(d->d_sb)->options.nocache ? 1 : 0; +} + +static void sdcardfs_d_release(struct dentry *dentry) +{ + if (!dentry || !dentry->d_fsdata) + return; + /* release and reset the lower paths */ + if (has_graft_path(dentry)) + sdcardfs_put_reset_orig_path(dentry); + sdcardfs_put_reset_lower_path(dentry); + free_dentry_private_data(dentry); +} + +static int sdcardfs_hash_ci(const struct dentry *dentry, + struct qstr *qstr) +{ + /* + * This function is copy of vfat_hashi. + * FIXME Should we support national language? + * Refer to vfat_hashi() + * struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io; + */ + const unsigned char *name; + unsigned int len; + unsigned long hash; + + name = qstr->name; + len = qstr->len; + + hash = init_name_hash(dentry); + while (len--) + hash = partial_name_hash(tolower(*name++), hash); + qstr->hash = end_name_hash(hash); + + return 0; +} + +/* + * Case insensitive compare of two vfat names. + */ +static int sdcardfs_cmp_ci(const struct dentry *dentry, + unsigned int len, const char *str, const struct qstr *name) +{ + /* FIXME Should we support national language? */ + + if (name->len == len) { + if (str_n_case_eq(name->name, str, len)) + return 0; + } + return 1; +} + +static void sdcardfs_canonical_path(const struct path *path, + struct path *actual_path) +{ + sdcardfs_get_real_lower(path->dentry, actual_path); +} + +const struct dentry_operations sdcardfs_ci_dops = { + .d_revalidate = sdcardfs_d_revalidate, + .d_delete = sdcardfs_d_delete, + .d_release = sdcardfs_d_release, + .d_hash = sdcardfs_hash_ci, + .d_compare = sdcardfs_cmp_ci, + .d_canonical_path = sdcardfs_canonical_path, +}; +
diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c new file mode 100644 index 0000000..78a669c --- /dev/null +++ b/fs/sdcardfs/derived_perm.c
@@ -0,0 +1,477 @@ +/* + * fs/sdcardfs/derived_perm.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" + +/* copy derived state from parent inode */ +static void inherit_derived_state(struct inode *parent, struct inode *child) +{ + struct sdcardfs_inode_info *pi = SDCARDFS_I(parent); + struct sdcardfs_inode_info *ci = SDCARDFS_I(child); + + ci->data->perm = PERM_INHERIT; + ci->data->userid = pi->data->userid; + ci->data->d_uid = pi->data->d_uid; + ci->data->under_android = pi->data->under_android; + ci->data->under_cache = pi->data->under_cache; + ci->data->under_obb = pi->data->under_obb; +} + +/* helper function for derived state */ +void setup_derived_state(struct inode *inode, perm_t perm, userid_t userid, + uid_t uid) +{ + struct sdcardfs_inode_info *info = SDCARDFS_I(inode); + + info->data->perm = perm; + info->data->userid = userid; + info->data->d_uid = uid; + info->data->under_android = false; + info->data->under_cache = false; + info->data->under_obb = false; +} + +/* While renaming, there is a point where we want the path from dentry, + * but the name from newdentry + */ +void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, + const struct qstr *name) +{ + struct sdcardfs_inode_info *info = SDCARDFS_I(d_inode(dentry)); + struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent)); + struct sdcardfs_inode_data *parent_data = parent_info->data; + appid_t appid; + unsigned long user_num; + int err; + struct qstr q_Android = QSTR_LITERAL("Android"); + struct qstr q_data = QSTR_LITERAL("data"); + struct qstr q_sandbox = QSTR_LITERAL("sandbox"); + struct qstr q_obb = QSTR_LITERAL("obb"); + struct qstr q_media = QSTR_LITERAL("media"); + struct qstr q_cache = QSTR_LITERAL("cache"); + + /* By default, each inode inherits from its parent. + * the properties are maintained on its private fields + * because the inode attributes will be modified with that of + * its lower inode. + * These values are used by our custom permission call instead + * of using the inode permissions. + */ + + inherit_derived_state(d_inode(parent), d_inode(dentry)); + + /* Files don't get special labels */ + if (!S_ISDIR(d_inode(dentry)->i_mode)) { + set_top(info, parent_info); + return; + } + /* Derive custom permissions based on parent and current node */ + switch (parent_data->perm) { + case PERM_INHERIT: + case PERM_ANDROID_PACKAGE_CACHE: + set_top(info, parent_info); + break; + case PERM_PRE_ROOT: + /* Legacy internal layout places users at top level */ + info->data->perm = PERM_ROOT; + err = kstrtoul(name->name, 10, &user_num); + if (err) + info->data->userid = 0; + else + info->data->userid = user_num; + break; + case PERM_ROOT: + /* Assume masked off by default. */ + if (qstr_case_eq(name, &q_Android)) { + /* App-specific directories inside; let anyone traverse */ + info->data->perm = PERM_ANDROID; + info->data->under_android = true; + } else { + set_top(info, parent_info); + } + break; + case PERM_ANDROID: + if (qstr_case_eq(name, &q_data)) { + /* App-specific directories inside; let anyone traverse */ + info->data->perm = PERM_ANDROID_DATA; + } else if (qstr_case_eq(name, &q_sandbox)) { + /* App-specific directories inside; let anyone traverse */ + info->data->perm = PERM_ANDROID_DATA; + } else if (qstr_case_eq(name, &q_obb)) { + /* App-specific directories inside; let anyone traverse */ + info->data->perm = PERM_ANDROID_OBB; + info->data->under_obb = true; + /* Single OBB directory is always shared */ + } else if (qstr_case_eq(name, &q_media)) { + /* App-specific directories inside; let anyone traverse */ + info->data->perm = PERM_ANDROID_MEDIA; + } else { + set_top(info, parent_info); + } + break; + case PERM_ANDROID_OBB: + case PERM_ANDROID_DATA: + case PERM_ANDROID_MEDIA: + info->data->perm = PERM_ANDROID_PACKAGE; + appid = get_appid(name->name); + if (appid != 0 && !is_excluded(name->name, parent_data->userid)) + info->data->d_uid = + multiuser_get_uid(parent_data->userid, appid); + break; + case PERM_ANDROID_PACKAGE: + if (qstr_case_eq(name, &q_cache)) { + info->data->perm = PERM_ANDROID_PACKAGE_CACHE; + info->data->under_cache = true; + } + set_top(info, parent_info); + break; + } +} + +void get_derived_permission(struct dentry *parent, struct dentry *dentry) +{ + get_derived_permission_new(parent, dentry, &dentry->d_name); +} + +static appid_t get_type(const char *name) +{ + const char *ext = strrchr(name, '.'); + appid_t id; + + if (ext && ext[0]) { + ext = &ext[1]; + id = get_ext_gid(ext); + return id?:AID_MEDIA_RW; + } + return AID_MEDIA_RW; +} + +void fixup_lower_ownership(struct dentry *dentry, const char *name) +{ + struct path path; + struct inode *inode; + struct inode *delegated_inode = NULL; + int error; + struct sdcardfs_inode_info *info; + struct sdcardfs_inode_data *info_d; + struct sdcardfs_inode_data *info_top; + perm_t perm; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + uid_t uid = sbi->options.fs_low_uid; + gid_t gid = sbi->options.fs_low_gid; + struct iattr newattrs; + + if (!sbi->options.gid_derivation) + return; + + info = SDCARDFS_I(d_inode(dentry)); + info_d = info->data; + perm = info_d->perm; + if (info_d->under_obb) { + perm = PERM_ANDROID_OBB; + } else if (info_d->under_cache) { + perm = PERM_ANDROID_PACKAGE_CACHE; + } else if (perm == PERM_INHERIT) { + info_top = top_data_get(info); + perm = info_top->perm; + data_put(info_top); + } + + switch (perm) { + case PERM_ROOT: + case PERM_ANDROID: + case PERM_ANDROID_DATA: + case PERM_ANDROID_MEDIA: + case PERM_ANDROID_PACKAGE: + case PERM_ANDROID_PACKAGE_CACHE: + uid = multiuser_get_uid(info_d->userid, uid); + break; + case PERM_ANDROID_OBB: + uid = AID_MEDIA_OBB; + break; + case PERM_PRE_ROOT: + default: + break; + } + switch (perm) { + case PERM_ROOT: + case PERM_ANDROID: + case PERM_ANDROID_DATA: + case PERM_ANDROID_MEDIA: + if (S_ISDIR(d_inode(dentry)->i_mode)) + gid = multiuser_get_uid(info_d->userid, AID_MEDIA_RW); + else + gid = multiuser_get_uid(info_d->userid, get_type(name)); + break; + case PERM_ANDROID_OBB: + gid = AID_MEDIA_OBB; + break; + case PERM_ANDROID_PACKAGE: + if (uid_is_app(info_d->d_uid)) + gid = multiuser_get_ext_gid(info_d->d_uid); + else + gid = multiuser_get_uid(info_d->userid, AID_MEDIA_RW); + break; + case PERM_ANDROID_PACKAGE_CACHE: + if (uid_is_app(info_d->d_uid)) + gid = multiuser_get_ext_cache_gid(info_d->d_uid); + else + gid = multiuser_get_uid(info_d->userid, AID_MEDIA_RW); + break; + case PERM_PRE_ROOT: + default: + break; + } + + sdcardfs_get_lower_path(dentry, &path); + inode = d_inode(path.dentry); + if (d_inode(path.dentry)->i_gid.val != gid || d_inode(path.dentry)->i_uid.val != uid) { +retry_deleg: + newattrs.ia_valid = ATTR_GID | ATTR_UID | ATTR_FORCE; + newattrs.ia_uid = make_kuid(current_user_ns(), uid); + newattrs.ia_gid = make_kgid(current_user_ns(), gid); + if (!S_ISDIR(inode->i_mode)) + newattrs.ia_valid |= + ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; + inode_lock(inode); + error = security_path_chown(&path, newattrs.ia_uid, newattrs.ia_gid); + if (!error) + error = notify_change2(path.mnt, path.dentry, &newattrs, &delegated_inode); + inode_unlock(inode); + if (delegated_inode) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry_deleg; + } + if (error) + pr_debug("sdcardfs: Failed to touch up lower fs gid/uid for %s\n", name); + } + sdcardfs_put_lower_path(dentry, &path); +} + +static int descendant_may_need_fixup(struct sdcardfs_inode_data *data, + struct limit_search *limit) +{ + if (data->perm == PERM_ROOT) + return (limit->flags & BY_USERID) ? + data->userid == limit->userid : 1; + if (data->perm == PERM_PRE_ROOT || data->perm == PERM_ANDROID) + return 1; + return 0; +} + +static int needs_fixup(perm_t perm) +{ + if (perm == PERM_ANDROID_DATA || perm == PERM_ANDROID_OBB + || perm == PERM_ANDROID_MEDIA) + return 1; + return 0; +} + +static void __fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit, int depth) +{ + struct dentry *child; + struct sdcardfs_inode_info *info; + + /* + * All paths will terminate their recursion on hitting PERM_ANDROID_OBB, + * PERM_ANDROID_MEDIA, or PERM_ANDROID_DATA. This happens at a depth of + * at most 3. + */ + WARN(depth > 3, "%s: Max expected depth exceeded!\n", __func__); + spin_lock_nested(&dentry->d_lock, depth); + if (!d_inode(dentry)) { + spin_unlock(&dentry->d_lock); + return; + } + info = SDCARDFS_I(d_inode(dentry)); + + if (needs_fixup(info->data->perm)) { + list_for_each_entry(child, &dentry->d_subdirs, d_child) { + spin_lock_nested(&child->d_lock, depth + 1); + if (!(limit->flags & BY_NAME) || qstr_case_eq(&child->d_name, &limit->name)) { + if (d_inode(child)) { + get_derived_permission(dentry, child); + fixup_tmp_permissions(d_inode(child)); + spin_unlock(&child->d_lock); + break; + } + } + spin_unlock(&child->d_lock); + } + } else if (descendant_may_need_fixup(info->data, limit)) { + list_for_each_entry(child, &dentry->d_subdirs, d_child) { + __fixup_perms_recursive(child, limit, depth + 1); + } + } + spin_unlock(&dentry->d_lock); +} + +void fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit) +{ + __fixup_perms_recursive(dentry, limit, 0); +} + +/* main function for updating derived permission */ +inline void update_derived_permission_lock(struct dentry *dentry) +{ + struct dentry *parent; + + if (!dentry || !d_inode(dentry)) { + pr_err("sdcardfs: %s: invalid dentry\n", __func__); + return; + } + /* FIXME: + * 1. need to check whether the dentry is updated or not + * 2. remove the root dentry update + */ + if (!IS_ROOT(dentry)) { + parent = dget_parent(dentry); + if (parent) { + get_derived_permission(parent, dentry); + dput(parent); + } + } + fixup_tmp_permissions(d_inode(dentry)); +} + +int need_graft_path(struct dentry *dentry) +{ + int ret = 0; + struct dentry *parent = dget_parent(dentry); + struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent)); + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + struct qstr obb = QSTR_LITERAL("obb"); + + if (!sbi->options.unshared_obb && + parent_info->data->perm == PERM_ANDROID && + qstr_case_eq(&dentry->d_name, &obb)) { + + /* /Android/obb is the base obbpath of DERIVED_UNIFIED */ + if (!(sbi->options.multiuser == false + && parent_info->data->userid == 0)) { + ret = 1; + } + } + dput(parent); + return ret; +} + +int is_obbpath_invalid(struct dentry *dent) +{ + int ret = 0; + struct sdcardfs_dentry_info *di = SDCARDFS_D(dent); + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dent->d_sb); + char *path_buf, *obbpath_s; + int need_put = 0; + struct path lower_path; + + /* check the base obbpath has been changed. + * this routine can check an uninitialized obb dentry as well. + * regarding the uninitialized obb, refer to the sdcardfs_mkdir() + */ + spin_lock(&di->lock); + if (di->orig_path.dentry) { + if (!di->lower_path.dentry) { + ret = 1; + } else { + path_get(&di->lower_path); + + path_buf = kmalloc(PATH_MAX, GFP_ATOMIC); + if (!path_buf) { + ret = 1; + pr_err("sdcardfs: fail to allocate path_buf in %s.\n", __func__); + } else { + obbpath_s = d_path(&di->lower_path, path_buf, PATH_MAX); + if (d_unhashed(di->lower_path.dentry) || + !str_case_eq(sbi->obbpath_s, obbpath_s)) { + ret = 1; + } + kfree(path_buf); + } + + pathcpy(&lower_path, &di->lower_path); + need_put = 1; + } + } + spin_unlock(&di->lock); + if (need_put) + path_put(&lower_path); + return ret; +} + +int is_base_obbpath(struct dentry *dentry) +{ + int ret = 0; + struct dentry *parent = dget_parent(dentry); + struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent)); + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + struct qstr q_obb = QSTR_LITERAL("obb"); + + spin_lock(&SDCARDFS_D(dentry)->lock); + if (sbi->options.multiuser) { + if (parent_info->data->perm == PERM_PRE_ROOT && + qstr_case_eq(&dentry->d_name, &q_obb)) { + ret = 1; + } + } else if (parent_info->data->perm == PERM_ANDROID && + qstr_case_eq(&dentry->d_name, &q_obb)) { + ret = 1; + } + spin_unlock(&SDCARDFS_D(dentry)->lock); + return ret; +} + +/* The lower_path will be stored to the dentry's orig_path + * and the base obbpath will be copyed to the lower_path variable. + * if an error returned, there's no change in the lower_path + * returns: -ERRNO if error (0: no error) + */ +int setup_obb_dentry(struct dentry *dentry, struct path *lower_path) +{ + int err = 0; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + struct path obbpath; + + /* A local obb dentry must have its own orig_path to support rmdir + * and mkdir of itself. Usually, we expect that the sbi->obbpath + * is avaiable on this stage. + */ + sdcardfs_set_orig_path(dentry, lower_path); + + err = kern_path(sbi->obbpath_s, + LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &obbpath); + + if (!err) { + /* the obbpath base has been found */ + pathcpy(lower_path, &obbpath); + } else { + /* if the sbi->obbpath is not available, we can optionally + * setup the lower_path with its orig_path. + * but, the current implementation just returns an error + * because the sdcard daemon also regards this case as + * a lookup fail. + */ + pr_info("sdcardfs: the sbi->obbpath is not available\n"); + } + return err; +} + +
diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c new file mode 100644 index 0000000..271c4c4 --- /dev/null +++ b/fs/sdcardfs/file.c
@@ -0,0 +1,467 @@ +/* + * fs/sdcardfs/file.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" +#ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE +#include <linux/backing-dev.h> +#endif + +static ssize_t sdcardfs_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + int err; + struct file *lower_file; + struct dentry *dentry = file->f_path.dentry; +#ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE + struct backing_dev_info *bdi; +#endif + + lower_file = sdcardfs_lower_file(file); + +#ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE + if (file->f_mode & FMODE_NOACTIVE) { + if (!(lower_file->f_mode & FMODE_NOACTIVE)) { + bdi = lower_file->f_mapping->backing_dev_info; + lower_file->f_ra.ra_pages = bdi->ra_pages * 2; + spin_lock(&lower_file->f_lock); + lower_file->f_mode |= FMODE_NOACTIVE; + spin_unlock(&lower_file->f_lock); + } + } +#endif + + err = vfs_read(lower_file, buf, count, ppos); + /* update our inode atime upon a successful lower read */ + if (err >= 0) + fsstack_copy_attr_atime(d_inode(dentry), + file_inode(lower_file)); + + return err; +} + +static ssize_t sdcardfs_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + int err; + struct file *lower_file; + struct dentry *dentry = file->f_path.dentry; + struct inode *inode = d_inode(dentry); + + /* check disk space */ + if (!check_min_free_space(dentry, count, 0)) { + pr_err("No minimum free space.\n"); + return -ENOSPC; + } + + lower_file = sdcardfs_lower_file(file); + err = vfs_write(lower_file, buf, count, ppos); + /* update our inode times+sizes upon a successful lower write */ + if (err >= 0) { + if (sizeof(loff_t) > sizeof(long)) + inode_lock(inode); + fsstack_copy_inode_size(inode, file_inode(lower_file)); + fsstack_copy_attr_times(inode, file_inode(lower_file)); + if (sizeof(loff_t) > sizeof(long)) + inode_unlock(inode); + } + + return err; +} + +static int sdcardfs_readdir(struct file *file, struct dir_context *ctx) +{ + int err; + struct file *lower_file = NULL; + struct dentry *dentry = file->f_path.dentry; + + lower_file = sdcardfs_lower_file(file); + + lower_file->f_pos = file->f_pos; + err = iterate_dir(lower_file, ctx); + file->f_pos = lower_file->f_pos; + if (err >= 0) /* copy the atime */ + fsstack_copy_attr_atime(d_inode(dentry), + file_inode(lower_file)); + return err; +} + +static long sdcardfs_unlocked_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + long err = -ENOTTY; + struct file *lower_file; + const struct cred *saved_cred = NULL; + struct dentry *dentry = file->f_path.dentry; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + + lower_file = sdcardfs_lower_file(file); + + /* XXX: use vfs_ioctl if/when VFS exports it */ + if (!lower_file || !lower_file->f_op) + goto out; + + /* save current_cred and override it */ + saved_cred = override_fsids(sbi, SDCARDFS_I(file_inode(file))->data); + if (!saved_cred) { + err = -ENOMEM; + goto out; + } + + if (lower_file->f_op->unlocked_ioctl) + err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); + + /* some ioctls can change inode attributes (EXT2_IOC_SETFLAGS) */ + if (!err) + sdcardfs_copy_and_fix_attrs(file_inode(file), + file_inode(lower_file)); + revert_fsids(saved_cred); +out: + return err; +} + +#ifdef CONFIG_COMPAT +static long sdcardfs_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + long err = -ENOTTY; + struct file *lower_file; + const struct cred *saved_cred = NULL; + struct dentry *dentry = file->f_path.dentry; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + + lower_file = sdcardfs_lower_file(file); + + /* XXX: use vfs_ioctl if/when VFS exports it */ + if (!lower_file || !lower_file->f_op) + goto out; + + /* save current_cred and override it */ + saved_cred = override_fsids(sbi, SDCARDFS_I(file_inode(file))->data); + if (!saved_cred) { + err = -ENOMEM; + goto out; + } + + if (lower_file->f_op->compat_ioctl) + err = lower_file->f_op->compat_ioctl(lower_file, cmd, arg); + + revert_fsids(saved_cred); +out: + return err; +} +#endif + +static int sdcardfs_mmap(struct file *file, struct vm_area_struct *vma) +{ + int err = 0; + bool willwrite; + struct file *lower_file; + const struct vm_operations_struct *saved_vm_ops = NULL; + + /* this might be deferred to mmap's writepage */ + willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags); + + /* + * File systems which do not implement ->writepage may use + * generic_file_readonly_mmap as their ->mmap op. If you call + * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL. + * But we cannot call the lower ->mmap op, so we can't tell that + * writeable mappings won't work. Therefore, our only choice is to + * check if the lower file system supports the ->writepage, and if + * not, return EINVAL (the same error that + * generic_file_readonly_mmap returns in that case). + */ + lower_file = sdcardfs_lower_file(file); + if (willwrite && !lower_file->f_mapping->a_ops->writepage) { + err = -EINVAL; + pr_err("sdcardfs: lower file system does not support writeable mmap\n"); + goto out; + } + + /* + * find and save lower vm_ops. + * + * XXX: the VFS should have a cleaner way of finding the lower vm_ops + */ + if (!SDCARDFS_F(file)->lower_vm_ops) { + err = lower_file->f_op->mmap(lower_file, vma); + if (err) { + pr_err("sdcardfs: lower mmap failed %d\n", err); + goto out; + } + saved_vm_ops = vma->vm_ops; /* save: came from lower ->mmap */ + } + + /* + * Next 3 lines are all I need from generic_file_mmap. I definitely + * don't want its test for ->readpage which returns -ENOEXEC. + */ + file_accessed(file); + vma->vm_ops = &sdcardfs_vm_ops; + + file->f_mapping->a_ops = &sdcardfs_aops; /* set our aops */ + if (!SDCARDFS_F(file)->lower_vm_ops) /* save for our ->fault */ + SDCARDFS_F(file)->lower_vm_ops = saved_vm_ops; + vma->vm_private_data = file; + get_file(lower_file); + vma->vm_file = lower_file; + +out: + return err; +} + +static int sdcardfs_open(struct inode *inode, struct file *file) +{ + int err = 0; + struct file *lower_file = NULL; + struct path lower_path; + struct dentry *dentry = file->f_path.dentry; + struct dentry *parent = dget_parent(dentry); + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + const struct cred *saved_cred = NULL; + + /* don't open unhashed/deleted files */ + if (d_unhashed(dentry)) { + err = -ENOENT; + goto out_err; + } + + if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) { + err = -EACCES; + goto out_err; + } + + /* save current_cred and override it */ + saved_cred = override_fsids(sbi, SDCARDFS_I(inode)->data); + if (!saved_cred) { + err = -ENOMEM; + goto out_err; + } + + file->private_data = + kzalloc(sizeof(struct sdcardfs_file_info), GFP_KERNEL); + if (!SDCARDFS_F(file)) { + err = -ENOMEM; + goto out_revert_cred; + } + + /* open lower object and link sdcardfs's file struct to lower's */ + sdcardfs_get_lower_path(file->f_path.dentry, &lower_path); + lower_file = dentry_open(&lower_path, file->f_flags, current_cred()); + path_put(&lower_path); + if (IS_ERR(lower_file)) { + err = PTR_ERR(lower_file); + lower_file = sdcardfs_lower_file(file); + if (lower_file) { + sdcardfs_set_lower_file(file, NULL); + fput(lower_file); /* fput calls dput for lower_dentry */ + } + } else { + sdcardfs_set_lower_file(file, lower_file); + } + + if (err) + kfree(SDCARDFS_F(file)); + else + sdcardfs_copy_and_fix_attrs(inode, sdcardfs_lower_inode(inode)); + +out_revert_cred: + revert_fsids(saved_cred); +out_err: + dput(parent); + return err; +} + +static int sdcardfs_flush(struct file *file, fl_owner_t id) +{ + int err = 0; + struct file *lower_file = NULL; + + lower_file = sdcardfs_lower_file(file); + if (lower_file && lower_file->f_op && lower_file->f_op->flush) { + filemap_write_and_wait(file->f_mapping); + err = lower_file->f_op->flush(lower_file, id); + } + + return err; +} + +/* release all lower object references & free the file info structure */ +static int sdcardfs_file_release(struct inode *inode, struct file *file) +{ + struct file *lower_file; + + lower_file = sdcardfs_lower_file(file); + if (lower_file) { + sdcardfs_set_lower_file(file, NULL); + fput(lower_file); + } + + kfree(SDCARDFS_F(file)); + return 0; +} + +static int sdcardfs_fsync(struct file *file, loff_t start, loff_t end, + int datasync) +{ + int err; + struct file *lower_file; + struct path lower_path; + struct dentry *dentry = file->f_path.dentry; + + err = __generic_file_fsync(file, start, end, datasync); + if (err) + goto out; + + lower_file = sdcardfs_lower_file(file); + sdcardfs_get_lower_path(dentry, &lower_path); + err = vfs_fsync_range(lower_file, start, end, datasync); + sdcardfs_put_lower_path(dentry, &lower_path); +out: + return err; +} + +static int sdcardfs_fasync(int fd, struct file *file, int flag) +{ + int err = 0; + struct file *lower_file = NULL; + + lower_file = sdcardfs_lower_file(file); + if (lower_file->f_op && lower_file->f_op->fasync) + err = lower_file->f_op->fasync(fd, lower_file, flag); + + return err; +} + +/* + * Sdcardfs cannot use generic_file_llseek as ->llseek, because it would + * only set the offset of the upper file. So we have to implement our + * own method to set both the upper and lower file offsets + * consistently. + */ +static loff_t sdcardfs_file_llseek(struct file *file, loff_t offset, int whence) +{ + int err; + struct file *lower_file; + + err = generic_file_llseek(file, offset, whence); + if (err < 0) + goto out; + + lower_file = sdcardfs_lower_file(file); + err = generic_file_llseek(lower_file, offset, whence); + +out: + return err; +} + +/* + * Sdcardfs read_iter, redirect modified iocb to lower read_iter + */ +ssize_t sdcardfs_read_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + int err; + struct file *file = iocb->ki_filp, *lower_file; + + lower_file = sdcardfs_lower_file(file); + if (!lower_file->f_op->read_iter) { + err = -EINVAL; + goto out; + } + + get_file(lower_file); /* prevent lower_file from being released */ + iocb->ki_filp = lower_file; + err = lower_file->f_op->read_iter(iocb, iter); + iocb->ki_filp = file; + fput(lower_file); + /* update upper inode atime as needed */ + if (err >= 0 || err == -EIOCBQUEUED) + fsstack_copy_attr_atime(file->f_path.dentry->d_inode, + file_inode(lower_file)); +out: + return err; +} + +/* + * Sdcardfs write_iter, redirect modified iocb to lower write_iter + */ +ssize_t sdcardfs_write_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + int err; + struct file *file = iocb->ki_filp, *lower_file; + struct inode *inode = file->f_path.dentry->d_inode; + + lower_file = sdcardfs_lower_file(file); + if (!lower_file->f_op->write_iter) { + err = -EINVAL; + goto out; + } + + get_file(lower_file); /* prevent lower_file from being released */ + iocb->ki_filp = lower_file; + err = lower_file->f_op->write_iter(iocb, iter); + iocb->ki_filp = file; + fput(lower_file); + /* update upper inode times/sizes as needed */ + if (err >= 0 || err == -EIOCBQUEUED) { + if (sizeof(loff_t) > sizeof(long)) + inode_lock(inode); + fsstack_copy_inode_size(inode, file_inode(lower_file)); + fsstack_copy_attr_times(inode, file_inode(lower_file)); + if (sizeof(loff_t) > sizeof(long)) + inode_unlock(inode); + } +out: + return err; +} + +const struct file_operations sdcardfs_main_fops = { + .llseek = generic_file_llseek, + .read = sdcardfs_read, + .write = sdcardfs_write, + .unlocked_ioctl = sdcardfs_unlocked_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = sdcardfs_compat_ioctl, +#endif + .mmap = sdcardfs_mmap, + .open = sdcardfs_open, + .flush = sdcardfs_flush, + .release = sdcardfs_file_release, + .fsync = sdcardfs_fsync, + .fasync = sdcardfs_fasync, + .read_iter = sdcardfs_read_iter, + .write_iter = sdcardfs_write_iter, +}; + +/* trimmed directory options */ +const struct file_operations sdcardfs_dir_fops = { + .llseek = sdcardfs_file_llseek, + .read = generic_read_dir, + .iterate = sdcardfs_readdir, + .unlocked_ioctl = sdcardfs_unlocked_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = sdcardfs_compat_ioctl, +#endif + .open = sdcardfs_open, + .release = sdcardfs_file_release, + .flush = sdcardfs_flush, + .fsync = sdcardfs_fsync, + .fasync = sdcardfs_fasync, +};
diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c new file mode 100644 index 0000000..4dd681e --- /dev/null +++ b/fs/sdcardfs/inode.c
@@ -0,0 +1,821 @@ +/* + * fs/sdcardfs/inode.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" +#include <linux/fs_struct.h> +#include <linux/ratelimit.h> +#include <linux/sched/task.h> + +const struct cred *override_fsids(struct sdcardfs_sb_info *sbi, + struct sdcardfs_inode_data *data) +{ + struct cred *cred; + const struct cred *old_cred; + uid_t uid; + + cred = prepare_creds(); + if (!cred) + return NULL; + + if (sbi->options.gid_derivation) { + if (data->under_obb) + uid = AID_MEDIA_OBB; + else + uid = multiuser_get_uid(data->userid, sbi->options.fs_low_uid); + } else { + uid = sbi->options.fs_low_uid; + } + cred->fsuid = make_kuid(&init_user_ns, uid); + cred->fsgid = make_kgid(&init_user_ns, sbi->options.fs_low_gid); + + old_cred = override_creds(cred); + + return old_cred; +} + +void revert_fsids(const struct cred *old_cred) +{ + const struct cred *cur_cred; + + cur_cred = current->cred; + revert_creds(old_cred); + put_cred(cur_cred); +} + +static int sdcardfs_create(struct inode *dir, struct dentry *dentry, + umode_t mode, bool want_excl) +{ + int err; + struct dentry *lower_dentry; + struct vfsmount *lower_dentry_mnt; + struct dentry *lower_parent_dentry = NULL; + struct path lower_path; + const struct cred *saved_cred = NULL; + struct fs_struct *saved_fs; + struct fs_struct *copied_fs; + + if (!check_caller_access_to_name(dir, &dentry->d_name)) { + err = -EACCES; + goto out_eacces; + } + + /* save current_cred and override it */ + saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb), + SDCARDFS_I(dir)->data); + if (!saved_cred) + return -ENOMEM; + + sdcardfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_dentry_mnt = lower_path.mnt; + lower_parent_dentry = lock_parent(lower_dentry); + + /* set last 16bytes of mode field to 0664 */ + mode = (mode & S_IFMT) | 00664; + + /* temporarily change umask for lower fs write */ + saved_fs = current->fs; + copied_fs = copy_fs_struct(current->fs); + if (!copied_fs) { + err = -ENOMEM; + goto out_unlock; + } + copied_fs->umask = 0; + task_lock(current); + current->fs = copied_fs; + task_unlock(current); + + err = vfs_create2(lower_dentry_mnt, d_inode(lower_parent_dentry), lower_dentry, mode, want_excl); + if (err) + goto out; + + err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path, + SDCARDFS_I(dir)->data->userid); + if (err) + goto out; + fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); + fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry)); + fixup_lower_ownership(dentry, dentry->d_name.name); + +out: + task_lock(current); + current->fs = saved_fs; + task_unlock(current); + free_fs_struct(copied_fs); +out_unlock: + unlock_dir(lower_parent_dentry); + sdcardfs_put_lower_path(dentry, &lower_path); + revert_fsids(saved_cred); +out_eacces: + return err; +} + +static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry) +{ + int err; + struct dentry *lower_dentry; + struct vfsmount *lower_mnt; + struct inode *lower_dir_inode = sdcardfs_lower_inode(dir); + struct dentry *lower_dir_dentry; + struct path lower_path; + const struct cred *saved_cred = NULL; + + if (!check_caller_access_to_name(dir, &dentry->d_name)) { + err = -EACCES; + goto out_eacces; + } + + /* save current_cred and override it */ + saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb), + SDCARDFS_I(dir)->data); + if (!saved_cred) + return -ENOMEM; + + sdcardfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_mnt = lower_path.mnt; + dget(lower_dentry); + lower_dir_dentry = lock_parent(lower_dentry); + + err = vfs_unlink2(lower_mnt, lower_dir_inode, lower_dentry, NULL); + + /* + * Note: unlinking on top of NFS can cause silly-renamed files. + * Trying to delete such files results in EBUSY from NFS + * below. Silly-renamed files will get deleted by NFS later on, so + * we just need to detect them here and treat such EBUSY errors as + * if the upper file was successfully deleted. + */ + if (err == -EBUSY && lower_dentry->d_flags & DCACHE_NFSFS_RENAMED) + err = 0; + if (err) + goto out; + fsstack_copy_attr_times(dir, lower_dir_inode); + fsstack_copy_inode_size(dir, lower_dir_inode); + set_nlink(d_inode(dentry), + sdcardfs_lower_inode(d_inode(dentry))->i_nlink); + d_inode(dentry)->i_ctime = dir->i_ctime; + d_drop(dentry); /* this is needed, else LTP fails (VFS won't do it) */ +out: + unlock_dir(lower_dir_dentry); + dput(lower_dentry); + sdcardfs_put_lower_path(dentry, &lower_path); + revert_fsids(saved_cred); +out_eacces: + return err; +} + +static int touch(char *abs_path, mode_t mode) +{ + struct file *filp = filp_open(abs_path, O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW, mode); + + if (IS_ERR(filp)) { + if (PTR_ERR(filp) == -EEXIST) { + return 0; + } else { + pr_err("sdcardfs: failed to open(%s): %ld\n", + abs_path, PTR_ERR(filp)); + return PTR_ERR(filp); + } + } + filp_close(filp, current->files); + return 0; +} + +static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + int err; + int make_nomedia_in_obb = 0; + struct dentry *lower_dentry; + struct vfsmount *lower_mnt; + struct dentry *lower_parent_dentry = NULL; + struct dentry *parent_dentry = NULL; + struct path lower_path; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + const struct cred *saved_cred = NULL; + struct sdcardfs_inode_data *pd = SDCARDFS_I(dir)->data; + int touch_err = 0; + struct fs_struct *saved_fs; + struct fs_struct *copied_fs; + struct qstr q_obb = QSTR_LITERAL("obb"); + struct qstr q_data = QSTR_LITERAL("data"); + + if (!check_caller_access_to_name(dir, &dentry->d_name)) { + err = -EACCES; + goto out_eacces; + } + + /* save current_cred and override it */ + saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb), + SDCARDFS_I(dir)->data); + if (!saved_cred) + return -ENOMEM; + + /* check disk space */ + parent_dentry = dget_parent(dentry); + if (!check_min_free_space(parent_dentry, 0, 1)) { + pr_err("sdcardfs: No minimum free space.\n"); + err = -ENOSPC; + dput(parent_dentry); + goto out_revert; + } + dput(parent_dentry); + + /* the lower_dentry is negative here */ + sdcardfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_mnt = lower_path.mnt; + lower_parent_dentry = lock_parent(lower_dentry); + + /* set last 16bytes of mode field to 0775 */ + mode = (mode & S_IFMT) | 00775; + + /* temporarily change umask for lower fs write */ + saved_fs = current->fs; + copied_fs = copy_fs_struct(current->fs); + if (!copied_fs) { + err = -ENOMEM; + unlock_dir(lower_parent_dentry); + goto out_unlock; + } + copied_fs->umask = 0; + task_lock(current); + current->fs = copied_fs; + task_unlock(current); + + err = vfs_mkdir2(lower_mnt, d_inode(lower_parent_dentry), lower_dentry, mode); + + if (err) { + unlock_dir(lower_parent_dentry); + goto out; + } + + /* if it is a local obb dentry, setup it with the base obbpath */ + if (need_graft_path(dentry)) { + + err = setup_obb_dentry(dentry, &lower_path); + if (err) { + /* if the sbi->obbpath is not available, the lower_path won't be + * changed by setup_obb_dentry() but the lower path is saved to + * its orig_path. this dentry will be revalidated later. + * but now, the lower_path should be NULL + */ + sdcardfs_put_reset_lower_path(dentry); + + /* the newly created lower path which saved to its orig_path or + * the lower_path is the base obbpath. + * therefore, an additional path_get is required + */ + path_get(&lower_path); + } else + make_nomedia_in_obb = 1; + } + + err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path, pd->userid); + if (err) { + unlock_dir(lower_parent_dentry); + goto out; + } + + fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); + fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry)); + /* update number of links on parent directory */ + set_nlink(dir, sdcardfs_lower_inode(dir)->i_nlink); + fixup_lower_ownership(dentry, dentry->d_name.name); + unlock_dir(lower_parent_dentry); + if ((!sbi->options.multiuser) && (qstr_case_eq(&dentry->d_name, &q_obb)) + && (pd->perm == PERM_ANDROID) && (pd->userid == 0)) + make_nomedia_in_obb = 1; + + /* When creating /Android/data and /Android/obb, mark them as .nomedia */ + if (make_nomedia_in_obb || + ((pd->perm == PERM_ANDROID) + && (qstr_case_eq(&dentry->d_name, &q_data)))) { + revert_fsids(saved_cred); + saved_cred = override_fsids(sbi, + SDCARDFS_I(d_inode(dentry))->data); + if (!saved_cred) { + pr_err("sdcardfs: failed to set up .nomedia in %s: %d\n", + lower_path.dentry->d_name.name, + -ENOMEM); + goto out; + } + set_fs_pwd(current->fs, &lower_path); + touch_err = touch(".nomedia", 0664); + if (touch_err) { + pr_err("sdcardfs: failed to create .nomedia in %s: %d\n", + lower_path.dentry->d_name.name, + touch_err); + goto out; + } + } +out: + task_lock(current); + current->fs = saved_fs; + task_unlock(current); + + free_fs_struct(copied_fs); +out_unlock: + sdcardfs_put_lower_path(dentry, &lower_path); +out_revert: + revert_fsids(saved_cred); +out_eacces: + return err; +} + +static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct dentry *lower_dentry; + struct dentry *lower_dir_dentry; + struct vfsmount *lower_mnt; + int err; + struct path lower_path; + const struct cred *saved_cred = NULL; + + if (!check_caller_access_to_name(dir, &dentry->d_name)) { + err = -EACCES; + goto out_eacces; + } + + /* save current_cred and override it */ + saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb), + SDCARDFS_I(dir)->data); + if (!saved_cred) + return -ENOMEM; + + /* sdcardfs_get_real_lower(): in case of remove an user's obb dentry + * the dentry on the original path should be deleted. + */ + sdcardfs_get_real_lower(dentry, &lower_path); + + lower_dentry = lower_path.dentry; + lower_mnt = lower_path.mnt; + lower_dir_dentry = lock_parent(lower_dentry); + + err = vfs_rmdir2(lower_mnt, d_inode(lower_dir_dentry), lower_dentry); + if (err) + goto out; + + d_drop(dentry); /* drop our dentry on success (why not VFS's job?) */ + if (d_inode(dentry)) + clear_nlink(d_inode(dentry)); + fsstack_copy_attr_times(dir, d_inode(lower_dir_dentry)); + fsstack_copy_inode_size(dir, d_inode(lower_dir_dentry)); + set_nlink(dir, d_inode(lower_dir_dentry)->i_nlink); + +out: + unlock_dir(lower_dir_dentry); + sdcardfs_put_real_lower(dentry, &lower_path); + revert_fsids(saved_cred); +out_eacces: + return err; +} + +/* + * The locking rules in sdcardfs_rename are complex. We could use a simpler + * superblock-level name-space lock for renames and copy-ups. + */ +static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + int err = 0; + struct dentry *lower_old_dentry = NULL; + struct dentry *lower_new_dentry = NULL; + struct dentry *lower_old_dir_dentry = NULL; + struct dentry *lower_new_dir_dentry = NULL; + struct vfsmount *lower_mnt = NULL; + struct dentry *trap = NULL; + struct path lower_old_path, lower_new_path; + const struct cred *saved_cred = NULL; + + if (flags) + return -EINVAL; + + if (!check_caller_access_to_name(old_dir, &old_dentry->d_name) || + !check_caller_access_to_name(new_dir, &new_dentry->d_name)) { + err = -EACCES; + goto out_eacces; + } + + /* save current_cred and override it */ + saved_cred = override_fsids(SDCARDFS_SB(old_dir->i_sb), + SDCARDFS_I(new_dir)->data); + if (!saved_cred) + return -ENOMEM; + + sdcardfs_get_real_lower(old_dentry, &lower_old_path); + sdcardfs_get_lower_path(new_dentry, &lower_new_path); + lower_old_dentry = lower_old_path.dentry; + lower_new_dentry = lower_new_path.dentry; + lower_mnt = lower_old_path.mnt; + lower_old_dir_dentry = dget_parent(lower_old_dentry); + lower_new_dir_dentry = dget_parent(lower_new_dentry); + + trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); + /* source should not be ancestor of target */ + if (trap == lower_old_dentry) { + err = -EINVAL; + goto out; + } + /* target should not be ancestor of source */ + if (trap == lower_new_dentry) { + err = -ENOTEMPTY; + goto out; + } + + err = vfs_rename2(lower_mnt, + d_inode(lower_old_dir_dentry), lower_old_dentry, + d_inode(lower_new_dir_dentry), lower_new_dentry, + NULL, 0); + if (err) + goto out; + + /* Copy attrs from lower dir, but i_uid/i_gid */ + sdcardfs_copy_and_fix_attrs(new_dir, d_inode(lower_new_dir_dentry)); + fsstack_copy_inode_size(new_dir, d_inode(lower_new_dir_dentry)); + + if (new_dir != old_dir) { + sdcardfs_copy_and_fix_attrs(old_dir, d_inode(lower_old_dir_dentry)); + fsstack_copy_inode_size(old_dir, d_inode(lower_old_dir_dentry)); + } + get_derived_permission_new(new_dentry->d_parent, old_dentry, &new_dentry->d_name); + fixup_tmp_permissions(d_inode(old_dentry)); + fixup_lower_ownership(old_dentry, new_dentry->d_name.name); + d_invalidate(old_dentry); /* Can't fixup ownership recursively :( */ +out: + unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); + dput(lower_old_dir_dentry); + dput(lower_new_dir_dentry); + sdcardfs_put_real_lower(old_dentry, &lower_old_path); + sdcardfs_put_lower_path(new_dentry, &lower_new_path); + revert_fsids(saved_cred); +out_eacces: + return err; +} + +#if 0 +static int sdcardfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) +{ + int err; + struct dentry *lower_dentry; + struct path lower_path; + /* XXX readlink does not requires overriding credential */ + + sdcardfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + if (!d_inode(lower_dentry)->i_op || + !d_inode(lower_dentry)->i_op->readlink) { + err = -EINVAL; + goto out; + } + + err = d_inode(lower_dentry)->i_op->readlink(lower_dentry, + buf, bufsiz); + if (err < 0) + goto out; + fsstack_copy_attr_atime(d_inode(dentry), d_inode(lower_dentry)); + +out: + sdcardfs_put_lower_path(dentry, &lower_path); + return err; +} +#endif + +#if 0 +static const char *sdcardfs_follow_link(struct dentry *dentry, void **cookie) +{ + char *buf; + int len = PAGE_SIZE, err; + mm_segment_t old_fs; + + /* This is freed by the put_link method assuming a successful call. */ + buf = kmalloc(len, GFP_KERNEL); + if (!buf) { + buf = ERR_PTR(-ENOMEM); + return buf; + } + + /* read the symlink, and then we will follow it */ + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = sdcardfs_readlink(dentry, buf, len); + set_fs(old_fs); + if (err < 0) { + kfree(buf); + buf = ERR_PTR(err); + } else { + buf[err] = '\0'; + } + return *cookie = buf; +} +#endif + +static int sdcardfs_permission_wrn(struct inode *inode, int mask) +{ + WARN_RATELIMIT(1, "sdcardfs does not support permission. Use permission2.\n"); + return -EINVAL; +} + +void copy_attrs(struct inode *dest, const struct inode *src) +{ + dest->i_mode = src->i_mode; + dest->i_uid = src->i_uid; + dest->i_gid = src->i_gid; + dest->i_rdev = src->i_rdev; + dest->i_atime = src->i_atime; + dest->i_mtime = src->i_mtime; + dest->i_ctime = src->i_ctime; + dest->i_blkbits = src->i_blkbits; + dest->i_flags = src->i_flags; +#ifdef CONFIG_FS_POSIX_ACL + dest->i_acl = src->i_acl; +#endif +#ifdef CONFIG_SECURITY + dest->i_security = src->i_security; +#endif +} + +static int sdcardfs_permission(struct vfsmount *mnt, struct inode *inode, int mask) +{ + int err; + struct inode tmp; + struct sdcardfs_inode_data *top = top_data_get(SDCARDFS_I(inode)); + + if (IS_ERR(mnt)) + return PTR_ERR(mnt); + + if (!top) + return -EINVAL; + + /* + * Permission check on sdcardfs inode. + * Calling process should have AID_SDCARD_RW permission + * Since generic_permission only needs i_mode, i_uid, + * i_gid, and i_sb, we can create a fake inode to pass + * this information down in. + * + * The underlying code may attempt to take locks in some + * cases for features we're not using, but if that changes, + * locks must be dealt with to avoid undefined behavior. + */ + copy_attrs(&tmp, inode); + tmp.i_uid = make_kuid(&init_user_ns, top->d_uid); + tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, inode->i_sb, top)); + tmp.i_mode = (inode->i_mode & S_IFMT) + | get_mode(mnt, SDCARDFS_I(inode), top); + data_put(top); + tmp.i_sb = inode->i_sb; + if (IS_POSIXACL(inode)) + pr_warn("%s: This may be undefined behavior...\n", __func__); + err = generic_permission(&tmp, mask); + return err; +} + +static int sdcardfs_setattr_wrn(struct dentry *dentry, struct iattr *ia) +{ + WARN_RATELIMIT(1, "sdcardfs does not support setattr. User setattr2.\n"); + return -EINVAL; +} + +static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct iattr *ia) +{ + int err; + struct dentry *lower_dentry; + struct vfsmount *lower_mnt; + struct inode *inode; + struct inode *lower_inode; + struct path lower_path; + struct iattr lower_ia; + struct dentry *parent; + struct inode tmp; + struct dentry tmp_d; + struct sdcardfs_inode_data *top; + + const struct cred *saved_cred = NULL; + + inode = d_inode(dentry); + top = top_data_get(SDCARDFS_I(inode)); + + if (!top) + return -EINVAL; + + /* + * Permission check on sdcardfs inode. + * Calling process should have AID_SDCARD_RW permission + * Since generic_permission only needs i_mode, i_uid, + * i_gid, and i_sb, we can create a fake inode to pass + * this information down in. + * + * The underlying code may attempt to take locks in some + * cases for features we're not using, but if that changes, + * locks must be dealt with to avoid undefined behavior. + * + */ + copy_attrs(&tmp, inode); + tmp.i_uid = make_kuid(&init_user_ns, top->d_uid); + tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, dentry->d_sb, top)); + tmp.i_mode = (inode->i_mode & S_IFMT) + | get_mode(mnt, SDCARDFS_I(inode), top); + tmp.i_size = i_size_read(inode); + data_put(top); + tmp.i_sb = inode->i_sb; + tmp_d.d_inode = &tmp; + + /* + * Check if user has permission to change dentry. We don't check if + * this user can change the lower inode: that should happen when + * calling notify_change on the lower inode. + */ + /* prepare our own lower struct iattr (with the lower file) */ + memcpy(&lower_ia, ia, sizeof(lower_ia)); + /* Allow touch updating timestamps. A previous permission check ensures + * we have write access. Changes to mode, owner, and group are ignored + */ + ia->ia_valid |= ATTR_FORCE; + err = setattr_prepare(&tmp_d, ia); + + if (!err) { + /* check the Android group ID */ + parent = dget_parent(dentry); + if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) + err = -EACCES; + dput(parent); + } + + if (err) + goto out_err; + + /* save current_cred and override it */ + saved_cred = override_fsids(SDCARDFS_SB(dentry->d_sb), + SDCARDFS_I(inode)->data); + if (!saved_cred) + return -ENOMEM; + + sdcardfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_mnt = lower_path.mnt; + lower_inode = sdcardfs_lower_inode(inode); + + if (ia->ia_valid & ATTR_FILE) + lower_ia.ia_file = sdcardfs_lower_file(ia->ia_file); + + lower_ia.ia_valid &= ~(ATTR_UID | ATTR_GID | ATTR_MODE); + + /* + * If shrinking, first truncate upper level to cancel writing dirty + * pages beyond the new eof; and also if its' maxbytes is more + * limiting (fail with -EFBIG before making any change to the lower + * level). There is no need to vmtruncate the upper level + * afterwards in the other cases: we fsstack_copy_inode_size from + * the lower level. + */ + if (ia->ia_valid & ATTR_SIZE) { + err = inode_newsize_ok(&tmp, ia->ia_size); + if (err) { + goto out; + } + truncate_setsize(inode, ia->ia_size); + } + + /* + * mode change is for clearing setuid/setgid bits. Allow lower fs + * to interpret this in its own way. + */ + if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) + lower_ia.ia_valid &= ~ATTR_MODE; + + /* notify the (possibly copied-up) lower inode */ + /* + * Note: we use d_inode(lower_dentry), because lower_inode may be + * unlinked (no inode->i_sb and i_ino==0. This happens if someone + * tries to open(), unlink(), then ftruncate() a file. + */ + inode_lock(d_inode(lower_dentry)); + err = notify_change2(lower_mnt, lower_dentry, &lower_ia, /* note: lower_ia */ + NULL); + inode_unlock(d_inode(lower_dentry)); + if (err) + goto out; + + /* get attributes from the lower inode and update derived permissions */ + sdcardfs_copy_and_fix_attrs(inode, lower_inode); + + /* + * Not running fsstack_copy_inode_size(inode, lower_inode), because + * VFS should update our inode size, and notify_change on + * lower_inode should update its size. + */ + +out: + sdcardfs_put_lower_path(dentry, &lower_path); + revert_fsids(saved_cred); +out_err: + return err; +} + +static int sdcardfs_fillattr(struct vfsmount *mnt, struct inode *inode, + struct kstat *lower_stat, struct kstat *stat) +{ + struct sdcardfs_inode_info *info = SDCARDFS_I(inode); + struct sdcardfs_inode_data *top = top_data_get(info); + struct super_block *sb = inode->i_sb; + + if (!top) + return -EINVAL; + + stat->dev = inode->i_sb->s_dev; + stat->ino = inode->i_ino; + stat->mode = (inode->i_mode & S_IFMT) | get_mode(mnt, info, top); + stat->nlink = inode->i_nlink; + stat->uid = make_kuid(&init_user_ns, top->d_uid); + stat->gid = make_kgid(&init_user_ns, get_gid(mnt, sb, top)); + stat->rdev = inode->i_rdev; + stat->size = lower_stat->size; + stat->atime = lower_stat->atime; + stat->mtime = lower_stat->mtime; + stat->ctime = lower_stat->ctime; + stat->blksize = lower_stat->blksize; + stat->blocks = lower_stat->blocks; + data_put(top); + return 0; +} +static int sdcardfs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) +{ + struct vfsmount *mnt = path->mnt; + struct dentry *dentry = path->dentry; + struct kstat lower_stat; + struct path lower_path; + struct dentry *parent; + int err; + + parent = dget_parent(dentry); + if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) { + dput(parent); + return -EACCES; + } + dput(parent); + + sdcardfs_get_lower_path(dentry, &lower_path); + err = vfs_getattr(&lower_path, &lower_stat, request_mask, flags); + if (err) + goto out; + sdcardfs_copy_and_fix_attrs(d_inode(dentry), + d_inode(lower_path.dentry)); + err = sdcardfs_fillattr(mnt, d_inode(dentry), &lower_stat, stat); +out: + sdcardfs_put_lower_path(dentry, &lower_path); + return err; +} + +const struct inode_operations sdcardfs_symlink_iops = { + .permission2 = sdcardfs_permission, + .setattr2 = sdcardfs_setattr, + /* XXX Following operations are implemented, + * but FUSE(sdcard) or FAT does not support them + * These methods are *NOT* perfectly tested. + .readlink = sdcardfs_readlink, + .follow_link = sdcardfs_follow_link, + .put_link = kfree_put_link, + */ +}; + +const struct inode_operations sdcardfs_dir_iops = { + .create = sdcardfs_create, + .lookup = sdcardfs_lookup, + .permission = sdcardfs_permission_wrn, + .permission2 = sdcardfs_permission, + .unlink = sdcardfs_unlink, + .mkdir = sdcardfs_mkdir, + .rmdir = sdcardfs_rmdir, + .rename = sdcardfs_rename, + .setattr = sdcardfs_setattr_wrn, + .setattr2 = sdcardfs_setattr, + .getattr = sdcardfs_getattr, +}; + +const struct inode_operations sdcardfs_main_iops = { + .permission = sdcardfs_permission_wrn, + .permission2 = sdcardfs_permission, + .setattr = sdcardfs_setattr_wrn, + .setattr2 = sdcardfs_setattr, + .getattr = sdcardfs_getattr, +};
diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c new file mode 100644 index 0000000..73179ce2 --- /dev/null +++ b/fs/sdcardfs/lookup.c
@@ -0,0 +1,470 @@ +/* + * fs/sdcardfs/lookup.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" +#include "linux/delay.h" + +/* The dentry cache is just so we have properly sized dentries */ +static struct kmem_cache *sdcardfs_dentry_cachep; + +int sdcardfs_init_dentry_cache(void) +{ + sdcardfs_dentry_cachep = + kmem_cache_create("sdcardfs_dentry", + sizeof(struct sdcardfs_dentry_info), + 0, SLAB_RECLAIM_ACCOUNT, NULL); + + return sdcardfs_dentry_cachep ? 0 : -ENOMEM; +} + +void sdcardfs_destroy_dentry_cache(void) +{ + kmem_cache_destroy(sdcardfs_dentry_cachep); +} + +void free_dentry_private_data(struct dentry *dentry) +{ + kmem_cache_free(sdcardfs_dentry_cachep, dentry->d_fsdata); + dentry->d_fsdata = NULL; +} + +/* allocate new dentry private data */ +int new_dentry_private_data(struct dentry *dentry) +{ + struct sdcardfs_dentry_info *info = SDCARDFS_D(dentry); + + /* use zalloc to init dentry_info.lower_path */ + info = kmem_cache_zalloc(sdcardfs_dentry_cachep, GFP_ATOMIC); + if (!info) + return -ENOMEM; + + spin_lock_init(&info->lock); + dentry->d_fsdata = info; + + return 0; +} + +struct inode_data { + struct inode *lower_inode; + userid_t id; +}; + +static int sdcardfs_inode_test(struct inode *inode, void *candidate_data/*void *candidate_lower_inode*/) +{ + struct inode *current_lower_inode = sdcardfs_lower_inode(inode); + userid_t current_userid = SDCARDFS_I(inode)->data->userid; + + if (current_lower_inode == ((struct inode_data *)candidate_data)->lower_inode && + current_userid == ((struct inode_data *)candidate_data)->id) + return 1; /* found a match */ + else + return 0; /* no match */ +} + +static int sdcardfs_inode_set(struct inode *inode, void *lower_inode) +{ + /* we do actual inode initialization in sdcardfs_iget */ + return 0; +} + +struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode, userid_t id) +{ + struct sdcardfs_inode_info *info; + struct inode_data data; + struct inode *inode; /* the new inode to return */ + + if (!igrab(lower_inode)) + return ERR_PTR(-ESTALE); + + data.id = id; + data.lower_inode = lower_inode; + inode = iget5_locked(sb, /* our superblock */ + /* + * hashval: we use inode number, but we can + * also use "(unsigned long)lower_inode" + * instead. + */ + lower_inode->i_ino, /* hashval */ + sdcardfs_inode_test, /* inode comparison function */ + sdcardfs_inode_set, /* inode init function */ + &data); /* data passed to test+set fxns */ + if (!inode) { + iput(lower_inode); + return ERR_PTR(-ENOMEM); + } + /* if found a cached inode, then just return it (after iput) */ + if (!(inode->i_state & I_NEW)) { + iput(lower_inode); + return inode; + } + + /* initialize new inode */ + info = SDCARDFS_I(inode); + + inode->i_ino = lower_inode->i_ino; + sdcardfs_set_lower_inode(inode, lower_inode); + + inode->i_version++; + + /* use different set of inode ops for symlinks & directories */ + if (S_ISDIR(lower_inode->i_mode)) + inode->i_op = &sdcardfs_dir_iops; + else if (S_ISLNK(lower_inode->i_mode)) + inode->i_op = &sdcardfs_symlink_iops; + else + inode->i_op = &sdcardfs_main_iops; + + /* use different set of file ops for directories */ + if (S_ISDIR(lower_inode->i_mode)) + inode->i_fop = &sdcardfs_dir_fops; + else + inode->i_fop = &sdcardfs_main_fops; + + inode->i_mapping->a_ops = &sdcardfs_aops; + + inode->i_atime.tv_sec = 0; + inode->i_atime.tv_nsec = 0; + inode->i_mtime.tv_sec = 0; + inode->i_mtime.tv_nsec = 0; + inode->i_ctime.tv_sec = 0; + inode->i_ctime.tv_nsec = 0; + + /* properly initialize special inodes */ + if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) || + S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode)) + init_special_inode(inode, lower_inode->i_mode, + lower_inode->i_rdev); + + /* all well, copy inode attributes */ + sdcardfs_copy_and_fix_attrs(inode, lower_inode); + fsstack_copy_inode_size(inode, lower_inode); + + unlock_new_inode(inode); + return inode; +} + +/* + * Helper interpose routine, called directly by ->lookup to handle + * spliced dentries. + */ +static struct dentry *__sdcardfs_interpose(struct dentry *dentry, + struct super_block *sb, + struct path *lower_path, + userid_t id) +{ + struct inode *inode; + struct inode *lower_inode; + struct super_block *lower_sb; + struct dentry *ret_dentry; + + lower_inode = d_inode(lower_path->dentry); + lower_sb = sdcardfs_lower_super(sb); + + /* check that the lower file system didn't cross a mount point */ + if (lower_inode->i_sb != lower_sb) { + ret_dentry = ERR_PTR(-EXDEV); + goto out; + } + + /* + * We allocate our new inode below by calling sdcardfs_iget, + * which will initialize some of the new inode's fields + */ + + /* inherit lower inode number for sdcardfs's inode */ + inode = sdcardfs_iget(sb, lower_inode, id); + if (IS_ERR(inode)) { + ret_dentry = ERR_CAST(inode); + goto out; + } + + ret_dentry = d_splice_alias(inode, dentry); + dentry = ret_dentry ?: dentry; + if (!IS_ERR(dentry)) + update_derived_permission_lock(dentry); +out: + return ret_dentry; +} + +/* + * Connect an sdcardfs inode dentry/inode with several lower ones. This is + * the classic stackable file system "vnode interposition" action. + * + * @dentry: sdcardfs's dentry which interposes on lower one + * @sb: sdcardfs's super_block + * @lower_path: the lower path (caller does path_get/put) + */ +int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, + struct path *lower_path, userid_t id) +{ + struct dentry *ret_dentry; + + ret_dentry = __sdcardfs_interpose(dentry, sb, lower_path, id); + return PTR_ERR(ret_dentry); +} + +struct sdcardfs_name_data { + struct dir_context ctx; + const struct qstr *to_find; + char *name; + bool found; +}; + +static int sdcardfs_name_match(struct dir_context *ctx, const char *name, + int namelen, loff_t offset, u64 ino, unsigned int d_type) +{ + struct sdcardfs_name_data *buf = container_of(ctx, struct sdcardfs_name_data, ctx); + struct qstr candidate = QSTR_INIT(name, namelen); + + if (qstr_case_eq(buf->to_find, &candidate)) { + memcpy(buf->name, name, namelen); + buf->name[namelen] = 0; + buf->found = true; + return 1; + } + return 0; +} + +/* + * Main driver function for sdcardfs's lookup. + * + * Returns: NULL (ok), ERR_PTR if an error occurred. + * Fills in lower_parent_path with <dentry,mnt> on success. + */ +static struct dentry *__sdcardfs_lookup(struct dentry *dentry, + unsigned int flags, struct path *lower_parent_path, userid_t id) +{ + int err = 0; + struct vfsmount *lower_dir_mnt; + struct dentry *lower_dir_dentry = NULL; + struct dentry *lower_dentry; + const struct qstr *name; + struct path lower_path; + struct qstr dname; + struct dentry *ret_dentry = NULL; + struct sdcardfs_sb_info *sbi; + + sbi = SDCARDFS_SB(dentry->d_sb); + /* must initialize dentry operations */ + d_set_d_op(dentry, &sdcardfs_ci_dops); + + if (IS_ROOT(dentry)) + goto out; + + name = &dentry->d_name; + + /* now start the actual lookup procedure */ + lower_dir_dentry = lower_parent_path->dentry; + lower_dir_mnt = lower_parent_path->mnt; + + /* Use vfs_path_lookup to check if the dentry exists or not */ + err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name->name, 0, + &lower_path); + /* check for other cases */ + if (err == -ENOENT) { + struct file *file; + const struct cred *cred = current_cred(); + + struct sdcardfs_name_data buffer = { + .ctx.actor = sdcardfs_name_match, + .to_find = name, + .name = __getname(), + .found = false, + }; + + if (!buffer.name) { + err = -ENOMEM; + goto out; + } + file = dentry_open(lower_parent_path, O_RDONLY, cred); + if (IS_ERR(file)) { + err = PTR_ERR(file); + goto put_name; + } + err = iterate_dir(file, &buffer.ctx); + fput(file); + if (err) + goto put_name; + + if (buffer.found) + err = vfs_path_lookup(lower_dir_dentry, + lower_dir_mnt, + buffer.name, 0, + &lower_path); + else + err = -ENOENT; +put_name: + __putname(buffer.name); + } + + /* no error: handle positive dentries */ + if (!err) { + /* check if the dentry is an obb dentry + * if true, the lower_inode must be replaced with + * the inode of the graft path + */ + + if (need_graft_path(dentry)) { + + /* setup_obb_dentry() + * The lower_path will be stored to the dentry's orig_path + * and the base obbpath will be copyed to the lower_path variable. + * if an error returned, there's no change in the lower_path + * returns: -ERRNO if error (0: no error) + */ + err = setup_obb_dentry(dentry, &lower_path); + + if (err) { + /* if the sbi->obbpath is not available, we can optionally + * setup the lower_path with its orig_path. + * but, the current implementation just returns an error + * because the sdcard daemon also regards this case as + * a lookup fail. + */ + pr_info("sdcardfs: base obbpath is not available\n"); + sdcardfs_put_reset_orig_path(dentry); + goto out; + } + } + + sdcardfs_set_lower_path(dentry, &lower_path); + ret_dentry = + __sdcardfs_interpose(dentry, dentry->d_sb, &lower_path, id); + if (IS_ERR(ret_dentry)) { + err = PTR_ERR(ret_dentry); + /* path_put underlying path on error */ + sdcardfs_put_reset_lower_path(dentry); + } + goto out; + } + + /* + * We don't consider ENOENT an error, and we want to return a + * negative dentry. + */ + if (err && err != -ENOENT) + goto out; + + /* instatiate a new negative dentry */ + dname.name = name->name; + dname.len = name->len; + + /* See if the low-level filesystem might want + * to use its own hash + */ + lower_dentry = d_hash_and_lookup(lower_dir_dentry, &dname); + if (IS_ERR(lower_dentry)) + return lower_dentry; + + if (!lower_dentry) { + /* We called vfs_path_lookup earlier, and did not get a negative + * dentry then. Don't confuse the lower filesystem by forcing + * one on it now... + */ + err = -ENOENT; + goto out; + } + + lower_path.dentry = lower_dentry; + lower_path.mnt = mntget(lower_dir_mnt); + sdcardfs_set_lower_path(dentry, &lower_path); + + /* + * If the intent is to create a file, then don't return an error, so + * the VFS will continue the process of making this negative dentry + * into a positive one. + */ + if (flags & (LOOKUP_CREATE|LOOKUP_RENAME_TARGET)) + err = 0; + +out: + if (err) + return ERR_PTR(err); + return ret_dentry; +} + +/* + * On success: + * fills dentry object appropriate values and returns NULL. + * On fail (== error) + * returns error ptr + * + * @dir : Parent inode. + * @dentry : Target dentry to lookup. we should set each of fields. + * (dentry->d_name is initialized already) + * @nd : nameidata of parent inode + */ +struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + struct dentry *ret = NULL, *parent; + struct path lower_parent_path; + int err = 0; + const struct cred *saved_cred = NULL; + + parent = dget_parent(dentry); + + if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) { + ret = ERR_PTR(-EACCES); + goto out_err; + } + + /* save current_cred and override it */ + saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb), + SDCARDFS_I(dir)->data); + if (!saved_cred) { + ret = ERR_PTR(-ENOMEM); + goto out_err; + } + + sdcardfs_get_lower_path(parent, &lower_parent_path); + + /* allocate dentry private data. We free it in ->d_release */ + err = new_dentry_private_data(dentry); + if (err) { + ret = ERR_PTR(err); + goto out; + } + + ret = __sdcardfs_lookup(dentry, flags, &lower_parent_path, + SDCARDFS_I(dir)->data->userid); + if (IS_ERR(ret)) + goto out; + if (ret) + dentry = ret; + if (d_inode(dentry)) { + fsstack_copy_attr_times(d_inode(dentry), + sdcardfs_lower_inode(d_inode(dentry))); + /* get derived permission */ + get_derived_permission(parent, dentry); + fixup_tmp_permissions(d_inode(dentry)); + fixup_lower_ownership(dentry, dentry->d_name.name); + } + /* update parent directory's atime */ + fsstack_copy_attr_atime(d_inode(parent), + sdcardfs_lower_inode(d_inode(parent))); + +out: + sdcardfs_put_lower_path(parent, &lower_parent_path); + revert_fsids(saved_cred); +out_err: + dput(parent); + return ret; +}
diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c new file mode 100644 index 0000000..d890c57 --- /dev/null +++ b/fs/sdcardfs/main.c
@@ -0,0 +1,511 @@ +/* + * fs/sdcardfs/main.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" +#include <linux/module.h> +#include <linux/types.h> +#include <linux/parser.h> + +enum { + Opt_fsuid, + Opt_fsgid, + Opt_gid, + Opt_debug, + Opt_mask, + Opt_multiuser, + Opt_userid, + Opt_reserved_mb, + Opt_gid_derivation, + Opt_default_normal, + Opt_nocache, + Opt_unshared_obb, + Opt_err, +}; + +static const match_table_t sdcardfs_tokens = { + {Opt_fsuid, "fsuid=%u"}, + {Opt_fsgid, "fsgid=%u"}, + {Opt_gid, "gid=%u"}, + {Opt_debug, "debug"}, + {Opt_mask, "mask=%u"}, + {Opt_userid, "userid=%d"}, + {Opt_multiuser, "multiuser"}, + {Opt_gid_derivation, "derive_gid"}, + {Opt_default_normal, "default_normal"}, + {Opt_unshared_obb, "unshared_obb"}, + {Opt_reserved_mb, "reserved_mb=%u"}, + {Opt_nocache, "nocache"}, + {Opt_err, NULL} +}; + +static int parse_options(struct super_block *sb, char *options, int silent, + int *debug, struct sdcardfs_vfsmount_options *vfsopts, + struct sdcardfs_mount_options *opts) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + int option; + + /* by default, we use AID_MEDIA_RW as uid, gid */ + opts->fs_low_uid = AID_MEDIA_RW; + opts->fs_low_gid = AID_MEDIA_RW; + vfsopts->mask = 0; + opts->multiuser = false; + opts->fs_user_id = 0; + vfsopts->gid = 0; + /* by default, 0MB is reserved */ + opts->reserved_mb = 0; + /* by default, gid derivation is off */ + opts->gid_derivation = false; + opts->default_normal = false; + opts->nocache = false; + + *debug = 0; + + if (!options) + return 0; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + + if (!*p) + continue; + + token = match_token(p, sdcardfs_tokens, args); + + switch (token) { + case Opt_debug: + *debug = 1; + break; + case Opt_fsuid: + if (match_int(&args[0], &option)) + return 0; + opts->fs_low_uid = option; + break; + case Opt_fsgid: + if (match_int(&args[0], &option)) + return 0; + opts->fs_low_gid = option; + break; + case Opt_gid: + if (match_int(&args[0], &option)) + return 0; + vfsopts->gid = option; + break; + case Opt_userid: + if (match_int(&args[0], &option)) + return 0; + opts->fs_user_id = option; + break; + case Opt_mask: + if (match_int(&args[0], &option)) + return 0; + vfsopts->mask = option; + break; + case Opt_multiuser: + opts->multiuser = true; + break; + case Opt_reserved_mb: + if (match_int(&args[0], &option)) + return 0; + opts->reserved_mb = option; + break; + case Opt_gid_derivation: + opts->gid_derivation = true; + break; + case Opt_default_normal: + opts->default_normal = true; + break; + case Opt_nocache: + opts->nocache = true; + break; + case Opt_unshared_obb: + opts->unshared_obb = true; + break; + /* unknown option */ + default: + if (!silent) + pr_err("Unrecognized mount option \"%s\" or missing value", p); + return -EINVAL; + } + } + + if (*debug) { + pr_info("sdcardfs : options - debug:%d\n", *debug); + pr_info("sdcardfs : options - uid:%d\n", + opts->fs_low_uid); + pr_info("sdcardfs : options - gid:%d\n", + opts->fs_low_gid); + } + + return 0; +} + +int parse_options_remount(struct super_block *sb, char *options, int silent, + struct sdcardfs_vfsmount_options *vfsopts) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + int option; + int debug; + + if (!options) + return 0; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + + if (!*p) + continue; + + token = match_token(p, sdcardfs_tokens, args); + + switch (token) { + case Opt_debug: + debug = 1; + break; + case Opt_gid: + if (match_int(&args[0], &option)) + return 0; + vfsopts->gid = option; + + break; + case Opt_mask: + if (match_int(&args[0], &option)) + return 0; + vfsopts->mask = option; + break; + case Opt_unshared_obb: + case Opt_default_normal: + case Opt_multiuser: + case Opt_userid: + case Opt_fsuid: + case Opt_fsgid: + case Opt_reserved_mb: + case Opt_gid_derivation: + if (!silent) + pr_warn("Option \"%s\" can't be changed during remount\n", p); + break; + /* unknown option */ + default: + if (!silent) + pr_err("Unrecognized mount option \"%s\" or missing value", p); + return -EINVAL; + } + } + + if (debug) { + pr_info("sdcardfs : options - debug:%d\n", debug); + pr_info("sdcardfs : options - gid:%d\n", vfsopts->gid); + pr_info("sdcardfs : options - mask:%d\n", vfsopts->mask); + } + + return 0; +} + +#if 0 +/* + * our custom d_alloc_root work-alike + * + * we can't use d_alloc_root if we want to use our own interpose function + * unchanged, so we simply call our own "fake" d_alloc_root + */ +static struct dentry *sdcardfs_d_alloc_root(struct super_block *sb) +{ + struct dentry *ret = NULL; + + if (sb) { + static const struct qstr name = { + .name = "/", + .len = 1 + }; + + ret = d_alloc(NULL, &name); + if (ret) { + d_set_d_op(ret, &sdcardfs_ci_dops); + ret->d_sb = sb; + ret->d_parent = ret; + } + } + return ret; +} +#endif + +DEFINE_MUTEX(sdcardfs_super_list_lock); +EXPORT_SYMBOL_GPL(sdcardfs_super_list_lock); +LIST_HEAD(sdcardfs_super_list); +EXPORT_SYMBOL_GPL(sdcardfs_super_list); + +/* + * There is no need to lock the sdcardfs_super_info's rwsem as there is no + * way anyone can have a reference to the superblock at this point in time. + */ +static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb, + const char *dev_name, void *raw_data, int silent) +{ + int err = 0; + int debug; + struct super_block *lower_sb; + struct path lower_path; + struct sdcardfs_sb_info *sb_info; + struct sdcardfs_vfsmount_options *mnt_opt = mnt->data; + struct inode *inode; + + pr_info("sdcardfs version 2.0\n"); + + if (!dev_name) { + pr_err("sdcardfs: read_super: missing dev_name argument\n"); + err = -EINVAL; + goto out; + } + + pr_info("sdcardfs: dev_name -> %s\n", dev_name); + pr_info("sdcardfs: options -> %s\n", (char *)raw_data); + pr_info("sdcardfs: mnt -> %p\n", mnt); + + /* parse lower path */ + err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, + &lower_path); + if (err) { + pr_err("sdcardfs: error accessing lower directory '%s'\n", dev_name); + goto out; + } + + /* allocate superblock private data */ + sb->s_fs_info = kzalloc(sizeof(struct sdcardfs_sb_info), GFP_KERNEL); + if (!SDCARDFS_SB(sb)) { + pr_crit("sdcardfs: read_super: out of memory\n"); + err = -ENOMEM; + goto out_free; + } + + sb_info = sb->s_fs_info; + /* parse options */ + err = parse_options(sb, raw_data, silent, &debug, mnt_opt, &sb_info->options); + if (err) { + pr_err("sdcardfs: invalid options\n"); + goto out_freesbi; + } + + /* set the lower superblock field of upper superblock */ + lower_sb = lower_path.dentry->d_sb; + atomic_inc(&lower_sb->s_active); + sdcardfs_set_lower_super(sb, lower_sb); + + sb->s_stack_depth = lower_sb->s_stack_depth + 1; + if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { + pr_err("sdcardfs: maximum fs stacking depth exceeded\n"); + err = -EINVAL; + goto out_sput; + } + + /* inherit maxbytes from lower file system */ + sb->s_maxbytes = lower_sb->s_maxbytes; + + /* + * Our c/m/atime granularity is 1 ns because we may stack on file + * systems whose granularity is as good. + */ + sb->s_time_gran = 1; + + sb->s_magic = SDCARDFS_SUPER_MAGIC; + sb->s_op = &sdcardfs_sops; + + /* get a new inode and allocate our root dentry */ + inode = sdcardfs_iget(sb, d_inode(lower_path.dentry), 0); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_sput; + } + sb->s_root = d_make_root(inode); + if (!sb->s_root) { + err = -ENOMEM; + goto out_sput; + } + d_set_d_op(sb->s_root, &sdcardfs_ci_dops); + + /* link the upper and lower dentries */ + sb->s_root->d_fsdata = NULL; + err = new_dentry_private_data(sb->s_root); + if (err) + goto out_freeroot; + + /* set the lower dentries for s_root */ + sdcardfs_set_lower_path(sb->s_root, &lower_path); + + /* + * No need to call interpose because we already have a positive + * dentry, which was instantiated by d_make_root. Just need to + * d_rehash it. + */ + d_rehash(sb->s_root); + + /* setup permission policy */ + sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); + mutex_lock(&sdcardfs_super_list_lock); + if (sb_info->options.multiuser) { + setup_derived_state(d_inode(sb->s_root), PERM_PRE_ROOT, + sb_info->options.fs_user_id, AID_ROOT); + snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name); + } else { + setup_derived_state(d_inode(sb->s_root), PERM_ROOT, + sb_info->options.fs_user_id, AID_ROOT); + snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name); + } + fixup_tmp_permissions(d_inode(sb->s_root)); + sb_info->sb = sb; + list_add(&sb_info->list, &sdcardfs_super_list); + mutex_unlock(&sdcardfs_super_list_lock); + + if (!silent) + pr_info("sdcardfs: mounted on top of %s type %s\n", + dev_name, lower_sb->s_type->name); + goto out; /* all is well */ + + /* no longer needed: free_dentry_private_data(sb->s_root); */ +out_freeroot: + dput(sb->s_root); + sb->s_root = NULL; +out_sput: + /* drop refs we took earlier */ + atomic_dec(&lower_sb->s_active); +out_freesbi: + kfree(SDCARDFS_SB(sb)); + sb->s_fs_info = NULL; +out_free: + path_put(&lower_path); + +out: + return err; +} + +struct sdcardfs_mount_private { + struct vfsmount *mnt; + const char *dev_name; + void *raw_data; +}; + +static int __sdcardfs_fill_super( + struct super_block *sb, + void *_priv, int silent) +{ + struct sdcardfs_mount_private *priv = _priv; + + return sdcardfs_read_super(priv->mnt, + sb, priv->dev_name, priv->raw_data, silent); +} + +static struct dentry *sdcardfs_mount(struct vfsmount *mnt, + struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) +{ + struct sdcardfs_mount_private priv = { + .mnt = mnt, + .dev_name = dev_name, + .raw_data = raw_data + }; + + return mount_nodev(fs_type, flags, + &priv, __sdcardfs_fill_super); +} + +static struct dentry *sdcardfs_mount_wrn(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data) +{ + WARN(1, "sdcardfs does not support mount. Use mount2.\n"); + return ERR_PTR(-EINVAL); +} + +void *sdcardfs_alloc_mnt_data(void) +{ + return kmalloc(sizeof(struct sdcardfs_vfsmount_options), GFP_KERNEL); +} + +void sdcardfs_kill_sb(struct super_block *sb) +{ + struct sdcardfs_sb_info *sbi; + + if (sb->s_magic == SDCARDFS_SUPER_MAGIC && sb->s_fs_info) { + sbi = SDCARDFS_SB(sb); + mutex_lock(&sdcardfs_super_list_lock); + list_del(&sbi->list); + mutex_unlock(&sdcardfs_super_list_lock); + } + kill_anon_super(sb); +} + +static struct file_system_type sdcardfs_fs_type = { + .owner = THIS_MODULE, + .name = SDCARDFS_NAME, + .mount = sdcardfs_mount_wrn, + .mount2 = sdcardfs_mount, + .alloc_mnt_data = sdcardfs_alloc_mnt_data, + .kill_sb = sdcardfs_kill_sb, + .fs_flags = 0, +}; +MODULE_ALIAS_FS(SDCARDFS_NAME); + +static int __init init_sdcardfs_fs(void) +{ + int err; + + pr_info("Registering sdcardfs " SDCARDFS_VERSION "\n"); + + err = sdcardfs_init_inode_cache(); + if (err) + goto out; + err = sdcardfs_init_dentry_cache(); + if (err) + goto out; + err = packagelist_init(); + if (err) + goto out; + err = register_filesystem(&sdcardfs_fs_type); +out: + if (err) { + sdcardfs_destroy_inode_cache(); + sdcardfs_destroy_dentry_cache(); + packagelist_exit(); + } + return err; +} + +static void __exit exit_sdcardfs_fs(void) +{ + sdcardfs_destroy_inode_cache(); + sdcardfs_destroy_dentry_cache(); + packagelist_exit(); + unregister_filesystem(&sdcardfs_fs_type); + pr_info("Completed sdcardfs module unload\n"); +} + +/* Original wrapfs authors */ +MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University (http://www.fsl.cs.sunysb.edu/)"); + +/* Original sdcardfs authors */ +MODULE_AUTHOR("Woojoong Lee, Daeho Jeong, Kitae Lee, Yeongjin Gil System Memory Lab., Samsung Electronics"); + +/* Current maintainer */ +MODULE_AUTHOR("Daniel Rosenberg, Google"); +MODULE_DESCRIPTION("Sdcardfs " SDCARDFS_VERSION); +MODULE_LICENSE("GPL"); + +module_init(init_sdcardfs_fs); +module_exit(exit_sdcardfs_fs);
diff --git a/fs/sdcardfs/mmap.c b/fs/sdcardfs/mmap.c new file mode 100644 index 0000000..2847c0e --- /dev/null +++ b/fs/sdcardfs/mmap.c
@@ -0,0 +1,87 @@ +/* + * fs/sdcardfs/mmap.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" + +static int sdcardfs_fault(struct vm_fault *vmf) +{ + int err; + struct file *file; + const struct vm_operations_struct *lower_vm_ops; + + file = (struct file *)vmf->vma->vm_private_data; + lower_vm_ops = SDCARDFS_F(file)->lower_vm_ops; + BUG_ON(!lower_vm_ops); + + err = lower_vm_ops->fault(vmf); + return err; +} + +static void sdcardfs_vm_open(struct vm_area_struct *vma) +{ + struct file *file = (struct file *)vma->vm_private_data; + + get_file(file); +} + +static void sdcardfs_vm_close(struct vm_area_struct *vma) +{ + struct file *file = (struct file *)vma->vm_private_data; + + fput(file); +} + +static int sdcardfs_page_mkwrite(struct vm_fault *vmf) +{ + int err = 0; + struct file *file; + const struct vm_operations_struct *lower_vm_ops; + + file = (struct file *)vmf->vma->vm_private_data; + lower_vm_ops = SDCARDFS_F(file)->lower_vm_ops; + BUG_ON(!lower_vm_ops); + if (!lower_vm_ops->page_mkwrite) + goto out; + + err = lower_vm_ops->page_mkwrite(vmf); +out: + return err; +} + +static ssize_t sdcardfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) +{ + /* + * This function should never be called directly. We need it + * to exist, to get past a check in open_check_o_direct(), + * which is called from do_last(). + */ + return -EINVAL; +} + +const struct address_space_operations sdcardfs_aops = { + .direct_IO = sdcardfs_direct_IO, +}; + +const struct vm_operations_struct sdcardfs_vm_ops = { + .fault = sdcardfs_fault, + .page_mkwrite = sdcardfs_page_mkwrite, + .open = sdcardfs_vm_open, + .close = sdcardfs_vm_close, +};
diff --git a/fs/sdcardfs/multiuser.h b/fs/sdcardfs/multiuser.h new file mode 100644 index 0000000..85341e7 --- /dev/null +++ b/fs/sdcardfs/multiuser.h
@@ -0,0 +1,53 @@ +/* + * fs/sdcardfs/multiuser.h + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#define AID_USER_OFFSET 100000 /* offset for uid ranges for each user */ +#define AID_APP_START 10000 /* first app user */ +#define AID_APP_END 19999 /* last app user */ +#define AID_CACHE_GID_START 20000 /* start of gids for apps to mark cached data */ +#define AID_EXT_GID_START 30000 /* start of gids for apps to mark external data */ +#define AID_EXT_CACHE_GID_START 40000 /* start of gids for apps to mark external cached data */ +#define AID_EXT_CACHE_GID_END 49999 /* end of gids for apps to mark external cached data */ +#define AID_SHARED_GID_START 50000 /* start of gids for apps in each user to share */ + +typedef uid_t userid_t; +typedef uid_t appid_t; + +static inline uid_t multiuser_get_uid(userid_t user_id, appid_t app_id) +{ + return (user_id * AID_USER_OFFSET) + (app_id % AID_USER_OFFSET); +} + +static inline bool uid_is_app(uid_t uid) +{ + appid_t appid = uid % AID_USER_OFFSET; + + return appid >= AID_APP_START && appid <= AID_APP_END; +} + +static inline gid_t multiuser_get_ext_cache_gid(uid_t uid) +{ + return uid - AID_APP_START + AID_EXT_CACHE_GID_START; +} + +static inline gid_t multiuser_get_ext_gid(uid_t uid) +{ + return uid - AID_APP_START + AID_EXT_GID_START; +}
diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c new file mode 100644 index 0000000..4b9a563 --- /dev/null +++ b/fs/sdcardfs/packagelist.c
@@ -0,0 +1,882 @@ +/* + * fs/sdcardfs/packagelist.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" +#include <linux/hashtable.h> +#include <linux/ctype.h> +#include <linux/delay.h> +#include <linux/radix-tree.h> +#include <linux/dcache.h> + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> + +#include <linux/configfs.h> + +struct hashtable_entry { + struct hlist_node hlist; + struct hlist_node dlist; /* for deletion cleanup */ + struct qstr key; + atomic_t value; +}; + +static DEFINE_HASHTABLE(package_to_appid, 8); +static DEFINE_HASHTABLE(package_to_userid, 8); +static DEFINE_HASHTABLE(ext_to_groupid, 8); + + +static struct kmem_cache *hashtable_entry_cachep; + +static unsigned int full_name_case_hash(const void *salt, const unsigned char *name, unsigned int len) +{ + unsigned long hash = init_name_hash(salt); + + while (len--) + hash = partial_name_hash(tolower(*name++), hash); + return end_name_hash(hash); +} + +static inline void qstr_init(struct qstr *q, const char *name) +{ + q->name = name; + q->len = strlen(q->name); + q->hash = full_name_case_hash(0, q->name, q->len); +} + +static inline int qstr_copy(const struct qstr *src, struct qstr *dest) +{ + dest->name = kstrdup(src->name, GFP_KERNEL); + dest->hash_len = src->hash_len; + return !!dest->name; +} + + +static appid_t __get_appid(const struct qstr *key) +{ + struct hashtable_entry *hash_cur; + unsigned int hash = key->hash; + appid_t ret_id; + + rcu_read_lock(); + hash_for_each_possible_rcu(package_to_appid, hash_cur, hlist, hash) { + if (qstr_case_eq(key, &hash_cur->key)) { + ret_id = atomic_read(&hash_cur->value); + rcu_read_unlock(); + return ret_id; + } + } + rcu_read_unlock(); + return 0; +} + +appid_t get_appid(const char *key) +{ + struct qstr q; + + qstr_init(&q, key); + return __get_appid(&q); +} + +static appid_t __get_ext_gid(const struct qstr *key) +{ + struct hashtable_entry *hash_cur; + unsigned int hash = key->hash; + appid_t ret_id; + + rcu_read_lock(); + hash_for_each_possible_rcu(ext_to_groupid, hash_cur, hlist, hash) { + if (qstr_case_eq(key, &hash_cur->key)) { + ret_id = atomic_read(&hash_cur->value); + rcu_read_unlock(); + return ret_id; + } + } + rcu_read_unlock(); + return 0; +} + +appid_t get_ext_gid(const char *key) +{ + struct qstr q; + + qstr_init(&q, key); + return __get_ext_gid(&q); +} + +static appid_t __is_excluded(const struct qstr *app_name, userid_t user) +{ + struct hashtable_entry *hash_cur; + unsigned int hash = app_name->hash; + + rcu_read_lock(); + hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) { + if (atomic_read(&hash_cur->value) == user && + qstr_case_eq(app_name, &hash_cur->key)) { + rcu_read_unlock(); + return 1; + } + } + rcu_read_unlock(); + return 0; +} + +appid_t is_excluded(const char *key, userid_t user) +{ + struct qstr q; + qstr_init(&q, key); + return __is_excluded(&q, user); +} + +/* Kernel has already enforced everything we returned through + * derive_permissions_locked(), so this is used to lock down access + * even further, such as enforcing that apps hold sdcard_rw. + */ +int check_caller_access_to_name(struct inode *parent_node, const struct qstr *name) +{ + struct qstr q_autorun = QSTR_LITERAL("autorun.inf"); + struct qstr q__android_secure = QSTR_LITERAL(".android_secure"); + struct qstr q_android_secure = QSTR_LITERAL("android_secure"); + + /* Always block security-sensitive files at root */ + if (parent_node && SDCARDFS_I(parent_node)->data->perm == PERM_ROOT) { + if (qstr_case_eq(name, &q_autorun) + || qstr_case_eq(name, &q__android_secure) + || qstr_case_eq(name, &q_android_secure)) { + return 0; + } + } + + /* Root always has access; access for any other UIDs should always + * be controlled through packages.list. + */ + if (from_kuid(&init_user_ns, current_fsuid()) == 0) + return 1; + + /* No extra permissions to enforce */ + return 1; +} + +static struct hashtable_entry *alloc_hashtable_entry(const struct qstr *key, + appid_t value) +{ + struct hashtable_entry *ret = kmem_cache_alloc(hashtable_entry_cachep, + GFP_KERNEL); + if (!ret) + return NULL; + INIT_HLIST_NODE(&ret->dlist); + INIT_HLIST_NODE(&ret->hlist); + + if (!qstr_copy(key, &ret->key)) { + kmem_cache_free(hashtable_entry_cachep, ret); + return NULL; + } + + atomic_set(&ret->value, value); + return ret; +} + +static int insert_packagelist_appid_entry_locked(const struct qstr *key, appid_t value) +{ + struct hashtable_entry *hash_cur; + struct hashtable_entry *new_entry; + unsigned int hash = key->hash; + + hash_for_each_possible_rcu(package_to_appid, hash_cur, hlist, hash) { + if (qstr_case_eq(key, &hash_cur->key)) { + atomic_set(&hash_cur->value, value); + return 0; + } + } + new_entry = alloc_hashtable_entry(key, value); + if (!new_entry) + return -ENOMEM; + hash_add_rcu(package_to_appid, &new_entry->hlist, hash); + return 0; +} + +static int insert_ext_gid_entry_locked(const struct qstr *key, appid_t value) +{ + struct hashtable_entry *hash_cur; + struct hashtable_entry *new_entry; + unsigned int hash = key->hash; + + /* An extension can only belong to one gid */ + hash_for_each_possible_rcu(ext_to_groupid, hash_cur, hlist, hash) { + if (qstr_case_eq(key, &hash_cur->key)) + return -EINVAL; + } + new_entry = alloc_hashtable_entry(key, value); + if (!new_entry) + return -ENOMEM; + hash_add_rcu(ext_to_groupid, &new_entry->hlist, hash); + return 0; +} + +static int insert_userid_exclude_entry_locked(const struct qstr *key, userid_t value) +{ + struct hashtable_entry *hash_cur; + struct hashtable_entry *new_entry; + unsigned int hash = key->hash; + + /* Only insert if not already present */ + hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) { + if (atomic_read(&hash_cur->value) == value && + qstr_case_eq(key, &hash_cur->key)) + return 0; + } + new_entry = alloc_hashtable_entry(key, value); + if (!new_entry) + return -ENOMEM; + hash_add_rcu(package_to_userid, &new_entry->hlist, hash); + return 0; +} + +static void fixup_all_perms_name(const struct qstr *key) +{ + struct sdcardfs_sb_info *sbinfo; + struct limit_search limit = { + .flags = BY_NAME, + .name = QSTR_INIT(key->name, key->len), + }; + list_for_each_entry(sbinfo, &sdcardfs_super_list, list) { + if (sbinfo_has_sdcard_magic(sbinfo)) + fixup_perms_recursive(sbinfo->sb->s_root, &limit); + } +} + +static void fixup_all_perms_name_userid(const struct qstr *key, userid_t userid) +{ + struct sdcardfs_sb_info *sbinfo; + struct limit_search limit = { + .flags = BY_NAME | BY_USERID, + .name = QSTR_INIT(key->name, key->len), + .userid = userid, + }; + list_for_each_entry(sbinfo, &sdcardfs_super_list, list) { + if (sbinfo_has_sdcard_magic(sbinfo)) + fixup_perms_recursive(sbinfo->sb->s_root, &limit); + } +} + +static void fixup_all_perms_userid(userid_t userid) +{ + struct sdcardfs_sb_info *sbinfo; + struct limit_search limit = { + .flags = BY_USERID, + .userid = userid, + }; + list_for_each_entry(sbinfo, &sdcardfs_super_list, list) { + if (sbinfo_has_sdcard_magic(sbinfo)) + fixup_perms_recursive(sbinfo->sb->s_root, &limit); + } +} + +static int insert_packagelist_entry(const struct qstr *key, appid_t value) +{ + int err; + + mutex_lock(&sdcardfs_super_list_lock); + err = insert_packagelist_appid_entry_locked(key, value); + if (!err) + fixup_all_perms_name(key); + mutex_unlock(&sdcardfs_super_list_lock); + + return err; +} + +static int insert_ext_gid_entry(const struct qstr *key, appid_t value) +{ + int err; + + mutex_lock(&sdcardfs_super_list_lock); + err = insert_ext_gid_entry_locked(key, value); + mutex_unlock(&sdcardfs_super_list_lock); + + return err; +} + +static int insert_userid_exclude_entry(const struct qstr *key, userid_t value) +{ + int err; + + mutex_lock(&sdcardfs_super_list_lock); + err = insert_userid_exclude_entry_locked(key, value); + if (!err) + fixup_all_perms_name_userid(key, value); + mutex_unlock(&sdcardfs_super_list_lock); + + return err; +} + +static void free_hashtable_entry(struct hashtable_entry *entry) +{ + kfree(entry->key.name); + kmem_cache_free(hashtable_entry_cachep, entry); +} + +static void remove_packagelist_entry_locked(const struct qstr *key) +{ + struct hashtable_entry *hash_cur; + unsigned int hash = key->hash; + struct hlist_node *h_t; + HLIST_HEAD(free_list); + + hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) { + if (qstr_case_eq(key, &hash_cur->key)) { + hash_del_rcu(&hash_cur->hlist); + hlist_add_head(&hash_cur->dlist, &free_list); + } + } + hash_for_each_possible_rcu(package_to_appid, hash_cur, hlist, hash) { + if (qstr_case_eq(key, &hash_cur->key)) { + hash_del_rcu(&hash_cur->hlist); + hlist_add_head(&hash_cur->dlist, &free_list); + break; + } + } + synchronize_rcu(); + hlist_for_each_entry_safe(hash_cur, h_t, &free_list, dlist) + free_hashtable_entry(hash_cur); +} + +static void remove_packagelist_entry(const struct qstr *key) +{ + mutex_lock(&sdcardfs_super_list_lock); + remove_packagelist_entry_locked(key); + fixup_all_perms_name(key); + mutex_unlock(&sdcardfs_super_list_lock); +} + +static void remove_ext_gid_entry_locked(const struct qstr *key, gid_t group) +{ + struct hashtable_entry *hash_cur; + unsigned int hash = key->hash; + + hash_for_each_possible_rcu(ext_to_groupid, hash_cur, hlist, hash) { + if (qstr_case_eq(key, &hash_cur->key) && atomic_read(&hash_cur->value) == group) { + hash_del_rcu(&hash_cur->hlist); + synchronize_rcu(); + free_hashtable_entry(hash_cur); + break; + } + } +} + +static void remove_ext_gid_entry(const struct qstr *key, gid_t group) +{ + mutex_lock(&sdcardfs_super_list_lock); + remove_ext_gid_entry_locked(key, group); + mutex_unlock(&sdcardfs_super_list_lock); +} + +static void remove_userid_all_entry_locked(userid_t userid) +{ + struct hashtable_entry *hash_cur; + struct hlist_node *h_t; + HLIST_HEAD(free_list); + int i; + + hash_for_each_rcu(package_to_userid, i, hash_cur, hlist) { + if (atomic_read(&hash_cur->value) == userid) { + hash_del_rcu(&hash_cur->hlist); + hlist_add_head(&hash_cur->dlist, &free_list); + } + } + synchronize_rcu(); + hlist_for_each_entry_safe(hash_cur, h_t, &free_list, dlist) { + free_hashtable_entry(hash_cur); + } +} + +static void remove_userid_all_entry(userid_t userid) +{ + mutex_lock(&sdcardfs_super_list_lock); + remove_userid_all_entry_locked(userid); + fixup_all_perms_userid(userid); + mutex_unlock(&sdcardfs_super_list_lock); +} + +static void remove_userid_exclude_entry_locked(const struct qstr *key, userid_t userid) +{ + struct hashtable_entry *hash_cur; + unsigned int hash = key->hash; + + hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) { + if (qstr_case_eq(key, &hash_cur->key) && + atomic_read(&hash_cur->value) == userid) { + hash_del_rcu(&hash_cur->hlist); + synchronize_rcu(); + free_hashtable_entry(hash_cur); + break; + } + } +} + +static void remove_userid_exclude_entry(const struct qstr *key, userid_t userid) +{ + mutex_lock(&sdcardfs_super_list_lock); + remove_userid_exclude_entry_locked(key, userid); + fixup_all_perms_name_userid(key, userid); + mutex_unlock(&sdcardfs_super_list_lock); +} + +static void packagelist_destroy(void) +{ + struct hashtable_entry *hash_cur; + struct hlist_node *h_t; + HLIST_HEAD(free_list); + int i; + + mutex_lock(&sdcardfs_super_list_lock); + hash_for_each_rcu(package_to_appid, i, hash_cur, hlist) { + hash_del_rcu(&hash_cur->hlist); + hlist_add_head(&hash_cur->dlist, &free_list); + } + hash_for_each_rcu(package_to_userid, i, hash_cur, hlist) { + hash_del_rcu(&hash_cur->hlist); + hlist_add_head(&hash_cur->dlist, &free_list); + } + synchronize_rcu(); + hlist_for_each_entry_safe(hash_cur, h_t, &free_list, dlist) + free_hashtable_entry(hash_cur); + mutex_unlock(&sdcardfs_super_list_lock); + pr_info("sdcardfs: destroyed packagelist pkgld\n"); +} + +#define SDCARDFS_CONFIGFS_ATTR(_pfx, _name) \ +static struct configfs_attribute _pfx##attr_##_name = { \ + .ca_name = __stringify(_name), \ + .ca_mode = S_IRUGO | S_IWUGO, \ + .ca_owner = THIS_MODULE, \ + .show = _pfx##_name##_show, \ + .store = _pfx##_name##_store, \ +} + +#define SDCARDFS_CONFIGFS_ATTR_RO(_pfx, _name) \ +static struct configfs_attribute _pfx##attr_##_name = { \ + .ca_name = __stringify(_name), \ + .ca_mode = S_IRUGO, \ + .ca_owner = THIS_MODULE, \ + .show = _pfx##_name##_show, \ +} + +#define SDCARDFS_CONFIGFS_ATTR_WO(_pfx, _name) \ +static struct configfs_attribute _pfx##attr_##_name = { \ + .ca_name = __stringify(_name), \ + .ca_mode = S_IWUGO, \ + .ca_owner = THIS_MODULE, \ + .store = _pfx##_name##_store, \ +} + +struct package_details { + struct config_item item; + struct qstr name; +}; + +static inline struct package_details *to_package_details(struct config_item *item) +{ + return item ? container_of(item, struct package_details, item) : NULL; +} + +static ssize_t package_details_appid_show(struct config_item *item, char *page) +{ + return scnprintf(page, PAGE_SIZE, "%u\n", __get_appid(&to_package_details(item)->name)); +} + +static ssize_t package_details_appid_store(struct config_item *item, + const char *page, size_t count) +{ + unsigned int tmp; + int ret; + + ret = kstrtouint(page, 10, &tmp); + if (ret) + return ret; + + ret = insert_packagelist_entry(&to_package_details(item)->name, tmp); + + if (ret) + return ret; + + return count; +} + +static ssize_t package_details_excluded_userids_show(struct config_item *item, + char *page) +{ + struct package_details *package_details = to_package_details(item); + struct hashtable_entry *hash_cur; + unsigned int hash = package_details->name.hash; + int count = 0; + + rcu_read_lock(); + hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) { + if (qstr_case_eq(&package_details->name, &hash_cur->key)) + count += scnprintf(page + count, PAGE_SIZE - count, + "%d ", atomic_read(&hash_cur->value)); + } + rcu_read_unlock(); + if (count) + count--; + count += scnprintf(page + count, PAGE_SIZE - count, "\n"); + return count; +} + +static ssize_t package_details_excluded_userids_store(struct config_item *item, + const char *page, size_t count) +{ + unsigned int tmp; + int ret; + + ret = kstrtouint(page, 10, &tmp); + if (ret) + return ret; + + ret = insert_userid_exclude_entry(&to_package_details(item)->name, tmp); + + if (ret) + return ret; + + return count; +} + +static ssize_t package_details_clear_userid_store(struct config_item *item, + const char *page, size_t count) +{ + unsigned int tmp; + int ret; + + ret = kstrtouint(page, 10, &tmp); + if (ret) + return ret; + remove_userid_exclude_entry(&to_package_details(item)->name, tmp); + return count; +} + +static void package_details_release(struct config_item *item) +{ + struct package_details *package_details = to_package_details(item); + + pr_info("sdcardfs: removing %s\n", package_details->name.name); + remove_packagelist_entry(&package_details->name); + kfree(package_details->name.name); + kfree(package_details); +} + +SDCARDFS_CONFIGFS_ATTR(package_details_, appid); +SDCARDFS_CONFIGFS_ATTR(package_details_, excluded_userids); +SDCARDFS_CONFIGFS_ATTR_WO(package_details_, clear_userid); + +static struct configfs_attribute *package_details_attrs[] = { + &package_details_attr_appid, + &package_details_attr_excluded_userids, + &package_details_attr_clear_userid, + NULL, +}; + +static struct configfs_item_operations package_details_item_ops = { + .release = package_details_release, +}; + +static struct config_item_type package_appid_type = { + .ct_item_ops = &package_details_item_ops, + .ct_attrs = package_details_attrs, + .ct_owner = THIS_MODULE, +}; + +struct extensions_value { + struct config_group group; + unsigned int num; +}; + +struct extension_details { + struct config_item item; + struct qstr name; + unsigned int num; +}; + +static inline struct extensions_value *to_extensions_value(struct config_item *item) +{ + return item ? container_of(to_config_group(item), struct extensions_value, group) : NULL; +} + +static inline struct extension_details *to_extension_details(struct config_item *item) +{ + return item ? container_of(item, struct extension_details, item) : NULL; +} + +static void extension_details_release(struct config_item *item) +{ + struct extension_details *extension_details = to_extension_details(item); + + pr_info("sdcardfs: No longer mapping %s files to gid %d\n", + extension_details->name.name, extension_details->num); + remove_ext_gid_entry(&extension_details->name, extension_details->num); + kfree(extension_details->name.name); + kfree(extension_details); +} + +static struct configfs_item_operations extension_details_item_ops = { + .release = extension_details_release, +}; + +static struct config_item_type extension_details_type = { + .ct_item_ops = &extension_details_item_ops, + .ct_owner = THIS_MODULE, +}; + +static struct config_item *extension_details_make_item(struct config_group *group, const char *name) +{ + struct extensions_value *extensions_value = to_extensions_value(&group->cg_item); + struct extension_details *extension_details = kzalloc(sizeof(struct extension_details), GFP_KERNEL); + const char *tmp; + int ret; + + if (!extension_details) + return ERR_PTR(-ENOMEM); + + tmp = kstrdup(name, GFP_KERNEL); + if (!tmp) { + kfree(extension_details); + return ERR_PTR(-ENOMEM); + } + qstr_init(&extension_details->name, tmp); + extension_details->num = extensions_value->num; + ret = insert_ext_gid_entry(&extension_details->name, extensions_value->num); + + if (ret) { + kfree(extension_details->name.name); + kfree(extension_details); + return ERR_PTR(ret); + } + config_item_init_type_name(&extension_details->item, name, &extension_details_type); + + return &extension_details->item; +} + +static struct configfs_group_operations extensions_value_group_ops = { + .make_item = extension_details_make_item, +}; + +static struct config_item_type extensions_name_type = { + .ct_group_ops = &extensions_value_group_ops, + .ct_owner = THIS_MODULE, +}; + +static struct config_group *extensions_make_group(struct config_group *group, const char *name) +{ + struct extensions_value *extensions_value; + unsigned int tmp; + int ret; + + extensions_value = kzalloc(sizeof(struct extensions_value), GFP_KERNEL); + if (!extensions_value) + return ERR_PTR(-ENOMEM); + ret = kstrtouint(name, 10, &tmp); + if (ret) { + kfree(extensions_value); + return ERR_PTR(ret); + } + + extensions_value->num = tmp; + config_group_init_type_name(&extensions_value->group, name, + &extensions_name_type); + return &extensions_value->group; +} + +static void extensions_drop_group(struct config_group *group, struct config_item *item) +{ + struct extensions_value *value = to_extensions_value(item); + + pr_info("sdcardfs: No longer mapping any files to gid %d\n", value->num); + kfree(value); +} + +static struct configfs_group_operations extensions_group_ops = { + .make_group = extensions_make_group, + .drop_item = extensions_drop_group, +}; + +static struct config_item_type extensions_type = { + .ct_group_ops = &extensions_group_ops, + .ct_owner = THIS_MODULE, +}; + +struct config_group extension_group = { + .cg_item = { + .ci_namebuf = "extensions", + .ci_type = &extensions_type, + }, +}; + +static struct config_item *packages_make_item(struct config_group *group, const char *name) +{ + struct package_details *package_details; + const char *tmp; + + package_details = kzalloc(sizeof(struct package_details), GFP_KERNEL); + if (!package_details) + return ERR_PTR(-ENOMEM); + tmp = kstrdup(name, GFP_KERNEL); + if (!tmp) { + kfree(package_details); + return ERR_PTR(-ENOMEM); + } + qstr_init(&package_details->name, tmp); + config_item_init_type_name(&package_details->item, name, + &package_appid_type); + + return &package_details->item; +} + +static ssize_t packages_list_show(struct config_item *item, char *page) +{ + struct hashtable_entry *hash_cur_app; + struct hashtable_entry *hash_cur_user; + int i; + int count = 0, written = 0; + const char errormsg[] = "<truncated>\n"; + unsigned int hash; + + rcu_read_lock(); + hash_for_each_rcu(package_to_appid, i, hash_cur_app, hlist) { + written = scnprintf(page + count, PAGE_SIZE - sizeof(errormsg) - count, "%s %d\n", + hash_cur_app->key.name, atomic_read(&hash_cur_app->value)); + hash = hash_cur_app->key.hash; + hash_for_each_possible_rcu(package_to_userid, hash_cur_user, hlist, hash) { + if (qstr_case_eq(&hash_cur_app->key, &hash_cur_user->key)) { + written += scnprintf(page + count + written - 1, + PAGE_SIZE - sizeof(errormsg) - count - written + 1, + " %d\n", atomic_read(&hash_cur_user->value)) - 1; + } + } + if (count + written == PAGE_SIZE - sizeof(errormsg) - 1) { + count += scnprintf(page + count, PAGE_SIZE - count, errormsg); + break; + } + count += written; + } + rcu_read_unlock(); + + return count; +} + +static ssize_t packages_remove_userid_store(struct config_item *item, + const char *page, size_t count) +{ + unsigned int tmp; + int ret; + + ret = kstrtouint(page, 10, &tmp); + if (ret) + return ret; + remove_userid_all_entry(tmp); + return count; +} + +static struct configfs_attribute packages_attr_packages_gid_list = { + .ca_name = "packages_gid.list", + .ca_mode = S_IRUGO, + .ca_owner = THIS_MODULE, + .show = packages_list_show, +}; + +SDCARDFS_CONFIGFS_ATTR_WO(packages_, remove_userid); + +static struct configfs_attribute *packages_attrs[] = { + &packages_attr_packages_gid_list, + &packages_attr_remove_userid, + NULL, +}; + +/* + * Note that, since no extra work is required on ->drop_item(), + * no ->drop_item() is provided. + */ +static struct configfs_group_operations packages_group_ops = { + .make_item = packages_make_item, +}; + +static struct config_item_type packages_type = { + .ct_group_ops = &packages_group_ops, + .ct_attrs = packages_attrs, + .ct_owner = THIS_MODULE, +}; + +struct config_group *sd_default_groups[] = { + &extension_group, + NULL, +}; + +static struct configfs_subsystem sdcardfs_packages = { + .su_group = { + .cg_item = { + .ci_namebuf = "sdcardfs", + .ci_type = &packages_type, + }, + }, +}; + +static int configfs_sdcardfs_init(void) +{ + int ret, i; + struct configfs_subsystem *subsys = &sdcardfs_packages; + + config_group_init(&subsys->su_group); + for (i = 0; sd_default_groups[i]; i++) { + config_group_init(sd_default_groups[i]); + configfs_add_default_group(sd_default_groups[i], &subsys->su_group); + } + mutex_init(&subsys->su_mutex); + ret = configfs_register_subsystem(subsys); + if (ret) { + pr_err("Error %d while registering subsystem %s\n", + ret, + subsys->su_group.cg_item.ci_namebuf); + } + return ret; +} + +static void configfs_sdcardfs_exit(void) +{ + configfs_unregister_subsystem(&sdcardfs_packages); +} + +int packagelist_init(void) +{ + hashtable_entry_cachep = + kmem_cache_create("packagelist_hashtable_entry", + sizeof(struct hashtable_entry), 0, 0, NULL); + if (!hashtable_entry_cachep) { + pr_err("sdcardfs: failed creating pkgl_hashtable entry slab cache\n"); + return -ENOMEM; + } + + configfs_sdcardfs_init(); + return 0; +} + +void packagelist_exit(void) +{ + configfs_sdcardfs_exit(); + packagelist_destroy(); + kmem_cache_destroy(hashtable_entry_cachep); +}
diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h new file mode 100644 index 0000000..6219771 --- /dev/null +++ b/fs/sdcardfs/sdcardfs.h
@@ -0,0 +1,654 @@ +/* + * fs/sdcardfs/sdcardfs.h + * + * The sdcardfs v2.0 + * This file system replaces the sdcard daemon on Android + * On version 2.0, some of the daemon functions have been ported + * to support the multi-user concepts of Android 4.4 + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#ifndef _SDCARDFS_H_ +#define _SDCARDFS_H_ + +#include <linux/dcache.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/aio.h> +#include <linux/kref.h> +#include <linux/mm.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/seq_file.h> +#include <linux/statfs.h> +#include <linux/fs_stack.h> +#include <linux/magic.h> +#include <linux/uaccess.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/types.h> +#include <linux/security.h> +#include <linux/string.h> +#include <linux/list.h> +#include "multiuser.h" + +/* the file system name */ +#define SDCARDFS_NAME "sdcardfs" + +/* sdcardfs root inode number */ +#define SDCARDFS_ROOT_INO 1 + +/* useful for tracking code reachability */ +#define UDBG pr_default("DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__) + +#define SDCARDFS_DIRENT_SIZE 256 + +/* temporary static uid settings for development */ +#define AID_ROOT 0 /* uid for accessing /mnt/sdcard & extSdcard */ +#define AID_MEDIA_RW 1023 /* internal media storage write access */ + +#define AID_SDCARD_RW 1015 /* external storage write access */ +#define AID_SDCARD_R 1028 /* external storage read access */ +#define AID_SDCARD_PICS 1033 /* external storage photos access */ +#define AID_SDCARD_AV 1034 /* external storage audio/video access */ +#define AID_SDCARD_ALL 1035 /* access all users external storage */ +#define AID_MEDIA_OBB 1059 /* obb files */ + +#define AID_SDCARD_IMAGE 1057 + +#define AID_PACKAGE_INFO 1027 + + +/* + * Permissions are handled by our permission function. + * We don't want anyone who happens to look at our inode value to prematurely + * block access, so store more permissive values. These are probably never + * used. + */ +#define fixup_tmp_permissions(x) \ + do { \ + (x)->i_uid = make_kuid(&init_user_ns, \ + SDCARDFS_I(x)->data->d_uid); \ + (x)->i_gid = make_kgid(&init_user_ns, AID_SDCARD_RW); \ + (x)->i_mode = ((x)->i_mode & S_IFMT) | 0775;\ + } while (0) + +/* Android 5.0 support */ + +/* Permission mode for a specific node. Controls how file permissions + * are derived for children nodes. + */ +typedef enum { + /* Nothing special; this node should just inherit from its parent. */ + PERM_INHERIT, + /* This node is one level above a normal root; used for legacy layouts + * which use the first level to represent user_id. + */ + PERM_PRE_ROOT, + /* This node is "/" */ + PERM_ROOT, + /* This node is "/Android" */ + PERM_ANDROID, + /* This node is "/Android/data" */ + PERM_ANDROID_DATA, + /* This node is "/Android/obb" */ + PERM_ANDROID_OBB, + /* This node is "/Android/media" */ + PERM_ANDROID_MEDIA, + /* This node is "/Android/[data|media|obb]/[package]" */ + PERM_ANDROID_PACKAGE, + /* This node is "/Android/[data|media|obb]/[package]/cache" */ + PERM_ANDROID_PACKAGE_CACHE, +} perm_t; + +struct sdcardfs_sb_info; +struct sdcardfs_mount_options; +struct sdcardfs_inode_info; +struct sdcardfs_inode_data; + +/* Do not directly use this function. Use OVERRIDE_CRED() instead. */ +const struct cred *override_fsids(struct sdcardfs_sb_info *sbi, + struct sdcardfs_inode_data *data); +/* Do not directly use this function, use REVERT_CRED() instead. */ +void revert_fsids(const struct cred *old_cred); + +/* operations vectors defined in specific files */ +extern const struct file_operations sdcardfs_main_fops; +extern const struct file_operations sdcardfs_dir_fops; +extern const struct inode_operations sdcardfs_main_iops; +extern const struct inode_operations sdcardfs_dir_iops; +extern const struct inode_operations sdcardfs_symlink_iops; +extern const struct super_operations sdcardfs_sops; +extern const struct dentry_operations sdcardfs_ci_dops; +extern const struct address_space_operations sdcardfs_aops, sdcardfs_dummy_aops; +extern const struct vm_operations_struct sdcardfs_vm_ops; + +extern int sdcardfs_init_inode_cache(void); +extern void sdcardfs_destroy_inode_cache(void); +extern int sdcardfs_init_dentry_cache(void); +extern void sdcardfs_destroy_dentry_cache(void); +extern int new_dentry_private_data(struct dentry *dentry); +extern void free_dentry_private_data(struct dentry *dentry); +extern struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags); +extern struct inode *sdcardfs_iget(struct super_block *sb, + struct inode *lower_inode, userid_t id); +extern int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, + struct path *lower_path, userid_t id); + +/* file private data */ +struct sdcardfs_file_info { + struct file *lower_file; + const struct vm_operations_struct *lower_vm_ops; +}; + +struct sdcardfs_inode_data { + struct kref refcount; + bool abandoned; + + perm_t perm; + userid_t userid; + uid_t d_uid; + bool under_android; + bool under_cache; + bool under_obb; +}; + +/* sdcardfs inode data in memory */ +struct sdcardfs_inode_info { + struct inode *lower_inode; + /* state derived based on current position in hierarchy */ + struct sdcardfs_inode_data *data; + + /* top folder for ownership */ + spinlock_t top_lock; + struct sdcardfs_inode_data *top_data; + + struct inode vfs_inode; +}; + + +/* sdcardfs dentry data in memory */ +struct sdcardfs_dentry_info { + spinlock_t lock; /* protects lower_path */ + struct path lower_path; + struct path orig_path; +}; + +struct sdcardfs_mount_options { + uid_t fs_low_uid; + gid_t fs_low_gid; + userid_t fs_user_id; + bool multiuser; + bool gid_derivation; + bool default_normal; + bool unshared_obb; + unsigned int reserved_mb; + bool nocache; +}; + +struct sdcardfs_vfsmount_options { + gid_t gid; + mode_t mask; +}; + +extern int parse_options_remount(struct super_block *sb, char *options, int silent, + struct sdcardfs_vfsmount_options *vfsopts); + +/* sdcardfs super-block data in memory */ +struct sdcardfs_sb_info { + struct super_block *sb; + struct super_block *lower_sb; + /* derived perm policy : some of options have been added + * to sdcardfs_mount_options (Android 4.4 support) + */ + struct sdcardfs_mount_options options; + spinlock_t lock; /* protects obbpath */ + char *obbpath_s; + struct path obbpath; + void *pkgl_id; + struct list_head list; +}; + +/* + * inode to private data + * + * Since we use containers and the struct inode is _inside_ the + * sdcardfs_inode_info structure, SDCARDFS_I will always (given a non-NULL + * inode pointer), return a valid non-NULL pointer. + */ +static inline struct sdcardfs_inode_info *SDCARDFS_I(const struct inode *inode) +{ + return container_of(inode, struct sdcardfs_inode_info, vfs_inode); +} + +/* dentry to private data */ +#define SDCARDFS_D(dent) ((struct sdcardfs_dentry_info *)(dent)->d_fsdata) + +/* superblock to private data */ +#define SDCARDFS_SB(super) ((struct sdcardfs_sb_info *)(super)->s_fs_info) + +/* file to private Data */ +#define SDCARDFS_F(file) ((struct sdcardfs_file_info *)((file)->private_data)) + +/* file to lower file */ +static inline struct file *sdcardfs_lower_file(const struct file *f) +{ + return SDCARDFS_F(f)->lower_file; +} + +static inline void sdcardfs_set_lower_file(struct file *f, struct file *val) +{ + SDCARDFS_F(f)->lower_file = val; +} + +/* inode to lower inode. */ +static inline struct inode *sdcardfs_lower_inode(const struct inode *i) +{ + return SDCARDFS_I(i)->lower_inode; +} + +static inline void sdcardfs_set_lower_inode(struct inode *i, struct inode *val) +{ + SDCARDFS_I(i)->lower_inode = val; +} + +/* superblock to lower superblock */ +static inline struct super_block *sdcardfs_lower_super( + const struct super_block *sb) +{ + return SDCARDFS_SB(sb)->lower_sb; +} + +static inline void sdcardfs_set_lower_super(struct super_block *sb, + struct super_block *val) +{ + SDCARDFS_SB(sb)->lower_sb = val; +} + +/* path based (dentry/mnt) macros */ +static inline void pathcpy(struct path *dst, const struct path *src) +{ + dst->dentry = src->dentry; + dst->mnt = src->mnt; +} + +/* sdcardfs_get_pname functions calls path_get() + * therefore, the caller must call "proper" path_put functions + */ +#define SDCARDFS_DENT_FUNC(pname) \ +static inline void sdcardfs_get_##pname(const struct dentry *dent, \ + struct path *pname) \ +{ \ + spin_lock(&SDCARDFS_D(dent)->lock); \ + pathcpy(pname, &SDCARDFS_D(dent)->pname); \ + path_get(pname); \ + spin_unlock(&SDCARDFS_D(dent)->lock); \ + return; \ +} \ +static inline void sdcardfs_put_##pname(const struct dentry *dent, \ + struct path *pname) \ +{ \ + path_put(pname); \ + return; \ +} \ +static inline void sdcardfs_set_##pname(const struct dentry *dent, \ + struct path *pname) \ +{ \ + spin_lock(&SDCARDFS_D(dent)->lock); \ + pathcpy(&SDCARDFS_D(dent)->pname, pname); \ + spin_unlock(&SDCARDFS_D(dent)->lock); \ + return; \ +} \ +static inline void sdcardfs_reset_##pname(const struct dentry *dent) \ +{ \ + spin_lock(&SDCARDFS_D(dent)->lock); \ + SDCARDFS_D(dent)->pname.dentry = NULL; \ + SDCARDFS_D(dent)->pname.mnt = NULL; \ + spin_unlock(&SDCARDFS_D(dent)->lock); \ + return; \ +} \ +static inline void sdcardfs_put_reset_##pname(const struct dentry *dent) \ +{ \ + struct path pname; \ + spin_lock(&SDCARDFS_D(dent)->lock); \ + if (SDCARDFS_D(dent)->pname.dentry) { \ + pathcpy(&pname, &SDCARDFS_D(dent)->pname); \ + SDCARDFS_D(dent)->pname.dentry = NULL; \ + SDCARDFS_D(dent)->pname.mnt = NULL; \ + spin_unlock(&SDCARDFS_D(dent)->lock); \ + path_put(&pname); \ + } else \ + spin_unlock(&SDCARDFS_D(dent)->lock); \ + return; \ +} + +SDCARDFS_DENT_FUNC(lower_path) +SDCARDFS_DENT_FUNC(orig_path) + +static inline bool sbinfo_has_sdcard_magic(struct sdcardfs_sb_info *sbinfo) +{ + return sbinfo && sbinfo->sb + && sbinfo->sb->s_magic == SDCARDFS_SUPER_MAGIC; +} + +static inline struct sdcardfs_inode_data *data_get( + struct sdcardfs_inode_data *data) +{ + if (data) + kref_get(&data->refcount); + return data; +} + +static inline struct sdcardfs_inode_data *top_data_get( + struct sdcardfs_inode_info *info) +{ + struct sdcardfs_inode_data *top_data; + + spin_lock(&info->top_lock); + top_data = data_get(info->top_data); + spin_unlock(&info->top_lock); + return top_data; +} + +extern void data_release(struct kref *ref); + +static inline void data_put(struct sdcardfs_inode_data *data) +{ + kref_put(&data->refcount, data_release); +} + +static inline void release_own_data(struct sdcardfs_inode_info *info) +{ + /* + * This happens exactly once per inode. At this point, the inode that + * originally held this data is about to be freed, and all references + * to it are held as a top value, and will likely be released soon. + */ + info->data->abandoned = true; + data_put(info->data); +} + +static inline void set_top(struct sdcardfs_inode_info *info, + struct sdcardfs_inode_info *top_owner) +{ + struct sdcardfs_inode_data *old_top; + struct sdcardfs_inode_data *new_top = NULL; + + if (top_owner) + new_top = top_data_get(top_owner); + + spin_lock(&info->top_lock); + old_top = info->top_data; + info->top_data = new_top; + if (old_top) + data_put(old_top); + spin_unlock(&info->top_lock); +} + +static inline int get_gid(struct vfsmount *mnt, + struct super_block *sb, + struct sdcardfs_inode_data *data) +{ + struct sdcardfs_vfsmount_options *vfsopts = mnt->data; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(sb); + + if (vfsopts->gid == AID_SDCARD_RW && !sbi->options.default_normal) + /* As an optimization, certain trusted system components only run + * as owner but operate across all users. Since we're now handing + * out the sdcard_rw GID only to trusted apps, we're okay relaxing + * the user boundary enforcement for the default view. The UIDs + * assigned to app directories are still multiuser aware. + */ + return AID_SDCARD_RW; + else + return multiuser_get_uid(data->userid, vfsopts->gid); +} + +static inline int get_mode(struct vfsmount *mnt, + struct sdcardfs_inode_info *info, + struct sdcardfs_inode_data *data) +{ + int owner_mode; + int filtered_mode; + struct sdcardfs_vfsmount_options *opts = mnt->data; + int visible_mode = 0775 & ~opts->mask; + + + if (data->perm == PERM_PRE_ROOT) { + /* Top of multi-user view should always be visible to ensure + * secondary users can traverse inside. + */ + visible_mode = 0711; + } else if (data->under_android) { + /* Block "other" access to Android directories, since only apps + * belonging to a specific user should be in there; we still + * leave +x open for the default view. + */ + if (opts->gid == AID_SDCARD_RW) + visible_mode = visible_mode & ~0006; + else + visible_mode = visible_mode & ~0007; + } + owner_mode = info->lower_inode->i_mode & 0700; + filtered_mode = visible_mode & (owner_mode | (owner_mode >> 3) | (owner_mode >> 6)); + return filtered_mode; +} + +static inline int has_graft_path(const struct dentry *dent) +{ + int ret = 0; + + spin_lock(&SDCARDFS_D(dent)->lock); + if (SDCARDFS_D(dent)->orig_path.dentry != NULL) + ret = 1; + spin_unlock(&SDCARDFS_D(dent)->lock); + + return ret; +} + +static inline void sdcardfs_get_real_lower(const struct dentry *dent, + struct path *real_lower) +{ + /* in case of a local obb dentry + * the orig_path should be returned + */ + if (has_graft_path(dent)) + sdcardfs_get_orig_path(dent, real_lower); + else + sdcardfs_get_lower_path(dent, real_lower); +} + +static inline void sdcardfs_put_real_lower(const struct dentry *dent, + struct path *real_lower) +{ + if (has_graft_path(dent)) + sdcardfs_put_orig_path(dent, real_lower); + else + sdcardfs_put_lower_path(dent, real_lower); +} + +extern struct mutex sdcardfs_super_list_lock; +extern struct list_head sdcardfs_super_list; + +/* for packagelist.c */ +extern appid_t get_appid(const char *app_name); +extern appid_t get_ext_gid(const char *app_name); +extern appid_t is_excluded(const char *app_name, userid_t userid); +extern int check_caller_access_to_name(struct inode *parent_node, const struct qstr *name); +extern int packagelist_init(void); +extern void packagelist_exit(void); + +/* for derived_perm.c */ +#define BY_NAME (1 << 0) +#define BY_USERID (1 << 1) +struct limit_search { + unsigned int flags; + struct qstr name; + userid_t userid; +}; + +extern void setup_derived_state(struct inode *inode, perm_t perm, + userid_t userid, uid_t uid); +extern void get_derived_permission(struct dentry *parent, struct dentry *dentry); +extern void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, const struct qstr *name); +extern void fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit); + +extern void update_derived_permission_lock(struct dentry *dentry); +void fixup_lower_ownership(struct dentry *dentry, const char *name); +extern int need_graft_path(struct dentry *dentry); +extern int is_base_obbpath(struct dentry *dentry); +extern int is_obbpath_invalid(struct dentry *dentry); +extern int setup_obb_dentry(struct dentry *dentry, struct path *lower_path); + +/* locking helpers */ +static inline struct dentry *lock_parent(struct dentry *dentry) +{ + struct dentry *dir = dget_parent(dentry); + + inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); + return dir; +} + +static inline void unlock_dir(struct dentry *dir) +{ + inode_unlock(d_inode(dir)); + dput(dir); +} + +static inline int prepare_dir(const char *path_s, uid_t uid, gid_t gid, mode_t mode) +{ + int err; + struct dentry *dent; + struct iattr attrs; + struct path parent; + + dent = kern_path_locked(path_s, &parent); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + if (err == -EEXIST) + err = 0; + goto out_unlock; + } + + err = vfs_mkdir2(parent.mnt, d_inode(parent.dentry), dent, mode); + if (err) { + if (err == -EEXIST) + err = 0; + goto out_dput; + } + + attrs.ia_uid = make_kuid(&init_user_ns, uid); + attrs.ia_gid = make_kgid(&init_user_ns, gid); + attrs.ia_valid = ATTR_UID | ATTR_GID; + inode_lock(d_inode(dent)); + notify_change2(parent.mnt, dent, &attrs, NULL); + inode_unlock(d_inode(dent)); + +out_dput: + dput(dent); + +out_unlock: + /* parent dentry locked by lookup_create */ + inode_unlock(d_inode(parent.dentry)); + path_put(&parent); + return err; +} + +/* + * Return 1, if a disk has enough free space, otherwise 0. + * We assume that any files can not be overwritten. + */ +static inline int check_min_free_space(struct dentry *dentry, size_t size, int dir) +{ + int err; + struct path lower_path; + struct kstatfs statfs; + u64 avail; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + + if (sbi->options.reserved_mb) { + /* Get fs stat of lower filesystem. */ + sdcardfs_get_lower_path(dentry, &lower_path); + err = vfs_statfs(&lower_path, &statfs); + sdcardfs_put_lower_path(dentry, &lower_path); + + if (unlikely(err)) + return 0; + + /* Invalid statfs informations. */ + if (unlikely(statfs.f_bsize == 0)) + return 0; + + /* if you are checking directory, set size to f_bsize. */ + if (unlikely(dir)) + size = statfs.f_bsize; + + /* available size */ + avail = statfs.f_bavail * statfs.f_bsize; + + /* not enough space */ + if ((u64)size > avail) + return 0; + + /* enough space */ + if ((avail - size) > (sbi->options.reserved_mb * 1024 * 1024)) + return 1; + + return 0; + } else + return 1; +} + +/* + * Copies attrs and maintains sdcardfs managed attrs + * Since our permission check handles all special permissions, set those to be open + */ +static inline void sdcardfs_copy_and_fix_attrs(struct inode *dest, const struct inode *src) +{ + dest->i_mode = (src->i_mode & S_IFMT) | S_IRWXU | S_IRWXG | + S_IROTH | S_IXOTH; /* 0775 */ + dest->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(dest)->data->d_uid); + dest->i_gid = make_kgid(&init_user_ns, AID_SDCARD_RW); + dest->i_rdev = src->i_rdev; + dest->i_atime = src->i_atime; + dest->i_mtime = src->i_mtime; + dest->i_ctime = src->i_ctime; + dest->i_blkbits = src->i_blkbits; + dest->i_flags = src->i_flags; + set_nlink(dest, src->i_nlink); +} + +static inline bool str_case_eq(const char *s1, const char *s2) +{ + return !strcasecmp(s1, s2); +} + +static inline bool str_n_case_eq(const char *s1, const char *s2, size_t len) +{ + return !strncasecmp(s1, s2, len); +} + +static inline bool qstr_case_eq(const struct qstr *q1, const struct qstr *q2) +{ + return q1->len == q2->len && str_n_case_eq(q1->name, q2->name, q2->len); +} + +#define QSTR_LITERAL(string) QSTR_INIT(string, sizeof(string)-1) + +#endif /* not _SDCARDFS_H_ */
diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c new file mode 100644 index 0000000..140696e --- /dev/null +++ b/fs/sdcardfs/super.c
@@ -0,0 +1,333 @@ +/* + * fs/sdcardfs/super.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" + +/* + * The inode cache is used with alloc_inode for both our inode info and the + * vfs inode. + */ +static struct kmem_cache *sdcardfs_inode_cachep; + +/* + * To support the top references, we must track some data separately. + * An sdcardfs_inode_info always has a reference to its data, and once set up, + * also has a reference to its top. The top may be itself, in which case it + * holds two references to its data. When top is changed, it takes a ref to the + * new data and then drops the ref to the old data. + */ +static struct kmem_cache *sdcardfs_inode_data_cachep; + +void data_release(struct kref *ref) +{ + struct sdcardfs_inode_data *data = + container_of(ref, struct sdcardfs_inode_data, refcount); + + kmem_cache_free(sdcardfs_inode_data_cachep, data); +} + +/* final actions when unmounting a file system */ +static void sdcardfs_put_super(struct super_block *sb) +{ + struct sdcardfs_sb_info *spd; + struct super_block *s; + + spd = SDCARDFS_SB(sb); + if (!spd) + return; + + if (spd->obbpath_s) { + kfree(spd->obbpath_s); + path_put(&spd->obbpath); + } + + /* decrement lower super references */ + s = sdcardfs_lower_super(sb); + sdcardfs_set_lower_super(sb, NULL); + atomic_dec(&s->s_active); + + kfree(spd); + sb->s_fs_info = NULL; +} + +static int sdcardfs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + int err; + struct path lower_path; + u32 min_blocks; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + + sdcardfs_get_lower_path(dentry, &lower_path); + err = vfs_statfs(&lower_path, buf); + sdcardfs_put_lower_path(dentry, &lower_path); + + if (sbi->options.reserved_mb) { + /* Invalid statfs informations. */ + if (buf->f_bsize == 0) { + pr_err("Returned block size is zero.\n"); + return -EINVAL; + } + + min_blocks = ((sbi->options.reserved_mb * 1024 * 1024)/buf->f_bsize); + buf->f_blocks -= min_blocks; + + if (buf->f_bavail > min_blocks) + buf->f_bavail -= min_blocks; + else + buf->f_bavail = 0; + + /* Make reserved blocks invisiable to media storage */ + buf->f_bfree = buf->f_bavail; + } + + /* set return buf to our f/s to avoid confusing user-level utils */ + buf->f_type = SDCARDFS_SUPER_MAGIC; + + return err; +} + +/* + * @flags: numeric mount options + * @options: mount options string + */ +static int sdcardfs_remount_fs(struct super_block *sb, int *flags, char *options) +{ + int err = 0; + + /* + * The VFS will take care of "ro" and "rw" flags among others. We + * can safely accept a few flags (RDONLY, MANDLOCK), and honor + * SILENT, but anything else left over is an error. + */ + if ((*flags & ~(MS_RDONLY | MS_MANDLOCK | MS_SILENT)) != 0) { + pr_err("sdcardfs: remount flags 0x%x unsupported\n", *flags); + err = -EINVAL; + } + + return err; +} + +/* + * @mnt: mount point we are remounting + * @sb: superblock we are remounting + * @flags: numeric mount options + * @options: mount options string + */ +static int sdcardfs_remount_fs2(struct vfsmount *mnt, struct super_block *sb, + int *flags, char *options) +{ + int err = 0; + + /* + * The VFS will take care of "ro" and "rw" flags among others. We + * can safely accept a few flags (RDONLY, MANDLOCK), and honor + * SILENT, but anything else left over is an error. + */ + if ((*flags & ~(MS_RDONLY | MS_MANDLOCK | MS_SILENT | MS_REMOUNT)) != 0) { + pr_err("sdcardfs: remount flags 0x%x unsupported\n", *flags); + err = -EINVAL; + } + pr_info("Remount options were %s for vfsmnt %p.\n", options, mnt); + err = parse_options_remount(sb, options, *flags & ~MS_SILENT, mnt->data); + + + return err; +} + +static void *sdcardfs_clone_mnt_data(void *data) +{ + struct sdcardfs_vfsmount_options *opt = kmalloc(sizeof(struct sdcardfs_vfsmount_options), GFP_KERNEL); + struct sdcardfs_vfsmount_options *old = data; + + if (!opt) + return NULL; + opt->gid = old->gid; + opt->mask = old->mask; + return opt; +} + +static void sdcardfs_copy_mnt_data(void *data, void *newdata) +{ + struct sdcardfs_vfsmount_options *old = data; + struct sdcardfs_vfsmount_options *new = newdata; + + old->gid = new->gid; + old->mask = new->mask; +} + +/* + * Called by iput() when the inode reference count reached zero + * and the inode is not hashed anywhere. Used to clear anything + * that needs to be, before the inode is completely destroyed and put + * on the inode free list. + */ +static void sdcardfs_evict_inode(struct inode *inode) +{ + struct inode *lower_inode; + + truncate_inode_pages(&inode->i_data, 0); + set_top(SDCARDFS_I(inode), NULL); + clear_inode(inode); + /* + * Decrement a reference to a lower_inode, which was incremented + * by our read_inode when it was created initially. + */ + lower_inode = sdcardfs_lower_inode(inode); + sdcardfs_set_lower_inode(inode, NULL); + iput(lower_inode); +} + +static struct inode *sdcardfs_alloc_inode(struct super_block *sb) +{ + struct sdcardfs_inode_info *i; + struct sdcardfs_inode_data *d; + + i = kmem_cache_alloc(sdcardfs_inode_cachep, GFP_KERNEL); + if (!i) + return NULL; + + /* memset everything up to the inode to 0 */ + memset(i, 0, offsetof(struct sdcardfs_inode_info, vfs_inode)); + + d = kmem_cache_alloc(sdcardfs_inode_data_cachep, + GFP_KERNEL | __GFP_ZERO); + if (!d) { + kmem_cache_free(sdcardfs_inode_cachep, i); + return NULL; + } + + i->data = d; + kref_init(&d->refcount); + i->top_data = d; + spin_lock_init(&i->top_lock); + kref_get(&d->refcount); + + i->vfs_inode.i_version = 1; + return &i->vfs_inode; +} + +static void i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + + release_own_data(SDCARDFS_I(inode)); + kmem_cache_free(sdcardfs_inode_cachep, SDCARDFS_I(inode)); +} + +static void sdcardfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, i_callback); +} + +/* sdcardfs inode cache constructor */ +static void init_once(void *obj) +{ + struct sdcardfs_inode_info *i = obj; + + inode_init_once(&i->vfs_inode); +} + +int sdcardfs_init_inode_cache(void) +{ + sdcardfs_inode_cachep = + kmem_cache_create("sdcardfs_inode_cache", + sizeof(struct sdcardfs_inode_info), 0, + SLAB_RECLAIM_ACCOUNT, init_once); + + if (!sdcardfs_inode_cachep) + return -ENOMEM; + + sdcardfs_inode_data_cachep = + kmem_cache_create("sdcardfs_inode_data_cache", + sizeof(struct sdcardfs_inode_data), 0, + SLAB_RECLAIM_ACCOUNT, NULL); + if (!sdcardfs_inode_data_cachep) { + kmem_cache_destroy(sdcardfs_inode_cachep); + return -ENOMEM; + } + + return 0; +} + +/* sdcardfs inode cache destructor */ +void sdcardfs_destroy_inode_cache(void) +{ + kmem_cache_destroy(sdcardfs_inode_data_cachep); + kmem_cache_destroy(sdcardfs_inode_cachep); +} + +/* + * Used only in nfs, to kill any pending RPC tasks, so that subsequent + * code can actually succeed and won't leave tasks that need handling. + */ +static void sdcardfs_umount_begin(struct super_block *sb) +{ + struct super_block *lower_sb; + + lower_sb = sdcardfs_lower_super(sb); + if (lower_sb && lower_sb->s_op && lower_sb->s_op->umount_begin) + lower_sb->s_op->umount_begin(lower_sb); +} + +static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m, + struct dentry *root) +{ + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(root->d_sb); + struct sdcardfs_mount_options *opts = &sbi->options; + struct sdcardfs_vfsmount_options *vfsopts = mnt->data; + + if (opts->fs_low_uid != 0) + seq_printf(m, ",fsuid=%u", opts->fs_low_uid); + if (opts->fs_low_gid != 0) + seq_printf(m, ",fsgid=%u", opts->fs_low_gid); + if (vfsopts->gid != 0) + seq_printf(m, ",gid=%u", vfsopts->gid); + if (opts->multiuser) + seq_puts(m, ",multiuser"); + if (vfsopts->mask) + seq_printf(m, ",mask=%u", vfsopts->mask); + if (opts->fs_user_id) + seq_printf(m, ",userid=%u", opts->fs_user_id); + if (opts->gid_derivation) + seq_puts(m, ",derive_gid"); + if (opts->default_normal) + seq_puts(m, ",default_normal"); + if (opts->reserved_mb != 0) + seq_printf(m, ",reserved=%uMB", opts->reserved_mb); + if (opts->nocache) + seq_printf(m, ",nocache"); + + return 0; +}; + +const struct super_operations sdcardfs_sops = { + .put_super = sdcardfs_put_super, + .statfs = sdcardfs_statfs, + .remount_fs = sdcardfs_remount_fs, + .remount_fs2 = sdcardfs_remount_fs2, + .clone_mnt_data = sdcardfs_clone_mnt_data, + .copy_mnt_data = sdcardfs_copy_mnt_data, + .evict_inode = sdcardfs_evict_inode, + .umount_begin = sdcardfs_umount_begin, + .show_options2 = sdcardfs_show_options, + .alloc_inode = sdcardfs_alloc_inode, + .destroy_inode = sdcardfs_destroy_inode, + .drop_inode = generic_delete_inode, +};
diff --git a/fs/super.c b/fs/super.c index 219f7ca..589f919 100644 --- a/fs/super.c +++ b/fs/super.c
@@ -814,7 +814,8 @@ struct super_block *user_get_super(dev_t dev) } /** - * do_remount_sb - asks filesystem to change mount options. + * do_remount_sb2 - asks filesystem to change mount options. + * @mnt: mount we are looking at * @sb: superblock in question * @sb_flags: revised superblock flags * @data: the rest of options @@ -822,7 +823,7 @@ struct super_block *user_get_super(dev_t dev) * * Alters the mount options of a mounted file system. */ -int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force) +int do_remount_sb2(struct vfsmount *mnt, struct super_block *sb, int sb_flags, void *data, int force) { int retval; int remount_ro; @@ -864,7 +865,16 @@ int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force) } } - if (sb->s_op->remount_fs) { + if (mnt && sb->s_op->remount_fs2) { + retval = sb->s_op->remount_fs2(mnt, sb, &sb_flags, data); + if (retval) { + if (!force) + goto cancel_readonly; + /* If forced remount, go ahead despite any errors */ + WARN(1, "forced remount of a %s fs returned %i\n", + sb->s_type->name, retval); + } + } else if (sb->s_op->remount_fs) { retval = sb->s_op->remount_fs(sb, &sb_flags, data); if (retval) { if (!force) @@ -896,12 +906,17 @@ int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force) return retval; } +int do_remount_sb(struct super_block *sb, int flags, void *data, int force) +{ + return do_remount_sb2(NULL, sb, flags, data, force); +} + static void do_emergency_remount(struct work_struct *work) { struct super_block *sb, *p = NULL; spin_lock(&sb_lock); - list_for_each_entry(sb, &super_blocks, s_list) { + list_for_each_entry_reverse(sb, &super_blocks, s_list) { if (hlist_unhashed(&sb->s_instances)) continue; sb->s_count++; @@ -1217,7 +1232,7 @@ struct dentry *mount_single(struct file_system_type *fs_type, EXPORT_SYMBOL(mount_single); struct dentry * -mount_fs(struct file_system_type *type, int flags, const char *name, void *data) +mount_fs(struct file_system_type *type, int flags, const char *name, struct vfsmount *mnt, void *data) { struct dentry *root; struct super_block *sb; @@ -1234,7 +1249,10 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data) goto out_free_secdata; } - root = type->mount(type, flags, name, data); + if (type->mount2) + root = type->mount2(mnt, type, flags, name, data); + else + root = type->mount(type, flags, name, data); if (IS_ERR(root)) { error = PTR_ERR(root); goto out_free_secdata;
diff --git a/fs/sync.c b/fs/sync.c index 83ac79a..12f2aa5 100644 --- a/fs/sync.c +++ b/fs/sync.c
@@ -9,7 +9,7 @@ #include <linux/slab.h> #include <linux/export.h> #include <linux/namei.h> -#include <linux/sched.h> +#include <linux/sched/xacct.h> #include <linux/writeback.h> #include <linux/syscalls.h> #include <linux/linkage.h> @@ -219,6 +219,7 @@ static int do_fsync(unsigned int fd, int datasync) if (f.file) { ret = vfs_fsync(f.file, datasync); fdput(f); + inc_syscfs(current); } return ret; }
diff --git a/fs/ubifs/crypto.c b/fs/ubifs/crypto.c index 16a5d5c..55c508f 100644 --- a/fs/ubifs/crypto.c +++ b/fs/ubifs/crypto.c
@@ -24,14 +24,6 @@ static bool ubifs_crypt_empty_dir(struct inode *inode) return ubifs_check_dir_empty(inode) == 0; } -static unsigned int ubifs_crypt_max_namelen(struct inode *inode) -{ - if (S_ISLNK(inode->i_mode)) - return UBIFS_MAX_INO_DATA; - else - return UBIFS_MAX_NLEN; -} - int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn, unsigned int in_len, unsigned int *out_len, int block) { @@ -88,7 +80,6 @@ const struct fscrypt_operations ubifs_crypt_operations = { .key_prefix = "ubifs:", .get_context = ubifs_crypt_get_context, .set_context = ubifs_crypt_set_context, - .is_encrypted = __ubifs_crypt_is_encrypted, .empty_dir = ubifs_crypt_empty_dir, - .max_namelen = ubifs_crypt_max_namelen, + .max_namelen = UBIFS_MAX_NLEN, };
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 4e6e32c..246e7d1 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c
@@ -220,20 +220,9 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino); - if (ubifs_crypt_is_encrypted(dir)) { - err = fscrypt_get_encryption_info(dir); - - /* - * DCACHE_ENCRYPTED_WITH_KEY is set if the dentry is - * created while the directory was encrypted and we - * have access to the key. - */ - if (fscrypt_has_encryption_key(dir)) - fscrypt_set_encrypted_dentry(dentry); - fscrypt_set_d_op(dentry); - if (err && err != -ENOKEY) - return ERR_PTR(err); - } + err = fscrypt_prepare_lookup(dir, dentry, flags); + if (err) + return ERR_PTR(err); err = fscrypt_setup_filename(dir, &dentry->d_name, 1, &nm); if (err) @@ -1148,38 +1137,24 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, struct ubifs_inode *dir_ui = ubifs_inode(dir); struct ubifs_info *c = dir->i_sb->s_fs_info; int err, sz_change, len = strlen(symname); - struct fscrypt_str disk_link = FSTR_INIT((char *)symname, len + 1); - struct fscrypt_symlink_data *sd = NULL; + struct fscrypt_str disk_link; struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, .new_ino_d = ALIGN(len, 8), .dirtied_ino = 1 }; struct fscrypt_name nm; - if (ubifs_crypt_is_encrypted(dir)) { - err = fscrypt_get_encryption_info(dir); - if (err) - goto out_budg; + dbg_gen("dent '%pd', target '%s' in dir ino %lu", dentry, + symname, dir->i_ino); - if (!fscrypt_has_encryption_key(dir)) { - err = -EPERM; - goto out_budg; - } - - disk_link.len = (fscrypt_fname_encrypted_size(dir, len) + - sizeof(struct fscrypt_symlink_data)); - } + err = fscrypt_prepare_symlink(dir, symname, len, UBIFS_MAX_INO_DATA, + &disk_link); + if (err) + return err; /* * Budget request settings: new inode, new direntry and changing parent * directory inode. */ - - dbg_gen("dent '%pd', target '%s' in dir ino %lu", dentry, - symname, dir->i_ino); - - if (disk_link.len > UBIFS_MAX_INO_DATA) - return -ENAMETOOLONG; - err = ubifs_budget_space(c, &req); if (err) return err; @@ -1203,36 +1178,20 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, goto out_inode; } - if (ubifs_crypt_is_encrypted(dir)) { - struct qstr istr = QSTR_INIT(symname, len); - struct fscrypt_str ostr; - - sd = kzalloc(disk_link.len, GFP_NOFS); - if (!sd) { - err = -ENOMEM; - goto out_inode; - } - - ostr.name = sd->encrypted_path; - ostr.len = disk_link.len; - - err = fscrypt_fname_usr_to_disk(inode, &istr, &ostr); + if (IS_ENCRYPTED(inode)) { + disk_link.name = ui->data; /* encrypt directly into ui->data */ + err = fscrypt_encrypt_symlink(inode, symname, len, &disk_link); if (err) goto out_inode; - - sd->len = cpu_to_le16(ostr.len); - disk_link.name = (char *)sd; } else { + memcpy(ui->data, disk_link.name, disk_link.len); inode->i_link = ui->data; } - memcpy(ui->data, disk_link.name, disk_link.len); - ((char *)ui->data)[disk_link.len - 1] = '\0'; - /* * The terminating zero byte is not written to the flash media and it * is put just to make later in-memory string processing simpler. Thus, - * data length is @len, not @len + %1. + * data length is @disk_link.len - 1, not @disk_link.len. */ ui->data_len = disk_link.len - 1; inode->i_size = ubifs_inode(inode)->ui_size = disk_link.len - 1; @@ -1266,7 +1225,6 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, fscrypt_free_filename(&nm); out_budg: ubifs_release_budget(c, &req); - kfree(sd); return err; }
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index a02aa59d..ff44fd9 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c
@@ -1662,49 +1662,17 @@ static const char *ubifs_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { - int err; - struct fscrypt_symlink_data *sd; struct ubifs_inode *ui = ubifs_inode(inode); - struct fscrypt_str cstr; - struct fscrypt_str pstr; - if (!ubifs_crypt_is_encrypted(inode)) + if (!IS_ENCRYPTED(inode)) return ui->data; if (!dentry) return ERR_PTR(-ECHILD); - err = fscrypt_get_encryption_info(inode); - if (err) - return ERR_PTR(err); - - sd = (struct fscrypt_symlink_data *)ui->data; - cstr.name = sd->encrypted_path; - cstr.len = le16_to_cpu(sd->len); - - if (cstr.len == 0) - return ERR_PTR(-ENOENT); - - if ((cstr.len + sizeof(struct fscrypt_symlink_data) - 1) > ui->data_len) - return ERR_PTR(-EIO); - - err = fscrypt_fname_alloc_buffer(inode, cstr.len, &pstr); - if (err) - return ERR_PTR(err); - - err = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr); - if (err) { - fscrypt_fname_free_buffer(&pstr); - return ERR_PTR(err); - } - - pstr.name[pstr.len] = '\0'; - - set_delayed_call(done, kfree_link, pstr.name); - return pstr.name; + return fscrypt_get_symlink(inode, ui->data, ui->data_len, done); } - const struct address_space_operations ubifs_file_address_operations = { .readpage = ubifs_readpage, .writepage = ubifs_writepage,
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index fdc3112..0164bcc 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c
@@ -38,7 +38,8 @@ void ubifs_set_inode_flags(struct inode *inode) { unsigned int flags = ubifs_inode(inode)->flags; - inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_DIRSYNC); + inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_DIRSYNC | + S_ENCRYPTED); if (flags & UBIFS_SYNC_FL) inode->i_flags |= S_SYNC; if (flags & UBIFS_APPEND_FL) @@ -47,6 +48,8 @@ void ubifs_set_inode_flags(struct inode *inode) inode->i_flags |= S_IMMUTABLE; if (flags & UBIFS_DIRSYNC_FL) inode->i_flags |= S_DIRSYNC; + if (flags & UBIFS_CRYPT_FL) + inode->i_flags |= S_ENCRYPTED; } /*
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index ad827cf..8dd12b0 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c
@@ -379,9 +379,7 @@ static void ubifs_evict_inode(struct inode *inode) } done: clear_inode(inode); -#ifdef CONFIG_UBIFS_FS_ENCRYPTION - fscrypt_put_encryption_info(inode, NULL); -#endif + fscrypt_put_encryption_info(inode); } static void ubifs_dirty_inode(struct inode *inode, int flags) @@ -2016,12 +2014,6 @@ static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi) return c; } -#ifndef CONFIG_UBIFS_FS_ENCRYPTION -const struct fscrypt_operations ubifs_crypt_operations = { - .is_encrypted = __ubifs_crypt_is_encrypted, -}; -#endif - static int ubifs_fill_super(struct super_block *sb, void *data, int silent) { struct ubifs_info *c = sb->s_fs_info; @@ -2064,7 +2056,9 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE; sb->s_op = &ubifs_super_operations; sb->s_xattr = ubifs_xattr_handlers; +#ifdef CONFIG_UBIFS_FS_ENCRYPTION sb->s_cop = &ubifs_crypt_operations; +#endif mutex_lock(&c->umount_mutex); err = mount_ubifs(c);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index cd43651..63c7468 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h
@@ -38,12 +38,11 @@ #include <linux/backing-dev.h> #include <linux/security.h> #include <linux/xattr.h> -#ifdef CONFIG_UBIFS_FS_ENCRYPTION -#include <linux/fscrypt_supp.h> -#else -#include <linux/fscrypt_notsupp.h> -#endif #include <linux/random.h> + +#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_UBIFS_FS_ENCRYPTION) +#include <linux/fscrypt.h> + #include "ubifs-media.h" /* Version of this UBIFS implementation */ @@ -1835,16 +1834,11 @@ int ubifs_decrypt(const struct inode *inode, struct ubifs_data_node *dn, extern const struct fscrypt_operations ubifs_crypt_operations; -static inline bool __ubifs_crypt_is_encrypted(struct inode *inode) -{ - struct ubifs_inode *ui = ubifs_inode(inode); - - return ui->flags & UBIFS_CRYPT_FL; -} - static inline bool ubifs_crypt_is_encrypted(const struct inode *inode) { - return __ubifs_crypt_is_encrypted((struct inode *)inode); + const struct ubifs_inode *ui = ubifs_inode(inode); + + return ui->flags & UBIFS_CRYPT_FL; } /* Normal UBIFS messages */
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index c13eae8..5ddc89d 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c
@@ -170,6 +170,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, err = ubifs_jnl_update(c, host, nm, inode, 0, 1); if (err) goto out_cancel; + ubifs_set_inode_flags(host); mutex_unlock(&host_ui->ui_mutex); ubifs_release_budget(c, &req);
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 5f10052..5b0651d 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c
@@ -881,7 +881,8 @@ static int userfaultfd_release(struct inode *inode, struct file *file) new_flags, vma->anon_vma, vma->vm_file, vma->vm_pgoff, vma_policy(vma), - NULL_VM_UFFD_CTX); + NULL_VM_UFFD_CTX, + vma_get_anon_name(vma)); if (prev) vma = prev; else @@ -1438,7 +1439,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, prev = vma_merge(mm, prev, start, vma_end, new_flags, vma->anon_vma, vma->vm_file, vma->vm_pgoff, vma_policy(vma), - ((struct vm_userfaultfd_ctx){ ctx })); + ((struct vm_userfaultfd_ctx){ ctx }), + vma_get_anon_name(vma)); if (prev) { vma = prev; goto next; @@ -1598,7 +1600,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, prev = vma_merge(mm, prev, start, vma_end, new_flags, vma->anon_vma, vma->vm_file, vma->vm_pgoff, vma_policy(vma), - NULL_VM_UFFD_CTX); + NULL_VM_UFFD_CTX, + vma_get_anon_name(vma)); if (prev) { vma = prev; goto next;
diff --git a/fs/utimes.c b/fs/utimes.c index e4b3d7c..4f3b158 100644 --- a/fs/utimes.c +++ b/fs/utimes.c
@@ -88,7 +88,7 @@ static int utimes_common(const struct path *path, struct timespec64 *times) } retry_deleg: inode_lock(inode); - error = notify_change(path->dentry, &newattrs, &delegated_inode); + error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode); inode_unlock(inode); if (delegated_inode) { error = break_deleg_wait(&delegated_inode);
diff --git a/fs/xattr.c b/fs/xattr.c index 5002981..3e5d157 100644 --- a/fs/xattr.c +++ b/fs/xattr.c
@@ -131,7 +131,7 @@ xattr_permission(struct inode *inode, const char *name, int mask) return -EPERM; } - return inode_permission(inode, mask); + return inode_permission2(ERR_PTR(-EOPNOTSUPP), inode, mask); } int
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index fcec26d..6224bb4 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h
@@ -67,10 +67,12 @@ */ #ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* +#define TEXT_CFI_MAIN .text.cfi .text.[0-9a-zA-Z_]*.cfi #define DATA_MAIN .data .data.[0-9a-zA-Z_]* #define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* #else #define TEXT_MAIN .text +#define TEXT_CFI_MAIN .text.cfi #define DATA_MAIN .data #define BSS_MAIN .bss #endif @@ -105,7 +107,7 @@ #ifdef CONFIG_FTRACE_MCOUNT_RECORD #define MCOUNT_REC() . = ALIGN(8); \ VMLINUX_SYMBOL(__start_mcount_loc) = .; \ - *(__mcount_loc) \ + KEEP(*(__mcount_loc)) \ VMLINUX_SYMBOL(__stop_mcount_loc) = .; #else #define MCOUNT_REC() @@ -460,6 +462,8 @@ ALIGN_FUNCTION(); \ *(.text.hot TEXT_MAIN .text.fixup .text.unlikely) \ *(.text..refcount) \ + *(.text..ftrace) \ + *(TEXT_CFI_MAIN) \ *(.ref.text) \ MEM_KEEP(init.text) \ MEM_KEEP(exit.text) \ @@ -512,7 +516,7 @@ VMLINUX_SYMBOL(__softirqentry_text_end) = .; /* Section used for early init (in .S files) */ -#define HEAD_TEXT *(.head.text) +#define HEAD_TEXT KEEP(*(.head.text)) #define HEAD_TEXT_SECTION \ .head.text : AT(ADDR(.head.text) - LOAD_OFFSET) { \ @@ -557,7 +561,7 @@ MEM_DISCARD(init.data) \ KERNEL_CTORS() \ MCOUNT_REC() \ - *(.init.rodata) \ + *(.init.rodata .init.rodata.*) \ FTRACE_EVENTS() \ TRACE_SYSCALLS() \ KPROBE_BLACKLIST() \ @@ -576,7 +580,7 @@ EARLYCON_TABLE() #define INIT_TEXT \ - *(.init.text) \ + *(.init.text .init.text.*) \ *(.text.startup) \ MEM_DISCARD(init.text) @@ -593,7 +597,7 @@ MEM_DISCARD(exit.text) #define EXIT_CALL \ - *(.exitcall.exit) + KEEP(*(.exitcall.exit)) /* * bss (Block Started by Symbol) - uninitialized data
diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h new file mode 100644 index 0000000..3d261f5 --- /dev/null +++ b/include/crypto/chacha.h
@@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common values and helper functions for the ChaCha and XChaCha stream ciphers. + * + * XChaCha extends ChaCha's nonce to 192 bits, while provably retaining ChaCha's + * security. Here they share the same key size, tfm context, and setkey + * function; only their IV size and encrypt/decrypt function differ. + * + * The ChaCha paper specifies 20, 12, and 8-round variants. In general, it is + * recommended to use the 20-round variant ChaCha20. However, the other + * variants can be needed in some performance-sensitive scenarios. The generic + * ChaCha code currently allows only the 20 and 12-round variants. + */ + +#ifndef _CRYPTO_CHACHA_H +#define _CRYPTO_CHACHA_H + +#include <crypto/skcipher.h> +#include <linux/types.h> +#include <linux/crypto.h> + +/* 32-bit stream position, then 96-bit nonce (RFC7539 convention) */ +#define CHACHA_IV_SIZE 16 + +#define CHACHA_KEY_SIZE 32 +#define CHACHA_BLOCK_SIZE 64 + +/* 192-bit nonce, then 64-bit stream position */ +#define XCHACHA_IV_SIZE 32 + +struct chacha_ctx { + u32 key[8]; + int nrounds; +}; + +void chacha_block(u32 *state, u8 *stream, int nrounds); +static inline void chacha20_block(u32 *state, u8 *stream) +{ + chacha_block(state, stream, 20); +} +void hchacha_block(const u32 *in, u32 *out, int nrounds); + +void crypto_chacha_init(u32 *state, struct chacha_ctx *ctx, u8 *iv); + +int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keysize); +int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keysize); + +int crypto_chacha_crypt(struct skcipher_request *req); +int crypto_xchacha_crypt(struct skcipher_request *req); + +#endif /* _CRYPTO_CHACHA_H */
diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h deleted file mode 100644 index caaa470..0000000 --- a/include/crypto/chacha20.h +++ /dev/null
@@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Common values for the ChaCha20 algorithm - */ - -#ifndef _CRYPTO_CHACHA20_H -#define _CRYPTO_CHACHA20_H - -#include <crypto/skcipher.h> -#include <linux/types.h> -#include <linux/crypto.h> - -#define CHACHA20_IV_SIZE 16 -#define CHACHA20_KEY_SIZE 32 -#define CHACHA20_BLOCK_SIZE 64 - -struct chacha20_ctx { - u32 key[8]; -}; - -void chacha20_block(u32 *state, void *stream); -void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv); -int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keysize); -int crypto_chacha20_crypt(struct skcipher_request *req); - -#endif
diff --git a/include/crypto/nhpoly1305.h b/include/crypto/nhpoly1305.h new file mode 100644 index 0000000..53c0442 --- /dev/null +++ b/include/crypto/nhpoly1305.h
@@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common values and helper functions for the NHPoly1305 hash function. + */ + +#ifndef _NHPOLY1305_H +#define _NHPOLY1305_H + +#include <crypto/hash.h> +#include <crypto/poly1305.h> + +/* NH parameterization: */ + +/* Endianness: little */ +/* Word size: 32 bits (works well on NEON, SSE2, AVX2) */ + +/* Stride: 2 words (optimal on ARM32 NEON; works okay on other CPUs too) */ +#define NH_PAIR_STRIDE 2 +#define NH_MESSAGE_UNIT (NH_PAIR_STRIDE * 2 * sizeof(u32)) + +/* Num passes (Toeplitz iteration count): 4, to give ε = 2^{-128} */ +#define NH_NUM_PASSES 4 +#define NH_HASH_BYTES (NH_NUM_PASSES * sizeof(u64)) + +/* Max message size: 1024 bytes (32x compression factor) */ +#define NH_NUM_STRIDES 64 +#define NH_MESSAGE_WORDS (NH_PAIR_STRIDE * 2 * NH_NUM_STRIDES) +#define NH_MESSAGE_BYTES (NH_MESSAGE_WORDS * sizeof(u32)) +#define NH_KEY_WORDS (NH_MESSAGE_WORDS + \ + NH_PAIR_STRIDE * 2 * (NH_NUM_PASSES - 1)) +#define NH_KEY_BYTES (NH_KEY_WORDS * sizeof(u32)) + +#define NHPOLY1305_KEY_SIZE (POLY1305_BLOCK_SIZE + NH_KEY_BYTES) + +struct nhpoly1305_key { + struct poly1305_key poly_key; + u32 nh_key[NH_KEY_WORDS]; +}; + +struct nhpoly1305_state { + + /* Running total of polynomial evaluation */ + struct poly1305_state poly_state; + + /* Partial block buffer */ + u8 buffer[NH_MESSAGE_UNIT]; + unsigned int buflen; + + /* + * Number of bytes remaining until the current NH message reaches + * NH_MESSAGE_BYTES. When nonzero, 'nh_hash' holds the partial NH hash. + */ + unsigned int nh_remaining; + + __le64 nh_hash[NH_NUM_PASSES]; +}; + +typedef void (*nh_t)(const u32 *key, const u8 *message, size_t message_len, + __le64 hash[NH_NUM_PASSES]); + +int crypto_nhpoly1305_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen); + +int crypto_nhpoly1305_init(struct shash_desc *desc); +int crypto_nhpoly1305_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen); +int crypto_nhpoly1305_update_helper(struct shash_desc *desc, + const u8 *src, unsigned int srclen, + nh_t nh_fn); +int crypto_nhpoly1305_final(struct shash_desc *desc, u8 *dst); +int crypto_nhpoly1305_final_helper(struct shash_desc *desc, u8 *dst, + nh_t nh_fn); + +#endif /* _NHPOLY1305_H */
diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h index f718a19..34317ed 100644 --- a/include/crypto/poly1305.h +++ b/include/crypto/poly1305.h
@@ -13,13 +13,21 @@ #define POLY1305_KEY_SIZE 32 #define POLY1305_DIGEST_SIZE 16 +struct poly1305_key { + u32 r[5]; /* key, base 2^26 */ +}; + +struct poly1305_state { + u32 h[5]; /* accumulator, base 2^26 */ +}; + struct poly1305_desc_ctx { /* key */ - u32 r[5]; + struct poly1305_key r; /* finalize key */ u32 s[4]; /* accumulator */ - u32 h[5]; + struct poly1305_state h; /* partial buffer */ u8 buf[POLY1305_BLOCK_SIZE]; /* bytes used in partial buffer */ @@ -30,6 +38,22 @@ struct poly1305_desc_ctx { bool sset; }; +/* + * Poly1305 core functions. These implement the ε-almost-∆-universal hash + * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce + * ("s key") at the end. They also only support block-aligned inputs. + */ +void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key); +static inline void poly1305_core_init(struct poly1305_state *state) +{ + memset(state->h, 0, sizeof(state->h)); +} +void poly1305_core_blocks(struct poly1305_state *state, + const struct poly1305_key *key, + const void *src, unsigned int nblocks); +void poly1305_core_emit(const struct poly1305_state *state, void *dst); + +/* Crypto API helper functions for the Poly1305 MAC */ int crypto_poly1305_init(struct shash_desc *desc); unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int srclen);
diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h index da8357b..02dc2f3a 100644 --- a/include/linux/amba/mmci.h +++ b/include/linux/amba/mmci.h
@@ -6,6 +6,15 @@ #define AMBA_MMCI_H #include <linux/mmc/host.h> +#include <linux/mmc/card.h> +#include <linux/mmc/sdio_func.h> + +struct embedded_sdio_data { + struct sdio_cis cis; + struct sdio_cccr cccr; + struct sdio_embedded_func *funcs; + int num_funcs; +}; /** * struct mmci_platform_data - platform configuration for the MMCI @@ -32,6 +41,7 @@ struct mmci_platform_data { int gpio_wp; int gpio_cd; bool cd_invert; + struct embedded_sdio_data *embedded_sdio; }; #endif
diff --git a/include/linux/android_aid.h b/include/linux/android_aid.h new file mode 100644 index 0000000..6f1fa179 --- /dev/null +++ b/include/linux/android_aid.h
@@ -0,0 +1,28 @@ +/* include/linux/android_aid.h + * + * Copyright (C) 2008 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _LINUX_ANDROID_AID_H +#define _LINUX_ANDROID_AID_H + +/* AIDs that the kernel treats differently */ +#define AID_OBSOLETE_000 KGIDT_INIT(3001) /* was NET_BT_ADMIN */ +#define AID_OBSOLETE_001 KGIDT_INIT(3002) /* was NET_BT */ +#define AID_INET KGIDT_INIT(3003) +#define AID_NET_RAW KGIDT_INIT(3004) +#define AID_NET_ADMIN KGIDT_INIT(3005) +#define AID_NET_BW_STATS KGIDT_INIT(3006) /* read bandwidth statistics */ +#define AID_NET_BW_ACCT KGIDT_INIT(3007) /* change bandwidth statistics accounting */ + +#endif
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index d4fcb0e..d70f954 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h
@@ -6,15 +6,43 @@ #define _LINUX_ARCH_TOPOLOGY_H_ #include <linux/types.h> +#include <linux/percpu.h> void topology_normalize_cpu_scale(void); +int topology_detect_flags(void); +int topology_smt_flags(void); +int topology_core_flags(void); +int topology_cpu_flags(void); +int topology_update_cpu_topology(void); struct device_node; bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu); +DECLARE_PER_CPU(unsigned long, cpu_scale); + struct sched_domain; -unsigned long topology_get_cpu_scale(struct sched_domain *sd, int cpu); +static inline +unsigned long topology_get_cpu_scale(struct sched_domain *sd, int cpu) +{ + return per_cpu(cpu_scale, cpu); +} void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity); +DECLARE_PER_CPU(unsigned long, freq_scale); + +static inline +unsigned long topology_get_freq_scale(struct sched_domain *sd, int cpu) +{ + return per_cpu(freq_scale, cpu); +} + +DECLARE_PER_CPU(unsigned long, max_freq_scale); + +static inline +unsigned long topology_get_max_freq_scale(struct sched_domain *sd, int cpu) +{ + return per_cpu(max_freq_scale, cpu); +} + #endif /* _LINUX_ARCH_TOPOLOGY_H_ */
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 18863d5..9f721ee 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h
@@ -160,6 +160,7 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, #define SMCCC_SMC_INST "smc #0" #define SMCCC_HVC_INST "hvc #0" +#define SMCCC_REG(n) asm("x" # n) #elif defined(CONFIG_ARM) #include <asm/opcodes-sec.h> @@ -167,6 +168,7 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, #define SMCCC_SMC_INST __SMC(0) #define SMCCC_HVC_INST __HVC(0) +#define SMCCC_REG(n) asm("r" # n) #endif @@ -199,57 +201,57 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, #define __declare_arg_0(a0, res) \ struct arm_smccc_res *___res = res; \ - register unsigned long r0 asm("r0") = (u32)a0; \ - register unsigned long r1 asm("r1"); \ - register unsigned long r2 asm("r2"); \ - register unsigned long r3 asm("r3") + register unsigned long r0 SMCCC_REG(0) = (u32)a0; \ + register unsigned long r1 SMCCC_REG(1); \ + register unsigned long r2 SMCCC_REG(2); \ + register unsigned long r3 SMCCC_REG(3) #define __declare_arg_1(a0, a1, res) \ typeof(a1) __a1 = a1; \ struct arm_smccc_res *___res = res; \ - register unsigned long r0 asm("r0") = (u32)a0; \ - register unsigned long r1 asm("r1") = __a1; \ - register unsigned long r2 asm("r2"); \ - register unsigned long r3 asm("r3") + register unsigned long r0 SMCCC_REG(0) = (u32)a0; \ + register unsigned long r1 SMCCC_REG(1) = __a1; \ + register unsigned long r2 SMCCC_REG(2); \ + register unsigned long r3 SMCCC_REG(3) #define __declare_arg_2(a0, a1, a2, res) \ typeof(a1) __a1 = a1; \ typeof(a2) __a2 = a2; \ struct arm_smccc_res *___res = res; \ - register unsigned long r0 asm("r0") = (u32)a0; \ - register unsigned long r1 asm("r1") = __a1; \ - register unsigned long r2 asm("r2") = __a2; \ - register unsigned long r3 asm("r3") + register unsigned long r0 SMCCC_REG(0) = (u32)a0; \ + register unsigned long r1 SMCCC_REG(1) = __a1; \ + register unsigned long r2 SMCCC_REG(2) = __a2; \ + register unsigned long r3 SMCCC_REG(3) #define __declare_arg_3(a0, a1, a2, a3, res) \ typeof(a1) __a1 = a1; \ typeof(a2) __a2 = a2; \ typeof(a3) __a3 = a3; \ struct arm_smccc_res *___res = res; \ - register unsigned long r0 asm("r0") = (u32)a0; \ - register unsigned long r1 asm("r1") = __a1; \ - register unsigned long r2 asm("r2") = __a2; \ - register unsigned long r3 asm("r3") = __a3 + register unsigned long r0 SMCCC_REG(0) = (u32)a0; \ + register unsigned long r1 SMCCC_REG(1) = __a1; \ + register unsigned long r2 SMCCC_REG(2) = __a2; \ + register unsigned long r3 SMCCC_REG(3) = __a3 #define __declare_arg_4(a0, a1, a2, a3, a4, res) \ typeof(a4) __a4 = a4; \ __declare_arg_3(a0, a1, a2, a3, res); \ - register unsigned long r4 asm("r4") = __a4 + register unsigned long r4 SMCCC_REG(4) = __a4 #define __declare_arg_5(a0, a1, a2, a3, a4, a5, res) \ typeof(a5) __a5 = a5; \ __declare_arg_4(a0, a1, a2, a3, a4, res); \ - register unsigned long r5 asm("r5") = __a5 + register unsigned long r5 SMCCC_REG(5) = __a5 #define __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res) \ typeof(a6) __a6 = a6; \ __declare_arg_5(a0, a1, a2, a3, a4, a5, res); \ - register unsigned long r6 asm("r6") = __a6 + register unsigned long r6 SMCCC_REG(6) = __a6 #define __declare_arg_7(a0, a1, a2, a3, a4, a5, a6, a7, res) \ typeof(a7) __a7 = a7; \ __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res); \ - register unsigned long r7 asm("r7") = __a7 + register unsigned long r7 SMCCC_REG(7) = __a7 #define ___declare_args(count, ...) __declare_arg_ ## count(__VA_ARGS__) #define __declare_args(count, ...) ___declare_args(count, __VA_ARGS__)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c9d2a1a..52fb5dc 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h
@@ -199,6 +199,9 @@ struct bpf_prog_aux { struct bpf_map **used_maps; struct bpf_prog *prog; struct user_struct *user; +#ifdef CONFIG_SECURITY + void *security; +#endif union { struct work_struct work; struct rcu_head rcu; @@ -254,6 +257,9 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); +extern const struct file_operations bpf_map_fops; +extern const struct file_operations bpf_prog_fops; + #define BPF_PROG_TYPE(_id, _ops) \ extern const struct bpf_verifier_ops _ops; #define BPF_MAP_TYPE(_id, _ops) \ @@ -283,11 +289,11 @@ void bpf_map_area_free(void *base); extern int sysctl_unprivileged_bpf_disabled; -int bpf_map_new_fd(struct bpf_map *map); +int bpf_map_new_fd(struct bpf_map *map, int flags); int bpf_prog_new_fd(struct bpf_prog *prog); int bpf_obj_pin_user(u32 ufd, const char __user *pathname); -int bpf_obj_get_user(const char __user *pathname); +int bpf_obj_get_user(const char __user *pathname, int flags); int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); @@ -305,6 +311,8 @@ int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags); int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); +int bpf_get_file_flag(int flags); + /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and * forced to use 'long' read/writes to try to atomically copy long counters. * Best-effort only. No barriers here, since it _will_ race with concurrent @@ -381,7 +389,7 @@ static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages) { } -static inline int bpf_obj_get_user(const char __user *pathname) +static inline int bpf_obj_get_user(const char __user *pathname, int flags) { return -EOPNOTSUPP; }
diff --git a/include/linux/cfi.h b/include/linux/cfi.h new file mode 100644 index 0000000..e27033d --- /dev/null +++ b/include/linux/cfi.h
@@ -0,0 +1,38 @@ +#ifndef _LINUX_CFI_H +#define _LINUX_CFI_H + +#include <linux/stringify.h> + +#ifdef CONFIG_CFI_CLANG +#ifdef CONFIG_MODULES + +typedef void (*cfi_check_fn)(uint64_t, void *, void *); + +/* Compiler-generated function in each module, and the kernel */ +#define CFI_CHECK_FN __cfi_check +#define CFI_CHECK_FN_NAME __stringify(CFI_CHECK_FN) + +extern void CFI_CHECK_FN(uint64_t, void *, void *); + +#ifdef CONFIG_CFI_CLANG_SHADOW +extern void cfi_module_add(struct module *mod, unsigned long min_addr, + unsigned long max_addr); + +extern void cfi_module_remove(struct module *mod, unsigned long min_addr, + unsigned long max_addr); +#else +static inline void cfi_module_add(struct module *mod, unsigned long min_addr, + unsigned long max_addr) +{ +} + +static inline void cfi_module_remove(struct module *mod, unsigned long min_addr, + unsigned long max_addr) +{ +} +#endif /* CONFIG_CFI_CLANG_SHADOW */ + +#endif /* CONFIG_MODULES */ +#endif /* CONFIG_CFI_CLANG */ + +#endif /* _LINUX_CFI_H */
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index acb77dcf..8996c09 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h
@@ -21,6 +21,10 @@ SUBSYS(cpu) SUBSYS(cpuacct) #endif +#if IS_ENABLED(CONFIG_SCHED_TUNE) +SUBSYS(schedtune) +#endif + #if IS_ENABLED(CONFIG_BLK_CGROUP) SUBSYS(io) #endif
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 673fa522..6c5b4d7 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h
@@ -25,6 +25,15 @@ #undef __noretpoline #endif +#ifdef CONFIG_LTO_CLANG +#ifdef CONFIG_FTRACE_MCOUNT_RECORD +#define __norecordmcount \ + __attribute__((__section__(".text..ftrace"))) +#endif + +#define __nocfi __attribute__((no_sanitize("cfi"))) +#endif + /* * Not all versions of clang implement the the type-generic versions * of the builtin overflow checkers. Fortunately, clang implements
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 4be464a..e9ce906 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h
@@ -265,6 +265,14 @@ struct ftrace_likely_data { # define __nostackprotector #endif +#ifndef __norecordmcount +#define __norecordmcount +#endif + +#ifndef __nocfi +#define __nocfi +#endif + /* * Assume alignment of return value. */
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index cbf85c4..938b2a5 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h
@@ -920,6 +920,11 @@ static inline bool policy_has_boost_freq(struct cpufreq_policy *policy) extern void arch_freq_prepare_all(void); extern unsigned int arch_freq_get_on_cpu(int cpu); +extern void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq, + unsigned long max_freq); +extern void arch_set_max_freq_scale(struct cpumask *cpus, + unsigned long policy_max_freq); + /* the following are really really optional */ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs; extern struct freq_attr cpufreq_freq_attr_scaling_boost_freqs;
diff --git a/include/linux/cpufreq_times.h b/include/linux/cpufreq_times.h new file mode 100644 index 0000000..757bf0c --- /dev/null +++ b/include/linux/cpufreq_times.h
@@ -0,0 +1,45 @@ +/* drivers/cpufreq/cpufreq_times.c + * + * Copyright (C) 2018 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _LINUX_CPUFREQ_TIMES_H +#define _LINUX_CPUFREQ_TIMES_H + +#include <linux/cpufreq.h> +#include <linux/pid.h> + +#ifdef CONFIG_CPU_FREQ_TIMES +void cpufreq_task_times_init(struct task_struct *p); +void cpufreq_task_times_alloc(struct task_struct *p); +void cpufreq_task_times_exit(struct task_struct *p); +int proc_time_in_state_show(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *p); +void cpufreq_acct_update_power(struct task_struct *p, u64 cputime); +void cpufreq_times_create_policy(struct cpufreq_policy *policy); +void cpufreq_times_record_transition(struct cpufreq_freqs *freq); +void cpufreq_task_times_remove_uids(uid_t uid_start, uid_t uid_end); +int single_uid_time_in_state_open(struct inode *inode, struct file *file); +#else +static inline void cpufreq_task_times_init(struct task_struct *p) {} +static inline void cpufreq_task_times_alloc(struct task_struct *p) {} +static inline void cpufreq_task_times_exit(struct task_struct *p) {} +static inline void cpufreq_acct_update_power(struct task_struct *p, + u64 cputime) {} +static inline void cpufreq_times_create_policy(struct cpufreq_policy *policy) {} +static inline void cpufreq_times_record_transition( + struct cpufreq_freqs *freq) {} +static inline void cpufreq_task_times_remove_uids(uid_t uid_start, + uid_t uid_end) {} +#endif /* CONFIG_CPU_FREQ_TIMES */ +#endif /* _LINUX_CPUFREQ_TIMES_H */
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index a6989e0..113a148 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h
@@ -131,7 +131,8 @@ extern bool cpuidle_not_available(struct cpuidle_driver *drv, struct cpuidle_device *dev); extern int cpuidle_select(struct cpuidle_driver *drv, - struct cpuidle_device *dev); + struct cpuidle_device *dev, + bool *stop_tick); extern int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index); extern void cpuidle_reflect(struct cpuidle_device *dev, int index); @@ -163,7 +164,7 @@ static inline bool cpuidle_not_available(struct cpuidle_driver *drv, struct cpuidle_device *dev) {return true; } static inline int cpuidle_select(struct cpuidle_driver *drv, - struct cpuidle_device *dev) + struct cpuidle_device *dev, bool *stop_tick) {return -ENODEV; } static inline int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index) @@ -214,7 +215,7 @@ static inline void cpuidle_use_deepest_state(bool enable) #endif /* kernel/sched/idle.c */ -extern void sched_idle_set_state(struct cpuidle_state *idle_state); +extern void sched_idle_set_state(struct cpuidle_state *idle_state, int index); extern void default_idle_call(void); #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED @@ -246,7 +247,8 @@ struct cpuidle_governor { struct cpuidle_device *dev); int (*select) (struct cpuidle_driver *drv, - struct cpuidle_device *dev); + struct cpuidle_device *dev, + bool *stop_tick); void (*reflect) (struct cpuidle_device *dev, int index); };
diff --git a/include/linux/crypto.h b/include/linux/crypto.h index de96913..5e14fac 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h
@@ -24,6 +24,7 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/uaccess.h> +#include <linux/completion.h> /* * Autoloaded crypto modules should only use a prefixed name to avoid allowing @@ -481,6 +482,45 @@ struct crypto_alg { } CRYPTO_MINALIGN_ATTR; /* + * A helper struct for waiting for completion of async crypto ops + */ +struct crypto_wait { + struct completion completion; + int err; +}; + +/* + * Macro for declaring a crypto op async wait object on stack + */ +#define DECLARE_CRYPTO_WAIT(_wait) \ + struct crypto_wait _wait = { \ + COMPLETION_INITIALIZER_ONSTACK((_wait).completion), 0 } + +/* + * Async ops completion helper functioons + */ +void crypto_req_done(struct crypto_async_request *req, int err); + +static inline int crypto_wait_req(int err, struct crypto_wait *wait) +{ + switch (err) { + case -EINPROGRESS: + case -EBUSY: + wait_for_completion(&wait->completion); + reinit_completion(&wait->completion); + err = wait->err; + break; + }; + + return err; +} + +static inline void crypto_init_wait(struct crypto_wait *wait) +{ + init_completion(&wait->completion); +} + +/* * Algorithm registration interface. */ int crypto_register_alg(struct crypto_alg *alg);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 006f4cc..e79b7ba 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h
@@ -149,6 +149,7 @@ struct dentry_operations { int (*d_manage)(const struct path *, bool); struct dentry *(*d_real)(struct dentry *, const struct inode *, unsigned int, unsigned int); + void (*d_canonical_path)(const struct path *, struct path *); } ____cacheline_aligned; /*
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index a553843..a79c930 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h
@@ -431,6 +431,12 @@ void dm_set_mdptr(struct mapped_device *md, void *ptr); void *dm_get_mdptr(struct mapped_device *md); /* + * Export the device via the ioctl interface (uses mdptr). + */ +int dm_ioctl_export(struct mapped_device *md, const char *name, + const char *uuid); + +/* * A device can still be used while suspended, but I/O is deferred. */ int dm_suspend(struct mapped_device *md, unsigned suspend_flags); @@ -459,6 +465,13 @@ union map_info *dm_get_rq_mapinfo(struct request *rq); struct queue_limits *dm_get_queue_limits(struct mapped_device *md); +void dm_lock_md_type(struct mapped_device *md); +void dm_unlock_md_type(struct mapped_device *md); +void dm_set_md_type(struct mapped_device *md, unsigned type); +unsigned dm_get_md_type(struct mapped_device *md); +int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t); +unsigned dm_table_get_type(struct dm_table *t); + /* * Geometry functions. */
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 1718950..6385ecd 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h
@@ -111,6 +111,7 @@ struct dma_fence_cb { * @get_driver_name: returns the driver name. * @get_timeline_name: return the name of the context this fence belongs to. * @enable_signaling: enable software signaling of fence. + * @disable_signaling: disable software signaling of fence (optional). * @signaled: [optional] peek whether the fence is signaled, can be null. * @wait: custom wait implementation, or dma_fence_default_wait. * @release: [optional] called on destruction of fence, can be null @@ -170,6 +171,7 @@ struct dma_fence_ops { const char * (*get_driver_name)(struct dma_fence *fence); const char * (*get_timeline_name)(struct dma_fence *fence); bool (*enable_signaling)(struct dma_fence *fence); + void (*disable_signaling)(struct dma_fence *fence); bool (*signaled)(struct dma_fence *fence); signed long (*wait)(struct dma_fence *fence, bool intr, signed long timeout);
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 2a0c453..d771104 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h
@@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /** * include/linux/f2fs_fs.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef _LINUX_F2FS_FS_H #define _LINUX_F2FS_FS_H @@ -21,6 +18,7 @@ #define F2FS_BLKSIZE 4096 /* support only 4KB block */ #define F2FS_BLKSIZE_BITS 12 /* bits for F2FS_BLKSIZE */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ +#define F2FS_EXTENSION_LEN 8 /* max size of extension */ #define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) >> F2FS_BLKSIZE_BITS) #define NULL_ADDR ((block_t)0) /* used as block_t addresses */ @@ -36,15 +34,16 @@ #define F2FS_NODE_INO(sbi) ((sbi)->node_ino_num) #define F2FS_META_INO(sbi) ((sbi)->meta_ino_num) -#define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */ -#define F2FS_IO_SIZE_KB(sbi) (1 << ((sbi)->write_io_size_bits + 2)) /* KB */ -#define F2FS_IO_SIZE_BYTES(sbi) (1 << ((sbi)->write_io_size_bits + 12)) /* B */ -#define F2FS_IO_SIZE_BITS(sbi) ((sbi)->write_io_size_bits) /* power of 2 */ +#define F2FS_MAX_QUOTAS 3 + +#define F2FS_IO_SIZE(sbi) (1 << F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */ +#define F2FS_IO_SIZE_KB(sbi) (1 << (F2FS_OPTION(sbi).write_io_size_bits + 2)) /* KB */ +#define F2FS_IO_SIZE_BYTES(sbi) (1 << (F2FS_OPTION(sbi).write_io_size_bits + 12)) /* B */ +#define F2FS_IO_SIZE_BITS(sbi) (F2FS_OPTION(sbi).write_io_size_bits) /* power of 2 */ #define F2FS_IO_SIZE_MASK(sbi) (F2FS_IO_SIZE(sbi) - 1) /* This flag is used by node and meta inodes, and by recovery */ #define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) -#define GFP_F2FS_HIGH_ZERO (GFP_NOFS | __GFP_ZERO | __GFP_HIGHMEM) /* * For further optimization on multi-head logs, on-disk layout supports maximum @@ -100,7 +99,7 @@ struct f2fs_super_block { __u8 uuid[16]; /* 128-bit uuid for volume */ __le16 volume_name[MAX_VOLUME_NAME]; /* volume name */ __le32 extension_count; /* # of extensions below */ - __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ + __u8 extension_list[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN];/* extension array */ __le32 cp_payload; __u8 version[VERSION_LEN]; /* the kernel version */ __u8 init_version[VERSION_LEN]; /* the initial kernel version */ @@ -108,12 +107,19 @@ struct f2fs_super_block { __u8 encryption_level; /* versioning level for encryption */ __u8 encrypt_pw_salt[16]; /* Salt used for string2key algorithm */ struct f2fs_device devs[MAX_DEVICES]; /* device list */ - __u8 reserved[327]; /* valid reserved region */ + __le32 qf_ino[F2FS_MAX_QUOTAS]; /* quota inode numbers */ + __u8 hot_ext_count; /* # of hot file extension */ + __u8 reserved[310]; /* valid reserved region */ + __le32 crc; /* checksum of superblock */ } __packed; /* * For checkpoint */ +#define CP_DISABLED_FLAG 0x00001000 +#define CP_QUOTA_NEED_FSCK_FLAG 0x00000800 +#define CP_LARGE_NAT_BITMAP_FLAG 0x00000400 +#define CP_NOCRC_RECOVERY_FLAG 0x00000200 #define CP_TRIMMED_FLAG 0x00000100 #define CP_NAT_BITS_FLAG 0x00000080 #define CP_CRC_RECOVERY_FLAG 0x00000040 @@ -184,7 +190,8 @@ struct f2fs_extent { } __packed; #define F2FS_NAME_LEN 255 -#define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */ +/* 200 bytes for inline xattrs by default */ +#define DEFAULT_INLINE_XATTR_ADDRS 50 #define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ #define CUR_ADDRS_PER_INODE(inode) (DEF_ADDRS_PER_INODE - \ get_extra_isize(inode)) @@ -208,6 +215,7 @@ struct f2fs_extent { #define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */ #define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries */ #define F2FS_EXTRA_ATTR 0x20 /* file having extra attribute */ +#define F2FS_PIN_FILE 0x40 /* file should not be gced */ struct f2fs_inode { __le16 i_mode; /* file mode */ @@ -225,7 +233,13 @@ struct f2fs_inode { __le32 i_ctime_nsec; /* change time in nano scale */ __le32 i_mtime_nsec; /* modification time in nano scale */ __le32 i_generation; /* file version (for NFS) */ - __le32 i_current_depth; /* only for directory depth */ + union { + __le32 i_current_depth; /* only for directory depth */ + __le16 i_gc_failures; /* + * # of gc failures on pinned file. + * only for regular files. + */ + }; __le32 i_xattr_nid; /* nid to save xattr */ __le32 i_flags; /* file attributes */ __le32 i_pino; /* parent inode number */ @@ -238,11 +252,13 @@ struct f2fs_inode { union { struct { __le16 i_extra_isize; /* extra inode attribute size */ - __le16 i_padding; /* padding */ + __le16 i_inline_xattr_size; /* inline xattr size, unit: 4 bytes */ __le32 i_projid; /* project id */ __le32 i_inode_checksum;/* inode meta checksum */ + __le64 i_crtime; /* creation time */ + __le32 i_crtime_nsec; /* creation time in nano scale */ __le32 i_extra_end[0]; /* for attribute size calculation */ - }; + } __packed; __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ }; __le32 i_nid[DEF_NIDS_PER_INODE]; /* direct(2), indirect(2), @@ -288,7 +304,6 @@ struct f2fs_node { * For NAT entries */ #define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry)) -#define NAT_ENTRY_BITMAP_SIZE ((NAT_ENTRY_PER_BLOCK + 7) / 8) struct f2fs_nat_entry { __u8 version; /* latest version of cached nat entry */
diff --git a/include/linux/fs.h b/include/linux/fs.h index f6a577e..c62a079 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h
@@ -1599,13 +1599,21 @@ extern bool inode_owner_or_capable(const struct inode *inode); * VFS helper functions.. */ extern int vfs_create(struct inode *, struct dentry *, umode_t, bool); +extern int vfs_create2(struct vfsmount *, struct inode *, struct dentry *, umode_t, bool); extern int vfs_mkdir(struct inode *, struct dentry *, umode_t); +extern int vfs_mkdir2(struct vfsmount *, struct inode *, struct dentry *, umode_t); extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t); +extern int vfs_mknod2(struct vfsmount *, struct inode *, struct dentry *, umode_t, dev_t); extern int vfs_symlink(struct inode *, struct dentry *, const char *); +extern int vfs_symlink2(struct vfsmount *, struct inode *, struct dentry *, const char *); extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **); +extern int vfs_link2(struct vfsmount *, struct dentry *, struct inode *, struct dentry *, struct inode **); extern int vfs_rmdir(struct inode *, struct dentry *); +extern int vfs_rmdir2(struct vfsmount *, struct inode *, struct dentry *); extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); +extern int vfs_unlink2(struct vfsmount *, struct inode *, struct dentry *, struct inode **); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); +extern int vfs_rename2(struct vfsmount *, struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); extern int vfs_whiteout(struct inode *, struct dentry *); extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, @@ -1736,6 +1744,7 @@ struct inode_operations { struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *); int (*permission) (struct inode *, int); + int (*permission2) (struct vfsmount *, struct inode *, int); struct posix_acl * (*get_acl)(struct inode *, int); int (*readlink) (struct dentry *, char __user *,int); @@ -1750,7 +1759,8 @@ struct inode_operations { int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*setattr) (struct dentry *, struct iattr *); - int (*getattr) (const struct path *, struct kstat *, u32, unsigned int); + int (*setattr2) (struct vfsmount *, struct dentry *, struct iattr *); + int (*getattr) (const struct path *, struct kstat *, u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); @@ -1820,9 +1830,13 @@ struct super_operations { int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); + int (*remount_fs2) (struct vfsmount *, struct super_block *, int *, char *); + void *(*clone_mnt_data) (void *); + void (*copy_mnt_data) (void *, void *); void (*umount_begin) (struct super_block *); int (*show_options)(struct seq_file *, struct dentry *); + int (*show_options2)(struct vfsmount *,struct seq_file *, struct dentry *); int (*show_devname)(struct seq_file *, struct dentry *); int (*show_path)(struct seq_file *, struct dentry *); int (*show_stats)(struct seq_file *, struct dentry *); @@ -1859,6 +1873,7 @@ struct super_operations { #else #define S_DAX 0 /* Make all the DAX code disappear */ #endif +#define S_ENCRYPTED 16384 /* Encrypted file (using fs/crypto/) */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -1898,6 +1913,7 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) #define IS_DAX(inode) ((inode)->i_flags & S_DAX) +#define IS_ENCRYPTED(inode) ((inode)->i_flags & S_ENCRYPTED) #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ (inode)->i_rdev == WHITEOUT_DEV) @@ -2080,6 +2096,9 @@ struct file_system_type { #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); + struct dentry *(*mount2) (struct vfsmount *, struct file_system_type *, int, + const char *, void *); + void *(*alloc_mnt_data) (void); void (*kill_sb) (struct super_block *); struct module *owner; struct file_system_type * next; @@ -2381,6 +2400,8 @@ struct filename { extern long vfs_truncate(const struct path *, loff_t); extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); +extern int do_truncate2(struct vfsmount *, struct dentry *, loff_t start, + unsigned int time_attrs, struct file *filp); extern int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len); extern long do_sys_open(int dfd, const char __user *filename, int flags, @@ -2685,8 +2706,11 @@ extern void emergency_remount(void); extern sector_t bmap(struct inode *, sector_t); #endif extern int notify_change(struct dentry *, struct iattr *, struct inode **); +extern int notify_change2(struct vfsmount *, struct dentry *, struct iattr *, struct inode **); extern int inode_permission(struct inode *, int); +extern int inode_permission2(struct vfsmount *, struct inode *, int); extern int __inode_permission(struct inode *, int); +extern int __inode_permission2(struct vfsmount *, struct inode *, int); extern int generic_permission(struct inode *, int); extern int __check_sticky(struct inode *dir, struct inode *inode);
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h new file mode 100644 index 0000000..952ab97 --- /dev/null +++ b/include/linux/fscrypt.h
@@ -0,0 +1,254 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fscrypt.h: declarations for per-file encryption + * + * Filesystems that implement per-file encryption include this header + * file with the __FS_HAS_ENCRYPTION set according to whether that filesystem + * is being built with encryption support or not. + * + * Copyright (C) 2015, Google, Inc. + * + * Written by Michael Halcrow, 2015. + * Modified by Jaegeuk Kim, 2015. + */ +#ifndef _LINUX_FSCRYPT_H +#define _LINUX_FSCRYPT_H + +#include <linux/fs.h> + +#define FS_CRYPTO_BLOCK_SIZE 16 + +struct fscrypt_ctx; +struct fscrypt_info; + +struct fscrypt_str { + unsigned char *name; + u32 len; +}; + +struct fscrypt_name { + const struct qstr *usr_fname; + struct fscrypt_str disk_name; + u32 hash; + u32 minor_hash; + struct fscrypt_str crypto_buf; +}; + +#define FSTR_INIT(n, l) { .name = n, .len = l } +#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len) +#define fname_name(p) ((p)->disk_name.name) +#define fname_len(p) ((p)->disk_name.len) + +/* Maximum value for the third parameter of fscrypt_operations.set_context(). */ +#define FSCRYPT_SET_CONTEXT_MAX_SIZE 28 + +#if __FS_HAS_ENCRYPTION +#include <linux/fscrypt_supp.h> +#else +#include <linux/fscrypt_notsupp.h> +#endif + +/** + * fscrypt_require_key - require an inode's encryption key + * @inode: the inode we need the key for + * + * If the inode is encrypted, set up its encryption key if not already done. + * Then require that the key be present and return -ENOKEY otherwise. + * + * No locks are needed, and the key will live as long as the struct inode --- so + * it won't go away from under you. + * + * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code + * if a problem occurred while setting up the encryption key. + */ +static inline int fscrypt_require_key(struct inode *inode) +{ + if (IS_ENCRYPTED(inode)) { + int err = fscrypt_get_encryption_info(inode); + + if (err) + return err; + if (!fscrypt_has_encryption_key(inode)) + return -ENOKEY; + } + return 0; +} + +/** + * fscrypt_prepare_link - prepare to link an inode into a possibly-encrypted directory + * @old_dentry: an existing dentry for the inode being linked + * @dir: the target directory + * @dentry: negative dentry for the target filename + * + * A new link can only be added to an encrypted directory if the directory's + * encryption key is available --- since otherwise we'd have no way to encrypt + * the filename. Therefore, we first set up the directory's encryption key (if + * not already done) and return an error if it's unavailable. + * + * We also verify that the link will not violate the constraint that all files + * in an encrypted directory tree use the same encryption policy. + * + * Return: 0 on success, -ENOKEY if the directory's encryption key is missing, + * -EPERM if the link would result in an inconsistent encryption policy, or + * another -errno code. + */ +static inline int fscrypt_prepare_link(struct dentry *old_dentry, + struct inode *dir, + struct dentry *dentry) +{ + if (IS_ENCRYPTED(dir)) + return __fscrypt_prepare_link(d_inode(old_dentry), dir); + return 0; +} + +/** + * fscrypt_prepare_rename - prepare for a rename between possibly-encrypted directories + * @old_dir: source directory + * @old_dentry: dentry for source file + * @new_dir: target directory + * @new_dentry: dentry for target location (may be negative unless exchanging) + * @flags: rename flags (we care at least about %RENAME_EXCHANGE) + * + * Prepare for ->rename() where the source and/or target directories may be + * encrypted. A new link can only be added to an encrypted directory if the + * directory's encryption key is available --- since otherwise we'd have no way + * to encrypt the filename. A rename to an existing name, on the other hand, + * *is* cryptographically possible without the key. However, we take the more + * conservative approach and just forbid all no-key renames. + * + * We also verify that the rename will not violate the constraint that all files + * in an encrypted directory tree use the same encryption policy. + * + * Return: 0 on success, -ENOKEY if an encryption key is missing, -EPERM if the + * rename would cause inconsistent encryption policies, or another -errno code. + */ +static inline int fscrypt_prepare_rename(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry, + unsigned int flags) +{ + if (IS_ENCRYPTED(old_dir) || IS_ENCRYPTED(new_dir)) + return __fscrypt_prepare_rename(old_dir, old_dentry, + new_dir, new_dentry, flags); + return 0; +} + +/** + * fscrypt_prepare_lookup - prepare to lookup a name in a possibly-encrypted directory + * @dir: directory being searched + * @dentry: filename being looked up + * @flags: lookup flags + * + * Prepare for ->lookup() in a directory which may be encrypted. Lookups can be + * done with or without the directory's encryption key; without the key, + * filenames are presented in encrypted form. Therefore, we'll try to set up + * the directory's encryption key, but even without it the lookup can continue. + * + * To allow invalidating stale dentries if the directory's encryption key is + * added later, we also install a custom ->d_revalidate() method and use the + * DCACHE_ENCRYPTED_WITH_KEY flag to indicate whether a given dentry is a + * plaintext name (flag set) or a ciphertext name (flag cleared). + * + * Return: 0 on success, -errno if a problem occurred while setting up the + * encryption key + */ +static inline int fscrypt_prepare_lookup(struct inode *dir, + struct dentry *dentry, + unsigned int flags) +{ + if (IS_ENCRYPTED(dir)) + return __fscrypt_prepare_lookup(dir, dentry); + return 0; +} + +/** + * fscrypt_prepare_setattr - prepare to change a possibly-encrypted inode's attributes + * @dentry: dentry through which the inode is being changed + * @attr: attributes to change + * + * Prepare for ->setattr() on a possibly-encrypted inode. On an encrypted file, + * most attribute changes are allowed even without the encryption key. However, + * without the encryption key we do have to forbid truncates. This is needed + * because the size being truncated to may not be a multiple of the filesystem + * block size, and in that case we'd have to decrypt the final block, zero the + * portion past i_size, and re-encrypt it. (We *could* allow truncating to a + * filesystem block boundary, but it's simpler to just forbid all truncates --- + * and we already forbid all other contents modifications without the key.) + * + * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code + * if a problem occurred while setting up the encryption key. + */ +static inline int fscrypt_prepare_setattr(struct dentry *dentry, + struct iattr *attr) +{ + if (attr->ia_valid & ATTR_SIZE) + return fscrypt_require_key(d_inode(dentry)); + return 0; +} + +/** + * fscrypt_prepare_symlink - prepare to create a possibly-encrypted symlink + * @dir: directory in which the symlink is being created + * @target: plaintext symlink target + * @len: length of @target excluding null terminator + * @max_len: space the filesystem has available to store the symlink target + * @disk_link: (out) the on-disk symlink target being prepared + * + * This function computes the size the symlink target will require on-disk, + * stores it in @disk_link->len, and validates it against @max_len. An + * encrypted symlink may be longer than the original. + * + * Additionally, @disk_link->name is set to @target if the symlink will be + * unencrypted, but left NULL if the symlink will be encrypted. For encrypted + * symlinks, the filesystem must call fscrypt_encrypt_symlink() to create the + * on-disk target later. (The reason for the two-step process is that some + * filesystems need to know the size of the symlink target before creating the + * inode, e.g. to determine whether it will be a "fast" or "slow" symlink.) + * + * Return: 0 on success, -ENAMETOOLONG if the symlink target is too long, + * -ENOKEY if the encryption key is missing, or another -errno code if a problem + * occurred while setting up the encryption key. + */ +static inline int fscrypt_prepare_symlink(struct inode *dir, + const char *target, + unsigned int len, + unsigned int max_len, + struct fscrypt_str *disk_link) +{ + if (IS_ENCRYPTED(dir) || fscrypt_dummy_context_enabled(dir)) + return __fscrypt_prepare_symlink(dir, len, max_len, disk_link); + + disk_link->name = (unsigned char *)target; + disk_link->len = len + 1; + if (disk_link->len > max_len) + return -ENAMETOOLONG; + return 0; +} + +/** + * fscrypt_encrypt_symlink - encrypt the symlink target if needed + * @inode: symlink inode + * @target: plaintext symlink target + * @len: length of @target excluding null terminator + * @disk_link: (in/out) the on-disk symlink target being prepared + * + * If the symlink target needs to be encrypted, then this function encrypts it + * into @disk_link->name. fscrypt_prepare_symlink() must have been called + * previously to compute @disk_link->len. If the filesystem did not allocate a + * buffer for @disk_link->name after calling fscrypt_prepare_link(), then one + * will be kmalloc()'ed and the filesystem will be responsible for freeing it. + * + * Return: 0 on success, -errno on failure + */ +static inline int fscrypt_encrypt_symlink(struct inode *inode, + const char *target, + unsigned int len, + struct fscrypt_str *disk_link) +{ + if (IS_ENCRYPTED(inode)) + return __fscrypt_encrypt_symlink(inode, target, len, disk_link); + return 0; +} + +#endif /* _LINUX_FSCRYPT_H */
diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h deleted file mode 100644 index 854d724..0000000 --- a/include/linux/fscrypt_common.h +++ /dev/null
@@ -1,142 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * fscrypt_common.h: common declarations for per-file encryption - * - * Copyright (C) 2015, Google, Inc. - * - * Written by Michael Halcrow, 2015. - * Modified by Jaegeuk Kim, 2015. - */ - -#ifndef _LINUX_FSCRYPT_COMMON_H -#define _LINUX_FSCRYPT_COMMON_H - -#include <linux/key.h> -#include <linux/fs.h> -#include <linux/mm.h> -#include <linux/bio.h> -#include <linux/dcache.h> -#include <crypto/skcipher.h> -#include <uapi/linux/fs.h> - -#define FS_CRYPTO_BLOCK_SIZE 16 - -struct fscrypt_info; - -struct fscrypt_ctx { - union { - struct { - struct page *bounce_page; /* Ciphertext page */ - struct page *control_page; /* Original page */ - } w; - struct { - struct bio *bio; - struct work_struct work; - } r; - struct list_head free_list; /* Free list */ - }; - u8 flags; /* Flags */ -}; - -/** - * For encrypted symlinks, the ciphertext length is stored at the beginning - * of the string in little-endian format. - */ -struct fscrypt_symlink_data { - __le16 len; - char encrypted_path[1]; -} __packed; - -struct fscrypt_str { - unsigned char *name; - u32 len; -}; - -struct fscrypt_name { - const struct qstr *usr_fname; - struct fscrypt_str disk_name; - u32 hash; - u32 minor_hash; - struct fscrypt_str crypto_buf; -}; - -#define FSTR_INIT(n, l) { .name = n, .len = l } -#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len) -#define fname_name(p) ((p)->disk_name.name) -#define fname_len(p) ((p)->disk_name.len) - -/* - * fscrypt superblock flags - */ -#define FS_CFLG_OWN_PAGES (1U << 1) - -/* - * crypto opertions for filesystems - */ -struct fscrypt_operations { - unsigned int flags; - const char *key_prefix; - int (*get_context)(struct inode *, void *, size_t); - int (*set_context)(struct inode *, const void *, size_t, void *); - bool (*dummy_context)(struct inode *); - bool (*is_encrypted)(struct inode *); - bool (*empty_dir)(struct inode *); - unsigned (*max_namelen)(struct inode *); -}; - -/* Maximum value for the third parameter of fscrypt_operations.set_context(). */ -#define FSCRYPT_SET_CONTEXT_MAX_SIZE 28 - -static inline bool fscrypt_dummy_context_enabled(struct inode *inode) -{ - if (inode->i_sb->s_cop->dummy_context && - inode->i_sb->s_cop->dummy_context(inode)) - return true; - return false; -} - -static inline bool fscrypt_valid_enc_modes(u32 contents_mode, - u32 filenames_mode) -{ - if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC && - filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS) - return true; - - if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS && - filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS) - return true; - - return false; -} - -static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) -{ - if (str->len == 1 && str->name[0] == '.') - return true; - - if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') - return true; - - return false; -} - -static inline struct page *fscrypt_control_page(struct page *page) -{ -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) - return ((struct fscrypt_ctx *)page_private(page))->w.control_page; -#else - WARN_ON_ONCE(1); - return ERR_PTR(-EINVAL); -#endif -} - -static inline int fscrypt_has_encryption_key(const struct inode *inode) -{ -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) - return (inode->i_crypt_info != NULL); -#else - return 0; -#endif -} - -#endif /* _LINUX_FSCRYPT_COMMON_H */
diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h index 19609ce..ee8b43e 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h
@@ -4,14 +4,31 @@ * * This stubs out the fscrypt functions for filesystems configured without * encryption support. + * + * Do not include this file directly. Use fscrypt.h instead! */ +#ifndef _LINUX_FSCRYPT_H +#error "Incorrect include of linux/fscrypt_notsupp.h!" +#endif #ifndef _LINUX_FSCRYPT_NOTSUPP_H #define _LINUX_FSCRYPT_NOTSUPP_H -#include <linux/fscrypt_common.h> +static inline bool fscrypt_has_encryption_key(const struct inode *inode) +{ + return false; +} + +static inline bool fscrypt_dummy_context_enabled(struct inode *inode) +{ + return false; +} /* crypto.c */ +static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work) +{ +} + static inline struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, gfp_t gfp_flags) { @@ -40,22 +57,17 @@ static inline int fscrypt_decrypt_page(const struct inode *inode, return -EOPNOTSUPP; } +static inline struct page *fscrypt_control_page(struct page *page) +{ + WARN_ON_ONCE(1); + return ERR_PTR(-EINVAL); +} static inline void fscrypt_restore_control_page(struct page *page) { return; } -static inline void fscrypt_set_d_op(struct dentry *dentry) -{ - return; -} - -static inline void fscrypt_set_encrypted_dentry(struct dentry *dentry) -{ - return; -} - /* policy.c */ static inline int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg) @@ -87,8 +99,7 @@ static inline int fscrypt_get_encryption_info(struct inode *inode) return -EOPNOTSUPP; } -static inline void fscrypt_put_encryption_info(struct inode *inode, - struct fscrypt_info *ci) +static inline void fscrypt_put_encryption_info(struct inode *inode) { return; } @@ -98,7 +109,7 @@ static inline int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, int lookup, struct fscrypt_name *fname) { - if (dir->i_sb->s_cop->is_encrypted(dir)) + if (IS_ENCRYPTED(dir)) return -EOPNOTSUPP; memset(fname, 0, sizeof(struct fscrypt_name)); @@ -113,16 +124,8 @@ static inline void fscrypt_free_filename(struct fscrypt_name *fname) return; } -static inline u32 fscrypt_fname_encrypted_size(const struct inode *inode, - u32 ilen) -{ - /* never happens */ - WARN_ON(1); - return 0; -} - static inline int fscrypt_fname_alloc_buffer(const struct inode *inode, - u32 ilen, + u32 max_encrypted_len, struct fscrypt_str *crypto_str) { return -EOPNOTSUPP; @@ -141,13 +144,6 @@ static inline int fscrypt_fname_disk_to_usr(struct inode *inode, return -EOPNOTSUPP; } -static inline int fscrypt_fname_usr_to_disk(struct inode *inode, - const struct qstr *iname, - struct fscrypt_str *oname) -{ - return -EOPNOTSUPP; -} - static inline bool fscrypt_match_name(const struct fscrypt_name *fname, const u8 *de_name, u32 de_name_len) { @@ -158,10 +154,13 @@ static inline bool fscrypt_match_name(const struct fscrypt_name *fname, } /* bio.c */ -static inline void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, - struct bio *bio) +static inline void fscrypt_decrypt_bio(struct bio *bio) { - return; +} + +static inline void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, + struct bio *bio) +{ } static inline void fscrypt_pullback_bio_page(struct page **page, bool restore) @@ -175,4 +174,58 @@ static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, return -EOPNOTSUPP; } +/* hooks.c */ + +static inline int fscrypt_file_open(struct inode *inode, struct file *filp) +{ + if (IS_ENCRYPTED(inode)) + return -EOPNOTSUPP; + return 0; +} + +static inline int __fscrypt_prepare_link(struct inode *inode, + struct inode *dir) +{ + return -EOPNOTSUPP; +} + +static inline int __fscrypt_prepare_rename(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry, + unsigned int flags) +{ + return -EOPNOTSUPP; +} + +static inline int __fscrypt_prepare_lookup(struct inode *dir, + struct dentry *dentry) +{ + return -EOPNOTSUPP; +} + +static inline int __fscrypt_prepare_symlink(struct inode *dir, + unsigned int len, + unsigned int max_len, + struct fscrypt_str *disk_link) +{ + return -EOPNOTSUPP; +} + +static inline int __fscrypt_encrypt_symlink(struct inode *inode, + const char *target, + unsigned int len, + struct fscrypt_str *disk_link) +{ + return -EOPNOTSUPP; +} + +static inline const char *fscrypt_get_symlink(struct inode *inode, + const void *caddr, + unsigned int max_size, + struct delayed_call *done) +{ + return ERR_PTR(-EOPNOTSUPP); +} + #endif /* _LINUX_FSCRYPT_NOTSUPP_H */
diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index 5153dce..6456c6b 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h
@@ -2,16 +2,64 @@ /* * fscrypt_supp.h * - * This is included by filesystems configured with encryption support. + * Do not include this file directly. Use fscrypt.h instead! */ +#ifndef _LINUX_FSCRYPT_H +#error "Incorrect include of linux/fscrypt_supp.h!" +#endif #ifndef _LINUX_FSCRYPT_SUPP_H #define _LINUX_FSCRYPT_SUPP_H -#include <linux/fscrypt_common.h> +#include <linux/mm.h> +#include <linux/slab.h> + +/* + * fscrypt superblock flags + */ +#define FS_CFLG_OWN_PAGES (1U << 1) + +/* + * crypto operations for filesystems + */ +struct fscrypt_operations { + unsigned int flags; + const char *key_prefix; + int (*get_context)(struct inode *, void *, size_t); + int (*set_context)(struct inode *, const void *, size_t, void *); + bool (*dummy_context)(struct inode *); + bool (*empty_dir)(struct inode *); + unsigned int max_namelen; +}; + +struct fscrypt_ctx { + union { + struct { + struct page *bounce_page; /* Ciphertext page */ + struct page *control_page; /* Original page */ + } w; + struct { + struct bio *bio; + struct work_struct work; + } r; + struct list_head free_list; /* Free list */ + }; + u8 flags; /* Flags */ +}; + +static inline bool fscrypt_has_encryption_key(const struct inode *inode) +{ + return (inode->i_crypt_info != NULL); +} + +static inline bool fscrypt_dummy_context_enabled(struct inode *inode) +{ + return inode->i_sb->s_cop->dummy_context && + inode->i_sb->s_cop->dummy_context(inode); +} /* crypto.c */ -extern struct kmem_cache *fscrypt_info_cachep; +extern void fscrypt_enqueue_decrypt_work(struct work_struct *); extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t); extern void fscrypt_release_ctx(struct fscrypt_ctx *); extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *, @@ -19,22 +67,14 @@ extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *, u64, gfp_t); extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int, unsigned int, u64); + +static inline struct page *fscrypt_control_page(struct page *page) +{ + return ((struct fscrypt_ctx *)page_private(page))->w.control_page; +} + extern void fscrypt_restore_control_page(struct page *); -extern const struct dentry_operations fscrypt_d_ops; - -static inline void fscrypt_set_d_op(struct dentry *dentry) -{ - d_set_d_op(dentry, &fscrypt_d_ops); -} - -static inline void fscrypt_set_encrypted_dentry(struct dentry *dentry) -{ - spin_lock(&dentry->d_lock); - dentry->d_flags |= DCACHE_ENCRYPTED_WITH_KEY; - spin_unlock(&dentry->d_lock); -} - /* policy.c */ extern int fscrypt_ioctl_set_policy(struct file *, const void __user *); extern int fscrypt_ioctl_get_policy(struct file *, void __user *); @@ -43,7 +83,7 @@ extern int fscrypt_inherit_context(struct inode *, struct inode *, void *, bool); /* keyinfo.c */ extern int fscrypt_get_encryption_info(struct inode *); -extern void fscrypt_put_encryption_info(struct inode *, struct fscrypt_info *); +extern void fscrypt_put_encryption_info(struct inode *); /* fname.c */ extern int fscrypt_setup_filename(struct inode *, const struct qstr *, @@ -54,14 +94,11 @@ static inline void fscrypt_free_filename(struct fscrypt_name *fname) kfree(fname->crypto_buf.name); } -extern u32 fscrypt_fname_encrypted_size(const struct inode *, u32); extern int fscrypt_fname_alloc_buffer(const struct inode *, u32, struct fscrypt_str *); extern void fscrypt_fname_free_buffer(struct fscrypt_str *); extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32, const struct fscrypt_str *, struct fscrypt_str *); -extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *, - struct fscrypt_str *); #define FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE 32 @@ -138,9 +175,30 @@ static inline bool fscrypt_match_name(const struct fscrypt_name *fname, } /* bio.c */ -extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *); +extern void fscrypt_decrypt_bio(struct bio *); +extern void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, + struct bio *bio); extern void fscrypt_pullback_bio_page(struct page **, bool); extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, unsigned int); +/* hooks.c */ +extern int fscrypt_file_open(struct inode *inode, struct file *filp); +extern int __fscrypt_prepare_link(struct inode *inode, struct inode *dir); +extern int __fscrypt_prepare_rename(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry, + unsigned int flags); +extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry); +extern int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, + unsigned int max_len, + struct fscrypt_str *disk_link); +extern int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, + unsigned int len, + struct fscrypt_str *disk_link); +extern const char *fscrypt_get_symlink(struct inode *inode, const void *caddr, + unsigned int max_size, + struct delayed_call *done); + #endif /* _LINUX_FSCRYPT_SUPP_H */
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index bdaf225..4636b8f 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h
@@ -214,12 +214,19 @@ static inline void fsnotify_modify(struct file *file) static inline void fsnotify_open(struct file *file) { const struct path *path = &file->f_path; + struct path lower_path; struct inode *inode = path->dentry->d_inode; __u32 mask = FS_OPEN; if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; + if (path->dentry->d_op && path->dentry->d_op->d_canonical_path) { + path->dentry->d_op->d_canonical_path(path, &lower_path); + fsnotify_parent(&lower_path, NULL, mask); + fsnotify(lower_path.dentry->d_inode, mask, &lower_path, FSNOTIFY_EVENT_PATH, NULL, 0); + path_put(&lower_path); + } fsnotify_parent(path, NULL, mask); fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); }
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e54d2579..ff3e5b1 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h
@@ -254,8 +254,16 @@ static inline int ftrace_function_local_disabled(struct ftrace_ops *ops) return *this_cpu_ptr(ops->disabled); } +#ifdef CONFIG_CFI_CLANG +/* Use a C stub with the correct type for CFI */ +static inline void ftrace_stub(unsigned long a0, unsigned long a1, + struct ftrace_ops *op, struct pt_regs *regs) +{ +} +#else extern void ftrace_stub(unsigned long a0, unsigned long a1, struct ftrace_ops *op, struct pt_regs *regs); +#endif #else /* !CONFIG_FUNCTION_TRACER */ /* @@ -743,7 +751,8 @@ static inline unsigned long get_lock_parent_ip(void) static inline void time_hardirqs_off(unsigned long a0, unsigned long a1) { } #endif -#ifdef CONFIG_PREEMPT_TRACER +#if defined(CONFIG_PREEMPT_TRACER) || \ + (defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_PREEMPTIRQ_EVENTS)) extern void trace_preempt_on(unsigned long a0, unsigned long a1); extern void trace_preempt_off(unsigned long a0, unsigned long a1); #else
diff --git a/include/linux/gpio_event.h b/include/linux/gpio_event.h new file mode 100644 index 0000000..2613fc5 --- /dev/null +++ b/include/linux/gpio_event.h
@@ -0,0 +1,170 @@ +/* include/linux/gpio_event.h + * + * Copyright (C) 2007 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _LINUX_GPIO_EVENT_H +#define _LINUX_GPIO_EVENT_H + +#include <linux/input.h> + +struct gpio_event_input_devs { + int count; + struct input_dev *dev[]; +}; +enum { + GPIO_EVENT_FUNC_UNINIT = 0x0, + GPIO_EVENT_FUNC_INIT = 0x1, + GPIO_EVENT_FUNC_SUSPEND = 0x2, + GPIO_EVENT_FUNC_RESUME = 0x3, +}; +struct gpio_event_info { + int (*func)(struct gpio_event_input_devs *input_devs, + struct gpio_event_info *info, + void **data, int func); + int (*event)(struct gpio_event_input_devs *input_devs, + struct gpio_event_info *info, + void **data, unsigned int dev, unsigned int type, + unsigned int code, int value); /* out events */ + bool no_suspend; +}; + +struct gpio_event_platform_data { + const char *name; + struct gpio_event_info **info; + size_t info_count; + int (*power)(const struct gpio_event_platform_data *pdata, bool on); + const char *names[]; /* If name is NULL, names contain a NULL */ + /* terminated list of input devices to create */ +}; + +#define GPIO_EVENT_DEV_NAME "gpio-event" + +/* Key matrix */ + +enum gpio_event_matrix_flags { + /* unset: drive active output low, set: drive active output high */ + GPIOKPF_ACTIVE_HIGH = 1U << 0, + GPIOKPF_DEBOUNCE = 1U << 1, + GPIOKPF_REMOVE_SOME_PHANTOM_KEYS = 1U << 2, + GPIOKPF_REMOVE_PHANTOM_KEYS = GPIOKPF_REMOVE_SOME_PHANTOM_KEYS | + GPIOKPF_DEBOUNCE, + GPIOKPF_DRIVE_INACTIVE = 1U << 3, + GPIOKPF_LEVEL_TRIGGERED_IRQ = 1U << 4, + GPIOKPF_PRINT_UNMAPPED_KEYS = 1U << 16, + GPIOKPF_PRINT_MAPPED_KEYS = 1U << 17, + GPIOKPF_PRINT_PHANTOM_KEYS = 1U << 18, +}; + +#define MATRIX_CODE_BITS (10) +#define MATRIX_KEY_MASK ((1U << MATRIX_CODE_BITS) - 1) +#define MATRIX_KEY(dev, code) \ + (((dev) << MATRIX_CODE_BITS) | (code & MATRIX_KEY_MASK)) + +extern int gpio_event_matrix_func(struct gpio_event_input_devs *input_devs, + struct gpio_event_info *info, void **data, int func); +struct gpio_event_matrix_info { + /* initialize to gpio_event_matrix_func */ + struct gpio_event_info info; + /* size must be ninputs * noutputs */ + const unsigned short *keymap; + unsigned int *input_gpios; + unsigned int *output_gpios; + unsigned int ninputs; + unsigned int noutputs; + /* time to wait before reading inputs after driving each output */ + ktime_t settle_time; + /* time to wait before scanning the keypad a second time */ + ktime_t debounce_delay; + ktime_t poll_time; + unsigned flags; +}; + +/* Directly connected inputs and outputs */ + +enum gpio_event_direct_flags { + GPIOEDF_ACTIVE_HIGH = 1U << 0, +/* GPIOEDF_USE_DOWN_IRQ = 1U << 1, */ +/* GPIOEDF_USE_IRQ = (1U << 2) | GPIOIDF_USE_DOWN_IRQ, */ + GPIOEDF_PRINT_KEYS = 1U << 8, + GPIOEDF_PRINT_KEY_DEBOUNCE = 1U << 9, + GPIOEDF_PRINT_KEY_UNSTABLE = 1U << 10, +}; + +struct gpio_event_direct_entry { + uint32_t gpio:16; + uint32_t code:10; + uint32_t dev:6; +}; + +/* inputs */ +extern int gpio_event_input_func(struct gpio_event_input_devs *input_devs, + struct gpio_event_info *info, void **data, int func); +struct gpio_event_input_info { + /* initialize to gpio_event_input_func */ + struct gpio_event_info info; + ktime_t debounce_time; + ktime_t poll_time; + uint16_t flags; + uint16_t type; + const struct gpio_event_direct_entry *keymap; + size_t keymap_size; +}; + +/* outputs */ +extern int gpio_event_output_func(struct gpio_event_input_devs *input_devs, + struct gpio_event_info *info, void **data, int func); +extern int gpio_event_output_event(struct gpio_event_input_devs *input_devs, + struct gpio_event_info *info, void **data, + unsigned int dev, unsigned int type, + unsigned int code, int value); +struct gpio_event_output_info { + /* initialize to gpio_event_output_func and gpio_event_output_event */ + struct gpio_event_info info; + uint16_t flags; + uint16_t type; + const struct gpio_event_direct_entry *keymap; + size_t keymap_size; +}; + + +/* axes */ + +enum gpio_event_axis_flags { + GPIOEAF_PRINT_UNKNOWN_DIRECTION = 1U << 16, + GPIOEAF_PRINT_RAW = 1U << 17, + GPIOEAF_PRINT_EVENT = 1U << 18, +}; + +extern int gpio_event_axis_func(struct gpio_event_input_devs *input_devs, + struct gpio_event_info *info, void **data, int func); +struct gpio_event_axis_info { + /* initialize to gpio_event_axis_func */ + struct gpio_event_info info; + uint8_t count; /* number of gpios for this axis */ + uint8_t dev; /* device index when using multiple input devices */ + uint8_t type; /* EV_REL or EV_ABS */ + uint16_t code; + uint16_t decoded_size; + uint16_t (*map)(struct gpio_event_axis_info *info, uint16_t in); + uint32_t *gpio; + uint32_t flags; +}; +#define gpio_axis_2bit_gray_map gpio_axis_4bit_gray_map +#define gpio_axis_3bit_gray_map gpio_axis_4bit_gray_map +uint16_t gpio_axis_4bit_gray_map( + struct gpio_event_axis_info *info, uint16_t in); +uint16_t gpio_axis_5bit_singletrack_map( + struct gpio_event_axis_info *info, uint16_t in); + +#endif
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 012c37f..27f14f2 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h
@@ -405,6 +405,7 @@ static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer) } extern u64 hrtimer_get_next_event(void); +extern u64 hrtimer_next_event_without(const struct hrtimer *exclude); extern bool hrtimer_active(const struct hrtimer *timer);
diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 3355efc..4125f60e 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h
@@ -54,6 +54,7 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev) case ARPHRD_IPGRE: case ARPHRD_VOID: case ARPHRD_NONE: + case ARPHRD_RAWIP: return false; default: return true;
diff --git a/include/linux/init.h b/include/linux/init.h index 07cab8a..f138e5b 100644 --- a/include/linux/init.h +++ b/include/linux/init.h
@@ -47,7 +47,7 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __section(.init.text) __cold __inittrace __latent_entropy __noinitretpoline +#define __init __section(.init.text) __cold __inittrace __latent_entropy __noinitretpoline __nocfi #define __initdata __section(.init.data) #define __initconst __section(.init.rodata) #define __exitdata __section(.exit.data) @@ -153,6 +153,15 @@ extern bool initcall_debug; #ifndef __ASSEMBLY__ +#ifdef CONFIG_LTO_CLANG + /* prepend the variable name with __COUNTER__ to ensure correct ordering */ + #define ___initcall_name2(c, fn, id) __initcall_##c##_##fn##id + #define ___initcall_name1(c, fn, id) ___initcall_name2(c, fn, id) + #define __initcall_name(fn, id) ___initcall_name1(__COUNTER__, fn, id) +#else + #define __initcall_name(fn, id) __initcall_##fn##id +#endif + /* * initcalls are now grouped by functionality into separate * subsections. Ordering inside the subsections is determined @@ -170,7 +179,7 @@ extern bool initcall_debug; */ #define __define_initcall(fn, id) \ - static initcall_t __initcall_##fn##id __used \ + static initcall_t __initcall_name(fn, id) __used \ __attribute__((__section__(".initcall" #id ".init"))) = fn; /*
diff --git a/include/linux/initramfs.h b/include/linux/initramfs.h new file mode 100644 index 0000000..fc7da63 --- /dev/null +++ b/include/linux/initramfs.h
@@ -0,0 +1,32 @@ +/* + * include/linux/initramfs.h + * + * Copyright (C) 2015, Google + * Rom Lemarchand <romlem@android.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_INITRAMFS_H +#define _LINUX_INITRAMFS_H + +#include <linux/kconfig.h> + +#if IS_BUILTIN(CONFIG_BLK_DEV_INITRD) + +int __init default_rootfs(void); + +#endif + +#endif /* _LINUX_INITRAMFS_H */
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 067a6fa..3e3893f 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h
@@ -42,6 +42,7 @@ struct ipv6_devconf { __s32 accept_ra_rt_info_max_plen; #endif #endif + __s32 accept_ra_rt_table; __s32 proxy_ndp; __s32 accept_source_route; __s32 accept_ra_from_local;
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 9385aa5..a27cf66 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h
@@ -62,8 +62,11 @@ extern int register_refined_jiffies(long clock_tick_rate); /* TICK_NSEC is the time between ticks in nsec assuming SHIFTED_HZ */ #define TICK_NSEC ((NSEC_PER_SEC+HZ/2)/HZ) -/* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */ -#define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ) +/* TICK_USEC is the time between ticks in usec assuming SHIFTED_HZ */ +#define TICK_USEC ((USEC_PER_SEC + HZ/2) / HZ) + +/* USER_TICK_USEC is the time between ticks in usec assuming fake USER_HZ */ +#define USER_TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ) #ifndef __jiffy_arch_data #define __jiffy_arch_data
diff --git a/include/linux/keychord.h b/include/linux/keychord.h new file mode 100644 index 0000000..08cf540 --- /dev/null +++ b/include/linux/keychord.h
@@ -0,0 +1,23 @@ +/* + * Key chord input driver + * + * Copyright (C) 2008 Google, Inc. + * Author: Mike Lockwood <lockwood@android.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * +*/ + +#ifndef __LINUX_KEYCHORD_H_ +#define __LINUX_KEYCHORD_H_ + +#include <uapi/linux/keychord.h> + +#endif /* __LINUX_KEYCHORD_H_ */
diff --git a/include/linux/keycombo.h b/include/linux/keycombo.h new file mode 100644 index 0000000..c6db262 --- /dev/null +++ b/include/linux/keycombo.h
@@ -0,0 +1,36 @@ +/* + * include/linux/keycombo.h - platform data structure for keycombo driver + * + * Copyright (C) 2014 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _LINUX_KEYCOMBO_H +#define _LINUX_KEYCOMBO_H + +#define KEYCOMBO_NAME "keycombo" + +/* + * if key_down_fn and key_up_fn are both present, you are guaranteed that + * key_down_fn will return before key_up_fn is called, and that key_up_fn + * is called iff key_down_fn is called. + */ +struct keycombo_platform_data { + void (*key_down_fn)(void *); + void (*key_up_fn)(void *); + void *priv; + int key_down_delay; /* Time in ms */ + int *keys_up; + int keys_down[]; /* 0 terminated */ +}; + +#endif /* _LINUX_KEYCOMBO_H */
diff --git a/include/linux/keyreset.h b/include/linux/keyreset.h new file mode 100644 index 0000000..2e34afa --- /dev/null +++ b/include/linux/keyreset.h
@@ -0,0 +1,29 @@ +/* + * include/linux/keyreset.h - platform data structure for resetkeys driver + * + * Copyright (C) 2014 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _LINUX_KEYRESET_H +#define _LINUX_KEYRESET_H + +#define KEYRESET_NAME "keyreset" + +struct keyreset_platform_data { + int (*reset_fn)(void); + int key_down_delay; + int *keys_up; + int keys_down[]; /* 0 terminated */ +}; + +#endif /* _LINUX_KEYRESET_H */
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index c925812..7161d8e 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h
@@ -1351,6 +1351,40 @@ * @inode we wish to get the security context of. * @ctx is a pointer in which to place the allocated security context. * @ctxlen points to the place to put the length of @ctx. + * + * Security hooks for using the eBPF maps and programs functionalities through + * eBPF syscalls. + * + * @bpf: + * Do a initial check for all bpf syscalls after the attribute is copied + * into the kernel. The actual security module can implement their own + * rules to check the specific cmd they need. + * + * @bpf_map: + * Do a check when the kernel generate and return a file descriptor for + * eBPF maps. + * + * @map: bpf map that we want to access + * @mask: the access flags + * + * @bpf_prog: + * Do a check when the kernel generate and return a file descriptor for + * eBPF programs. + * + * @prog: bpf prog that userspace want to use. + * + * @bpf_map_alloc_security: + * Initialize the security field inside bpf map. + * + * @bpf_map_free_security: + * Clean up the security information stored inside bpf map. + * + * @bpf_prog_alloc_security: + * Initialize the security field inside bpf program. + * + * @bpf_prog_free_security: + * Clean up the security information stored inside bpf prog. + * */ union security_list_options { int (*binder_set_context_mgr)(struct task_struct *mgr); @@ -1682,6 +1716,17 @@ union security_list_options { struct audit_context *actx); void (*audit_rule_free)(void *lsmrule); #endif /* CONFIG_AUDIT */ + +#ifdef CONFIG_BPF_SYSCALL + int (*bpf)(int cmd, union bpf_attr *attr, + unsigned int size); + int (*bpf_map)(struct bpf_map *map, fmode_t fmode); + int (*bpf_prog)(struct bpf_prog *prog); + int (*bpf_map_alloc_security)(struct bpf_map *map); + void (*bpf_map_free_security)(struct bpf_map *map); + int (*bpf_prog_alloc_security)(struct bpf_prog_aux *aux); + void (*bpf_prog_free_security)(struct bpf_prog_aux *aux); +#endif /* CONFIG_BPF_SYSCALL */ }; struct security_hook_heads { @@ -1901,6 +1946,15 @@ struct security_hook_heads { struct list_head audit_rule_match; struct list_head audit_rule_free; #endif /* CONFIG_AUDIT */ +#ifdef CONFIG_BPF_SYSCALL + struct list_head bpf; + struct list_head bpf_map; + struct list_head bpf_prog; + struct list_head bpf_map_alloc_security; + struct list_head bpf_map_free_security; + struct list_head bpf_prog_alloc_security; + struct list_head bpf_prog_free_security; +#endif /* CONFIG_BPF_SYSCALL */ } __randomize_layout; /*
diff --git a/include/linux/memory-state-time.h b/include/linux/memory-state-time.h new file mode 100644 index 0000000..d2212b0 --- /dev/null +++ b/include/linux/memory-state-time.h
@@ -0,0 +1,42 @@ +/* include/linux/memory-state-time.h + * + * Copyright (C) 2016 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/workqueue.h> + +#define UPDATE_MEMORY_STATE(BLOCK, VALUE) BLOCK->update_call(BLOCK, VALUE) + +struct memory_state_update_block; + +typedef void (*memory_state_update_fn_t)(struct memory_state_update_block *ub, + int value); + +/* This struct is populated when you pass it to a memory_state_register* + * function. The update_call function is used for an update and defined in the + * typedef memory_state_update_fn_t + */ +struct memory_state_update_block { + memory_state_update_fn_t update_call; + int id; +}; + +/* Register a frequency struct memory_state_update_block to provide updates to + * memory_state_time about frequency changes using its update_call function. + */ +struct memory_state_update_block *memory_state_register_frequency_source(void); + +/* Register a bandwidth struct memory_state_update_block to provide updates to + * memory_state_time about bandwidth changes using its update_call function. + */ +struct memory_state_update_block *memory_state_register_bandwidth_source(void);
diff --git a/include/linux/mm.h b/include/linux/mm.h index 58f2263..df01cc9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h
@@ -1223,6 +1223,8 @@ extern void pagefault_out_of_memory(void); extern void show_free_areas(unsigned int flags, nodemask_t *nodemask); +void shmem_set_file(struct vm_area_struct *vma, struct file *file); + extern bool can_do_mlock(void); extern int user_shm_lock(size_t, struct user_struct *); extern void user_shm_unlock(size_t, struct user_struct *); @@ -2094,7 +2096,7 @@ static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start, extern struct vm_area_struct *vma_merge(struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, unsigned long end, unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, - struct mempolicy *, struct vm_userfaultfd_ctx); + struct mempolicy *, struct vm_userfaultfd_ctx, const char __user *); extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); extern int __split_vma(struct mm_struct *, struct vm_area_struct *, unsigned long addr, int new_below);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e41ef53..1831bca 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h
@@ -304,11 +304,18 @@ struct vm_area_struct { /* * For areas with an address space and backing store, * linkage into the address_space->i_mmap interval tree. + * + * For private anonymous mappings, a pointer to a null terminated string + * in the user process containing the name given to the vma, or NULL + * if unnamed. */ - struct { - struct rb_node rb; - unsigned long rb_subtree_last; - } shared; + union { + struct { + struct rb_node rb; + unsigned long rb_subtree_last; + } shared; + const char __user *anon_name; + }; /* * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma @@ -655,4 +662,13 @@ typedef struct { unsigned long val; } swp_entry_t; +/* Return the name for an anonymous mapping or NULL for a file-backed mapping */ +static inline const char __user *vma_get_anon_name(struct vm_area_struct *vma) +{ + if (vma->vm_file) + return NULL; + + return vma->anon_name; +} + #endif /* _LINUX_MM_TYPES_H */
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 9a43763a..227961c 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h
@@ -439,6 +439,15 @@ struct mmc_host { bool cqe_enabled; bool cqe_on; +#ifdef CONFIG_MMC_EMBEDDED_SDIO + struct { + struct sdio_cis *cis; + struct sdio_cccr *cccr; + struct sdio_embedded_func *funcs; + int num_funcs; + } embedded_sdio_data; +#endif + unsigned long private[0] ____cacheline_aligned; }; @@ -451,6 +460,14 @@ void mmc_free_host(struct mmc_host *); int mmc_of_parse(struct mmc_host *host); int mmc_of_parse_voltage(struct device_node *np, u32 *mask); +#ifdef CONFIG_MMC_EMBEDDED_SDIO +extern void mmc_set_embedded_sdio_data(struct mmc_host *host, + struct sdio_cis *cis, + struct sdio_cccr *cccr, + struct sdio_embedded_func *funcs, + int num_funcs); +#endif + static inline void *mmc_priv(struct mmc_host *host) { return (void *)host->private;
diff --git a/include/linux/mmc/pm.h b/include/linux/mmc/pm.h index 4a13920..6e2d6a1 100644 --- a/include/linux/mmc/pm.h +++ b/include/linux/mmc/pm.h
@@ -26,5 +26,6 @@ typedef unsigned int mmc_pm_flag_t; #define MMC_PM_KEEP_POWER (1 << 0) /* preserve card power during suspend */ #define MMC_PM_WAKE_SDIO_IRQ (1 << 1) /* wake up host system on SDIO IRQ assertion */ +#define MMC_PM_IGNORE_PM_NOTIFY (1 << 2) /* ignore mmc pm notify */ #endif /* LINUX_MMC_PM_H */
diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h index 97ca105..f466f38 100644 --- a/include/linux/mmc/sdio_func.h +++ b/include/linux/mmc/sdio_func.h
@@ -23,6 +23,14 @@ struct sdio_func; typedef void (sdio_irq_handler_t)(struct sdio_func *); /* + * Structure used to hold embedded SDIO device data from platform layer + */ +struct sdio_embedded_func { + uint8_t f_class; + uint32_t f_maxblksize; +}; + +/* * SDIO function CIS tuple (unknown to the core) */ struct sdio_func_tuple {
diff --git a/include/linux/module.h b/include/linux/module.h index a9d546c..2d01e23 100644 --- a/include/linux/module.h +++ b/include/linux/module.h
@@ -19,6 +19,7 @@ #include <linux/jump_label.h> #include <linux/export.h> #include <linux/rbtree_latch.h> +#include <linux/cfi.h> #include <linux/percpu.h> #include <asm/module.h> @@ -346,6 +347,10 @@ struct module { const s32 *crcs; unsigned int num_syms; +#ifdef CONFIG_CFI_CLANG + cfi_check_fn cfi_check; +#endif + /* Kernel parameters. */ #ifdef CONFIG_SYSFS struct mutex param_lock;
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 1d7140f..ba36506 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h
@@ -228,19 +228,11 @@ struct kparam_array VERIFY_OCTAL_PERMISSIONS(perm), level, flags, { arg } } /* Obsolete - use module_param_cb() */ -#define module_param_call(name, set, get, arg, perm) \ - static const struct kernel_param_ops __param_ops_##name = \ - { .flags = 0, (void *)set, (void *)get }; \ +#define module_param_call(name, _set, _get, arg, perm) \ + static const struct kernel_param_ops __param_ops_##name = \ + { .flags = 0, .set = _set, .get = _get }; \ __module_param_call(MODULE_PARAM_PREFIX, \ - name, &__param_ops_##name, arg, \ - (perm) + sizeof(__check_old_set_param(set))*0, -1, 0) - -/* We don't get oldget: it's often a new-style param_get_uint, etc. */ -static inline int -__check_old_set_param(int (*oldset)(const char *, struct kernel_param *)) -{ - return 0; -} + name, &__param_ops_##name, arg, perm, -1, 0) #ifdef CONFIG_SYSFS extern void kernel_param_lock(struct module *mod);
diff --git a/include/linux/mount.h b/include/linux/mount.h index 45b1f56..1ff21c1 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h
@@ -68,6 +68,7 @@ struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ int mnt_flags; + void *data; } __randomize_layout; struct file; /* forward dec */
diff --git a/include/linux/namei.h b/include/linux/namei.h index a982bb7..db4f5c0 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h
@@ -80,8 +80,11 @@ extern struct dentry *user_path_create(int, const char __user *, struct path *, extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); extern int kern_path_mountpoint(int, const char *, struct path *, unsigned int); +extern int vfs_path_lookup(struct dentry *, struct vfsmount *, + const char *, unsigned int, struct path *); extern struct dentry *lookup_one_len(const char *, struct dentry *, int); +extern struct dentry *lookup_one_len2(const char *, struct vfsmount *mnt, struct dentry *, int); extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int); extern int follow_down_one(struct path *);
diff --git a/include/linux/netfilter/xt_quota2.h b/include/linux/netfilter/xt_quota2.h new file mode 100644 index 0000000..eadc69033 --- /dev/null +++ b/include/linux/netfilter/xt_quota2.h
@@ -0,0 +1,25 @@ +#ifndef _XT_QUOTA_H +#define _XT_QUOTA_H + +enum xt_quota_flags { + XT_QUOTA_INVERT = 1 << 0, + XT_QUOTA_GROW = 1 << 1, + XT_QUOTA_PACKET = 1 << 2, + XT_QUOTA_NO_CHANGE = 1 << 3, + XT_QUOTA_MASK = 0x0F, +}; + +struct xt_quota_counter; + +struct xt_quota_mtinfo2 { + char name[15]; + u_int8_t flags; + + /* Comparison-invariant */ + aligned_u64 quota; + + /* Used internally by the kernel */ + struct xt_quota_counter *master __attribute__((aligned(8))); +}; + +#endif /* _XT_QUOTA_H */
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 013c541..064bac7 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h
@@ -66,6 +66,27 @@ extern unsigned long of_get_flat_dt_root(void); extern int of_get_flat_dt_size(void); extern uint32_t of_get_flat_dt_phandle(unsigned long node); +/* + * early_init_dt_scan_chosen - scan the device tree for ramdisk and bootargs + * + * The boot arguments will be placed into the memory pointed to by @data. + * That memory should be COMMAND_LINE_SIZE big and initialized to be a valid + * (possibly empty) string. Logic for what will be in @data after this + * function finishes: + * + * - CONFIG_CMDLINE_FORCE=true + * CONFIG_CMDLINE + * - CONFIG_CMDLINE_EXTEND=true, @data is non-empty string + * @data + dt bootargs (even if dt bootargs are empty) + * - CONFIG_CMDLINE_EXTEND=true, @data is empty string + * CONFIG_CMDLINE + dt bootargs (even if dt bootargs are empty) + * - CMDLINE_FROM_BOOTLOADER=true, dt bootargs=non-empty: + * dt bootargs + * - CMDLINE_FROM_BOOTLOADER=true, dt bootargs=empty, @data is non-empty string + * @data is left unchanged + * - CMDLINE_FROM_BOOTLOADER=true, dt bootargs=empty, @data is empty string + * CONFIG_CMDLINE (or "" if that's not defined) + */ extern int early_init_dt_scan_chosen(unsigned long node, const char *uname, int depth, void *data); extern int early_init_dt_scan_memory(unsigned long node, const char *uname,
diff --git a/include/linux/overflow.h b/include/linux/overflow.h index c8890ec..8712ff7 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h
@@ -202,4 +202,77 @@ #endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */ +/** + * array_size() - Calculate size of 2-dimensional array. + * + * @a: dimension one + * @b: dimension two + * + * Calculates size of 2-dimensional array: @a * @b. + * + * Returns: number of bytes needed to represent the array or SIZE_MAX on + * overflow. + */ +static inline __must_check size_t array_size(size_t a, size_t b) +{ + size_t bytes; + + if (check_mul_overflow(a, b, &bytes)) + return SIZE_MAX; + + return bytes; +} + +/** + * array3_size() - Calculate size of 3-dimensional array. + * + * @a: dimension one + * @b: dimension two + * @c: dimension three + * + * Calculates size of 3-dimensional array: @a * @b * @c. + * + * Returns: number of bytes needed to represent the array or SIZE_MAX on + * overflow. + */ +static inline __must_check size_t array3_size(size_t a, size_t b, size_t c) +{ + size_t bytes; + + if (check_mul_overflow(a, b, &bytes)) + return SIZE_MAX; + if (check_mul_overflow(bytes, c, &bytes)) + return SIZE_MAX; + + return bytes; +} + +static inline __must_check size_t __ab_c_size(size_t n, size_t size, size_t c) +{ + size_t bytes; + + if (check_mul_overflow(n, size, &bytes)) + return SIZE_MAX; + if (check_add_overflow(bytes, c, &bytes)) + return SIZE_MAX; + + return bytes; +} + +/** + * struct_size() - Calculate size of structure with trailing array. + * @p: Pointer to the structure. + * @member: Name of the array member. + * @n: Number of elements in the array. + * + * Calculates size of memory needed for structure @p followed by an + * array of @n @member elements. + * + * Return: number of bytes needed or SIZE_MAX on overflow. + */ +#define struct_size(p, member, n) \ + __ab_c_size(n, \ + sizeof(*(p)->member) + __must_be_array((p)->member),\ + sizeof(*(p))) + #endif /* __LINUX_OVERFLOW_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e08b533..467af151 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h
@@ -243,7 +243,7 @@ static inline gfp_t readahead_gfp_mask(struct address_space *x) __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN; } -typedef int filler_t(void *, struct page *); +typedef int filler_t(struct file *, struct page *); pgoff_t page_cache_next_hole(struct address_space *mapping, pgoff_t index, unsigned long max_scan); @@ -366,8 +366,16 @@ static inline unsigned find_get_pages(struct address_space *mapping, } unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start, unsigned int nr_pages, struct page **pages); -unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, - int tag, unsigned int nr_pages, struct page **pages); +unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index, + pgoff_t end, int tag, unsigned int nr_pages, + struct page **pages); +static inline unsigned find_get_pages_tag(struct address_space *mapping, + pgoff_t *index, int tag, unsigned int nr_pages, + struct page **pages) +{ + return find_get_pages_range_tag(mapping, index, (pgoff_t)-1, tag, + nr_pages, pages); +} unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start, int tag, unsigned int nr_entries, struct page **entries, pgoff_t *indices); @@ -394,7 +402,7 @@ extern int read_cache_pages(struct address_space *mapping, static inline struct page *read_mapping_page(struct address_space *mapping, pgoff_t index, void *data) { - filler_t *filler = (filler_t *)mapping->a_ops->readpage; + filler_t *filler = mapping->a_ops->readpage; return read_cache_page(mapping, index, filler, data); }
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 2636c0c..4f56e0a 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h
@@ -38,9 +38,17 @@ static inline unsigned pagevec_lookup(struct pagevec *pvec, return pagevec_lookup_range(pvec, mapping, start, (pgoff_t)-1); } -unsigned pagevec_lookup_tag(struct pagevec *pvec, - struct address_space *mapping, pgoff_t *index, int tag, - unsigned nr_pages); +unsigned pagevec_lookup_range_tag(struct pagevec *pvec, + struct address_space *mapping, pgoff_t *index, pgoff_t end, + int tag); +unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec, + struct address_space *mapping, pgoff_t *index, pgoff_t end, + int tag, unsigned max_pages); +static inline unsigned pagevec_lookup_tag(struct pagevec *pvec, + struct address_space *mapping, pgoff_t *index, int tag) +{ + return pagevec_lookup_range_tag(pvec, mapping, index, (pgoff_t)-1, tag); +} static inline void pagevec_init(struct pagevec *pvec, int cold) {
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8e22f24..b7fecdfa 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h
@@ -1165,6 +1165,11 @@ extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, int perf_event_max_stack_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +static inline bool perf_paranoid_any(void) +{ + return sysctl_perf_event_paranoid > 2; +} + static inline bool perf_paranoid_tracepoint_raw(void) { return sysctl_perf_event_paranoid > -1;
diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index b2b7255..540595a 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h
@@ -12,6 +12,7 @@ #include <linux/bug.h> #include <linux/slab.h> #include <linux/rcupdate.h> +#include <linux/refcount.h> #include <uapi/linux/posix_acl.h> struct posix_acl_entry { @@ -24,7 +25,7 @@ struct posix_acl_entry { }; struct posix_acl { - atomic_t a_refcount; + refcount_t a_refcount; struct rcu_head a_rcu; unsigned int a_count; struct posix_acl_entry a_entries[0]; @@ -41,7 +42,7 @@ static inline struct posix_acl * posix_acl_dup(struct posix_acl *acl) { if (acl) - atomic_inc(&acl->a_refcount); + refcount_inc(&acl->a_refcount); return acl; } @@ -51,7 +52,7 @@ posix_acl_dup(struct posix_acl *acl) static inline void posix_acl_release(struct posix_acl *acl) { - if (acl && atomic_dec_and_test(&acl->a_refcount)) + if (acl && refcount_dec_and_test(&acl->a_refcount)) kfree_rcu(acl, a_rcu); }
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 4617cf4..c25e166 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h
@@ -18,6 +18,7 @@ #include <linux/leds.h> #include <linux/spinlock.h> #include <linux/notifier.h> +#include <linux/types.h> /* * All voltages, currents, charges, energies, time and temperatures in uV, @@ -149,6 +150,12 @@ enum power_supply_property { POWER_SUPPLY_PROP_PRECHARGE_CURRENT, POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT, POWER_SUPPLY_PROP_CALIBRATE, + /* Local extensions */ + POWER_SUPPLY_PROP_USB_HC, + POWER_SUPPLY_PROP_USB_OTG, + POWER_SUPPLY_PROP_CHARGE_ENABLED, + /* Local extensions of type int64_t */ + POWER_SUPPLY_PROP_CHARGE_COUNTER_EXT, /* Properties of type `const char *' */ POWER_SUPPLY_PROP_MODEL_NAME, POWER_SUPPLY_PROP_MANUFACTURER, @@ -177,6 +184,7 @@ enum power_supply_notifier_events { union power_supply_propval { int intval; const char *strval; + int64_t int64val; }; struct device_node;
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 928ef9e..d1c705e 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h
@@ -71,6 +71,12 @@ static inline int remove_proc_subtree(const char *name, struct proc_dir_entry *p #endif /* CONFIG_PROC_FS */ +#ifdef CONFIG_PROC_UID +extern void proc_register_uid(kuid_t uid); +#else +static inline void proc_register_uid(kuid_t uid) {} +#endif + struct net; static inline struct proc_dir_entry *proc_net_mkdir(
diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 9395f06..10fe2e2 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h
@@ -80,6 +80,8 @@ void persistent_ram_free_old(struct persistent_ram_zone *prz); ssize_t persistent_ram_ecc_string(struct persistent_ram_zone *prz, char *str, size_t len); +void ramoops_console_write_buf(const char *buf, size_t size); + /* * Ramoops platform data * @mem_size memory size for ramoops
diff --git a/include/linux/sched.h b/include/linux/sched.h index 866439c..44b54d6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h
@@ -206,6 +206,15 @@ struct task_group; /* Task command name length: */ #define TASK_COMM_LEN 16 +enum task_event { + PUT_PREV_TASK = 0, + PICK_NEXT_TASK = 1, + TASK_WAKE = 2, + TASK_MIGRATE = 3, + TASK_UPDATE = 4, + IRQ_UPDATE = 5, +}; + extern cpumask_var_t cpu_isolated_map; extern void scheduler_tick(void); @@ -317,6 +326,34 @@ struct load_weight { u32 inv_weight; }; +/** + * struct util_est - Estimation utilization of FAIR tasks + * @enqueued: instantaneous estimated utilization of a task/cpu + * @ewma: the Exponential Weighted Moving Average (EWMA) + * utilization of a task + * + * Support data structure to track an Exponential Weighted Moving Average + * (EWMA) of a FAIR task's utilization. New samples are added to the moving + * average each time a task completes an activation. Sample's weight is chosen + * so that the EWMA will be relatively insensitive to transient changes to the + * task's workload. + * + * The enqueued attribute has a slightly different meaning for tasks and cpus: + * - task: the task's util_avg at last task dequeue time + * - cfs_rq: the sum of util_est.enqueued for each RUNNABLE task on that CPU + * Thus, the util_est.enqueued of a task represents the contribution on the + * estimated utilization of the CPU where that task is currently enqueued. + * + * Only for tasks we track a moving average of the past instantaneous + * estimated utilization. This allows to absorb sporadic drops in utilization + * of an otherwise almost periodic task. + */ +struct util_est { + unsigned int enqueued; + unsigned int ewma; +#define UTIL_EST_WEIGHT_SHIFT 2 +}; + /* * The load_avg/util_avg accumulates an infinite geometric series * (see __update_load_avg() in kernel/sched/fair.c). @@ -376,6 +413,7 @@ struct sched_avg { u32 period_contrib; unsigned long load_avg; unsigned long util_avg; + struct util_est util_est; }; struct sched_statistics { @@ -450,6 +488,41 @@ struct sched_entity { #endif }; +#ifdef CONFIG_SCHED_WALT +#define RAVG_HIST_SIZE_MAX 5 + +/* ravg represents frequency scaled cpu-demand of tasks */ +struct ravg { + /* + * 'mark_start' marks the beginning of an event (task waking up, task + * starting to execute, task being preempted) within a window + * + * 'sum' represents how runnable a task has been within current + * window. It incorporates both running time and wait time and is + * frequency scaled. + * + * 'sum_history' keeps track of history of 'sum' seen over previous + * RAVG_HIST_SIZE windows. Windows where task was entirely sleeping are + * ignored. + * + * 'demand' represents maximum sum seen over previous + * sysctl_sched_ravg_hist_size windows. 'demand' could drive frequency + * demand for tasks. + * + * 'curr_window' represents task's contribution to cpu busy time + * statistics (rq->curr_runnable_sum) in current window + * + * 'prev_window' represents task's contribution to cpu busy time + * statistics (rq->prev_runnable_sum) in previous window + */ + u64 mark_start; + u32 sum, demand; + u32 sum_history[RAVG_HIST_SIZE_MAX]; + u32 curr_window, prev_window; + u16 active_windows; +}; +#endif + struct sched_rt_entity { struct list_head run_list; unsigned long timeout; @@ -602,6 +675,16 @@ struct task_struct { const struct sched_class *sched_class; struct sched_entity se; struct sched_rt_entity rt; +#ifdef CONFIG_SCHED_WALT + struct ravg ravg; + /* + * 'init_load_pct' represents the initial task load assigned to children + * of this task + */ + u32 init_load_pct; + u64 last_sleep_ts; +#endif + #ifdef CONFIG_CGROUP_SCHED struct task_group *sched_task_group; #endif @@ -752,6 +835,10 @@ struct task_struct { u64 stimescaled; #endif u64 gtime; +#ifdef CONFIG_CPU_FREQ_TIMES + u64 *time_in_state; + unsigned int max_state; +#endif struct prev_cputime prev_cputime; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN struct vtime vtime;
diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h index d1ad3d82..0b55834 100644 --- a/include/linux/sched/cpufreq.h +++ b/include/linux/sched/cpufreq.h
@@ -12,8 +12,6 @@ #define SCHED_CPUFREQ_DL (1U << 1) #define SCHED_CPUFREQ_IOWAIT (1U << 2) -#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL) - #ifdef CONFIG_CPU_FREQ struct update_util_data { void (*func)(struct update_util_data *data, u64 time, unsigned int flags);
diff --git a/include/linux/sched/energy.h b/include/linux/sched/energy.h new file mode 100644 index 0000000..a9b0349 --- /dev/null +++ b/include/linux/sched/energy.h
@@ -0,0 +1,40 @@ +#ifndef _LINUX_SCHED_ENERGY_H +#define _LINUX_SCHED_ENERGY_H + +#include <linux/sched.h> +#include <linux/slab.h> + +/* + * There doesn't seem to be an NR_CPUS style max number of sched domain + * levels so here's an arbitrary constant one for the moment. + * + * The levels alluded to here correspond to entries in struct + * sched_domain_topology_level that are meant to be populated by arch + * specific code (topology.c). + */ +#define NR_SD_LEVELS 8 + +#define SD_LEVEL0 0 +#define SD_LEVEL1 1 +#define SD_LEVEL2 2 +#define SD_LEVEL3 3 +#define SD_LEVEL4 4 +#define SD_LEVEL5 5 +#define SD_LEVEL6 6 +#define SD_LEVEL7 7 + +/* + * Convenience macro for iterating through said sd levels. + */ +#define for_each_possible_sd_level(level) \ + for (level = 0; level < NR_SD_LEVELS; level++) + +extern struct sched_group_energy *sge_array[NR_CPUS][NR_SD_LEVELS]; + +#ifdef CONFIG_GENERIC_ARCH_TOPOLOGY +void init_sched_energy_costs(void); +#else +void init_sched_energy_costs(void) {} +#endif + +#endif
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index d6a18a3..e076ff8 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h
@@ -21,8 +21,16 @@ enum { sysctl_hung_task_timeout_secs = 0 }; extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_min_granularity; +extern unsigned int sysctl_sched_sync_hint_enable; +extern unsigned int sysctl_sched_cstate_aware; extern unsigned int sysctl_sched_wakeup_granularity; extern unsigned int sysctl_sched_child_runs_first; +#ifdef CONFIG_SCHED_WALT +extern unsigned int sysctl_sched_use_walt_cpu_util; +extern unsigned int sysctl_sched_use_walt_task_util; +extern unsigned int sysctl_sched_walt_init_task_load_pct; +extern unsigned int sysctl_sched_walt_cpu_high_irqload; +#endif enum sched_tunable_scaling { SCHED_TUNABLESCALING_NONE,
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index cf257c2e..5fcb7dd 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h
@@ -26,6 +26,7 @@ #define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ #define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ #define SD_NUMA 0x4000 /* cross-node balancing */ +#define SD_SHARE_CAP_STATES 0x8000 /* Domain members share capacity state */ /* * Increase resolution of cpu_capacity calculations @@ -66,12 +67,31 @@ struct sched_domain_attr { extern int sched_domain_level_max; +struct capacity_state { + unsigned long cap; /* compute capacity */ + unsigned long frequency;/* frequency */ + unsigned long power; /* power consumption at this compute capacity */ +}; + +struct idle_state { + unsigned long power; /* power consumption in this idle state */ +}; + +struct sched_group_energy { + unsigned int nr_idle_states; /* number of idle states */ + struct idle_state *idle_states; /* ptr to idle state array */ + unsigned int nr_cap_states; /* number of capacity states */ + struct capacity_state *cap_states; /* ptr to capacity state array */ +}; + struct sched_group; struct sched_domain_shared { atomic_t ref; atomic_t nr_busy_cpus; int has_idle_cores; + + bool overutilized; }; struct sched_domain { @@ -173,6 +193,9 @@ bool cpus_share_cache(int this_cpu, int that_cpu); typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); typedef int (*sched_domain_flags_f)(void); +typedef +const struct sched_group_energy * const(*sched_domain_energy_f)(int cpu); +extern bool energy_aware(void); #define SDTL_OVERLAP 0x01 @@ -186,6 +209,7 @@ struct sd_data { struct sched_domain_topology_level { sched_domain_mask_f mask; sched_domain_flags_f sd_flags; + sched_domain_energy_f energy; int flags; int numa_level; struct sd_data data;
diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h index 10b19a1..a3661e9 100644 --- a/include/linux/sched/wake_q.h +++ b/include/linux/sched/wake_q.h
@@ -34,6 +34,7 @@ struct wake_q_head { struct wake_q_node *first; struct wake_q_node **lastp; + int count; }; #define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) @@ -45,6 +46,7 @@ static inline void wake_q_init(struct wake_q_head *head) { head->first = WAKE_Q_TAIL; head->lastp = &head->first; + head->count = 0; } extern void wake_q_add(struct wake_q_head *head,
diff --git a/include/linux/sched/xacct.h b/include/linux/sched/xacct.h index c078f0a..9544c9d 100644 --- a/include/linux/sched/xacct.h +++ b/include/linux/sched/xacct.h
@@ -28,6 +28,11 @@ static inline void inc_syscw(struct task_struct *tsk) { tsk->ioac.syscw++; } + +static inline void inc_syscfs(struct task_struct *tsk) +{ + tsk->ioac.syscfs++; +} #else static inline void add_rchar(struct task_struct *tsk, ssize_t amt) { @@ -44,6 +49,10 @@ static inline void inc_syscr(struct task_struct *tsk) static inline void inc_syscw(struct task_struct *tsk) { } + +static inline void inc_syscfs(struct task_struct *tsk) +{ +} #endif #endif /* _LINUX_SCHED_XACCT_H */
diff --git a/include/linux/security.h b/include/linux/security.h index ce62659..73f1ef6 100644 --- a/include/linux/security.h +++ b/include/linux/security.h
@@ -1730,6 +1730,54 @@ static inline void securityfs_remove(struct dentry *dentry) #endif +#ifdef CONFIG_BPF_SYSCALL +union bpf_attr; +struct bpf_map; +struct bpf_prog; +struct bpf_prog_aux; +#ifdef CONFIG_SECURITY +extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size); +extern int security_bpf_map(struct bpf_map *map, fmode_t fmode); +extern int security_bpf_prog(struct bpf_prog *prog); +extern int security_bpf_map_alloc(struct bpf_map *map); +extern void security_bpf_map_free(struct bpf_map *map); +extern int security_bpf_prog_alloc(struct bpf_prog_aux *aux); +extern void security_bpf_prog_free(struct bpf_prog_aux *aux); +#else +static inline int security_bpf(int cmd, union bpf_attr *attr, + unsigned int size) +{ + return 0; +} + +static inline int security_bpf_map(struct bpf_map *map, fmode_t fmode) +{ + return 0; +} + +static inline int security_bpf_prog(struct bpf_prog *prog) +{ + return 0; +} + +static inline int security_bpf_map_alloc(struct bpf_map *map) +{ + return 0; +} + +static inline void security_bpf_map_free(struct bpf_map *map) +{ } + +static inline int security_bpf_prog_alloc(struct bpf_prog_aux *aux) +{ + return 0; +} + +static inline void security_bpf_prog_free(struct bpf_prog_aux *aux) +{ } +#endif /* CONFIG_SECURITY */ +#endif /* CONFIG_BPF_SYSCALL */ + #ifdef CONFIG_SECURITY static inline char *alloc_secdata(void)
diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 8544357..2e5aa1b 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h
@@ -444,6 +444,7 @@ extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); extern void pm_wakep_autosleep_enabled(bool set); extern void pm_print_active_wakeup_sources(void); +extern void pm_get_active_wakeup_sources(char *pending_sources, size_t max); static inline void lock_system_sleep(void) {
diff --git a/include/linux/task_io_accounting.h b/include/linux/task_io_accounting.h index 6f6acce..bb26108 100644 --- a/include/linux/task_io_accounting.h +++ b/include/linux/task_io_accounting.h
@@ -19,6 +19,8 @@ struct task_io_accounting { u64 syscr; /* # of write syscalls */ u64 syscw; + /* # of fsync syscalls */ + u64 syscfs; #endif /* CONFIG_TASK_XACCT */ #ifdef CONFIG_TASK_IO_ACCOUNTING
diff --git a/include/linux/task_io_accounting_ops.h b/include/linux/task_io_accounting_ops.h index bb5498b..733ab62 100644 --- a/include/linux/task_io_accounting_ops.h +++ b/include/linux/task_io_accounting_ops.h
@@ -97,6 +97,7 @@ static inline void task_chr_io_accounting_add(struct task_io_accounting *dst, dst->wchar += src->wchar; dst->syscr += src->syscr; dst->syscw += src->syscw; + dst->syscfs += src->syscfs; } #else static inline void task_chr_io_accounting_add(struct task_io_accounting *dst,
diff --git a/include/linux/tick.h b/include/linux/tick.h index 5cdac11..bd82199 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h
@@ -114,26 +114,45 @@ enum tick_dep_bits { #ifdef CONFIG_NO_HZ_COMMON extern bool tick_nohz_enabled; extern int tick_nohz_tick_stopped(void); +extern void tick_nohz_idle_stop_tick(void); +extern void tick_nohz_idle_retain_tick(void); +extern void tick_nohz_idle_restart_tick(void); extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); -extern ktime_t tick_nohz_get_sleep_length(void); +extern bool tick_nohz_idle_got_tick(void); +extern ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next); extern unsigned long tick_nohz_get_idle_calls(void); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); + +static inline void tick_nohz_idle_stop_tick_protected(void) +{ + local_irq_disable(); + tick_nohz_idle_stop_tick(); + local_irq_enable(); +} + #else /* !CONFIG_NO_HZ_COMMON */ #define tick_nohz_enabled (0) static inline int tick_nohz_tick_stopped(void) { return 0; } +static inline void tick_nohz_idle_stop_tick(void) { } +static inline void tick_nohz_idle_retain_tick(void) { } +static inline void tick_nohz_idle_restart_tick(void) { } static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } +static inline bool tick_nohz_idle_got_tick(void) { return false; } -static inline ktime_t tick_nohz_get_sleep_length(void) +static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) { - return NSEC_PER_SEC / HZ; + *delta_next = TICK_NSEC; + return *delta_next; } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } + +static inline void tick_nohz_idle_stop_tick_protected(void) { } #endif /* !CONFIG_NO_HZ_COMMON */ #ifdef CONFIG_NO_HZ_FULL
diff --git a/include/linux/usb/class-dual-role.h b/include/linux/usb/class-dual-role.h new file mode 100644 index 0000000..c6df223 --- /dev/null +++ b/include/linux/usb/class-dual-role.h
@@ -0,0 +1,129 @@ +#ifndef __LINUX_CLASS_DUAL_ROLE_H__ +#define __LINUX_CLASS_DUAL_ROLE_H__ + +#include <linux/workqueue.h> +#include <linux/errno.h> +#include <linux/types.h> + +struct device; + +enum dual_role_supported_modes { + DUAL_ROLE_SUPPORTED_MODES_DFP_AND_UFP = 0, + DUAL_ROLE_SUPPORTED_MODES_DFP, + DUAL_ROLE_SUPPORTED_MODES_UFP, +/*The following should be the last element*/ + DUAL_ROLE_PROP_SUPPORTED_MODES_TOTAL, +}; + +enum { + DUAL_ROLE_PROP_MODE_UFP = 0, + DUAL_ROLE_PROP_MODE_DFP, + DUAL_ROLE_PROP_MODE_NONE, +/*The following should be the last element*/ + DUAL_ROLE_PROP_MODE_TOTAL, +}; + +enum { + DUAL_ROLE_PROP_PR_SRC = 0, + DUAL_ROLE_PROP_PR_SNK, + DUAL_ROLE_PROP_PR_NONE, +/*The following should be the last element*/ + DUAL_ROLE_PROP_PR_TOTAL, + +}; + +enum { + DUAL_ROLE_PROP_DR_HOST = 0, + DUAL_ROLE_PROP_DR_DEVICE, + DUAL_ROLE_PROP_DR_NONE, +/*The following should be the last element*/ + DUAL_ROLE_PROP_DR_TOTAL, +}; + +enum { + DUAL_ROLE_PROP_VCONN_SUPPLY_NO = 0, + DUAL_ROLE_PROP_VCONN_SUPPLY_YES, +/*The following should be the last element*/ + DUAL_ROLE_PROP_VCONN_SUPPLY_TOTAL, +}; + +enum dual_role_property { + DUAL_ROLE_PROP_SUPPORTED_MODES = 0, + DUAL_ROLE_PROP_MODE, + DUAL_ROLE_PROP_PR, + DUAL_ROLE_PROP_DR, + DUAL_ROLE_PROP_VCONN_SUPPLY, +}; + +struct dual_role_phy_instance; + +/* Description of typec port */ +struct dual_role_phy_desc { + /* /sys/class/dual_role_usb/<name>/ */ + const char *name; + enum dual_role_supported_modes supported_modes; + enum dual_role_property *properties; + size_t num_properties; + + /* Callback for "cat /sys/class/dual_role_usb/<name>/<property>" */ + int (*get_property)(struct dual_role_phy_instance *dual_role, + enum dual_role_property prop, + unsigned int *val); + /* Callback for "echo <value> > + * /sys/class/dual_role_usb/<name>/<property>" */ + int (*set_property)(struct dual_role_phy_instance *dual_role, + enum dual_role_property prop, + const unsigned int *val); + /* Decides whether userspace can change a specific property */ + int (*property_is_writeable)(struct dual_role_phy_instance *dual_role, + enum dual_role_property prop); +}; + +struct dual_role_phy_instance { + const struct dual_role_phy_desc *desc; + + /* Driver private data */ + void *drv_data; + + struct device dev; + struct work_struct changed_work; +}; + +#if IS_ENABLED(CONFIG_DUAL_ROLE_USB_INTF) +extern void dual_role_instance_changed(struct dual_role_phy_instance + *dual_role); +extern struct dual_role_phy_instance *__must_check +devm_dual_role_instance_register(struct device *parent, + const struct dual_role_phy_desc *desc); +extern void devm_dual_role_instance_unregister(struct device *dev, + struct dual_role_phy_instance + *dual_role); +extern int dual_role_get_property(struct dual_role_phy_instance *dual_role, + enum dual_role_property prop, + unsigned int *val); +extern int dual_role_set_property(struct dual_role_phy_instance *dual_role, + enum dual_role_property prop, + const unsigned int *val); +extern int dual_role_property_is_writeable(struct dual_role_phy_instance + *dual_role, + enum dual_role_property prop); +extern void *dual_role_get_drvdata(struct dual_role_phy_instance *dual_role); +#else /* CONFIG_DUAL_ROLE_USB_INTF */ +static inline void dual_role_instance_changed(struct dual_role_phy_instance + *dual_role){} +static inline struct dual_role_phy_instance *__must_check +devm_dual_role_instance_register(struct device *parent, + const struct dual_role_phy_desc *desc) +{ + return ERR_PTR(-ENOSYS); +} +static inline void devm_dual_role_instance_unregister(struct device *dev, + struct dual_role_phy_instance + *dual_role){} +static inline void *dual_role_get_drvdata(struct dual_role_phy_instance + *dual_role) +{ + return ERR_PTR(-ENOSYS); +} +#endif /* CONFIG_DUAL_ROLE_USB_INTF */ +#endif /* __LINUX_CLASS_DUAL_ROLE_H__ */
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 590d313..9baea7b 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h
@@ -586,6 +586,7 @@ struct usb_function_instance { struct config_group group; struct list_head cfs_list; struct usb_function_driver *fd; + struct usb_function *f; int (*set_inst_name)(struct usb_function_instance *inst, const char *name); void (*free_func_inst)(struct usb_function_instance *inst);
diff --git a/include/linux/usb/f_accessory.h b/include/linux/usb/f_accessory.h new file mode 100644 index 0000000..ebe3c4d --- /dev/null +++ b/include/linux/usb/f_accessory.h
@@ -0,0 +1,23 @@ +/* + * Gadget Function Driver for Android USB accessories + * + * Copyright (C) 2011 Google, Inc. + * Author: Mike Lockwood <lockwood@android.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __LINUX_USB_F_ACCESSORY_H +#define __LINUX_USB_F_ACCESSORY_H + +#include <uapi/linux/usb/f_accessory.h> + +#endif /* __LINUX_USB_F_ACCESSORY_H */
diff --git a/include/linux/verification.h b/include/linux/verification.h index cfa4730..60ea906 100644 --- a/include/linux/verification.h +++ b/include/linux/verification.h
@@ -32,9 +32,13 @@ enum key_being_used_for { }; extern const char *const key_being_used_for[NR__KEY_BEING_USED_FOR]; -#ifdef CONFIG_SYSTEM_DATA_VERIFICATION - struct key; +struct public_key_signature; + +extern int verify_signature_one(const struct public_key_signature *sig, + struct key *trusted_keys, const char *keyid); + +#ifdef CONFIG_SYSTEM_DATA_VERIFICATION extern int verify_pkcs7_signature(const void *data, size_t len, const void *raw_pkcs7, size_t pkcs7_len,
diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h new file mode 100644 index 0000000..d84d8c3 --- /dev/null +++ b/include/linux/wakeup_reason.h
@@ -0,0 +1,32 @@ +/* + * include/linux/wakeup_reason.h + * + * Logs the reason which caused the kernel to resume + * from the suspend mode. + * + * Copyright (C) 2014 Google, Inc. + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _LINUX_WAKEUP_REASON_H +#define _LINUX_WAKEUP_REASON_H + +#define MAX_SUSPEND_ABORT_LEN 256 + +void log_wakeup_reason(int irq); +int check_wakeup_reason(int irq); + +#ifdef CONFIG_SUSPEND +void log_suspend_abort_reason(const char *fmt, ...); +#else +static inline void log_suspend_abort_reason(const char *fmt, ...) { } +#endif + +#endif /* _LINUX_WAKEUP_REASON_H */
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index 57a8e98..2219cce 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h
@@ -47,6 +47,8 @@ void zs_destroy_pool(struct zs_pool *pool); unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t flags); void zs_free(struct zs_pool *pool, unsigned long obj); +size_t zs_huge_class_size(struct zs_pool *pool); + void *zs_map_object(struct zs_pool *pool, unsigned long handle, enum zs_mapmode mm); void zs_unmap_object(struct zs_pool *pool, unsigned long handle);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 35f5aab..bcd9b88 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h
@@ -261,6 +261,8 @@ static inline bool ipv6_is_mld(struct sk_buff *skb, int nexthdr, int offset) void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao); +u32 addrconf_rt_table(const struct net_device *dev, u32 default_table); + /* * anycast prototypes (anycast.c) */
diff --git a/include/net/dst.h b/include/net/dst.h index ebfb432..cf7ce4fd 100644 --- a/include/net/dst.h +++ b/include/net/dst.h
@@ -490,6 +490,14 @@ static inline struct dst_entry *xfrm_lookup(struct net *net, return dst_orig; } +static inline struct dst_entry * +xfrm_lookup_with_ifid(struct net *net, struct dst_entry *dst_orig, + const struct flowi *fl, const struct sock *sk, + int flags, u32 if_id) +{ + return dst_orig; +} + static inline struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, @@ -509,6 +517,12 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, const struct sock *sk, int flags); +struct dst_entry *xfrm_lookup_with_ifid(struct net *net, + struct dst_entry *dst_orig, + const struct flowi *fl, + const struct sock *sk, int flags, + u32 if_id); + struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, const struct sock *sk, int flags);
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 792c3f6..f5223bf 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h
@@ -285,7 +285,7 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct) struct kernel_param; -int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); +int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp); int nf_conntrack_hash_resize(unsigned int hashsize); extern struct hlist_nulls_head *nf_conntrack_hash;
diff --git a/include/net/tcp.h b/include/net/tcp.h index 0c828aa..a307f83 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h
@@ -275,6 +275,7 @@ extern int sysctl_tcp_autocorking; extern int sysctl_tcp_invalid_ratelimit; extern int sysctl_tcp_pacing_ss_ratio; extern int sysctl_tcp_pacing_ca_ratio; +extern int sysctl_tcp_default_init_rwnd; extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h index db99efb..d290de0 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h
@@ -23,6 +23,7 @@ #include <net/ipv6.h> #include <net/ip6_fib.h> #include <net/flow.h> +#include <net/gro_cells.h> #include <linux/interrupt.h> @@ -147,6 +148,7 @@ struct xfrm_state { struct xfrm_id id; struct xfrm_selector sel; struct xfrm_mark mark; + u32 if_id; u32 tfcpad; u32 genid; @@ -166,7 +168,7 @@ struct xfrm_state { int header_len; int trailer_len; u32 extra_flags; - u32 output_mark; + struct xfrm_mark smark; } props; struct xfrm_lifetime_cfg lft; @@ -292,6 +294,13 @@ struct xfrm_replay { int (*overflow)(struct xfrm_state *x, struct sk_buff *skb); }; +struct xfrm_if_cb { + struct xfrm_if *(*decode_session)(struct sk_buff *skb); +}; + +void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb); +void xfrm_if_unregister_cb(void); + struct net_device; struct xfrm_type; struct xfrm_dst; @@ -573,6 +582,7 @@ struct xfrm_policy { atomic_t genid; u32 priority; u32 index; + u32 if_id; struct xfrm_mark mark; struct xfrm_selector selector; struct xfrm_lifetime_cfg lft; @@ -1006,6 +1016,22 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); +struct xfrm_if_parms { + char name[IFNAMSIZ]; /* name of XFRM device */ + int link; /* ifindex of underlying L2 interface */ + u32 if_id; /* interface identifyer */ +}; + +struct xfrm_if { + struct xfrm_if __rcu *next; /* next interface in list */ + struct net_device *dev; /* virtual device associated with interface */ + struct net_device *phydev; /* physical device */ + struct net *net; /* netns for packet i/o */ + struct xfrm_if_parms p; /* interface parms */ + + struct gro_cells gro_cells; +}; + struct xfrm_offload { /* Output sequence number for replay protection on offloading. */ struct { @@ -1500,8 +1526,8 @@ struct xfrm_state *xfrm_state_find(const xfrm_address_t *daddr, const struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol, int *err, - unsigned short family); -struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, + unsigned short family, u32 if_id); +struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id, xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, @@ -1658,20 +1684,20 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, void *); void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); -struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, +struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id, u8 type, int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete, int *err); -struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8, int dir, - u32 id, int delete, int *err); +struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id, u8, + int dir, u32 id, int delete, int *err); int xfrm_policy_flush(struct net *net, u8 type, bool task_valid); void xfrm_policy_hash_rebuild(struct net *net); u32 xfrm_get_acqseq(void); int verify_spi_info(u8 proto, u32 min, u32 max); int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi); struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, - u8 mode, u32 reqid, u8 proto, + u8 mode, u32 reqid, u32 if_id, u8 proto, const xfrm_address_t *daddr, const xfrm_address_t *saddr, int create, unsigned short family); @@ -1948,6 +1974,22 @@ static inline int xfrm_mark_put(struct sk_buff *skb, const struct xfrm_mark *m) return ret; } +static inline __u32 xfrm_smark_get(__u32 mark, struct xfrm_state *x) +{ + struct xfrm_mark *m = &x->props.smark; + + return (m->v & m->m) | (mark & ~m->m); +} + +static inline int xfrm_if_id_put(struct sk_buff *skb, __u32 if_id) +{ + int ret = 0; + + if (if_id) + ret = nla_put_u32(skb, XFRMA_IF_ID, if_id); + return ret; +} + static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x, unsigned int family) {
diff --git a/include/trace/events/android_fs.h b/include/trace/events/android_fs.h new file mode 100644 index 0000000..4950953 --- /dev/null +++ b/include/trace/events/android_fs.h
@@ -0,0 +1,65 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM android_fs + +#if !defined(_TRACE_ANDROID_FS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_ANDROID_FS_H + +#include <linux/tracepoint.h> +#include <trace/events/android_fs_template.h> + +DEFINE_EVENT(android_fs_data_start_template, android_fs_dataread_start, + TP_PROTO(struct inode *inode, loff_t offset, int bytes, + pid_t pid, char *pathname, char *command), + TP_ARGS(inode, offset, bytes, pid, pathname, command)); + +DEFINE_EVENT(android_fs_data_end_template, android_fs_dataread_end, + TP_PROTO(struct inode *inode, loff_t offset, int bytes), + TP_ARGS(inode, offset, bytes)); + +DEFINE_EVENT(android_fs_data_start_template, android_fs_datawrite_start, + TP_PROTO(struct inode *inode, loff_t offset, int bytes, + pid_t pid, char *pathname, char *command), + TP_ARGS(inode, offset, bytes, pid, pathname, command)); + +DEFINE_EVENT(android_fs_data_end_template, android_fs_datawrite_end, + TP_PROTO(struct inode *inode, loff_t offset, int bytes), + TP_ARGS(inode, offset, bytes)); + +#endif /* _TRACE_ANDROID_FS_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> + +#ifndef ANDROID_FSTRACE_GET_PATHNAME +#define ANDROID_FSTRACE_GET_PATHNAME + +/* Sizes an on-stack array, so careful if sizing this up ! */ +#define MAX_TRACE_PATHBUF_LEN 256 + +static inline char * +android_fstrace_get_pathname(char *buf, int buflen, struct inode *inode) +{ + char *path; + struct dentry *d; + + /* + * d_obtain_alias() will either iput() if it locates an existing + * dentry or transfer the reference to the new dentry created. + * So get an extra reference here. + */ + ihold(inode); + d = d_obtain_alias(inode); + if (likely(!IS_ERR(d))) { + path = dentry_path_raw(d, buf, buflen); + if (unlikely(IS_ERR(path))) { + strcpy(buf, "ERROR"); + path = buf; + } + dput(d); + } else { + strcpy(buf, "ERROR"); + path = buf; + } + return path; +} +#endif
diff --git a/include/trace/events/android_fs_template.h b/include/trace/events/android_fs_template.h new file mode 100644 index 0000000..b23d17b --- /dev/null +++ b/include/trace/events/android_fs_template.h
@@ -0,0 +1,64 @@ +#if !defined(_TRACE_ANDROID_FS_TEMPLATE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_ANDROID_FS_TEMPLATE_H + +#include <linux/tracepoint.h> + +DECLARE_EVENT_CLASS(android_fs_data_start_template, + TP_PROTO(struct inode *inode, loff_t offset, int bytes, + pid_t pid, char *pathname, char *command), + TP_ARGS(inode, offset, bytes, pid, pathname, command), + TP_STRUCT__entry( + __string(pathbuf, pathname); + __field(loff_t, offset); + __field(int, bytes); + __field(loff_t, i_size); + __string(cmdline, command); + __field(pid_t, pid); + __field(ino_t, ino); + ), + TP_fast_assign( + { + /* + * Replace the spaces in filenames and cmdlines + * because this screws up the tooling that parses + * the traces. + */ + __assign_str(pathbuf, pathname); + (void)strreplace(__get_str(pathbuf), ' ', '_'); + __entry->offset = offset; + __entry->bytes = bytes; + __entry->i_size = i_size_read(inode); + __assign_str(cmdline, command); + (void)strreplace(__get_str(cmdline), ' ', '_'); + __entry->pid = pid; + __entry->ino = inode->i_ino; + } + ), + TP_printk("entry_name %s, offset %llu, bytes %d, cmdline %s," + " pid %d, i_size %llu, ino %lu", + __get_str(pathbuf), __entry->offset, __entry->bytes, + __get_str(cmdline), __entry->pid, __entry->i_size, + (unsigned long) __entry->ino) +); + +DECLARE_EVENT_CLASS(android_fs_data_end_template, + TP_PROTO(struct inode *inode, loff_t offset, int bytes), + TP_ARGS(inode, offset, bytes), + TP_STRUCT__entry( + __field(ino_t, ino); + __field(loff_t, offset); + __field(int, bytes); + ), + TP_fast_assign( + { + __entry->ino = inode->i_ino; + __entry->offset = offset; + __entry->bytes = bytes; + } + ), + TP_printk("ino %lu, offset %llu, bytes %d", + (unsigned long) __entry->ino, + __entry->offset, __entry->bytes) +); + +#endif /* _TRACE_ANDROID_FS_TEMPLATE_H */
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 7ab4049..06c87f9 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h
@@ -137,6 +137,19 @@ TRACE_DEFINE_ENUM(CP_TRIMMED); { CP_UMOUNT, "Umount" }, \ { CP_TRIMMED, "Trimmed" }) +#define show_fsync_cpreason(type) \ + __print_symbolic(type, \ + { CP_NO_NEEDED, "no needed" }, \ + { CP_NON_REGULAR, "non regular" }, \ + { CP_HARDLINK, "hardlink" }, \ + { CP_SB_NEED_CP, "sb needs cp" }, \ + { CP_WRONG_PINO, "wrong pino" }, \ + { CP_NO_SPC_ROLL, "no space roll forward" }, \ + { CP_NODE_NEED_CP, "node needs cp" }, \ + { CP_FASTBOOT_MODE, "fastboot mode" }, \ + { CP_SPEC_LOG_NUM, "log type is 2" }, \ + { CP_RECOVER_DIR, "dir needs recovery" }) + struct victim_sel_policy; struct f2fs_map_blocks; @@ -211,14 +224,14 @@ DEFINE_EVENT(f2fs__inode, f2fs_sync_file_enter, TRACE_EVENT(f2fs_sync_file_exit, - TP_PROTO(struct inode *inode, int need_cp, int datasync, int ret), + TP_PROTO(struct inode *inode, int cp_reason, int datasync, int ret), - TP_ARGS(inode, need_cp, datasync, ret), + TP_ARGS(inode, cp_reason, datasync, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(int, need_cp) + __field(int, cp_reason) __field(int, datasync) __field(int, ret) ), @@ -226,15 +239,15 @@ TRACE_EVENT(f2fs_sync_file_exit, TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; - __entry->need_cp = need_cp; + __entry->cp_reason = cp_reason; __entry->datasync = datasync; __entry->ret = ret; ), - TP_printk("dev = (%d,%d), ino = %lu, checkpoint is %s, " + TP_printk("dev = (%d,%d), ino = %lu, cp_reason: %s, " "datasync = %d, ret = %d", show_dev_ino(__entry), - __entry->need_cp ? "needed" : "not needed", + show_fsync_cpreason(__entry->cp_reason), __entry->datasync, __entry->ret) ); @@ -729,6 +742,91 @@ TRACE_EVENT(f2fs_get_victim, __entry->free) ); +TRACE_EVENT(f2fs_lookup_start, + + TP_PROTO(struct inode *dir, struct dentry *dentry, unsigned int flags), + + TP_ARGS(dir, dentry, flags), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(const char *, name) + __field(unsigned int, flags) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->ino = dir->i_ino; + __entry->name = dentry->d_name.name; + __entry->flags = flags; + ), + + TP_printk("dev = (%d,%d), pino = %lu, name:%s, flags:%u", + show_dev_ino(__entry), + __entry->name, + __entry->flags) +); + +TRACE_EVENT(f2fs_lookup_end, + + TP_PROTO(struct inode *dir, struct dentry *dentry, nid_t ino, + int err), + + TP_ARGS(dir, dentry, ino, err), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(const char *, name) + __field(nid_t, cino) + __field(int, err) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->ino = dir->i_ino; + __entry->name = dentry->d_name.name; + __entry->cino = ino; + __entry->err = err; + ), + + TP_printk("dev = (%d,%d), pino = %lu, name:%s, ino:%u, err:%d", + show_dev_ino(__entry), + __entry->name, + __entry->cino, + __entry->err) +); + +TRACE_EVENT(f2fs_readdir, + + TP_PROTO(struct inode *dir, loff_t start_pos, loff_t end_pos, int err), + + TP_ARGS(dir, start_pos, end_pos, err), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, start) + __field(loff_t, end) + __field(int, err) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->ino = dir->i_ino; + __entry->start = start_pos; + __entry->end = end_pos; + __entry->err = err; + ), + + TP_printk("dev = (%d,%d), ino = %lu, start_pos:%llu, end_pos:%llu, err:%d", + show_dev_ino(__entry), + __entry->start, + __entry->end, + __entry->err) +); + TRACE_EVENT(f2fs_fallocate, TP_PROTO(struct inode *inode, int mode, @@ -1287,6 +1385,13 @@ DEFINE_EVENT(f2fs_discard, f2fs_issue_discard, TP_ARGS(dev, blkstart, blklen) ); +DEFINE_EVENT(f2fs_discard, f2fs_remove_discard, + + TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen), + + TP_ARGS(dev, blkstart, blklen) +); + TRACE_EVENT(f2fs_issue_reset_zone, TP_PROTO(struct block_device *dev, block_t blkstart),
diff --git a/include/trace/events/gpu.h b/include/trace/events/gpu.h new file mode 100644 index 0000000..7e15cdf --- /dev/null +++ b/include/trace/events/gpu.h
@@ -0,0 +1,143 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM gpu + +#if !defined(_TRACE_GPU_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_GPU_H + +#include <linux/tracepoint.h> +#include <linux/time.h> + +#define show_secs_from_ns(ns) \ + ({ \ + u64 t = ns + (NSEC_PER_USEC / 2); \ + do_div(t, NSEC_PER_SEC); \ + t; \ + }) + +#define show_usecs_from_ns(ns) \ + ({ \ + u64 t = ns + (NSEC_PER_USEC / 2) ; \ + u32 rem; \ + do_div(t, NSEC_PER_USEC); \ + rem = do_div(t, USEC_PER_SEC); \ + }) + +/* + * The gpu_sched_switch event indicates that a switch from one GPU context to + * another occurred on one of the GPU hardware blocks. + * + * The gpu_name argument identifies the GPU hardware block. Each independently + * scheduled GPU hardware block should have a different name. This may be used + * in different ways for different GPUs. For example, if a GPU includes + * multiple processing cores it may use names "GPU 0", "GPU 1", etc. If a GPU + * includes a separately scheduled 2D and 3D hardware block, it might use the + * names "2D" and "3D". + * + * The timestamp argument is the timestamp at which the switch occurred on the + * GPU. These timestamps are in units of nanoseconds and must use + * approximately the same time as sched_clock, though they need not come from + * any CPU clock. The timestamps for a single hardware block must be + * monotonically nondecreasing. This means that if a variable compensation + * offset is used to translate from some other clock to the sched_clock, then + * care must be taken when increasing that offset, and doing so may result in + * multiple events with the same timestamp. + * + * The next_ctx_id argument identifies the next context that was running on + * the GPU hardware block. A value of 0 indicates that the hardware block + * will be idle. + * + * The next_prio argument indicates the priority of the next context at the + * time of the event. The exact numeric values may mean different things for + * different GPUs, but they should follow the rule that lower values indicate a + * higher priority. + * + * The next_job_id argument identifies the batch of work that the GPU will be + * working on. This should correspond to a job_id that was previously traced + * as a gpu_job_enqueue event when the batch of work was created. + */ +TRACE_EVENT(gpu_sched_switch, + + TP_PROTO(const char *gpu_name, u64 timestamp, + u32 next_ctx_id, s32 next_prio, u32 next_job_id), + + TP_ARGS(gpu_name, timestamp, next_ctx_id, next_prio, next_job_id), + + TP_STRUCT__entry( + __string( gpu_name, gpu_name ) + __field( u64, timestamp ) + __field( u32, next_ctx_id ) + __field( s32, next_prio ) + __field( u32, next_job_id ) + ), + + TP_fast_assign( + __assign_str(gpu_name, gpu_name); + __entry->timestamp = timestamp; + __entry->next_ctx_id = next_ctx_id; + __entry->next_prio = next_prio; + __entry->next_job_id = next_job_id; + ), + + TP_printk("gpu_name=%s ts=%llu.%06lu next_ctx_id=%lu next_prio=%ld " + "next_job_id=%lu", + __get_str(gpu_name), + (unsigned long long)show_secs_from_ns(__entry->timestamp), + (unsigned long)show_usecs_from_ns(__entry->timestamp), + (unsigned long)__entry->next_ctx_id, + (long)__entry->next_prio, + (unsigned long)__entry->next_job_id) +); + +/* + * The gpu_job_enqueue event indicates that a batch of work has been queued up + * to be processed by the GPU. This event is not intended to indicate that + * the batch of work has been submitted to the GPU hardware, but rather that + * it has been submitted to the GPU kernel driver. + * + * This event should be traced on the thread that initiated the work being + * queued. For example, if a batch of work is submitted to the kernel by a + * userland thread, the event should be traced on that thread. + * + * The ctx_id field identifies the GPU context in which the batch of work + * being queued is to be run. + * + * The job_id field identifies the batch of work being queued within the given + * GPU context. The first batch of work submitted for a given GPU context + * should have a job_id of 0, and each subsequent batch of work should + * increment the job_id by 1. + * + * The type field identifies the type of the job being enqueued. The job + * types may be different for different GPU hardware. For example, a GPU may + * differentiate between "2D", "3D", and "compute" jobs. + */ +TRACE_EVENT(gpu_job_enqueue, + + TP_PROTO(u32 ctx_id, u32 job_id, const char *type), + + TP_ARGS(ctx_id, job_id, type), + + TP_STRUCT__entry( + __field( u32, ctx_id ) + __field( u32, job_id ) + __string( type, type ) + ), + + TP_fast_assign( + __entry->ctx_id = ctx_id; + __entry->job_id = job_id; + __assign_str(type, type); + ), + + TP_printk("ctx_id=%lu job_id=%lu type=%s", + (unsigned long)__entry->ctx_id, + (unsigned long)__entry->job_id, + __get_str(type)) +); + +#undef show_secs_from_ns +#undef show_usecs_from_ns + +#endif /* _TRACE_GPU_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h>
diff --git a/include/trace/events/net.h b/include/trace/events/net.h index 9c886739..f1a300c 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h
@@ -58,7 +58,7 @@ TRACE_EVENT(net_dev_start_xmit, __entry->gso_type = skb_shinfo(skb)->gso_type; ), - TP_printk("dev=%s queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d len=%u data_len=%u network_offset=%d transport_offset_valid=%d transport_offset=%d tx_flags=%d gso_size=%d gso_segs=%d gso_type=%#x", + TP_printk("dev=%s queue_mapping=%u skbaddr=%pK vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d len=%u data_len=%u network_offset=%d transport_offset_valid=%d transport_offset=%d tx_flags=%d gso_size=%d gso_segs=%d gso_type=%#x", __get_str(name), __entry->queue_mapping, __entry->skbaddr, __entry->vlan_tagged, __entry->vlan_proto, __entry->vlan_tci, __entry->protocol, __entry->ip_summed, __entry->len, @@ -91,7 +91,7 @@ TRACE_EVENT(net_dev_xmit, __assign_str(name, dev->name); ), - TP_printk("dev=%s skbaddr=%p len=%u rc=%d", + TP_printk("dev=%s skbaddr=%pK len=%u rc=%d", __get_str(name), __entry->skbaddr, __entry->len, __entry->rc) ); @@ -113,7 +113,7 @@ DECLARE_EVENT_CLASS(net_dev_template, __assign_str(name, skb->dev->name); ), - TP_printk("dev=%s skbaddr=%p len=%u", + TP_printk("dev=%s skbaddr=%pK len=%u", __get_str(name), __entry->skbaddr, __entry->len) ) @@ -192,7 +192,7 @@ DECLARE_EVENT_CLASS(net_dev_rx_verbose_template, __entry->gso_type = skb_shinfo(skb)->gso_type; ), - TP_printk("dev=%s napi_id=%#x queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d hash=0x%08x l4_hash=%d len=%u data_len=%u truesize=%u mac_header_valid=%d mac_header=%d nr_frags=%d gso_size=%d gso_type=%#x", + TP_printk("dev=%s napi_id=%#x queue_mapping=%u skbaddr=%pK vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d hash=0x%08x l4_hash=%d len=%u data_len=%u truesize=%u mac_header_valid=%d mac_header=%d nr_frags=%d gso_size=%d gso_type=%#x", __get_str(name), __entry->napi_id, __entry->queue_mapping, __entry->skbaddr, __entry->vlan_tagged, __entry->vlan_proto, __entry->vlan_tci, __entry->protocol, __entry->ip_summed,
diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 908977d..3eac1f9 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h
@@ -148,6 +148,31 @@ DEFINE_EVENT(cpu, cpu_frequency, TP_ARGS(frequency, cpu_id) ); +TRACE_EVENT(cpu_frequency_limits, + + TP_PROTO(unsigned int max_freq, unsigned int min_freq, + unsigned int cpu_id), + + TP_ARGS(max_freq, min_freq, cpu_id), + + TP_STRUCT__entry( + __field( u32, min_freq ) + __field( u32, max_freq ) + __field( u32, cpu_id ) + ), + + TP_fast_assign( + __entry->min_freq = min_freq; + __entry->max_freq = max_freq; + __entry->cpu_id = cpu_id; + ), + + TP_printk("min=%lu max=%lu cpu_id=%lu", + (unsigned long)__entry->min_freq, + (unsigned long)__entry->max_freq, + (unsigned long)__entry->cpu_id) +); + TRACE_EVENT(device_pm_callback_start, TP_PROTO(struct device *dev, const char *pm_ops, int event), @@ -301,6 +326,25 @@ DEFINE_EVENT(clock, clock_set_rate, TP_ARGS(name, state, cpu_id) ); +TRACE_EVENT(clock_set_parent, + + TP_PROTO(const char *name, const char *parent_name), + + TP_ARGS(name, parent_name), + + TP_STRUCT__entry( + __string( name, name ) + __string( parent_name, parent_name ) + ), + + TP_fast_assign( + __assign_str(name, name); + __assign_str(parent_name, parent_name); + ), + + TP_printk("%s parent=%s", __get_str(name), __get_str(parent_name)) +); + /* * The power domain events are used for power domains transitions */
diff --git a/include/trace/events/preemptirq.h b/include/trace/events/preemptirq.h new file mode 100644 index 0000000..9c4eb33 --- /dev/null +++ b/include/trace/events/preemptirq.h
@@ -0,0 +1,73 @@ +#ifdef CONFIG_PREEMPTIRQ_EVENTS + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM preemptirq + +#if !defined(_TRACE_PREEMPTIRQ_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PREEMPTIRQ_H + +#include <linux/ktime.h> +#include <linux/tracepoint.h> +#include <linux/string.h> +#include <asm/sections.h> + +DECLARE_EVENT_CLASS(preemptirq_template, + + TP_PROTO(unsigned long ip, unsigned long parent_ip), + + TP_ARGS(ip, parent_ip), + + TP_STRUCT__entry( + __field(u32, caller_offs) + __field(u32, parent_offs) + ), + + TP_fast_assign( + __entry->caller_offs = (u32)(ip - (unsigned long)_stext); + __entry->parent_offs = (u32)(parent_ip - (unsigned long)_stext); + ), + + TP_printk("caller=%pF parent=%pF", + (void *)((unsigned long)(_stext) + __entry->caller_offs), + (void *)((unsigned long)(_stext) + __entry->parent_offs)) +); + +#ifndef CONFIG_PROVE_LOCKING +DEFINE_EVENT(preemptirq_template, irq_disable, + TP_PROTO(unsigned long ip, unsigned long parent_ip), + TP_ARGS(ip, parent_ip)); + +DEFINE_EVENT(preemptirq_template, irq_enable, + TP_PROTO(unsigned long ip, unsigned long parent_ip), + TP_ARGS(ip, parent_ip)); +#endif + +#ifdef CONFIG_DEBUG_PREEMPT +DEFINE_EVENT(preemptirq_template, preempt_disable, + TP_PROTO(unsigned long ip, unsigned long parent_ip), + TP_ARGS(ip, parent_ip)); + +DEFINE_EVENT(preemptirq_template, preempt_enable, + TP_PROTO(unsigned long ip, unsigned long parent_ip), + TP_ARGS(ip, parent_ip)); +#endif + +#endif /* _TRACE_PREEMPTIRQ_H */ + +#include <trace/define_trace.h> + +#endif /* !CONFIG_PREEMPTIRQ_EVENTS */ + +#if !defined(CONFIG_PREEMPTIRQ_EVENTS) || defined(CONFIG_PROVE_LOCKING) +#define trace_irq_enable(...) +#define trace_irq_disable(...) +#define trace_irq_enable_rcuidle(...) +#define trace_irq_disable_rcuidle(...) +#endif + +#if !defined(CONFIG_PREEMPTIRQ_EVENTS) || !defined(CONFIG_DEBUG_PREEMPT) +#define trace_preempt_enable(...) +#define trace_preempt_disable(...) +#define trace_preempt_enable_rcuidle(...) +#define trace_preempt_disable_rcuidle(...) +#endif
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 0812cd54..ccc96c07 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h
@@ -226,7 +226,7 @@ DECLARE_EVENT_CLASS(sched_process_template, DEFINE_EVENT(sched_process_template, sched_process_free, TP_PROTO(struct task_struct *p), TP_ARGS(p)); - + /* * Tracepoint for a task exiting: @@ -381,6 +381,30 @@ DEFINE_EVENT(sched_stat_template, sched_stat_blocked, TP_ARGS(tsk, delay)); /* + * Tracepoint for recording the cause of uninterruptible sleep. + */ +TRACE_EVENT(sched_blocked_reason, + + TP_PROTO(struct task_struct *tsk), + + TP_ARGS(tsk), + + TP_STRUCT__entry( + __field( pid_t, pid ) + __field( void*, caller ) + __field( bool, io_wait ) + ), + + TP_fast_assign( + __entry->pid = tsk->pid; + __entry->caller = (void*)get_wchan(tsk); + __entry->io_wait = tsk->in_iowait; + ), + + TP_printk("pid=%d iowait=%d caller=%pS", __entry->pid, __entry->io_wait, __entry->caller) +); + +/* * Tracepoint for accounting runtime (time the task is executing * on a CPU). */ @@ -572,6 +596,621 @@ TRACE_EVENT(sched_wake_idle_without_ipi, TP_printk("cpu=%d", __entry->cpu) ); + +#ifdef CONFIG_SMP +#ifdef CREATE_TRACE_POINTS +static inline +int __trace_sched_cpu(struct cfs_rq *cfs_rq, struct sched_entity *se) +{ +#ifdef CONFIG_FAIR_GROUP_SCHED + struct rq *rq = cfs_rq ? cfs_rq->rq : NULL; +#else + struct rq *rq = cfs_rq ? container_of(cfs_rq, struct rq, cfs) : NULL; +#endif + return rq ? cpu_of(rq) + : task_cpu((container_of(se, struct task_struct, se))); +} + +static inline +int __trace_sched_path(struct cfs_rq *cfs_rq, char *path, int len) +{ +#ifdef CONFIG_FAIR_GROUP_SCHED + int l = path ? len : 0; + + if (cfs_rq && task_group_is_autogroup(cfs_rq->tg)) + return autogroup_path(cfs_rq->tg, path, l) + 1; + else if (cfs_rq && cfs_rq->tg->css.cgroup) + return cgroup_path(cfs_rq->tg->css.cgroup, path, l) + 1; +#endif + if (path) + strcpy(path, "(null)"); + + return strlen("(null)"); +} + +static inline +struct cfs_rq *__trace_sched_group_cfs_rq(struct sched_entity *se) +{ +#ifdef CONFIG_FAIR_GROUP_SCHED + return se->my_q; +#else + return NULL; +#endif +} +#endif /* CREATE_TRACE_POINTS */ + +#ifdef CONFIG_SCHED_WALT +extern unsigned int sysctl_sched_use_walt_cpu_util; +extern unsigned int sysctl_sched_use_walt_task_util; +extern unsigned int walt_ravg_window; +extern bool walt_disabled; + +#define walt_util(util_var, demand_sum) {\ + u64 sum = demand_sum << SCHED_CAPACITY_SHIFT;\ + do_div(sum, walt_ravg_window);\ + util_var = (typeof(util_var))sum;\ + } +#endif + +/* + * Tracepoint for cfs_rq load tracking: + */ +TRACE_EVENT(sched_load_cfs_rq, + + TP_PROTO(struct cfs_rq *cfs_rq), + + TP_ARGS(cfs_rq), + + TP_STRUCT__entry( + __field( int, cpu ) + __dynamic_array(char, path, + __trace_sched_path(cfs_rq, NULL, 0) ) + __field( unsigned long, load ) + __field( unsigned long, util ) + __field( unsigned long, util_pelt ) + __field( unsigned long, util_walt ) + ), + + TP_fast_assign( + __entry->cpu = __trace_sched_cpu(cfs_rq, NULL); + __trace_sched_path(cfs_rq, __get_dynamic_array(path), + __get_dynamic_array_len(path)); + __entry->load = cfs_rq->runnable_load_avg; + __entry->util = cfs_rq->avg.util_avg; + __entry->util_pelt = cfs_rq->avg.util_avg; + __entry->util_walt = 0; +#ifdef CONFIG_SCHED_WALT + if (&cfs_rq->rq->cfs == cfs_rq) { + walt_util(__entry->util_walt, + cfs_rq->rq->prev_runnable_sum); + if (!walt_disabled && sysctl_sched_use_walt_cpu_util) + __entry->util = __entry->util_walt; + } +#endif + ), + + TP_printk("cpu=%d path=%s load=%lu util=%lu util_pelt=%lu util_walt=%lu", + __entry->cpu, __get_str(path), __entry->load, __entry->util, + __entry->util_pelt, __entry->util_walt) +); + +/* + * Tracepoint for rt_rq load tracking: + */ +struct rt_rq; + +TRACE_EVENT(sched_load_rt_rq, + + TP_PROTO(int cpu, struct rt_rq *rt_rq), + + TP_ARGS(cpu, rt_rq), + + TP_STRUCT__entry( + __field( int, cpu ) + __field( unsigned long, util ) + ), + + TP_fast_assign( + __entry->cpu = cpu; + __entry->util = rt_rq->avg.util_avg; + ), + + TP_printk("cpu=%d util=%lu", __entry->cpu, + __entry->util) +); + +/* + * Tracepoint for sched_entity load tracking: + */ +TRACE_EVENT(sched_load_se, + + TP_PROTO(struct sched_entity *se), + + TP_ARGS(se), + + TP_STRUCT__entry( + __field( int, cpu ) + __dynamic_array(char, path, + __trace_sched_path(__trace_sched_group_cfs_rq(se), NULL, 0) ) + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( unsigned long, load ) + __field( unsigned long, util ) + __field( unsigned long, util_pelt ) + __field( unsigned long, util_walt ) + ), + + TP_fast_assign( + struct cfs_rq *gcfs_rq = __trace_sched_group_cfs_rq(se); + struct task_struct *p = gcfs_rq ? NULL + : container_of(se, struct task_struct, se); + + __entry->cpu = __trace_sched_cpu(gcfs_rq, se); + __trace_sched_path(gcfs_rq, __get_dynamic_array(path), + __get_dynamic_array_len(path)); + memcpy(__entry->comm, p ? p->comm : "(null)", + p ? TASK_COMM_LEN : sizeof("(null)")); + __entry->pid = p ? p->pid : -1; + __entry->load = se->avg.load_avg; + __entry->util = se->avg.util_avg; + __entry->util_pelt = __entry->util; + __entry->util_walt = 0; +#ifdef CONFIG_SCHED_WALT + if (!se->my_q) { + struct task_struct *p = container_of(se, struct task_struct, se); + walt_util(__entry->util_walt, p->ravg.demand); + if (!walt_disabled && sysctl_sched_use_walt_task_util) + __entry->util = __entry->util_walt; + } +#endif + ), + + TP_printk("cpu=%d path=%s comm=%s pid=%d load=%lu util=%lu util_pelt=%lu util_walt=%lu", + __entry->cpu, __get_str(path), __entry->comm, + __entry->pid, __entry->load, __entry->util, + __entry->util_pelt, __entry->util_walt) +); + +/* + * Tracepoint for task_group load tracking: + */ +#ifdef CONFIG_FAIR_GROUP_SCHED +TRACE_EVENT(sched_load_tg, + + TP_PROTO(struct cfs_rq *cfs_rq), + + TP_ARGS(cfs_rq), + + TP_STRUCT__entry( + __field( int, cpu ) + __dynamic_array(char, path, + __trace_sched_path(cfs_rq, NULL, 0) ) + __field( long, load ) + ), + + TP_fast_assign( + __entry->cpu = cfs_rq->rq->cpu; + __trace_sched_path(cfs_rq, __get_dynamic_array(path), + __get_dynamic_array_len(path)); + __entry->load = atomic_long_read(&cfs_rq->tg->load_avg); + ), + + TP_printk("cpu=%d path=%s load=%ld", __entry->cpu, __get_str(path), + __entry->load) +); +#endif /* CONFIG_FAIR_GROUP_SCHED */ + +/* + * Tracepoint for accounting CPU boosted utilization + */ +TRACE_EVENT(sched_boost_cpu, + + TP_PROTO(int cpu, unsigned long util, long margin), + + TP_ARGS(cpu, util, margin), + + TP_STRUCT__entry( + __field( int, cpu ) + __field( unsigned long, util ) + __field(long, margin ) + ), + + TP_fast_assign( + __entry->cpu = cpu; + __entry->util = util; + __entry->margin = margin; + ), + + TP_printk("cpu=%d util=%lu margin=%ld", + __entry->cpu, + __entry->util, + __entry->margin) +); + +/* + * Tracepoint for schedtune_tasks_update + */ +TRACE_EVENT(sched_tune_tasks_update, + + TP_PROTO(struct task_struct *tsk, int cpu, int tasks, int idx, + int boost, int max_boost, u64 group_ts), + + TP_ARGS(tsk, cpu, tasks, idx, boost, max_boost, group_ts), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, cpu ) + __field( int, tasks ) + __field( int, idx ) + __field( int, boost ) + __field( int, max_boost ) + __field( u64, group_ts ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->cpu = cpu; + __entry->tasks = tasks; + __entry->idx = idx; + __entry->boost = boost; + __entry->max_boost = max_boost; + __entry->group_ts = group_ts; + ), + + TP_printk("pid=%d comm=%s " + "cpu=%d tasks=%d idx=%d boost=%d max_boost=%d timeout=%llu", + __entry->pid, __entry->comm, + __entry->cpu, __entry->tasks, __entry->idx, + __entry->boost, __entry->max_boost, + __entry->group_ts) +); + +/* + * Tracepoint for schedtune_boostgroup_update + */ +TRACE_EVENT(sched_tune_boostgroup_update, + + TP_PROTO(int cpu, int variation, int max_boost), + + TP_ARGS(cpu, variation, max_boost), + + TP_STRUCT__entry( + __field( int, cpu ) + __field( int, variation ) + __field( int, max_boost ) + ), + + TP_fast_assign( + __entry->cpu = cpu; + __entry->variation = variation; + __entry->max_boost = max_boost; + ), + + TP_printk("cpu=%d variation=%d max_boost=%d", + __entry->cpu, __entry->variation, __entry->max_boost) +); + +/* + * Tracepoint for accounting task boosted utilization + */ +TRACE_EVENT(sched_boost_task, + + TP_PROTO(struct task_struct *tsk, unsigned long util, long margin), + + TP_ARGS(tsk, util, margin), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( unsigned long, util ) + __field( long, margin ) + + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->util = util; + __entry->margin = margin; + ), + + TP_printk("comm=%s pid=%d util=%lu margin=%ld", + __entry->comm, __entry->pid, + __entry->util, + __entry->margin) +); + +/* + * Tracepoint for system overutilized flag + */ +struct sched_domain; +TRACE_EVENT_CONDITION(sched_overutilized, + + TP_PROTO(struct sched_domain *sd, bool was_overutilized, bool overutilized), + + TP_ARGS(sd, was_overutilized, overutilized), + + TP_CONDITION(overutilized != was_overutilized), + + TP_STRUCT__entry( + __field( bool, overutilized ) + __array( char, cpulist , 32 ) + ), + + TP_fast_assign( + __entry->overutilized = overutilized; + scnprintf(__entry->cpulist, sizeof(__entry->cpulist), "%*pbl", cpumask_pr_args(sched_domain_span(sd))); + ), + + TP_printk("overutilized=%d sd_span=%s", + __entry->overutilized ? 1 : 0, __entry->cpulist) +); + +/* + * Tracepoint for find_best_target + */ +TRACE_EVENT(sched_find_best_target, + + TP_PROTO(struct task_struct *tsk, bool prefer_idle, + unsigned long min_util, int start_cpu, + int best_idle, int best_active, int target), + + TP_ARGS(tsk, prefer_idle, min_util, start_cpu, + best_idle, best_active, target), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( unsigned long, min_util ) + __field( bool, prefer_idle ) + __field( int, start_cpu ) + __field( int, best_idle ) + __field( int, best_active ) + __field( int, target ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->min_util = min_util; + __entry->prefer_idle = prefer_idle; + __entry->start_cpu = start_cpu; + __entry->best_idle = best_idle; + __entry->best_active = best_active; + __entry->target = target; + ), + + TP_printk("pid=%d comm=%s prefer_idle=%d start_cpu=%d " + "best_idle=%d best_active=%d target=%d", + __entry->pid, __entry->comm, + __entry->prefer_idle, __entry->start_cpu, + __entry->best_idle, __entry->best_active, + __entry->target) +); + +/* + * Tracepoint for tasks' estimated utilization. + */ +TRACE_EVENT(sched_util_est_task, + + TP_PROTO(struct task_struct *tsk, struct sched_avg *avg), + + TP_ARGS(tsk, avg), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, cpu ) + __field( unsigned int, util_avg ) + __field( unsigned int, est_enqueued ) + __field( unsigned int, est_ewma ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->cpu = task_cpu(tsk); + __entry->util_avg = avg->util_avg; + __entry->est_enqueued = avg->util_est.enqueued; + __entry->est_ewma = avg->util_est.ewma; + ), + + TP_printk("comm=%s pid=%d cpu=%d util_avg=%u util_est_ewma=%u util_est_enqueued=%u", + __entry->comm, + __entry->pid, + __entry->cpu, + __entry->util_avg, + __entry->est_ewma, + __entry->est_enqueued) +); + +/* + * Tracepoint for root cfs_rq's estimated utilization. + */ +TRACE_EVENT(sched_util_est_cpu, + + TP_PROTO(int cpu, struct cfs_rq *cfs_rq), + + TP_ARGS(cpu, cfs_rq), + + TP_STRUCT__entry( + __field( int, cpu ) + __field( unsigned int, util_avg ) + __field( unsigned int, util_est_enqueued ) + ), + + TP_fast_assign( + __entry->cpu = cpu; + __entry->util_avg = cfs_rq->avg.util_avg; + __entry->util_est_enqueued = cfs_rq->avg.util_est.enqueued; + ), + + TP_printk("cpu=%d util_avg=%u util_est_enqueued=%u", + __entry->cpu, + __entry->util_avg, + __entry->util_est_enqueued) +); + +#ifdef CONFIG_SCHED_WALT +struct rq; + +TRACE_EVENT(walt_update_task_ravg, + + TP_PROTO(struct task_struct *p, struct rq *rq, int evt, + u64 wallclock, u64 irqtime), + + TP_ARGS(p, rq, evt, wallclock, irqtime), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( pid_t, cur_pid ) + __field( u64, wallclock ) + __field( u64, mark_start ) + __field( u64, delta_m ) + __field( u64, win_start ) + __field( u64, delta ) + __field( u64, irqtime ) + __array( char, evt, 16 ) + __field(unsigned int, demand ) + __field(unsigned int, sum ) + __field( int, cpu ) + __field( u64, cs ) + __field( u64, ps ) + __field( u32, curr_window ) + __field( u32, prev_window ) + __field( u64, nt_cs ) + __field( u64, nt_ps ) + __field( u32, active_windows ) + ), + + TP_fast_assign( + static const char* walt_event_names[] = + { + "PUT_PREV_TASK", + "PICK_NEXT_TASK", + "TASK_WAKE", + "TASK_MIGRATE", + "TASK_UPDATE", + "IRQ_UPDATE" + }; + __entry->wallclock = wallclock; + __entry->win_start = rq->window_start; + __entry->delta = (wallclock - rq->window_start); + strcpy(__entry->evt, walt_event_names[evt]); + __entry->cpu = rq->cpu; + __entry->cur_pid = rq->curr->pid; + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->mark_start = p->ravg.mark_start; + __entry->delta_m = (wallclock - p->ravg.mark_start); + __entry->demand = p->ravg.demand; + __entry->sum = p->ravg.sum; + __entry->irqtime = irqtime; + __entry->cs = rq->curr_runnable_sum; + __entry->ps = rq->prev_runnable_sum; + __entry->curr_window = p->ravg.curr_window; + __entry->prev_window = p->ravg.prev_window; + __entry->nt_cs = rq->nt_curr_runnable_sum; + __entry->nt_ps = rq->nt_prev_runnable_sum; + __entry->active_windows = p->ravg.active_windows; + ), + + TP_printk("wallclock=%llu window_start=%llu delta=%llu event=%s cpu=%d cur_pid=%d pid=%d comm=%s" + " mark_start=%llu delta=%llu demand=%u sum=%u irqtime=%llu" + " curr_runnable_sum=%llu prev_runnable_sum=%llu cur_window=%u" + " prev_window=%u nt_curr_runnable_sum=%llu nt_prev_runnable_sum=%llu active_windows=%u", + __entry->wallclock, __entry->win_start, __entry->delta, + __entry->evt, __entry->cpu, __entry->cur_pid, + __entry->pid, __entry->comm, __entry->mark_start, + __entry->delta_m, __entry->demand, + __entry->sum, __entry->irqtime, + __entry->cs, __entry->ps, + __entry->curr_window, __entry->prev_window, + __entry->nt_cs, __entry->nt_ps, + __entry->active_windows + ) +); + +TRACE_EVENT(walt_update_history, + + TP_PROTO(struct rq *rq, struct task_struct *p, u32 runtime, int samples, + int evt), + + TP_ARGS(rq, p, runtime, samples, evt), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field(unsigned int, runtime ) + __field( int, samples ) + __field( int, evt ) + __field( u64, demand ) + __field(unsigned int, walt_avg ) + __field(unsigned int, pelt_avg ) + __array( u32, hist, RAVG_HIST_SIZE_MAX) + __field( int, cpu ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->runtime = runtime; + __entry->samples = samples; + __entry->evt = evt; + __entry->demand = p->ravg.demand; + walt_util(__entry->walt_avg,__entry->demand); + __entry->pelt_avg = p->se.avg.util_avg; + memcpy(__entry->hist, p->ravg.sum_history, + RAVG_HIST_SIZE_MAX * sizeof(u32)); + __entry->cpu = rq->cpu; + ), + + TP_printk("pid=%d comm=%s runtime=%u samples=%d event=%d demand=%llu ravg_window=%u" + " walt=%u pelt=%u hist0=%u hist1=%u hist2=%u hist3=%u hist4=%u cpu=%d", + __entry->pid, __entry->comm, + __entry->runtime, __entry->samples, __entry->evt, + __entry->demand, + walt_ravg_window, + __entry->walt_avg, + __entry->pelt_avg, + __entry->hist[0], __entry->hist[1], + __entry->hist[2], __entry->hist[3], + __entry->hist[4], __entry->cpu) +); + +TRACE_EVENT(walt_migration_update_sum, + + TP_PROTO(struct rq *rq, struct task_struct *p), + + TP_ARGS(rq, p), + + TP_STRUCT__entry( + __field(int, cpu ) + __field(int, pid ) + __field( u64, cs ) + __field( u64, ps ) + __field( s64, nt_cs ) + __field( s64, nt_ps ) + ), + + TP_fast_assign( + __entry->cpu = cpu_of(rq); + __entry->cs = rq->curr_runnable_sum; + __entry->ps = rq->prev_runnable_sum; + __entry->nt_cs = (s64)rq->nt_curr_runnable_sum; + __entry->nt_ps = (s64)rq->nt_prev_runnable_sum; + __entry->pid = p->pid; + ), + + TP_printk("cpu=%d curr_runnable_sum=%llu prev_runnable_sum=%llu nt_curr_runnable_sum=%lld nt_prev_runnable_sum=%lld pid=%d", + __entry->cpu, __entry->cs, __entry->ps, + __entry->nt_cs, __entry->nt_ps, __entry->pid) +); +#endif /* CONFIG_SCHED_WALT */ +#endif /* CONFIG_SMP */ #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */
diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index bfaec69..4a1c285 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h
@@ -38,9 +38,56 @@ enum { BINDER_TYPE_PTR = B_PACK_CHARS('p', 't', '*', B_TYPE_LARGE), }; -enum { +/** + * enum flat_binder_object_shifts: shift values for flat_binder_object_flags + * @FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT: shift for getting scheduler policy. + * + */ +enum flat_binder_object_shifts { + FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT = 9, +}; + +/** + * enum flat_binder_object_flags - flags for use in flat_binder_object.flags + */ +enum flat_binder_object_flags { + /** + * @FLAT_BINDER_FLAG_PRIORITY_MASK: bit-mask for min scheduler priority + * + * These bits can be used to set the minimum scheduler priority + * at which transactions into this node should run. Valid values + * in these bits depend on the scheduler policy encoded in + * @FLAT_BINDER_FLAG_SCHED_POLICY_MASK. + * + * For SCHED_NORMAL/SCHED_BATCH, the valid range is between [-20..19] + * For SCHED_FIFO/SCHED_RR, the value can run between [1..99] + */ FLAT_BINDER_FLAG_PRIORITY_MASK = 0xff, + /** + * @FLAT_BINDER_FLAG_ACCEPTS_FDS: whether the node accepts fds. + */ FLAT_BINDER_FLAG_ACCEPTS_FDS = 0x100, + /** + * @FLAT_BINDER_FLAG_SCHED_POLICY_MASK: bit-mask for scheduling policy + * + * These two bits can be used to set the min scheduling policy at which + * transactions on this node should run. These match the UAPI + * scheduler policy values, eg: + * 00b: SCHED_NORMAL + * 01b: SCHED_FIFO + * 10b: SCHED_RR + * 11b: SCHED_BATCH + */ + FLAT_BINDER_FLAG_SCHED_POLICY_MASK = + 3U << FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT, + + /** + * @FLAT_BINDER_FLAG_INHERIT_RT: whether the node inherits RT policy + * + * Only when set, calls into this node will inherit a real-time + * scheduling policy from the caller (for synchronous transactions). + */ + FLAT_BINDER_FLAG_INHERIT_RT = 0x800, }; #ifdef BINDER_IPC_32BIT @@ -200,6 +247,15 @@ struct binder_node_debug_info { __u32 has_weak_ref; }; +struct binder_node_info_for_ref { + __u32 handle; + __u32 strong_count; + __u32 weak_count; + __u32 reserved1; + __u32 reserved2; + __u32 reserved3; +}; + #define BINDER_WRITE_READ _IOWR('b', 1, struct binder_write_read) #define BINDER_SET_IDLE_TIMEOUT _IOW('b', 3, __s64) #define BINDER_SET_MAX_THREADS _IOW('b', 5, __u32) @@ -208,6 +264,7 @@ struct binder_node_debug_info { #define BINDER_THREAD_EXIT _IOW('b', 8, __s32) #define BINDER_VERSION _IOWR('b', 9, struct binder_version) #define BINDER_GET_NODE_DEBUG_INFO _IOWR('b', 11, struct binder_node_debug_info) +#define BINDER_GET_NODE_INFO_FOR_REF _IOWR('b', 12, struct binder_node_info_for_ref) /* * NOTE: Two special error codes you should check for when calling
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 30f2ce76..a88b2c45 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h
@@ -176,6 +176,10 @@ enum bpf_attach_type { /* Specify numa node during map creation */ #define BPF_F_NUMA_NODE (1U << 2) +/* Flags for accessing BPF object */ +#define BPF_F_RDONLY (1U << 3) +#define BPF_F_WRONLY (1U << 4) + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -216,6 +220,7 @@ union bpf_attr { struct { /* anonymous struct used by BPF_OBJ_* commands */ __aligned_u64 pathname; __u32 bpf_fd; + __u32 file_flags; }; struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ @@ -243,6 +248,7 @@ union bpf_attr { __u32 map_id; }; __u32 next_id; + __u32 open_flags; }; struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 6448cdd..586bf89 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h
@@ -41,6 +41,7 @@ #define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ #define F_SEAL_GROW 0x0004 /* prevent file from growing */ #define F_SEAL_WRITE 0x0008 /* prevent writes */ +#define F_SEAL_FUTURE_WRITE 0x0010 /* prevent writes */ /* (1U << 31) is reserved for signed error codes */ /*
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 4199f8a..e4d0261 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h
@@ -265,7 +265,8 @@ struct fsxattr { #define FS_POLICY_FLAGS_PAD_16 0x02 #define FS_POLICY_FLAGS_PAD_32 0x03 #define FS_POLICY_FLAGS_PAD_MASK 0x03 -#define FS_POLICY_FLAGS_VALID 0x03 +#define FS_POLICY_FLAG_DIRECT_KEY 0x04 /* use master key directly */ +#define FS_POLICY_FLAGS_VALID 0x07 /* Encryption algorithms */ #define FS_ENCRYPTION_MODE_INVALID 0 @@ -275,6 +276,9 @@ struct fsxattr { #define FS_ENCRYPTION_MODE_AES_256_CTS 4 #define FS_ENCRYPTION_MODE_AES_128_CBC 5 #define FS_ENCRYPTION_MODE_AES_128_CTS 6 +#define FS_ENCRYPTION_MODE_SPECK128_256_XTS 7 /* Removed, do not use. */ +#define FS_ENCRYPTION_MODE_SPECK128_256_CTS 8 /* Removed, do not use. */ +#define FS_ENCRYPTION_MODE_ADIANTUM 9 struct fscrypt_policy { __u8 version;
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 4b5001c5..d5ac0eb 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h
@@ -376,6 +376,7 @@ enum fuse_opcode { FUSE_READDIRPLUS = 44, FUSE_RENAME2 = 45, FUSE_LSEEK = 46, + FUSE_CANONICAL_PATH= 2016, /* CUSE specific operations */ CUSE_INIT = 4096,
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 1f00f0c..b9a7c97 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h
@@ -451,6 +451,16 @@ enum { #define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1) +/* XFRM section */ +enum { + IFLA_XFRM_UNSPEC, + IFLA_XFRM_LINK, + IFLA_XFRM_IF_ID, + __IFLA_XFRM_MAX +}; + +#define IFLA_XFRM_MAX (__IFLA_XFRM_MAX - 1) + enum macsec_validation_type { MACSEC_VALIDATE_DISABLED = 0, MACSEC_VALIDATE_CHECK = 1,
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index b22a9c4..8c3ea81 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h
@@ -166,6 +166,7 @@ enum { DEVCONF_ACCEPT_DAD, DEVCONF_FORCE_TLLAO, DEVCONF_NDISC_NOTIFY, + DEVCONF_ACCEPT_RA_RT_TABLE, DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL, DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL, DEVCONF_SUPPRESS_FRAG_NDISC,
diff --git a/include/uapi/linux/keychord.h b/include/uapi/linux/keychord.h new file mode 100644 index 0000000..ea7cf4d --- /dev/null +++ b/include/uapi/linux/keychord.h
@@ -0,0 +1,52 @@ +/* + * Key chord input driver + * + * Copyright (C) 2008 Google, Inc. + * Author: Mike Lockwood <lockwood@android.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * +*/ + +#ifndef _UAPI_LINUX_KEYCHORD_H_ +#define _UAPI_LINUX_KEYCHORD_H_ + +#include <linux/input.h> + +#define KEYCHORD_VERSION 1 + +/* + * One or more input_keychord structs are written to /dev/keychord + * at once to specify the list of keychords to monitor. + * Reading /dev/keychord returns the id of a keychord when the + * keychord combination is pressed. A keychord is signalled when + * all of the keys in the keycode list are in the pressed state. + * The order in which the keys are pressed does not matter. + * The keychord will not be signalled if keys not in the keycode + * list are pressed. + * Keychords will not be signalled on key release events. + */ +struct input_keychord { + /* should be KEYCHORD_VERSION */ + __u16 version; + /* + * client specified ID, returned from read() + * when this keychord is pressed. + */ + __u16 id; + + /* number of keycodes in this keychord */ + __u16 count; + + /* variable length array of keycodes */ + __u16 keycodes[]; +}; + +#endif /* _UAPI_LINUX_KEYCHORD_H_ */
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index aa50113..5bdb07f 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h
@@ -55,6 +55,8 @@ #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" +#define SDCARDFS_SUPER_MAGIC 0x5dca2df5 + #define SMB_SUPER_MAGIC 0x517B #define CGROUP_SUPER_MAGIC 0x27e0eb #define CGROUP2_SUPER_MAGIC 0x63677270
diff --git a/include/uapi/linux/netfilter/xt_IDLETIMER.h b/include/uapi/linux/netfilter/xt_IDLETIMER.h index 3c586a1..c82a1c1 100644 --- a/include/uapi/linux/netfilter/xt_IDLETIMER.h +++ b/include/uapi/linux/netfilter/xt_IDLETIMER.h
@@ -5,6 +5,7 @@ * Header file for Xtables timer target module. * * Copyright (C) 2004, 2010 Nokia Corporation + * * Written by Timo Teras <ext-timo.teras@nokia.com> * * Converted to x_tables and forward-ported to 2.6.34 @@ -33,12 +34,19 @@ #include <linux/types.h> #define MAX_IDLETIMER_LABEL_SIZE 28 +#define NLMSG_MAX_SIZE 64 + +#define NL_EVENT_TYPE_INACTIVE 0 +#define NL_EVENT_TYPE_ACTIVE 1 struct idletimer_tg_info { __u32 timeout; char label[MAX_IDLETIMER_LABEL_SIZE]; + /* Use netlink messages for notification in addition to sysfs */ + __u8 send_nl_msg; + /* for kernel module internal use only */ struct idletimer_tg *timer __attribute__((aligned(8))); };
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 214102f..6040dba 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h
@@ -211,4 +211,7 @@ struct prctl_mm_map { # define PR_SPEC_DISABLE (1UL << 2) # define PR_SPEC_FORCE_DISABLE (1UL << 3) +#define PR_SET_VMA 0x53564d41 +# define PR_SET_VMA_ANON_NAME 0 + #endif /* _LINUX_PRCTL_H */
diff --git a/include/uapi/linux/usb/f_accessory.h b/include/uapi/linux/usb/f_accessory.h new file mode 100644 index 0000000..0baeb7d --- /dev/null +++ b/include/uapi/linux/usb/f_accessory.h
@@ -0,0 +1,146 @@ +/* + * Gadget Function Driver for Android USB accessories + * + * Copyright (C) 2011 Google, Inc. + * Author: Mike Lockwood <lockwood@android.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _UAPI_LINUX_USB_F_ACCESSORY_H +#define _UAPI_LINUX_USB_F_ACCESSORY_H + +/* Use Google Vendor ID when in accessory mode */ +#define USB_ACCESSORY_VENDOR_ID 0x18D1 + + +/* Product ID to use when in accessory mode */ +#define USB_ACCESSORY_PRODUCT_ID 0x2D00 + +/* Product ID to use when in accessory mode and adb is enabled */ +#define USB_ACCESSORY_ADB_PRODUCT_ID 0x2D01 + +/* Indexes for strings sent by the host via ACCESSORY_SEND_STRING */ +#define ACCESSORY_STRING_MANUFACTURER 0 +#define ACCESSORY_STRING_MODEL 1 +#define ACCESSORY_STRING_DESCRIPTION 2 +#define ACCESSORY_STRING_VERSION 3 +#define ACCESSORY_STRING_URI 4 +#define ACCESSORY_STRING_SERIAL 5 + +/* Control request for retrieving device's protocol version + * + * requestType: USB_DIR_IN | USB_TYPE_VENDOR + * request: ACCESSORY_GET_PROTOCOL + * value: 0 + * index: 0 + * data version number (16 bits little endian) + * 1 for original accessory support + * 2 adds HID and device to host audio support + */ +#define ACCESSORY_GET_PROTOCOL 51 + +/* Control request for host to send a string to the device + * + * requestType: USB_DIR_OUT | USB_TYPE_VENDOR + * request: ACCESSORY_SEND_STRING + * value: 0 + * index: string ID + * data zero terminated UTF8 string + * + * The device can later retrieve these strings via the + * ACCESSORY_GET_STRING_* ioctls + */ +#define ACCESSORY_SEND_STRING 52 + +/* Control request for starting device in accessory mode. + * The host sends this after setting all its strings to the device. + * + * requestType: USB_DIR_OUT | USB_TYPE_VENDOR + * request: ACCESSORY_START + * value: 0 + * index: 0 + * data none + */ +#define ACCESSORY_START 53 + +/* Control request for registering a HID device. + * Upon registering, a unique ID is sent by the accessory in the + * value parameter. This ID will be used for future commands for + * the device + * + * requestType: USB_DIR_OUT | USB_TYPE_VENDOR + * request: ACCESSORY_REGISTER_HID_DEVICE + * value: Accessory assigned ID for the HID device + * index: total length of the HID report descriptor + * data none + */ +#define ACCESSORY_REGISTER_HID 54 + +/* Control request for unregistering a HID device. + * + * requestType: USB_DIR_OUT | USB_TYPE_VENDOR + * request: ACCESSORY_REGISTER_HID + * value: Accessory assigned ID for the HID device + * index: 0 + * data none + */ +#define ACCESSORY_UNREGISTER_HID 55 + +/* Control request for sending the HID report descriptor. + * If the HID descriptor is longer than the endpoint zero max packet size, + * the descriptor will be sent in multiple ACCESSORY_SET_HID_REPORT_DESC + * commands. The data for the descriptor must be sent sequentially + * if multiple packets are needed. + * + * requestType: USB_DIR_OUT | USB_TYPE_VENDOR + * request: ACCESSORY_SET_HID_REPORT_DESC + * value: Accessory assigned ID for the HID device + * index: offset of data in descriptor + * (needed when HID descriptor is too big for one packet) + * data the HID report descriptor + */ +#define ACCESSORY_SET_HID_REPORT_DESC 56 + +/* Control request for sending HID events. + * + * requestType: USB_DIR_OUT | USB_TYPE_VENDOR + * request: ACCESSORY_SEND_HID_EVENT + * value: Accessory assigned ID for the HID device + * index: 0 + * data the HID report for the event + */ +#define ACCESSORY_SEND_HID_EVENT 57 + +/* Control request for setting the audio mode. + * + * requestType: USB_DIR_OUT | USB_TYPE_VENDOR + * request: ACCESSORY_SET_AUDIO_MODE + * value: 0 - no audio + * 1 - device to host, 44100 16-bit stereo PCM + * index: 0 + * data none + */ +#define ACCESSORY_SET_AUDIO_MODE 58 + +/* ioctls for retrieving strings set by the host */ +#define ACCESSORY_GET_STRING_MANUFACTURER _IOW('M', 1, char[256]) +#define ACCESSORY_GET_STRING_MODEL _IOW('M', 2, char[256]) +#define ACCESSORY_GET_STRING_DESCRIPTION _IOW('M', 3, char[256]) +#define ACCESSORY_GET_STRING_VERSION _IOW('M', 4, char[256]) +#define ACCESSORY_GET_STRING_URI _IOW('M', 5, char[256]) +#define ACCESSORY_GET_STRING_SERIAL _IOW('M', 6, char[256]) +/* returns 1 if there is a start request pending */ +#define ACCESSORY_IS_START_REQUESTED _IO('M', 7) +/* returns audio mode (set via the ACCESSORY_SET_AUDIO_MODE control request) */ +#define ACCESSORY_GET_AUDIO_MODE _IO('M', 8) + +#endif /* _UAPI_LINUX_USB_F_ACCESSORY_H */
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index e3af285..5f3b9fe 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h
@@ -305,9 +305,12 @@ enum xfrm_attr_type_t { XFRMA_ADDRESS_FILTER, /* struct xfrm_address_filter */ XFRMA_PAD, XFRMA_OFFLOAD_DEV, /* struct xfrm_state_offload */ - XFRMA_OUTPUT_MARK, /* __u32 */ + XFRMA_SET_MARK, /* __u32 */ + XFRMA_SET_MARK_MASK, /* __u32 */ + XFRMA_IF_ID, /* __u32 */ __XFRMA_MAX +#define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */ #define XFRMA_MAX (__XFRMA_MAX - 1) };
diff --git a/init/Kconfig b/init/Kconfig index 4607532..37ecab2 100644 --- a/init/Kconfig +++ b/init/Kconfig
@@ -400,6 +400,15 @@ If in doubt, say N here. +config SCHED_WALT + bool "Support window based load tracking" + depends on SMP + help + This feature will allow the scheduler to maintain a tunable window + based set of metrics for tasks and runqueues. These metrics can be + used to guide task placement as well as task frequency requirements + for cpufreq governors. + config BSD_PROCESS_ACCT bool "BSD Process Accounting" depends on MULTIUSER @@ -586,6 +595,41 @@ config GENERIC_SCHED_CLOCK bool +menu "FAIR Scheuler tunables" + +choice + prompt "Utilization's PELT half-Life" + default PELT_UTIL_HALFLIFE_32 + help + Allows choosing one of the possible values for the PELT half-life to + be used for the update of the utilization of tasks and CPUs. + The half-life is the amount of [ms] required by the PELT signal to + build up to 50% utilization. The higher the half-life the longer it + takes for a task to be represented as a big one. + + If not sure, use the default of 32 ms. + +config PELT_UTIL_HALFLIFE_32 + bool "32 ms, default for server" + +config PELT_UTIL_HALFLIFE_16 + bool "16 ms, suggested for interactive workloads" + help + Use 16ms as PELT half-life value. This will increase the ramp-up and + decay of utlization and load twice as fast as for the default + configuration using 32ms. + +config PELT_UTIL_HALFLIFE_8 + bool "8 ms, very fast" + help + Use 8ms as PELT half-life value. This will increase the ramp-up and + decay of utlization and load four time as fast as for the default + configuration using 32ms. + +endchoice + +endmenu # FAIR Scheduler tunables" + # # For architectures that want to enable the support for NUMA-affine scheduler # balancing logic: @@ -959,6 +1003,39 @@ desktop applications. Task group autogeneration is currently based upon task session. +config SCHED_TUNE + bool "Boosting for CFS tasks (EXPERIMENTAL)" + depends on SMP + help + This option enables support for task classification using a new + cgroup controller, schedtune. Schedtune allows tasks to be given + a boost value and marked as latency-sensitive or not. This option + provides the "schedtune" controller. + + This new controller: + 1. allows only a two layers hierarchy, where the root defines the + system-wide boost value and its direct childrens define each one a + different "class of tasks" to be boosted with a different value + 2. supports up to 16 different task classes, each one which could be + configured with a different boost value + + Latency-sensitive tasks are not subject to energy-aware wakeup + task placement. The boost value assigned to tasks is used to + influence task placement and CPU frequency selection (if + utilization-driven frequency selection is in use). + + If unsure, say N. + +config DEFAULT_USE_ENERGY_AWARE + bool "Default to enabling the Energy Aware Scheduler feature" + default n + help + This option defaults the ENERGY_AWARE scheduling feature to true, + as without SCHED_DEBUG set this feature can't be enabled or disabled + via sysctl. + + Say N if unsure. + config SYSFS_DEPRECATED bool "Enable deprecated sysfs features to support old userspace tools" depends on SYSFS @@ -1887,7 +1964,7 @@ config MODULES_TREE_LOOKUP def_bool y - depends on PERF_EVENTS || TRACING + depends on PERF_EVENTS || TRACING || CFI_CLANG config INIT_ALL_POSSIBLE bool
diff --git a/init/Makefile b/init/Makefile index 1dbb237..0320e1a 100644 --- a/init/Makefile +++ b/init/Makefile
@@ -6,11 +6,8 @@ ccflags-y := -fno-function-sections -fno-data-sections obj-y := main.o version.o mounts.o -ifneq ($(CONFIG_BLK_DEV_INITRD),y) obj-y += noinitramfs.o -else obj-$(CONFIG_BLK_DEV_INITRD) += initramfs.o -endif obj-$(CONFIG_GENERIC_CALIBRATE_DELAY) += calibrate.o ifneq ($(CONFIG_ARCH_INIT_TASK),y) @@ -21,6 +18,7 @@ mounts-$(CONFIG_BLK_DEV_RAM) += do_mounts_rd.o mounts-$(CONFIG_BLK_DEV_INITRD) += do_mounts_initrd.o mounts-$(CONFIG_BLK_DEV_MD) += do_mounts_md.o +mounts-$(CONFIG_BLK_DEV_DM) += do_mounts_dm.o # dependencies on generated files need to be listed explicitly $(obj)/version.o: include/generated/compile.h
diff --git a/init/do_mounts.c b/init/do_mounts.c index 7cf4f6d..2782669 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c
@@ -565,6 +565,7 @@ void __init prepare_namespace(void) wait_for_device_probe(); md_run_setup(); + dm_run_setup(); if (saved_root_name[0]) { root_device_name = saved_root_name;
diff --git a/init/do_mounts.h b/init/do_mounts.h index 5b05c8f..cd20112 100644 --- a/init/do_mounts.h +++ b/init/do_mounts.h
@@ -61,3 +61,13 @@ void md_run_setup(void); static inline void md_run_setup(void) {} #endif + +#ifdef CONFIG_BLK_DEV_DM + +void dm_run_setup(void); + +#else + +static inline void dm_run_setup(void) {} + +#endif
diff --git a/init/do_mounts_dm.c b/init/do_mounts_dm.c new file mode 100644 index 0000000..af84b01 --- /dev/null +++ b/init/do_mounts_dm.c
@@ -0,0 +1,470 @@ +/* do_mounts_dm.c + * Copyright (C) 2010 The Chromium OS Authors <chromium-os-dev@chromium.org> + * All Rights Reserved. + * Based on do_mounts_md.c + * + * This file is released under the GPL. + */ +#include <linux/async.h> +#include <linux/ctype.h> +#include <linux/device-mapper.h> +#include <linux/fs.h> +#include <linux/string.h> +#include <linux/delay.h> + +#include "do_mounts.h" + +#define DM_MAX_DEVICES 256 +#define DM_MAX_TARGETS 256 +#define DM_MAX_NAME 32 +#define DM_MAX_UUID 129 +#define DM_NO_UUID "none" + +#define DM_MSG_PREFIX "init" + +/* Separators used for parsing the dm= argument. */ +#define DM_FIELD_SEP " " +#define DM_LINE_SEP "," +#define DM_ANY_SEP DM_FIELD_SEP DM_LINE_SEP + +/* + * When the device-mapper and any targets are compiled into the kernel + * (not a module), one or more device-mappers may be created and used + * as the root device at boot time with the parameters given with the + * boot line dm=... + * + * Multiple device-mappers can be stacked specifing the number of + * devices. A device can have multiple targets if the the number of + * targets is specified. + * + * TODO(taysom:defect 32847) + * In the future, the <num> field will be mandatory. + * + * <device> ::= [<num>] <device-mapper>+ + * <device-mapper> ::= <head> "," <target>+ + * <head> ::= <name> <uuid> <mode> [<num>] + * <target> ::= <start> <length> <type> <options> "," + * <mode> ::= "ro" | "rw" + * <uuid> ::= xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx | "none" + * <type> ::= "verity" | "bootcache" | ... + * + * Example: + * 2 vboot none ro 1, + * 0 1768000 bootcache + * device=aa55b119-2a47-8c45-946a-5ac57765011f+1 + * signature=76e9be054b15884a9fa85973e9cb274c93afadb6 + * cache_start=1768000 max_blocks=100000 size_limit=23 max_trace=20000, + * vroot none ro 1, + * 0 1740800 verity payload=254:0 hashtree=254:0 hashstart=1740800 alg=sha1 + * root_hexdigest=76e9be054b15884a9fa85973e9cb274c93afadb6 + * salt=5b3549d54d6c7a3837b9b81ed72e49463a64c03680c47835bef94d768e5646fe + * + * Notes: + * 1. uuid is a label for the device and we set it to "none". + * 2. The <num> field will be optional initially and assumed to be 1. + * Once all the scripts that set these fields have been set, it will + * be made mandatory. + */ + +struct dm_setup_target { + sector_t begin; + sector_t length; + char *type; + char *params; + /* simple singly linked list */ + struct dm_setup_target *next; +}; + +struct dm_device { + int minor; + int ro; + char name[DM_MAX_NAME]; + char uuid[DM_MAX_UUID]; + unsigned long num_targets; + struct dm_setup_target *target; + int target_count; + struct dm_device *next; +}; + +struct dm_option { + char *start; + char *next; + size_t len; + char delim; +}; + +static struct { + unsigned long num_devices; + char *str; +} dm_setup_args __initdata; + +static __initdata int dm_early_setup; + +static int __init get_dm_option(struct dm_option *opt, const char *accept) +{ + char *str = opt->next; + char *endp; + + if (!str) + return 0; + + str = skip_spaces(str); + opt->start = str; + endp = strpbrk(str, accept); + if (!endp) { /* act like strchrnul */ + opt->len = strlen(str); + endp = str + opt->len; + } else { + opt->len = endp - str; + } + opt->delim = *endp; + if (*endp == 0) { + /* Don't advance past the nul. */ + opt->next = endp; + } else { + opt->next = endp + 1; + } + return opt->len != 0; +} + +static int __init dm_setup_cleanup(struct dm_device *devices) +{ + struct dm_device *dev = devices; + + while (dev) { + struct dm_device *old_dev = dev; + struct dm_setup_target *target = dev->target; + while (target) { + struct dm_setup_target *old_target = target; + kfree(target->type); + kfree(target->params); + target = target->next; + kfree(old_target); + dev->target_count--; + } + BUG_ON(dev->target_count); + dev = dev->next; + kfree(old_dev); + } + return 0; +} + +static char * __init dm_parse_device(struct dm_device *dev, char *str) +{ + struct dm_option opt; + size_t len; + + /* Grab the logical name of the device to be exported to udev */ + opt.next = str; + if (!get_dm_option(&opt, DM_FIELD_SEP)) { + DMERR("failed to parse device name"); + goto parse_fail; + } + len = min(opt.len + 1, sizeof(dev->name)); + strlcpy(dev->name, opt.start, len); /* includes nul */ + + /* Grab the UUID value or "none" */ + if (!get_dm_option(&opt, DM_FIELD_SEP)) { + DMERR("failed to parse device uuid"); + goto parse_fail; + } + len = min(opt.len + 1, sizeof(dev->uuid)); + strlcpy(dev->uuid, opt.start, len); + + /* Determine if the table/device will be read only or read-write */ + get_dm_option(&opt, DM_ANY_SEP); + if (!strncmp("ro", opt.start, opt.len)) { + dev->ro = 1; + } else if (!strncmp("rw", opt.start, opt.len)) { + dev->ro = 0; + } else { + DMERR("failed to parse table mode"); + goto parse_fail; + } + + /* Optional number field */ + /* XXX: The <num> field will be mandatory in the next round */ + if (opt.delim == DM_FIELD_SEP[0]) { + if (!get_dm_option(&opt, DM_LINE_SEP)) + return NULL; + dev->num_targets = simple_strtoul(opt.start, NULL, 10); + } else { + dev->num_targets = 1; + } + if (dev->num_targets > DM_MAX_TARGETS) { + DMERR("too many targets %lu > %d", + dev->num_targets, DM_MAX_TARGETS); + } + return opt.next; + +parse_fail: + return NULL; +} + +static char * __init dm_parse_targets(struct dm_device *dev, char *str) +{ + struct dm_option opt; + struct dm_setup_target **target = &dev->target; + unsigned long num_targets = dev->num_targets; + unsigned long i; + + /* Targets are defined as per the table format but with a + * comma as a newline separator. */ + opt.next = str; + for (i = 0; i < num_targets; i++) { + *target = kzalloc(sizeof(struct dm_setup_target), GFP_KERNEL); + if (!*target) { + DMERR("failed to allocate memory for target %s<%ld>", + dev->name, i); + goto parse_fail; + } + dev->target_count++; + + if (!get_dm_option(&opt, DM_FIELD_SEP)) { + DMERR("failed to parse starting sector" + " for target %s<%ld>", dev->name, i); + goto parse_fail; + } + (*target)->begin = simple_strtoull(opt.start, NULL, 10); + + if (!get_dm_option(&opt, DM_FIELD_SEP)) { + DMERR("failed to parse length for target %s<%ld>", + dev->name, i); + goto parse_fail; + } + (*target)->length = simple_strtoull(opt.start, NULL, 10); + + if (get_dm_option(&opt, DM_FIELD_SEP)) + (*target)->type = kstrndup(opt.start, opt.len, + GFP_KERNEL); + if (!((*target)->type)) { + DMERR("failed to parse type for target %s<%ld>", + dev->name, i); + goto parse_fail; + } + if (get_dm_option(&opt, DM_LINE_SEP)) + (*target)->params = kstrndup(opt.start, opt.len, + GFP_KERNEL); + if (!((*target)->params)) { + DMERR("failed to parse params for target %s<%ld>", + dev->name, i); + goto parse_fail; + } + target = &((*target)->next); + } + DMDEBUG("parsed %d targets", dev->target_count); + + return opt.next; + +parse_fail: + return NULL; +} + +static struct dm_device * __init dm_parse_args(void) +{ + struct dm_device *devices = NULL; + struct dm_device **tail = &devices; + struct dm_device *dev; + char *str = dm_setup_args.str; + unsigned long num_devices = dm_setup_args.num_devices; + unsigned long i; + + if (!str) + return NULL; + for (i = 0; i < num_devices; i++) { + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) { + DMERR("failed to allocated memory for dev"); + goto error; + } + *tail = dev; + tail = &dev->next; + /* + * devices are given minor numbers 0 - n-1 + * in the order they are found in the arg + * string. + */ + dev->minor = i; + str = dm_parse_device(dev, str); + if (!str) /* NULL indicates error in parsing, bail */ + goto error; + + str = dm_parse_targets(dev, str); + if (!str) + goto error; + } + return devices; +error: + dm_setup_cleanup(devices); + return NULL; +} + +/* + * Parse the command-line parameters given our kernel, but do not + * actually try to invoke the DM device now; that is handled by + * dm_setup_drives after the low-level disk drivers have initialised. + * dm format is described at the top of the file. + * + * Because dm minor numbers are assigned in assending order starting with 0, + * You can assume the first device is /dev/dm-0, the next device is /dev/dm-1, + * and so forth. + */ +static int __init dm_setup(char *str) +{ + struct dm_option opt; + unsigned long num_devices; + + if (!str) { + DMDEBUG("str is NULL"); + goto parse_fail; + } + opt.next = str; + if (!get_dm_option(&opt, DM_FIELD_SEP)) + goto parse_fail; + if (isdigit(opt.start[0])) { /* XXX: Optional number field */ + num_devices = simple_strtoul(opt.start, NULL, 10); + str = opt.next; + } else { + num_devices = 1; + /* Don't advance str */ + } + if (num_devices > DM_MAX_DEVICES) { + DMDEBUG("too many devices %lu > %d", + num_devices, DM_MAX_DEVICES); + } + dm_setup_args.str = str; + dm_setup_args.num_devices = num_devices; + DMINFO("will configure %lu devices", num_devices); + dm_early_setup = 1; + return 1; + +parse_fail: + DMWARN("Invalid arguments supplied to dm=."); + return 0; +} + +static void __init dm_setup_drives(void) +{ + struct mapped_device *md = NULL; + struct dm_table *table = NULL; + struct dm_setup_target *target; + struct dm_device *dev; + char *uuid; + fmode_t fmode = FMODE_READ; + struct dm_device *devices; + + devices = dm_parse_args(); + + for (dev = devices; dev; dev = dev->next) { + if (dm_create(dev->minor, &md)) { + DMDEBUG("failed to create the device"); + goto dm_create_fail; + } + DMDEBUG("created device '%s'", dm_device_name(md)); + + /* + * In addition to flagging the table below, the disk must be + * set explicitly ro/rw. + */ + set_disk_ro(dm_disk(md), dev->ro); + + if (!dev->ro) + fmode |= FMODE_WRITE; + if (dm_table_create(&table, fmode, dev->target_count, md)) { + DMDEBUG("failed to create the table"); + goto dm_table_create_fail; + } + + dm_lock_md_type(md); + + for (target = dev->target; target; target = target->next) { + DMINFO("adding target '%llu %llu %s %s'", + (unsigned long long) target->begin, + (unsigned long long) target->length, + target->type, target->params); + if (dm_table_add_target(table, target->type, + target->begin, + target->length, + target->params)) { + DMDEBUG("failed to add the target" + " to the table"); + goto add_target_fail; + } + } + if (dm_table_complete(table)) { + DMDEBUG("failed to complete the table"); + goto table_complete_fail; + } + + /* Suspend the device so that we can bind it to the table. */ + if (dm_suspend(md, 0)) { + DMDEBUG("failed to suspend the device pre-bind"); + goto suspend_fail; + } + + /* Initial table load: acquire type of table. */ + dm_set_md_type(md, dm_table_get_type(table)); + + /* Setup md->queue to reflect md's type. */ + if (dm_setup_md_queue(md, table)) { + DMWARN("unable to set up device queue for new table."); + goto setup_md_queue_fail; + } + + /* + * Bind the table to the device. This is the only way + * to associate md->map with the table and set the disk + * capacity directly. + */ + if (dm_swap_table(md, table)) { /* should return NULL. */ + DMDEBUG("failed to bind the device to the table"); + goto table_bind_fail; + } + + /* Finally, resume and the device should be ready. */ + if (dm_resume(md)) { + DMDEBUG("failed to resume the device"); + goto resume_fail; + } + + /* Export the dm device via the ioctl interface */ + if (!strcmp(DM_NO_UUID, dev->uuid)) + uuid = NULL; + if (dm_ioctl_export(md, dev->name, uuid)) { + DMDEBUG("failed to export device with given" + " name and uuid"); + goto export_fail; + } + + dm_unlock_md_type(md); + + DMINFO("dm-%d is ready", dev->minor); + } + dm_setup_cleanup(devices); + return; + +export_fail: +resume_fail: +table_bind_fail: +setup_md_queue_fail: +suspend_fail: +table_complete_fail: +add_target_fail: + dm_unlock_md_type(md); +dm_table_create_fail: + dm_put(md); +dm_create_fail: + DMWARN("starting dm-%d (%s) failed", + dev->minor, dev->name); + dm_setup_cleanup(devices); +} + +__setup("dm=", dm_setup); + +void __init dm_run_setup(void) +{ + if (!dm_early_setup) + return; + DMINFO("attempting early device configuration."); + dm_setup_drives(); +}
diff --git a/init/initramfs.c b/init/initramfs.c index 7046fef..5ea7f1b 100644 --- a/init/initramfs.c +++ b/init/initramfs.c
@@ -20,6 +20,7 @@ #include <linux/syscalls.h> #include <linux/utime.h> #include <linux/file.h> +#include <linux/initramfs.h> static ssize_t __init xwrite(int fd, const char *p, size_t count) { @@ -607,10 +608,29 @@ static void __init clean_rootfs(void) } #endif +static int __initdata do_skip_initramfs; + +static int __init skip_initramfs_param(char *str) +{ + if (*str) + return 0; + do_skip_initramfs = 1; + return 1; +} +__setup("skip_initramfs", skip_initramfs_param); + static int __init populate_rootfs(void) { + char *err; + + if (do_skip_initramfs) { + if (initrd_start) + free_initrd(); + return default_rootfs(); + } + /* Load the built in initramfs */ - char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size); + err = unpack_to_rootfs(__initramfs_start, __initramfs_size); if (err) panic("%s", err); /* Failed to decompress INTERNAL initramfs */ /* If available load the bootloader supplied initrd */
diff --git a/init/noinitramfs.c b/init/noinitramfs.c index 267739d8..bcc8bcb0 100644 --- a/init/noinitramfs.c +++ b/init/noinitramfs.c
@@ -21,11 +21,16 @@ #include <linux/stat.h> #include <linux/kdev_t.h> #include <linux/syscalls.h> +#include <linux/kconfig.h> +#include <linux/initramfs.h> /* * Create a simple rootfs that is similar to the default initramfs */ -static int __init default_rootfs(void) +#if !IS_BUILTIN(CONFIG_BLK_DEV_INITRD) +static +#endif +int __init default_rootfs(void) { int err; @@ -49,4 +54,6 @@ static int __init default_rootfs(void) printk(KERN_WARNING "Failed to create a rootfs\n"); return err; } +#if !IS_BUILTIN(CONFIG_BLK_DEV_INITRD) rootfs_initcall(default_rootfs); +#endif
diff --git a/ipc/mqueue.c b/ipc/mqueue.c index d240256..5f46c15 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c
@@ -747,7 +747,7 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct inode *dir, } mode &= ~current_umask(); - ret = vfs_create(dir, path->dentry, mode, true); + ret = vfs_create2(path->mnt, dir, path->dentry, mode, true); path->dentry->d_fsdata = NULL; if (ret) return ERR_PTR(ret); @@ -763,7 +763,7 @@ static struct file *do_open(struct path *path, int oflag) if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) return ERR_PTR(-EINVAL); acc = oflag2acc[oflag & O_ACCMODE]; - if (inode_permission(d_inode(path->dentry), acc)) + if (inode_permission2(path->mnt, d_inode(path->dentry), acc)) return ERR_PTR(-EACCES); return dentry_open(path, oflag, current_cred()); } @@ -792,7 +792,7 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, ro = mnt_want_write(mnt); /* we'll drop it in any case */ error = 0; inode_lock(d_inode(root)); - path.dentry = lookup_one_len(name->name, root, strlen(name->name)); + path.dentry = lookup_one_len2(name->name, mnt, root, strlen(name->name)); if (IS_ERR(path.dentry)) { error = PTR_ERR(path.dentry); goto out_putfd; @@ -872,7 +872,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) if (err) goto out_name; inode_lock_nested(d_inode(mnt->mnt_root), I_MUTEX_PARENT); - dentry = lookup_one_len(name->name, mnt->mnt_root, + dentry = lookup_one_len2(name->name, mnt, mnt->mnt_root, strlen(name->name)); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); @@ -884,7 +884,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) err = -ENOENT; } else { ihold(inode); - err = vfs_unlink(d_inode(dentry->d_parent), dentry, NULL); + err = vfs_unlink2(mnt, d_inode(dentry->d_parent), dentry, NULL); } dput(dentry);
diff --git a/kernel/Makefile b/kernel/Makefile index 172d151d..de59ee7 100644 --- a/kernel/Makefile +++ b/kernel/Makefile
@@ -34,6 +34,9 @@ # cond_syscall is currently not LTO compatible CFLAGS_sys_ni.o = $(DISABLE_LTO) +# Don't instrument error handlers +CFLAGS_cfi.o = $(DISABLE_CFI_CLANG) + obj-y += sched/ obj-y += locking/ obj-y += power/ @@ -101,6 +104,7 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-$(CONFIG_CPU_PM) += cpu_pm.o obj-$(CONFIG_BPF) += bpf/ +obj-$(CONFIG_CFI_CLANG) += cfi.o obj-$(CONFIG_PERF_EVENTS) += events/
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index a8f55ea..148895e 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c
@@ -19,6 +19,9 @@ #include "map_in_map.h" +#define ARRAY_CREATE_FLAG_MASK \ + (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) + static void bpf_array_free_percpu(struct bpf_array *array) { int i; @@ -60,7 +63,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || - attr->value_size == 0 || attr->map_flags & ~BPF_F_NUMA_NODE || + attr->value_size == 0 || + attr->map_flags & ~ARRAY_CREATE_FLAG_MASK || (percpu && numa_node != NUMA_NO_NODE)) return ERR_PTR(-EINVAL);
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index e745d6a..ebdef54 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c
@@ -50,6 +50,9 @@ #include <linux/bpf.h> #include <linux/filter.h> +#define DEV_CREATE_FLAG_MASK \ + (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) + struct bpf_dtab_netdev { struct net_device *dev; struct bpf_dtab *dtab; @@ -83,7 +86,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || - attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE) + attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); dtab = kzalloc(sizeof(*dtab), GFP_USER);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 3d0ecc27..73fad0b5 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c
@@ -18,8 +18,9 @@ #include "bpf_lru_list.h" #include "map_in_map.h" -#define HTAB_CREATE_FLAG_MASK \ - (BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE) +#define HTAB_CREATE_FLAG_MASK \ + (BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE | \ + BPF_F_RDONLY | BPF_F_WRONLY) struct bucket { struct hlist_nulls_head head;
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index be1dde9..01aaef1 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c
@@ -295,7 +295,7 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname) } static void *bpf_obj_do_get(const struct filename *pathname, - enum bpf_type *type) + enum bpf_type *type, int flags) { struct inode *inode; struct path path; @@ -307,7 +307,7 @@ static void *bpf_obj_do_get(const struct filename *pathname, return ERR_PTR(ret); inode = d_backing_inode(path.dentry); - ret = inode_permission(inode, MAY_WRITE); + ret = inode_permission(inode, ACC_MODE(flags)); if (ret) goto out; @@ -326,18 +326,23 @@ static void *bpf_obj_do_get(const struct filename *pathname, return ERR_PTR(ret); } -int bpf_obj_get_user(const char __user *pathname) +int bpf_obj_get_user(const char __user *pathname, int flags) { enum bpf_type type = BPF_TYPE_UNSPEC; struct filename *pname; int ret = -ENOENT; + int f_flags; void *raw; + f_flags = bpf_get_file_flag(flags); + if (f_flags < 0) + return f_flags; + pname = getname(pathname); if (IS_ERR(pname)) return PTR_ERR(pname); - raw = bpf_obj_do_get(pname, &type); + raw = bpf_obj_do_get(pname, &type, f_flags); if (IS_ERR(raw)) { ret = PTR_ERR(raw); goto out; @@ -346,7 +351,7 @@ int bpf_obj_get_user(const char __user *pathname) if (type == BPF_TYPE_PROG) ret = bpf_prog_new_fd(raw); else if (type == BPF_TYPE_MAP) - ret = bpf_map_new_fd(raw); + ret = bpf_map_new_fd(raw, f_flags); else goto out;
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index c28c584..224362f 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c
@@ -406,7 +406,8 @@ static int trie_delete_elem(struct bpf_map *map, void *key) #define LPM_KEY_SIZE_MAX LPM_KEY_SIZE(LPM_DATA_SIZE_MAX) #define LPM_KEY_SIZE_MIN LPM_KEY_SIZE(LPM_DATA_SIZE_MIN) -#define LPM_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_NUMA_NODE) +#define LPM_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_NUMA_NODE | \ + BPF_F_RDONLY | BPF_F_WRONLY) static struct bpf_map *trie_alloc(union bpf_attr *attr) {
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 22991e19..0fffca42 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c
@@ -41,6 +41,9 @@ #include <net/strparser.h> #include <net/tcp.h> +#define SOCK_CREATE_FLAG_MASK \ + (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) + struct bpf_stab { struct bpf_map map; struct sock **sock_map; @@ -511,7 +514,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || - attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE) + attr->value_size != 4 || attr->map_flags & ~SOCK_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); if (attr->value_size > KMALLOC_MAX_SIZE)
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 135be43..a15bc63 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c
@@ -11,6 +11,9 @@ #include <linux/perf_event.h> #include "percpu_freelist.h" +#define STACK_CREATE_FLAG_MASK \ + (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) + struct stack_map_bucket { struct pcpu_freelist_node fnode; u32 hash; @@ -60,7 +63,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) if (!capable(CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); - if (attr->map_flags & ~BPF_F_NUMA_NODE) + if (attr->map_flags & ~STACK_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); /* check sanity of attributes */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 5c9deed..b922bbc 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c
@@ -31,6 +31,8 @@ #define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) #define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map)) +#define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY) + DEFINE_PER_CPU(int, bpf_prog_active); static DEFINE_IDR(prog_idr); static DEFINE_SPINLOCK(prog_idr_lock); @@ -207,6 +209,7 @@ static void bpf_map_free_deferred(struct work_struct *work) struct bpf_map *map = container_of(work, struct bpf_map, work); bpf_map_uncharge_memlock(map); + security_bpf_map_free(map); /* implementation dependent freeing */ map->ops->map_free(map); } @@ -291,17 +294,54 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) } #endif -static const struct file_operations bpf_map_fops = { +static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz, + loff_t *ppos) +{ + /* We need this handler such that alloc_file() enables + * f_mode with FMODE_CAN_READ. + */ + return -EINVAL; +} + +static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf, + size_t siz, loff_t *ppos) +{ + /* We need this handler such that alloc_file() enables + * f_mode with FMODE_CAN_WRITE. + */ + return -EINVAL; +} + +const struct file_operations bpf_map_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = bpf_map_show_fdinfo, #endif .release = bpf_map_release, + .read = bpf_dummy_read, + .write = bpf_dummy_write, }; -int bpf_map_new_fd(struct bpf_map *map) +int bpf_map_new_fd(struct bpf_map *map, int flags) { + int ret; + + ret = security_bpf_map(map, OPEN_FMODE(flags)); + if (ret < 0) + return ret; + return anon_inode_getfd("bpf-map", &bpf_map_fops, map, - O_RDWR | O_CLOEXEC); + flags | O_CLOEXEC); +} + +int bpf_get_file_flag(int flags) +{ + if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY)) + return -EINVAL; + if (flags & BPF_F_RDONLY) + return O_RDONLY; + if (flags & BPF_F_WRONLY) + return O_WRONLY; + return O_RDWR; } /* helper macro to check that unused fields 'union bpf_attr' are zero */ @@ -318,12 +358,17 @@ static int map_create(union bpf_attr *attr) { int numa_node = bpf_map_attr_numa_node(attr); struct bpf_map *map; + int f_flags; int err; err = CHECK_ATTR(BPF_MAP_CREATE); if (err) return -EINVAL; + f_flags = bpf_get_file_flag(attr->map_flags); + if (f_flags < 0) + return f_flags; + if (numa_node != NUMA_NO_NODE && ((unsigned int)numa_node >= nr_node_ids || !node_online(numa_node))) @@ -337,15 +382,19 @@ static int map_create(union bpf_attr *attr) atomic_set(&map->refcnt, 1); atomic_set(&map->usercnt, 1); - err = bpf_map_charge_memlock(map); + err = security_bpf_map_alloc(map); if (err) goto free_map_nouncharge; + err = bpf_map_charge_memlock(map); + if (err) + goto free_map_sec; + err = bpf_map_alloc_id(map); if (err) goto free_map; - err = bpf_map_new_fd(map); + err = bpf_map_new_fd(map, f_flags); if (err < 0) { /* failed to allocate fd. * bpf_map_put() is needed because the above @@ -362,6 +411,8 @@ static int map_create(union bpf_attr *attr) free_map: bpf_map_uncharge_memlock(map); +free_map_sec: + security_bpf_map_free(map); free_map_nouncharge: map->ops->map_free(map); return err; @@ -460,6 +511,11 @@ static int map_lookup_elem(union bpf_attr *attr) if (IS_ERR(map)) return PTR_ERR(map); + if (!(f.file->f_mode & FMODE_CAN_READ)) { + err = -EPERM; + goto err_put; + } + key = memdup_user(ukey, map->key_size); if (IS_ERR(key)) { err = PTR_ERR(key); @@ -551,6 +607,11 @@ static int map_update_elem(union bpf_attr *attr) if (IS_ERR(map)) return PTR_ERR(map); + if (!(f.file->f_mode & FMODE_CAN_WRITE)) { + err = -EPERM; + goto err_put; + } + key = memdup_user(ukey, map->key_size); if (IS_ERR(key)) { err = PTR_ERR(key); @@ -635,6 +696,11 @@ static int map_delete_elem(union bpf_attr *attr) if (IS_ERR(map)) return PTR_ERR(map); + if (!(f.file->f_mode & FMODE_CAN_WRITE)) { + err = -EPERM; + goto err_put; + } + key = memdup_user(ukey, map->key_size); if (IS_ERR(key)) { err = PTR_ERR(key); @@ -679,6 +745,11 @@ static int map_get_next_key(union bpf_attr *attr) if (IS_ERR(map)) return PTR_ERR(map); + if (!(f.file->f_mode & FMODE_CAN_READ)) { + err = -EPERM; + goto err_put; + } + if (ukey) { key = memdup_user(ukey, map->key_size); if (IS_ERR(key)) { @@ -833,6 +904,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu) free_used_maps(aux); bpf_prog_uncharge_memlock(aux->prog); + security_bpf_prog_free(aux); bpf_prog_free(aux->prog); } @@ -880,15 +952,23 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) } #endif -static const struct file_operations bpf_prog_fops = { +const struct file_operations bpf_prog_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = bpf_prog_show_fdinfo, #endif .release = bpf_prog_release, + .read = bpf_dummy_read, + .write = bpf_dummy_write, }; int bpf_prog_new_fd(struct bpf_prog *prog) { + int ret; + + ret = security_bpf_prog(prog); + if (ret < 0) + return ret; + return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC); } @@ -1028,10 +1108,14 @@ static int bpf_prog_load(union bpf_attr *attr) if (!prog) return -ENOMEM; - err = bpf_prog_charge_memlock(prog); + err = security_bpf_prog_alloc(prog->aux); if (err) goto free_prog_nouncharge; + err = bpf_prog_charge_memlock(prog); + if (err) + goto free_prog_sec; + prog->len = attr->insn_cnt; err = -EFAULT; @@ -1084,16 +1168,18 @@ static int bpf_prog_load(union bpf_attr *attr) free_used_maps(prog->aux); free_prog: bpf_prog_uncharge_memlock(prog); +free_prog_sec: + security_bpf_prog_free(prog->aux); free_prog_nouncharge: bpf_prog_free(prog); return err; } -#define BPF_OBJ_LAST_FIELD bpf_fd +#define BPF_OBJ_LAST_FIELD file_flags static int bpf_obj_pin(const union bpf_attr *attr) { - if (CHECK_ATTR(BPF_OBJ)) + if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0) return -EINVAL; return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname)); @@ -1101,10 +1187,12 @@ static int bpf_obj_pin(const union bpf_attr *attr) static int bpf_obj_get(const union bpf_attr *attr) { - if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0) + if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 || + attr->file_flags & ~BPF_OBJ_FLAG_MASK) return -EINVAL; - return bpf_obj_get_user(u64_to_user_ptr(attr->pathname)); + return bpf_obj_get_user(u64_to_user_ptr(attr->pathname), + attr->file_flags); } #ifdef CONFIG_CGROUP_BPF @@ -1318,20 +1406,26 @@ static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) return fd; } -#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD map_id +#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags static int bpf_map_get_fd_by_id(const union bpf_attr *attr) { struct bpf_map *map; u32 id = attr->map_id; + int f_flags; int fd; - if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID)) + if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) || + attr->open_flags & ~BPF_OBJ_FLAG_MASK) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; + f_flags = bpf_get_file_flag(attr->open_flags); + if (f_flags < 0) + return f_flags; + spin_lock_bh(&map_idr_lock); map = idr_find(&map_idr, id); if (map) @@ -1343,7 +1437,7 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr) if (IS_ERR(map)) return PTR_ERR(map); - fd = bpf_map_new_fd(map); + fd = bpf_map_new_fd(map, f_flags); if (fd < 0) bpf_map_put(map); @@ -1480,6 +1574,10 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz if (copy_from_user(&attr, uattr, size) != 0) return -EFAULT; + err = security_bpf(cmd, &attr, size); + if (err < 0) + return err; + switch (cmd) { case BPF_MAP_CREATE: err = map_create(&attr);
diff --git a/kernel/cfi.c b/kernel/cfi.c new file mode 100644 index 0000000..3265b55 --- /dev/null +++ b/kernel/cfi.c
@@ -0,0 +1,308 @@ +/* + * CFI (Control Flow Integrity) error and slowpath handling + * + * Copyright (C) 2017 Google, Inc. + */ + +#include <linux/gfp.h> +#include <linux/module.h> +#include <linux/printk.h> +#include <linux/ratelimit.h> +#include <linux/rcupdate.h> +#include <linux/spinlock.h> +#include <asm/bug.h> +#include <asm/cacheflush.h> +#include <asm/memory.h> +#include <asm/set_memory.h> + +/* Compiler-defined handler names */ +#ifdef CONFIG_CFI_PERMISSIVE +#define cfi_failure_handler __ubsan_handle_cfi_check_fail +#define cfi_slowpath_handler __cfi_slowpath_diag +#else /* enforcing */ +#define cfi_failure_handler __ubsan_handle_cfi_check_fail_abort +#define cfi_slowpath_handler __cfi_slowpath +#endif /* CONFIG_CFI_PERMISSIVE */ + +static inline void handle_cfi_failure(void *ptr) +{ +#ifdef CONFIG_CFI_PERMISSIVE + WARN_RATELIMIT(1, "CFI failure (target: [<%px>] %pF):\n", ptr, ptr); +#else + pr_err("CFI failure (target: [<%px>] %pF):\n", ptr, ptr); + BUG(); +#endif +} + +#ifdef CONFIG_MODULES +#ifdef CONFIG_CFI_CLANG_SHADOW +struct shadow_range { + /* Module address range */ + unsigned long mod_min_addr; + unsigned long mod_max_addr; + /* Module page range */ + unsigned long min_page; + unsigned long max_page; +}; + +#define SHADOW_ORDER 1 +#define SHADOW_PAGES (1 << SHADOW_ORDER) +#define SHADOW_SIZE \ + ((SHADOW_PAGES * PAGE_SIZE - sizeof(struct shadow_range)) / sizeof(u16)) +#define SHADOW_INVALID 0xFFFF + +struct cfi_shadow { + /* Page range covered by the shadow */ + struct shadow_range r; + /* Page offsets to __cfi_check functions in modules */ + u16 shadow[SHADOW_SIZE]; +}; + +static DEFINE_SPINLOCK(shadow_update_lock); +static struct cfi_shadow __rcu *cfi_shadow __read_mostly = NULL; + +static inline int ptr_to_shadow(const struct cfi_shadow *s, unsigned long ptr) +{ + unsigned long index; + unsigned long page = ptr >> PAGE_SHIFT; + + if (unlikely(page < s->r.min_page)) + return -1; /* Outside of module area */ + + index = page - s->r.min_page; + + if (index >= SHADOW_SIZE) + return -1; /* Cannot be addressed with shadow */ + + return (int)index; +} + +static inline unsigned long shadow_to_ptr(const struct cfi_shadow *s, + int index) +{ + BUG_ON(index < 0 || index >= SHADOW_SIZE); + + if (unlikely(s->shadow[index] == SHADOW_INVALID)) + return 0; + + return (s->r.min_page + s->shadow[index]) << PAGE_SHIFT; +} + +static inline unsigned long shadow_to_page(const struct cfi_shadow *s, + int index) +{ + BUG_ON(index < 0 || index >= SHADOW_SIZE); + + return (s->r.min_page + index) << PAGE_SHIFT; +} + +static void prepare_next_shadow(const struct cfi_shadow __rcu *prev, + struct cfi_shadow *next) +{ + int i, index, check; + + /* Mark everything invalid */ + memset(next->shadow, 0xFF, sizeof(next->shadow)); + + if (!prev) + return; /* No previous shadow */ + + /* If the base address didn't change, update is not needed */ + if (prev->r.min_page == next->r.min_page) { + memcpy(next->shadow, prev->shadow, sizeof(next->shadow)); + return; + } + + /* Convert the previous shadow to the new address range */ + for (i = 0; i < SHADOW_SIZE; ++i) { + if (prev->shadow[i] == SHADOW_INVALID) + continue; + + index = ptr_to_shadow(next, shadow_to_page(prev, i)); + if (index < 0) + continue; + + check = ptr_to_shadow(next, + shadow_to_ptr(prev, prev->shadow[i])); + if (check < 0) + continue; + + next->shadow[index] = (u16)check; + } +} + +static void add_module_to_shadow(struct cfi_shadow *s, struct module *mod) +{ + unsigned long ptr; + unsigned long min_page_addr; + unsigned long max_page_addr; + unsigned long check = (unsigned long)mod->cfi_check; + int check_index = ptr_to_shadow(s, check); + + BUG_ON((check & PAGE_MASK) != check); /* Must be page aligned */ + + if (check_index < 0) + return; /* Module not addressable with shadow */ + + min_page_addr = (unsigned long)mod->core_layout.base & PAGE_MASK; + max_page_addr = (unsigned long)mod->core_layout.base + + mod->core_layout.text_size; + max_page_addr &= PAGE_MASK; + + /* For each page, store the check function index in the shadow */ + for (ptr = min_page_addr; ptr <= max_page_addr; ptr += PAGE_SIZE) { + int index = ptr_to_shadow(s, ptr); + if (index >= 0) { + /* Assume a page only contains code for one module */ + BUG_ON(s->shadow[index] != SHADOW_INVALID); + s->shadow[index] = (u16)check_index; + } + } +} + +static void remove_module_from_shadow(struct cfi_shadow *s, struct module *mod) +{ + unsigned long ptr; + unsigned long min_page_addr; + unsigned long max_page_addr; + + min_page_addr = (unsigned long)mod->core_layout.base & PAGE_MASK; + max_page_addr = (unsigned long)mod->core_layout.base + + mod->core_layout.text_size; + max_page_addr &= PAGE_MASK; + + for (ptr = min_page_addr; ptr <= max_page_addr; ptr += PAGE_SIZE) { + int index = ptr_to_shadow(s, ptr); + if (index >= 0) + s->shadow[index] = SHADOW_INVALID; + } +} + +typedef void (*update_shadow_fn)(struct cfi_shadow *, struct module *); + +static void update_shadow(struct module *mod, unsigned long min_addr, + unsigned long max_addr, update_shadow_fn fn) +{ + struct cfi_shadow *prev; + struct cfi_shadow *next = (struct cfi_shadow *) + __get_free_pages(GFP_KERNEL, SHADOW_ORDER); + + BUG_ON(!next); + + next->r.mod_min_addr = min_addr; + next->r.mod_max_addr = max_addr; + next->r.min_page = min_addr >> PAGE_SHIFT; + next->r.max_page = max_addr >> PAGE_SHIFT; + + spin_lock(&shadow_update_lock); + prev = rcu_dereference_protected(cfi_shadow, 1); + prepare_next_shadow(prev, next); + + fn(next, mod); + set_memory_ro((unsigned long)next, SHADOW_PAGES); + rcu_assign_pointer(cfi_shadow, next); + + spin_unlock(&shadow_update_lock); + synchronize_rcu(); + + if (prev) { + set_memory_rw((unsigned long)prev, SHADOW_PAGES); + free_pages((unsigned long)prev, SHADOW_ORDER); + } +} + +void cfi_module_add(struct module *mod, unsigned long min_addr, + unsigned long max_addr) +{ + update_shadow(mod, min_addr, max_addr, add_module_to_shadow); +} +EXPORT_SYMBOL(cfi_module_add); + +void cfi_module_remove(struct module *mod, unsigned long min_addr, + unsigned long max_addr) +{ + update_shadow(mod, min_addr, max_addr, remove_module_from_shadow); +} +EXPORT_SYMBOL(cfi_module_remove); + +static inline cfi_check_fn ptr_to_check_fn(const struct cfi_shadow __rcu *s, + unsigned long ptr) +{ + int index; + unsigned long check; + + if (unlikely(!s)) + return NULL; /* No shadow available */ + + if (ptr < s->r.mod_min_addr || ptr > s->r.mod_max_addr) + return NULL; /* Not in a mapped module */ + + index = ptr_to_shadow(s, ptr); + if (index < 0) + return NULL; /* Cannot be addressed with shadow */ + + return (cfi_check_fn)shadow_to_ptr(s, index); +} +#endif /* CONFIG_CFI_CLANG_SHADOW */ + +static inline cfi_check_fn find_module_cfi_check(void *ptr) +{ + struct module *mod; + + preempt_disable(); + mod = __module_address((unsigned long)ptr); + preempt_enable(); + + if (mod) + return mod->cfi_check; + + return CFI_CHECK_FN; +} + +static inline cfi_check_fn find_cfi_check(void *ptr) +{ +#ifdef CONFIG_CFI_CLANG_SHADOW + cfi_check_fn f; + + if (!rcu_access_pointer(cfi_shadow)) + return CFI_CHECK_FN; /* No loaded modules */ + + /* Look up the __cfi_check function to use */ + rcu_read_lock(); + f = ptr_to_check_fn(rcu_dereference(cfi_shadow), (unsigned long)ptr); + rcu_read_unlock(); + + if (f) + return f; + + /* + * Fall back to find_module_cfi_check, which works also for a larger + * module address space, but is slower. + */ +#endif /* CONFIG_CFI_CLANG_SHADOW */ + + return find_module_cfi_check(ptr); +} + +void cfi_slowpath_handler(uint64_t id, void *ptr, void *diag) +{ + cfi_check_fn check = find_cfi_check(ptr); + + if (likely(check)) + check(id, ptr, diag); + else /* Don't allow unchecked modules */ + handle_cfi_failure(ptr); +} +EXPORT_SYMBOL(cfi_slowpath_handler); +#endif /* CONFIG_MODULES */ + +void cfi_failure_handler(void *data, void *ptr, void *vtable) +{ + handle_cfi_failure(ptr); +} +EXPORT_SYMBOL(cfi_failure_handler); + +void __cfi_check_fail(void *data, void *ptr) +{ + handle_cfi_failure(ptr); +}
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index a2c05d2..292b601 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c
@@ -541,7 +541,8 @@ static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of, tcred = get_task_cred(task); if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && !uid_eq(cred->euid, tcred->uid) && - !uid_eq(cred->euid, tcred->suid)) + !uid_eq(cred->euid, tcred->suid) && + !ns_capable(tcred->user_ns, CAP_SYS_NICE)) ret = -EACCES; put_cred(tcred); if (ret)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 4657e29..2982fb7 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c
@@ -103,6 +103,7 @@ struct cpuset { /* user-configured CPUs and Memory Nodes allow to tasks */ cpumask_var_t cpus_allowed; + cpumask_var_t cpus_requested; nodemask_t mems_allowed; /* effective CPUs and Memory Nodes allow to tasks */ @@ -412,7 +413,7 @@ static void cpuset_update_task_spread_flag(struct cpuset *cs, static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) { - return cpumask_subset(p->cpus_allowed, q->cpus_allowed) && + return cpumask_subset(p->cpus_requested, q->cpus_requested) && nodes_subset(p->mems_allowed, q->mems_allowed) && is_cpu_exclusive(p) <= is_cpu_exclusive(q) && is_mem_exclusive(p) <= is_mem_exclusive(q); @@ -511,7 +512,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) cpuset_for_each_child(c, css, par) { if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && c != cur && - cpumask_intersects(trial->cpus_allowed, c->cpus_allowed)) + cpumask_intersects(trial->cpus_requested, c->cpus_requested)) goto out; if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) && c != cur && @@ -976,17 +977,18 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, if (!*buf) { cpumask_clear(trialcs->cpus_allowed); } else { - retval = cpulist_parse(buf, trialcs->cpus_allowed); + retval = cpulist_parse(buf, trialcs->cpus_requested); if (retval < 0) return retval; - if (!cpumask_subset(trialcs->cpus_allowed, - top_cpuset.cpus_allowed)) + if (!cpumask_subset(trialcs->cpus_requested, cpu_present_mask)) return -EINVAL; + + cpumask_and(trialcs->cpus_allowed, trialcs->cpus_requested, cpu_active_mask); } /* Nothing to do if the cpus didn't change */ - if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed)) + if (cpumask_equal(cs->cpus_requested, trialcs->cpus_requested)) return 0; retval = validate_change(cs, trialcs); @@ -995,6 +997,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, spin_lock_irq(&callback_lock); cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); + cpumask_copy(cs->cpus_requested, trialcs->cpus_requested); spin_unlock_irq(&callback_lock); /* use trialcs->cpus_allowed as a temp variable */ @@ -1763,7 +1766,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) switch (type) { case FILE_CPULIST: - seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed)); + seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_requested)); break; case FILE_MEMLIST: seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed)); @@ -1953,11 +1956,14 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css) return ERR_PTR(-ENOMEM); if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) goto free_cs; + if (!alloc_cpumask_var(&cs->cpus_requested, GFP_KERNEL)) + goto free_allowed; if (!alloc_cpumask_var(&cs->effective_cpus, GFP_KERNEL)) - goto free_cpus; + goto free_requested; set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); cpumask_clear(cs->cpus_allowed); + cpumask_clear(cs->cpus_requested); nodes_clear(cs->mems_allowed); cpumask_clear(cs->effective_cpus); nodes_clear(cs->effective_mems); @@ -1966,7 +1972,9 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css) return &cs->css; -free_cpus: +free_requested: + free_cpumask_var(cs->cpus_requested); +free_allowed: free_cpumask_var(cs->cpus_allowed); free_cs: kfree(cs); @@ -2029,6 +2037,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) cs->mems_allowed = parent->mems_allowed; cs->effective_mems = parent->mems_allowed; cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); + cpumask_copy(cs->cpus_requested, parent->cpus_requested); cpumask_copy(cs->effective_cpus, parent->cpus_allowed); spin_unlock_irq(&callback_lock); out_unlock: @@ -2063,6 +2072,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css) free_cpumask_var(cs->effective_cpus); free_cpumask_var(cs->cpus_allowed); + free_cpumask_var(cs->cpus_requested); kfree(cs); } @@ -2125,8 +2135,10 @@ int __init cpuset_init(void) BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL)); + BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_requested, GFP_KERNEL)); cpumask_setall(top_cpuset.cpus_allowed); + cpumask_setall(top_cpuset.cpus_requested); nodes_setall(top_cpuset.mems_allowed); cpumask_setall(top_cpuset.effective_cpus); nodes_setall(top_cpuset.effective_mems); @@ -2259,7 +2271,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs) goto retry; } - cpumask_and(&new_cpus, cs->cpus_allowed, parent_cs(cs)->effective_cpus); + cpumask_and(&new_cpus, cs->cpus_requested, parent_cs(cs)->effective_cpus); nodes_and(new_mems, cs->mems_allowed, parent_cs(cs)->effective_mems); cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
diff --git a/kernel/configs/android-base.config b/kernel/configs/android-base.config deleted file mode 100644 index d3fd428..0000000 --- a/kernel/configs/android-base.config +++ /dev/null
@@ -1,161 +0,0 @@ -# KEEP ALPHABETICALLY SORTED -# CONFIG_DEVKMEM is not set -# CONFIG_DEVMEM is not set -# CONFIG_FHANDLE is not set -# CONFIG_INET_LRO is not set -# CONFIG_NFSD is not set -# CONFIG_NFS_FS is not set -# CONFIG_OABI_COMPAT is not set -# CONFIG_SYSVIPC is not set -# CONFIG_USELIB is not set -CONFIG_ANDROID=y -CONFIG_ANDROID_BINDER_IPC=y -CONFIG_ANDROID_BINDER_DEVICES=binder,hwbinder,vndbinder -CONFIG_ANDROID_LOW_MEMORY_KILLER=y -CONFIG_ARMV8_DEPRECATED=y -CONFIG_ASHMEM=y -CONFIG_AUDIT=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_CGROUPS=y -CONFIG_CGROUP_BPF=y -CONFIG_CGROUP_CPUACCT=y -CONFIG_CGROUP_DEBUG=y -CONFIG_CGROUP_FREEZER=y -CONFIG_CGROUP_SCHED=y -CONFIG_CP15_BARRIER_EMULATION=y -CONFIG_DEFAULT_SECURITY_SELINUX=y -CONFIG_EMBEDDED=y -CONFIG_FB=y -CONFIG_HARDENED_USERCOPY=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_INET6_AH=y -CONFIG_INET6_ESP=y -CONFIG_INET6_IPCOMP=y -CONFIG_INET=y -CONFIG_INET_DIAG_DESTROY=y -CONFIG_INET_ESP=y -CONFIG_INET_XFRM_MODE_TUNNEL=y -CONFIG_IP6_NF_FILTER=y -CONFIG_IP6_NF_IPTABLES=y -CONFIG_IP6_NF_MANGLE=y -CONFIG_IP6_NF_RAW=y -CONFIG_IP6_NF_TARGET_REJECT=y -CONFIG_IPV6=y -CONFIG_IPV6_MIP6=y -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_IPV6_OPTIMISTIC_DAD=y -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_IPV6_ROUTE_INFO=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_NF_ARPFILTER=y -CONFIG_IP_NF_ARPTABLES=y -CONFIG_IP_NF_ARP_MANGLE=y -CONFIG_IP_NF_FILTER=y -CONFIG_IP_NF_IPTABLES=y -CONFIG_IP_NF_MANGLE=y -CONFIG_IP_NF_MATCH_AH=y -CONFIG_IP_NF_MATCH_ECN=y -CONFIG_IP_NF_MATCH_TTL=y -CONFIG_IP_NF_NAT=y -CONFIG_IP_NF_RAW=y -CONFIG_IP_NF_SECURITY=y -CONFIG_IP_NF_TARGET_MASQUERADE=y -CONFIG_IP_NF_TARGET_NETMAP=y -CONFIG_IP_NF_TARGET_REDIRECT=y -CONFIG_IP_NF_TARGET_REJECT=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_NET=y -CONFIG_NETDEVICES=y -CONFIG_NETFILTER=y -CONFIG_NETFILTER_TPROXY=y -CONFIG_NETFILTER_XT_MATCH_COMMENT=y -CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y -CONFIG_NETFILTER_XT_MATCH_CONNMARK=y -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y -CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y -CONFIG_NETFILTER_XT_MATCH_HELPER=y -CONFIG_NETFILTER_XT_MATCH_IPRANGE=y -CONFIG_NETFILTER_XT_MATCH_LENGTH=y -CONFIG_NETFILTER_XT_MATCH_LIMIT=y -CONFIG_NETFILTER_XT_MATCH_MAC=y -CONFIG_NETFILTER_XT_MATCH_MARK=y -CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y -CONFIG_NETFILTER_XT_MATCH_POLICY=y -CONFIG_NETFILTER_XT_MATCH_QUOTA=y -CONFIG_NETFILTER_XT_MATCH_SOCKET=y -CONFIG_NETFILTER_XT_MATCH_STATE=y -CONFIG_NETFILTER_XT_MATCH_STATISTIC=y -CONFIG_NETFILTER_XT_MATCH_STRING=y -CONFIG_NETFILTER_XT_MATCH_TIME=y -CONFIG_NETFILTER_XT_MATCH_U32=y -CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y -CONFIG_NETFILTER_XT_TARGET_CONNMARK=y -CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y -CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y -CONFIG_NETFILTER_XT_TARGET_MARK=y -CONFIG_NETFILTER_XT_TARGET_NFLOG=y -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y -CONFIG_NETFILTER_XT_TARGET_SECMARK=y -CONFIG_NETFILTER_XT_TARGET_TCPMSS=y -CONFIG_NETFILTER_XT_TARGET_TPROXY=y -CONFIG_NETFILTER_XT_TARGET_TRACE=y -CONFIG_NET_CLS_ACT=y -CONFIG_NET_CLS_U32=y -CONFIG_NET_EMATCH=y -CONFIG_NET_EMATCH_U32=y -CONFIG_NET_KEY=y -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_HTB=y -CONFIG_NF_CONNTRACK=y -CONFIG_NF_CONNTRACK_AMANDA=y -CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CONNTRACK_FTP=y -CONFIG_NF_CONNTRACK_H323=y -CONFIG_NF_CONNTRACK_IPV4=y -CONFIG_NF_CONNTRACK_IPV6=y -CONFIG_NF_CONNTRACK_IRC=y -CONFIG_NF_CONNTRACK_NETBIOS_NS=y -CONFIG_NF_CONNTRACK_PPTP=y -CONFIG_NF_CONNTRACK_SANE=y -CONFIG_NF_CONNTRACK_SECMARK=y -CONFIG_NF_CONNTRACK_TFTP=y -CONFIG_NF_CT_NETLINK=y -CONFIG_NF_CT_PROTO_DCCP=y -CONFIG_NF_CT_PROTO_SCTP=y -CONFIG_NF_CT_PROTO_UDPLITE=y -CONFIG_NF_NAT=y -CONFIG_NO_HZ=y -CONFIG_PACKET=y -CONFIG_PM_AUTOSLEEP=y -CONFIG_PM_WAKELOCKS=y -CONFIG_PPP=y -CONFIG_PPP_BSDCOMP=y -CONFIG_PPP_DEFLATE=y -CONFIG_PPP_MPPE=y -CONFIG_PREEMPT=y -CONFIG_QUOTA=y -CONFIG_RANDOMIZE_BASE=y -CONFIG_RTC_CLASS=y -CONFIG_RT_GROUP_SCHED=y -CONFIG_SECCOMP=y -CONFIG_SECURITY=y -CONFIG_SECURITY_NETWORK=y -CONFIG_SECURITY_SELINUX=y -CONFIG_SETEND_EMULATION=y -CONFIG_STAGING=y -CONFIG_SWP_EMULATION=y -CONFIG_SYNC=y -CONFIG_TUN=y -CONFIG_UNIX=y -CONFIG_USB_GADGET=y -CONFIG_USB_CONFIGFS=y -CONFIG_USB_CONFIGFS_F_FS=y -CONFIG_USB_CONFIGFS_F_MIDI=y -CONFIG_USB_OTG_WAKELOCK=y -CONFIG_XFRM_USER=y
diff --git a/kernel/configs/android-fetch-configs.sh b/kernel/configs/android-fetch-configs.sh new file mode 100755 index 0000000..2dcd298 --- /dev/null +++ b/kernel/configs/android-fetch-configs.sh
@@ -0,0 +1,4 @@ +#!/bin/sh + +curl https://android.googlesource.com/kernel/configs/+archive/master/android-4.14.tar.gz | tar xzv +
diff --git a/kernel/configs/android-recommended.config b/kernel/configs/android-recommended.config deleted file mode 100644 index 946fb92..0000000 --- a/kernel/configs/android-recommended.config +++ /dev/null
@@ -1,129 +0,0 @@ -# KEEP ALPHABETICALLY SORTED -# CONFIG_AIO is not set -# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_LEGACY_PTYS is not set -# CONFIG_NF_CONNTRACK_SIP is not set -# CONFIG_PM_WAKELOCKS_GC is not set -# CONFIG_VT is not set -CONFIG_ARM64_SW_TTBR0_PAN=y -CONFIG_BACKLIGHT_LCD_SUPPORT=y -CONFIG_BLK_DEV_DM=y -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_CC_STACKPROTECTOR_STRONG=y -CONFIG_COMPACTION=y -CONFIG_CPU_SW_DOMAIN_PAN=y -CONFIG_DM_CRYPT=y -CONFIG_DM_UEVENT=y -CONFIG_DM_VERITY=y -CONFIG_DM_VERITY_FEC=y -CONFIG_DRAGONRISE_FF=y -CONFIG_ENABLE_DEFAULT_TRACERS=y -CONFIG_EXT4_FS=y -CONFIG_EXT4_FS_SECURITY=y -CONFIG_FUSE_FS=y -CONFIG_GREENASIA_FF=y -CONFIG_HIDRAW=y -CONFIG_HID_A4TECH=y -CONFIG_HID_ACRUX=y -CONFIG_HID_ACRUX_FF=y -CONFIG_HID_APPLE=y -CONFIG_HID_BELKIN=y -CONFIG_HID_CHERRY=y -CONFIG_HID_CHICONY=y -CONFIG_HID_CYPRESS=y -CONFIG_HID_DRAGONRISE=y -CONFIG_HID_ELECOM=y -CONFIG_HID_EMS_FF=y -CONFIG_HID_EZKEY=y -CONFIG_HID_GREENASIA=y -CONFIG_HID_GYRATION=y -CONFIG_HID_HOLTEK=y -CONFIG_HID_KENSINGTON=y -CONFIG_HID_KEYTOUCH=y -CONFIG_HID_KYE=y -CONFIG_HID_LCPOWER=y -CONFIG_HID_LOGITECH=y -CONFIG_HID_LOGITECH_DJ=y -CONFIG_HID_MAGICMOUSE=y -CONFIG_HID_MICROSOFT=y -CONFIG_HID_MONTEREY=y -CONFIG_HID_MULTITOUCH=y -CONFIG_HID_NTRIG=y -CONFIG_HID_ORTEK=y -CONFIG_HID_PANTHERLORD=y -CONFIG_HID_PETALYNX=y -CONFIG_HID_PICOLCD=y -CONFIG_HID_PRIMAX=y -CONFIG_HID_PRODIKEYS=y -CONFIG_HID_ROCCAT=y -CONFIG_HID_SAITEK=y -CONFIG_HID_SAMSUNG=y -CONFIG_HID_SMARTJOYPLUS=y -CONFIG_HID_SONY=y -CONFIG_HID_SPEEDLINK=y -CONFIG_HID_SUNPLUS=y -CONFIG_HID_THRUSTMASTER=y -CONFIG_HID_TIVO=y -CONFIG_HID_TOPSEED=y -CONFIG_HID_TWINHAN=y -CONFIG_HID_UCLOGIC=y -CONFIG_HID_WACOM=y -CONFIG_HID_WALTOP=y -CONFIG_HID_WIIMOTE=y -CONFIG_HID_ZEROPLUS=y -CONFIG_HID_ZYDACRON=y -CONFIG_INPUT_EVDEV=y -CONFIG_INPUT_GPIO=y -CONFIG_INPUT_JOYSTICK=y -CONFIG_INPUT_MISC=y -CONFIG_INPUT_TABLET=y -CONFIG_INPUT_UINPUT=y -CONFIG_ION=y -CONFIG_JOYSTICK_XPAD=y -CONFIG_JOYSTICK_XPAD_FF=y -CONFIG_JOYSTICK_XPAD_LEDS=y -CONFIG_KALLSYMS_ALL=y -CONFIG_KSM=y -CONFIG_LOGIG940_FF=y -CONFIG_LOGIRUMBLEPAD2_FF=y -CONFIG_LOGITECH_FF=y -CONFIG_MD=y -CONFIG_MEDIA_SUPPORT=y -CONFIG_MSDOS_FS=y -CONFIG_PANIC_TIMEOUT=5 -CONFIG_PANTHERLORD_FF=y -CONFIG_PERF_EVENTS=y -CONFIG_PM_DEBUG=y -CONFIG_PM_RUNTIME=y -CONFIG_PM_WAKELOCKS_LIMIT=0 -CONFIG_POWER_SUPPLY=y -CONFIG_PSTORE=y -CONFIG_PSTORE_CONSOLE=y -CONFIG_PSTORE_RAM=y -CONFIG_SCHEDSTATS=y -CONFIG_SMARTJOYPLUS_FF=y -CONFIG_SND=y -CONFIG_SOUND=y -CONFIG_STRICT_KERNEL_RWX=y -CONFIG_SUSPEND_TIME=y -CONFIG_TABLET_USB_ACECAD=y -CONFIG_TABLET_USB_AIPTEK=y -CONFIG_TABLET_USB_GTCO=y -CONFIG_TABLET_USB_HANWANG=y -CONFIG_TABLET_USB_KBTAB=y -CONFIG_TASKSTATS=y -CONFIG_TASK_DELAY_ACCT=y -CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_TASK_XACCT=y -CONFIG_TIMER_STATS=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_UHID=y -CONFIG_USB_ANNOUNCE_NEW_DEVICES=y -CONFIG_USB_EHCI_HCD=y -CONFIG_USB_HIDDEV=y -CONFIG_USB_USBNET=y -CONFIG_VFAT_FS=y
diff --git a/kernel/cpu.c b/kernel/cpu.c index 0171754..c14b366 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c
@@ -1211,6 +1211,7 @@ void __weak arch_enable_nonboot_cpus_end(void) void enable_nonboot_cpus(void) { int cpu, error; + struct device *cpu_device; /* Allow everyone to use the CPU hotplug again */ cpu_maps_update_begin(); @@ -1228,6 +1229,12 @@ void enable_nonboot_cpus(void) trace_suspend_resume(TPS("CPU_ON"), cpu, false); if (!error) { pr_info("CPU%d is up\n", cpu); + cpu_device = get_cpu_device(cpu); + if (!cpu_device) + pr_err("%s: failed to get cpu%d device\n", + __func__, cpu); + else + kobject_uevent(&cpu_device->kobj, KOBJ_ONLINE); continue; } pr_warn("Error taking CPU%d up: %d\n", cpu, error);
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 6a4b414..1a94ff1 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c
@@ -217,7 +217,7 @@ static char *kdb_read(char *buffer, size_t bufsize) int i; int diag, dtab_count; int key, buf_size, ret; - + static int last_crlf; diag = kdbgetintenv("DTABCOUNT", &dtab_count); if (diag) @@ -238,6 +238,9 @@ static char *kdb_read(char *buffer, size_t bufsize) return buffer; if (key != 9) tab = 0; + if (key != 10 && key != 13) + last_crlf = 0; + switch (key) { case 8: /* backspace */ if (cp > buffer) { @@ -255,7 +258,12 @@ static char *kdb_read(char *buffer, size_t bufsize) *cp = tmp; } break; - case 13: /* enter */ + case 10: /* new line */ + case 13: /* carriage return */ + /* handle \n after \r */ + if (last_crlf && last_crlf != key) + break; + last_crlf = key; *lastchar++ = '\n'; *lastchar++ = '\0'; if (!KDB_STATE(KGDB_TRANS)) {
diff --git a/kernel/events/core.c b/kernel/events/core.c index 991af68..66f66b6 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c
@@ -397,8 +397,13 @@ static cpumask_var_t perf_online_mask; * 0 - disallow raw tracepoint access for unpriv * 1 - disallow cpu events for unpriv * 2 - disallow kernel profiling for unpriv + * 3 - disallow all unpriv perf event use */ +#ifdef CONFIG_SECURITY_PERF_EVENTS_RESTRICT +int sysctl_perf_event_paranoid __read_mostly = 3; +#else int sysctl_perf_event_paranoid __read_mostly = 2; +#endif /* Minimum for 512 kiB + 1 user control page */ int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */ @@ -9984,6 +9989,9 @@ SYSCALL_DEFINE5(perf_event_open, if (flags & ~PERF_FLAG_ALL) return -EINVAL; + if (perf_paranoid_any() && !capable(CAP_SYS_ADMIN)) + return -EACCES; + err = perf_copy_attr(attr_uptr, &attr); if (err) return err;
diff --git a/kernel/fork.c b/kernel/fork.c index 6d6ce2c..ecd2c63 100644 --- a/kernel/fork.c +++ b/kernel/fork.c
@@ -90,6 +90,7 @@ #include <linux/kcov.h> #include <linux/livepatch.h> #include <linux/thread_info.h> +#include <linux/cpufreq_times.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -366,6 +367,8 @@ void put_task_stack(struct task_struct *tsk) void free_task(struct task_struct *tsk) { + cpufreq_task_times_exit(tsk); + #ifndef CONFIG_THREAD_INFO_IN_TASK /* * The task is finally done with both the stack and thread_info, @@ -1596,6 +1599,8 @@ static __latent_entropy struct task_struct *copy_process( if (!p) goto fork_out; + cpufreq_task_times_init(p); + /* * This _must_ happen before we call free_task(), i.e. before we jump * to any of the bad_fork_* labels. This is to avoid freeing @@ -2066,6 +2071,8 @@ long _do_fork(unsigned long clone_flags, struct completion vfork; struct pid *pid; + cpufreq_task_times_alloc(p); + trace_sched_process_fork(current, p); pid = get_task_pid(p, PIDTYPE_PID);
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 127e7cf..3314c0c 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c
@@ -302,6 +302,24 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, !!__bpf_address_lookup(addr, symbolsize, offset, namebuf); } +#ifdef CONFIG_CFI_CLANG +/* + * LLVM appends .cfi to function names when CONFIG_CFI_CLANG is enabled, + * which causes confusion and potentially breaks user space tools, so we + * will strip the postfix from expanded symbol names. + */ +static inline void cleanup_symbol_name(char *s) +{ + char *res; + + res = strrchr(s, '.'); + if (res && !strcmp(res, ".cfi")) + *res = '\0'; +} +#else +static inline void cleanup_symbol_name(char *s) {} +#endif + /* * Lookup an address * - modname is set to NULL if it's in the kernel. @@ -328,7 +346,9 @@ const char *kallsyms_lookup(unsigned long addr, namebuf, KSYM_NAME_LEN); if (modname) *modname = NULL; - return namebuf; + + ret = namebuf; + goto found; } /* See if it's in a module or a BPF JITed image. */ @@ -337,11 +357,16 @@ const char *kallsyms_lookup(unsigned long addr, if (!ret) ret = bpf_address_lookup(addr, symbolsize, offset, modname, namebuf); + +found: + cleanup_symbol_name(namebuf); return ret; } int lookup_symbol_name(unsigned long addr, char *symname) { + int res; + symname[0] = '\0'; symname[KSYM_NAME_LEN - 1] = '\0'; @@ -352,15 +377,23 @@ int lookup_symbol_name(unsigned long addr, char *symname) /* Grab name */ kallsyms_expand_symbol(get_symbol_offset(pos), symname, KSYM_NAME_LEN); - return 0; + goto found; } /* See if it's in a module. */ - return lookup_module_symbol_name(addr, symname); + res = lookup_module_symbol_name(addr, symname); + if (res) + return res; + +found: + cleanup_symbol_name(symname); + return 0; } int lookup_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name) { + int res; + name[0] = '\0'; name[KSYM_NAME_LEN - 1] = '\0'; @@ -372,10 +405,16 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size, kallsyms_expand_symbol(get_symbol_offset(pos), name, KSYM_NAME_LEN); modname[0] = '\0'; - return 0; + goto found; } /* See if it's in a module. */ - return lookup_module_symbol_attrs(addr, size, offset, modname, name); + res = lookup_module_symbol_attrs(addr, size, offset, modname, name); + if (res) + return res; + +found: + cleanup_symbol_name(name); + return 0; } /* Look up a kernel symbol and return it in a text buffer. */
diff --git a/kernel/module.c b/kernel/module.c index 94528b8..ef7358d 100644 --- a/kernel/module.c +++ b/kernel/module.c
@@ -2123,6 +2123,8 @@ void __weak module_arch_freeing_init(struct module *mod) { } +static void cfi_cleanup(struct module *mod); + /* Free a module, remove from lists, etc. */ static void free_module(struct module *mod) { @@ -2164,6 +2166,10 @@ static void free_module(struct module *mod) /* This may be empty, but that's OK */ disable_ro_nx(&mod->init_layout); + + /* Clean up CFI for the module. */ + cfi_cleanup(mod); + module_arch_freeing_init(mod); module_memfree(mod->init_layout.base); kfree(mod->args); @@ -3361,6 +3367,8 @@ int __weak module_finalize(const Elf_Ehdr *hdr, return 0; } +static void cfi_init(struct module *mod); + static int post_relocation(struct module *mod, const struct load_info *info) { /* Sort exception table now relocations are done. */ @@ -3373,6 +3381,9 @@ static int post_relocation(struct module *mod, const struct load_info *info) /* Setup kallsyms-specific fields. */ add_kallsyms(mod, info); + /* Setup CFI for the module. */ + cfi_init(mod); + /* Arch-specific module finalizing. */ return module_finalize(info->hdr, info->sechdrs, mod); } @@ -4120,6 +4131,22 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, } #endif /* CONFIG_KALLSYMS */ +static void cfi_init(struct module *mod) +{ +#ifdef CONFIG_CFI_CLANG + mod->cfi_check = + (cfi_check_fn)mod_find_symname(mod, CFI_CHECK_FN_NAME); + cfi_module_add(mod, module_addr_min, module_addr_max); +#endif +} + +static void cfi_cleanup(struct module *mod) +{ +#ifdef CONFIG_CFI_CLANG + cfi_module_remove(mod, module_addr_min, module_addr_max); +#endif +} + /* Maximum number of characters written by module_flags() */ #define MODULE_FLAGS_BUF_SIZE (TAINT_FLAGS_COUNT + 4)
diff --git a/kernel/power/Makefile b/kernel/power/Makefile index a3f79f0e..5c1743d 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile
@@ -15,3 +15,5 @@ obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o + +obj-$(CONFIG_SUSPEND) += wakeup_reason.o
diff --git a/kernel/power/process.c b/kernel/power/process.c index 7381d49..c366e3d 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c
@@ -22,6 +22,7 @@ #include <linux/kmod.h> #include <trace/events/power.h> #include <linux/cpuset.h> +#include <linux/wakeup_reason.h> /* * Timeout for stopping processes @@ -38,6 +39,9 @@ static int try_to_freeze_tasks(bool user_only) unsigned int elapsed_msecs; bool wakeup = false; int sleep_usecs = USEC_PER_MSEC; +#ifdef CONFIG_PM_SLEEP + char suspend_abort[MAX_SUSPEND_ABORT_LEN]; +#endif start = ktime_get_boottime(); @@ -67,6 +71,11 @@ static int try_to_freeze_tasks(bool user_only) break; if (pm_wakeup_pending()) { +#ifdef CONFIG_PM_SLEEP + pm_get_active_wakeup_sources(suspend_abort, + MAX_SUSPEND_ABORT_LEN); + log_suspend_abort_reason(suspend_abort); +#endif wakeup = true; break; } @@ -85,26 +94,27 @@ static int try_to_freeze_tasks(bool user_only) elapsed = ktime_sub(end, start); elapsed_msecs = ktime_to_ms(elapsed); - if (todo) { + if (wakeup) { pr_cont("\n"); - pr_err("Freezing of tasks %s after %d.%03d seconds " - "(%d tasks refusing to freeze, wq_busy=%d):\n", - wakeup ? "aborted" : "failed", + pr_err("Freezing of tasks aborted after %d.%03d seconds", + elapsed_msecs / 1000, elapsed_msecs % 1000); + } else if (todo) { + pr_cont("\n"); + pr_err("Freezing of tasks failed after %d.%03d seconds" + " (%d tasks refusing to freeze, wq_busy=%d):\n", elapsed_msecs / 1000, elapsed_msecs % 1000, todo - wq_busy, wq_busy); if (wq_busy) show_workqueue_state(); - if (!wakeup) { - read_lock(&tasklist_lock); - for_each_process_thread(g, p) { - if (p != current && !freezer_should_skip(p) - && freezing(p) && !frozen(p)) - sched_show_task(p); - } - read_unlock(&tasklist_lock); + read_lock(&tasklist_lock); + for_each_process_thread(g, p) { + if (p != current && !freezer_should_skip(p) + && freezing(p) && !frozen(p)) + sched_show_task(p); } + read_unlock(&tasklist_lock); } else { pr_cont("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000, elapsed_msecs % 1000);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index c0bc2c8..4789ca7 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c
@@ -31,6 +31,7 @@ #include <trace/events/power.h> #include <linux/compiler.h> #include <linux/moduleparam.h> +#include <linux/wakeup_reason.h> #include "power.h" @@ -389,7 +390,8 @@ void __weak arch_suspend_enable_irqs(void) */ static int suspend_enter(suspend_state_t state, bool *wakeup) { - int error; + char suspend_abort[MAX_SUSPEND_ABORT_LEN]; + int error, last_dev; error = platform_suspend_prepare(state); if (error) @@ -397,7 +399,11 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) error = dpm_suspend_late(PMSG_SUSPEND); if (error) { + last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1; + last_dev %= REC_FAILED_NUM; pr_err("late suspend of devices failed\n"); + log_suspend_abort_reason("%s device failed to power down", + suspend_stats.failed_devs[last_dev]); goto Platform_finish; } error = platform_suspend_prepare_late(state); @@ -411,7 +417,11 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) error = dpm_suspend_noirq(PMSG_SUSPEND); if (error) { + last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1; + last_dev %= REC_FAILED_NUM; pr_err("noirq suspend of devices failed\n"); + log_suspend_abort_reason("noirq suspend of %s device failed", + suspend_stats.failed_devs[last_dev]); goto Platform_early_resume; } error = platform_suspend_prepare_noirq(state); @@ -422,8 +432,10 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) goto Platform_wake; error = disable_nonboot_cpus(); - if (error || suspend_test(TEST_CPUS)) + if (error || suspend_test(TEST_CPUS)) { + log_suspend_abort_reason("Disabling non-boot cpus failed"); goto Enable_cpus; + } arch_suspend_disable_irqs(); BUG_ON(!irqs_disabled()); @@ -438,6 +450,9 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) trace_suspend_resume(TPS("machine_suspend"), state, false); } else if (*wakeup) { + pm_get_active_wakeup_sources(suspend_abort, + MAX_SUSPEND_ABORT_LEN); + log_suspend_abort_reason(suspend_abort); error = -EBUSY; } syscore_resume(); @@ -487,6 +502,7 @@ int suspend_devices_and_enter(suspend_state_t state) error = dpm_suspend_start(PMSG_SUSPEND); if (error) { pr_err("Some devices failed to suspend, or early wake event detected\n"); + log_suspend_abort_reason("Some devices failed to suspend, or early wake event detected"); goto Recover_platform; } suspend_test_finish("suspend devices");
diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c new file mode 100644 index 0000000..252611f --- /dev/null +++ b/kernel/power/wakeup_reason.c
@@ -0,0 +1,225 @@ +/* + * kernel/power/wakeup_reason.c + * + * Logs the reasons which caused the kernel to resume from + * the suspend mode. + * + * Copyright (C) 2014 Google, Inc. + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/wakeup_reason.h> +#include <linux/kernel.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/kobject.h> +#include <linux/sysfs.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/notifier.h> +#include <linux/suspend.h> + + +#define MAX_WAKEUP_REASON_IRQS 32 +static int irq_list[MAX_WAKEUP_REASON_IRQS]; +static int irqcount; +static bool suspend_abort; +static char abort_reason[MAX_SUSPEND_ABORT_LEN]; +static struct kobject *wakeup_reason; +static DEFINE_SPINLOCK(resume_reason_lock); + +static ktime_t last_monotime; /* monotonic time before last suspend */ +static ktime_t curr_monotime; /* monotonic time after last suspend */ +static ktime_t last_stime; /* monotonic boottime offset before last suspend */ +static ktime_t curr_stime; /* monotonic boottime offset after last suspend */ + +static ssize_t last_resume_reason_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + int irq_no, buf_offset = 0; + struct irq_desc *desc; + spin_lock(&resume_reason_lock); + if (suspend_abort) { + buf_offset = sprintf(buf, "Abort: %s", abort_reason); + } else { + for (irq_no = 0; irq_no < irqcount; irq_no++) { + desc = irq_to_desc(irq_list[irq_no]); + if (desc && desc->action && desc->action->name) + buf_offset += sprintf(buf + buf_offset, "%d %s\n", + irq_list[irq_no], desc->action->name); + else + buf_offset += sprintf(buf + buf_offset, "%d\n", + irq_list[irq_no]); + } + } + spin_unlock(&resume_reason_lock); + return buf_offset; +} + +static ssize_t last_suspend_time_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct timespec sleep_time; + struct timespec total_time; + struct timespec suspend_resume_time; + + /* + * total_time is calculated from monotonic bootoffsets because + * unlike CLOCK_MONOTONIC it include the time spent in suspend state. + */ + total_time = ktime_to_timespec(ktime_sub(curr_stime, last_stime)); + + /* + * suspend_resume_time is calculated as monotonic (CLOCK_MONOTONIC) + * time interval before entering suspend and post suspend. + */ + suspend_resume_time = ktime_to_timespec(ktime_sub(curr_monotime, last_monotime)); + + /* sleep_time = total_time - suspend_resume_time */ + sleep_time = timespec_sub(total_time, suspend_resume_time); + + /* Export suspend_resume_time and sleep_time in pair here. */ + return sprintf(buf, "%lu.%09lu %lu.%09lu\n", + suspend_resume_time.tv_sec, suspend_resume_time.tv_nsec, + sleep_time.tv_sec, sleep_time.tv_nsec); +} + +static struct kobj_attribute resume_reason = __ATTR_RO(last_resume_reason); +static struct kobj_attribute suspend_time = __ATTR_RO(last_suspend_time); + +static struct attribute *attrs[] = { + &resume_reason.attr, + &suspend_time.attr, + NULL, +}; +static struct attribute_group attr_group = { + .attrs = attrs, +}; + +/* + * logs all the wake up reasons to the kernel + * stores the irqs to expose them to the userspace via sysfs + */ +void log_wakeup_reason(int irq) +{ + struct irq_desc *desc; + desc = irq_to_desc(irq); + if (desc && desc->action && desc->action->name) + printk(KERN_INFO "Resume caused by IRQ %d, %s\n", irq, + desc->action->name); + else + printk(KERN_INFO "Resume caused by IRQ %d\n", irq); + + spin_lock(&resume_reason_lock); + if (irqcount == MAX_WAKEUP_REASON_IRQS) { + spin_unlock(&resume_reason_lock); + printk(KERN_WARNING "Resume caused by more than %d IRQs\n", + MAX_WAKEUP_REASON_IRQS); + return; + } + + irq_list[irqcount++] = irq; + spin_unlock(&resume_reason_lock); +} + +int check_wakeup_reason(int irq) +{ + int irq_no; + int ret = false; + + spin_lock(&resume_reason_lock); + for (irq_no = 0; irq_no < irqcount; irq_no++) + if (irq_list[irq_no] == irq) { + ret = true; + break; + } + spin_unlock(&resume_reason_lock); + return ret; +} + +void log_suspend_abort_reason(const char *fmt, ...) +{ + va_list args; + + spin_lock(&resume_reason_lock); + + //Suspend abort reason has already been logged. + if (suspend_abort) { + spin_unlock(&resume_reason_lock); + return; + } + + suspend_abort = true; + va_start(args, fmt); + vsnprintf(abort_reason, MAX_SUSPEND_ABORT_LEN, fmt, args); + va_end(args); + spin_unlock(&resume_reason_lock); +} + +/* Detects a suspend and clears all the previous wake up reasons*/ +static int wakeup_reason_pm_event(struct notifier_block *notifier, + unsigned long pm_event, void *unused) +{ + switch (pm_event) { + case PM_SUSPEND_PREPARE: + spin_lock(&resume_reason_lock); + irqcount = 0; + suspend_abort = false; + spin_unlock(&resume_reason_lock); + /* monotonic time since boot */ + last_monotime = ktime_get(); + /* monotonic time since boot including the time spent in suspend */ + last_stime = ktime_get_boottime(); + break; + case PM_POST_SUSPEND: + /* monotonic time since boot */ + curr_monotime = ktime_get(); + /* monotonic time since boot including the time spent in suspend */ + curr_stime = ktime_get_boottime(); + break; + default: + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block wakeup_reason_pm_notifier_block = { + .notifier_call = wakeup_reason_pm_event, +}; + +/* Initializes the sysfs parameter + * registers the pm_event notifier + */ +int __init wakeup_reason_init(void) +{ + int retval; + + retval = register_pm_notifier(&wakeup_reason_pm_notifier_block); + if (retval) + printk(KERN_WARNING "[%s] failed to register PM notifier %d\n", + __func__, retval); + + wakeup_reason = kobject_create_and_add("wakeup_reasons", kernel_kobj); + if (!wakeup_reason) { + printk(KERN_WARNING "[%s] failed to create a sysfs kobject\n", + __func__); + return 1; + } + retval = sysfs_create_group(wakeup_reason, &attr_group); + if (retval) { + kobject_put(wakeup_reason); + printk(KERN_WARNING "[%s] failed to create a sysfs group %d\n", + __func__, retval); + } + return 0; +} + +late_initcall(wakeup_reason_init);
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index a9ee16b..b9207a9 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile
@@ -20,9 +20,12 @@ obj-y += idle_task.o fair.o rt.o deadline.o obj-y += wait.o wait_bit.o swait.o completion.o idle.o obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o +obj-$(CONFIG_GENERIC_ARCH_TOPOLOGY) += energy.o +obj-$(CONFIG_SCHED_WALT) += walt.o obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o obj-$(CONFIG_SCHEDSTATS) += stats.o obj-$(CONFIG_SCHED_DEBUG) += debug.o +obj-$(CONFIG_SCHED_TUNE) += tune.o obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o obj-$(CONFIG_CPU_FREQ) += cpufreq.o obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c index 1d179ab..0c011dd 100644 --- a/kernel/sched/autogroup.c +++ b/kernel/sched/autogroup.c
@@ -263,7 +263,6 @@ void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m) } #endif /* CONFIG_PROC_FS */ -#ifdef CONFIG_SCHED_DEBUG int autogroup_path(struct task_group *tg, char *buf, int buflen) { if (!task_group_is_autogroup(tg)) @@ -271,4 +270,3 @@ int autogroup_path(struct task_group *tg, char *buf, int buflen) return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); } -#endif /* CONFIG_SCHED_DEBUG */
diff --git a/kernel/sched/autogroup.h b/kernel/sched/autogroup.h index 27cd22b..590184b 100644 --- a/kernel/sched/autogroup.h +++ b/kernel/sched/autogroup.h
@@ -56,11 +56,9 @@ autogroup_task_group(struct task_struct *p, struct task_group *tg) return tg; } -#ifdef CONFIG_SCHED_DEBUG static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) { return 0; } -#endif #endif /* CONFIG_SCHED_AUTOGROUP */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0552ddb..c674185 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c
@@ -39,6 +39,7 @@ #define CREATE_TRACE_POINTS #include <trace/events/sched.h> +#include "walt.h" DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); @@ -438,6 +439,8 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task) if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL)) return; + head->count++; + get_task_struct(task); /* @@ -447,6 +450,10 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task) head->lastp = &node->next; } +static int +try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags, + int sibling_count_hint); + void wake_up_q(struct wake_q_head *head) { struct wake_q_node *node = head->first; @@ -461,10 +468,10 @@ void wake_up_q(struct wake_q_head *head) task->wake_q.next = NULL; /* - * wake_up_process() implies a wmb() to pair with the queueing + * try_to_wake_up() implies a wmb() to pair with the queueing * in wake_q_add() so as not to miss wakeups. */ - wake_up_process(task); + try_to_wake_up(task, TASK_NORMAL, 0, head->count); put_task_struct(task); } } @@ -943,12 +950,17 @@ static inline bool is_cpu_allowed(struct task_struct *p, int cpu) static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf, struct task_struct *p, int new_cpu) { + struct rq *new_rq = cpu_rq(new_cpu); + lockdep_assert_held(&rq->lock); p->on_rq = TASK_ON_RQ_MIGRATING; dequeue_task(rq, p, DEQUEUE_NOCLOCK); + rq_unpin_lock(rq, rf); + double_lock_balance(rq, new_rq); set_task_cpu(p, new_cpu); - rq_unlock(rq, rf); + double_unlock_balance(rq, new_rq); + raw_spin_unlock(&rq->lock); rq = cpu_rq(new_cpu); @@ -1205,6 +1217,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) p->sched_class->migrate_task_rq(p); p->se.nr_migrations++; perf_event_task_migrate(p); + + walt_fixup_busy_time(p, new_cpu); } __set_task_cpu(p, new_cpu); @@ -1554,12 +1568,14 @@ static int select_fallback_rq(int cpu, struct task_struct *p) * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable. */ static inline -int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) +int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags, + int sibling_count_hint) { lockdep_assert_held(&p->pi_lock); if (p->nr_cpus_allowed > 1) - cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); + cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags, + sibling_count_hint); else cpu = cpumask_any(&p->cpus_allowed); @@ -1965,11 +1981,33 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) * */ +#ifdef CONFIG_SMP +#ifdef CONFIG_SCHED_WALT +/* utility function to update walt signals at wakeup */ +static inline void walt_try_to_wake_up(struct task_struct *p) +{ + struct rq *rq = cpu_rq(task_cpu(p)); + struct rq_flags rf; + u64 wallclock; + + rq_lock_irqsave(rq, &rf); + wallclock = walt_ktime_clock(); + walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0); + walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0); + rq_unlock_irqrestore(rq, &rf); +} +#else +#define walt_try_to_wake_up(a) {} +#endif +#endif + /** * try_to_wake_up - wake up a thread * @p: the thread to be awakened * @state: the mask of task states that can be woken * @wake_flags: wake modifier flags (WF_*) + * @sibling_count_hint: A hint at the number of threads that are being woken up + * in this event. * * If (@state & @p->state) @p->state = TASK_RUNNING. * @@ -1982,7 +2020,8 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) * %false otherwise. */ static int -try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) +try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags, + int sibling_count_hint) { unsigned long flags; int cpu, success = 0; @@ -2060,6 +2099,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) */ smp_cond_load_acquire(&p->on_cpu, !VAL); + walt_try_to_wake_up(p); + p->sched_contributes_to_load = !!task_contributes_to_load(p); p->state = TASK_WAKING; @@ -2068,7 +2109,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) atomic_dec(&task_rq(p)->nr_iowait); } - cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags); + cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags, + sibling_count_hint); if (task_cpu(p) != cpu) { wake_flags |= WF_MIGRATED; set_task_cpu(p, cpu); @@ -2129,6 +2171,11 @@ static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf) trace_sched_waking(p); if (!task_on_rq_queued(p)) { + u64 wallclock = walt_ktime_clock(); + + walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0); + walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0); + if (p->in_iowait) { delayacct_blkio_end(p); atomic_dec(&rq->nr_iowait); @@ -2156,13 +2203,13 @@ static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf) */ int wake_up_process(struct task_struct *p) { - return try_to_wake_up(p, TASK_NORMAL, 0); + return try_to_wake_up(p, TASK_NORMAL, 0, 1); } EXPORT_SYMBOL(wake_up_process); int wake_up_state(struct task_struct *p, unsigned int state) { - return try_to_wake_up(p, state, 0); + return try_to_wake_up(p, state, 0, 1); } /* @@ -2181,7 +2228,12 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) p->se.prev_sum_exec_runtime = 0; p->se.nr_migrations = 0; p->se.vruntime = 0; +#ifdef CONFIG_SCHED_WALT + p->last_sleep_ts = 0; +#endif + INIT_LIST_HEAD(&p->se.group_node); + walt_init_new_task_load(p); #ifdef CONFIG_FAIR_GROUP_SCHED p->se.cfs_rq = NULL; @@ -2458,6 +2510,9 @@ void wake_up_new_task(struct task_struct *p) struct rq *rq; raw_spin_lock_irqsave(&p->pi_lock, rf.flags); + + walt_init_new_task_load(p); + p->state = TASK_RUNNING; #ifdef CONFIG_SMP /* @@ -2468,13 +2523,15 @@ void wake_up_new_task(struct task_struct *p) * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, * as we're not fully set-up yet. */ - __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); + __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0, 1)); #endif rq = __task_rq_lock(p, &rf); update_rq_clock(rq); post_init_entity_util_avg(&p->se); activate_task(rq, p, ENQUEUE_NOCLOCK); + walt_mark_task_starting(p); + p->on_rq = TASK_ON_RQ_QUEUED; trace_sched_wakeup_new(p); check_preempt_curr(rq, p, WF_FORK); @@ -2929,7 +2986,7 @@ void sched_exec(void) int dest_cpu; raw_spin_lock_irqsave(&p->pi_lock, flags); - dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0); + dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0, 1); if (dest_cpu == smp_processor_id()) goto unlock; @@ -3028,6 +3085,9 @@ void scheduler_tick(void) rq_lock(rq, &rf); + walt_set_window_start(rq, &rf); + walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, + walt_ktime_clock(), 0); update_rq_clock(rq); curr->sched_class->task_tick(rq, curr, 0); cpu_load_update_active(rq); @@ -3299,6 +3359,7 @@ static void __sched notrace __schedule(bool preempt) struct rq_flags rf; struct rq *rq; int cpu; + u64 wallclock; cpu = smp_processor_id(); rq = cpu_rq(cpu); @@ -3354,10 +3415,17 @@ static void __sched notrace __schedule(bool preempt) } next = pick_next_task(rq, prev, &rf); + wallclock = walt_ktime_clock(); + walt_update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0); + walt_update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0); clear_tsk_need_resched(prev); clear_preempt_need_resched(); if (likely(prev != next)) { +#ifdef CONFIG_SCHED_WALT + if (!prev->on_rq) + prev->last_sleep_ts = wallclock; +#endif rq->nr_switches++; rq->curr = next; /* @@ -3618,7 +3686,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void) int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, void *key) { - return try_to_wake_up(curr->private, mode, wake_flags); + return try_to_wake_up(curr->private, mode, wake_flags, 1); } EXPORT_SYMBOL(default_wake_function); @@ -5709,6 +5777,9 @@ int sched_cpu_dying(unsigned int cpu) sched_ttwu_pending(); rq_lock_irqsave(rq, &rf); + + walt_migrate_sync_cpu(cpu); + if (rq->rd) { BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); set_rq_offline(rq); @@ -5919,12 +5990,18 @@ void __init sched_init(void) rq->idle_stamp = 0; rq->avg_idle = 2*sysctl_sched_migration_cost; rq->max_idle_balance_cost = sysctl_sched_migration_cost; +#ifdef CONFIG_SCHED_WALT + rq->cur_irqload = 0; + rq->avg_irqload = 0; + rq->irqload_ts = 0; +#endif INIT_LIST_HEAD(&rq->cfs_tasks); rq_attach_root(rq, &def_root_domain); #ifdef CONFIG_NO_HZ_COMMON rq->last_load_update_tick = jiffies; + rq->last_blocked_load_update_tick = jiffies; rq->nohz_flags = 0; #endif #ifdef CONFIG_NO_HZ_FULL
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 81eb789..d70794a 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c
@@ -19,11 +19,14 @@ #include "sched.h" +unsigned long boosted_cpu_util(int cpu, unsigned long other_util); + #define SUGOV_KTHREAD_PRIORITY 50 struct sugov_tunables { struct gov_attr_set attr_set; - unsigned int rate_limit_us; + unsigned int up_rate_limit_us; + unsigned int down_rate_limit_us; }; struct sugov_policy { @@ -34,7 +37,9 @@ struct sugov_policy { raw_spinlock_t update_lock; /* For shared policies */ u64 last_freq_update_time; - s64 freq_update_delay_ns; + s64 min_rate_limit_ns; + s64 up_rate_delay_ns; + s64 down_rate_delay_ns; unsigned int next_freq; unsigned int cached_raw_freq; @@ -111,8 +116,32 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) return true; } + /* No need to recalculate next freq for min_rate_limit_us + * at least. However we might still decide to further rate + * limit once frequency change direction is decided, according + * to the separate rate limits. + */ + delta_ns = time - sg_policy->last_freq_update_time; - return delta_ns >= sg_policy->freq_update_delay_ns; + return delta_ns >= sg_policy->min_rate_limit_ns; +} + +static bool sugov_up_down_rate_limit(struct sugov_policy *sg_policy, u64 time, + unsigned int next_freq) +{ + s64 delta_ns; + + delta_ns = time - sg_policy->last_freq_update_time; + + if (next_freq > sg_policy->next_freq && + delta_ns < sg_policy->up_rate_delay_ns) + return true; + + if (next_freq < sg_policy->next_freq && + delta_ns < sg_policy->down_rate_delay_ns) + return true; + + return false; } static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, @@ -123,6 +152,9 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, if (sg_policy->next_freq == next_freq) return; + if (sugov_up_down_rate_limit(sg_policy, time, next_freq)) + return; + sg_policy->next_freq = next_freq; sg_policy->last_freq_update_time = time; @@ -178,13 +210,15 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, static void sugov_get_util(unsigned long *util, unsigned long *max, int cpu) { - struct rq *rq = cpu_rq(cpu); - unsigned long cfs_max; + unsigned long max_cap, rt; - cfs_max = arch_scale_cpu_capacity(NULL, cpu); + max_cap = arch_scale_cpu_capacity(NULL, cpu); - *util = min(rq->cfs.avg.util_avg, cfs_max); - *max = cfs_max; + rt = sched_get_rt_rq_util(cpu); + + *util = boosted_cpu_util(cpu, rt); + *util = min(*util, max_cap); + *max = max_cap; } static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, @@ -272,7 +306,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, busy = sugov_cpu_is_busy(sg_cpu); - if (flags & SCHED_CPUFREQ_RT_DL) { + if (flags & SCHED_CPUFREQ_DL) { next_f = policy->cpuinfo.max_freq; } else { sugov_get_util(&util, &max, sg_cpu->cpu); @@ -290,6 +324,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, sg_policy->cached_raw_freq = 0; } } + sugov_update_commit(sg_policy, time, next_f); } @@ -318,7 +353,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) j_sg_cpu->iowait_boost_pending = false; continue; } - if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL) + if (j_sg_cpu->flags & SCHED_CPUFREQ_DL) return policy->cpuinfo.max_freq; j_util = j_sg_cpu->util; @@ -354,7 +389,7 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time, sg_cpu->last_update = time; if (sugov_should_update_freq(sg_policy, time)) { - if (flags & SCHED_CPUFREQ_RT_DL) + if (flags & SCHED_CPUFREQ_DL) next_f = sg_policy->policy->cpuinfo.max_freq; else next_f = sugov_next_freq_shared(sg_cpu, time); @@ -409,15 +444,32 @@ static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr return container_of(attr_set, struct sugov_tunables, attr_set); } -static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf) +static DEFINE_MUTEX(min_rate_lock); + +static void update_min_rate_limit_ns(struct sugov_policy *sg_policy) +{ + mutex_lock(&min_rate_lock); + sg_policy->min_rate_limit_ns = min(sg_policy->up_rate_delay_ns, + sg_policy->down_rate_delay_ns); + mutex_unlock(&min_rate_lock); +} + +static ssize_t up_rate_limit_us_show(struct gov_attr_set *attr_set, char *buf) { struct sugov_tunables *tunables = to_sugov_tunables(attr_set); - return sprintf(buf, "%u\n", tunables->rate_limit_us); + return sprintf(buf, "%u\n", tunables->up_rate_limit_us); } -static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, - size_t count) +static ssize_t down_rate_limit_us_show(struct gov_attr_set *attr_set, char *buf) +{ + struct sugov_tunables *tunables = to_sugov_tunables(attr_set); + + return sprintf(buf, "%u\n", tunables->down_rate_limit_us); +} + +static ssize_t up_rate_limit_us_store(struct gov_attr_set *attr_set, + const char *buf, size_t count) {