Snap for 11819167 from f3f0f0a21769367202e97cdd7fa3842f39867fec to busytown-mac-infra-release
Change-Id: I4ba7ee10986726bc0bf08951debf446f12a1c586
diff --git a/.github/ISSUE_TEMPLATE.txt b/.github/ISSUE_TEMPLATE.txt
new file mode 100644
index 0000000..0e736fb
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE.txt
@@ -0,0 +1,9 @@
+Please **do not** send pull-requests or open new issues on Github.
+
+Besides, the current erofs-utils repo is:
+git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git
+
+Github is not _the place_ for EROFS development, and some mirrors
+are actually unofficial and not frequently monitored.
+
+* Send bug reports and/or feedback to: linux-erofs@lists.ozlabs.org
diff --git a/.github/PULL_REQUEST_TEMPLATE.txt b/.github/PULL_REQUEST_TEMPLATE.txt
new file mode 100644
index 0000000..0e736fb
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.txt
@@ -0,0 +1,9 @@
+Please **do not** send pull-requests or open new issues on Github.
+
+Besides, the current erofs-utils repo is:
+git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git
+
+Github is not _the place_ for EROFS development, and some mirrors
+are actually unofficial and not frequently monitored.
+
+* Send bug reports and/or feedback to: linux-erofs@lists.ozlabs.org
diff --git a/AUTHORS b/AUTHORS
index 6b41df8..bc67a65 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,7 +1,7 @@
EROFS USERSPACE UTILITIES
M: Li Guifu <bluce.lee@aliyun.com>
M: Gao Xiang <xiang@kernel.org>
-M: Huang Jianan <huangjianan@oppo.com>
+M: Huang Jianan <jnhuang95@gmail.com>
R: Chao Yu <chao@kernel.org>
R: Miao Xie <miaoxie@huawei.com>
R: Fang Wei <fangwei1@huawei.com>
diff --git a/Android.bp b/Android.bp
index fd7fc3d..bd7e06d 100644
--- a/Android.bp
+++ b/Android.bp
@@ -73,6 +73,8 @@
"-DHAVE_SYS_IOCTL_H",
"-DHAVE_LLISTXATTR",
"-DHAVE_LGETXATTR",
+ "-D_FILE_OFFSET_BITS=64",
+ "-DEROFS_MAX_BLOCK_SIZE=16384",
],
}
@@ -108,6 +110,9 @@
srcs: [
"lib/*.c",
],
+ exclude_srcs: [
+ "lib/compressor_libdeflate.c",
+ ],
export_include_dirs: ["include"],
target: {
@@ -123,7 +128,6 @@
defaults: ["erofs-utils_defaults"],
srcs: [
- "lib/*.c",
"mkfs/*.c",
],
static_libs: [
@@ -158,7 +162,6 @@
host_supported: true,
recovery_available: true,
srcs: [
- "lib/*.c",
"dump/*.c",
],
static_libs: [
@@ -177,7 +180,6 @@
host_supported: true,
recovery_available: true,
srcs: [
- "lib/*.c",
"fsck/*.c",
],
static_libs: [
diff --git a/ChangeLog b/ChangeLog
index 97d7336..99220c8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,43 @@
+erofs-utils 1.7.1
+
+ * A quick maintenance release includes the following fixes:
+ - fix a build issue of cross-compilation with autoconf (Sandeep Dhavale);
+ - fix an invalid error code in lib/tar.c (Erik Sjölund);
+ - fix corrupted directories with hardlinks.
+
+ -- Gao Xiang <xiang@kernel.org> Fri, 20 Oct 2023 00:00:00 +0800
+
+erofs-utils 1.7
+
+ * This release includes the following updates:
+ - support arbitrary valid block sizes in addition to page size;
+ - (mkfs.erofs) arrange on-disk meta with Breadth-First Traversal instead;
+ - support long xattr name prefixes (Jingbo Xu);
+ - support UUID functionality without libuuid (Norbert Lange);
+ - (mkfs.erofs, experimental) add DEFLATE algorithm support;
+ - (mkfs.erofs, experimental) support building images directly from tarballs;
+ - (dump.erofs) print more superblock fields (Guo Xuenan);
+ - (mkfs.erofs, experimental) introduce preliminary rebuild mode (Jingbo Xu);
+ - various bugfixes and cleanups (Sandeep Dhavale, Guo Xuenan, Yue Hu,
+ Weizhao Ouyang, Kelvin Zhang, Noboru Asai, Yifan Zhao and Li Yiyan);
+
+ -- Gao Xiang <xiang@kernel.org> Thu, 21 Sep 2023 00:00:00 +0800
+
+erofs-utils 1.6
+
+ * This release includes the following updates:
+ - support fragments by using `-Efragments` (Yue Hu);
+ - support compressed data deduplication by using `-Ededupe` (Ziyang Zhang);
+ - (erofsfuse) support extended attributes (Huang Jianan);
+ - (mkfs.erofs) support multiple algorithms in a single image (Gao Xiang);
+ - (mkfs.erofs) support chunk-based sparse files (Gao Xiang);
+ - (mkfs.erofs) add volume-label setting support (Naoto Yamaguchi);
+ - (mkfs.erofs) add uid/gid offsetting support (Naoto Yamaguchi);
+ - (mkfs.erofs) pack files entirely by using `-Eall-fragments` (Gao Xiang);
+ - various bugfixes and cleanups;
+
+ -- Gao Xiang <xiang@kernel.org> Sun, 12 Mar 2023 00:00:00 +0800
+
erofs-utils 1.5
* This release includes the following updates:
diff --git a/METADATA b/METADATA
index 56dfd1b..2b7423b 100644
--- a/METADATA
+++ b/METADATA
@@ -1,6 +1,6 @@
# This project was upgraded with external_updater.
# Usage: tools/external_updater/updater.sh update erofs-utils
-# For more info, check https://cs.android.com/android/platform/superproject/+/master:tools/external_updater/README.md
+# For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md
name: "erofs-utils"
description: "EROFS Utilities"
@@ -9,11 +9,11 @@
type: GIT
value: "https://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git"
}
- version: "v1.5"
+ version: "v1.7.1"
license_type: RESTRICTED
last_upgrade_date {
year: 2023
- month: 1
- day: 18
+ month: 10
+ day: 23
}
}
diff --git a/README b/README
index 92b3128..e224b23 100644
--- a/README
+++ b/README
@@ -1,7 +1,7 @@
erofs-utils
===========
-userspace tools for EROFS filesystem, currently including:
+Userspace tools for EROFS filesystem, currently including:
mkfs.erofs filesystem formatter
erofsfuse FUSE daemon alternative
@@ -9,76 +9,60 @@
fsck.erofs filesystem compatibility & consistency checker as well
as extractor
-Dependencies & build
---------------------
- lz4 1.8.0+ for lz4 enabled [2], lz4 1.9.3+ highly recommended [4][5].
- XZ Utils 5.3.2alpha [6] or later versions for MicroLZMA enabled.
+EROFS filesystem overview
+-------------------------
- libfuse 2.6+ for erofsfuse enabled as a plus.
+EROFS filesystem stands for Enhanced Read-Only File System. It aims to
+form a generic read-only filesystem solution for various read-only use
+cases instead of just focusing on storage space saving without
+considering any side effects of runtime performance.
-How to build with lz4-1.9.0 or above
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Typically EROFS could be considered in the following use scenarios:
+ - Firmwares in performance-sensitive systems, such as system
+ partitions of Android smartphones;
-To build, you can run the following commands in order:
+ - Mountable immutable images such as container images for effective
+ metadata & data access compared with tar, cpio or other local
+ filesystems (e.g. ext4, XFS, btrfs, etc.)
-::
+ - FSDAX-enabled rootfs for secure containers (Linux 5.15+);
- $ ./autogen.sh
- $ ./configure
- $ make
+ - Live CDs which need a set of files with another high-performance
+ algorithm to optimize startup time; other files for archival
+ purposes only are not needed;
-mkfs.erofs binary will be generated under mkfs folder.
+ - and more.
-* For lz4 < 1.9.2, there are some stability issues about
- LZ4_compress_destSize(). (lz4hc isn't impacted) [3].
+Note that all EROFS metadata is uncompressed by design, so that you
+could take EROFS as a drop-in read-only replacement of ext4, XFS,
+btrfs, etc. without any compression-based dependencies and EROFS can
+bring more effective filesystem accesses to users with reduced
+metadata.
-** For lz4 = 1.9.2, there is a noticeable regression about
- LZ4_decompress_safe_partial() [5], which impacts erofsfuse
- functionality for legacy images (without 0PADDING).
+For more details of EROFS filesystem itself, please refer to:
+https://www.kernel.org/doc/html/next/filesystems/erofs.html
-How to build with lz4-1.8.0~1.8.3
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+For more details on how to build erofs-utils, see `docs/INSTALL.md`.
-For these old lz4 versions, lz4hc algorithm cannot be supported
-without lz4-static installed due to LZ4_compress_HC_destSize()
-unstable api usage, which means lz4 will only be available if
-lz4-static isn't found.
-
-On Fedora, lz4-static can be installed by using:
-
- yum install lz4-static.x86_64
-
-However, it's still not recommended using those versions directly
-since there are serious bugs in these compressors, see [2] [3] [4]
-as well.
-
-How to build with liblzma
-~~~~~~~~~~~~~~~~~~~~~~~~~
-
-In order to enable LZMA support, build with the following commands:
- $ ./configure --enable-lzma
- $ make
-
-Additionally, you could specify liblzma build paths with:
- --with-liblzma-incdir and --with-liblzma-libdir
+For more details about filesystem performance, see
+`docs/PERFORMANCE.md`.
mkfs.erofs
----------
-two main kinds of EROFS images can be generated: (un)compressed.
+Two main kinds of EROFS images can be generated: (un)compressed images.
- - For uncompressed images, there will be none of compression
- files in these images. However, it can decide whether the tail
- block of a file should be inlined or not properly [1].
+ - For uncompressed images, there will be none of compressed files in
+ these images. However, it can decide whether the tail block of a
+ file should be inlined or not properly [1].
- - For compressed images, it'll try to use specific algorithms
- first for each regular file and see if storage space can be
- saved with compression. If not, fallback to an uncompressed
- file.
+ - For compressed images, it'll try to use the given algorithms first
+ for each regular file and see if storage space can be saved with
+ compression. If not, fallback to an uncompressed file.
-How to generate EROFS images (lz4 for Linux 5.3+, lzma for Linux 5.16+)
+How to generate EROFS images (LZ4 for Linux 5.3+, LZMA for Linux 5.16+)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Currently lz4(hc) and lzma are available for compression, e.g.
@@ -113,12 +97,55 @@
please evaluate carefully in advance. Or make your own per-(sub)file
compression strategies according to file access patterns if needed.
+How to generate EROFS images with multiple algorithms (Linux 5.16+)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It's possible to generate an EROFS image with files in different
+algorithms due to various purposes. For example, LZMA for archival
+purposes and LZ4 for runtime purposes.
+
+In order to use alternative algorithms, just specify two or more
+compressing configurations together separated by ':' like below:
+ -zlzma:lz4hc,12:lzma,9 -C32768
+
+Although mkfs still chooses the first one by default, you could try to
+write a compress-hints file like below:
+ 4096 1 .*\.so$
+ 32768 2 .*\.txt$
+ 4096 sbin/.*$
+ 16384 0 .*
+
+and specify with `--compress-hints=` so that ".so" files will use
+"lz4hc,12" compression with 4k pclusters, ".txt" files will use
+"lzma,9" compression with 32k pclusters, files under "/sbin" will use
+the default "lzma" compression with 4k pclusters and other files will
+use "lzma" compression with 16k pclusters.
+
+Note that the largest pcluster size should be specified with the "-C"
+option (here 32k pcluster size), otherwise all larger pclusters will be
+limited.
+
+How to generate well-compressed EROFS images
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Even if EROFS is not designed for such purposes in the beginning, it
+could still produce some smaller images (not always) compared to other
+approaches with better performance (see `docs/PERFORMANCE.md`). In
+order to build well-compressed EROFS images, try the following options:
+ -C1048576 (5.13+)
+ -Eztailpacking (5.16+)
+ -Efragments / -Eall-fragments ( 6.1+);
+ -Ededupe ( 6.1+).
+
+Also EROFS uses lz4hc level 9 by default, whereas some other approaches
+use lz4hc level 12 by default. So please explicitly specify
+`-zlz4hc,12 ` for comparison purposes.
+
How to generate legacy EROFS images (Linux 4.19+)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Decompression inplace and compacted indexes have been introduced in
-Linux upstream v5.3, which are not forward-compatible with older
-kernels.
+Linux v5.3, which are not forward-compatible with older kernels.
In order to generate _legacy_ EROFS images for old kernels,
consider adding "-E legacy-compress" to the command line, e.g.
@@ -153,25 +180,10 @@
Therefore, NEVER use it if performance is the top concern.
-Note that extended attributes and ACLs aren't implemented yet due to
-the current Android use case vs limited time. If you are interested,
-contribution is, as always, welcome.
-
-How to build erofsfuse
-~~~~~~~~~~~~~~~~~~~~~~
-
-It's disabled by default as an experimental feature for now due to
-the extra libfuse dependency, to enable and build it manually:
-
- $ ./configure --enable-fuse
- $ make
-
-erofsfuse binary will be generated under fuse folder.
-
How to mount an EROFS image with erofsfuse
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-As the other FUSE implementations, it's quite simple to mount with
+As the other FUSE implementations, it's quite easy to mount by using
erofsfuse, e.g.:
$ erofsfuse foo.erofs.img foo/
@@ -192,99 +204,24 @@
EROFS filesystems. Note that extended attributes and ACLs are still
unsupported when extracting images with fsck.erofs.
-Container images
-----------------
-
-EROFS filesystem is well-suitably used for container images with
-advanced features like chunk-based files, multi-devices (blobs)
-and new fscache backend for lazy pulling and cache management, etc.
-
-For example, CNCF Dragonfly Nydus image service [7] introduces an
-(EROFS-compatible) RAFS v6 image format to overcome flaws of the
-current OCIv1 tgz images so that:
-
- - Images can be downloaded on demand in chunks aka lazy pulling with
- new fscache backend (5.19+) or userspace block devices (5.16+);
-
- - Finer chunk-based content-addressable data deduplication to minimize
- storage, transmission and memory footprints;
-
- - Merged filesystem tree to remove all metadata of intermediate layers
- as an option;
-
- - (e)stargz, zstd::chunked and other formats can be converted and run
- on the fly;
-
- - and more.
-
-Apart from Dragonfly Nydus, a native user daemon is planned to be added
-to erofs-utils to parse EROFS, (e)stargz and zstd::chunked images from
-network too as a real part of EROFS filesystem project.
+Note that fragment extraction with fsck.erofs could be slow now and
+it needs to be optimized later. If you are interested, contribution
+is, as always, welcome.
Contribution
------------
-erofs-utils is a part of EROFS filesystem project, feel free to send
-patches or feedback to:
+erofs-utils is a part of EROFS filesystem project, which is completely
+community-driven open source software. If you have interest in EROFS,
+feel free to send feedback and/or patches to:
linux-erofs mailing list <linux-erofs@lists.ozlabs.org>
Comments
--------
-[1] According to the EROFS on-disk format, the tail block of files
- could be inlined aggressively with its metadata in order to reduce
- the I/O overhead and save the storage space (called tail-packing).
-
-[2] There was a bug until lz4-1.8.3, which can crash erofs-utils
- randomly. Fortunately bugfix by our colleague Qiuyang Sun was
- merged in lz4-1.9.0.
-
- For more details, please refer to
- https://github.com/lz4/lz4/commit/660d21272e4c8a0f49db5fc1e6853f08713dff82
-
-[3] There were many bugfixes merged into lz4-1.9.2 for
- LZ4_compress_destSize(), and I once ran into some crashs due to
- those issues. * Again lz4hc is not affected. *
-
- [LZ4_compress_destSize] Allow 2 more bytes of match length
- https://github.com/lz4/lz4/commit/690009e2c2f9e5dcb0d40e7c0c40610ce6006eda
-
- [LZ4_compress_destSize] Fix rare data corruption bug
- https://github.com/lz4/lz4/commit/6bc6f836a18d1f8fd05c8fc2b42f1d800bc25de1
-
- [LZ4_compress_destSize] Fix overflow condition
- https://github.com/lz4/lz4/commit/13a2d9e34ffc4170720ce417c73e396d0ac1471a
-
- [LZ4_compress_destSize] Fix off-by-one error in fix
- https://github.com/lz4/lz4/commit/7c32101c655d93b61fc212dcd512b87119dd7333
-
- [LZ4_compress_destSize] Fix off-by-one error
- https://github.com/lz4/lz4/commit/d7cad81093cd805110291f84d64d385557d0ffba
-
- since upstream lz4 doesn't have stable branch for old versions, it's
- preferred to use latest upstream lz4 library (although some regressions
- could happen since new features are also introduced to latest upstream
- version as well) or backport all stable bugfixes to old stable versions,
- e.g. our unofficial lz4 fork: https://github.com/erofs/lz4
-
-[4] LZ4HC didn't compress long zeroed buffer properly with
- LZ4_compress_HC_destSize()
- https://github.com/lz4/lz4/issues/784
-
- which has been resolved in
- https://github.com/lz4/lz4/commit/e7fe105ac6ed02019d34731d2ba3aceb11b51bb1
-
- and already included in lz4-1.9.3, see:
- https://github.com/lz4/lz4/releases/tag/v1.9.3
-
-[5] LZ4_decompress_safe_partial is broken in 1.9.2
- https://github.com/lz4/lz4/issues/783
-
- which is also resolved in lz4-1.9.3.
-
-[6] https://tukaani.org/xz/xz-5.3.2alpha.tar.xz
-
-[7] https://nydus.dev
- https://github.com/dragonflyoss/image-service
+[1] According to the EROFS on-disk format, the tail blocks of files
+ could be inlined aggressively with their metadata (called
+ tail-packing) in order to minimize the extra I/Os and the storage
+ space.
diff --git a/VERSION b/VERSION
index ef7a460..8cf9ed8 100644
--- a/VERSION
+++ b/VERSION
@@ -1,2 +1,2 @@
-1.5
-2022-06-13
+1.7.1
+2023-10-20
diff --git a/configure.ac b/configure.ac
index a736ff0..a546310 100644
--- a/configure.ac
+++ b/configure.ac
@@ -29,6 +29,41 @@
AC_MSG_ERROR([pkg-config is required. See pkg-config.freedesktop.org])
fi
+dnl Check if the flag is supported by compiler
+dnl CC_CHECK_CFLAGS_SILENT([FLAG], [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND])
+AC_DEFUN([CC_CHECK_CFLAGS_SILENT], [
+ AC_CACHE_VAL(AS_TR_SH([cc_cv_cflags_$1]),
+ [ac_save_CFLAGS="$CFLAGS"
+ CFLAGS="$CFLAGS $1"
+ AC_LINK_IFELSE([AC_LANG_SOURCE([int main() { return 0; }])],
+ [eval "AS_TR_SH([cc_cv_cflags_$1])='yes'"],
+ [eval "AS_TR_SH([cc_cv_cflags_$1])='no'"])
+ CFLAGS="$ac_save_CFLAGS"
+ ])
+
+ AS_IF([eval test x$]AS_TR_SH([cc_cv_cflags_$1])[ = xyes],
+ [$2], [$3])
+])
+
+dnl Check if the flag is supported by compiler (cacheable)
+dnl CC_CHECK_CFLAG([FLAG], [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND])
+AC_DEFUN([CC_CHECK_CFLAG], [
+ AC_CACHE_CHECK([if $CC supports $1 flag],
+ AS_TR_SH([cc_cv_cflags_$1]),
+ CC_CHECK_CFLAGS_SILENT([$1]) dnl Don't execute actions here!
+ )
+
+ AS_IF([eval test x$]AS_TR_SH([cc_cv_cflags_$1])[ = xyes],
+ [$2], [$3])
+])
+
+dnl CC_CHECK_CFLAGS([FLAG1 FLAG2], [action-if-found], [action-if-not])
+AC_DEFUN([CC_CHECK_CFLAGS], [
+ for flag in $1; do
+ CC_CHECK_CFLAG($flag, [$2], [$3])
+ done
+])
+
dnl EROFS_UTILS_PARSE_DIRECTORY
dnl Input: $1 = a string to a relative or absolute directory
dnl Output: $2 = the variable to set with the absolute directory
@@ -59,6 +94,8 @@
fi
])
+AC_ARG_VAR([MAX_BLOCK_SIZE], [The maximum block size which erofs-utils supports])
+
AC_ARG_ENABLE([debug],
[AS_HELP_STRING([--enable-debug],
[enable debugging mode @<:@default=no@:>@])],
@@ -71,6 +108,12 @@
[enable_werror="$enableval"],
[enable_werror="no"])
+AC_ARG_ENABLE([fuzzing],
+ [AS_HELP_STRING([--enable-fuzzing],
+ [set up fuzzing mode @<:@default=no@:>@])],
+ [enable_fuzzing="$enableval"],
+ [enable_fuzzing="no"])
+
AC_ARG_ENABLE(lz4,
[AS_HELP_STRING([--disable-lz4], [disable LZ4 compression support @<:@default=enabled@:>@])],
[enable_lz4="$enableval"], [enable_lz4="yes"])
@@ -79,6 +122,15 @@
[AS_HELP_STRING([--enable-lzma], [enable LZMA compression support @<:@default=no@:>@])],
[enable_lzma="$enableval"], [enable_lzma="no"])
+AC_ARG_WITH(zlib,
+ [AS_HELP_STRING([--without-zlib],
+ [Ignore presence of zlib inflate support @<:@default=enabled@:>@])])
+
+AC_ARG_WITH(libdeflate,
+ [AS_HELP_STRING([--with-libdeflate],
+ [Enable and build with libdeflate inflate support @<:@default=disabled@:>@])], [],
+ [with_libdeflate="no"])
+
AC_ARG_ENABLE(fuse,
[AS_HELP_STRING([--enable-fuse], [enable erofsfuse @<:@default=no@:>@])],
[enable_fuse="$enableval"], [enable_fuse="no"])
@@ -124,6 +176,7 @@
fcntl.h
getopt.h
inttypes.h
+ linux/aufs_type.h
linux/falloc.h
linux/fs.h
linux/types.h
@@ -134,7 +187,10 @@
stdlib.h
string.h
sys/ioctl.h
+ sys/mman.h
+ sys/random.h
sys/stat.h
+ sys/statfs.h
sys/sysmacros.h
sys/time.h
unistd.h
@@ -190,8 +246,12 @@
llistxattr
memset
realpath
+ lseek64
+ ftello64
pread64
pwrite64
+ posix_fadvise
+ fstatfs
strdup
strerror
strrchr
@@ -199,6 +259,35 @@
tmpfile64
utimensat]))
+# Detect maximum block size if necessary
+AS_IF([test "x$MAX_BLOCK_SIZE" = "x"], [
+ AC_CACHE_CHECK([sysconf (_SC_PAGESIZE)], [erofs_cv_max_block_size],
+ AC_RUN_IFELSE([AC_LANG_PROGRAM(
+[[
+#include <unistd.h>
+#include <stdio.h>
+]],
+[[
+ int result;
+ FILE *f;
+
+ result = sysconf(_SC_PAGESIZE);
+ if (result < 0)
+ return 1;
+
+ f = fopen("conftest.out", "w");
+ if (!f)
+ return 1;
+
+ fprintf(f, "%d", result);
+ fclose(f);
+ return 0;
+]])],
+ [erofs_cv_max_block_size=`cat conftest.out`],
+ [erofs_cv_max_block_size=4096],
+ [erofs_cv_max_block_size=4096]))
+], [erofs_cv_max_block_size=$MAX_BLOCK_SIZE])
+
# Configure debug mode
AS_IF([test "x$enable_debug" != "xno"], [], [
dnl Turn off all assert checking.
@@ -319,11 +408,53 @@
CPPFLAGS="${saved_CPPFLAGS}"
fi
+# Configure zlib
+AS_IF([test "x$with_zlib" != "xno"], [
+ PKG_CHECK_MODULES([zlib], [zlib])
+ # Paranoia: don't trust the result reported by pkgconfig before trying out
+ saved_LIBS="$LIBS"
+ saved_CPPFLAGS=${CPPFLAGS}
+ CPPFLAGS="${zlib_CFLAGS} ${CPPFLAGS}"
+ LIBS="${zlib_LIBS} $LIBS"
+ AC_CHECK_LIB(z, inflate, [
+ have_zlib="yes" ], [
+ AC_MSG_ERROR([zlib doesn't work properly])])
+ LIBS="${saved_LIBS}"
+ CPPFLAGS="${saved_CPPFLAGS}"], [have_zlib="no"])
+
+# Configure libdeflate
+AS_IF([test "x$with_libdeflate" != "xno"], [
+ PKG_CHECK_MODULES([libdeflate], [libdeflate])
+ # Paranoia: don't trust the result reported by pkgconfig before trying out
+ saved_LIBS="$LIBS"
+ saved_CPPFLAGS=${CPPFLAGS}
+ CPPFLAGS="${libdeflate_CFLAGS} ${CPPFLAGS}"
+ LIBS="${libdeflate_LIBS} $LIBS"
+ AC_CHECK_LIB(deflate, libdeflate_deflate_decompress, [
+ have_libdeflate="yes" ], [
+ AC_MSG_ERROR([libdeflate doesn't work properly])])
+ LIBS="${saved_LIBS}"
+ CPPFLAGS="${saved_CPPFLAGS}"], [have_libdeflate="no"])
+
+# Enable 64-bit off_t
+CFLAGS+=" -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64"
+
+# Configure fuzzing mode
+AS_IF([test "x$enable_fuzzing" != "xyes"], [], [
+ CC_CHECK_CFLAGS(["-fsanitize=address,fuzzer-no-link"], [
+ CFLAGS="$CFLAGS -g -O1 -fsanitize=address,fuzzer-no-link"
+ ], [
+ AC_MSG_ERROR([Compiler doesn't support `-fsanitize=address,fuzzer-no-link`])
+ ])
+])
+AM_CONDITIONAL([ENABLE_FUZZING], [test "x${enable_fuzzing}" = "xyes"])
+
# Set up needed symbols, conditionals and compiler/linker flags
AM_CONDITIONAL([ENABLE_LZ4], [test "x${have_lz4}" = "xyes"])
AM_CONDITIONAL([ENABLE_LZ4HC], [test "x${have_lz4hc}" = "xyes"])
AM_CONDITIONAL([ENABLE_FUSE], [test "x${have_fuse}" = "xyes"])
AM_CONDITIONAL([ENABLE_LIBLZMA], [test "x${have_liblzma}" = "xyes"])
+AM_CONDITIONAL([ENABLE_LIBDEFLATE], [test "x${have_libdeflate}" = "xyes"])
if test "x$have_uuid" = "xyes"; then
AC_DEFINE([HAVE_LIBUUID], 1, [Define to 1 if libuuid is found])
@@ -361,6 +492,21 @@
AC_SUBST([liblzma_CFLAGS])
fi
+if test "x$have_zlib" = "xyes"; then
+ AC_DEFINE([HAVE_ZLIB], 1, [Define to 1 if zlib is found])
+fi
+
+if test "x$have_libdeflate" = "xyes"; then
+ AC_DEFINE([HAVE_LIBDEFLATE], 1, [Define to 1 if libdeflate is found])
+fi
+
+# Dump maximum block size
+AS_IF([test "x$erofs_cv_max_block_size" = "x"],
+ [$erofs_cv_max_block_size = 4096], [])
+
+AC_DEFINE_UNQUOTED([EROFS_MAX_BLOCK_SIZE], [$erofs_cv_max_block_size],
+ [The maximum block size which erofs-utils supports])
+
AC_CONFIG_FILES([Makefile
man/Makefile
lib/Makefile
diff --git a/docs/INSTALL.md b/docs/INSTALL.md
new file mode 100644
index 0000000..2e818da
--- /dev/null
+++ b/docs/INSTALL.md
@@ -0,0 +1,71 @@
+This document describes how to configure and build erofs-utils from
+source.
+
+See the [README](../README) file in the top level directory about
+the brief overview of erofs-utils.
+
+## Dependencies & build
+
+LZ4 1.9.3+ for LZ4(HC) enabled [^1].
+
+[XZ Utils 5.3.2alpha+](https://tukaani.org/xz/xz-5.3.2alpha.tar.gz) for
+LZMA enabled, [XZ Utils 5.4+](https://tukaani.org/xz/xz-5.4.1.tar.gz)
+highly recommended.
+
+libfuse 2.6+ for erofsfuse enabled.
+
+[^1]: It's not recommended to use LZ4 versions under 1.9.3 since
+unexpected crashes could make trouble to end users due to broken
+LZ4_compress_destSize() (fixed in v1.9.2),
+[LZ4_compress_HC_destSize()](https://github.com/lz4/lz4/commit/660d21272e4c8a0f49db5fc1e6853f08713dff82) or
+[LZ4_decompress_safe_partial()](https://github.com/lz4/lz4/issues/783).
+
+## How to build with LZ4
+
+To build, the following commands can be used in order:
+
+``` sh
+$ ./autogen.sh
+$ ./configure
+$ make
+```
+
+`mkfs.erofs`, `dump.erofs` and `fsck.erofs` binaries will be
+generated under the corresponding folders.
+
+## How to build with liblzma
+
+In order to enable LZMA support, build with the following commands:
+
+``` sh
+$ ./configure --enable-lzma
+$ make
+```
+
+Additionally, you could specify liblzma target paths with
+`--with-liblzma-incdir` and `--with-liblzma-libdir` manually.
+
+## How to build erofsfuse
+
+It's disabled by default as an experimental feature for now due
+to the extra libfuse dependency, to enable and build it manually:
+
+``` sh
+$ ./configure --enable-fuse
+$ make
+```
+
+`erofsfuse` binary will be generated under `fuse` folder.
+
+## How to install erofs-utils manually
+
+Use the following command to install erofs-utils binaries:
+
+``` sh
+# make install
+```
+
+By default, `make install` will install all the files in
+`/usr/local/bin`, `/usr/local/lib` etc. You can specify an
+installation prefix other than `/usr/local` using `--prefix`,
+for instance `--prefix=$HOME`.
diff --git a/docs/PERFORMANCE.md b/docs/PERFORMANCE.md
new file mode 100644
index 0000000..5431856
--- /dev/null
+++ b/docs/PERFORMANCE.md
@@ -0,0 +1,201 @@
+# Test setup
+
+Processor: x86_64, Intel(R) Xeon(R) Platinum 8369B CPU @ 2.70GHz * 2 VCores
+
+Storage: Cloud disk, 3000 IOPS upper limit
+
+OS Kernel: Linux 6.2
+
+Software: LZ4 1.9.3, erofs-utils 1.6, squashfs-tools 4.5.1
+
+Disclaimer: Test results may vary across different hardware and/or data patterns. Therefore, the following results are **ONLY for reference**.
+
+# Benchmark on multiple files
+
+[Rootfs of Debian docker image](https://github.com/debuerreotype/docker-debian-artifacts/blob/dist-amd64/bullseye/rootfs.tar.xz?raw=true) is used as the dataset, which contains 7000+ files and directories.
+Note that the dataset can be replaced regularly, and the SHA1 of the snapshot "rootfs.tar.xz" used here is "aee9b01a530078dbef8f08521bfcabe65b244955".
+
+## Image size
+
+| Size | Filesystem | Cluster size | Build options |
+|-----------|------------|--------------|----------------------------------------------------------------|
+| 124669952 | erofs | uncompressed | -T0 [^1] |
+| 124522496 | squashfs | uncompressed | -noD -noI -noX -noF -no-xattrs -all-time 0 -no-duplicates [^2] |
+| 73601024 | squashfs | 4096 | -b 4096 -comp lz4 -Xhc -no-xattrs -all-time 0 |
+| 73121792 | erofs | 4096 | -zlz4hc,12 [^3] -C4096 -Efragments -T0 |
+| 67162112 | squashfs | 16384 | -b 16384 -comp lz4 -Xhc -no-xattrs -all-time 0 |
+| 65478656 | erofs | 16384 | -zlz4hc,12 -C16384 -Efragments -T0 |
+| 61456384 | squashfs | 65536 | -b 65536 -comp lz4 -Xhc -no-xattrs -all-time 0 |
+| 59834368 | erofs | 65536 | -zlz4hc,12 -C65536 -Efragments -T0 |
+| 59150336 | squashfs | 131072 | -b 131072 -comp lz4 -Xhc -no-xattrs -all-time 0 |
+| 58515456 | erofs | 131072 | -zlz4hc,12 -C131072 -Efragments -T0 |
+
+[^1]: Forcibly reset all timestamps to match squashfs on-disk basic inodes for now.
+[^2]: Currently erofs-utils doesn't actively de-duplicate identical files although the on-disk format supports this.
+[^3]: Because squashfs uses level 12 for LZ4HC by default.
+
+## Sequential data access
+
+```bash
+hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "tar cf - . | cat > /dev/null"
+```
+
+| Filesystem | Cluster size | Time |
+|------------|--------------|---------------------------------|
+| squashfs | 4096 | 10.257 s ± 0.031 s |
+| erofs | uncompressed | 1.111 s ± 0.022 s |
+| squashfs | uncompressed | 1.034 s ± 0.020 s |
+| squashfs | 131072 | 941.3 ms ± 7.5 ms |
+| erofs | 4096 | 848.1 ms ± 17.8 ms |
+| erofs | 131072 | 724.2 ms ± 11.0 ms |
+
+## Sequential metadata access
+
+```bash
+hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "tar cf /dev/null ."
+```
+
+| Filesystem | Cluster size | Time |
+|------------|--------------|---------------------------------|
+| erofs | uncompressed | 419.6 ms ± 8.2 ms |
+| squashfs | 4096 | 142.5 ms ± 5.4 ms |
+| squashfs | uncompressed | 129.2 ms ± 3.9 ms |
+| squashfs | 131072 | 125.4 ms ± 4.0 ms |
+| erofs | 4096 | 75.5 ms ± 3.5 ms |
+| erofs | 131072 | 65.8 ms ± 3.6 ms |
+
+[ Note that erofs-utils currently doesn't perform quite well for such cases due to metadata arrangement when building. It will be fixed in the later versions. ]
+
+## Small random data access (~7%)
+
+```bash
+find mnt -type f -printf "%p\n" | sort -R | head -n 500 > list.txt
+hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "cat list.txt | xargs cat > /dev/null"
+```
+
+| Filesystem | Cluster size | Time |
+|------------|--------------|---------------------------------|
+| squashfs | 4096 | 1.386 s ± 0.032 s |
+| squashfs | uncompressed | 1.083 s ± 0.044 s |
+| squashfs | 131072 | 1.067 s ± 0.046 s |
+| erofs | 4096 | 249.6 ms ± 6.5 ms |
+| erofs | uncompressed | 237.8 ms ± 6.3 ms |
+| erofs | 131072 | 189.6 ms ± 7.8 ms |
+
+
+## Small random metadata access (~7%)
+
+```bash
+find mnt -type f -printf "%p\n" | sort -R | head -n 500 > list.txt
+hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "cat list.txt | xargs stat"
+```
+
+| Filesystem | Cluster size | Time |
+|------------|--------------|---------------------------------|
+| squashfs | 4096 | 817.0 ms ± 34.5 ms |
+| squashfs | 131072 | 801.0 ms ± 40.1 ms |
+| squashfs | uncompressed | 741.3 ms ± 18.2 ms |
+| erofs | uncompressed | 197.8 ms ± 4.1 ms |
+| erofs | 4096 | 63.1 ms ± 2.0 ms |
+| erofs | 131072 | 60.7 ms ± 3.6 ms |
+
+## Full random data access (~100%)
+
+```bash
+find mnt -type f -printf "%p\n" | sort -R > list.txt
+hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "cat list.txt | xargs cat > /dev/null"
+```
+
+| Filesystem | Cluster size | Time |
+|------------|--------------|---------------------------------|
+| squashfs | 4096 | 20.668 s ± 0.040 s |
+| squashfs | uncompressed | 12.543 s ± 0.041 s |
+| squashfs | 131072 | 11.753 s ± 0.412 s |
+| erofs | uncompressed | 1.493 s ± 0.023 s |
+| erofs | 4096 | 1.223 s ± 0.013 s |
+| erofs | 131072 | 598.2 ms ± 6.6 ms |
+
+## Full random metadata access (~100%)
+
+```bash
+find mnt -type f -printf "%p\n" | sort -R > list.txt
+hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "cat list.txt | xargs stat"
+```
+
+| Filesystem | Cluster size | Time |
+|------------|--------------|---------------------------------|
+| squashfs | 131072 | 9.212 s ± 0.467 s |
+| squashfs | 4096 | 8.905 s ± 0.147 s |
+| squashfs | uncompressed | 7.961 s ± 0.045 s |
+| erofs | 4096 | 661.2 ms ± 14.9 ms |
+| erofs | uncompressed | 125.8 ms ± 6.6 ms |
+| erofs | 131072 | 119.6 ms ± 5.5 ms |
+
+
+# FIO benchmark on a single large file
+
+`silesia.tar` (203M) is used to benchmark, which could be generated from unzipping [silesia.zip](http://mattmahoney.net/dc/silesia.zip) and tar.
+
+## Image size
+
+| Size | Filesystem | Cluster size | Build options |
+|-----------|------------|--------------|-----------------------------------------------------------|
+| 114339840 | squashfs | 4096 | -b 4096 -comp lz4 -Xhc -no-xattrs |
+| 104972288 | erofs | 4096 | -zlz4hc,12 -C4096 |
+| 98033664 | squashfs | 16384 | -b 16384 -comp lz4 -Xhc -no-xattrs |
+| 89571328 | erofs | 16384 | -zlz4hc,12 -C16384 |
+| 85143552 | squashfs | 65536 | -b 65536 -comp lz4 -Xhc -no-xattrs |
+| 81211392 | squashfs | 131072 | -b 131072 -comp lz4 -Xhc -no-xattrs |
+| 80519168 | erofs | 65536 | -zlz4hc,12 -C65536 |
+| 78888960 | erofs | 131072 | -zlz4hc,12 -C131072 |
+
+## Sequential I/Os
+
+```bash
+fio -filename=silesia.tar -bs=4k -rw=read -name=job1
+```
+
+| Filesystem | Cluster size | Bandwidth |
+|------------|--------------|-----------|
+| erofs | 65536 | 624 MiB/s |
+| erofs | 16384 | 600 MiB/s |
+| erofs | 4096 | 569 MiB/s |
+| erofs | 131072 | 535 MiB/s |
+| squashfs | 131072 | 236 MiB/s |
+| squashfs | 65536 | 157 MiB/s |
+| squashfs | 16384 | 55.2MiB/s |
+| squashfs | 4096 | 12.5MiB/s |
+
+## Full Random I/Os
+
+```bash
+fio -filename=silesia.tar -bs=4k -rw=randread -name=job1
+```
+
+| Filesystem | Cluster size | Bandwidth |
+|------------|--------------|-----------|
+| erofs | 131072 | 242 MiB/s |
+| squashfs | 131072 | 232 MiB/s |
+| erofs | 65536 | 198 MiB/s |
+| squashfs | 65536 | 150 MiB/s |
+| erofs | 16384 | 96.4MiB/s |
+| squashfs | 16384 | 49.5MiB/s |
+| erofs | 4096 | 33.7MiB/s |
+| squashfs | 4096 | 6817KiB/s |
+
+## Small Random I/Os (~5%)
+
+```bash
+fio -filename=silesia.tar -bs=4k -rw=randread --io_size=10m -name=job1
+```
+
+| Filesystem | Cluster size | Bandwidth |
+|------------|--------------|-----------|
+| erofs | 131072 | 19.2MiB/s |
+| erofs | 65536 | 16.9MiB/s |
+| squashfs | 131072 | 15.1MiB/s |
+| erofs | 16384 | 14.7MiB/s |
+| squashfs | 65536 | 13.8MiB/s |
+| erofs | 4096 | 13.0MiB/s |
+| squashfs | 16384 | 11.7MiB/s |
+| squashfs | 4096 | 4376KiB/s |
diff --git a/docs/compress-hints.example b/docs/compress-hints.example
new file mode 100644
index 0000000..4f481ff
--- /dev/null
+++ b/docs/compress-hints.example
@@ -0,0 +1,7 @@
+# https://github.com/debuerreotype/docker-debian-artifacts/blob/dist-amd64/bullseye/rootfs.tar.xz?raw=true
+# -zlzma:lz4hc,12:lzma,109 -C131072 --compress-hints=compress-hints.example image size: 66M
+# -zlz4hc,12 image size: 76M
+4096 1 .*\.so.*$
+4096 1 bin/
+4096 1 sbin/
+131072 2 etc/
diff --git a/dump/Makefile.am b/dump/Makefile.am
index c2bef6d..aed20c2 100644
--- a/dump/Makefile.am
+++ b/dump/Makefile.am
@@ -7,4 +7,4 @@
dump_erofs_SOURCES = main.c
dump_erofs_CFLAGS = -Wall -I$(top_srcdir)/include
dump_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
- ${libuuid_LIBS} ${liblz4_LIBS} ${liblzma_LIBS}
+ ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS}
diff --git a/dump/main.c b/dump/main.c
index 49ff2b7..5425b7b 100644
--- a/dump/main.c
+++ b/dump/main.c
@@ -14,11 +14,11 @@
#include "erofs/inode.h"
#include "erofs/io.h"
#include "erofs/dir.h"
+#include "erofs/compress.h"
+#include "erofs/fragments.h"
#include "../lib/liberofs_private.h"
+#include "../lib/liberofs_uuid.h"
-#ifdef HAVE_LIBUUID
-#include <uuid.h>
-#endif
struct erofsdump_cfg {
unsigned int totalshow;
@@ -37,7 +37,7 @@
static char *file_types[] = {
".txt", ".so", ".xml", ".apk",
".odex", ".vdex", ".oat", ".rc",
- ".otf", ".txt", "others",
+ ".otf", "others",
};
#define OTHERFILETYPE ARRAY_SIZE(file_types)
/* (1 << FILE_MAX_SIZE_BITS)KB */
@@ -91,10 +91,16 @@
static struct erofsdump_feature feature_lists[] = {
{ true, EROFS_FEATURE_COMPAT_SB_CHKSUM, "sb_csum" },
{ true, EROFS_FEATURE_COMPAT_MTIME, "mtime" },
- { false, EROFS_FEATURE_INCOMPAT_LZ4_0PADDING, "0padding" },
+ { true, EROFS_FEATURE_COMPAT_XATTR_FILTER, "xattr_filter" },
+ { false, EROFS_FEATURE_INCOMPAT_ZERO_PADDING, "0padding" },
+ { false, EROFS_FEATURE_INCOMPAT_COMPR_CFGS, "compr_cfgs" },
{ false, EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER, "big_pcluster" },
{ false, EROFS_FEATURE_INCOMPAT_CHUNKED_FILE, "chunked_file" },
{ false, EROFS_FEATURE_INCOMPAT_DEVICE_TABLE, "device_table" },
+ { false, EROFS_FEATURE_INCOMPAT_ZTAILPACKING, "ztailpacking" },
+ { false, EROFS_FEATURE_INCOMPAT_FRAGMENTS, "fragments" },
+ { false, EROFS_FEATURE_INCOMPAT_DEDUPE, "dedupe" },
+ { false, EROFS_FEATURE_INCOMPAT_XATTR_PREFIXES, "xattr_prefixes" },
};
static int erofsdump_readdir(struct erofs_dir_context *ctx);
@@ -151,7 +157,7 @@
usage();
exit(0);
case 3:
- err = blob_open_ro(optarg);
+ err = blob_open_ro(&sbi, optarg);
if (err)
return err;
++sbi.extra_devices;
@@ -196,10 +202,10 @@
stats.uncompressed_files++;
*size = inode->i_size;
break;
- case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
- case EROFS_INODE_FLAT_COMPRESSION:
+ case EROFS_INODE_COMPRESSED_FULL:
+ case EROFS_INODE_COMPRESSED_COMPACT:
stats.compressed_files++;
- *size = inode->u.i_blocks * EROFS_BLKSIZ;
+ *size = inode->u.i_blocks * erofs_blksiz(inode->sbi);
break;
default:
erofs_err("unknown datalayout");
@@ -224,36 +230,23 @@
++stats.file_type_stat[type];
}
-static void update_file_size_statatics(erofs_off_t occupied_size,
- erofs_off_t original_size)
+static void update_file_size_statistics(erofs_off_t size, bool original)
{
- int occupied_size_mark, original_size_mark;
+ unsigned int *file_size = original ? stats.file_original_size :
+ stats.file_comp_size;
+ int size_mark = 0;
- original_size_mark = 0;
- occupied_size_mark = 0;
- occupied_size >>= 10;
- original_size >>= 10;
+ size >>= 10;
- while (occupied_size || original_size) {
- if (occupied_size) {
- occupied_size >>= 1;
- occupied_size_mark++;
- }
- if (original_size) {
- original_size >>= 1;
- original_size_mark++;
- }
+ while (size) {
+ size >>= 1;
+ size_mark++;
}
- if (original_size_mark >= FILE_MAX_SIZE_BITS)
- stats.file_original_size[FILE_MAX_SIZE_BITS]++;
+ if (size_mark >= FILE_MAX_SIZE_BITS)
+ file_size[FILE_MAX_SIZE_BITS]++;
else
- stats.file_original_size[original_size_mark]++;
-
- if (occupied_size_mark >= FILE_MAX_SIZE_BITS)
- stats.file_comp_size[FILE_MAX_SIZE_BITS]++;
- else
- stats.file_comp_size[occupied_size_mark]++;
+ file_size[size_mark]++;
}
static int erofsdump_ls_dirent_iter(struct erofs_dir_context *ctx)
@@ -276,11 +269,37 @@
return erofsdump_readdir(ctx);
}
+static int erofsdump_read_packed_inode(void)
+{
+ int err;
+ erofs_off_t occupied_size = 0;
+ struct erofs_inode vi = { .sbi = &sbi, .nid = sbi.packed_nid };
+
+ if (!(erofs_sb_has_fragments(&sbi) && sbi.packed_nid > 0))
+ return 0;
+
+ err = erofs_read_inode_from_disk(&vi);
+ if (err) {
+ erofs_err("failed to read packed file inode from disk");
+ return err;
+ }
+
+ err = erofsdump_get_occupied_size(&vi, &occupied_size);
+ if (err) {
+ erofs_err("failed to get the file size of packed inode");
+ return err;
+ }
+
+ stats.files_total_size += occupied_size;
+ update_file_size_statistics(occupied_size, false);
+ return 0;
+}
+
static int erofsdump_readdir(struct erofs_dir_context *ctx)
{
int err;
erofs_off_t occupied_size = 0;
- struct erofs_inode vi = { .nid = ctx->de_nid };
+ struct erofs_inode vi = { .sbi = &sbi, .nid = ctx->de_nid };
err = erofs_read_inode_from_disk(&vi);
if (err) {
@@ -300,7 +319,8 @@
stats.files_total_origin_size += vi.i_size;
inc_file_extension_count(ctx->dname, ctx->de_namelen);
stats.files_total_size += occupied_size;
- update_file_size_statatics(occupied_size, vi.i_size);
+ update_file_size_statistics(vi.i_size, true);
+ update_file_size_statistics(occupied_size, false);
}
/* XXXX: the dir depth should be restricted in order to avoid loops */
@@ -334,7 +354,7 @@
int err, i;
erofs_off_t size;
u16 access_mode;
- struct erofs_inode inode = { .nid = dumpcfg.nid };
+ struct erofs_inode inode = { .sbi = &sbi, .nid = dumpcfg.nid };
char path[PATH_MAX];
char access_mode_str[] = "rwxrwxrwx";
char timebuf[128] = {0};
@@ -365,10 +385,10 @@
return;
}
- err = erofs_get_pathname(inode.nid, path, sizeof(path));
+ err = erofs_get_pathname(inode.sbi, inode.nid, path, sizeof(path));
if (err < 0) {
- erofs_err("file path not found @ nid %llu", inode.nid | 0ULL);
- return;
+ strncpy(path, "(not found)", sizeof(path) - 1);
+ path[sizeof(path) - 1] = '\0';
}
strftime(timebuf, sizeof(timebuf),
@@ -377,7 +397,8 @@
for (i = 8; i >= 0; i--)
if (((access_mode >> i) & 1) == 0)
access_mode_str[8 - i] = '-';
- fprintf(stdout, "File : %s\n", path);
+ fprintf(stdout, "Path : %s\n",
+ erofs_is_packed_inode(&inode) ? "(packed file)" : path);
fprintf(stdout, "Size: %" PRIu64" On-disk size: %" PRIu64 " %s\n",
inode.i_size, size,
file_category_types[erofs_mode_to_ftype(inode.i_mode)]);
@@ -387,7 +408,6 @@
inode.datalayout,
(double)(100 * size) / (double)(inode.i_size));
fprintf(stdout, "Inode size: %d ", inode.inode_isize);
- fprintf(stdout, "Extent size: %u ", inode.extent_isize);
fprintf(stdout, "Xattr size: %u\n", inode.xattr_isize);
fprintf(stdout, "Uid: %u Gid: %u ", inode.i_uid, inode.i_gid);
fprintf(stdout, "Access: %04o/%s\n", access_mode, access_mode_str);
@@ -430,19 +450,27 @@
.m_deviceid = map.m_deviceid,
.m_pa = map.m_pa,
};
- err = erofs_map_dev(&sbi, &mdev);
+ err = erofs_map_dev(inode.sbi, &mdev);
if (err) {
erofs_err("failed to map device");
return;
}
- fprintf(stdout, ext_fmt[!!mdev.m_deviceid], extent_count++,
- map.m_la, map.m_la + map.m_llen, map.m_llen,
- mdev.m_pa, mdev.m_pa + map.m_plen, map.m_plen,
- mdev.m_deviceid);
+ if (map.m_flags & EROFS_MAP_FRAGMENT)
+ fprintf(stdout, ext_fmt[!!mdev.m_deviceid],
+ extent_count++,
+ map.m_la, map.m_la + map.m_llen, map.m_llen,
+ 0, 0, 0, mdev.m_deviceid);
+ else
+ fprintf(stdout, ext_fmt[!!mdev.m_deviceid],
+ extent_count++,
+ map.m_la, map.m_la + map.m_llen, map.m_llen,
+ mdev.m_pa, mdev.m_pa + map.m_plen, map.m_plen,
+ mdev.m_deviceid);
map.m_la += map.m_llen;
}
- fprintf(stdout, "%s: %d extents found\n", path, extent_count);
+ fprintf(stdout, "%s: %d extents found\n",
+ erofs_is_packed_inode(&inode) ? "(packed file)" : path, extent_count);
}
static void erofsdump_filesize_distribution(const char *title,
@@ -548,6 +576,11 @@
erofs_err("read dir failed");
return;
}
+ err = erofsdump_read_packed_inode();
+ if (err) {
+ erofs_err("failed to read packed inode");
+ return;
+ }
erofsdump_file_statistic();
erofsdump_filesize_distribution("Original",
stats.file_original_size,
@@ -558,10 +591,27 @@
erofsdump_filetype_distribution(file_types, OTHERFILETYPE);
}
+static void erofsdump_print_supported_compressors(FILE *f, unsigned int mask)
+{
+ unsigned int i = 0;
+ bool comma = false;
+ const char *s;
+
+ while ((s = z_erofs_list_supported_algorithms(i++, &mask)) != NULL) {
+ if (*s == '\0')
+ continue;
+ if (comma)
+ fputs(", ", f);
+ fputs(s, f);
+ comma = true;
+ }
+ fputc('\n', f);
+}
+
static void erofsdump_show_superblock(void)
{
time_t time = sbi.build_time;
- char uuid_str[37] = "not available";
+ char uuid_str[37];
int i = 0;
fprintf(stdout, "Filesystem magic number: 0x%04X\n",
@@ -574,6 +624,19 @@
sbi.xattr_blkaddr);
fprintf(stdout, "Filesystem root nid: %llu\n",
sbi.root_nid | 0ULL);
+ if (erofs_sb_has_fragments(&sbi) && sbi.packed_nid > 0)
+ fprintf(stdout, "Filesystem packed nid: %llu\n",
+ sbi.packed_nid | 0ULL);
+ if (erofs_sb_has_compr_cfgs(&sbi)) {
+ fprintf(stdout, "Filesystem compr_algs: ");
+ erofsdump_print_supported_compressors(stdout,
+ sbi.available_compr_algs);
+ } else {
+ fprintf(stdout, "Filesystem lz4_max_distance: %u\n",
+ sbi.lz4_max_distance | 0U);
+ }
+ fprintf(stdout, "Filesystem sb_extslots: %u\n",
+ sbi.extslots | 0U);
fprintf(stdout, "Filesystem inode count: %llu\n",
sbi.inos | 0ULL);
fprintf(stdout, "Filesystem created: %s",
@@ -586,9 +649,7 @@
if (feat & feature_lists[i].flag)
fprintf(stdout, "%s ", feature_lists[i].name);
}
-#ifdef HAVE_LIBUUID
- uuid_unparse_lower(sbi.uuid, uuid_str);
-#endif
+ erofs_uuid_unparse_lower(sbi.uuid, uuid_str);
fprintf(stdout, "\nFilesystem UUID: %s\n",
uuid_str);
}
@@ -605,13 +666,13 @@
goto exit;
}
- err = dev_open_ro(cfg.c_img_path);
+ err = dev_open_ro(&sbi, cfg.c_img_path);
if (err) {
erofs_err("failed to open image file");
goto exit;
}
- err = erofs_read_superblock();
+ err = erofs_read_superblock(&sbi);
if (err) {
erofs_err("failed to read superblock");
goto exit_dev_close;
@@ -629,16 +690,18 @@
if (dumpcfg.show_extent && !dumpcfg.show_inode) {
usage();
- goto exit_dev_close;
+ goto exit_put_super;
}
if (dumpcfg.show_inode)
erofsdump_show_fileinfo(dumpcfg.show_extent);
+exit_put_super:
+ erofs_put_super(&sbi);
exit_dev_close:
- dev_close();
+ dev_close(&sbi);
exit:
- blob_closeall();
+ blob_closeall(&sbi);
erofs_exit_configure();
return err;
}
diff --git a/fsck/Makefile.am b/fsck/Makefile.am
index e6a1fb6..d024405 100644
--- a/fsck/Makefile.am
+++ b/fsck/Makefile.am
@@ -7,4 +7,13 @@
fsck_erofs_SOURCES = main.c
fsck_erofs_CFLAGS = -Wall -I$(top_srcdir)/include
fsck_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
- ${libuuid_LIBS} ${liblz4_LIBS} ${liblzma_LIBS}
+ ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS}
+
+if ENABLE_FUZZING
+noinst_PROGRAMS = fuzz_erofsfsck
+fuzz_erofsfsck_SOURCES = main.c
+fuzz_erofsfsck_CFLAGS = -Wall -I$(top_srcdir)/include -DFUZZING
+fuzz_erofsfsck_LDFLAGS = -fsanitize=address,fuzzer
+fuzz_erofsfsck_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
+ ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS}
+endif
diff --git a/fsck/main.c b/fsck/main.c
index 5a2f659..3f86da4 100644
--- a/fsck/main.c
+++ b/fsck/main.c
@@ -49,15 +49,27 @@
{0, 0, 0, 0},
};
+#define NR_HARDLINK_HASHTABLE 16384
+
+struct erofsfsck_hardlink_entry {
+ struct list_head list;
+ erofs_nid_t nid;
+ char *path;
+};
+
+static struct list_head erofsfsck_link_hashtable[NR_HARDLINK_HASHTABLE];
+
static void print_available_decompressors(FILE *f, const char *delim)
{
- unsigned int i = 0;
+ int i = 0;
+ bool comma = false;
const char *s;
- while ((s = z_erofs_list_available_compressors(i)) != NULL) {
- if (i++)
+ while ((s = z_erofs_list_available_compressors(&i)) != NULL) {
+ if (comma)
fputs(delim, f);
fputs(s, f);
+ comma = true;
}
fputc('\n', f);
}
@@ -131,6 +143,11 @@
while (len > 1 && optarg[len - 1] == '/')
len--;
+ if (len >= PATH_MAX) {
+ erofs_err("target directory name too long!");
+ return -ENAMETOOLONG;
+ }
+
fsckcfg.extract_path = malloc(PATH_MAX);
if (!fsckcfg.extract_path)
return -ENOMEM;
@@ -143,7 +160,7 @@
}
break;
case 3:
- ret = blob_open_ro(optarg);
+ ret = blob_open_ro(&sbi, optarg);
if (ret)
return ret;
++sbi.extra_devices;
@@ -258,12 +275,13 @@
static int erofs_check_sb_chksum(void)
{
- int ret;
- u8 buf[EROFS_BLKSIZ];
+#ifndef FUZZING
+ u8 buf[EROFS_MAX_BLOCK_SIZE];
u32 crc;
struct erofs_super_block *sb;
+ int ret;
- ret = blk_read(0, buf, 0, 1);
+ ret = blk_read(&sbi, 0, buf, 0, 1);
if (ret) {
erofs_err("failed to read superblock to check checksum: %d",
ret);
@@ -273,18 +291,20 @@
sb = (struct erofs_super_block *)(buf + EROFS_SUPER_OFFSET);
sb->checksum = 0;
- crc = erofs_crc32c(~0, (u8 *)sb, EROFS_BLKSIZ - EROFS_SUPER_OFFSET);
+ crc = erofs_crc32c(~0, (u8 *)sb, erofs_blksiz(&sbi) - EROFS_SUPER_OFFSET);
if (crc != sbi.checksum) {
erofs_err("superblock chksum doesn't match: saved(%08xh) calculated(%08xh)",
sbi.checksum, crc);
fsckcfg.corrupted = true;
return -1;
}
+#endif
return 0;
}
static int erofs_verify_xattr(struct erofs_inode *inode)
{
+ struct erofs_sb_info *sbi = inode->sbi;
unsigned int xattr_hdr_size = sizeof(struct erofs_xattr_ibody_header);
unsigned int xattr_entry_size = sizeof(struct erofs_xattr_entry);
erofs_off_t addr;
@@ -292,7 +312,7 @@
struct erofs_xattr_ibody_header *ih;
struct erofs_xattr_entry *entry;
int i, remaining = inode->xattr_isize, ret = 0;
- char buf[EROFS_BLKSIZ];
+ char buf[EROFS_MAX_BLOCK_SIZE];
if (inode->xattr_isize == xattr_hdr_size) {
erofs_err("xattr_isize %d of nid %llu is not supported yet",
@@ -308,8 +328,8 @@
}
}
- addr = iloc(inode->nid) + inode->inode_isize;
- ret = dev_read(0, buf, addr, xattr_hdr_size);
+ addr = erofs_iloc(inode) + inode->inode_isize;
+ ret = dev_read(sbi, 0, buf, addr, xattr_hdr_size);
if (ret < 0) {
erofs_err("failed to read xattr header @ nid %llu: %d",
inode->nid | 0ULL, ret);
@@ -318,12 +338,12 @@
ih = (struct erofs_xattr_ibody_header *)buf;
xattr_shared_count = ih->h_shared_count;
- ofs = erofs_blkoff(addr) + xattr_hdr_size;
+ ofs = erofs_blkoff(sbi, addr) + xattr_hdr_size;
addr += xattr_hdr_size;
remaining -= xattr_hdr_size;
for (i = 0; i < xattr_shared_count; ++i) {
- if (ofs >= EROFS_BLKSIZ) {
- if (ofs != EROFS_BLKSIZ) {
+ if (ofs >= erofs_blksiz(sbi)) {
+ if (ofs != erofs_blksiz(sbi)) {
erofs_err("unaligned xattr entry in xattr shared area @ nid %llu",
inode->nid | 0ULL);
ret = -EFSCORRUPTED;
@@ -339,7 +359,7 @@
while (remaining > 0) {
unsigned int entry_sz;
- ret = dev_read(0, buf, addr, xattr_entry_size);
+ ret = dev_read(sbi, 0, buf, addr, xattr_entry_size);
if (ret) {
erofs_err("failed to read xattr entry @ nid %llu: %d",
inode->nid | 0ULL, ret);
@@ -366,7 +386,6 @@
struct erofs_map_blocks map = {
.index = UINT_MAX,
};
- struct erofs_map_dev mdev;
int ret = 0;
bool compressed;
erofs_off_t pos = 0;
@@ -383,8 +402,8 @@
case EROFS_INODE_CHUNK_BASED:
compressed = false;
break;
- case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
- case EROFS_INODE_FLAT_COMPRESSION:
+ case EROFS_INODE_COMPRESSED_FULL:
+ case EROFS_INODE_COMPRESSED_COMPACT:
compressed = true;
break;
default:
@@ -393,6 +412,8 @@
}
while (pos < inode->i_size) {
+ unsigned int alloc_rawsize;
+
map.m_la = pos;
if (compressed)
ret = z_erofs_map_blocks_iter(inode, &map,
@@ -421,72 +442,66 @@
if (!(map.m_flags & EROFS_MAP_MAPPED) || !fsckcfg.check_decomp)
continue;
- if (map.m_plen > raw_size) {
- raw_size = map.m_plen;
- raw = realloc(raw, raw_size);
- BUG_ON(!raw);
+ if (map.m_plen > Z_EROFS_PCLUSTER_MAX_SIZE) {
+ if (compressed) {
+ erofs_err("invalid pcluster size %" PRIu64 " @ offset %" PRIu64 " of nid %" PRIu64,
+ map.m_plen, map.m_la,
+ inode->nid | 0ULL);
+ ret = -EFSCORRUPTED;
+ goto out;
+ }
+ alloc_rawsize = Z_EROFS_PCLUSTER_MAX_SIZE;
+ } else {
+ alloc_rawsize = map.m_plen;
}
- mdev = (struct erofs_map_dev) {
- .m_deviceid = map.m_deviceid,
- .m_pa = map.m_pa,
- };
- ret = erofs_map_dev(&sbi, &mdev);
- if (ret) {
- erofs_err("failed to map device of m_pa %" PRIu64 ", m_deviceid %u @ nid %llu: %d",
- map.m_pa, map.m_deviceid, inode->nid | 0ULL,
- ret);
- goto out;
- }
+ if (alloc_rawsize > raw_size) {
+ char *newraw = realloc(raw, alloc_rawsize);
- if (compressed && map.m_llen > buffer_size) {
- buffer_size = map.m_llen;
- buffer = realloc(buffer, buffer_size);
- BUG_ON(!buffer);
- }
-
- ret = dev_read(mdev.m_deviceid, raw, mdev.m_pa, map.m_plen);
- if (ret < 0) {
- erofs_err("failed to read data of m_pa %" PRIu64 ", m_plen %" PRIu64 " @ nid %llu: %d",
- mdev.m_pa, map.m_plen, inode->nid | 0ULL,
- ret);
- goto out;
+ if (!newraw) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ raw = newraw;
+ raw_size = alloc_rawsize;
}
if (compressed) {
- struct z_erofs_decompress_req rq = {
- .in = raw,
- .out = buffer,
- .decodedskip = 0,
- .inputsize = map.m_plen,
- .decodedlength = map.m_llen,
- .alg = map.m_algorithmformat,
- .partial_decoding = 0
- };
-
- ret = z_erofs_decompress(&rq);
- if (ret < 0) {
- erofs_err("failed to decompress data of m_pa %" PRIu64 ", m_plen %" PRIu64 " @ nid %llu: %s",
- mdev.m_pa, map.m_plen,
- inode->nid | 0ULL, strerror(-ret));
- goto out;
+ if (map.m_llen > buffer_size) {
+ buffer_size = map.m_llen;
+ buffer = realloc(buffer, buffer_size);
+ BUG_ON(!buffer);
}
- }
+ ret = z_erofs_read_one_data(inode, &map, raw, buffer,
+ 0, map.m_llen, false);
+ if (ret)
+ goto out;
- if (outfd >= 0 && write(outfd, compressed ? buffer : raw,
- map.m_llen) < 0) {
- erofs_err("I/O error occurred when verifying data chunk @ nid %llu",
- inode->nid | 0ULL);
- ret = -EIO;
- goto out;
+ if (outfd >= 0 && write(outfd, buffer, map.m_llen) < 0)
+ goto fail_eio;
+ } else {
+ u64 p = 0;
+
+ do {
+ u64 count = min_t(u64, alloc_rawsize,
+ map.m_llen);
+
+ ret = erofs_read_one_data(inode, &map, raw, p, count);
+ if (ret)
+ goto out;
+
+ if (outfd >= 0 && write(outfd, raw, count) < 0)
+ goto fail_eio;
+ map.m_llen -= count;
+ p += count;
+ } while (map.m_llen);
}
}
if (fsckcfg.print_comp_ratio) {
- fsckcfg.logical_blocks +=
- DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
- fsckcfg.physical_blocks +=
- DIV_ROUND_UP(pchunk_len, EROFS_BLKSIZ);
+ if (!erofs_is_packed_inode(inode))
+ fsckcfg.logical_blocks += BLK_ROUND_UP(inode->sbi, inode->i_size);
+ fsckcfg.physical_blocks += BLK_ROUND_UP(inode->sbi, pchunk_len);
}
out:
if (raw)
@@ -494,6 +509,12 @@
if (buffer)
free(buffer);
return ret < 0 ? ret : 0;
+
+fail_eio:
+ erofs_err("I/O error occurred when verifying data chunk @ nid %llu",
+ inode->nid | 0ULL);
+ ret = -EIO;
+ goto out;
}
static inline int erofs_extract_dir(struct erofs_inode *inode)
@@ -542,6 +563,63 @@
return 0;
}
+static char *erofsfsck_hardlink_find(erofs_nid_t nid)
+{
+ struct list_head *head =
+ &erofsfsck_link_hashtable[nid % NR_HARDLINK_HASHTABLE];
+ struct erofsfsck_hardlink_entry *entry;
+
+ list_for_each_entry(entry, head, list)
+ if (entry->nid == nid)
+ return entry->path;
+ return NULL;
+}
+
+static int erofsfsck_hardlink_insert(erofs_nid_t nid, const char *path)
+{
+ struct erofsfsck_hardlink_entry *entry;
+
+ entry = malloc(sizeof(*entry));
+ if (!entry)
+ return -ENOMEM;
+
+ entry->nid = nid;
+ entry->path = strdup(path);
+ if (!entry->path) {
+ free(entry);
+ return -ENOMEM;
+ }
+
+ list_add_tail(&entry->list,
+ &erofsfsck_link_hashtable[nid % NR_HARDLINK_HASHTABLE]);
+ return 0;
+}
+
+static void erofsfsck_hardlink_init(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < NR_HARDLINK_HASHTABLE; ++i)
+ init_list_head(&erofsfsck_link_hashtable[i]);
+}
+
+static void erofsfsck_hardlink_exit(void)
+{
+ struct erofsfsck_hardlink_entry *entry, *n;
+ struct list_head *head;
+ unsigned int i;
+
+ for (i = 0; i < NR_HARDLINK_HASHTABLE; ++i) {
+ head = &erofsfsck_link_hashtable[i];
+
+ list_for_each_entry_safe(entry, n, head, list) {
+ if (entry->path)
+ free(entry->path);
+ free(entry);
+ }
+ }
+}
+
static inline int erofs_extract_file(struct erofs_inode *inode)
{
bool tryagain = true;
@@ -679,28 +757,88 @@
static int erofsfsck_dirent_iter(struct erofs_dir_context *ctx)
{
int ret;
- size_t prev_pos = fsckcfg.extract_pos;
+ size_t prev_pos, curr_pos;
if (ctx->dot_dotdot)
return 0;
- if (fsckcfg.extract_path) {
- size_t curr_pos = prev_pos;
+ prev_pos = fsckcfg.extract_pos;
+ curr_pos = prev_pos;
+ if (prev_pos + ctx->de_namelen >= PATH_MAX) {
+ erofs_err("unable to fsck since the path is too long (%u)",
+ curr_pos + ctx->de_namelen);
+ return -EOPNOTSUPP;
+ }
+
+ if (fsckcfg.extract_path) {
fsckcfg.extract_path[curr_pos++] = '/';
strncpy(fsckcfg.extract_path + curr_pos, ctx->dname,
ctx->de_namelen);
curr_pos += ctx->de_namelen;
fsckcfg.extract_path[curr_pos] = '\0';
- fsckcfg.extract_pos = curr_pos;
+ } else {
+ curr_pos += ctx->de_namelen;
}
-
+ fsckcfg.extract_pos = curr_pos;
ret = erofsfsck_check_inode(ctx->dir->nid, ctx->de_nid);
- if (fsckcfg.extract_path) {
+ if (fsckcfg.extract_path)
fsckcfg.extract_path[prev_pos] = '\0';
- fsckcfg.extract_pos = prev_pos;
+ fsckcfg.extract_pos = prev_pos;
+ return ret;
+}
+
+static int erofsfsck_extract_inode(struct erofs_inode *inode)
+{
+ int ret;
+ char *oldpath;
+
+ if (!fsckcfg.extract_path) {
+verify:
+ /* verify data chunk layout */
+ return erofs_verify_inode_data(inode, -1);
}
+
+ oldpath = erofsfsck_hardlink_find(inode->nid);
+ if (oldpath) {
+ if (link(oldpath, fsckcfg.extract_path) == -1) {
+ erofs_err("failed to extract hard link: %s (%s)",
+ fsckcfg.extract_path, strerror(errno));
+ return -errno;
+ }
+ return 0;
+ }
+
+ switch (inode->i_mode & S_IFMT) {
+ case S_IFDIR:
+ ret = erofs_extract_dir(inode);
+ break;
+ case S_IFREG:
+ if (erofs_is_packed_inode(inode))
+ goto verify;
+ ret = erofs_extract_file(inode);
+ break;
+ case S_IFLNK:
+ ret = erofs_extract_symlink(inode);
+ break;
+ case S_IFCHR:
+ case S_IFBLK:
+ case S_IFIFO:
+ case S_IFSOCK:
+ ret = erofs_extract_special(inode);
+ break;
+ default:
+ /* TODO */
+ goto verify;
+ }
+ if (ret && ret != -ECANCELED)
+ return ret;
+
+ /* record nid and old path for hardlink */
+ if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
+ ret = erofsfsck_hardlink_insert(inode->nid,
+ fsckcfg.extract_path);
return ret;
}
@@ -712,6 +850,7 @@
erofs_dbg("check inode: nid(%llu)", nid | 0ULL);
inode.nid = nid;
+ inode.sbi = &sbi;
ret = erofs_read_inode_from_disk(&inode);
if (ret) {
if (ret == -EIO)
@@ -725,32 +864,7 @@
if (ret)
goto out;
- if (fsckcfg.extract_path) {
- switch (inode.i_mode & S_IFMT) {
- case S_IFDIR:
- ret = erofs_extract_dir(&inode);
- break;
- case S_IFREG:
- ret = erofs_extract_file(&inode);
- break;
- case S_IFLNK:
- ret = erofs_extract_symlink(&inode);
- break;
- case S_IFCHR:
- case S_IFBLK:
- case S_IFIFO:
- case S_IFSOCK:
- ret = erofs_extract_special(&inode);
- break;
- default:
- /* TODO */
- goto verify;
- }
- } else {
-verify:
- /* verify data chunk layout */
- ret = erofs_verify_inode_data(&inode, -1);
- }
+ ret = erofsfsck_extract_inode(&inode);
if (ret && ret != -ECANCELED)
goto out;
@@ -766,7 +880,7 @@
ret = erofs_iterate_dir(&ctx, true);
}
- if (!ret)
+ if (!ret && !erofs_is_packed_inode(&inode))
erofsfsck_set_attributes(&inode, fsckcfg.extract_path);
if (ret == -ECANCELED)
@@ -777,7 +891,11 @@
return ret;
}
-int main(int argc, char **argv)
+#ifdef FUZZING
+int erofsfsck_fuzz_one(int argc, char *argv[])
+#else
+int main(int argc, char *argv[])
+#endif
{
int err;
@@ -804,21 +922,36 @@
goto exit;
}
- err = dev_open_ro(cfg.c_img_path);
+#ifdef FUZZING
+ cfg.c_dbg_lvl = -1;
+#endif
+
+ err = dev_open_ro(&sbi, cfg.c_img_path);
if (err) {
erofs_err("failed to open image file");
goto exit;
}
- err = erofs_read_superblock();
+ err = erofs_read_superblock(&sbi);
if (err) {
erofs_err("failed to read superblock");
goto exit_dev_close;
}
- if (erofs_sb_has_sb_chksum() && erofs_check_sb_chksum()) {
+ if (erofs_sb_has_sb_chksum(&sbi) && erofs_check_sb_chksum()) {
erofs_err("failed to verify superblock checksum");
- goto exit_dev_close;
+ goto exit_put_super;
+ }
+
+ if (fsckcfg.extract_path)
+ erofsfsck_hardlink_init();
+
+ if (erofs_sb_has_fragments(&sbi) && sbi.packed_nid > 0) {
+ err = erofsfsck_check_inode(sbi.packed_nid, sbi.packed_nid);
+ if (err) {
+ erofs_err("failed to verify packed file");
+ goto exit_hardlink;
+ }
}
err = erofsfsck_check_inode(sbi.root_nid, sbi.root_nid);
@@ -843,10 +976,40 @@
}
}
+exit_hardlink:
+ if (fsckcfg.extract_path)
+ erofsfsck_hardlink_exit();
+exit_put_super:
+ erofs_put_super(&sbi);
exit_dev_close:
- dev_close();
+ dev_close(&sbi);
exit:
- blob_closeall();
+ blob_closeall(&sbi);
erofs_exit_configure();
return err ? 1 : 0;
}
+
+#ifdef FUZZING
+int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size)
+{
+ int fd, ret;
+ char filename[] = "/tmp/erofsfsck_libfuzzer_XXXXXX";
+ char *argv[] = {
+ "fsck.erofs",
+ "--extract",
+ filename,
+ };
+
+ fd = mkstemp(filename);
+ if (fd < 0)
+ return -errno;
+ if (write(fd, Data, Size) != Size) {
+ close(fd);
+ return -EIO;
+ }
+ close(fd);
+ ret = erofsfsck_fuzz_one(ARRAY_SIZE(argv), argv);
+ unlink(filename);
+ return ret ? -1 : 0;
+}
+#endif
diff --git a/fuse/Makefile.am b/fuse/Makefile.am
index 3179a2b..50be783 100644
--- a/fuse/Makefile.am
+++ b/fuse/Makefile.am
@@ -7,4 +7,4 @@
erofsfuse_CFLAGS = -Wall -I$(top_srcdir)/include
erofsfuse_CFLAGS += -DFUSE_USE_VERSION=26 ${libfuse_CFLAGS} ${libselinux_CFLAGS}
erofsfuse_LDADD = $(top_builddir)/lib/liberofs.la ${libfuse_LIBS} ${liblz4_LIBS} \
- ${libselinux_LIBS} ${liblzma_LIBS}
+ ${libselinux_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS}
diff --git a/fuse/main.c b/fuse/main.c
index f4c2476..821d98c 100644
--- a/fuse/main.c
+++ b/fuse/main.c
@@ -13,6 +13,7 @@
#include "erofs/print.h"
#include "erofs/io.h"
#include "erofs/dir.h"
+#include "erofs/inode.h"
struct erofsfuse_dir_context {
struct erofs_dir_context ctx;
@@ -24,11 +25,13 @@
static int erofsfuse_fill_dentries(struct erofs_dir_context *ctx)
{
struct erofsfuse_dir_context *fusectx = (void *)ctx;
+ struct stat st = {0};
char dname[EROFS_NAME_LEN + 1];
strncpy(dname, ctx->dname, ctx->de_namelen);
dname[ctx->de_namelen] = '\0';
- fusectx->filler(fusectx->buf, dname, NULL, 0);
+ st.st_mode = erofs_ftype_to_dtype(ctx->de_ftype) << 12;
+ fusectx->filler(fusectx->buf, dname, &st, 0);
return 0;
}
@@ -46,6 +49,7 @@
};
erofs_dbg("readdir:%s offset=%llu", path, (long long)offset);
+ dir.sbi = &sbi;
ret = erofs_ilookup(path, &dir);
if (ret)
return ret;
@@ -81,7 +85,7 @@
static int erofsfuse_getattr(const char *path, struct stat *stbuf)
{
- struct erofs_inode vi = {};
+ struct erofs_inode vi = { .sbi = &sbi };
int ret;
erofs_dbg("getattr(%s)", path);
@@ -92,7 +96,7 @@
stbuf->st_mode = vi.i_mode;
stbuf->st_nlink = vi.i_nlink;
stbuf->st_size = vi.i_size;
- stbuf->st_blocks = roundup(vi.i_size, EROFS_BLKSIZ) >> 9;
+ stbuf->st_blocks = roundup(vi.i_size, erofs_blksiz(vi.sbi)) >> 9;
stbuf->st_uid = vi.i_uid;
stbuf->st_gid = vi.i_gid;
if (S_ISBLK(vi.i_mode) || S_ISCHR(vi.i_mode))
@@ -112,6 +116,7 @@
erofs_dbg("path:%s size=%zd offset=%llu", path, size, (long long)offset);
+ vi.sbi = &sbi;
ret = erofs_ilookup(path, &vi);
if (ret)
return ret;
@@ -139,7 +144,45 @@
return 0;
}
+static int erofsfuse_getxattr(const char *path, const char *name, char *value,
+ size_t size
+#ifdef __APPLE__
+ , uint32_t position)
+#else
+ )
+#endif
+{
+ int ret;
+ struct erofs_inode vi;
+
+ erofs_dbg("getxattr(%s): name=%s size=%llu", path, name, size);
+
+ vi.sbi = &sbi;
+ ret = erofs_ilookup(path, &vi);
+ if (ret)
+ return ret;
+
+ return erofs_getxattr(&vi, name, value, size);
+}
+
+static int erofsfuse_listxattr(const char *path, char *list, size_t size)
+{
+ int ret;
+ struct erofs_inode vi;
+
+ erofs_dbg("listxattr(%s): size=%llu", path, size);
+
+ vi.sbi = &sbi;
+ ret = erofs_ilookup(path, &vi);
+ if (ret)
+ return ret;
+
+ return erofs_listxattr(&vi, list, size);
+}
+
static struct fuse_operations erofs_ops = {
+ .getxattr = erofsfuse_getxattr,
+ .listxattr = erofsfuse_listxattr,
.readlink = erofsfuse_readlink,
.getattr = erofsfuse_getattr,
.readdir = erofsfuse_readdir,
@@ -151,6 +194,7 @@
static struct options {
const char *disk;
const char *mountpoint;
+ u64 offset;
unsigned int debug_lvl;
bool show_help;
bool odebug;
@@ -158,6 +202,7 @@
#define OPTION(t, p) { t, offsetof(struct options, p), 1 }
static const struct fuse_opt option_spec[] = {
+ OPTION("--offset=%lu", offset),
OPTION("--dbglevel=%u", debug_lvl),
OPTION("--help", show_help),
FUSE_OPT_KEY("--device=", 1),
@@ -170,6 +215,7 @@
fputs("usage: [options] IMAGE MOUNTPOINT\n\n"
"Options:\n"
+ " --offset=# skip # bytes when reading IMAGE\n"
" --dbglevel=# set output message level to # (maximum 9)\n"
" --device=# specify an extra device to be used together\n"
#if FUSE_MAJOR_VERSION < 3
@@ -190,6 +236,7 @@
static void erofsfuse_dumpcfg(void)
{
erofs_dump("disk: %s\n", fusecfg.disk);
+ erofs_dump("offset: %llu\n", fusecfg.offset | 0ULL);
erofs_dump("mountpoint: %s\n", fusecfg.mountpoint);
erofs_dump("dbglevel: %u\n", cfg.c_dbg_lvl);
}
@@ -201,7 +248,7 @@
switch (key) {
case 1:
- ret = blob_open_ro(arg + sizeof("--device=") - 1);
+ ret = blob_open_ro(&sbi, arg + sizeof("--device=") - 1);
if (ret)
return -1;
++sbi.extra_devices;
@@ -279,23 +326,27 @@
if (fusecfg.odebug && cfg.c_dbg_lvl < EROFS_DBG)
cfg.c_dbg_lvl = EROFS_DBG;
+ cfg.c_offset = fusecfg.offset;
+
erofsfuse_dumpcfg();
- ret = dev_open_ro(fusecfg.disk);
+ ret = dev_open_ro(&sbi, fusecfg.disk);
if (ret) {
fprintf(stderr, "failed to open: %s\n", fusecfg.disk);
goto err_fuse_free_args;
}
- ret = erofs_read_superblock();
+ ret = erofs_read_superblock(&sbi);
if (ret) {
fprintf(stderr, "failed to read erofs super block\n");
goto err_dev_close;
}
ret = fuse_main(args.argc, args.argv, &erofs_ops, NULL);
+
+ erofs_put_super(&sbi);
err_dev_close:
- blob_closeall();
- dev_close();
+ blob_closeall(&sbi);
+ dev_close(&sbi);
err_fuse_free_args:
fuse_opt_free_args(&args);
err:
diff --git a/include/erofs/blobchunk.h b/include/erofs/blobchunk.h
index 49cb7bf..89c8048 100644
--- a/include/erofs/blobchunk.h
+++ b/include/erofs/blobchunk.h
@@ -14,12 +14,16 @@
#include "erofs/internal.h"
+struct erofs_blobchunk *erofs_get_unhashed_chunk(unsigned int device_id,
+ erofs_blk_t blkaddr, erofs_off_t sourceoffset);
int erofs_blob_write_chunk_indexes(struct erofs_inode *inode, erofs_off_t off);
-int erofs_blob_write_chunked_file(struct erofs_inode *inode);
-int erofs_blob_remap(void);
+int erofs_blob_write_chunked_file(struct erofs_inode *inode, int fd,
+ erofs_off_t startoff);
+int tarerofs_write_chunkes(struct erofs_inode *inode, erofs_off_t data_offset);
+int erofs_mkfs_dump_blobs(struct erofs_sb_info *sbi);
void erofs_blob_exit(void);
int erofs_blob_init(const char *blobfile_path);
-int erofs_generate_devtable(void);
+int erofs_mkfs_init_devices(struct erofs_sb_info *sbi, unsigned int devices);
#ifdef __cplusplus
}
diff --git a/include/erofs/block_list.h b/include/erofs/block_list.h
index 78fab44..9f9975e 100644
--- a/include/erofs/block_list.h
+++ b/include/erofs/block_list.h
@@ -13,9 +13,12 @@
#include "internal.h"
+int erofs_blocklist_open(char *filename, bool srcmap);
+void erofs_blocklist_close(void);
+
+void tarerofs_blocklist_write(erofs_blk_t blkaddr, erofs_blk_t nblocks,
+ erofs_off_t srcoff);
#ifdef WITH_ANDROID
-int erofs_droid_blocklist_fopen(void);
-void erofs_droid_blocklist_fclose(void);
void erofs_droid_blocklist_write(struct erofs_inode *inode,
erofs_blk_t blk_start, erofs_blk_t nblocks);
void erofs_droid_blocklist_write_tail_end(struct erofs_inode *inode,
diff --git a/include/erofs/cache.h b/include/erofs/cache.h
index de12399..de5584e 100644
--- a/include/erofs/cache.h
+++ b/include/erofs/cache.h
@@ -22,10 +22,12 @@
#define META 1
/* including inline xattrs, extent */
#define INODE 2
+/* directory data */
+#define DIRA 3
/* shared xattrs */
-#define XATTR 3
+#define XATTR 4
/* device table */
-#define DEVT 4
+#define DEVT 5
struct erofs_bhops {
bool (*preflush)(struct erofs_buffer_head *bh);
@@ -55,11 +57,14 @@
static inline const int get_alignsize(int type, int *type_ret)
{
if (type == DATA)
- return EROFS_BLKSIZ;
+ return erofs_blksiz(&sbi);
if (type == INODE) {
*type_ret = META;
return sizeof(struct erofs_inode_compact);
+ } else if (type == DIRA) {
+ *type_ret = META;
+ return erofs_blksiz(&sbi);
} else if (type == XATTR) {
*type_ret = META;
return sizeof(struct erofs_xattr_entry);
@@ -75,7 +80,6 @@
extern const struct erofs_bhops erofs_drop_directly_bhops;
extern const struct erofs_bhops erofs_skip_write_bhops;
-extern const struct erofs_bhops erofs_buf_write_bhops;
static inline erofs_off_t erofs_btell(struct erofs_buffer_head *bh, bool end)
{
@@ -84,7 +88,7 @@
if (bb->blkaddr == NULL_ADDR)
return NULL_ADDR_UL;
- return blknr_to_addr(bb->blkaddr) +
+ return erofs_pos(&sbi, bb->blkaddr) +
(end ? list_next_entry(bh, list)->off : bh->off);
}
@@ -108,6 +112,7 @@
bool erofs_bflush(struct erofs_buffer_block *bb);
void erofs_bdrop(struct erofs_buffer_head *bh, bool tryrevoke);
+erofs_blk_t erofs_total_metablocks(void);
#ifdef __cplusplus
}
diff --git a/include/erofs/compress.h b/include/erofs/compress.h
index 24f6204..46cff03 100644
--- a/include/erofs/compress.h
+++ b/include/erofs/compress.h
@@ -14,16 +14,28 @@
#include "internal.h"
-#define EROFS_CONFIG_COMPR_MAX_SZ (3000 * 1024)
-#define EROFS_CONFIG_COMPR_MIN_SZ (32 * 1024)
+#define EROFS_CONFIG_COMPR_MAX_SZ (4000 * 1024)
void z_erofs_drop_inline_pcluster(struct erofs_inode *inode);
-int erofs_write_compressed_file(struct erofs_inode *inode);
+int erofs_write_compressed_file(struct erofs_inode *inode, int fd);
-int z_erofs_compress_init(struct erofs_buffer_head *bh);
+int z_erofs_compress_init(struct erofs_sb_info *sbi,
+ struct erofs_buffer_head *bh);
int z_erofs_compress_exit(void);
-const char *z_erofs_list_available_compressors(unsigned int i);
+const char *z_erofs_list_supported_algorithms(int i, unsigned int *mask);
+const char *z_erofs_list_available_compressors(int *i);
+
+static inline bool erofs_is_packed_inode(struct erofs_inode *inode)
+{
+ erofs_nid_t packed_nid = inode->sbi->packed_nid;
+
+ if (inode->nid == EROFS_PACKED_NID_UNALLOCATED) {
+ DBG_BUGON(packed_nid != EROFS_PACKED_NID_UNALLOCATED);
+ return true;
+ }
+ return (packed_nid > 0 && inode->nid == packed_nid);
+}
#ifdef __cplusplus
}
diff --git a/include/erofs/compress_hints.h b/include/erofs/compress_hints.h
index 659c5b6..9f0d8ae 100644
--- a/include/erofs/compress_hints.h
+++ b/include/erofs/compress_hints.h
@@ -20,11 +20,12 @@
regex_t reg;
unsigned int physical_clusterblks;
+ unsigned char algorithmtype;
};
bool z_erofs_apply_compress_hints(struct erofs_inode *inode);
void erofs_cleanup_compress_hints(void);
-int erofs_load_compress_hints(void);
+int erofs_load_compress_hints(struct erofs_sb_info *sbi);
#ifdef __cplusplus
}
diff --git a/include/erofs/config.h b/include/erofs/config.h
index 0d0916c..e342722 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -32,6 +32,8 @@
TIMESTAMP_CLAMPING,
};
+#define EROFS_MAX_COMPR_CFGS 64
+
struct erofs_configure {
const char *c_version;
int c_dbg_lvl;
@@ -39,13 +41,20 @@
bool c_legacy_compress;
#ifndef NDEBUG
bool c_random_pclusterblks;
+ bool c_random_algorithms;
#endif
char c_timeinherit;
char c_chunkbits;
- bool c_noinline_data;
+ bool c_inline_data;
bool c_ztailpacking;
+ bool c_fragments;
+ bool c_all_fragments;
+ bool c_dedupe;
bool c_ignore_mtime;
bool c_showprogress;
+ bool c_extra_ea_name_prefixes;
+ bool c_xattr_name_filter;
+ bool c_ovlfs_strip;
#ifdef HAVE_LIBSELINUX
struct selabel_handle *sehnd;
@@ -55,24 +64,28 @@
char *c_src_path;
char *c_blobdev_path;
char *c_compress_hints_file;
- char *c_compr_alg_master;
- int c_compr_level_master;
+ char *c_compr_alg[EROFS_MAX_COMPR_CFGS];
+ int c_compr_level[EROFS_MAX_COMPR_CFGS];
char c_force_inodeversion;
char c_force_chunkformat;
/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
int c_inline_xattr_tolerance;
- u32 c_pclusterblks_max, c_pclusterblks_def;
+ u32 c_pclusterblks_max, c_pclusterblks_def, c_pclusterblks_packed;
u32 c_max_decompressed_extent_bytes;
u32 c_dict_size;
u64 c_unix_timestamp;
u32 c_uid, c_gid;
+ const char *mount_point;
+ long long c_uid_offset, c_gid_offset;
#ifdef WITH_ANDROID
- char *mount_point;
char *target_out_path;
char *fs_config_file;
char *block_list_file;
#endif
+
+ /* offset when reading multi partition images */
+ u64 c_offset;
};
extern struct erofs_configure cfg;
diff --git a/include/erofs/decompress.h b/include/erofs/decompress.h
index 82bf7b8..0d55483 100644
--- a/include/erofs/decompress.h
+++ b/include/erofs/decompress.h
@@ -14,6 +14,7 @@
#include "internal.h"
struct z_erofs_decompress_req {
+ struct erofs_sb_info *sbi;
char *in, *out;
/*
@@ -23,6 +24,9 @@
unsigned int decodedskip;
unsigned int inputsize, decodedlength;
+ /* cut point of interlaced uncompressed data */
+ unsigned int interlaced_offset;
+
/* indicate the algorithm will be used for decompression */
unsigned int alg;
bool partial_decoding;
diff --git a/include/erofs/dedupe.h b/include/erofs/dedupe.h
new file mode 100644
index 0000000..153bd4c
--- /dev/null
+++ b/include/erofs/dedupe.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+/*
+ * Copyright (C) 2022 Alibaba Cloud
+ */
+#ifndef __EROFS_DEDUPE_H
+#define __EROFS_DEDUPE_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "internal.h"
+
+struct z_erofs_inmem_extent {
+ erofs_blk_t blkaddr;
+ unsigned int compressedblks;
+ unsigned int length;
+ bool raw, partial;
+};
+
+struct z_erofs_dedupe_ctx {
+ u8 *start, *end;
+ u8 *cur;
+ struct z_erofs_inmem_extent e;
+};
+
+int z_erofs_dedupe_match(struct z_erofs_dedupe_ctx *ctx);
+int z_erofs_dedupe_insert(struct z_erofs_inmem_extent *e,
+ void *original_data);
+void z_erofs_dedupe_commit(bool drop);
+int z_erofs_dedupe_init(unsigned int wsiz);
+void z_erofs_dedupe_exit(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/erofs/defs.h b/include/erofs/defs.h
index e5aa23c..fefa7e7 100644
--- a/include/erofs/defs.h
+++ b/include/erofs/defs.h
@@ -179,9 +179,29 @@
#define __maybe_unused __attribute__((__unused__))
#endif
-static inline u32 get_unaligned_le32(const u8 *p)
+#define __packed __attribute__((__packed__))
+
+#define __get_unaligned_t(type, ptr) ({ \
+ const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \
+ __pptr->x; \
+})
+
+#define __put_unaligned_t(type, val, ptr) do { \
+ struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \
+ __pptr->x = (val); \
+} while (0)
+
+#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr))
+#define put_unaligned(val, ptr) __put_unaligned_t(typeof(*(ptr)), (val), (ptr))
+
+static inline u32 get_unaligned_le32(const void *p)
{
- return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24;
+ return le32_to_cpu(__get_unaligned_t(__le32, p));
+}
+
+static inline void put_unaligned_le32(u32 val, void *p)
+{
+ __put_unaligned_t(__le32, cpu_to_le32(val), p);
}
/**
@@ -266,6 +286,11 @@
return x ? sizeof(x) * 8 - __builtin_clz(x) : 0;
}
+static inline unsigned long lowbit(unsigned long n)
+{
+ return n & -n;
+}
+
/**
* __roundup_pow_of_two() - round up to nearest power of two
* @n: value to round up
@@ -313,11 +338,6 @@
#define ST_MTIM_NSEC(stbuf) 0
#endif
-#ifdef __APPLE__
-#define stat64 stat
-#define lstat64 lstat
-#endif
-
#ifdef __cplusplus
}
#endif
diff --git a/include/erofs/dir.h b/include/erofs/dir.h
index 74bffb5..5460ac4 100644
--- a/include/erofs/dir.h
+++ b/include/erofs/dir.h
@@ -62,7 +62,8 @@
/* Iterate over inodes that are in directory */
int erofs_iterate_dir(struct erofs_dir_context *ctx, bool fsck);
/* Get a full pathname of the inode NID */
-int erofs_get_pathname(erofs_nid_t nid, char *buf, size_t size);
+int erofs_get_pathname(struct erofs_sb_info *sbi, erofs_nid_t nid,
+ char *buf, size_t size);
#ifdef __cplusplus
}
diff --git a/include/erofs/diskbuf.h b/include/erofs/diskbuf.h
new file mode 100644
index 0000000..29d9fe2
--- /dev/null
+++ b/include/erofs/diskbuf.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+#ifndef __EROFS_DISKBUF_H
+#define __EROFS_DISKBUF_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "erofs/defs.h"
+
+struct erofs_diskbuf {
+ void *sp; /* internal stream pointer */
+ u64 offset; /* internal offset */
+};
+
+int erofs_diskbuf_getfd(struct erofs_diskbuf *db, u64 *off);
+
+int erofs_diskbuf_reserve(struct erofs_diskbuf *db, int sid, u64 *off);
+void erofs_diskbuf_commit(struct erofs_diskbuf *db, u64 len);
+void erofs_diskbuf_close(struct erofs_diskbuf *db);
+
+int erofs_diskbuf_init(unsigned int nstrms);
+void erofs_diskbuf_exit(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/erofs/err.h b/include/erofs/err.h
index 08b0bdb..2ae9e21 100644
--- a/include/erofs/err.h
+++ b/include/erofs/err.h
@@ -33,6 +33,12 @@
return (long) ptr;
}
+static inline void * ERR_CAST(const void *ptr)
+{
+ /* cast away the const */
+ return (void *) ptr;
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/include/erofs/fragments.h b/include/erofs/fragments.h
new file mode 100644
index 0000000..4c6f755
--- /dev/null
+++ b/include/erofs/fragments.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+/*
+ * Copyright (C), 2022, Coolpad Group Limited.
+ */
+#ifndef __EROFS_FRAGMENTS_H
+#define __EROFS_FRAGMENTS_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "erofs/internal.h"
+
+extern const char *erofs_frags_packedname;
+#define EROFS_PACKED_INODE erofs_frags_packedname
+
+FILE *erofs_packedfile_init(void);
+void erofs_packedfile_exit(void);
+struct erofs_inode *erofs_mkfs_build_packedfile(void);
+
+int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 *tofcrc);
+int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd, u32 tofcrc);
+int z_erofs_pack_fragments(struct erofs_inode *inode, void *data,
+ unsigned int len, u32 tofcrc);
+void z_erofs_fragments_commit(struct erofs_inode *inode);
+int z_erofs_fragments_init(void);
+void z_erofs_fragments_exit(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/erofs/hashmap.h b/include/erofs/hashmap.h
index 3d38578..d25092d 100644
--- a/include/erofs/hashmap.h
+++ b/include/erofs/hashmap.h
@@ -61,7 +61,7 @@
/* hashmap functions */
void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function,
size_t initial_size);
-void hashmap_free(struct hashmap *map, int free_entries);
+int hashmap_free(struct hashmap *map);
/* hashmap_entry functions */
static inline void hashmap_entry_init(void *entry, unsigned int hash)
@@ -75,8 +75,7 @@
void *hashmap_get(const struct hashmap *map, const void *key, const void *keydata);
void *hashmap_get_next(const struct hashmap *map, const void *entry);
void hashmap_add(struct hashmap *map, void *entry);
-void *hashmap_put(struct hashmap *map, void *entry);
-void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata);
+void *hashmap_remove(struct hashmap *map, const void *key);
static inline void *hashmap_get_from_hash(const struct hashmap *map,
unsigned int hash,
diff --git a/include/erofs/inode.h b/include/erofs/inode.h
index 79b39b0..bcfd98e 100644
--- a/include/erofs/inode.h
+++ b/include/erofs/inode.h
@@ -15,12 +15,30 @@
#include "erofs/internal.h"
+static inline struct erofs_inode *erofs_igrab(struct erofs_inode *inode)
+{
+ ++inode->i_count;
+ return inode;
+}
+
+u32 erofs_new_encode_dev(dev_t dev);
unsigned char erofs_mode_to_ftype(umode_t mode);
+unsigned char erofs_ftype_to_dtype(unsigned int filetype);
void erofs_inode_manager_init(void);
+void erofs_insert_ihash(struct erofs_inode *inode, dev_t dev, ino_t ino);
+struct erofs_inode *erofs_iget(dev_t dev, ino_t ino);
+struct erofs_inode *erofs_iget_by_nid(erofs_nid_t nid);
unsigned int erofs_iput(struct erofs_inode *inode);
erofs_nid_t erofs_lookupnid(struct erofs_inode *inode);
-struct erofs_inode *erofs_mkfs_build_tree_from_path(struct erofs_inode *parent,
- const char *path);
+struct erofs_dentry *erofs_d_alloc(struct erofs_inode *parent,
+ const char *name);
+int erofs_rebuild_dump_tree(struct erofs_inode *dir);
+int erofs_init_empty_dir(struct erofs_inode *dir);
+int __erofs_fill_inode(struct erofs_inode *inode, struct stat *st,
+ const char *path);
+struct erofs_inode *erofs_new_inode(void);
+struct erofs_inode *erofs_mkfs_build_tree_from_path(const char *path);
+struct erofs_inode *erofs_mkfs_build_special_from_fd(int fd, const char *name);
#ifdef __cplusplus
}
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 6a70f11..c1ff582 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -17,35 +17,20 @@
typedef unsigned short umode_t;
-#define __packed __attribute__((__packed__))
-
#include "erofs_fs.h"
#include <fcntl.h>
+#include <sys/types.h> /* for off_t definition */
+#include <sys/stat.h> /* for S_ISCHR definition */
+#include <stdio.h>
#ifndef PATH_MAX
#define PATH_MAX 4096 /* # chars in a path name including nul */
#endif
-#ifndef PAGE_SHIFT
-#define PAGE_SHIFT (12)
+#ifndef EROFS_MAX_BLOCK_SIZE
+#define EROFS_MAX_BLOCK_SIZE 4096
#endif
-#ifndef PAGE_SIZE
-#define PAGE_SIZE (1U << PAGE_SHIFT)
-#endif
-
-/* no obvious reason to support explicit PAGE_SIZE != 4096 for now */
-#if PAGE_SIZE != 4096
-#warning EROFS may be incompatible on your platform
-#endif
-
-#ifndef PAGE_MASK
-#define PAGE_MASK (~(PAGE_SIZE-1))
-#endif
-
-#define LOG_BLOCK_SIZE (12)
-#define EROFS_BLKSIZ (1U << LOG_BLOCK_SIZE)
-
#define EROFS_ISLOTBITS 5
#define EROFS_SLOTSIZE (1U << EROFS_ISLOTBITS)
@@ -57,21 +42,33 @@
#define NULL_ADDR ((unsigned int)-1)
#define NULL_ADDR_UL ((unsigned long)-1)
-#define erofs_blknr(addr) ((addr) / EROFS_BLKSIZ)
-#define erofs_blkoff(addr) ((addr) % EROFS_BLKSIZ)
-#define blknr_to_addr(nr) ((erofs_off_t)(nr) * EROFS_BLKSIZ)
+/* global sbi */
+extern struct erofs_sb_info sbi;
-#define BLK_ROUND_UP(addr) DIV_ROUND_UP(addr, EROFS_BLKSIZ)
+#define erofs_blksiz(sbi) (1u << (sbi)->blkszbits)
+#define erofs_blknr(sbi, addr) ((addr) >> (sbi)->blkszbits)
+#define erofs_blkoff(sbi, addr) ((addr) & (erofs_blksiz(sbi) - 1))
+#define erofs_pos(sbi, nr) ((erofs_off_t)(nr) << (sbi)->blkszbits)
+#define BLK_ROUND_UP(sbi, addr) DIV_ROUND_UP(addr, erofs_blksiz(sbi))
struct erofs_buffer_head;
struct erofs_device_info {
+ u8 tag[64];
u32 blocks;
u32 mapped_blkaddr;
};
+struct erofs_xattr_prefix_item {
+ struct erofs_xattr_long_prefix *prefix;
+ u8 infix_len;
+};
+
+#define EROFS_PACKED_NID_UNALLOCATED -1
+
struct erofs_sb_info {
struct erofs_device_info *devs;
+ char *devname;
u64 total_blocks;
u64 primarydevice_blocks;
@@ -84,7 +81,9 @@
u64 build_time;
u32 build_time_nsec;
+ u8 extslots;
unsigned char islotbits;
+ unsigned char blkszbits;
/* what we really care is nid, rather than ino.. */
erofs_nid_t root_nid;
@@ -92,6 +91,7 @@
u64 inos;
u8 uuid[16];
+ char volume_name[16];
u16 available_compr_algs;
u16 lz4_max_distance;
@@ -102,53 +102,74 @@
u16 devt_slotoff; /* used for mkfs */
u16 device_id_mask; /* used for others */
};
+ erofs_nid_t packed_nid;
+
+ u32 xattr_prefix_start;
+ u8 xattr_prefix_count;
+ struct erofs_xattr_prefix_item *xattr_prefixes;
+
+ int devfd, devblksz;
+ u64 devsz;
+ dev_t dev;
+ unsigned int nblobs;
+ unsigned int blobfd[256];
+
+ struct list_head list;
+
+ u64 saved_by_deduplication;
};
-/* global sbi */
-extern struct erofs_sb_info sbi;
-
-static inline erofs_off_t iloc(erofs_nid_t nid)
-{
- return blknr_to_addr(sbi.meta_blkaddr) + (nid << sbi.islotbits);
-}
+/* make sure that any user of the erofs headers has atleast 64bit off_t type */
+extern int erofs_assert_largefile[sizeof(off_t)-8];
#define EROFS_FEATURE_FUNCS(name, compat, feature) \
-static inline bool erofs_sb_has_##name(void) \
+static inline bool erofs_sb_has_##name(struct erofs_sb_info *sbi) \
{ \
- return sbi.feature_##compat & EROFS_FEATURE_##feature; \
+ return sbi->feature_##compat & EROFS_FEATURE_##feature; \
} \
-static inline void erofs_sb_set_##name(void) \
+static inline void erofs_sb_set_##name(struct erofs_sb_info *sbi) \
{ \
- sbi.feature_##compat |= EROFS_FEATURE_##feature; \
+ sbi->feature_##compat |= EROFS_FEATURE_##feature; \
} \
-static inline void erofs_sb_clear_##name(void) \
+static inline void erofs_sb_clear_##name(struct erofs_sb_info *sbi) \
{ \
- sbi.feature_##compat &= ~EROFS_FEATURE_##feature; \
+ sbi->feature_##compat &= ~EROFS_FEATURE_##feature; \
}
-EROFS_FEATURE_FUNCS(lz4_0padding, incompat, INCOMPAT_LZ4_0PADDING)
+EROFS_FEATURE_FUNCS(lz4_0padding, incompat, INCOMPAT_ZERO_PADDING)
EROFS_FEATURE_FUNCS(compr_cfgs, incompat, INCOMPAT_COMPR_CFGS)
EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER)
EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE)
EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE)
EROFS_FEATURE_FUNCS(ztailpacking, incompat, INCOMPAT_ZTAILPACKING)
+EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS)
+EROFS_FEATURE_FUNCS(dedupe, incompat, INCOMPAT_DEDUPE)
+EROFS_FEATURE_FUNCS(xattr_prefixes, incompat, INCOMPAT_XATTR_PREFIXES)
EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
+EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER)
#define EROFS_I_EA_INITED (1 << 0)
#define EROFS_I_Z_INITED (1 << 1)
+struct erofs_diskbuf;
+
struct erofs_inode {
struct list_head i_hash, i_subdirs, i_xattrs;
union {
/* (erofsfuse) runtime flags */
unsigned int flags;
- /* (mkfs.erofs) device ID containing source file */
- u32 dev;
+
+ /* (mkfs.erofs) next pointer for directory dumping */
+ struct erofs_inode *next_dirwrite;
};
unsigned int i_count;
+ struct erofs_sb_info *sbi;
struct erofs_inode *i_parent;
+ /* (mkfs.erofs) device ID containing source file */
+ u32 dev;
+
umode_t i_mode;
erofs_off_t i_size;
@@ -169,17 +190,28 @@
};
} u;
- char i_srcpath[PATH_MAX + 1];
-
+ char *i_srcpath;
+ union {
+ char *i_link;
+ struct erofs_diskbuf *i_diskbuf;
+ };
unsigned char datalayout;
unsigned char inode_isize;
/* inline tail-end packing size */
unsigned short idata_size;
bool compressed_idata;
+ bool lazy_tailblock;
+ bool with_diskbuf;
+ bool opaque;
+ /* OVL: non-merge dir that may contain whiteout entries */
+ bool whiteouts;
unsigned int xattr_isize;
unsigned int extent_isize;
+ unsigned int xattr_shared_count;
+ unsigned int *xattr_shared_xattrs;
+
erofs_nid_t nid;
struct erofs_buffer_head *bh;
struct erofs_buffer_head *bh_inline, *bh_data;
@@ -206,8 +238,18 @@
#ifdef WITH_ANDROID
uint64_t capabilities;
#endif
+ erofs_off_t fragmentoff;
+ unsigned int fragment_size;
};
+static inline erofs_off_t erofs_iloc(struct erofs_inode *inode)
+{
+ struct erofs_sb_info *sbi = inode->sbi;
+
+ return erofs_pos(sbi, sbi->meta_blkaddr) +
+ (inode->nid << sbi->islotbits);
+}
+
static inline bool is_inode_layout_compression(struct erofs_inode *inode)
{
return erofs_inode_is_data_compressed(inode->datalayout);
@@ -276,6 +318,8 @@
BH_Mapped,
BH_Encoded,
BH_FullMapped,
+ BH_Fragment,
+ BH_Partialref,
};
/* Has a disk mapping */
@@ -286,9 +330,13 @@
#define EROFS_MAP_ENCODED (1 << BH_Encoded)
/* The length of extent is full */
#define EROFS_MAP_FULL_MAPPED (1 << BH_FullMapped)
+/* Located in the special packed inode */
+#define EROFS_MAP_FRAGMENT (1 << BH_Fragment)
+/* The extent refers to partial decompressed data */
+#define EROFS_MAP_PARTIAL_REF (1 << BH_Partialref)
struct erofs_map_blocks {
- char mpage[EROFS_BLKSIZ];
+ char mpage[EROFS_MAX_BLOCK_SIZE];
erofs_off_t m_pa, m_la;
u64 m_plen, m_llen;
@@ -304,10 +352,12 @@
* approach instead if possible since it's more metadata lightweight.)
*/
#define EROFS_GET_BLOCKS_FIEMAP 0x0002
+/* Used to map tail extent for tailpacking inline or fragment pcluster */
#define EROFS_GET_BLOCKS_FINDTAIL 0x0008
enum {
Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
+ Z_EROFS_COMPRESSION_INTERLACED,
Z_EROFS_COMPRESSION_RUNTIME_MAX
};
@@ -317,12 +367,12 @@
};
/* super.c */
-int erofs_read_superblock(void);
+int erofs_read_superblock(struct erofs_sb_info *sbi);
+void erofs_put_super(struct erofs_sb_info *sbi);
/* namei.c */
int erofs_read_inode_from_disk(struct erofs_inode *vi);
int erofs_ilookup(const char *path, struct erofs_inode *vi);
-int erofs_read_inode_from_disk(struct erofs_inode *vi);
/* data.c */
int erofs_pread(struct erofs_inode *inode, char *buf,
@@ -330,6 +380,13 @@
int erofs_map_blocks(struct erofs_inode *inode,
struct erofs_map_blocks *map, int flags);
int erofs_map_dev(struct erofs_sb_info *sbi, struct erofs_map_dev *map);
+int erofs_read_one_data(struct erofs_inode *inode, struct erofs_map_blocks *map,
+ char *buffer, u64 offset, size_t len);
+int z_erofs_read_one_data(struct erofs_inode *inode,
+ struct erofs_map_blocks *map, char *raw, char *buffer,
+ erofs_off_t skip, erofs_off_t length, bool trimmed);
+void *erofs_read_metadata(struct erofs_sb_info *sbi, erofs_nid_t nid,
+ erofs_off_t *offset, int *lengthp);
static inline int erofs_get_occupied_size(const struct erofs_inode *inode,
erofs_off_t *size)
@@ -341,16 +398,21 @@
case EROFS_INODE_CHUNK_BASED:
*size = inode->i_size;
break;
- case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
- case EROFS_INODE_FLAT_COMPRESSION:
- *size = inode->u.i_blocks * EROFS_BLKSIZ;
+ case EROFS_INODE_COMPRESSED_FULL:
+ case EROFS_INODE_COMPRESSED_COMPACT:
+ *size = inode->u.i_blocks * erofs_blksiz(inode->sbi);
break;
default:
- return -ENOTSUP;
+ return -EOPNOTSUPP;
}
return 0;
}
+/* data.c */
+int erofs_getxattr(struct erofs_inode *vi, const char *name, char *buffer,
+ size_t buffer_size);
+int erofs_listxattr(struct erofs_inode *vi, char *buffer, size_t buffer_size);
+
/* zmap.c */
int z_erofs_fill_inode(struct erofs_inode *vi);
int z_erofs_map_blocks_iter(struct erofs_inode *vi,
@@ -375,6 +437,12 @@
return crc;
}
+#define EROFS_WHITEOUT_DEV 0
+static inline bool erofs_inode_is_whiteout(struct erofs_inode *inode)
+{
+ return S_ISCHR(inode->i_mode) && inode->u.i_rdev == EROFS_WHITEOUT_DEV;
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/include/erofs/io.h b/include/erofs/io.h
index 0f58c70..4db5716 100644
--- a/include/erofs/io.h
+++ b/include/erofs/io.h
@@ -22,36 +22,36 @@
#define O_BINARY 0
#endif
-void blob_closeall(void);
-int blob_open_ro(const char *dev);
-int dev_open(const char *devname);
-int dev_open_ro(const char *dev);
-void dev_close(void);
-int dev_write(const void *buf, u64 offset, size_t len);
-int dev_read(int device_id, void *buf, u64 offset, size_t len);
-int dev_fillzero(u64 offset, size_t len, bool padding);
-int dev_fsync(void);
-int dev_resize(erofs_blk_t nblocks);
-u64 dev_length(void);
-
-extern int erofs_devfd;
+void blob_closeall(struct erofs_sb_info *sbi);
+int blob_open_ro(struct erofs_sb_info *sbi, const char *dev);
+int dev_open(struct erofs_sb_info *sbi, const char *devname);
+int dev_open_ro(struct erofs_sb_info *sbi, const char *dev);
+void dev_close(struct erofs_sb_info *sbi);
+int dev_write(struct erofs_sb_info *sbi, const void *buf,
+ u64 offset, size_t len);
+int dev_read(struct erofs_sb_info *sbi, int device_id,
+ void *buf, u64 offset, size_t len);
+int dev_fillzero(struct erofs_sb_info *sbi, u64 offset,
+ size_t len, bool padding);
+int dev_fsync(struct erofs_sb_info *sbi);
+int dev_resize(struct erofs_sb_info *sbi, erofs_blk_t nblocks);
ssize_t erofs_copy_file_range(int fd_in, erofs_off_t *off_in,
int fd_out, erofs_off_t *off_out,
size_t length);
-static inline int blk_write(const void *buf, erofs_blk_t blkaddr,
- u32 nblocks)
+static inline int blk_write(struct erofs_sb_info *sbi, const void *buf,
+ erofs_blk_t blkaddr, u32 nblocks)
{
- return dev_write(buf, blknr_to_addr(blkaddr),
- blknr_to_addr(nblocks));
+ return dev_write(sbi, buf, erofs_pos(sbi, blkaddr),
+ erofs_pos(sbi, nblocks));
}
-static inline int blk_read(int device_id, void *buf,
+static inline int blk_read(struct erofs_sb_info *sbi, int device_id, void *buf,
erofs_blk_t start, u32 nblocks)
{
- return dev_read(device_id, buf, blknr_to_addr(start),
- blknr_to_addr(nblocks));
+ return dev_read(sbi, device_id, buf, erofs_pos(sbi, start),
+ erofs_pos(sbi, nblocks));
}
#ifdef __cplusplus
diff --git a/include/erofs/list.h b/include/erofs/list.h
index 3f5da1a..d7a9fee 100644
--- a/include/erofs/list.h
+++ b/include/erofs/list.h
@@ -70,6 +70,26 @@
return head->next == head;
}
+static inline void __list_splice(struct list_head *list,
+ struct list_head *prev, struct list_head *next)
+{
+ struct list_head *first = list->next;
+ struct list_head *last = list->prev;
+
+ first->prev = prev;
+ prev->next = first;
+
+ last->next = next;
+ next->prev = last;
+}
+
+static inline void list_splice_tail(struct list_head *list,
+ struct list_head *head)
+{
+ if (!list_empty(list))
+ __list_splice(list, head->prev, head);
+}
+
#define list_entry(ptr, type, member) container_of(ptr, type, member)
#define list_first_entry(ptr, type, member) \
diff --git a/include/erofs/rebuild.h b/include/erofs/rebuild.h
new file mode 100644
index 0000000..e99ce74
--- /dev/null
+++ b/include/erofs/rebuild.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+#ifndef __EROFS_REBUILD_H
+#define __EROFS_REBUILD_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "internal.h"
+
+struct erofs_dentry *erofs_rebuild_get_dentry(struct erofs_inode *pwd,
+ char *path, bool aufs, bool *whout, bool *opq, bool to_head);
+
+int erofs_rebuild_load_tree(struct erofs_inode *root, struct erofs_sb_info *sbi);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/erofs/tar.h b/include/erofs/tar.h
new file mode 100644
index 0000000..a76f740
--- /dev/null
+++ b/include/erofs/tar.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+#ifndef __EROFS_TAR_H
+#define __EROFS_TAR_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#if defined(HAVE_ZLIB)
+#include <zlib.h>
+#endif
+#include <sys/stat.h>
+
+#include "internal.h"
+
+struct erofs_pax_header {
+ struct stat st;
+ struct list_head xattrs;
+ bool use_mtime;
+ bool use_size;
+ bool use_uid;
+ bool use_gid;
+ char *path, *link;
+};
+
+#define EROFS_IOS_DECODER_NONE 0
+#define EROFS_IOS_DECODER_GZIP 1
+
+struct erofs_iostream {
+ union {
+ int fd; /* original fd */
+ void *handler;
+ };
+ u64 sz;
+ char *buffer;
+ unsigned int head, tail, bufsize;
+ int decoder;
+ bool feof;
+};
+
+struct erofs_tarfile {
+ struct erofs_pax_header global;
+ struct erofs_iostream ios;
+ char *mapfile;
+
+ int fd;
+ u64 offset;
+ bool index_mode, aufs;
+};
+
+void erofs_iostream_close(struct erofs_iostream *ios);
+int erofs_iostream_open(struct erofs_iostream *ios, int fd, int decoder);
+int tarerofs_parse_tar(struct erofs_inode *root, struct erofs_tarfile *tar);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/erofs/xattr.h b/include/erofs/xattr.h
index 226e984..0f76037 100644
--- a/include/erofs/xattr.h
+++ b/include/erofs/xattr.h
@@ -14,40 +14,52 @@
#include "internal.h"
+#ifndef ENOATTR
+#define ENOATTR ENODATA
+#endif
+
+static inline unsigned int inlinexattr_header_size(struct erofs_inode *vi)
+{
+ return sizeof(struct erofs_xattr_ibody_header) +
+ sizeof(u32) * vi->xattr_shared_count;
+}
+
+static inline erofs_blk_t xattrblock_addr(struct erofs_inode *vi,
+ unsigned int xattr_id)
+{
+ return vi->sbi->xattr_blkaddr +
+ erofs_blknr(vi->sbi, xattr_id * sizeof(__u32));
+}
+
+static inline unsigned int xattrblock_offset(struct erofs_inode *vi,
+ unsigned int xattr_id)
+{
+ return erofs_blkoff(vi->sbi, xattr_id * sizeof(__u32));
+}
+
#define EROFS_INODE_XATTR_ICOUNT(_size) ({\
u32 __size = le16_to_cpu(_size); \
((__size) == 0) ? 0 : \
(_size - sizeof(struct erofs_xattr_ibody_header)) / \
sizeof(struct erofs_xattr_entry) + 1; })
-#ifndef XATTR_USER_PREFIX
-#define XATTR_USER_PREFIX "user."
-#endif
-#ifndef XATTR_USER_PREFIX_LEN
-#define XATTR_USER_PREFIX_LEN (sizeof(XATTR_USER_PREFIX) - 1)
-#endif
-#ifndef XATTR_SECURITY_PREFIX
-#define XATTR_SECURITY_PREFIX "security."
-#endif
-#ifndef XATTR_SECURITY_PREFIX_LEN
-#define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1)
-#endif
-#ifndef XATTR_TRUSTED_PREFIX
-#define XATTR_TRUSTED_PREFIX "trusted."
-#endif
-#ifndef XATTR_TRUSTED_PREFIX_LEN
-#define XATTR_TRUSTED_PREFIX_LEN (sizeof(XATTR_TRUSTED_PREFIX) - 1)
-#endif
-#ifndef XATTR_NAME_POSIX_ACL_ACCESS
-#define XATTR_NAME_POSIX_ACL_ACCESS "system.posix_acl_access"
-#endif
-#ifndef XATTR_NAME_POSIX_ACL_DEFAULT
-#define XATTR_NAME_POSIX_ACL_DEFAULT "system.posix_acl_default"
-#endif
-
+int erofs_scan_file_xattrs(struct erofs_inode *inode);
int erofs_prepare_xattr_ibody(struct erofs_inode *inode);
-char *erofs_export_xattr_ibody(struct list_head *ixattrs, unsigned int size);
-int erofs_build_shared_xattrs_from_path(const char *path);
+char *erofs_export_xattr_ibody(struct erofs_inode *inode);
+int erofs_build_shared_xattrs_from_path(struct erofs_sb_info *sbi, const char *path);
+
+int erofs_xattr_insert_name_prefix(const char *prefix);
+void erofs_xattr_cleanup_name_prefixes(void);
+int erofs_xattr_write_name_prefixes(struct erofs_sb_info *sbi, FILE *f);
+void erofs_xattr_prefixes_cleanup(struct erofs_sb_info *sbi);
+int erofs_xattr_prefixes_init(struct erofs_sb_info *sbi);
+
+int erofs_setxattr(struct erofs_inode *inode, char *key,
+ const void *value, size_t size);
+int erofs_set_opaque_xattr(struct erofs_inode *inode);
+void erofs_clear_opaque_xattr(struct erofs_inode *inode);
+int erofs_set_origin_xattr(struct erofs_inode *inode);
+int erofs_read_xattrs_from_disk(struct erofs_inode *inode);
#ifdef __cplusplus
}
diff --git a/include/erofs/xxhash.h b/include/erofs/xxhash.h
new file mode 100644
index 0000000..5441209
--- /dev/null
+++ b/include/erofs/xxhash.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0+ */
+#ifndef __EROFS_XXHASH_H
+#define __EROFS_XXHASH_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include <stdint.h>
+
+/**
+ * xxh32() - calculate the 32-bit hash of the input with a given seed.
+ *
+ * @input: The data to hash.
+ * @length: The length of the data to hash.
+ * @seed: The seed can be used to alter the result predictably.
+ *
+ * Return: The 32-bit hash of the data.
+ */
+uint32_t xxh32(const void *input, size_t length, uint32_t seed);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/erofs_fs.h b/include/erofs_fs.h
index 08f9761..eba6c26 100644
--- a/include/erofs_fs.h
+++ b/include/erofs_fs.h
@@ -3,7 +3,7 @@
* EROFS (Enhanced ROM File System) on-disk format definition
*
* Copyright (C) 2017-2018 HUAWEI, Inc.
- * http://www.huawei.com/
+ * https://www.huawei.com/
* Copyright (C) 2021, Alibaba Cloud
*/
#ifndef __EROFS_FS_H
@@ -12,36 +12,42 @@
#define EROFS_SUPER_MAGIC_V1 0xE0F5E1E2
#define EROFS_SUPER_OFFSET 1024
-#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001
-#define EROFS_FEATURE_COMPAT_MTIME 0x00000002
+#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001
+#define EROFS_FEATURE_COMPAT_MTIME 0x00000002
+#define EROFS_FEATURE_COMPAT_XATTR_FILTER 0x00000004
/*
* Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should
* be incompatible with this kernel version.
*/
-#define EROFS_FEATURE_INCOMPAT_LZ4_0PADDING 0x00000001
+#define EROFS_FEATURE_INCOMPAT_ZERO_PADDING 0x00000001
#define EROFS_FEATURE_INCOMPAT_COMPR_CFGS 0x00000002
#define EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER 0x00000002
#define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE 0x00000004
#define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE 0x00000008
+#define EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 0x00000008
#define EROFS_FEATURE_INCOMPAT_ZTAILPACKING 0x00000010
+#define EROFS_FEATURE_INCOMPAT_FRAGMENTS 0x00000020
+#define EROFS_FEATURE_INCOMPAT_DEDUPE 0x00000020
+#define EROFS_FEATURE_INCOMPAT_XATTR_PREFIXES 0x00000040
#define EROFS_ALL_FEATURE_INCOMPAT \
- (EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \
+ (EROFS_FEATURE_INCOMPAT_ZERO_PADDING | \
EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \
EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \
EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \
- EROFS_FEATURE_INCOMPAT_ZTAILPACKING)
+ EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 | \
+ EROFS_FEATURE_INCOMPAT_ZTAILPACKING | \
+ EROFS_FEATURE_INCOMPAT_FRAGMENTS | \
+ EROFS_FEATURE_INCOMPAT_DEDUPE | \
+ EROFS_FEATURE_INCOMPAT_XATTR_PREFIXES)
#define EROFS_SB_EXTSLOT_SIZE 16
struct erofs_deviceslot {
- union {
- u8 uuid[16]; /* used for device manager later */
- u8 userdata[64]; /* digest(sha256), etc. */
- } u;
- __le32 blocks; /* total fs blocks of this device */
- __le32 mapped_blkaddr; /* map starting at mapped_blkaddr */
+ u8 tag[64]; /* digest(sha256), etc. */
+ __le32 blocks; /* total fs blocks of this device */
+ __le32 mapped_blkaddr; /* map starting at mapped_blkaddr */
u8 reserved[56];
};
#define EROFS_DEVT_SLOT_SIZE sizeof(struct erofs_deviceslot)
@@ -51,14 +57,14 @@
__le32 magic; /* file system magic number */
__le32 checksum; /* crc32c(super_block) */
__le32 feature_compat;
- __u8 blkszbits; /* support block_size == PAGE_SIZE only */
+ __u8 blkszbits; /* filesystem block size in bit shift */
__u8 sb_extslots; /* superblock size = 128 + sb_extslots * 16 */
__le16 root_nid; /* nid of root directory */
__le64 inos; /* total valid ino # (== f_files - f_favail) */
- __le64 build_time; /* inode v1 time derivation */
- __le32 build_time_nsec; /* inode v1 time derivation in nano scale */
+ __le64 build_time; /* compact inode time derivation */
+ __le32 build_time_nsec; /* compact inode time derivation in ns scale */
__le32 blocks; /* used for statfs */
__le32 meta_blkaddr; /* start block address of metadata area */
__le32 xattr_blkaddr; /* start block address of shared xattr area */
@@ -73,39 +79,39 @@
} __packed u1;
__le16 extra_devices; /* # of devices besides the primary device */
__le16 devt_slotoff; /* startoff = devt_slotoff * devt_slotsize */
- __u8 reserved2[38];
+ __u8 dirblkbits; /* directory block size in bit shift */
+ __u8 xattr_prefix_count; /* # of long xattr name prefixes */
+ __le32 xattr_prefix_start; /* start of long xattr prefixes */
+ __le64 packed_nid; /* nid of the special packed inode */
+ __u8 xattr_filter_reserved; /* reserved for xattr name filter */
+ __u8 reserved2[23];
};
/*
- * erofs inode datalayout (i_format in on-disk inode):
- * 0 - inode plain without inline data A:
- * inode, [xattrs], ... | ... | no-holed data
- * 1 - inode VLE compression B (legacy):
- * inode, [xattrs], extents ... | ...
- * 2 - inode plain with inline data C:
- * inode, [xattrs], last_inline_data, ... | ... | no-holed data
- * 3 - inode compression D:
- * inode, [xattrs], map_header, extents ... | ...
- * 4 - inode chunk-based E:
- * inode, [xattrs], chunk indexes ... | ...
+ * EROFS inode datalayout (i_format in on-disk inode):
+ * 0 - uncompressed flat inode without tail-packing inline data:
+ * 1 - compressed inode with non-compact indexes:
+ * 2 - uncompressed flat inode with tail-packing inline data:
+ * 3 - compressed inode with compact indexes:
+ * 4 - chunk-based inode with (optional) multi-device support:
* 5~7 - reserved
*/
enum {
EROFS_INODE_FLAT_PLAIN = 0,
- EROFS_INODE_FLAT_COMPRESSION_LEGACY = 1,
+ EROFS_INODE_COMPRESSED_FULL = 1,
EROFS_INODE_FLAT_INLINE = 2,
- EROFS_INODE_FLAT_COMPRESSION = 3,
+ EROFS_INODE_COMPRESSED_COMPACT = 3,
EROFS_INODE_CHUNK_BASED = 4,
EROFS_INODE_DATALAYOUT_MAX
};
static inline bool erofs_inode_is_data_compressed(unsigned int datamode)
{
- return datamode == EROFS_INODE_FLAT_COMPRESSION ||
- datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY;
+ return datamode == EROFS_INODE_COMPRESSED_COMPACT ||
+ datamode == EROFS_INODE_COMPRESSED_FULL;
}
-/* bit definitions of inode i_advise */
+/* bit definitions of inode i_format */
#define EROFS_I_VERSION_BITS 1
#define EROFS_I_DATALAYOUT_BITS 3
@@ -123,11 +129,30 @@
#define EROFS_CHUNK_FORMAT_ALL \
(EROFS_CHUNK_FORMAT_BLKBITS_MASK | EROFS_CHUNK_FORMAT_INDEXES)
+/* 32-byte on-disk inode */
+#define EROFS_INODE_LAYOUT_COMPACT 0
+/* 64-byte on-disk inode */
+#define EROFS_INODE_LAYOUT_EXTENDED 1
+
struct erofs_inode_chunk_info {
__le16 format; /* chunk blkbits, etc. */
__le16 reserved;
};
+union erofs_inode_i_u {
+ /* total compressed blocks for compressed inodes */
+ __le32 compressed_blocks;
+
+ /* block address for uncompressed flat inodes */
+ __le32 raw_blkaddr;
+
+ /* for device files, used to indicate old/new device # */
+ __le32 rdev;
+
+ /* for chunk-based files, it contains the summary info */
+ struct erofs_inode_chunk_info c;
+};
+
/* 32-byte reduced form of an ondisk inode */
struct erofs_inode_compact {
__le16 i_format; /* inode format hints */
@@ -138,28 +163,14 @@
__le16 i_nlink;
__le32 i_size;
__le32 i_reserved;
- union {
- /* file total compressed blocks for data mapping 1 */
- __le32 compressed_blocks;
- __le32 raw_blkaddr;
+ union erofs_inode_i_u i_u;
- /* for device files, used to indicate old/new device # */
- __le32 rdev;
-
- /* for chunk-based files, it contains the summary info */
- struct erofs_inode_chunk_info c;
- } i_u;
- __le32 i_ino; /* only used for 32-bit stat compatibility */
+ __le32 i_ino; /* only used for 32-bit stat compatibility */
__le16 i_uid;
__le16 i_gid;
__le32 i_reserved2;
};
-/* 32 bytes on-disk inode */
-#define EROFS_INODE_LAYOUT_COMPACT 0
-/* 64 bytes on-disk inode */
-#define EROFS_INODE_LAYOUT_EXTENDED 1
-
/* 64-byte complete form of an ondisk inode */
struct erofs_inode_extended {
__le16 i_format; /* inode format hints */
@@ -169,21 +180,9 @@
__le16 i_mode;
__le16 i_reserved;
__le64 i_size;
- union {
- /* file total compressed blocks for data mapping 1 */
- __le32 compressed_blocks;
- __le32 raw_blkaddr;
+ union erofs_inode_i_u i_u;
- /* for device files, used to indicate old/new device # */
- __le32 rdev;
-
- /* for chunk-based files, it contains the summary info */
- struct erofs_inode_chunk_info c;
- } i_u;
-
- /* only used for 32-bit stat compatibility */
- __le32 i_ino;
-
+ __le32 i_ino; /* only used for 32-bit stat compatibility */
__le32 i_uid;
__le32 i_gid;
__le64 i_mtime;
@@ -192,10 +191,6 @@
__u8 i_reserved2[16];
};
-#define EROFS_MAX_SHARED_XATTRS (128)
-/* h_shared_count between 129 ... 255 are special # */
-#define EROFS_SHARED_XATTR_EXTENT (255)
-
/*
* inline xattrs (n == i_xattr_icount):
* erofs_xattr_ibody_header(1) + (n - 1) * 4 bytes
@@ -208,7 +203,7 @@
* for read-only fs, no need to introduce h_refcount
*/
struct erofs_xattr_ibody_header {
- __le32 h_reserved;
+ __le32 h_name_filter; /* bit value 1 indicates not-present */
__u8 h_shared_count;
__u8 h_reserved2[7];
__le32 h_shared_xattrs[0]; /* shared xattr id array */
@@ -222,6 +217,17 @@
#define EROFS_XATTR_INDEX_LUSTRE 5
#define EROFS_XATTR_INDEX_SECURITY 6
+/*
+ * bit 7 of e_name_index is set when it refers to a long xattr name prefix,
+ * while the remained lower bits represent the index of the prefix.
+ */
+#define EROFS_XATTR_LONG_PREFIX 0x80
+#define EROFS_XATTR_LONG_PREFIX_MASK 0x7f
+
+#define EROFS_XATTR_FILTER_BITS 32
+#define EROFS_XATTR_FILTER_DEFAULT UINT32_MAX
+#define EROFS_XATTR_FILTER_SEED 0x25BBE08F
+
/* xattr entry (for both inline & shared xattrs) */
struct erofs_xattr_entry {
__u8 e_name_len; /* length of name */
@@ -231,6 +237,12 @@
char e_name[0]; /* attribute name */
};
+/* long xattr name prefix */
+struct erofs_xattr_long_prefix {
+ __u8 base_index; /* short xattr name prefix index */
+ char infix[0]; /* infix apart from short prefix */
+};
+
static inline unsigned int erofs_xattr_ibody_size(__le16 i_xattr_icount)
{
if (!i_xattr_icount)
@@ -261,132 +273,6 @@
__le32 blkaddr; /* start block address of this inode chunk */
};
-/* maximum supported size of a physical compression cluster */
-#define Z_EROFS_PCLUSTER_MAX_SIZE (1024 * 1024)
-
-/* available compression algorithm types (for h_algorithmtype) */
-enum {
- Z_EROFS_COMPRESSION_LZ4 = 0,
- Z_EROFS_COMPRESSION_LZMA = 1,
- Z_EROFS_COMPRESSION_MAX
-};
-#define Z_EROFS_ALL_COMPR_ALGS (1 << (Z_EROFS_COMPRESSION_MAX - 1))
-
-/* 14 bytes (+ length field = 16 bytes) */
-struct z_erofs_lz4_cfgs {
- __le16 max_distance;
- __le16 max_pclusterblks;
- u8 reserved[10];
-} __packed;
-
-/* 14 bytes (+ length field = 16 bytes) */
-struct z_erofs_lzma_cfgs {
- __le32 dict_size;
- __le16 format;
- u8 reserved[8];
-} __packed;
-#define Z_EROFS_LZMA_MAX_DICT_SIZE (8 * Z_EROFS_PCLUSTER_MAX_SIZE)
-
-/*
- * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
- * e.g. for 4k logical cluster size, 4B if compacted 2B is off;
- * (4B) + 2B + (4B) if compacted 2B is on.
- * bit 1 : HEAD1 big pcluster (0 - off; 1 - on)
- * bit 2 : HEAD2 big pcluster (0 - off; 1 - on)
- * bit 3 : tailpacking inline pcluster (0 - off; 1 - on)
- */
-#define Z_EROFS_ADVISE_COMPACTED_2B 0x0001
-#define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002
-#define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004
-#define Z_EROFS_ADVISE_INLINE_PCLUSTER 0x0008
-
-struct z_erofs_map_header {
- __le16 h_reserved1;
- /* record the size of tailpacking data */
- __le16 h_idata_size;
- __le16 h_advise;
- /*
- * bit 0-3 : algorithm type of head 1 (logical cluster type 01);
- * bit 4-7 : algorithm type of head 2 (logical cluster type 11).
- */
- __u8 h_algorithmtype;
- /*
- * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096;
- * bit 3-7 : reserved.
- */
- __u8 h_clusterbits;
-};
-
-#define Z_EROFS_VLE_LEGACY_HEADER_PADDING 8
-
-/*
- * Fixed-sized output compression ondisk Logical Extent cluster type:
- * 0 - literal (uncompressed) cluster
- * 1 - compressed cluster (for the head logical cluster)
- * 2 - compressed cluster (for the other logical clusters)
- *
- * In detail,
- * 0 - literal (uncompressed) cluster,
- * di_advise = 0
- * di_clusterofs = the literal data offset of the cluster
- * di_blkaddr = the blkaddr of the literal cluster
- *
- * 1 - compressed cluster (for the head logical cluster)
- * di_advise = 1
- * di_clusterofs = the decompressed data offset of the cluster
- * di_blkaddr = the blkaddr of the compressed cluster
- *
- * 2 - compressed cluster (for the other logical clusters)
- * di_advise = 2
- * di_clusterofs =
- * the decompressed data offset in its own head cluster
- * di_u.delta[0] = distance to its corresponding head cluster
- * di_u.delta[1] = distance to its corresponding tail cluster
- * (di_advise could be 0, 1 or 2)
- */
-enum {
- Z_EROFS_VLE_CLUSTER_TYPE_PLAIN = 0,
- Z_EROFS_VLE_CLUSTER_TYPE_HEAD = 1,
- Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD = 2,
- Z_EROFS_VLE_CLUSTER_TYPE_RESERVED = 3,
- Z_EROFS_VLE_CLUSTER_TYPE_MAX
-};
-
-#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS 2
-#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT 0
-
-/*
- * D0_CBLKCNT will be marked _only_ at the 1st non-head lcluster to store the
- * compressed block count of a compressed extent (in logical clusters, aka.
- * block count of a pcluster).
- */
-#define Z_EROFS_VLE_DI_D0_CBLKCNT (1 << 11)
-
-struct z_erofs_vle_decompressed_index {
- __le16 di_advise;
- /* where to decompress in the head cluster */
- __le16 di_clusterofs;
-
- union {
- /* for the head cluster */
- __le32 blkaddr;
- /*
- * for the rest clusters
- * eg. for 4k page-sized cluster, maximum 4K*64k = 256M)
- * [0] - pointing to the head cluster
- * [1] - pointing to the tail cluster
- */
- __le16 delta[2];
- } di_u;
-};
-
-#define Z_EROFS_VLE_LEGACY_INDEX_ALIGN(size) \
- (round_up(size, sizeof(struct z_erofs_vle_decompressed_index)) + \
- sizeof(struct z_erofs_map_header) + Z_EROFS_VLE_LEGACY_HEADER_PADDING)
-
-#define Z_EROFS_VLE_EXTENT_ALIGN(size) round_up(size, \
- sizeof(struct z_erofs_vle_decompressed_index))
-
/* dirent sorts in alphabet order, thus we can do binary search */
struct erofs_dirent {
__le64 nid; /* node number */
@@ -410,9 +296,157 @@
#define EROFS_NAME_LEN 255
+/* maximum supported size of a physical compression cluster */
+#define Z_EROFS_PCLUSTER_MAX_SIZE (1024 * 1024)
+
+/* available compression algorithm types (for h_algorithmtype) */
+enum {
+ Z_EROFS_COMPRESSION_LZ4 = 0,
+ Z_EROFS_COMPRESSION_LZMA = 1,
+ Z_EROFS_COMPRESSION_DEFLATE = 2,
+ Z_EROFS_COMPRESSION_MAX
+};
+#define Z_EROFS_ALL_COMPR_ALGS ((1 << Z_EROFS_COMPRESSION_MAX) - 1)
+
+/* 14 bytes (+ length field = 16 bytes) */
+struct z_erofs_lz4_cfgs {
+ __le16 max_distance;
+ __le16 max_pclusterblks;
+ u8 reserved[10];
+} __packed;
+
+/* 14 bytes (+ length field = 16 bytes) */
+struct z_erofs_lzma_cfgs {
+ __le32 dict_size;
+ __le16 format;
+ u8 reserved[8];
+} __packed;
+
+#define Z_EROFS_LZMA_MAX_DICT_SIZE (8 * Z_EROFS_PCLUSTER_MAX_SIZE)
+
+/* 6 bytes (+ length field = 8 bytes) */
+struct z_erofs_deflate_cfgs {
+ u8 windowbits; /* 8..15 for DEFLATE */
+ u8 reserved[5];
+} __packed;
+
+/*
+ * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
+ * e.g. for 4k logical cluster size, 4B if compacted 2B is off;
+ * (4B) + 2B + (4B) if compacted 2B is on.
+ * bit 1 : HEAD1 big pcluster (0 - off; 1 - on)
+ * bit 2 : HEAD2 big pcluster (0 - off; 1 - on)
+ * bit 3 : tailpacking inline pcluster (0 - off; 1 - on)
+ * bit 4 : interlaced plain pcluster (0 - off; 1 - on)
+ * bit 5 : fragment pcluster (0 - off; 1 - on)
+ */
+#define Z_EROFS_ADVISE_COMPACTED_2B 0x0001
+#define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002
+#define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004
+#define Z_EROFS_ADVISE_INLINE_PCLUSTER 0x0008
+#define Z_EROFS_ADVISE_INTERLACED_PCLUSTER 0x0010
+#define Z_EROFS_ADVISE_FRAGMENT_PCLUSTER 0x0020
+
+#define Z_EROFS_FRAGMENT_INODE_BIT 7
+struct z_erofs_map_header {
+ union {
+ /* fragment data offset in the packed inode */
+ __le32 h_fragmentoff;
+ struct {
+ __le16 h_reserved1;
+ /* indicates the encoded size of tailpacking data */
+ __le16 h_idata_size;
+ };
+ };
+ __le16 h_advise;
+ /*
+ * bit 0-3 : algorithm type of head 1 (logical cluster type 01);
+ * bit 4-7 : algorithm type of head 2 (logical cluster type 11).
+ */
+ __u8 h_algorithmtype;
+ /*
+ * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096;
+ * bit 3-6 : reserved;
+ * bit 7 : move the whole file into packed inode or not.
+ */
+ __u8 h_clusterbits;
+};
+
+/*
+ * On-disk logical cluster type:
+ * 0 - literal (uncompressed) lcluster
+ * 1,3 - compressed lcluster (for HEAD lclusters)
+ * 2 - compressed lcluster (for NONHEAD lclusters)
+ *
+ * In detail,
+ * 0 - literal (uncompressed) lcluster,
+ * di_advise = 0
+ * di_clusterofs = the literal data offset of the lcluster
+ * di_blkaddr = the blkaddr of the literal pcluster
+ *
+ * 1,3 - compressed lcluster (for HEAD lclusters)
+ * di_advise = 1 or 3
+ * di_clusterofs = the decompressed data offset of the lcluster
+ * di_blkaddr = the blkaddr of the compressed pcluster
+ *
+ * 2 - compressed lcluster (for NONHEAD lclusters)
+ * di_advise = 2
+ * di_clusterofs =
+ * the decompressed data offset in its own HEAD lcluster
+ * di_u.delta[0] = distance to this HEAD lcluster
+ * di_u.delta[1] = distance to the next HEAD lcluster
+ */
+enum {
+ Z_EROFS_LCLUSTER_TYPE_PLAIN = 0,
+ Z_EROFS_LCLUSTER_TYPE_HEAD1 = 1,
+ Z_EROFS_LCLUSTER_TYPE_NONHEAD = 2,
+ Z_EROFS_LCLUSTER_TYPE_HEAD2 = 3,
+ Z_EROFS_LCLUSTER_TYPE_MAX
+};
+
+#define Z_EROFS_LI_LCLUSTER_TYPE_BITS 2
+#define Z_EROFS_LI_LCLUSTER_TYPE_BIT 0
+
+/* (noncompact only, HEAD) This pcluster refers to partial decompressed data */
+#define Z_EROFS_LI_PARTIAL_REF (1 << 15)
+
+/*
+ * D0_CBLKCNT will be marked _only_ at the 1st non-head lcluster to store the
+ * compressed block count of a compressed extent (in logical clusters, aka.
+ * block count of a pcluster).
+ */
+#define Z_EROFS_LI_D0_CBLKCNT (1 << 11)
+
+struct z_erofs_lcluster_index {
+ __le16 di_advise;
+ /* where to decompress in the head lcluster */
+ __le16 di_clusterofs;
+
+ union {
+ /* for the HEAD lclusters */
+ __le32 blkaddr;
+ /*
+ * for the NONHEAD lclusters
+ * [0] - distance to its HEAD lcluster
+ * [1] - distance to the next HEAD lcluster
+ */
+ __le16 delta[2];
+ } di_u;
+};
+
+#define Z_EROFS_FULL_INDEX_ALIGN(end) \
+ (round_up(end, 8) + sizeof(struct z_erofs_map_header) + 8)
+
/* check the EROFS on-disk layout strictly at compile time */
static inline void erofs_check_ondisk_layout_definitions(void)
{
+ const union {
+ struct z_erofs_map_header h;
+ __le64 v;
+ } fmh __maybe_unused = {
+ .h.h_clusterbits = 1 << Z_EROFS_FRAGMENT_INODE_BIT,
+ };
+
BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128);
BUILD_BUG_ON(sizeof(struct erofs_inode_compact) != 32);
BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64);
@@ -421,15 +455,18 @@
BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_info) != 4);
BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) != 8);
BUILD_BUG_ON(sizeof(struct z_erofs_map_header) != 8);
- BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8);
+ BUILD_BUG_ON(sizeof(struct z_erofs_lcluster_index) != 8);
BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12);
/* keep in sync between 2 index structures for better extendibility */
BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) !=
- sizeof(struct z_erofs_vle_decompressed_index));
+ sizeof(struct z_erofs_lcluster_index));
BUILD_BUG_ON(sizeof(struct erofs_deviceslot) != 128);
- BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) <
- Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1);
+ BUILD_BUG_ON(BIT(Z_EROFS_LI_LCLUSTER_TYPE_BITS) <
+ Z_EROFS_LCLUSTER_TYPE_MAX - 1);
+ /* exclude old compiler versions like gcc 7.5.0 */
+ BUILD_BUG_ON(__builtin_constant_p(fmh.v) ?
+ fmh.v != cpu_to_le64(1ULL << 63) : 0);
}
#endif
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 3fad357..483d410 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -9,6 +9,7 @@
$(top_srcdir)/include/erofs/config.h \
$(top_srcdir)/include/erofs/decompress.h \
$(top_srcdir)/include/erofs/defs.h \
+ $(top_srcdir)/include/erofs/diskbuf.h \
$(top_srcdir)/include/erofs/err.h \
$(top_srcdir)/include/erofs/exclude.h \
$(top_srcdir)/include/erofs/flex-array.h \
@@ -19,16 +20,23 @@
$(top_srcdir)/include/erofs/io.h \
$(top_srcdir)/include/erofs/list.h \
$(top_srcdir)/include/erofs/print.h \
+ $(top_srcdir)/include/erofs/tar.h \
$(top_srcdir)/include/erofs/trace.h \
$(top_srcdir)/include/erofs/xattr.h \
$(top_srcdir)/include/erofs/compress_hints.h \
+ $(top_srcdir)/include/erofs/fragments.h \
+ $(top_srcdir)/include/erofs/xxhash.h \
+ $(top_srcdir)/include/erofs/rebuild.h \
$(top_srcdir)/lib/liberofs_private.h
noinst_HEADERS += compressor.h
liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
namei.c data.c compress.c compressor.c zmap.c decompress.c \
- compress_hints.c hashmap.c sha256.c blobchunk.c dir.c
-liberofs_la_CFLAGS = -Wall -I$(top_srcdir)/include
+ compress_hints.c hashmap.c sha256.c blobchunk.c dir.c \
+ fragments.c rb_tree.c dedupe.c uuid_unparse.c uuid.c tar.c \
+ block_list.c xxhash.c rebuild.c diskbuf.c
+
+liberofs_la_CFLAGS = -Wall ${libuuid_CFLAGS} -I$(top_srcdir)/include
if ENABLE_LZ4
liberofs_la_CFLAGS += ${LZ4_CFLAGS}
liberofs_la_SOURCES += compressor_lz4.c
@@ -40,3 +48,8 @@
liberofs_la_CFLAGS += ${liblzma_CFLAGS}
liberofs_la_SOURCES += compressor_liblzma.c
endif
+
+liberofs_la_SOURCES += kite_deflate.c compressor_deflate.c
+if ENABLE_LIBDEFLATE
+liberofs_la_SOURCES += compressor_libdeflate.c
+endif
diff --git a/lib/blobchunk.c b/lib/blobchunk.c
index 77b0c17..e4d0bad 100644
--- a/lib/blobchunk.c
+++ b/lib/blobchunk.c
@@ -10,80 +10,102 @@
#include "erofs/block_list.h"
#include "erofs/cache.h"
#include "erofs/io.h"
+#include "sha256.h"
#include <unistd.h>
-void erofs_sha256(const unsigned char *in, unsigned long in_size,
- unsigned char out[32]);
-
struct erofs_blobchunk {
- struct hashmap_entry ent;
+ union {
+ struct hashmap_entry ent;
+ struct list_head list;
+ };
char sha256[32];
- unsigned int chunksize;
+ unsigned int device_id;
+ union {
+ erofs_off_t chunksize;
+ erofs_off_t sourceoffset;
+ };
erofs_blk_t blkaddr;
};
static struct hashmap blob_hashmap;
static FILE *blobfile;
static erofs_blk_t remapped_base;
+static erofs_off_t datablob_size;
static bool multidev;
static struct erofs_buffer_head *bh_devt;
+struct erofs_blobchunk erofs_holechunk = {
+ .blkaddr = EROFS_NULL_ADDR,
+};
+static LIST_HEAD(unhashed_blobchunks);
-static struct erofs_blobchunk *erofs_blob_getchunk(int fd,
- unsigned int chunksize)
+struct erofs_blobchunk *erofs_get_unhashed_chunk(unsigned int device_id,
+ erofs_blk_t blkaddr, erofs_off_t sourceoffset)
{
- static u8 zeroed[EROFS_BLKSIZ];
- u8 *chunkdata, sha256[32];
- int ret;
- unsigned int hash;
- erofs_off_t blkpos;
struct erofs_blobchunk *chunk;
- chunkdata = malloc(chunksize);
- if (!chunkdata)
+ chunk = calloc(1, sizeof(struct erofs_blobchunk));
+ if (!chunk)
return ERR_PTR(-ENOMEM);
- ret = read(fd, chunkdata, chunksize);
- if (ret < chunksize) {
- chunk = ERR_PTR(-EIO);
- goto out;
- }
- erofs_sha256(chunkdata, chunksize, sha256);
+ chunk->device_id = device_id;
+ chunk->blkaddr = blkaddr;
+ chunk->sourceoffset = sourceoffset;
+ list_add_tail(&chunk->list, &unhashed_blobchunks);
+ return chunk;
+}
+
+static struct erofs_blobchunk *erofs_blob_getchunk(struct erofs_sb_info *sbi,
+ u8 *buf, erofs_off_t chunksize)
+{
+ static u8 zeroed[EROFS_MAX_BLOCK_SIZE];
+ struct erofs_blobchunk *chunk;
+ unsigned int hash, padding;
+ u8 sha256[32];
+ erofs_off_t blkpos;
+ int ret;
+
+ erofs_sha256(buf, chunksize, sha256);
hash = memhash(sha256, sizeof(sha256));
chunk = hashmap_get_from_hash(&blob_hashmap, hash, sha256);
if (chunk) {
DBG_BUGON(chunksize != chunk->chunksize);
- goto out;
+ sbi->saved_by_deduplication += chunksize;
+ erofs_dbg("Found duplicated chunk at %u", chunk->blkaddr);
+ return chunk;
}
+
chunk = malloc(sizeof(struct erofs_blobchunk));
- if (!chunk) {
- chunk = ERR_PTR(-ENOMEM);
- goto out;
- }
+ if (!chunk)
+ return ERR_PTR(-ENOMEM);
chunk->chunksize = chunksize;
- blkpos = ftell(blobfile);
- DBG_BUGON(erofs_blkoff(blkpos));
- chunk->blkaddr = erofs_blknr(blkpos);
memcpy(chunk->sha256, sha256, sizeof(sha256));
- hashmap_entry_init(&chunk->ent, hash);
- hashmap_add(&blob_hashmap, chunk);
+ blkpos = ftell(blobfile);
+ DBG_BUGON(erofs_blkoff(sbi, blkpos));
+
+ if (sbi->extra_devices)
+ chunk->device_id = 1;
+ else
+ chunk->device_id = 0;
+ chunk->blkaddr = erofs_blknr(sbi, blkpos);
erofs_dbg("Writing chunk (%u bytes) to %u", chunksize, chunk->blkaddr);
- ret = fwrite(chunkdata, chunksize, 1, blobfile);
- if (ret == 1 && erofs_blkoff(chunksize))
- ret = fwrite(zeroed, EROFS_BLKSIZ - erofs_blkoff(chunksize),
- 1, blobfile);
- if (ret < 1) {
- struct hashmap_entry key;
-
- hashmap_entry_init(&key, hash);
- hashmap_remove(&blob_hashmap, &key, sha256);
- free(chunk);
- chunk = ERR_PTR(-ENOSPC);
- goto out;
+ ret = fwrite(buf, chunksize, 1, blobfile);
+ if (ret == 1) {
+ padding = erofs_blkoff(sbi, chunksize);
+ if (padding) {
+ padding = erofs_blksiz(sbi) - padding;
+ ret = fwrite(zeroed, padding, 1, blobfile);
+ }
}
-out:
- free(chunkdata);
+
+ if (ret < 1) {
+ free(chunk);
+ return ERR_PTR(-ENOSPC);
+ }
+
+ hashmap_entry_init(&chunk->ent, hash);
+ hashmap_add(&blob_hashmap, chunk);
return chunk;
}
@@ -106,158 +128,360 @@
{
struct erofs_inode_chunk_index idx = {0};
erofs_blk_t extent_start = EROFS_NULL_ADDR;
- erofs_blk_t extent_end, extents_blks;
+ erofs_blk_t extent_end, chunkblks;
+ erofs_off_t source_offset;
unsigned int dst, src, unit;
bool first_extent = true;
- erofs_blk_t base_blkaddr = 0;
-
- if (multidev) {
- idx.device_id = 1;
- DBG_BUGON(!(inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES));
- } else {
- base_blkaddr = remapped_base;
- }
if (inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
unit = sizeof(struct erofs_inode_chunk_index);
else
unit = EROFS_BLOCK_MAP_ENTRY_SIZE;
+ chunkblks = 1U << (inode->u.chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK);
for (dst = src = 0; dst < inode->extent_isize;
src += sizeof(void *), dst += unit) {
struct erofs_blobchunk *chunk;
chunk = *(void **)(inode->chunkindexes + src);
- idx.blkaddr = base_blkaddr + chunk->blkaddr;
- if (extent_start != EROFS_NULL_ADDR &&
- idx.blkaddr == extent_end + 1) {
- extent_end = idx.blkaddr;
+ if (chunk->blkaddr == EROFS_NULL_ADDR) {
+ idx.blkaddr = EROFS_NULL_ADDR;
+ } else if (chunk->device_id) {
+ DBG_BUGON(!(inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES));
+ idx.blkaddr = chunk->blkaddr;
+ extent_start = EROFS_NULL_ADDR;
} else {
+ idx.blkaddr = remapped_base + chunk->blkaddr;
+ }
+
+ if (extent_start == EROFS_NULL_ADDR ||
+ idx.blkaddr != extent_end) {
if (extent_start != EROFS_NULL_ADDR) {
+ tarerofs_blocklist_write(extent_start,
+ extent_end - extent_start,
+ source_offset);
erofs_droid_blocklist_write_extent(inode,
extent_start,
- (extent_end - extent_start) + 1,
+ extent_end - extent_start,
first_extent, false);
first_extent = false;
}
extent_start = idx.blkaddr;
- extent_end = idx.blkaddr;
+ source_offset = chunk->sourceoffset;
}
+ extent_end = idx.blkaddr + chunkblks;
+ idx.device_id = cpu_to_le16(chunk->device_id);
+ idx.blkaddr = cpu_to_le32(idx.blkaddr);
+
if (unit == EROFS_BLOCK_MAP_ENTRY_SIZE)
memcpy(inode->chunkindexes + dst, &idx.blkaddr, unit);
else
memcpy(inode->chunkindexes + dst, &idx, sizeof(idx));
}
off = roundup(off, unit);
-
- if (extent_start == EROFS_NULL_ADDR)
- extents_blks = 0;
- else
- extents_blks = (extent_end - extent_start) + 1;
- erofs_droid_blocklist_write_extent(inode, extent_start, extents_blks,
+ if (extent_start != EROFS_NULL_ADDR)
+ tarerofs_blocklist_write(extent_start, extent_end - extent_start,
+ source_offset);
+ erofs_droid_blocklist_write_extent(inode, extent_start,
+ extent_start == EROFS_NULL_ADDR ?
+ 0 : extent_end - extent_start,
first_extent, true);
- return dev_write(inode->chunkindexes, off, inode->extent_isize);
+ return dev_write(inode->sbi, inode->chunkindexes, off, inode->extent_isize);
}
-int erofs_blob_write_chunked_file(struct erofs_inode *inode)
+int erofs_blob_mergechunks(struct erofs_inode *inode, unsigned int chunkbits,
+ unsigned int new_chunkbits)
{
- unsigned int chunksize = 1 << cfg.c_chunkbits;
- unsigned int count = DIV_ROUND_UP(inode->i_size, chunksize);
- struct erofs_inode_chunk_index *idx;
- erofs_off_t pos, len;
- unsigned int unit;
- int fd, ret;
+ struct erofs_sb_info *sbi = inode->sbi;
+ unsigned int dst, src, unit, count;
- inode->u.chunkformat |= inode->u.chunkbits - LOG_BLOCK_SIZE;
- if (multidev)
- inode->u.chunkformat |= EROFS_CHUNK_FORMAT_INDEXES;
+ if (new_chunkbits - sbi->blkszbits > EROFS_CHUNK_FORMAT_BLKBITS_MASK)
+ new_chunkbits = EROFS_CHUNK_FORMAT_BLKBITS_MASK + sbi->blkszbits;
+ if (chunkbits >= new_chunkbits) /* no need to merge */
+ goto out;
if (inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
unit = sizeof(struct erofs_inode_chunk_index);
else
unit = EROFS_BLOCK_MAP_ENTRY_SIZE;
+ count = round_up(inode->i_size, 1ULL << new_chunkbits) >> new_chunkbits;
+ for (dst = src = 0; dst < count; ++dst) {
+ *((void **)inode->chunkindexes + dst) =
+ *((void **)inode->chunkindexes + src);
+ src += 1U << (new_chunkbits - chunkbits);
+ }
+
+ DBG_BUGON(count * unit >= inode->extent_isize);
inode->extent_isize = count * unit;
- idx = malloc(count * max(sizeof(*idx), sizeof(void *)));
+ chunkbits = new_chunkbits;
+out:
+ inode->u.chunkformat = (chunkbits - sbi->blkszbits) |
+ (inode->u.chunkformat & ~EROFS_CHUNK_FORMAT_BLKBITS_MASK);
+ return 0;
+}
+
+int erofs_blob_write_chunked_file(struct erofs_inode *inode, int fd,
+ erofs_off_t startoff)
+{
+ struct erofs_sb_info *sbi = inode->sbi;
+ unsigned int chunkbits = cfg.c_chunkbits;
+ unsigned int count, unit;
+ struct erofs_blobchunk *chunk, *lastch;
+ struct erofs_inode_chunk_index *idx;
+ erofs_off_t pos, len, chunksize;
+ erofs_blk_t lb, minextblks;
+ u8 *chunkdata;
+ int ret;
+
+#ifdef SEEK_DATA
+ /* if the file is fully sparsed, use one big chunk instead */
+ if (lseek(fd, startoff, SEEK_DATA) < 0 && errno == ENXIO) {
+ chunkbits = ilog2(inode->i_size - 1) + 1;
+ if (chunkbits < sbi->blkszbits)
+ chunkbits = sbi->blkszbits;
+ }
+#endif
+ if (chunkbits - sbi->blkszbits > EROFS_CHUNK_FORMAT_BLKBITS_MASK)
+ chunkbits = EROFS_CHUNK_FORMAT_BLKBITS_MASK + sbi->blkszbits;
+ chunksize = 1ULL << chunkbits;
+ count = DIV_ROUND_UP(inode->i_size, chunksize);
+
+ if (sbi->extra_devices)
+ inode->u.chunkformat |= EROFS_CHUNK_FORMAT_INDEXES;
+ if (inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
+ unit = sizeof(struct erofs_inode_chunk_index);
+ else
+ unit = EROFS_BLOCK_MAP_ENTRY_SIZE;
+
+ chunkdata = malloc(chunksize);
+ if (!chunkdata)
+ return -ENOMEM;
+
+ inode->extent_isize = count * unit;
+ inode->chunkindexes = malloc(count * max(sizeof(*idx), sizeof(void *)));
+ if (!inode->chunkindexes) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ idx = inode->chunkindexes;
+
+ lastch = NULL;
+ minextblks = BLK_ROUND_UP(sbi, inode->i_size);
+ for (pos = 0; pos < inode->i_size; pos += len) {
+#ifdef SEEK_DATA
+ off_t offset = lseek(fd, pos + startoff, SEEK_DATA);
+
+ if (offset < 0) {
+ if (errno != ENXIO)
+ offset = pos;
+ else
+ offset = ((pos >> chunkbits) + 1) << chunkbits;
+ } else {
+ offset -= startoff;
+
+ if (offset != (offset & ~(chunksize - 1))) {
+ offset &= ~(chunksize - 1);
+ if (lseek(fd, offset + startoff, SEEK_SET) !=
+ startoff + offset) {
+ ret = -EIO;
+ goto err;
+ }
+ }
+ }
+
+ if (offset > pos) {
+ len = 0;
+ do {
+ *(void **)idx++ = &erofs_holechunk;
+ pos += chunksize;
+ } while (pos < offset);
+ DBG_BUGON(pos != offset);
+ lastch = NULL;
+ continue;
+ }
+#endif
+
+ len = min_t(u64, inode->i_size - pos, chunksize);
+ ret = read(fd, chunkdata, len);
+ if (ret < len) {
+ ret = -EIO;
+ goto err;
+ }
+
+ chunk = erofs_blob_getchunk(sbi, chunkdata, len);
+ if (IS_ERR(chunk)) {
+ ret = PTR_ERR(chunk);
+ goto err;
+ }
+
+ if (lastch && (lastch->device_id != chunk->device_id ||
+ erofs_pos(sbi, lastch->blkaddr) + lastch->chunksize !=
+ erofs_pos(sbi, chunk->blkaddr))) {
+ lb = lowbit(pos >> sbi->blkszbits);
+ if (lb && lb < minextblks)
+ minextblks = lb;
+ }
+ *(void **)idx++ = chunk;
+ lastch = chunk;
+ }
+ inode->datalayout = EROFS_INODE_CHUNK_BASED;
+ free(chunkdata);
+ return erofs_blob_mergechunks(inode, chunkbits,
+ ilog2(minextblks) + sbi->blkszbits);
+err:
+ free(inode->chunkindexes);
+ inode->chunkindexes = NULL;
+ free(chunkdata);
+ return ret;
+}
+
+int tarerofs_write_chunkes(struct erofs_inode *inode, erofs_off_t data_offset)
+{
+ struct erofs_sb_info *sbi = inode->sbi;
+ unsigned int chunkbits = ilog2(inode->i_size - 1) + 1;
+ unsigned int count, unit, device_id;
+ erofs_off_t chunksize, len, pos;
+ erofs_blk_t blkaddr;
+ struct erofs_inode_chunk_index *idx;
+
+ if (chunkbits < sbi->blkszbits)
+ chunkbits = sbi->blkszbits;
+ if (chunkbits - sbi->blkszbits > EROFS_CHUNK_FORMAT_BLKBITS_MASK)
+ chunkbits = EROFS_CHUNK_FORMAT_BLKBITS_MASK + sbi->blkszbits;
+
+ inode->u.chunkformat |= chunkbits - sbi->blkszbits;
+ if (sbi->extra_devices) {
+ device_id = 1;
+ inode->u.chunkformat |= EROFS_CHUNK_FORMAT_INDEXES;
+ unit = sizeof(struct erofs_inode_chunk_index);
+ DBG_BUGON(erofs_blkoff(sbi, data_offset));
+ blkaddr = erofs_blknr(sbi, data_offset);
+ } else {
+ device_id = 0;
+ unit = EROFS_BLOCK_MAP_ENTRY_SIZE;
+ DBG_BUGON(erofs_blkoff(sbi, datablob_size));
+ blkaddr = erofs_blknr(sbi, datablob_size);
+ datablob_size += round_up(inode->i_size, erofs_blksiz(sbi));
+ }
+ chunksize = 1ULL << chunkbits;
+ count = DIV_ROUND_UP(inode->i_size, chunksize);
+
+ inode->extent_isize = count * unit;
+ idx = calloc(count, max(sizeof(*idx), sizeof(void *)));
if (!idx)
return -ENOMEM;
inode->chunkindexes = idx;
- fd = open(inode->i_srcpath, O_RDONLY | O_BINARY);
- if (fd < 0) {
- ret = -errno;
- goto err;
- }
-
for (pos = 0; pos < inode->i_size; pos += len) {
struct erofs_blobchunk *chunk;
- len = min_t(u64, inode->i_size - pos, chunksize);
- chunk = erofs_blob_getchunk(fd, len);
+ len = min_t(erofs_off_t, inode->i_size - pos, chunksize);
+
+ chunk = erofs_get_unhashed_chunk(device_id, blkaddr,
+ data_offset);
if (IS_ERR(chunk)) {
- ret = PTR_ERR(chunk);
- close(fd);
- goto err;
+ free(inode->chunkindexes);
+ inode->chunkindexes = NULL;
+ return PTR_ERR(chunk);
}
+
*(void **)idx++ = chunk;
+ blkaddr += erofs_blknr(sbi, len);
+ data_offset += len;
}
inode->datalayout = EROFS_INODE_CHUNK_BASED;
- close(fd);
return 0;
-err:
- free(inode->chunkindexes);
- inode->chunkindexes = NULL;
- return ret;
}
-int erofs_blob_remap(void)
+int erofs_mkfs_dump_blobs(struct erofs_sb_info *sbi)
{
struct erofs_buffer_head *bh;
ssize_t length;
erofs_off_t pos_in, pos_out;
ssize_t ret;
- fflush(blobfile);
- length = ftell(blobfile);
- if (length < 0)
- return -errno;
- if (multidev) {
- struct erofs_deviceslot dis = {
- .blocks = erofs_blknr(length),
- };
+ if (blobfile) {
+ fflush(blobfile);
+ length = ftell(blobfile);
+ if (length < 0)
+ return -errno;
+ if (sbi->extra_devices)
+ sbi->devs[0].blocks = erofs_blknr(sbi, length);
+ else
+ datablob_size = length;
+ }
+
+ if (sbi->extra_devices) {
+ unsigned int i, ret;
+ erofs_blk_t nblocks;
+
+ nblocks = erofs_mapbh(NULL);
pos_out = erofs_btell(bh_devt, false);
- ret = dev_write(&dis, pos_out, sizeof(dis));
- if (ret)
- return ret;
+ i = 0;
+ do {
+ struct erofs_deviceslot dis = {
+ .mapped_blkaddr = cpu_to_le32(nblocks),
+ .blocks = cpu_to_le32(sbi->devs[i].blocks),
+ };
+ memcpy(dis.tag, sbi->devs[i].tag, sizeof(dis.tag));
+ ret = dev_write(sbi, &dis, pos_out, sizeof(dis));
+ if (ret)
+ return ret;
+ pos_out += sizeof(dis);
+ nblocks += sbi->devs[i].blocks;
+ } while (++i < sbi->extra_devices);
bh_devt->op = &erofs_drop_directly_bhops;
erofs_bdrop(bh_devt, false);
return 0;
}
- bh = erofs_balloc(DATA, length, 0, 0);
+
+ bh = erofs_balloc(DATA, blobfile ? datablob_size : 0, 0, 0);
if (IS_ERR(bh))
return PTR_ERR(bh);
erofs_mapbh(bh->block);
+
pos_out = erofs_btell(bh, false);
- pos_in = 0;
- remapped_base = erofs_blknr(pos_out);
- ret = erofs_copy_file_range(fileno(blobfile), &pos_in,
- erofs_devfd, &pos_out, length);
+ remapped_base = erofs_blknr(sbi, pos_out);
+ if (blobfile) {
+ pos_in = 0;
+ ret = erofs_copy_file_range(fileno(blobfile), &pos_in,
+ sbi->devfd, &pos_out, datablob_size);
+ ret = ret < datablob_size ? -EIO : 0;
+ } else {
+ ret = 0;
+ }
bh->op = &erofs_drop_directly_bhops;
erofs_bdrop(bh, false);
- return ret < length ? -EIO : 0;
+ return ret;
}
void erofs_blob_exit(void)
{
+ struct hashmap_iter iter;
+ struct hashmap_entry *e;
+ struct erofs_blobchunk *bc, *n;
+
if (blobfile)
fclose(blobfile);
- hashmap_free(&blob_hashmap, 1);
+ while ((e = hashmap_iter_first(&blob_hashmap, &iter))) {
+ bc = container_of((struct hashmap_entry *)e,
+ struct erofs_blobchunk, ent);
+ DBG_BUGON(hashmap_remove(&blob_hashmap, e) != e);
+ free(bc);
+ }
+ DBG_BUGON(hashmap_free(&blob_hashmap));
+
+ list_for_each_entry_safe(bc, n, &unhashed_blobchunks, list) {
+ list_del(&bc->list);
+ free(bc);
+ }
}
int erofs_blob_init(const char *blobfile_path)
@@ -280,22 +504,25 @@
return 0;
}
-int erofs_generate_devtable(void)
+int erofs_mkfs_init_devices(struct erofs_sb_info *sbi, unsigned int devices)
{
- struct erofs_deviceslot dis;
-
- if (!multidev)
+ if (!devices)
return 0;
- bh_devt = erofs_balloc(DEVT, sizeof(dis), 0, 0);
- if (IS_ERR(bh_devt))
- return PTR_ERR(bh_devt);
+ sbi->devs = calloc(devices, sizeof(sbi->devs[0]));
+ if (!sbi->devs)
+ return -ENOMEM;
- dis = (struct erofs_deviceslot) {};
+ bh_devt = erofs_balloc(DEVT,
+ sizeof(struct erofs_deviceslot) * devices, 0, 0);
+ if (IS_ERR(bh_devt)) {
+ free(sbi->devs);
+ return PTR_ERR(bh_devt);
+ }
erofs_mapbh(bh_devt->block);
bh_devt->op = &erofs_skip_write_bhops;
- sbi.devt_slotoff = erofs_btell(bh_devt, false) / EROFS_DEVT_SLOT_SIZE;
- sbi.extra_devices = 1;
- erofs_sb_set_device_table();
+ sbi->devt_slotoff = erofs_btell(bh_devt, false) / EROFS_DEVT_SLOT_SIZE;
+ sbi->extra_devices = devices;
+ erofs_sb_set_device_table(sbi);
return 0;
}
diff --git a/lib/block_list.c b/lib/block_list.c
index 896fb01..f47a746 100644
--- a/lib/block_list.c
+++ b/lib/block_list.c
@@ -3,7 +3,6 @@
* Copyright (C), 2021, Coolpad Group Limited.
* Created by Yue Hu <huyue2@yulong.com>
*/
-#ifdef WITH_ANDROID
#include <stdio.h>
#include <sys/stat.h>
#include "erofs/block_list.h"
@@ -12,17 +11,19 @@
#include "erofs/print.h"
static FILE *block_list_fp;
+bool srcmap_enabled;
-int erofs_droid_blocklist_fopen(void)
+int erofs_blocklist_open(char *filename, bool srcmap)
{
- block_list_fp = fopen(cfg.block_list_file, "w");
+ block_list_fp = fopen(filename, "w");
if (!block_list_fp)
- return -1;
+ return -errno;
+ srcmap_enabled = srcmap;
return 0;
}
-void erofs_droid_blocklist_fclose(void)
+void erofs_blocklist_close(void)
{
if (!block_list_fp)
return;
@@ -31,6 +32,18 @@
block_list_fp = NULL;
}
+/* XXX: really need to be cleaned up */
+void tarerofs_blocklist_write(erofs_blk_t blkaddr, erofs_blk_t nblocks,
+ erofs_off_t srcoff)
+{
+ if (!block_list_fp || !nblocks || !srcmap_enabled)
+ return;
+
+ fprintf(block_list_fp, "%08x %8x %08" PRIx64 "\n",
+ blkaddr, nblocks, srcoff);
+}
+
+#ifdef WITH_ANDROID
static void blocklist_write(const char *path, erofs_blk_t blk_start,
erofs_blk_t nblocks, bool first_extent,
bool last_extent)
@@ -95,7 +108,7 @@
return;
/* XXX: another hack, which means it has been outputed before */
- if (erofs_blknr(inode->i_size)) {
+ if (erofs_blknr(inode->sbi, inode->i_size)) {
if (blkaddr == NULL_ADDR)
fprintf(block_list_fp, "\n");
else
diff --git a/lib/cache.c b/lib/cache.c
index c735363..caca49b 100644
--- a/lib/cache.c
+++ b/lib/cache.c
@@ -14,10 +14,10 @@
.list = LIST_HEAD_INIT(blkh.list),
.blkaddr = NULL_ADDR,
};
-static erofs_blk_t tail_blkaddr;
+static erofs_blk_t tail_blkaddr, erofs_metablkcnt;
/* buckets for all mapped buffer blocks to boost up allocation */
-static struct list_head mapped_buckets[META + 1][EROFS_BLKSIZ];
+static struct list_head mapped_buckets[META + 1][EROFS_MAX_BLOCK_SIZE];
/* last mapped buffer block to accelerate erofs_mapbh() */
static struct erofs_buffer_block *last_mapped_block = &blkh;
@@ -39,29 +39,6 @@
.flush = erofs_bh_flush_skip_write,
};
-int erofs_bh_flush_generic_write(struct erofs_buffer_head *bh, void *buf)
-{
- struct erofs_buffer_head *nbh = list_next_entry(bh, list);
- erofs_off_t offset = erofs_btell(bh, false);
-
- DBG_BUGON(nbh->off < bh->off);
- return dev_write(buf, offset, nbh->off - bh->off);
-}
-
-static bool erofs_bh_flush_buf_write(struct erofs_buffer_head *bh)
-{
- int err = erofs_bh_flush_generic_write(bh, bh->fsprivate);
-
- if (err)
- return false;
- free(bh->fsprivate);
- return erofs_bh_flush_generic_end(bh);
-}
-
-const struct erofs_bhops erofs_buf_write_bhops = {
- .flush = erofs_bh_flush_buf_write,
-};
-
/* return buffer_head of erofs super block (with size 0) */
struct erofs_buffer_head *erofs_buffer_init(void)
{
@@ -86,7 +63,8 @@
if (bb->blkaddr == NULL_ADDR)
return;
- bkt = mapped_buckets[bb->type] + bb->buffers.off % EROFS_BLKSIZ;
+ bkt = mapped_buckets[bb->type] +
+ (bb->buffers.off & (erofs_blksiz(&sbi) - 1));
list_del(&bb->mapped_list);
list_add_tail(&bb->mapped_list, bkt);
}
@@ -99,10 +77,11 @@
unsigned int extrasize,
bool dryrun)
{
+ const unsigned int blksiz = erofs_blksiz(&sbi);
+ const unsigned int blkmask = blksiz - 1;
const erofs_off_t alignedoffset = roundup(bb->buffers.off, alignsize);
- const int oob = cmpsgn(roundup((bb->buffers.off - 1) % EROFS_BLKSIZ + 1,
- alignsize) + incr + extrasize,
- EROFS_BLKSIZ);
+ const int oob = cmpsgn(roundup(((bb->buffers.off - 1) & blkmask) + 1,
+ alignsize) + incr + extrasize, blksiz);
bool tailupdate = false;
erofs_blk_t blkaddr;
@@ -114,7 +93,7 @@
blkaddr = bb->blkaddr;
if (blkaddr != NULL_ADDR) {
tailupdate = (tail_blkaddr == blkaddr +
- BLK_ROUND_UP(bb->buffers.off));
+ DIV_ROUND_UP(bb->buffers.off, blksiz));
if (oob && !tailupdate)
return -EINVAL;
}
@@ -129,10 +108,11 @@
bb->buffers.off = alignedoffset + incr;
/* need to update the tail_blkaddr */
if (tailupdate)
- tail_blkaddr = blkaddr + BLK_ROUND_UP(bb->buffers.off);
+ tail_blkaddr = blkaddr +
+ DIV_ROUND_UP(bb->buffers.off, blksiz);
erofs_bupdate_mapped(bb);
}
- return (alignedoffset + incr - 1) % EROFS_BLKSIZ + 1;
+ return ((alignedoffset + incr - 1) & blkmask) + 1;
}
int erofs_bh_balloon(struct erofs_buffer_head *bh, erofs_off_t incr)
@@ -152,16 +132,17 @@
unsigned int alignsize,
struct erofs_buffer_block **bbp)
{
+ const unsigned int blksiz = erofs_blksiz(&sbi);
struct erofs_buffer_block *cur, *bb;
unsigned int used0, used_before, usedmax, used;
int ret;
- used0 = (size + required_ext) % EROFS_BLKSIZ + inline_ext;
+ used0 = ((size + required_ext) & (blksiz - 1)) + inline_ext;
/* inline data should be in the same fs block */
- if (used0 > EROFS_BLKSIZ)
+ if (used0 > blksiz)
return -ENOSPC;
- if (!used0 || alignsize == EROFS_BLKSIZ) {
+ if (!used0 || alignsize == blksiz) {
*bbp = NULL;
return 0;
}
@@ -170,10 +151,10 @@
bb = NULL;
/* try to find a most-fit mapped buffer block first */
- if (size + required_ext + inline_ext >= EROFS_BLKSIZ)
+ if (size + required_ext + inline_ext >= blksiz)
goto skip_mapped;
- used_before = rounddown(EROFS_BLKSIZ -
+ used_before = rounddown(blksiz -
(size + required_ext + inline_ext), alignsize);
for (; used_before; --used_before) {
struct list_head *bt = mapped_buckets[type] + used_before;
@@ -191,7 +172,7 @@
DBG_BUGON(cur->type != type);
DBG_BUGON(cur->blkaddr == NULL_ADDR);
- DBG_BUGON(used_before != cur->buffers.off % EROFS_BLKSIZ);
+ DBG_BUGON(used_before != (cur->buffers.off & (blksiz - 1)));
ret = __erofs_battach(cur, NULL, size, alignsize,
required_ext + inline_ext, true);
@@ -202,7 +183,7 @@
/* should contain all data in the current block */
used = ret + required_ext + inline_ext;
- DBG_BUGON(used > EROFS_BLKSIZ);
+ DBG_BUGON(used > blksiz);
bb = cur;
usedmax = used;
@@ -215,7 +196,7 @@
if (cur == &blkh)
cur = list_next_entry(cur, list);
for (; cur != &blkh; cur = list_next_entry(cur, list)) {
- used_before = cur->buffers.off % EROFS_BLKSIZ;
+ used_before = cur->buffers.off & (blksiz - 1);
/* skip if buffer block is just full */
if (!used_before)
@@ -230,10 +211,10 @@
if (ret < 0)
continue;
- used = (ret + required_ext) % EROFS_BLKSIZ + inline_ext;
+ used = ((ret + required_ext) & (blksiz - 1)) + inline_ext;
/* should contain inline data in current block */
- if (used > EROFS_BLKSIZ)
+ if (used > blksiz)
continue;
/*
@@ -288,7 +269,10 @@
bb->blkaddr = NULL_ADDR;
bb->buffers.off = 0;
init_list_head(&bb->buffers.list);
- list_add_tail(&bb->list, &blkh.list);
+ if (type == DATA)
+ list_add(&bb->list, &last_mapped_block->list);
+ else
+ list_add_tail(&bb->list, &blkh.list);
init_list_head(&bb->mapped_list);
bh = malloc(sizeof(struct erofs_buffer_head));
@@ -300,8 +284,10 @@
ret = __erofs_battach(bb, bh, size, alignsize,
required_ext + inline_ext, false);
- if (ret < 0)
+ if (ret < 0) {
+ free(bh);
return ERR_PTR(ret);
+ }
return bh;
}
@@ -343,7 +329,7 @@
erofs_bupdate_mapped(bb);
}
- blkaddr = bb->blkaddr + BLK_ROUND_UP(bb->buffers.off);
+ blkaddr = bb->blkaddr + BLK_ROUND_UP(&sbi, bb->buffers.off);
if (blkaddr > tail_blkaddr)
tail_blkaddr = blkaddr;
@@ -367,8 +353,21 @@
return tail_blkaddr;
}
+static void erofs_bfree(struct erofs_buffer_block *bb)
+{
+ DBG_BUGON(!list_empty(&bb->buffers.list));
+
+ if (bb == last_mapped_block)
+ last_mapped_block = list_prev_entry(bb, list);
+
+ list_del(&bb->mapped_list);
+ list_del(&bb->list);
+ free(bb);
+}
+
bool erofs_bflush(struct erofs_buffer_block *bb)
{
+ const unsigned int blksiz = erofs_blksiz(&sbi);
struct erofs_buffer_block *p, *n;
erofs_blk_t blkaddr;
@@ -396,18 +395,15 @@
if (skip)
continue;
- padding = EROFS_BLKSIZ - p->buffers.off % EROFS_BLKSIZ;
- if (padding != EROFS_BLKSIZ)
- dev_fillzero(blknr_to_addr(blkaddr) - padding,
+ padding = blksiz - (p->buffers.off & (blksiz - 1));
+ if (padding != blksiz)
+ dev_fillzero(&sbi, erofs_pos(&sbi, blkaddr) - padding,
padding, true);
- DBG_BUGON(!list_empty(&p->buffers.list));
-
+ if (p->type != DATA)
+ erofs_metablkcnt += BLK_ROUND_UP(&sbi, p->buffers.off);
erofs_dbg("block %u to %u flushed", p->blkaddr, blkaddr - 1);
-
- list_del(&p->mapped_list);
- list_del(&p->list);
- free(p);
+ erofs_bfree(p);
}
return true;
}
@@ -420,7 +416,7 @@
/* tail_blkaddr could be rolled back after revoking all bhs */
if (tryrevoke && blkaddr != NULL_ADDR &&
- tail_blkaddr == blkaddr + BLK_ROUND_UP(bb->buffers.off))
+ tail_blkaddr == blkaddr + BLK_ROUND_UP(&sbi, bb->buffers.off))
rollback = true;
bh->op = &erofs_drop_directly_bhops;
@@ -429,13 +425,14 @@
if (!list_empty(&bb->buffers.list))
return;
- if (bb == last_mapped_block)
- last_mapped_block = list_prev_entry(bb, list);
-
- list_del(&bb->mapped_list);
- list_del(&bb->list);
- free(bb);
-
+ if (!rollback && bb->type != DATA)
+ erofs_metablkcnt += BLK_ROUND_UP(&sbi, bb->buffers.off);
+ erofs_bfree(bb);
if (rollback)
tail_blkaddr = blkaddr;
}
+
+erofs_blk_t erofs_total_metablocks(void)
+{
+ return erofs_metablkcnt;
+}
diff --git a/lib/compress.c b/lib/compress.c
index ee3b856..f6dc12a 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -15,59 +15,70 @@
#include "erofs/io.h"
#include "erofs/cache.h"
#include "erofs/compress.h"
+#include "erofs/dedupe.h"
#include "compressor.h"
#include "erofs/block_list.h"
#include "erofs/compress_hints.h"
+#include "erofs/fragments.h"
-static struct erofs_compress compresshandle;
-static unsigned int algorithmtype[2];
+/* compressing configuration specified by users */
+struct erofs_compress_cfg {
+ struct erofs_compress handle;
+ unsigned int algorithmtype;
+ bool enable;
+} erofs_ccfg[EROFS_MAX_COMPR_CFGS];
struct z_erofs_vle_compress_ctx {
- u8 *metacur;
-
u8 queue[EROFS_CONFIG_COMPR_MAX_SZ * 2];
+ struct z_erofs_inmem_extent e; /* (lookahead) extent */
+
+ struct erofs_inode *inode;
+ struct erofs_compress_cfg *ccfg;
+
+ u8 *metacur;
unsigned int head, tail;
- unsigned int compressedblks;
+ erofs_off_t remaining;
+ unsigned int pclustersize;
erofs_blk_t blkaddr; /* pointing to the next blkaddr */
u16 clusterofs;
+
+ u32 tof_chksum;
+ bool fix_dedupedfrag;
+ bool fragemitted;
};
-#define Z_EROFS_LEGACY_MAP_HEADER_SIZE \
- (sizeof(struct z_erofs_map_header) + Z_EROFS_VLE_LEGACY_HEADER_PADDING)
+#define Z_EROFS_LEGACY_MAP_HEADER_SIZE Z_EROFS_FULL_INDEX_ALIGN(0)
-static unsigned int vle_compressmeta_capacity(erofs_off_t filesize)
+static void z_erofs_write_indexes_final(struct z_erofs_vle_compress_ctx *ctx)
{
- const unsigned int indexsize = BLK_ROUND_UP(filesize) *
- sizeof(struct z_erofs_vle_decompressed_index);
-
- return Z_EROFS_LEGACY_MAP_HEADER_SIZE + indexsize;
-}
-
-static void vle_write_indexes_final(struct z_erofs_vle_compress_ctx *ctx)
-{
- const unsigned int type = Z_EROFS_VLE_CLUSTER_TYPE_PLAIN;
- struct z_erofs_vle_decompressed_index di;
+ const unsigned int type = Z_EROFS_LCLUSTER_TYPE_PLAIN;
+ struct z_erofs_lcluster_index di;
if (!ctx->clusterofs)
return;
di.di_clusterofs = cpu_to_le16(ctx->clusterofs);
di.di_u.blkaddr = 0;
- di.di_advise = cpu_to_le16(type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT);
+ di.di_advise = cpu_to_le16(type << Z_EROFS_LI_LCLUSTER_TYPE_BIT);
memcpy(ctx->metacur, &di, sizeof(di));
ctx->metacur += sizeof(di);
}
-static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
- unsigned int count, bool raw)
+static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
{
+ struct erofs_inode *inode = ctx->inode;
+ struct erofs_sb_info *sbi = inode->sbi;
unsigned int clusterofs = ctx->clusterofs;
- unsigned int d0 = 0, d1 = (clusterofs + count) / EROFS_BLKSIZ;
- struct z_erofs_vle_decompressed_index di;
- unsigned int type;
- __le16 advise;
+ unsigned int count = ctx->e.length;
+ unsigned int d0 = 0, d1 = (clusterofs + count) / erofs_blksiz(sbi);
+ struct z_erofs_lcluster_index di;
+ unsigned int type, advise;
+ if (!count)
+ return;
+
+ ctx->e.length = 0; /* mark as written first */
di.di_clusterofs = cpu_to_le16(ctx->clusterofs);
/* whether the tail-end (un)compressed block or not */
@@ -76,13 +87,18 @@
* A lcluster cannot have three parts with the middle one which
* is well-compressed for !ztailpacking cases.
*/
- DBG_BUGON(!raw && !cfg.c_ztailpacking);
- type = raw ? Z_EROFS_VLE_CLUSTER_TYPE_PLAIN :
- Z_EROFS_VLE_CLUSTER_TYPE_HEAD;
- advise = cpu_to_le16(type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT);
+ DBG_BUGON(!ctx->e.raw && !cfg.c_ztailpacking && !cfg.c_fragments);
+ DBG_BUGON(ctx->e.partial);
+ type = ctx->e.raw ? Z_EROFS_LCLUSTER_TYPE_PLAIN :
+ Z_EROFS_LCLUSTER_TYPE_HEAD1;
+ advise = type << Z_EROFS_LI_LCLUSTER_TYPE_BIT;
+ di.di_advise = cpu_to_le16(advise);
- di.di_advise = advise;
- di.di_u.blkaddr = cpu_to_le32(ctx->blkaddr);
+ if (inode->datalayout == EROFS_INODE_COMPRESSED_FULL &&
+ !ctx->e.compressedblks)
+ di.di_u.blkaddr = cpu_to_le32(inode->fragmentoff >> 32);
+ else
+ di.di_u.blkaddr = cpu_to_le32(ctx->e.blkaddr);
memcpy(ctx->metacur, &di, sizeof(di));
ctx->metacur += sizeof(di);
@@ -92,14 +108,15 @@
}
do {
+ advise = 0;
/* XXX: big pcluster feature should be per-inode */
- if (d0 == 1 && erofs_sb_has_big_pcluster()) {
- type = Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD;
- di.di_u.delta[0] = cpu_to_le16(ctx->compressedblks |
- Z_EROFS_VLE_DI_D0_CBLKCNT);
+ if (d0 == 1 && erofs_sb_has_big_pcluster(sbi)) {
+ type = Z_EROFS_LCLUSTER_TYPE_NONHEAD;
+ di.di_u.delta[0] = cpu_to_le16(ctx->e.compressedblks |
+ Z_EROFS_LI_D0_CBLKCNT);
di.di_u.delta[1] = cpu_to_le16(d1);
} else if (d0) {
- type = Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD;
+ type = Z_EROFS_LCLUSTER_TYPE_NONHEAD;
/*
* If the |Z_EROFS_VLE_DI_D0_CBLKCNT| bit is set, parser
@@ -112,73 +129,194 @@
* To solve this, we replace d0 with
* Z_EROFS_VLE_DI_D0_CBLKCNT-1.
*/
- if (d0 >= Z_EROFS_VLE_DI_D0_CBLKCNT)
+ if (d0 >= Z_EROFS_LI_D0_CBLKCNT)
di.di_u.delta[0] = cpu_to_le16(
- Z_EROFS_VLE_DI_D0_CBLKCNT - 1);
+ Z_EROFS_LI_D0_CBLKCNT - 1);
else
di.di_u.delta[0] = cpu_to_le16(d0);
di.di_u.delta[1] = cpu_to_le16(d1);
} else {
- type = raw ? Z_EROFS_VLE_CLUSTER_TYPE_PLAIN :
- Z_EROFS_VLE_CLUSTER_TYPE_HEAD;
- di.di_u.blkaddr = cpu_to_le32(ctx->blkaddr);
+ type = ctx->e.raw ? Z_EROFS_LCLUSTER_TYPE_PLAIN :
+ Z_EROFS_LCLUSTER_TYPE_HEAD1;
+
+ if (inode->datalayout == EROFS_INODE_COMPRESSED_FULL &&
+ !ctx->e.compressedblks)
+ di.di_u.blkaddr = cpu_to_le32(inode->fragmentoff >> 32);
+ else
+ di.di_u.blkaddr = cpu_to_le32(ctx->e.blkaddr);
+
+ if (ctx->e.partial) {
+ DBG_BUGON(ctx->e.raw);
+ advise |= Z_EROFS_LI_PARTIAL_REF;
+ }
}
- advise = cpu_to_le16(type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT);
- di.di_advise = advise;
+ advise |= type << Z_EROFS_LI_LCLUSTER_TYPE_BIT;
+ di.di_advise = cpu_to_le16(advise);
memcpy(ctx->metacur, &di, sizeof(di));
ctx->metacur += sizeof(di);
- count -= EROFS_BLKSIZ - clusterofs;
+ count -= erofs_blksiz(sbi) - clusterofs;
clusterofs = 0;
++d0;
--d1;
- } while (clusterofs + count >= EROFS_BLKSIZ);
+ } while (clusterofs + count >= erofs_blksiz(sbi));
ctx->clusterofs = clusterofs + count;
}
+static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
+ unsigned int *len)
+{
+ struct erofs_inode *inode = ctx->inode;
+ const unsigned int lclustermask = (1 << inode->z_logical_clusterbits) - 1;
+ struct erofs_sb_info *sbi = inode->sbi;
+ int ret = 0;
+
+ /*
+ * No need dedupe for packed inode since it is composed of
+ * fragments which have already been deduplicated.
+ */
+ if (erofs_is_packed_inode(inode))
+ goto out;
+
+ do {
+ struct z_erofs_dedupe_ctx dctx = {
+ .start = ctx->queue + ctx->head - ({ int rc;
+ if (ctx->e.length <= erofs_blksiz(sbi))
+ rc = 0;
+ else if (ctx->e.length - erofs_blksiz(sbi) >= ctx->head)
+ rc = ctx->head;
+ else
+ rc = ctx->e.length - erofs_blksiz(sbi);
+ rc; }),
+ .end = ctx->queue + ctx->head + *len,
+ .cur = ctx->queue + ctx->head,
+ };
+ int delta;
+
+ if (z_erofs_dedupe_match(&dctx))
+ break;
+
+ delta = ctx->queue + ctx->head - dctx.cur;
+ /*
+ * For big pcluster dedupe, leave two indices at least to store
+ * CBLKCNT as the first step. Even laterly, an one-block
+ * decompresssion could be done as another try in practice.
+ */
+ if (dctx.e.compressedblks > 1 &&
+ ((ctx->clusterofs + ctx->e.length - delta) & lclustermask) +
+ dctx.e.length < 2 * (lclustermask + 1))
+ break;
+
+ if (delta) {
+ DBG_BUGON(delta < 0);
+ DBG_BUGON(!ctx->e.length);
+
+ /*
+ * For big pcluster dedupe, if we decide to shorten the
+ * previous big pcluster, make sure that the previous
+ * CBLKCNT is still kept.
+ */
+ if (ctx->e.compressedblks > 1 &&
+ (ctx->clusterofs & lclustermask) + ctx->e.length
+ - delta < 2 * (lclustermask + 1))
+ break;
+ ctx->e.partial = true;
+ ctx->e.length -= delta;
+ }
+
+ /* fall back to noncompact indexes for deduplication */
+ inode->z_advise &= ~Z_EROFS_ADVISE_COMPACTED_2B;
+ inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
+ erofs_sb_set_dedupe(sbi);
+
+ sbi->saved_by_deduplication +=
+ dctx.e.compressedblks * erofs_blksiz(sbi);
+ erofs_dbg("Dedupe %u %scompressed data (delta %d) to %u of %u blocks",
+ dctx.e.length, dctx.e.raw ? "un" : "",
+ delta, dctx.e.blkaddr, dctx.e.compressedblks);
+ z_erofs_write_indexes(ctx);
+ ctx->e = dctx.e;
+ ctx->head += dctx.e.length - delta;
+ DBG_BUGON(*len < dctx.e.length - delta);
+ *len -= dctx.e.length - delta;
+
+ if (ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) {
+ const unsigned int qh_aligned =
+ round_down(ctx->head, erofs_blksiz(sbi));
+ const unsigned int qh_after = ctx->head - qh_aligned;
+
+ memmove(ctx->queue, ctx->queue + qh_aligned,
+ *len + qh_after);
+ ctx->head = qh_after;
+ ctx->tail = qh_after + *len;
+ ret = -EAGAIN;
+ break;
+ }
+ } while (*len);
+
+out:
+ z_erofs_write_indexes(ctx);
+ return ret;
+}
+
static int write_uncompressed_extent(struct z_erofs_vle_compress_ctx *ctx,
unsigned int *len, char *dst)
{
int ret;
- unsigned int count;
+ struct erofs_sb_info *sbi = ctx->inode->sbi;
+ unsigned int count, interlaced_offset, rightpart;
/* reset clusterofs to 0 if permitted */
- if (!erofs_sb_has_lz4_0padding() && ctx->clusterofs &&
+ if (!erofs_sb_has_lz4_0padding(sbi) && ctx->clusterofs &&
ctx->head >= ctx->clusterofs) {
ctx->head -= ctx->clusterofs;
*len += ctx->clusterofs;
ctx->clusterofs = 0;
}
- /* write uncompressed data */
- count = min(EROFS_BLKSIZ, *len);
+ count = min(erofs_blksiz(sbi), *len);
- memcpy(dst, ctx->queue + ctx->head, count);
- memset(dst + count, 0, EROFS_BLKSIZ - count);
+ /* write interlaced uncompressed data if needed */
+ if (ctx->inode->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER)
+ interlaced_offset = ctx->clusterofs;
+ else
+ interlaced_offset = 0;
+ rightpart = min(erofs_blksiz(sbi) - interlaced_offset, count);
+
+ memset(dst, 0, erofs_blksiz(sbi));
+
+ memcpy(dst + interlaced_offset, ctx->queue + ctx->head, rightpart);
+ memcpy(dst, ctx->queue + ctx->head + rightpart, count - rightpart);
erofs_dbg("Writing %u uncompressed data to block %u",
count, ctx->blkaddr);
- ret = blk_write(dst, ctx->blkaddr, 1);
+ ret = blk_write(sbi, dst, ctx->blkaddr, 1);
if (ret)
return ret;
return count;
}
-static unsigned int z_erofs_get_max_pclusterblks(struct erofs_inode *inode)
+static unsigned int z_erofs_get_max_pclustersize(struct erofs_inode *inode)
{
+ unsigned int pclusterblks;
+
+ if (erofs_is_packed_inode(inode))
+ pclusterblks = cfg.c_pclusterblks_packed;
#ifndef NDEBUG
- if (cfg.c_random_pclusterblks)
- return 1 + rand() % cfg.c_pclusterblks_max;
+ else if (cfg.c_random_pclusterblks)
+ pclusterblks = 1 + rand() % cfg.c_pclusterblks_max;
#endif
- if (cfg.c_compress_hints_file) {
+ else if (cfg.c_compress_hints_file) {
z_erofs_apply_compress_hints(inode);
DBG_BUGON(!inode->z_physical_clusterblks);
- return inode->z_physical_clusterblks;
+ pclusterblks = inode->z_physical_clusterblks;
+ } else {
+ pclusterblks = cfg.c_pclusterblks_def;
}
- return cfg.c_pclusterblks_def;
+ return pclusterblks * erofs_blksiz(inode->sbi);
}
static int z_erofs_fill_inline_data(struct erofs_inode *inode, void *data,
@@ -197,23 +335,25 @@
return len;
}
-static void tryrecompress_trailing(void *in, unsigned int *insize,
+static void tryrecompress_trailing(struct z_erofs_vle_compress_ctx *ctx,
+ struct erofs_compress *ec,
+ void *in, unsigned int *insize,
void *out, int *compressedsize)
{
+ struct erofs_sb_info *sbi = ctx->inode->sbi;
static char tmp[Z_EROFS_PCLUSTER_MAX_SIZE];
unsigned int count;
int ret = *compressedsize;
/* no need to recompress */
- if (!(ret & (EROFS_BLKSIZ - 1)))
+ if (!(ret & (erofs_blksiz(sbi) - 1)))
return;
count = *insize;
- ret = erofs_compress_destsize(&compresshandle,
- in, &count, (void *)tmp,
- rounddown(ret, EROFS_BLKSIZ), false);
+ ret = erofs_compress_destsize(ec, in, &count, (void *)tmp,
+ rounddown(ret, erofs_blksiz(sbi)), false);
if (ret <= 0 || ret + (*insize - count) >=
- roundup(*compressedsize, EROFS_BLKSIZ))
+ roundup(*compressedsize, erofs_blksiz(sbi)))
return;
/* replace the original compressed data if any gain */
@@ -222,34 +362,79 @@
*compressedsize = ret;
}
-static int vle_compress_one(struct erofs_inode *inode,
- struct z_erofs_vle_compress_ctx *ctx,
- bool final)
+static bool z_erofs_fixup_deduped_fragment(struct z_erofs_vle_compress_ctx *ctx,
+ unsigned int len)
{
- struct erofs_compress *const h = &compresshandle;
+ struct erofs_inode *inode = ctx->inode;
+ struct erofs_sb_info *sbi = inode->sbi;
+ const unsigned int newsize = ctx->remaining + len;
+
+ DBG_BUGON(!inode->fragment_size);
+
+ /* try to fix again if it gets larger (should be rare) */
+ if (inode->fragment_size < newsize) {
+ ctx->pclustersize = min(z_erofs_get_max_pclustersize(inode),
+ roundup(newsize - inode->fragment_size,
+ erofs_blksiz(sbi)));
+ return false;
+ }
+
+ inode->fragmentoff += inode->fragment_size - newsize;
+ inode->fragment_size = newsize;
+
+ erofs_dbg("Reducing fragment size to %u at %llu",
+ inode->fragment_size, inode->fragmentoff | 0ULL);
+
+ /* it's the end */
+ DBG_BUGON(ctx->tail - ctx->head + ctx->remaining != newsize);
+ ctx->head = ctx->tail;
+ ctx->remaining = 0;
+ return true;
+}
+
+static int vle_compress_one(struct z_erofs_vle_compress_ctx *ctx)
+{
+ static char dstbuf[EROFS_CONFIG_COMPR_MAX_SZ + EROFS_MAX_BLOCK_SIZE];
+ struct erofs_inode *inode = ctx->inode;
+ struct erofs_sb_info *sbi = inode->sbi;
+ char *const dst = dstbuf + erofs_blksiz(sbi);
+ struct erofs_compress *const h = &ctx->ccfg->handle;
unsigned int len = ctx->tail - ctx->head;
- unsigned int count;
+ bool is_packed_inode = erofs_is_packed_inode(inode);
+ bool final = !ctx->remaining;
int ret;
- static char dstbuf[EROFS_CONFIG_COMPR_MAX_SZ + EROFS_BLKSIZ];
- char *const dst = dstbuf + EROFS_BLKSIZ;
while (len) {
- unsigned int pclustersize =
- z_erofs_get_max_pclusterblks(inode) * EROFS_BLKSIZ;
- bool may_inline = (cfg.c_ztailpacking && final);
- bool raw;
+ bool may_packing = (cfg.c_fragments && final &&
+ !is_packed_inode);
+ bool may_inline = (cfg.c_ztailpacking && final &&
+ !may_packing);
+ bool fix_dedupedfrag = ctx->fix_dedupedfrag;
- if (len <= pclustersize) {
- if (!final)
+ if (z_erofs_compress_dedupe(ctx, &len) && !final)
+ break;
+
+ if (len <= ctx->pclustersize) {
+ if (!final || !len)
break;
- if (!may_inline && len <= EROFS_BLKSIZ)
+ if (may_packing) {
+ if (inode->fragment_size && !fix_dedupedfrag) {
+ ctx->pclustersize =
+ roundup(len, erofs_blksiz(sbi));
+ goto fix_dedupedfrag;
+ }
+ ctx->e.length = len;
+ goto frag_packing;
+ }
+ if (!may_inline && len <= erofs_blksiz(sbi))
goto nocompression;
}
- count = min(len, cfg.c_max_decompressed_extent_bytes);
+ ctx->e.length = min(len,
+ cfg.c_max_decompressed_extent_bytes);
ret = erofs_compress_destsize(h, ctx->queue + ctx->head,
- &count, dst, pclustersize,
- !(final && len == count));
+ &ctx->e.length, dst, ctx->pclustersize,
+ !(final && len == ctx->e.length));
if (ret <= 0) {
if (ret != -EAGAIN) {
erofs_err("failed to compress %s: %s",
@@ -257,29 +442,45 @@
erofs_strerror(ret));
}
- if (may_inline && len < EROFS_BLKSIZ)
+ if (may_inline && len < erofs_blksiz(sbi)) {
ret = z_erofs_fill_inline_data(inode,
ctx->queue + ctx->head,
len, true);
- else
+ } else {
+ may_inline = false;
+ may_packing = false;
nocompression:
ret = write_uncompressed_extent(ctx, &len, dst);
+ }
if (ret < 0)
return ret;
- count = ret;
+ ctx->e.length = ret;
/*
* XXX: For now, we have to leave `ctx->compressedblks
* = 1' since there is no way to generate compressed
* indexes after the time that ztailpacking is decided.
*/
- ctx->compressedblks = 1;
- raw = true;
+ ctx->e.compressedblks = 1;
+ ctx->e.raw = true;
+ } else if (may_packing && len == ctx->e.length &&
+ ret < ctx->pclustersize &&
+ (!inode->fragment_size || fix_dedupedfrag)) {
+frag_packing:
+ ret = z_erofs_pack_fragments(inode,
+ ctx->queue + ctx->head,
+ len, ctx->tof_chksum);
+ if (ret < 0)
+ return ret;
+ ctx->e.compressedblks = 0; /* indicate a fragment */
+ ctx->e.raw = false;
+ ctx->fragemitted = true;
+ fix_dedupedfrag = false;
/* tailpcluster should be less than 1 block */
- } else if (may_inline && len == count &&
- ret < EROFS_BLKSIZ) {
- if (ctx->clusterofs + len <= EROFS_BLKSIZ) {
+ } else if (may_inline && len == ctx->e.length &&
+ ret < erofs_blksiz(sbi)) {
+ if (ctx->clusterofs + len <= erofs_blksiz(sbi)) {
inode->eof_tailraw = malloc(len);
if (!inode->eof_tailraw)
return -ENOMEM;
@@ -292,48 +493,73 @@
ret = z_erofs_fill_inline_data(inode, dst, ret, false);
if (ret < 0)
return ret;
- ctx->compressedblks = 1;
- raw = false;
+ ctx->e.compressedblks = 1;
+ ctx->e.raw = false;
} else {
unsigned int tailused, padding;
- if (may_inline && len == count)
- tryrecompress_trailing(ctx->queue + ctx->head,
- &count, dst, &ret);
+ /*
+ * If there's space left for the last round when
+ * deduping fragments, try to read the fragment and
+ * recompress a little more to check whether it can be
+ * filled up. Fix up the fragment if succeeds.
+ * Otherwise, just drop it and go to packing.
+ */
+ if (may_packing && len == ctx->e.length &&
+ (ret & (erofs_blksiz(sbi) - 1)) &&
+ ctx->tail < sizeof(ctx->queue)) {
+ ctx->pclustersize = BLK_ROUND_UP(sbi, ret) *
+ erofs_blksiz(sbi);
+ goto fix_dedupedfrag;
+ }
- tailused = ret & (EROFS_BLKSIZ - 1);
+ if (may_inline && len == ctx->e.length)
+ tryrecompress_trailing(ctx, h,
+ ctx->queue + ctx->head,
+ &ctx->e.length, dst, &ret);
+
+ tailused = ret & (erofs_blksiz(sbi) - 1);
padding = 0;
- ctx->compressedblks = DIV_ROUND_UP(ret, EROFS_BLKSIZ);
- DBG_BUGON(ctx->compressedblks * EROFS_BLKSIZ >= count);
+ ctx->e.compressedblks = BLK_ROUND_UP(sbi, ret);
+ DBG_BUGON(ctx->e.compressedblks * erofs_blksiz(sbi) >=
+ ctx->e.length);
/* zero out garbage trailing data for non-0padding */
- if (!erofs_sb_has_lz4_0padding())
+ if (!erofs_sb_has_lz4_0padding(sbi))
memset(dst + ret, 0,
- roundup(ret, EROFS_BLKSIZ) - ret);
+ roundup(ret, erofs_blksiz(sbi)) - ret);
else if (tailused)
- padding = EROFS_BLKSIZ - tailused;
+ padding = erofs_blksiz(sbi) - tailused;
/* write compressed data */
erofs_dbg("Writing %u compressed data to %u of %u blocks",
- count, ctx->blkaddr, ctx->compressedblks);
+ ctx->e.length, ctx->blkaddr,
+ ctx->e.compressedblks);
- ret = blk_write(dst - padding, ctx->blkaddr,
- ctx->compressedblks);
+ ret = blk_write(sbi, dst - padding, ctx->blkaddr,
+ ctx->e.compressedblks);
if (ret)
return ret;
- raw = false;
+ ctx->e.raw = false;
+ may_inline = false;
+ may_packing = false;
}
+ ctx->e.partial = false;
+ ctx->e.blkaddr = ctx->blkaddr;
+ if (!may_inline && !may_packing && !is_packed_inode)
+ (void)z_erofs_dedupe_insert(&ctx->e,
+ ctx->queue + ctx->head);
+ ctx->blkaddr += ctx->e.compressedblks;
+ ctx->head += ctx->e.length;
+ len -= ctx->e.length;
- ctx->head += count;
- /* write compression indexes for this pcluster */
- vle_write_indexes(ctx, count, raw);
-
- ctx->blkaddr += ctx->compressedblks;
- len -= count;
+ if (fix_dedupedfrag &&
+ z_erofs_fixup_deduped_fragment(ctx, len))
+ break;
if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) {
const unsigned int qh_aligned =
- round_down(ctx->head, EROFS_BLKSIZ);
+ round_down(ctx->head, erofs_blksiz(sbi));
const unsigned int qh_after = ctx->head - qh_aligned;
memmove(ctx->queue, ctx->queue + qh_aligned,
@@ -344,6 +570,13 @@
}
}
return 0;
+
+fix_dedupedfrag:
+ DBG_BUGON(!inode->fragment_size);
+ ctx->remaining += inode->fragment_size;
+ ctx->e.length = 0;
+ ctx->fix_dedupedfrag = true;
+ return 0;
}
struct z_erofs_compressindex_vec {
@@ -358,18 +591,18 @@
static void *parse_legacy_indexes(struct z_erofs_compressindex_vec *cv,
unsigned int nr, void *metacur)
{
- struct z_erofs_vle_decompressed_index *const db = metacur;
+ struct z_erofs_lcluster_index *const db = metacur;
unsigned int i;
for (i = 0; i < nr; ++i, ++cv) {
- struct z_erofs_vle_decompressed_index *const di = db + i;
+ struct z_erofs_lcluster_index *const di = db + i;
const unsigned int advise = le16_to_cpu(di->di_advise);
- cv->clustertype = (advise >> Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT) &
- ((1 << Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) - 1);
+ cv->clustertype = (advise >> Z_EROFS_LI_LCLUSTER_TYPE_BIT) &
+ ((1 << Z_EROFS_LI_LCLUSTER_TYPE_BITS) - 1);
cv->clusterofs = le16_to_cpu(di->di_clusterofs);
- if (cv->clustertype == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
+ if (cv->clustertype == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
cv->u.delta[0] = le16_to_cpu(di->di_u.delta[0]);
cv->u.delta[1] = le16_to_cpu(di->di_u.delta[1]);
} else {
@@ -384,10 +617,10 @@
erofs_blk_t *blkaddr_ret,
unsigned int destsize,
unsigned int logical_clusterbits,
- bool final, bool *dummy_head)
+ bool final, bool *dummy_head,
+ bool update_blkaddr)
{
unsigned int vcnt, encodebits, pos, i, cblks;
- bool update_blkaddr;
erofs_blk_t blkaddr;
if (destsize == 4)
@@ -398,21 +631,21 @@
return ERR_PTR(-EINVAL);
encodebits = (vcnt * destsize * 8 - 32) / vcnt;
blkaddr = *blkaddr_ret;
- update_blkaddr = erofs_sb_has_big_pcluster();
pos = 0;
for (i = 0; i < vcnt; ++i) {
unsigned int offset, v;
u8 ch, rem;
- if (cv[i].clustertype == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
- if (cv[i].u.delta[0] & Z_EROFS_VLE_DI_D0_CBLKCNT) {
- cblks = cv[i].u.delta[0] & ~Z_EROFS_VLE_DI_D0_CBLKCNT;
+ if (cv[i].clustertype == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
+ if (cv[i].u.delta[0] & Z_EROFS_LI_D0_CBLKCNT) {
+ cblks = cv[i].u.delta[0] & ~Z_EROFS_LI_D0_CBLKCNT;
offset = cv[i].u.delta[0];
blkaddr += cblks;
*dummy_head = false;
} else if (i + 1 == vcnt) {
- offset = cv[i].u.delta[1];
+ offset = min_t(u16, cv[i].u.delta[1],
+ (1 << logical_clusterbits) - 1);
} else {
offset = cv[i].u.delta[0];
}
@@ -451,25 +684,37 @@
unsigned int legacymetasize,
void *compressmeta)
{
- const unsigned int mpos = Z_EROFS_VLE_EXTENT_ALIGN(inode->inode_isize +
- inode->xattr_isize) +
+ const unsigned int mpos = roundup(inode->inode_isize +
+ inode->xattr_isize, 8) +
sizeof(struct z_erofs_map_header);
const unsigned int totalidx = (legacymetasize -
Z_EROFS_LEGACY_MAP_HEADER_SIZE) /
- sizeof(struct z_erofs_vle_decompressed_index);
+ sizeof(struct z_erofs_lcluster_index);
const unsigned int logical_clusterbits = inode->z_logical_clusterbits;
u8 *out, *in;
struct z_erofs_compressindex_vec cv[16];
+ struct erofs_sb_info *sbi = inode->sbi;
/* # of 8-byte units so that it can be aligned with 32 bytes */
unsigned int compacted_4b_initial, compacted_4b_end;
unsigned int compacted_2b;
bool dummy_head;
+ bool big_pcluster = erofs_sb_has_big_pcluster(sbi);
- if (logical_clusterbits < LOG_BLOCK_SIZE || LOG_BLOCK_SIZE < 12)
+ if (logical_clusterbits < sbi->blkszbits || sbi->blkszbits < 12)
return -EINVAL;
- if (logical_clusterbits > 14) /* currently not supported */
- return -ENOTSUP;
- if (logical_clusterbits == 12) {
+ if (logical_clusterbits > 14) {
+ erofs_err("compact format is unsupported for lcluster size %u",
+ 1 << logical_clusterbits);
+ return -EOPNOTSUPP;
+ }
+
+ if (inode->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) {
+ if (logical_clusterbits != 12) {
+ erofs_err("compact 2B is unsupported for lcluster size %u",
+ 1 << logical_clusterbits);
+ return -EINVAL;
+ }
+
compacted_4b_initial = (32 - mpos % 32) / 4;
if (compacted_4b_initial == 32 / 4)
compacted_4b_initial = 0;
@@ -495,7 +740,7 @@
dummy_head = false;
/* prior to bigpcluster, blkaddr was bumped up once coming into HEAD */
- if (!erofs_sb_has_big_pcluster()) {
+ if (!big_pcluster) {
--blkaddr;
dummy_head = true;
}
@@ -505,7 +750,7 @@
in = parse_legacy_indexes(cv, 2, in);
out = write_compacted_indexes(out, cv, &blkaddr,
4, logical_clusterbits, false,
- &dummy_head);
+ &dummy_head, big_pcluster);
compacted_4b_initial -= 2;
}
DBG_BUGON(compacted_4b_initial);
@@ -515,7 +760,7 @@
in = parse_legacy_indexes(cv, 16, in);
out = write_compacted_indexes(out, cv, &blkaddr,
2, logical_clusterbits, false,
- &dummy_head);
+ &dummy_head, big_pcluster);
compacted_2b -= 16;
}
DBG_BUGON(compacted_2b);
@@ -525,7 +770,7 @@
in = parse_legacy_indexes(cv, 2, in);
out = write_compacted_indexes(out, cv, &blkaddr,
4, logical_clusterbits, false,
- &dummy_head);
+ &dummy_head, big_pcluster);
compacted_4b_end -= 2;
}
@@ -535,7 +780,7 @@
in = parse_legacy_indexes(cv, 1, in);
out = write_compacted_indexes(out, cv, &blkaddr,
4, logical_clusterbits, true,
- &dummy_head);
+ &dummy_head, big_pcluster);
}
inode->extent_isize = out - (u8 *)compressmeta;
return 0;
@@ -544,15 +789,20 @@
static void z_erofs_write_mapheader(struct erofs_inode *inode,
void *compressmeta)
{
+ struct erofs_sb_info *sbi = inode->sbi;
struct z_erofs_map_header h = {
.h_advise = cpu_to_le16(inode->z_advise),
- .h_idata_size = cpu_to_le16(inode->idata_size),
.h_algorithmtype = inode->z_algorithmtype[1] << 4 |
inode->z_algorithmtype[0],
/* lclustersize */
- .h_clusterbits = inode->z_logical_clusterbits - 12,
+ .h_clusterbits = inode->z_logical_clusterbits - sbi->blkszbits,
};
+ if (inode->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)
+ h.h_fragmentoff = cpu_to_le32(inode->fragmentoff);
+ else
+ h.h_idata_size = cpu_to_le16(inode->idata_size);
+
memset(compressmeta, 0, Z_EROFS_LEGACY_MAP_HEADER_SIZE);
/* write out map header */
memcpy(compressmeta, &h, sizeof(struct z_erofs_map_header));
@@ -560,36 +810,38 @@
void z_erofs_drop_inline_pcluster(struct erofs_inode *inode)
{
- const unsigned int type = Z_EROFS_VLE_CLUSTER_TYPE_PLAIN;
+ struct erofs_sb_info *sbi = inode->sbi;
+ const unsigned int type = Z_EROFS_LCLUSTER_TYPE_PLAIN;
struct z_erofs_map_header *h = inode->compressmeta;
h->h_advise = cpu_to_le16(le16_to_cpu(h->h_advise) &
~Z_EROFS_ADVISE_INLINE_PCLUSTER);
+ h->h_idata_size = 0;
if (!inode->eof_tailraw)
return;
DBG_BUGON(inode->compressed_idata != true);
/* patch the EOF lcluster to uncompressed type first */
- if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
- struct z_erofs_vle_decompressed_index *di =
+ if (inode->datalayout == EROFS_INODE_COMPRESSED_FULL) {
+ struct z_erofs_lcluster_index *di =
(inode->compressmeta + inode->extent_isize) -
- sizeof(struct z_erofs_vle_decompressed_index);
+ sizeof(struct z_erofs_lcluster_index);
__le16 advise =
- cpu_to_le16(type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT);
+ cpu_to_le16(type << Z_EROFS_LI_LCLUSTER_TYPE_BIT);
di->di_advise = advise;
- } else if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION) {
+ } else if (inode->datalayout == EROFS_INODE_COMPRESSED_COMPACT) {
/* handle the last compacted 4B pack */
unsigned int eofs, base, pos, v, lo;
u8 *out;
eofs = inode->extent_isize -
- (4 << (DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ) & 1));
+ (4 << (BLK_ROUND_UP(sbi, inode->i_size) & 1));
base = round_down(eofs, 8);
pos = 16 /* encodebits */ * ((eofs - base) / 4);
out = inode->compressmeta + base;
- lo = get_unaligned_le32(out + pos / 8) & (EROFS_BLKSIZ - 1);
- v = (type << LOG_BLOCK_SIZE) | lo;
+ lo = erofs_blkoff(sbi, get_unaligned_le32(out + pos / 8));
+ v = (type << sbi->blkszbits) | lo;
out[pos / 8] = v & 0xff;
out[pos / 8 + 1] = v >> 8;
} else {
@@ -604,72 +856,107 @@
inode->eof_tailraw = NULL;
}
-int erofs_write_compressed_file(struct erofs_inode *inode)
+int erofs_write_compressed_file(struct erofs_inode *inode, int fd)
{
struct erofs_buffer_head *bh;
static struct z_erofs_vle_compress_ctx ctx;
- erofs_off_t remaining;
erofs_blk_t blkaddr, compressed_blocks;
unsigned int legacymetasize;
- int ret, fd;
- u8 *compressmeta = malloc(vle_compressmeta_capacity(inode->i_size));
+ int ret;
+ struct erofs_sb_info *sbi = inode->sbi;
+ u8 *compressmeta = malloc(BLK_ROUND_UP(sbi, inode->i_size) *
+ sizeof(struct z_erofs_lcluster_index) +
+ Z_EROFS_LEGACY_MAP_HEADER_SIZE);
if (!compressmeta)
return -ENOMEM;
- fd = open(inode->i_srcpath, O_RDONLY | O_BINARY);
- if (fd < 0) {
- ret = -errno;
- goto err_free_meta;
- }
-
/* allocate main data buffer */
bh = erofs_balloc(DATA, 0, 0, 0);
if (IS_ERR(bh)) {
ret = PTR_ERR(bh);
- goto err_close;
+ goto err_free_meta;
}
/* initialize per-file compression setting */
inode->z_advise = 0;
- if (!cfg.c_legacy_compress) {
- inode->z_advise |= Z_EROFS_ADVISE_COMPACTED_2B;
- inode->datalayout = EROFS_INODE_FLAT_COMPRESSION;
+ inode->z_logical_clusterbits = sbi->blkszbits;
+ if (!cfg.c_legacy_compress && inode->z_logical_clusterbits <= 14) {
+ if (inode->z_logical_clusterbits <= 12)
+ inode->z_advise |= Z_EROFS_ADVISE_COMPACTED_2B;
+ inode->datalayout = EROFS_INODE_COMPRESSED_COMPACT;
} else {
- inode->datalayout = EROFS_INODE_FLAT_COMPRESSION_LEGACY;
+ inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
}
- if (erofs_sb_has_big_pcluster()) {
+ if (erofs_sb_has_big_pcluster(sbi)) {
inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1;
- if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION)
+ if (inode->datalayout == EROFS_INODE_COMPRESSED_COMPACT)
inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
}
- inode->z_algorithmtype[0] = algorithmtype[0];
- inode->z_algorithmtype[1] = algorithmtype[1];
- inode->z_logical_clusterbits = LOG_BLOCK_SIZE;
+ if (cfg.c_fragments && !cfg.c_dedupe)
+ inode->z_advise |= Z_EROFS_ADVISE_INTERLACED_PCLUSTER;
+
+#ifndef NDEBUG
+ if (cfg.c_random_algorithms) {
+ while (1) {
+ inode->z_algorithmtype[0] =
+ rand() % EROFS_MAX_COMPR_CFGS;
+ if (erofs_ccfg[inode->z_algorithmtype[0]].enable)
+ break;
+ }
+ }
+#endif
+ ctx.ccfg = &erofs_ccfg[inode->z_algorithmtype[0]];
+ inode->z_algorithmtype[0] = ctx.ccfg[0].algorithmtype;
+ inode->z_algorithmtype[1] = 0;
+
+ inode->idata_size = 0;
+ inode->fragment_size = 0;
+
+ /*
+ * Handle tails in advance to avoid writing duplicated
+ * parts into the packed inode.
+ */
+ if (cfg.c_fragments && !erofs_is_packed_inode(inode)) {
+ ret = z_erofs_fragments_dedupe(inode, fd, &ctx.tof_chksum);
+ if (ret < 0)
+ goto err_bdrop;
+ }
blkaddr = erofs_mapbh(bh->block); /* start_blkaddr */
+ ctx.inode = inode;
+ ctx.pclustersize = z_erofs_get_max_pclustersize(inode);
ctx.blkaddr = blkaddr;
ctx.metacur = compressmeta + Z_EROFS_LEGACY_MAP_HEADER_SIZE;
ctx.head = ctx.tail = 0;
ctx.clusterofs = 0;
- remaining = inode->i_size;
-
- while (remaining) {
- const u64 readcount = min_t(u64, remaining,
- sizeof(ctx.queue) - ctx.tail);
-
- ret = read(fd, ctx.queue + ctx.tail, readcount);
- if (ret != readcount) {
- ret = -errno;
- goto err_bdrop;
- }
- remaining -= readcount;
- ctx.tail += readcount;
-
- ret = vle_compress_one(inode, &ctx, !remaining);
+ ctx.e.length = 0;
+ ctx.remaining = inode->i_size - inode->fragment_size;
+ ctx.fix_dedupedfrag = false;
+ ctx.fragemitted = false;
+ if (cfg.c_all_fragments && !erofs_is_packed_inode(inode) &&
+ !inode->fragment_size) {
+ ret = z_erofs_pack_file_from_fd(inode, fd, ctx.tof_chksum);
if (ret)
goto err_free_idata;
+ } else {
+ while (ctx.remaining) {
+ const u64 rx = min_t(u64, ctx.remaining,
+ sizeof(ctx.queue) - ctx.tail);
+
+ ret = read(fd, ctx.queue + ctx.tail, rx);
+ if (ret != rx) {
+ ret = -errno;
+ goto err_bdrop;
+ }
+ ctx.remaining -= rx;
+ ctx.tail += rx;
+
+ ret = vle_compress_one(&ctx);
+ if (ret)
+ goto err_free_idata;
+ }
}
DBG_BUGON(ctx.head != ctx.tail);
@@ -678,36 +965,65 @@
DBG_BUGON(compressed_blocks < !!inode->idata_size);
compressed_blocks -= !!inode->idata_size;
- vle_write_indexes_final(&ctx);
+ /* generate an extent for the deduplicated fragment */
+ if (inode->fragment_size && !ctx.fragemitted) {
+ z_erofs_write_indexes(&ctx);
+ ctx.e.length = inode->fragment_size;
+ ctx.e.compressedblks = 0;
+ ctx.e.raw = false;
+ ctx.e.partial = false;
+ ctx.e.blkaddr = ctx.blkaddr;
+ }
+ z_erofs_fragments_commit(inode);
+
+ z_erofs_write_indexes(&ctx);
+ z_erofs_write_indexes_final(&ctx);
legacymetasize = ctx.metacur - compressmeta;
/* estimate if data compression saves space or not */
- if (compressed_blocks * EROFS_BLKSIZ + inode->idata_size +
+ if (!inode->fragment_size &&
+ compressed_blocks * erofs_blksiz(sbi) + inode->idata_size +
legacymetasize >= inode->i_size) {
+ z_erofs_dedupe_commit(true);
ret = -ENOSPC;
goto err_free_idata;
}
+ z_erofs_dedupe_commit(false);
z_erofs_write_mapheader(inode, compressmeta);
- close(fd);
+ if (!ctx.fragemitted)
+ sbi->saved_by_deduplication += inode->fragment_size;
+
+ /* if the entire file is a fragment, a simplified form is used. */
+ if (inode->i_size == inode->fragment_size) {
+ DBG_BUGON(inode->fragmentoff >> 63);
+ *(__le64 *)compressmeta =
+ cpu_to_le64(inode->fragmentoff | 1ULL << 63);
+ inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
+ legacymetasize = Z_EROFS_LEGACY_MAP_HEADER_SIZE;
+ }
+
if (compressed_blocks) {
- ret = erofs_bh_balloon(bh, blknr_to_addr(compressed_blocks));
- DBG_BUGON(ret != EROFS_BLKSIZ);
+ ret = erofs_bh_balloon(bh, erofs_pos(sbi, compressed_blocks));
+ DBG_BUGON(ret != erofs_blksiz(sbi));
} else {
- DBG_BUGON(!inode->idata_size);
+ if (!cfg.c_fragments && !cfg.c_dedupe)
+ DBG_BUGON(!inode->idata_size);
}
erofs_info("compressed %s (%llu bytes) into %u blocks",
inode->i_srcpath, (unsigned long long)inode->i_size,
compressed_blocks);
- if (inode->idata_size)
+ if (inode->idata_size) {
+ bh->op = &erofs_skip_write_bhops;
inode->bh_data = bh;
- else
+ } else {
erofs_bdrop(bh, false);
+ }
inode->u.i_blocks = compressed_blocks;
- if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
+ if (inode->datalayout == EROFS_INODE_COMPRESSED_FULL) {
inode->extent_isize = legacymetasize;
} else {
ret = z_erofs_convert_to_compacted_format(inode, blkaddr,
@@ -716,7 +1032,8 @@
DBG_BUGON(ret);
}
inode->compressmeta = compressmeta;
- erofs_droid_blocklist_write(inode, blkaddr, compressed_blocks);
+ if (!erofs_is_packed_inode(inode))
+ erofs_droid_blocklist_write(inode, blkaddr, compressed_blocks);
return 0;
err_free_idata:
@@ -726,28 +1043,18 @@
}
err_bdrop:
erofs_bdrop(bh, true); /* revoke buffer */
-err_close:
- close(fd);
err_free_meta:
free(compressmeta);
return ret;
}
-static int erofs_get_compress_algorithm_id(const char *name)
-{
- if (!strcmp(name, "lz4") || !strcmp(name, "lz4hc"))
- return Z_EROFS_COMPRESSION_LZ4;
- if (!strcmp(name, "lzma"))
- return Z_EROFS_COMPRESSION_LZMA;
- return -ENOTSUP;
-}
-
-int z_erofs_build_compr_cfgs(struct erofs_buffer_head *sb_bh)
+static int z_erofs_build_compr_cfgs(struct erofs_sb_info *sbi,
+ struct erofs_buffer_head *sb_bh)
{
struct erofs_buffer_head *bh = sb_bh;
int ret = 0;
- if (sbi.available_compr_algs & (1 << Z_EROFS_COMPRESSION_LZ4)) {
+ if (sbi->available_compr_algs & (1 << Z_EROFS_COMPRESSION_LZ4)) {
struct {
__le16 size;
struct z_erofs_lz4_cfgs lz4;
@@ -755,7 +1062,7 @@
.size = cpu_to_le16(sizeof(struct z_erofs_lz4_cfgs)),
.lz4 = {
.max_distance =
- cpu_to_le16(sbi.lz4_max_distance),
+ cpu_to_le16(sbi->lz4_max_distance),
.max_pclusterblks = cfg.c_pclusterblks_max,
}
};
@@ -766,12 +1073,12 @@
return PTR_ERR(bh);
}
erofs_mapbh(bh->block);
- ret = dev_write(&lz4alg, erofs_btell(bh, false),
+ ret = dev_write(sbi, &lz4alg, erofs_btell(bh, false),
sizeof(lz4alg));
bh->op = &erofs_drop_directly_bhops;
}
#ifdef HAVE_LIBLZMA
- if (sbi.available_compr_algs & (1 << Z_EROFS_COMPRESSION_LZMA)) {
+ if (sbi->available_compr_algs & (1 << Z_EROFS_COMPRESSION_LZMA)) {
struct {
__le16 size;
struct z_erofs_lzma_cfgs lzma;
@@ -788,71 +1095,101 @@
return PTR_ERR(bh);
}
erofs_mapbh(bh->block);
- ret = dev_write(&lzmaalg, erofs_btell(bh, false),
+ ret = dev_write(sbi, &lzmaalg, erofs_btell(bh, false),
sizeof(lzmaalg));
bh->op = &erofs_drop_directly_bhops;
}
#endif
+ if (sbi->available_compr_algs & (1 << Z_EROFS_COMPRESSION_DEFLATE)) {
+ struct {
+ __le16 size;
+ struct z_erofs_deflate_cfgs z;
+ } __packed zalg = {
+ .size = cpu_to_le16(sizeof(struct z_erofs_deflate_cfgs)),
+ .z = {
+ .windowbits =
+ cpu_to_le32(ilog2(cfg.c_dict_size)),
+ }
+ };
+
+ bh = erofs_battach(bh, META, sizeof(zalg));
+ if (IS_ERR(bh)) {
+ DBG_BUGON(1);
+ return PTR_ERR(bh);
+ }
+ erofs_mapbh(bh->block);
+ ret = dev_write(sbi, &zalg, erofs_btell(bh, false),
+ sizeof(zalg));
+ bh->op = &erofs_drop_directly_bhops;
+ }
return ret;
}
-int z_erofs_compress_init(struct erofs_buffer_head *sb_bh)
+int z_erofs_compress_init(struct erofs_sb_info *sbi, struct erofs_buffer_head *sb_bh)
{
- /* initialize for primary compression algorithm */
- int ret = erofs_compressor_init(&compresshandle,
- cfg.c_compr_alg_master);
+ int i, ret;
- if (ret)
- return ret;
+ for (i = 0; cfg.c_compr_alg[i]; ++i) {
+ struct erofs_compress *c = &erofs_ccfg[i].handle;
+
+ ret = erofs_compressor_init(sbi, c, cfg.c_compr_alg[i]);
+ if (ret)
+ return ret;
+
+ ret = erofs_compressor_setlevel(c, cfg.c_compr_level[i]);
+ if (ret)
+ return ret;
+
+ erofs_ccfg[i].algorithmtype =
+ z_erofs_get_compress_algorithm_id(c);
+ erofs_ccfg[i].enable = true;
+ sbi->available_compr_algs |= 1 << erofs_ccfg[i].algorithmtype;
+ if (erofs_ccfg[i].algorithmtype != Z_EROFS_COMPRESSION_LZ4)
+ erofs_sb_set_compr_cfgs(sbi);
+ }
/*
* if primary algorithm is empty (e.g. compression off),
* clear 0PADDING feature for old kernel compatibility.
*/
- if (!cfg.c_compr_alg_master ||
- (cfg.c_legacy_compress && !strcmp(cfg.c_compr_alg_master, "lz4")))
- erofs_sb_clear_lz4_0padding();
+ if (!cfg.c_compr_alg[0] ||
+ (cfg.c_legacy_compress && !strncmp(cfg.c_compr_alg[0], "lz4", 3)))
+ erofs_sb_clear_lz4_0padding(sbi);
- if (!cfg.c_compr_alg_master)
+ if (!cfg.c_compr_alg[0])
return 0;
- ret = erofs_compressor_setlevel(&compresshandle,
- cfg.c_compr_level_master);
- if (ret)
- return ret;
-
- /* figure out primary algorithm */
- ret = erofs_get_compress_algorithm_id(cfg.c_compr_alg_master);
- if (ret < 0)
- return ret;
-
- algorithmtype[0] = ret; /* primary algorithm (head 0) */
- algorithmtype[1] = 0; /* secondary algorithm (head 1) */
/*
* if big pcluster is enabled, an extra CBLKCNT lcluster index needs
* to be loaded in order to get those compressed block counts.
*/
if (cfg.c_pclusterblks_max > 1) {
if (cfg.c_pclusterblks_max >
- Z_EROFS_PCLUSTER_MAX_SIZE / EROFS_BLKSIZ) {
+ Z_EROFS_PCLUSTER_MAX_SIZE / erofs_blksiz(sbi)) {
erofs_err("unsupported clusterblks %u (too large)",
cfg.c_pclusterblks_max);
return -EINVAL;
}
- erofs_sb_set_big_pcluster();
+ erofs_sb_set_big_pcluster(sbi);
+ }
+ if (cfg.c_pclusterblks_packed > cfg.c_pclusterblks_max) {
+ erofs_err("invalid physical cluster size for the packed file");
+ return -EINVAL;
}
- if (ret != Z_EROFS_COMPRESSION_LZ4)
- erofs_sb_set_compr_cfgs();
-
- if (erofs_sb_has_compr_cfgs()) {
- sbi.available_compr_algs |= 1 << ret;
- return z_erofs_build_compr_cfgs(sb_bh);
- }
+ if (erofs_sb_has_compr_cfgs(sbi))
+ return z_erofs_build_compr_cfgs(sbi, sb_bh);
return 0;
}
int z_erofs_compress_exit(void)
{
- return erofs_compressor_exit(&compresshandle);
+ int i, ret;
+
+ for (i = 0; cfg.c_compr_alg[i]; ++i) {
+ ret = erofs_compressor_exit(&erofs_ccfg[i].handle);
+ if (ret)
+ return ret;
+ }
+ return 0;
}
diff --git a/lib/compress_hints.c b/lib/compress_hints.c
index 92964eb..afc9f8f 100644
--- a/lib/compress_hints.c
+++ b/lib/compress_hints.c
@@ -20,57 +20,60 @@
erofs_err("invalid regex %s (%s)\n", s, str);
}
-static int erofs_insert_compress_hints(const char *s, unsigned int blks)
+/* algorithmtype is actually ccfg # here */
+static int erofs_insert_compress_hints(const char *s, unsigned int blks,
+ unsigned int algorithmtype)
{
- struct erofs_compress_hints *r;
+ struct erofs_compress_hints *ch;
int ret;
- r = malloc(sizeof(struct erofs_compress_hints));
- if (!r)
+ ch = malloc(sizeof(struct erofs_compress_hints));
+ if (!ch)
return -ENOMEM;
- ret = regcomp(&r->reg, s, REG_EXTENDED|REG_NOSUB);
+ ret = regcomp(&ch->reg, s, REG_EXTENDED|REG_NOSUB);
if (ret) {
- dump_regerror(ret, s, &r->reg);
- goto err_out;
+ dump_regerror(ret, s, &ch->reg);
+ free(ch);
+ return ret;
}
- r->physical_clusterblks = blks;
+ ch->physical_clusterblks = blks;
+ ch->algorithmtype = algorithmtype;
- list_add_tail(&r->list, &compress_hints_head);
+ list_add_tail(&ch->list, &compress_hints_head);
erofs_info("compress hint %s (%u) is inserted", s, blks);
return ret;
-
-err_out:
- free(r);
- return ret;
}
bool z_erofs_apply_compress_hints(struct erofs_inode *inode)
{
const char *s;
struct erofs_compress_hints *r;
- unsigned int pclusterblks;
+ unsigned int pclusterblks, algorithmtype;
if (inode->z_physical_clusterblks)
return true;
s = erofs_fspath(inode->i_srcpath);
pclusterblks = cfg.c_pclusterblks_def;
+ algorithmtype = 0;
list_for_each_entry(r, &compress_hints_head, list) {
int ret = regexec(&r->reg, s, (size_t)0, NULL, 0);
if (!ret) {
pclusterblks = r->physical_clusterblks;
+ algorithmtype = r->algorithmtype;
break;
}
if (ret != REG_NOMATCH)
dump_regerror(ret, s, &r->reg);
}
inode->z_physical_clusterblks = pclusterblks;
+ inode->z_algorithmtype[0] = algorithmtype;
/* pclusterblks is 0 means this file shouldn't be compressed */
- return !!pclusterblks;
+ return pclusterblks != 0;
}
void erofs_cleanup_compress_hints(void)
@@ -83,7 +86,7 @@
}
}
-int erofs_load_compress_hints(void)
+int erofs_load_compress_hints(struct erofs_sb_info *sbi)
{
char buf[PATH_MAX + 100];
FILE *f;
@@ -98,32 +101,53 @@
return -errno;
for (line = 1; fgets(buf, sizeof(buf), f); ++line) {
- unsigned int pclustersize;
- char *pattern;
+ unsigned int pclustersize, ccfg;
+ char *alg, *pattern;
+
+ if (*buf == '#' || *buf == '\n')
+ continue;
pclustersize = atoi(strtok(buf, "\t "));
+ alg = strtok(NULL, "\n\t ");
pattern = strtok(NULL, "\n");
+ if (!pattern) {
+ pattern = alg;
+ alg = NULL;
+ }
if (!pattern || *pattern == '\0') {
erofs_err("cannot find a match pattern at line %u",
line);
ret = -EINVAL;
goto out;
}
- if (pclustersize % EROFS_BLKSIZ) {
+ if (!alg || *alg == '\0') {
+ ccfg = 0;
+ } else {
+ ccfg = atoi(alg);
+ if (ccfg >= EROFS_MAX_COMPR_CFGS ||
+ !cfg.c_compr_alg[ccfg]) {
+ erofs_err("invalid compressing configuration \"%s\" at line %u",
+ alg, line);
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ if (pclustersize % erofs_blksiz(sbi)) {
erofs_warn("invalid physical clustersize %u, "
"use default pclusterblks %u",
pclustersize, cfg.c_pclusterblks_def);
continue;
}
erofs_insert_compress_hints(pattern,
- pclustersize / EROFS_BLKSIZ);
+ pclustersize / erofs_blksiz(sbi), ccfg);
if (pclustersize > max_pclustersize)
max_pclustersize = pclustersize;
}
- if (cfg.c_pclusterblks_max * EROFS_BLKSIZ < max_pclustersize) {
- cfg.c_pclusterblks_max = max_pclustersize / EROFS_BLKSIZ;
+ if (cfg.c_pclusterblks_max * erofs_blksiz(sbi) < max_pclustersize) {
+ cfg.c_pclusterblks_max = max_pclustersize / erofs_blksiz(sbi);
erofs_warn("update max pclusterblks to %u", cfg.c_pclusterblks_max);
}
out:
diff --git a/lib/compressor.c b/lib/compressor.c
index a46bc39..93f5617 100644
--- a/lib/compressor.c
+++ b/lib/compressor.c
@@ -10,18 +10,71 @@
#define EROFS_CONFIG_COMPR_DEF_BOUNDARY (128)
-static const struct erofs_compressor *compressors[] = {
+static const struct erofs_algorithm {
+ char *name;
+ const struct erofs_compressor *c;
+ unsigned int id;
+
+ /* its name won't be shown as a supported algorithm */
+ bool optimisor;
+} erofs_algs[] = {
+ { "lz4",
#if LZ4_ENABLED
-#if LZ4HC_ENABLED
- &erofs_compressor_lz4hc,
-#endif
&erofs_compressor_lz4,
+#else
+ NULL,
#endif
+ Z_EROFS_COMPRESSION_LZ4, false },
+
+#if LZ4HC_ENABLED
+ { "lz4hc", &erofs_compressor_lz4hc,
+ Z_EROFS_COMPRESSION_LZ4, true },
+#endif
+
+ { "lzma",
#if HAVE_LIBLZMA
&erofs_compressor_lzma,
+#else
+ NULL,
+#endif
+ Z_EROFS_COMPRESSION_LZMA, false },
+
+ { "deflate", &erofs_compressor_deflate,
+ Z_EROFS_COMPRESSION_DEFLATE, false },
+
+#if HAVE_LIBDEFLATE
+ { "libdeflate", &erofs_compressor_libdeflate,
+ Z_EROFS_COMPRESSION_DEFLATE, true },
#endif
};
+int z_erofs_get_compress_algorithm_id(const struct erofs_compress *c)
+{
+ DBG_BUGON(!c->alg);
+ return c->alg->id;
+}
+
+const char *z_erofs_list_supported_algorithms(int i, unsigned int *mask)
+{
+ if (i >= ARRAY_SIZE(erofs_algs))
+ return NULL;
+ if (!erofs_algs[i].optimisor && (*mask & (1 << erofs_algs[i].id))) {
+ *mask ^= 1 << erofs_algs[i].id;
+ return erofs_algs[i].name;
+ }
+ return "";
+}
+
+const char *z_erofs_list_available_compressors(int *i)
+{
+ for (;*i < ARRAY_SIZE(erofs_algs); ++*i) {
+ if (!erofs_algs[*i].c)
+ continue;
+ return erofs_algs[(*i)++].name;
+ }
+ return NULL;
+}
+
int erofs_compress_destsize(const struct erofs_compress *c,
const void *src, unsigned int *srcsize,
void *dst, unsigned int dstsize, bool inblocks)
@@ -30,18 +83,18 @@
int ret;
DBG_BUGON(!c->alg);
- if (!c->alg->compress_destsize)
+ if (!c->alg->c->compress_destsize)
return -ENOTSUP;
uncompressed_capacity = *srcsize;
- ret = c->alg->compress_destsize(c, src, srcsize, dst, dstsize);
+ ret = c->alg->c->compress_destsize(c, src, srcsize, dst, dstsize);
if (ret < 0)
return ret;
- /* XXX: ret >= EROFS_BLKSIZ is a temporary hack for ztailpacking */
- if (inblocks || ret >= EROFS_BLKSIZ ||
+ /* XXX: ret >= destsize_alignsize is a temporary hack for ztailpacking */
+ if (inblocks || ret >= c->destsize_alignsize ||
uncompressed_capacity != *srcsize)
- compressed_size = roundup(ret, EROFS_BLKSIZ);
+ compressed_size = roundup(ret, c->destsize_alignsize);
else
compressed_size = ret;
DBG_BUGON(c->compress_threshold < 100);
@@ -51,16 +104,11 @@
return ret;
}
-const char *z_erofs_list_available_compressors(unsigned int i)
-{
- return i >= ARRAY_SIZE(compressors) ? NULL : compressors[i]->name;
-}
-
int erofs_compressor_setlevel(struct erofs_compress *c, int compression_level)
{
DBG_BUGON(!c->alg);
- if (c->alg->setlevel)
- return c->alg->setlevel(c, compression_level);
+ if (c->alg->c->setlevel)
+ return c->alg->c->setlevel(c, compression_level);
if (compression_level >= 0)
return -EINVAL;
@@ -68,16 +116,19 @@
return 0;
}
-int erofs_compressor_init(struct erofs_compress *c, char *alg_name)
+int erofs_compressor_init(struct erofs_sb_info *sbi,
+ struct erofs_compress *c, char *alg_name)
{
int ret, i;
+ c->sbi = sbi;
+
/* should be written in "minimum compression ratio * 100" */
c->compress_threshold = 100;
/* optimize for 4k size page */
- c->destsize_alignsize = EROFS_BLKSIZ;
- c->destsize_redzone_begin = EROFS_BLKSIZ - 16;
+ c->destsize_alignsize = erofs_blksiz(sbi);
+ c->destsize_redzone_begin = erofs_blksiz(sbi) - 16;
c->destsize_redzone_end = EROFS_CONFIG_COMPR_DEF_BOUNDARY;
if (!alg_name) {
@@ -86,13 +137,16 @@
}
ret = -EINVAL;
- for (i = 0; i < ARRAY_SIZE(compressors); ++i) {
- if (alg_name && strcmp(alg_name, compressors[i]->name))
+ for (i = 0; i < ARRAY_SIZE(erofs_algs); ++i) {
+ if (alg_name && strcmp(alg_name, erofs_algs[i].name))
continue;
- ret = compressors[i]->init(c);
+ if (!erofs_algs[i].c)
+ continue;
+
+ ret = erofs_algs[i].c->init(c);
if (!ret) {
- DBG_BUGON(!c->alg);
+ c->alg = &erofs_algs[i];
return 0;
}
}
@@ -102,7 +156,7 @@
int erofs_compressor_exit(struct erofs_compress *c)
{
- if (c->alg && c->alg->exit)
- return c->alg->exit(c);
+ if (c->alg && c->alg->c->exit)
+ return c->alg->c->exit(c);
return 0;
}
diff --git a/lib/compressor.h b/lib/compressor.h
index cf063f1..9fa01d1 100644
--- a/lib/compressor.h
+++ b/lib/compressor.h
@@ -12,8 +12,6 @@
struct erofs_compress;
struct erofs_compressor {
- const char *name;
-
int default_level;
int best_level;
@@ -26,8 +24,11 @@
void *dst, unsigned int dstsize);
};
+struct erofs_algorithm;
+
struct erofs_compress {
- const struct erofs_compressor *alg;
+ struct erofs_sb_info *sbi;
+ const struct erofs_algorithm *alg;
unsigned int compress_threshold;
unsigned int compression_level;
@@ -44,13 +45,17 @@
extern const struct erofs_compressor erofs_compressor_lz4;
extern const struct erofs_compressor erofs_compressor_lz4hc;
extern const struct erofs_compressor erofs_compressor_lzma;
+extern const struct erofs_compressor erofs_compressor_deflate;
+extern const struct erofs_compressor erofs_compressor_libdeflate;
+int z_erofs_get_compress_algorithm_id(const struct erofs_compress *c);
int erofs_compress_destsize(const struct erofs_compress *c,
const void *src, unsigned int *srcsize,
void *dst, unsigned int dstsize, bool inblocks);
int erofs_compressor_setlevel(struct erofs_compress *c, int compression_level);
-int erofs_compressor_init(struct erofs_compress *c, char *alg_name);
+int erofs_compressor_init(struct erofs_sb_info *sbi,
+ struct erofs_compress *c, char *alg_name);
int erofs_compressor_exit(struct erofs_compress *c);
#endif
diff --git a/lib/compressor_deflate.c b/lib/compressor_deflate.c
new file mode 100644
index 0000000..4e5902e
--- /dev/null
+++ b/lib/compressor_deflate.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+/*
+ * Copyright (C) 2023, Alibaba Cloud
+ * Copyright (C) 2023, Gao Xiang <xiang@kernel.org>
+ */
+#include "erofs/internal.h"
+#include "erofs/print.h"
+#include "erofs/config.h"
+#include "compressor.h"
+
+void *kite_deflate_init(int level, unsigned int dict_size);
+void kite_deflate_end(void *s);
+int kite_deflate_destsize(void *s, const u8 *in, u8 *out,
+ unsigned int *srcsize, unsigned int target_dstsize);
+
+static int deflate_compress_destsize(const struct erofs_compress *c,
+ const void *src, unsigned int *srcsize,
+ void *dst, unsigned int dstsize)
+{
+ int rc = kite_deflate_destsize(c->private_data, src, dst,
+ srcsize, dstsize);
+
+ if (rc <= 0)
+ return -EFAULT;
+ return rc;
+}
+
+static int compressor_deflate_exit(struct erofs_compress *c)
+{
+ if (!c->private_data)
+ return -EINVAL;
+
+ kite_deflate_end(c->private_data);
+ return 0;
+}
+
+static int compressor_deflate_init(struct erofs_compress *c)
+{
+ c->private_data = NULL;
+
+ erofs_warn("EXPERIMENTAL DEFLATE algorithm in use. Use at your own risk!");
+ erofs_warn("*Carefully* check filesystem data correctness to avoid corruption!");
+ erofs_warn("Please send a report to <linux-erofs@lists.ozlabs.org> if something is wrong.");
+ return 0;
+}
+
+static int erofs_compressor_deflate_setlevel(struct erofs_compress *c,
+ int compression_level)
+{
+ void *s;
+
+ if (c->private_data) {
+ kite_deflate_end(c->private_data);
+ c->private_data = NULL;
+ }
+
+ if (compression_level < 0)
+ compression_level = erofs_compressor_deflate.default_level;
+
+ s = kite_deflate_init(compression_level, cfg.c_dict_size);
+ if (IS_ERR(s))
+ return PTR_ERR(s);
+
+ c->private_data = s;
+ c->compression_level = compression_level;
+ return 0;
+}
+
+const struct erofs_compressor erofs_compressor_deflate = {
+ .default_level = 1,
+ .best_level = 9,
+ .init = compressor_deflate_init,
+ .exit = compressor_deflate_exit,
+ .setlevel = erofs_compressor_deflate_setlevel,
+ .compress_destsize = deflate_compress_destsize,
+};
diff --git a/lib/compressor_libdeflate.c b/lib/compressor_libdeflate.c
new file mode 100644
index 0000000..c0b019a
--- /dev/null
+++ b/lib/compressor_libdeflate.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+#include "erofs/internal.h"
+#include "erofs/print.h"
+#include "erofs/config.h"
+#include <libdeflate.h>
+#include "compressor.h"
+
+static int libdeflate_compress_destsize(const struct erofs_compress *c,
+ const void *src, unsigned int *srcsize,
+ void *dst, unsigned int dstsize)
+{
+ static size_t last_uncompressed_size = 0;
+ size_t l = 0; /* largest input that fits so far */
+ size_t l_csize = 0;
+ size_t r = *srcsize + 1; /* smallest input that doesn't fit so far */
+ size_t m;
+ u8 tmpbuf[dstsize + 9];
+
+ if (last_uncompressed_size)
+ m = last_uncompressed_size * 15 / 16;
+ else
+ m = dstsize * 4;
+ for (;;) {
+ size_t csize;
+
+ m = max(m, l + 1);
+ m = min(m, r - 1);
+
+ csize = libdeflate_deflate_compress(c->private_data, src, m,
+ tmpbuf, dstsize + 9);
+ /*printf("Tried %zu => %zu\n", m, csize);*/
+ if (csize > 0 && csize <= dstsize) {
+ /* Fits */
+ memcpy(dst, tmpbuf, csize);
+ l = m;
+ l_csize = csize;
+ if (r <= l + 1 || csize +
+ (22 - 2*(int)c->compression_level) >= dstsize)
+ break;
+ /*
+ * Estimate needed input prefix size based on current
+ * compression ratio.
+ */
+ m = (dstsize * m) / csize;
+ } else {
+ /* Doesn't fit */
+ r = m;
+ if (r <= l + 1)
+ break;
+ m = (l + r) / 2;
+ }
+ }
+
+ /*
+ * Since generic EROFS on-disk compressed data will be filled with
+ * leading 0s (but no more than one block, 4KB for example, even the
+ * whole pcluster is 128KB) if not filled, it will be used to identify
+ * the actual compressed length as well without taking more reserved
+ * compressed bytes or some extra metadata to record this.
+ *
+ * DEFLATE streams can also be used in this way, if it starts from a
+ * non-last stored block, flag an unused bit instead to avoid the zero
+ * byte. It's still a valid one according to the DEFLATE specification.
+ */
+ if (l_csize && !((u8 *)dst)[0])
+ ((u8 *)dst)[0] = 1 << (2 + 1);
+
+ /*printf("Choosing %zu => %zu\n", l, l_csize);*/
+ *srcsize = l;
+ last_uncompressed_size = l;
+ return l_csize;
+}
+
+static int compressor_libdeflate_exit(struct erofs_compress *c)
+{
+ if (!c->private_data)
+ return -EINVAL;
+
+ libdeflate_free_compressor(c->private_data);
+ return 0;
+}
+
+static int compressor_libdeflate_init(struct erofs_compress *c)
+{
+ c->private_data = NULL;
+
+ erofs_warn("EXPERIMENTAL libdeflate compressor in use. Use at your own risk!");
+ return 0;
+}
+
+static int erofs_compressor_libdeflate_setlevel(struct erofs_compress *c,
+ int compression_level)
+{
+ if (compression_level < 0)
+ compression_level = erofs_compressor_deflate.default_level;
+
+ libdeflate_free_compressor(c->private_data);
+ c->private_data = libdeflate_alloc_compressor(compression_level);
+ if (!c->private_data)
+ return -ENOMEM;
+ c->compression_level = compression_level;
+ return 0;
+}
+
+const struct erofs_compressor erofs_compressor_libdeflate = {
+ .default_level = 1,
+ .best_level = 12,
+ .init = compressor_libdeflate_init,
+ .exit = compressor_libdeflate_exit,
+ .setlevel = erofs_compressor_libdeflate_setlevel,
+ .compress_destsize = libdeflate_compress_destsize,
+};
diff --git a/lib/compressor_liblzma.c b/lib/compressor_liblzma.c
index 4886d6a..0ed6f23 100644
--- a/lib/compressor_liblzma.c
+++ b/lib/compressor_liblzma.c
@@ -56,11 +56,16 @@
int compression_level)
{
struct erofs_liblzma_context *ctx = c->private_data;
+ u32 preset;
if (compression_level < 0)
- compression_level = LZMA_PRESET_DEFAULT;
+ preset = LZMA_PRESET_DEFAULT;
+ else if (compression_level >= 100)
+ preset = (compression_level - 100) | LZMA_PRESET_EXTREME;
+ else
+ preset = compression_level;
- if (lzma_lzma_preset(&ctx->opt, compression_level))
+ if (lzma_lzma_preset(&ctx->opt, preset))
return -EINVAL;
/* XXX: temporary hack */
@@ -83,7 +88,6 @@
{
struct erofs_liblzma_context *ctx;
- c->alg = &erofs_compressor_lzma;
ctx = malloc(sizeof(*ctx));
if (!ctx)
return -ENOMEM;
@@ -95,9 +99,8 @@
}
const struct erofs_compressor erofs_compressor_lzma = {
- .name = "lzma",
.default_level = LZMA_PRESET_DEFAULT,
- .best_level = LZMA_PRESET_EXTREME,
+ .best_level = 109,
.init = erofs_compressor_liblzma_init,
.exit = erofs_compressor_liblzma_exit,
.setlevel = erofs_compressor_liblzma_setlevel,
diff --git a/lib/compressor_lz4.c b/lib/compressor_lz4.c
index b6f6e7e..6677693 100644
--- a/lib/compressor_lz4.c
+++ b/lib/compressor_lz4.c
@@ -32,13 +32,11 @@
static int compressor_lz4_init(struct erofs_compress *c)
{
- c->alg = &erofs_compressor_lz4;
- sbi.lz4_max_distance = LZ4_DISTANCE_MAX;
+ c->sbi->lz4_max_distance = LZ4_DISTANCE_MAX;
return 0;
}
const struct erofs_compressor erofs_compressor_lz4 = {
- .name = "lz4",
.default_level = 0,
.best_level = 0,
.init = compressor_lz4_init,
diff --git a/lib/compressor_lz4hc.c b/lib/compressor_lz4hc.c
index eec1c84..b410e15 100644
--- a/lib/compressor_lz4hc.c
+++ b/lib/compressor_lz4hc.c
@@ -38,13 +38,11 @@
static int compressor_lz4hc_init(struct erofs_compress *c)
{
- c->alg = &erofs_compressor_lz4hc;
-
c->private_data = LZ4_createStreamHC();
if (!c->private_data)
return -ENOMEM;
- sbi.lz4_max_distance = LZ4_DISTANCE_MAX;
+ c->sbi->lz4_max_distance = LZ4_DISTANCE_MAX;
return 0;
}
@@ -60,7 +58,6 @@
}
const struct erofs_compressor erofs_compressor_lz4hc = {
- .name = "lz4hc",
.default_level = LZ4HC_CLEVEL_DEFAULT,
.best_level = LZ4HC_CLEVEL_MAX,
.init = compressor_lz4hc_init,
diff --git a/lib/config.c b/lib/config.c
index d478b07..2f3df37 100644
--- a/lib/config.c
+++ b/lib/config.c
@@ -25,7 +25,6 @@
cfg.c_version = PACKAGE_VERSION;
cfg.c_dry_run = false;
cfg.c_ignore_mtime = false;
- cfg.c_compr_level_master = -1;
cfg.c_force_inodeversion = 0;
cfg.c_inline_xattr_tolerance = 2;
cfg.c_unix_timestamp = -1;
diff --git a/lib/data.c b/lib/data.c
index 6bc554d..a87053f 100644
--- a/lib/data.c
+++ b/lib/data.c
@@ -18,27 +18,29 @@
erofs_blk_t nblocks, lastblk;
u64 offset = map->m_la;
struct erofs_inode *vi = inode;
+ struct erofs_sb_info *sbi = inode->sbi;
bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);
trace_erofs_map_blocks_flatmode_enter(inode, map, flags);
- nblocks = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
+ nblocks = BLK_ROUND_UP(sbi, inode->i_size);
lastblk = nblocks - tailendpacking;
/* there is no hole in flatmode */
map->m_flags = EROFS_MAP_MAPPED;
- if (offset < blknr_to_addr(lastblk)) {
- map->m_pa = blknr_to_addr(vi->u.i_blkaddr) + map->m_la;
- map->m_plen = blknr_to_addr(lastblk) - offset;
+ if (offset < erofs_pos(sbi, lastblk)) {
+ map->m_pa = erofs_pos(sbi, vi->u.i_blkaddr) + map->m_la;
+ map->m_plen = erofs_pos(sbi, lastblk) - offset;
} else if (tailendpacking) {
/* 2 - inode inline B: inode, [xattrs], inline last blk... */
- map->m_pa = iloc(vi->nid) + vi->inode_isize +
- vi->xattr_isize + erofs_blkoff(map->m_la);
+ map->m_pa = erofs_iloc(vi) + vi->inode_isize +
+ vi->xattr_isize + erofs_blkoff(sbi, map->m_la);
map->m_plen = inode->i_size - offset;
/* inline data should be located in the same meta block */
- if (erofs_blkoff(map->m_pa) + map->m_plen > EROFS_BLKSIZ) {
+ if (erofs_blkoff(sbi, map->m_pa) + map->m_plen >
+ erofs_blksiz(sbi)) {
erofs_err("inline data cross block boundary @ nid %" PRIu64,
vi->nid);
DBG_BUGON(1);
@@ -65,8 +67,9 @@
struct erofs_map_blocks *map, int flags)
{
struct erofs_inode *vi = inode;
+ struct erofs_sb_info *sbi = inode->sbi;
struct erofs_inode_chunk_index *idx;
- u8 buf[EROFS_BLKSIZ];
+ u8 buf[EROFS_MAX_BLOCK_SIZE];
u64 chunknr;
unsigned int unit;
erofs_off_t pos;
@@ -89,39 +92,39 @@
unit = EROFS_BLOCK_MAP_ENTRY_SIZE; /* block map */
chunknr = map->m_la >> vi->u.chunkbits;
- pos = roundup(iloc(vi->nid) + vi->inode_isize +
+ pos = roundup(erofs_iloc(vi) + vi->inode_isize +
vi->xattr_isize, unit) + unit * chunknr;
- err = blk_read(0, buf, erofs_blknr(pos), 1);
+ err = blk_read(sbi, 0, buf, erofs_blknr(sbi, pos), 1);
if (err < 0)
return -EIO;
map->m_la = chunknr << vi->u.chunkbits;
map->m_plen = min_t(erofs_off_t, 1UL << vi->u.chunkbits,
- roundup(inode->i_size - map->m_la, EROFS_BLKSIZ));
+ roundup(inode->i_size - map->m_la, erofs_blksiz(sbi)));
/* handle block map */
if (!(vi->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) {
- __le32 *blkaddr = (void *)buf + erofs_blkoff(pos);
+ __le32 *blkaddr = (void *)buf + erofs_blkoff(sbi, pos);
if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) {
map->m_flags = 0;
} else {
- map->m_pa = blknr_to_addr(le32_to_cpu(*blkaddr));
+ map->m_pa = erofs_pos(sbi, le32_to_cpu(*blkaddr));
map->m_flags = EROFS_MAP_MAPPED;
}
goto out;
}
/* parse chunk indexes */
- idx = (void *)buf + erofs_blkoff(pos);
+ idx = (void *)buf + erofs_blkoff(sbi, pos);
switch (le32_to_cpu(idx->blkaddr)) {
case EROFS_NULL_ADDR:
map->m_flags = 0;
break;
default:
map->m_deviceid = le16_to_cpu(idx->device_id) &
- sbi.device_id_mask;
- map->m_pa = blknr_to_addr(le32_to_cpu(idx->blkaddr));
+ sbi->device_id_mask;
+ map->m_pa = erofs_pos(sbi, le32_to_cpu(idx->blkaddr));
map->m_flags = EROFS_MAP_MAPPED;
break;
}
@@ -145,8 +148,8 @@
dif = sbi->devs + id;
if (!dif->mapped_blkaddr)
continue;
- startoff = blknr_to_addr(dif->mapped_blkaddr);
- length = blknr_to_addr(dif->blocks);
+ startoff = erofs_pos(sbi, dif->mapped_blkaddr);
+ length = erofs_pos(sbi, dif->blocks);
if (map->m_pa >= startoff &&
map->m_pa < startoff + length) {
@@ -158,19 +161,39 @@
return 0;
}
+int erofs_read_one_data(struct erofs_inode *inode, struct erofs_map_blocks *map,
+ char *buffer, u64 offset, size_t len)
+{
+ struct erofs_sb_info *sbi = inode->sbi;
+ struct erofs_map_dev mdev;
+ int ret;
+
+ mdev = (struct erofs_map_dev) {
+ .m_deviceid = map->m_deviceid,
+ .m_pa = map->m_pa,
+ };
+ ret = erofs_map_dev(sbi, &mdev);
+ if (ret)
+ return ret;
+
+ ret = dev_read(sbi, mdev.m_deviceid, buffer, mdev.m_pa + offset, len);
+ if (ret < 0)
+ return -EIO;
+ return 0;
+}
+
static int erofs_read_raw_data(struct erofs_inode *inode, char *buffer,
erofs_off_t size, erofs_off_t offset)
{
struct erofs_map_blocks map = {
.index = UINT_MAX,
};
- struct erofs_map_dev mdev;
int ret;
erofs_off_t ptr = offset;
while (ptr < offset + size) {
char *const estart = buffer + ptr - offset;
- erofs_off_t eend;
+ erofs_off_t eend, moff = 0;
map.m_la = ptr;
ret = erofs_map_blocks(inode, &map, 0);
@@ -179,14 +202,6 @@
DBG_BUGON(map.m_plen != map.m_llen);
- mdev = (struct erofs_map_dev) {
- .m_deviceid = map.m_deviceid,
- .m_pa = map.m_pa,
- };
- ret = erofs_map_dev(&sbi, &mdev);
- if (ret)
- return ret;
-
/* trim extent */
eend = min(offset + size, map.m_la + map.m_llen);
DBG_BUGON(ptr < map.m_la);
@@ -204,19 +219,77 @@
}
if (ptr > map.m_la) {
- mdev.m_pa += ptr - map.m_la;
+ moff = ptr - map.m_la;
map.m_la = ptr;
}
- ret = dev_read(mdev.m_deviceid, estart, mdev.m_pa,
- eend - map.m_la);
- if (ret < 0)
- return -EIO;
+ ret = erofs_read_one_data(inode, &map, estart, moff,
+ eend - map.m_la);
+ if (ret)
+ return ret;
ptr = eend;
}
return 0;
}
+int z_erofs_read_one_data(struct erofs_inode *inode,
+ struct erofs_map_blocks *map, char *raw, char *buffer,
+ erofs_off_t skip, erofs_off_t length, bool trimmed)
+{
+ struct erofs_sb_info *sbi = inode->sbi;
+ struct erofs_map_dev mdev;
+ int ret = 0;
+
+ if (map->m_flags & EROFS_MAP_FRAGMENT) {
+ struct erofs_inode packed_inode = {
+ .sbi = sbi,
+ .nid = sbi->packed_nid,
+ };
+
+ ret = erofs_read_inode_from_disk(&packed_inode);
+ if (ret) {
+ erofs_err("failed to read packed inode from disk");
+ return ret;
+ }
+
+ return erofs_pread(&packed_inode, buffer, length - skip,
+ inode->fragmentoff + skip);
+ }
+
+ /* no device id here, thus it will always succeed */
+ mdev = (struct erofs_map_dev) {
+ .m_pa = map->m_pa,
+ };
+ ret = erofs_map_dev(sbi, &mdev);
+ if (ret) {
+ DBG_BUGON(1);
+ return ret;
+ }
+
+ ret = dev_read(sbi, mdev.m_deviceid, raw, mdev.m_pa, map->m_plen);
+ if (ret < 0)
+ return ret;
+
+ ret = z_erofs_decompress(&(struct z_erofs_decompress_req) {
+ .sbi = sbi,
+ .in = raw,
+ .out = buffer,
+ .decodedskip = skip,
+ .interlaced_offset =
+ map->m_algorithmformat == Z_EROFS_COMPRESSION_INTERLACED ?
+ erofs_blkoff(sbi, map->m_la) : 0,
+ .inputsize = map->m_plen,
+ .decodedlength = length,
+ .alg = map->m_algorithmformat,
+ .partial_decoding = trimmed ? true :
+ !(map->m_flags & EROFS_MAP_FULL_MAPPED) ||
+ (map->m_flags & EROFS_MAP_PARTIAL_REF),
+ });
+ if (ret < 0)
+ return ret;
+ return 0;
+}
+
static int z_erofs_read_data(struct erofs_inode *inode, char *buffer,
erofs_off_t size, erofs_off_t offset)
{
@@ -224,8 +297,7 @@
struct erofs_map_blocks map = {
.index = UINT_MAX,
};
- struct erofs_map_dev mdev;
- bool partial;
+ bool trimmed;
unsigned int bufsize = 0;
char *raw = NULL;
int ret = 0;
@@ -238,27 +310,17 @@
if (ret)
break;
- /* no device id here, thus it will always succeed */
- mdev = (struct erofs_map_dev) {
- .m_pa = map.m_pa,
- };
- ret = erofs_map_dev(&sbi, &mdev);
- if (ret) {
- DBG_BUGON(1);
- break;
- }
-
/*
* trim to the needed size if the returned extent is quite
* larger than requested, and set up partial flag as well.
*/
if (end < map.m_la + map.m_llen) {
length = end - map.m_la;
- partial = true;
+ trimmed = true;
} else {
DBG_BUGON(end != map.m_la + map.m_llen);
length = map.m_llen;
- partial = !(map.m_flags & EROFS_MAP_FULL_MAPPED);
+ trimmed = false;
}
if (map.m_la < offset) {
@@ -270,7 +332,7 @@
}
if (!(map.m_flags & EROFS_MAP_MAPPED)) {
- memset(buffer + end - offset, 0, length);
+ memset(buffer + end - offset, 0, length - skip);
end = map.m_la;
continue;
}
@@ -283,19 +345,9 @@
break;
}
}
- ret = dev_read(mdev.m_deviceid, raw, mdev.m_pa, map.m_plen);
- if (ret < 0)
- break;
- ret = z_erofs_decompress(&(struct z_erofs_decompress_req) {
- .in = raw,
- .out = buffer + end - offset,
- .decodedskip = skip,
- .inputsize = map.m_plen,
- .decodedlength = length,
- .alg = map.m_algorithmformat,
- .partial_decoding = partial
- });
+ ret = z_erofs_read_one_data(inode, &map, raw,
+ buffer + end - offset, skip, length, trimmed);
if (ret < 0)
break;
}
@@ -312,11 +364,95 @@
case EROFS_INODE_FLAT_INLINE:
case EROFS_INODE_CHUNK_BASED:
return erofs_read_raw_data(inode, buf, count, offset);
- case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
- case EROFS_INODE_FLAT_COMPRESSION:
+ case EROFS_INODE_COMPRESSED_FULL:
+ case EROFS_INODE_COMPRESSED_COMPACT:
return z_erofs_read_data(inode, buf, count, offset);
default:
break;
}
return -EINVAL;
}
+
+static void *erofs_read_metadata_nid(struct erofs_sb_info *sbi, erofs_nid_t nid,
+ erofs_off_t *offset, int *lengthp)
+{
+ struct erofs_inode vi = { .sbi = sbi, .nid = nid };
+ __le16 __len;
+ int ret, len;
+ char *buffer;
+
+ ret = erofs_read_inode_from_disk(&vi);
+ if (ret)
+ return ERR_PTR(ret);
+
+ *offset = round_up(*offset, 4);
+ ret = erofs_pread(&vi, (void *)&__len, sizeof(__le16), *offset);
+ if (ret)
+ return ERR_PTR(ret);
+
+ len = le16_to_cpu(__len);
+ if (!len)
+ return ERR_PTR(-EFSCORRUPTED);
+
+ buffer = malloc(len);
+ if (!buffer)
+ return ERR_PTR(-ENOMEM);
+ *offset += sizeof(__le16);
+ *lengthp = len;
+
+ ret = erofs_pread(&vi, buffer, len, *offset);
+ if (ret) {
+ free(buffer);
+ return ERR_PTR(ret);
+ }
+ *offset += len;
+ return buffer;
+}
+
+static void *erofs_read_metadata_bdi(struct erofs_sb_info *sbi,
+ erofs_off_t *offset, int *lengthp)
+{
+ int ret, len, i, cnt;
+ void *buffer;
+ u8 data[EROFS_MAX_BLOCK_SIZE];
+
+ *offset = round_up(*offset, 4);
+ ret = blk_read(sbi, 0, data, erofs_blknr(sbi, *offset), 1);
+ if (ret)
+ return ERR_PTR(ret);
+ len = le16_to_cpu(*(__le16 *)&data[erofs_blkoff(sbi, *offset)]);
+ if (!len)
+ return ERR_PTR(-EFSCORRUPTED);
+
+ buffer = malloc(len);
+ if (!buffer)
+ return ERR_PTR(-ENOMEM);
+ *offset += sizeof(__le16);
+ *lengthp = len;
+
+ for (i = 0; i < len; i += cnt) {
+ cnt = min_t(int, erofs_blksiz(sbi) - erofs_blkoff(sbi, *offset),
+ len - i);
+ ret = blk_read(sbi, 0, data, erofs_blknr(sbi, *offset), 1);
+ if (ret) {
+ free(buffer);
+ return ERR_PTR(ret);
+ }
+ memcpy(buffer + i, data + erofs_blkoff(sbi, *offset), cnt);
+ *offset += cnt;
+ }
+ return buffer;
+}
+
+/*
+ * read variable-sized metadata, offset will be aligned by 4-byte
+ *
+ * @nid is 0 if metadata is in meta inode
+ */
+void *erofs_read_metadata(struct erofs_sb_info *sbi, erofs_nid_t nid,
+ erofs_off_t *offset, int *lengthp)
+{
+ if (nid)
+ return erofs_read_metadata_nid(sbi, nid, offset, lengthp);
+ return erofs_read_metadata_bdi(sbi, offset, lengthp);
+}
diff --git a/lib/decompress.c b/lib/decompress.c
index 1661f91..fe8a40c 100644
--- a/lib/decompress.c
+++ b/lib/decompress.c
@@ -9,12 +9,160 @@
#include "erofs/err.h"
#include "erofs/print.h"
+#ifdef HAVE_LIBDEFLATE
+/* if libdeflate is available, use libdeflate instead. */
+#include <libdeflate.h>
+
+static int z_erofs_decompress_deflate(struct z_erofs_decompress_req *rq)
+{
+ struct erofs_sb_info *sbi = rq->sbi;
+ u8 *dest = (u8 *)rq->out;
+ u8 *src = (u8 *)rq->in;
+ u8 *buff = NULL;
+ size_t actual_out;
+ unsigned int inputmargin = 0;
+ struct libdeflate_decompressor *inf;
+ enum libdeflate_result ret;
+
+ while (!src[inputmargin & (erofs_blksiz(sbi) - 1)])
+ if (!(++inputmargin & (erofs_blksiz(sbi) - 1)))
+ break;
+
+ if (inputmargin >= rq->inputsize)
+ return -EFSCORRUPTED;
+
+ if (rq->decodedskip) {
+ buff = malloc(rq->decodedlength);
+ if (!buff)
+ return -ENOMEM;
+ dest = buff;
+ }
+
+ inf = libdeflate_alloc_decompressor();
+ if (!inf)
+		{ free(buff); return -ENOMEM; }
+
+ if (rq->partial_decoding) {
+ ret = libdeflate_deflate_decompress(inf, src + inputmargin,
+ rq->inputsize - inputmargin, dest,
+ rq->decodedlength, &actual_out);
+ if (ret && ret != LIBDEFLATE_INSUFFICIENT_SPACE) {
+ ret = -EIO;
+ goto out_inflate_end;
+ }
+
+ if (actual_out != rq->decodedlength) {
+ ret = -EIO;
+ goto out_inflate_end;
+ }
+ } else {
+ ret = libdeflate_deflate_decompress(inf, src + inputmargin,
+ rq->inputsize - inputmargin, dest,
+ rq->decodedlength, NULL);
+ if (ret) {
+ ret = -EIO;
+ goto out_inflate_end;
+ }
+ }
+
+ if (rq->decodedskip)
+ memcpy(rq->out, dest + rq->decodedskip,
+ rq->decodedlength - rq->decodedskip);
+
+out_inflate_end:
+ libdeflate_free_decompressor(inf);
+ if (buff)
+ free(buff);
+	return ret < 0 ? ret : 0;
+}
+#elif defined(HAVE_ZLIB)
+#include <zlib.h>
+
+/* report a zlib or i/o error */
+static int zerr(int ret)
+{
+ switch (ret) {
+ case Z_STREAM_ERROR:
+ return -EINVAL;
+ case Z_DATA_ERROR:
+ return -EIO;
+ case Z_MEM_ERROR:
+ return -ENOMEM;
+ case Z_ERRNO:
+ case Z_VERSION_ERROR:
+ default:
+ return -EFAULT;
+ }
+}
+
+static int z_erofs_decompress_deflate(struct z_erofs_decompress_req *rq)
+{
+ struct erofs_sb_info *sbi = rq->sbi;
+ u8 *dest = (u8 *)rq->out;
+ u8 *src = (u8 *)rq->in;
+ u8 *buff = NULL;
+ unsigned int inputmargin = 0;
+ z_stream strm;
+ int ret;
+
+ while (!src[inputmargin & (erofs_blksiz(sbi) - 1)])
+ if (!(++inputmargin & (erofs_blksiz(sbi) - 1)))
+ break;
+
+ if (inputmargin >= rq->inputsize)
+ return -EFSCORRUPTED;
+
+ if (rq->decodedskip) {
+ buff = malloc(rq->decodedlength);
+ if (!buff)
+ return -ENOMEM;
+ dest = buff;
+ }
+
+ /* allocate inflate state */
+ strm.zalloc = Z_NULL;
+ strm.zfree = Z_NULL;
+ strm.opaque = Z_NULL;
+ strm.avail_in = 0;
+ strm.next_in = Z_NULL;
+ ret = inflateInit2(&strm, -15);
+ if (ret != Z_OK) {
+ free(buff);
+ return zerr(ret);
+ }
+
+ strm.next_in = src + inputmargin;
+ strm.avail_in = rq->inputsize - inputmargin;
+ strm.next_out = dest;
+ strm.avail_out = rq->decodedlength;
+
+ ret = inflate(&strm, rq->partial_decoding ? Z_SYNC_FLUSH : Z_FINISH);
+ if (ret != Z_STREAM_END || strm.total_out != rq->decodedlength) {
+ if (ret != Z_OK || !rq->partial_decoding) {
+ ret = zerr(ret);
+ goto out_inflate_end;
+ }
+ }
+
+ if (rq->decodedskip)
+ memcpy(rq->out, dest + rq->decodedskip,
+ rq->decodedlength - rq->decodedskip);
+
+out_inflate_end:
+ inflateEnd(&strm);
+ if (buff)
+ free(buff);
+	return ret < 0 ? ret : 0;
+}
+#endif
+
#ifdef HAVE_LIBLZMA
#include <lzma.h>
static int z_erofs_decompress_lzma(struct z_erofs_decompress_req *rq)
{
int ret = 0;
+ struct erofs_sb_info *sbi = rq->sbi;
u8 *dest = (u8 *)rq->out;
u8 *src = (u8 *)rq->in;
u8 *buff = NULL;
@@ -22,8 +170,8 @@
lzma_stream strm;
lzma_ret ret2;
- while (!src[inputmargin & ~PAGE_MASK])
- if (!(++inputmargin & ~PAGE_MASK))
+ while (!src[inputmargin & (erofs_blksiz(sbi) - 1)])
+ if (!(++inputmargin & (erofs_blksiz(sbi) - 1)))
break;
if (inputmargin >= rq->inputsize)
@@ -81,12 +229,13 @@
char *buff = NULL;
bool support_0padding = false;
unsigned int inputmargin = 0;
+ struct erofs_sb_info *sbi = rq->sbi;
- if (erofs_sb_has_lz4_0padding()) {
+ if (erofs_sb_has_lz4_0padding(sbi)) {
support_0padding = true;
- while (!src[inputmargin & ~PAGE_MASK])
- if (!(++inputmargin & ~PAGE_MASK))
+ while (!src[inputmargin & (erofs_blksiz(sbi) - 1)])
+ if (!(++inputmargin & (erofs_blksiz(sbi) - 1)))
break;
if (inputmargin >= rq->inputsize)
@@ -131,13 +280,32 @@
int z_erofs_decompress(struct z_erofs_decompress_req *rq)
{
- if (rq->alg == Z_EROFS_COMPRESSION_SHIFTED) {
- if (rq->inputsize > EROFS_BLKSIZ)
+ struct erofs_sb_info *sbi = rq->sbi;
+
+ if (rq->alg == Z_EROFS_COMPRESSION_INTERLACED) {
+ unsigned int count, rightpart, skip;
+
+ /* XXX: should support inputsize >= erofs_blksiz(sbi) later */
+ if (rq->inputsize > erofs_blksiz(sbi))
return -EFSCORRUPTED;
- DBG_BUGON(rq->decodedlength > EROFS_BLKSIZ);
- DBG_BUGON(rq->decodedlength < rq->decodedskip);
+ if (rq->decodedlength > erofs_blksiz(sbi))
+ return -EFSCORRUPTED;
+ if (rq->decodedlength < rq->decodedskip)
+ return -EFSCORRUPTED;
+
+ count = rq->decodedlength - rq->decodedskip;
+ skip = erofs_blkoff(sbi, rq->interlaced_offset + rq->decodedskip);
+ rightpart = min(erofs_blksiz(sbi) - skip, count);
+ memcpy(rq->out, rq->in + skip, rightpart);
+ memcpy(rq->out + rightpart, rq->in, count - rightpart);
+ return 0;
+ } else if (rq->alg == Z_EROFS_COMPRESSION_SHIFTED) {
+ if (rq->decodedlength > rq->inputsize)
+ return -EFSCORRUPTED;
+
+ DBG_BUGON(rq->decodedlength < rq->decodedskip);
memcpy(rq->out, rq->in + rq->decodedskip,
rq->decodedlength - rq->decodedskip);
return 0;
@@ -151,5 +319,9 @@
if (rq->alg == Z_EROFS_COMPRESSION_LZMA)
return z_erofs_decompress_lzma(rq);
#endif
+#if defined(HAVE_ZLIB) || defined(HAVE_LIBDEFLATE)
+ if (rq->alg == Z_EROFS_COMPRESSION_DEFLATE)
+ return z_erofs_decompress_deflate(rq);
+#endif
return -EOPNOTSUPP;
}
diff --git a/lib/dedupe.c b/lib/dedupe.c
new file mode 100644
index 0000000..17da452
--- /dev/null
+++ b/lib/dedupe.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+/*
+ * Copyright (C) 2022 Alibaba Cloud
+ */
+#include "erofs/dedupe.h"
+#include "erofs/print.h"
+#include "rb_tree.h"
+#include "rolling_hash.h"
+#include "sha256.h"
+
+unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2,
+ unsigned long sz)
+{
+ const unsigned long *a1, *a2;
+ unsigned long n = sz;
+
+ if (sz < sizeof(long))
+ goto out_bytes;
+
+ if (((long)s1 & (sizeof(long) - 1)) ==
+ ((long)s2 & (sizeof(long) - 1))) {
+ while ((long)s1 & (sizeof(long) - 1)) {
+ if (*s1 != *s2)
+ break;
+ ++s1;
+ ++s2;
+ --sz;
+ }
+
+ a1 = (const unsigned long *)s1;
+ a2 = (const unsigned long *)s2;
+ while (sz >= sizeof(long)) {
+ if (*a1 != *a2)
+ break;
+ ++a1;
+ ++a2;
+ sz -= sizeof(long);
+ }
+ } else {
+ a1 = (const unsigned long *)s1;
+ a2 = (const unsigned long *)s2;
+ do {
+ if (get_unaligned(a1) != get_unaligned(a2))
+ break;
+ ++a1;
+ ++a2;
+ sz -= sizeof(long);
+ } while (sz >= sizeof(long));
+ }
+ s1 = (const u8 *)a1;
+ s2 = (const u8 *)a2;
+out_bytes:
+ while (sz) {
+ if (*s1 != *s2)
+ break;
+ ++s1;
+ ++s2;
+ --sz;
+ }
+ return n - sz;
+}
+
+static unsigned int window_size, rollinghash_rm;
+static struct rb_tree *dedupe_tree, *dedupe_subtree;
+
+struct z_erofs_dedupe_item {
+ long long hash;
+ u8 prefix_sha256[32];
+
+ erofs_blk_t compressed_blkaddr;
+ unsigned int compressed_blks;
+
+ int original_length;
+ bool partial, raw;
+ u8 extra_data[];
+};
+
+static int z_erofs_dedupe_rbtree_cmp(struct rb_tree *self,
+ struct rb_node *node_a, struct rb_node *node_b)
+{
+ struct z_erofs_dedupe_item *e_a = node_a->value;
+ struct z_erofs_dedupe_item *e_b = node_b->value;
+
+ return (e_a->hash > e_b->hash) - (e_a->hash < e_b->hash);
+}
+
+int z_erofs_dedupe_match(struct z_erofs_dedupe_ctx *ctx)
+{
+ struct z_erofs_dedupe_item e_find;
+ u8 *cur;
+ bool initial = true;
+
+ if (!dedupe_tree)
+ return -ENOENT;
+
+ if (ctx->cur > ctx->end - window_size)
+ cur = ctx->end - window_size;
+ else
+ cur = ctx->cur;
+
+ /* move backward byte-by-byte */
+ for (; cur >= ctx->start; --cur) {
+ struct z_erofs_dedupe_item *e;
+ unsigned int extra;
+ u8 sha256[32];
+
+ if (initial) {
+ /* initial try */
+ e_find.hash = erofs_rolling_hash_init(cur, window_size, true);
+ initial = false;
+ } else {
+ e_find.hash = erofs_rolling_hash_advance(e_find.hash,
+ rollinghash_rm, cur[window_size], cur[0]);
+ }
+
+ e = rb_tree_find(dedupe_tree, &e_find);
+ if (!e) {
+ e = rb_tree_find(dedupe_subtree, &e_find);
+ if (!e)
+ continue;
+ }
+
+ erofs_sha256(cur, window_size, sha256);
+ if (memcmp(sha256, e->prefix_sha256, sizeof(sha256)))
+ continue;
+
+ extra = min_t(unsigned int, ctx->end - cur - window_size,
+ e->original_length - window_size);
+ extra = erofs_memcmp2(cur + window_size, e->extra_data, extra);
+ if (window_size + extra <= ctx->cur - cur)
+ continue;
+ ctx->cur = cur;
+ ctx->e.length = window_size + extra;
+ ctx->e.partial = e->partial ||
+ (window_size + extra < e->original_length);
+ ctx->e.raw = e->raw;
+ ctx->e.blkaddr = e->compressed_blkaddr;
+ ctx->e.compressedblks = e->compressed_blks;
+ return 0;
+ }
+ return -ENOENT;
+}
+
+int z_erofs_dedupe_insert(struct z_erofs_inmem_extent *e,
+ void *original_data)
+{
+ struct z_erofs_dedupe_item *di;
+
+ if (!dedupe_subtree || e->length < window_size)
+ return 0;
+
+ di = malloc(sizeof(*di) + e->length - window_size);
+ if (!di)
+ return -ENOMEM;
+
+ di->original_length = e->length;
+ erofs_sha256(original_data, window_size, di->prefix_sha256);
+ di->hash = erofs_rolling_hash_init(original_data,
+ window_size, true);
+ memcpy(di->extra_data, original_data + window_size,
+ e->length - window_size);
+ di->compressed_blkaddr = e->blkaddr;
+ di->compressed_blks = e->compressedblks;
+ di->partial = e->partial;
+ di->raw = e->raw;
+
+ /* with the same rolling hash */
+ if (!rb_tree_insert(dedupe_subtree, di))
+ free(di);
+ return 0;
+}
+
+static void z_erofs_dedupe_node_free_cb(struct rb_tree *self,
+ struct rb_node *node)
+{
+ free(node->value);
+ rb_tree_node_dealloc_cb(self, node);
+}
+
+void z_erofs_dedupe_commit(bool drop)
+{
+ if (!dedupe_subtree)
+ return;
+ if (!drop) {
+ struct rb_iter iter;
+ struct z_erofs_dedupe_item *di;
+
+ di = rb_iter_first(&iter, dedupe_subtree);
+ while (di) {
+ if (!rb_tree_insert(dedupe_tree, di))
+ DBG_BUGON(1);
+ di = rb_iter_next(&iter);
+ }
+ /*rb_iter_dealloc(iter);*/
+ rb_tree_dealloc(dedupe_subtree, rb_tree_node_dealloc_cb);
+ } else {
+ rb_tree_dealloc(dedupe_subtree, z_erofs_dedupe_node_free_cb);
+ }
+ dedupe_subtree = rb_tree_create(z_erofs_dedupe_rbtree_cmp);
+}
+
+int z_erofs_dedupe_init(unsigned int wsiz)
+{
+ dedupe_tree = rb_tree_create(z_erofs_dedupe_rbtree_cmp);
+ if (!dedupe_tree)
+ return -ENOMEM;
+
+ dedupe_subtree = rb_tree_create(z_erofs_dedupe_rbtree_cmp);
+ if (!dedupe_subtree) {
+		rb_tree_dealloc(dedupe_tree, NULL);
+ return -ENOMEM;
+ }
+ window_size = wsiz;
+ rollinghash_rm = erofs_rollinghash_calc_rm(window_size);
+ return 0;
+}
+
+void z_erofs_dedupe_exit(void)
+{
+ z_erofs_dedupe_commit(true);
+ rb_tree_dealloc(dedupe_subtree, NULL);
+ rb_tree_dealloc(dedupe_tree, z_erofs_dedupe_node_free_cb);
+}
diff --git a/lib/dir.c b/lib/dir.c
index e6b9283..1223cbc 100644
--- a/lib/dir.c
+++ b/lib/dir.c
@@ -4,11 +4,25 @@
#include "erofs/print.h"
#include "erofs/dir.h"
+/* filename should not have a '/' in the name string */
+static bool erofs_validate_filename(const char *dname, int size)
+{
+ char *name = (char *)dname;
+
+ while (name - dname < size && *name != '\0') {
+ if (*name == '/')
+ return false;
+ ++name;
+ }
+ return true;
+}
+
static int traverse_dirents(struct erofs_dir_context *ctx,
void *dentry_blk, unsigned int lblk,
unsigned int next_nameoff, unsigned int maxsize,
bool fsck)
{
+ struct erofs_sb_info *sbi = ctx->dir->sbi;
struct erofs_dirent *de = dentry_blk;
const struct erofs_dirent *end = dentry_blk + next_nameoff;
const char *prev_name = NULL;
@@ -41,7 +55,7 @@
break;
}
- if (nameoff + de_namelen > maxsize ||
+ if (nameoff + de_namelen > maxsize || !de_namelen ||
de_namelen > EROFS_NAME_LEN) {
errmsg = "bogus dirent namelen";
break;
@@ -76,8 +90,8 @@
goto out;
}
ctx->flags |= EROFS_READDIR_DOTDOT_FOUND;
- if (sbi.root_nid == ctx->dir->nid) {
- ctx->pnid = sbi.root_nid;
+ if (sbi->root_nid == ctx->dir->nid) {
+ ctx->pnid = sbi->root_nid;
ctx->flags |= EROFS_READDIR_VALID_PNID;
}
if (fsck &&
@@ -101,6 +115,10 @@
}
break;
}
+ } else if (fsck &&
+ !erofs_validate_filename(de_name, de_namelen)) {
+ errmsg = "corrupted dirent with illegal filename";
+ goto out;
}
ret = ctx->cb(ctx);
if (ret) {
@@ -123,9 +141,10 @@
int erofs_iterate_dir(struct erofs_dir_context *ctx, bool fsck)
{
struct erofs_inode *dir = ctx->dir;
+ struct erofs_sb_info *sbi = dir->sbi;
int err = 0;
erofs_off_t pos;
- char buf[EROFS_BLKSIZ];
+ char buf[EROFS_MAX_BLOCK_SIZE];
if (!S_ISDIR(dir->i_mode))
return -ENOTDIR;
@@ -133,9 +152,9 @@
ctx->flags &= ~EROFS_READDIR_ALL_SPECIAL_FOUND;
pos = 0;
while (pos < dir->i_size) {
- erofs_blk_t lblk = erofs_blknr(pos);
+ erofs_blk_t lblk = erofs_blknr(sbi, pos);
erofs_off_t maxsize = min_t(erofs_off_t,
- dir->i_size - pos, EROFS_BLKSIZ);
+ dir->i_size - pos, erofs_blksiz(sbi));
const struct erofs_dirent *de = (const void *)buf;
unsigned int nameoff;
@@ -148,7 +167,7 @@
nameoff = le16_to_cpu(de->nameoff);
if (nameoff < sizeof(struct erofs_dirent) ||
- nameoff >= EROFS_BLKSIZ) {
+ nameoff >= erofs_blksiz(sbi)) {
erofs_err("invalid de[0].nameoff %u @ nid %llu, lblk %u",
nameoff, dir->nid | 0ULL, lblk);
return -EFSCORRUPTED;
@@ -203,7 +222,10 @@
}
if (ctx->de_ftype == EROFS_FT_DIR || ctx->de_ftype == EROFS_FT_UNKNOWN) {
- struct erofs_inode dir = { .nid = ctx->de_nid };
+ struct erofs_inode dir = {
+ .sbi = ctx->dir->sbi,
+ .nid = ctx->de_nid
+ };
ret = erofs_read_inode_from_disk(&dir);
if (ret) {
@@ -212,10 +234,16 @@
}
if (S_ISDIR(dir.i_mode)) {
- ctx->dir = &dir;
- pathctx->pos = pos + len + 1;
- ret = erofs_iterate_dir(ctx, false);
- pathctx->pos = pos;
+ struct erofs_get_pathname_context nctx = {
+ .ctx.flags = 0,
+ .ctx.dir = &dir,
+ .ctx.cb = erofs_get_pathname_iter,
+ .target_nid = pathctx->target_nid,
+ .buf = pathctx->buf,
+ .size = pathctx->size,
+ .pos = pos + len + 1,
+ };
+ ret = erofs_iterate_dir(&nctx.ctx, false);
if (ret == EROFS_PATHNAME_FOUND) {
pathctx->buf[pos++] = '/';
strncpy(pathctx->buf + pos, dname, len);
@@ -229,10 +257,14 @@
return 0;
}
-int erofs_get_pathname(erofs_nid_t nid, char *buf, size_t size)
+int erofs_get_pathname(struct erofs_sb_info *sbi, erofs_nid_t nid,
+ char *buf, size_t size)
{
int ret;
- struct erofs_inode root = { .nid = sbi.root_nid };
+ struct erofs_inode root = {
+ .sbi = sbi,
+ .nid = sbi->root_nid,
+ };
struct erofs_get_pathname_context pathctx = {
.ctx.flags = 0,
.ctx.dir = &root,
diff --git a/lib/diskbuf.c b/lib/diskbuf.c
new file mode 100644
index 0000000..8205ba5
--- /dev/null
+++ b/lib/diskbuf.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+#include "erofs/diskbuf.h"
+#include "erofs/internal.h"
+#include "erofs/print.h"
+#include <stdio.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+/* A simple approach to avoid creating too many temporary files */
+static struct erofs_diskbufstrm {
+ u64 count;
+ u64 tailoffset, devpos;
+ int fd;
+ unsigned int alignsize;
+ bool locked;
+} *dbufstrm;
+
+int erofs_diskbuf_getfd(struct erofs_diskbuf *db, u64 *fpos)
+{
+ const struct erofs_diskbufstrm *strm = db->sp;
+ u64 offset;
+
+ if (!strm)
+ return -1;
+ offset = db->offset + strm->devpos;
+ if (lseek(strm->fd, offset, SEEK_SET) != offset)
+ return -E2BIG;
+ if (fpos)
+ *fpos = offset;
+ return strm->fd;
+}
+
+int erofs_diskbuf_reserve(struct erofs_diskbuf *db, int sid, u64 *off)
+{
+ struct erofs_diskbufstrm *strm = dbufstrm + sid;
+
+ if (strm->tailoffset & (strm->alignsize - 1)) {
+ strm->tailoffset = round_up(strm->tailoffset, strm->alignsize);
+ if (lseek(strm->fd, strm->tailoffset + strm->devpos,
+ SEEK_SET) != strm->tailoffset + strm->devpos)
+ return -EIO;
+ }
+ db->offset = strm->tailoffset;
+ if (off)
+ *off = db->offset + strm->devpos;
+ db->sp = strm;
+ ++strm->count;
+ strm->locked = true; /* TODO: need a real lock for MT */
+ return strm->fd;
+}
+
+void erofs_diskbuf_commit(struct erofs_diskbuf *db, u64 len)
+{
+ struct erofs_diskbufstrm *strm = db->sp;
+
+ DBG_BUGON(!strm);
+ DBG_BUGON(!strm->locked);
+ DBG_BUGON(strm->tailoffset != db->offset);
+ strm->tailoffset += len;
+}
+
+void erofs_diskbuf_close(struct erofs_diskbuf *db)
+{
+ struct erofs_diskbufstrm *strm = db->sp;
+
+ DBG_BUGON(!strm);
+ DBG_BUGON(strm->count <= 1);
+ --strm->count;
+ db->sp = NULL;
+}
+
+int erofs_tmpfile(void)
+{
+#define TRAILER "tmp.XXXXXXXXXX"
+ char buf[PATH_MAX];
+ int fd;
+ umode_t u;
+
+ (void)snprintf(buf, sizeof(buf), "%s/" TRAILER,
+ getenv("TMPDIR") ?: "/tmp");
+
+ fd = mkstemp(buf);
+ if (fd < 0)
+ return -errno;
+
+ unlink(buf);
+ u = umask(0);
+ (void)umask(u);
+ (void)fchmod(fd, 0666 & ~u);
+ return fd;
+}
+
+int erofs_diskbuf_init(unsigned int nstrms)
+{
+ struct erofs_diskbufstrm *strm;
+
+ strm = calloc(nstrms + 1, sizeof(*strm));
+ if (!strm)
+ return -ENOMEM;
+ strm[nstrms].fd = -1;
+ dbufstrm = strm;
+
+ for (; strm < dbufstrm + nstrms; ++strm) {
+ struct stat st;
+
+ /* try to use the devfd for regfiles on stream 0 */
+ if (strm == dbufstrm && sbi.devsz == INT64_MAX) {
+ strm->devpos = 1ULL << 40;
+ if (!ftruncate(sbi.devfd, strm->devpos << 1)) {
+ strm->fd = dup(sbi.devfd);
+ if (lseek(strm->fd, strm->devpos,
+ SEEK_SET) != strm->devpos)
+ return -EIO;
+ goto setupone;
+ }
+ }
+ strm->devpos = 0;
+ strm->fd = erofs_tmpfile();
+ if (strm->fd < 0)
+ return -ENOSPC;
+setupone:
+ strm->tailoffset = 0;
+ strm->count = 1;
+ if (fstat(strm->fd, &st))
+ return -errno;
+ strm->alignsize = max_t(u32, st.st_blksize, getpagesize());
+ }
+ return 0;
+}
+
+void erofs_diskbuf_exit(void)
+{
+ struct erofs_diskbufstrm *strm;
+
+ if (!dbufstrm)
+ return;
+
+ for (strm = dbufstrm; strm->fd >= 0; ++strm) {
+ DBG_BUGON(strm->count != 1);
+
+ close(strm->fd);
+ strm->fd = -1;
+ }
+}
diff --git a/lib/fragments.c b/lib/fragments.c
new file mode 100644
index 0000000..d4f6be1
--- /dev/null
+++ b/lib/fragments.c
@@ -0,0 +1,351 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+/*
+ * Copyright (C), 2022, Coolpad Group Limited.
+ * Created by Yue Hu <huyue2@coolpad.com>
+ */
+#ifndef _LARGEFILE_SOURCE
+#define _LARGEFILE_SOURCE
+#endif
+#ifndef _LARGEFILE64_SOURCE
+#define _LARGEFILE64_SOURCE
+#endif
+#ifndef _FILE_OFFSET_BITS
+#define _FILE_OFFSET_BITS 64
+#endif
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include "erofs/err.h"
+#include "erofs/inode.h"
+#include "erofs/compress.h"
+#include "erofs/print.h"
+#include "erofs/internal.h"
+#include "erofs/fragments.h"
+
+struct erofs_fragment_dedupe_item {
+ struct list_head list;
+ unsigned int length;
+ erofs_off_t pos;
+ u8 data[];
+};
+
+#define EROFS_TOF_HASHLEN 16
+
+#define FRAGMENT_HASHSIZE 65536
+#define FRAGMENT_HASH(c) ((c) & (FRAGMENT_HASHSIZE - 1))
+
+static struct list_head dupli_frags[FRAGMENT_HASHSIZE];
+static FILE *packedfile;
+const char *erofs_frags_packedname = "packed_file";
+
+#ifndef HAVE_LSEEK64
+#define erofs_lseek64 lseek
+#else
+#define erofs_lseek64 lseek64
+#endif
+
+static int z_erofs_fragments_dedupe_find(struct erofs_inode *inode, int fd,
+ u32 crc)
+{
+ struct erofs_fragment_dedupe_item *cur, *di = NULL;
+ struct list_head *head;
+ u8 *data;
+ unsigned int length, e2, deduped;
+ erofs_off_t pos;
+ int ret;
+
+ head = &dupli_frags[FRAGMENT_HASH(crc)];
+ if (list_empty(head))
+ return 0;
+
+ /* XXX: no need to read so much for smaller? */
+ if (inode->i_size < EROFS_CONFIG_COMPR_MAX_SZ)
+ length = inode->i_size;
+ else
+ length = EROFS_CONFIG_COMPR_MAX_SZ;
+
+ data = malloc(length);
+ if (!data)
+ return -ENOMEM;
+
+ if (erofs_lseek64(fd, inode->i_size - length, SEEK_SET) < 0) {
+ ret = -errno;
+ goto out;
+ }
+
+ ret = read(fd, data, length);
+ if (ret != length) {
+ ret = -errno;
+ goto out;
+ }
+
+ DBG_BUGON(length <= EROFS_TOF_HASHLEN);
+ e2 = length - EROFS_TOF_HASHLEN;
+ deduped = 0;
+
+ list_for_each_entry(cur, head, list) {
+ unsigned int e1, mn, i = 0;
+
+ DBG_BUGON(cur->length <= EROFS_TOF_HASHLEN);
+ e1 = cur->length - EROFS_TOF_HASHLEN;
+
+ if (memcmp(cur->data + e1, data + e2, EROFS_TOF_HASHLEN))
+ continue;
+
+ mn = min(e1, e2);
+ while (i < mn && cur->data[e1 - i - 1] == data[e2 - i - 1])
+ ++i;
+
+ if (!di || i + EROFS_TOF_HASHLEN > deduped) {
+ deduped = i + EROFS_TOF_HASHLEN;
+ di = cur;
+
+ /* full match */
+ if (i == e2)
+ break;
+ }
+ }
+ if (!di)
+ goto out;
+
+ DBG_BUGON(di->length < deduped);
+ pos = di->pos + di->length - deduped;
+ /* let's read more to dedupe as long as we can */
+ if (deduped == di->length) {
+ fflush(packedfile);
+
+ while(deduped < inode->i_size && pos) {
+ char buf[2][16384];
+ unsigned int sz = min_t(unsigned int, pos,
+ sizeof(buf[0]));
+
+ if (pread(fileno(packedfile), buf[0], sz,
+ pos - sz) != sz)
+ break;
+ if (pread(fd, buf[1], sz,
+ inode->i_size - deduped - sz) != sz)
+ break;
+
+ if (memcmp(buf[0], buf[1], sz))
+ break;
+ pos -= sz;
+ deduped += sz;
+ }
+ }
+ inode->fragment_size = deduped;
+ inode->fragmentoff = pos;
+
+ erofs_dbg("Dedupe %u tail data at %llu", inode->fragment_size,
+ inode->fragmentoff | 0ULL);
+out:
+ free(data);
+ return ret;
+}
+
+int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 *tofcrc)
+{
+ u8 data_to_hash[EROFS_TOF_HASHLEN];
+ int ret;
+
+ if (inode->i_size <= EROFS_TOF_HASHLEN)
+ return 0;
+
+ if (erofs_lseek64(fd, inode->i_size - EROFS_TOF_HASHLEN, SEEK_SET) < 0)
+ return -errno;
+
+ ret = read(fd, data_to_hash, EROFS_TOF_HASHLEN);
+ if (ret != EROFS_TOF_HASHLEN)
+ return -errno;
+
+ *tofcrc = erofs_crc32c(~0, data_to_hash, EROFS_TOF_HASHLEN);
+ ret = z_erofs_fragments_dedupe_find(inode, fd, *tofcrc);
+ if (ret < 0)
+ return ret;
+ ret = lseek(fd, 0, SEEK_SET);
+ if (ret < 0)
+ return -errno;
+ return 0;
+}
+
+static int z_erofs_fragments_dedupe_insert(void *data, unsigned int len,
+ erofs_off_t pos, u32 crc)
+{
+ struct erofs_fragment_dedupe_item *di;
+
+ if (len <= EROFS_TOF_HASHLEN)
+ return 0;
+ if (len > EROFS_CONFIG_COMPR_MAX_SZ) {
+ data += len - EROFS_CONFIG_COMPR_MAX_SZ;
+ pos += len - EROFS_CONFIG_COMPR_MAX_SZ;
+ len = EROFS_CONFIG_COMPR_MAX_SZ;
+ }
+ di = malloc(sizeof(*di) + len);
+ if (!di)
+ return -ENOMEM;
+
+ memcpy(di->data, data, len);
+ di->length = len;
+ di->pos = pos;
+
+ list_add_tail(&di->list, &dupli_frags[FRAGMENT_HASH(crc)]);
+ return 0;
+}
+
+int z_erofs_fragments_init(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < FRAGMENT_HASHSIZE; ++i)
+ init_list_head(&dupli_frags[i]);
+ return 0;
+}
+
+void z_erofs_fragments_exit(void)
+{
+ struct erofs_fragment_dedupe_item *di, *n;
+ struct list_head *head;
+ unsigned int i;
+
+ for (i = 0; i < FRAGMENT_HASHSIZE; ++i) {
+ head = &dupli_frags[i];
+
+ list_for_each_entry_safe(di, n, head, list)
+ free(di);
+ }
+}
+
+void z_erofs_fragments_commit(struct erofs_inode *inode)
+{
+ if (!inode->fragment_size)
+ return;
+ /*
+ * If the packed inode is larger than 4GiB, the full fragmentoff
+ * will be recorded by switching to the noncompact layout anyway.
+ */
+ if (inode->fragmentoff >> 32)
+ inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
+
+ inode->z_advise |= Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
+ erofs_sb_set_fragments(inode->sbi);
+}
+
+int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd,
+ u32 tofcrc)
+{
+#ifdef HAVE_FTELLO64
+ off64_t offset = ftello64(packedfile);
+#else
+ off_t offset = ftello(packedfile);
+#endif
+ char *memblock;
+ int rc;
+
+ if (offset < 0)
+ return -errno;
+
+ inode->fragmentoff = (erofs_off_t)offset;
+ inode->fragment_size = inode->i_size;
+
+ memblock = mmap(NULL, inode->i_size, PROT_READ, MAP_SHARED, fd, 0);
+ if (memblock == MAP_FAILED || !memblock) {
+ unsigned long long remaining = inode->fragment_size;
+
+ memblock = NULL;
+ while (remaining) {
+ char buf[32768];
+ unsigned int sz = min_t(unsigned int, remaining,
+ sizeof(buf));
+
+ rc = read(fd, buf, sz);
+ if (rc != sz) {
+ if (rc < 0)
+ rc = -errno;
+ else
+ rc = -EAGAIN;
+ goto out;
+ }
+ if (fwrite(buf, sz, 1, packedfile) != 1) {
+ rc = -EIO;
+ goto out;
+ }
+ remaining -= sz;
+ }
+ rc = lseek(fd, 0, SEEK_SET);
+ if (rc < 0) {
+ rc = -errno;
+ goto out;
+ }
+ } else if (fwrite(memblock, inode->fragment_size, 1, packedfile) != 1) {
+ rc = -EIO;
+ goto out;
+ }
+
+ erofs_dbg("Recording %u fragment data at %lu", inode->fragment_size,
+ inode->fragmentoff);
+
+ if (memblock)
+ rc = z_erofs_fragments_dedupe_insert(memblock,
+ inode->fragment_size, inode->fragmentoff, tofcrc);
+out:
+ if (memblock)
+ munmap(memblock, inode->i_size);
+ return rc;
+}
+
+int z_erofs_pack_fragments(struct erofs_inode *inode, void *data,
+ unsigned int len, u32 tofcrc)
+{
+#ifdef HAVE_FTELLO64
+ off64_t offset = ftello64(packedfile);
+#else
+ off_t offset = ftello(packedfile);
+#endif
+ int ret;
+
+ if (offset < 0)
+ return -errno;
+
+ inode->fragmentoff = (erofs_off_t)offset;
+ inode->fragment_size = len;
+
+ if (fwrite(data, len, 1, packedfile) != 1)
+ return -EIO;
+
+ erofs_dbg("Recording %u fragment data at %lu", inode->fragment_size,
+ inode->fragmentoff);
+
+ ret = z_erofs_fragments_dedupe_insert(data, len, inode->fragmentoff,
+ tofcrc);
+ if (ret)
+ return ret;
+ return len;
+}
+
+struct erofs_inode *erofs_mkfs_build_packedfile(void)
+{
+ fflush(packedfile);
+
+ return erofs_mkfs_build_special_from_fd(fileno(packedfile),
+ EROFS_PACKED_INODE);
+}
+
+void erofs_packedfile_exit(void)
+{
+ if (packedfile)
+ fclose(packedfile);
+}
+
+FILE *erofs_packedfile_init(void)
+{
+#ifdef HAVE_TMPFILE64
+ packedfile = tmpfile64();
+#else
+ packedfile = tmpfile();
+#endif
+ if (!packedfile)
+ return ERR_PTR(-ENOMEM);
+ return packedfile;
+}
diff --git a/lib/hashmap.c b/lib/hashmap.c
index e11bd8d..45916ae 100644
--- a/lib/hashmap.c
+++ b/lib/hashmap.c
@@ -149,20 +149,21 @@
alloc_table(map, size);
}
-void hashmap_free(struct hashmap *map, int free_entries)
+int hashmap_free(struct hashmap *map)
{
- if (!map || !map->table)
- return;
- if (free_entries) {
+ if (map && map->table) {
struct hashmap_iter iter;
struct hashmap_entry *e;
hashmap_iter_init(map, &iter);
- while ((e = hashmap_iter_next(&iter)))
- free(e);
+ e = hashmap_iter_next(&iter);
+ if (e)
+ return -EBUSY;
+
+ free(map->table);
+ memset(map, 0, sizeof(*map));
}
- free(map->table);
- memset(map, 0, sizeof(*map));
+ return 0;
}
void *hashmap_get(const struct hashmap *map, const void *key, const void *keydata)
@@ -194,10 +195,13 @@
rehash(map, map->tablesize << HASHMAP_RESIZE_BITS);
}
-void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata)
+void *hashmap_remove(struct hashmap *map, const void *entry)
{
struct hashmap_entry *old;
- struct hashmap_entry **e = find_entry_ptr(map, key, keydata);
+ struct hashmap_entry **e = &map->table[bucket(map, entry)];
+
+ while (*e && *e != entry)
+ e = &(*e)->next;
if (!*e)
return NULL;
@@ -214,14 +218,6 @@
return old;
}
-void *hashmap_put(struct hashmap *map, void *entry)
-{
- struct hashmap_entry *old = hashmap_remove(map, entry, NULL);
-
- hashmap_add(map, entry);
- return old;
-}
-
void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter)
{
iter->map = map;
diff --git a/lib/inode.c b/lib/inode.c
index f192510..8409ccd 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -16,6 +16,7 @@
#endif
#include <dirent.h>
#include "erofs/print.h"
+#include "erofs/diskbuf.h"
#include "erofs/inode.h"
#include "erofs/cache.h"
#include "erofs/io.h"
@@ -25,6 +26,7 @@
#include "erofs/block_list.h"
#include "erofs/compress_hints.h"
#include "erofs/blobchunk.h"
+#include "erofs/fragments.h"
#include "liberofs_private.h"
#define S_SHIFT 12
@@ -43,6 +45,25 @@
return erofs_ftype_by_mode[(mode & S_IFMT) >> S_SHIFT];
}
+static const unsigned char erofs_dtype_by_ftype[EROFS_FT_MAX] = {
+ [EROFS_FT_UNKNOWN] = DT_UNKNOWN,
+ [EROFS_FT_REG_FILE] = DT_REG,
+ [EROFS_FT_DIR] = DT_DIR,
+ [EROFS_FT_CHRDEV] = DT_CHR,
+ [EROFS_FT_BLKDEV] = DT_BLK,
+ [EROFS_FT_FIFO] = DT_FIFO,
+ [EROFS_FT_SOCK] = DT_SOCK,
+ [EROFS_FT_SYMLINK] = DT_LNK
+};
+
+unsigned char erofs_ftype_to_dtype(unsigned int filetype)
+{
+ if (filetype >= EROFS_FT_MAX)
+ return DT_UNKNOWN;
+
+ return erofs_dtype_by_ftype[filetype];
+}
+
#define NR_INODE_HASHTABLE 16384
struct list_head inode_hashtable[NR_INODE_HASHTABLE];
@@ -55,10 +76,10 @@
init_list_head(&inode_hashtable[i]);
}
-static struct erofs_inode *erofs_igrab(struct erofs_inode *inode)
+void erofs_insert_ihash(struct erofs_inode *inode, dev_t dev, ino_t ino)
{
- ++inode->i_count;
- return inode;
+ list_add(&inode->i_hash,
+ &inode_hashtable[(ino ^ dev) % NR_INODE_HASHTABLE]);
}
/* get the inode from the (source) inode # */
@@ -99,6 +120,14 @@
if (inode->eof_tailraw)
free(inode->eof_tailraw);
list_del(&inode->i_hash);
+ if (inode->i_srcpath)
+ free(inode->i_srcpath);
+ if (inode->with_diskbuf) {
+ erofs_diskbuf_close(inode->i_diskbuf);
+ free(inode->i_diskbuf);
+ } else if (inode->i_link) {
+ free(inode->i_link);
+ }
free(inode);
return 0;
}
@@ -120,7 +149,8 @@
/* allocate main data for a inode */
static int __allocate_inode_bh_data(struct erofs_inode *inode,
- unsigned long nblocks)
+ unsigned long nblocks,
+ int type)
{
struct erofs_buffer_head *bh;
int ret;
@@ -132,7 +162,7 @@
}
/* allocate main data buffer */
- bh = erofs_balloc(DATA, blknr_to_addr(nblocks), 0, 0);
+ bh = erofs_balloc(type, erofs_pos(inode->sbi, nblocks), 0, 0);
if (IS_ERR(bh))
return PTR_ERR(bh);
@@ -157,11 +187,49 @@
return strcmp(da->name, db->name);
}
-int erofs_prepare_dir_file(struct erofs_inode *dir, unsigned int nr_subdirs)
+static int erofs_prepare_dir_layout(struct erofs_inode *dir,
+ unsigned int nr_subdirs)
{
+ struct erofs_sb_info *sbi = dir->sbi;
struct erofs_dentry *d, *n, **sorted_d;
- unsigned int d_size, i_nlink, i;
- int ret;
+ unsigned int i;
+ unsigned int d_size = 0;
+
+ sorted_d = malloc(nr_subdirs * sizeof(d));
+ if (!sorted_d)
+ return -ENOMEM;
+ i = 0;
+ list_for_each_entry_safe(d, n, &dir->i_subdirs, d_child) {
+ list_del(&d->d_child);
+ sorted_d[i++] = d;
+ }
+ DBG_BUGON(i != nr_subdirs);
+ qsort(sorted_d, nr_subdirs, sizeof(d), comp_subdir);
+ for (i = 0; i < nr_subdirs; i++)
+ list_add_tail(&sorted_d[i]->d_child, &dir->i_subdirs);
+ free(sorted_d);
+
+ /* let's calculate dir size */
+ list_for_each_entry(d, &dir->i_subdirs, d_child) {
+ int len = strlen(d->name) + sizeof(struct erofs_dirent);
+
+ if (erofs_blkoff(sbi, d_size) + len > erofs_blksiz(sbi))
+ d_size = round_up(d_size, erofs_blksiz(sbi));
+ d_size += len;
+ }
+ dir->i_size = d_size;
+
+ /* no compression for all dirs */
+ dir->datalayout = EROFS_INODE_FLAT_INLINE;
+
+ /* it will be used in erofs_prepare_inode_buffer */
+ dir->idata_size = d_size % erofs_blksiz(sbi);
+ return 0;
+}
+
+int erofs_init_empty_dir(struct erofs_inode *dir)
+{
+ struct erofs_dentry *d;
/* dot is pointed to the current dir inode */
d = erofs_d_alloc(dir, ".");
@@ -177,56 +245,21 @@
d->inode = erofs_igrab(dir->i_parent);
d->type = EROFS_FT_DIR;
- /* sort subdirs */
- nr_subdirs += 2;
- sorted_d = malloc(nr_subdirs * sizeof(d));
- if (!sorted_d)
- return -ENOMEM;
- i = 0;
- list_for_each_entry_safe(d, n, &dir->i_subdirs, d_child) {
- list_del(&d->d_child);
- sorted_d[i++] = d;
- }
- DBG_BUGON(i != nr_subdirs);
- qsort(sorted_d, nr_subdirs, sizeof(d), comp_subdir);
- for (i = 0; i < nr_subdirs; i++)
- list_add_tail(&sorted_d[i]->d_child, &dir->i_subdirs);
- free(sorted_d);
+ dir->i_nlink = 2;
+ return 0;
+}
- /* let's calculate dir size and update i_nlink */
- d_size = 0;
- i_nlink = 0;
- list_for_each_entry(d, &dir->i_subdirs, d_child) {
- int len = strlen(d->name) + sizeof(struct erofs_dirent);
+int erofs_prepare_dir_file(struct erofs_inode *dir, unsigned int nr_subdirs)
+{
+ int ret;
- if (d_size % EROFS_BLKSIZ + len > EROFS_BLKSIZ)
- d_size = round_up(d_size, EROFS_BLKSIZ);
- d_size += len;
-
- i_nlink += (d->type == EROFS_FT_DIR);
- }
- dir->i_size = d_size;
- /*
- * if there're too many subdirs as compact form, set nlink=1
- * rather than upgrade to use extented form instead.
- */
- if (i_nlink > USHRT_MAX &&
- dir->inode_isize == sizeof(struct erofs_inode_compact))
- dir->i_nlink = 1;
- else
- dir->i_nlink = i_nlink;
-
- /* no compression for all dirs */
- dir->datalayout = EROFS_INODE_FLAT_INLINE;
-
- /* allocate dir main data */
- ret = __allocate_inode_bh_data(dir, erofs_blknr(d_size));
+ ret = erofs_init_empty_dir(dir);
if (ret)
return ret;
- /* it will be used in erofs_prepare_inode_buffer */
- dir->idata_size = d_size % EROFS_BLKSIZ;
- return 0;
+ /* sort subdirs */
+ nr_subdirs += 2;
+ return erofs_prepare_dir_layout(dir, nr_subdirs);
}
static void fill_dirblock(char *buf, unsigned int size, unsigned int q,
@@ -253,13 +286,42 @@
memset(buf + q, 0, size - q);
}
-static int write_dirblock(unsigned int q, struct erofs_dentry *head,
+static int write_dirblock(struct erofs_sb_info *sbi,
+ unsigned int q, struct erofs_dentry *head,
struct erofs_dentry *end, erofs_blk_t blkaddr)
{
- char buf[EROFS_BLKSIZ];
+ char buf[EROFS_MAX_BLOCK_SIZE];
- fill_dirblock(buf, EROFS_BLKSIZ, q, head, end);
- return blk_write(buf, blkaddr, 1);
+ fill_dirblock(buf, erofs_blksiz(sbi), q, head, end);
+ return blk_write(sbi, buf, blkaddr, 1);
+}
+
+erofs_nid_t erofs_lookupnid(struct erofs_inode *inode)
+{
+ struct erofs_buffer_head *const bh = inode->bh;
+ struct erofs_sb_info *sbi = inode->sbi;
+ erofs_off_t off, meta_offset;
+
+ if (!bh || (long long)inode->nid > 0)
+ return inode->nid;
+
+ erofs_mapbh(bh->block);
+ off = erofs_btell(bh, false);
+
+ meta_offset = erofs_pos(sbi, sbi->meta_blkaddr);
+ DBG_BUGON(off < meta_offset);
+ inode->nid = (off - meta_offset) >> EROFS_ISLOTBITS;
+ erofs_dbg("Assign nid %llu to file %s (mode %05o)",
+ inode->nid, inode->i_srcpath, inode->i_mode);
+ return inode->nid;
+}
+
+static void erofs_d_invalidate(struct erofs_dentry *d)
+{
+ struct erofs_inode *const inode = d->inode;
+
+ d->nid = erofs_lookupnid(inode);
+ erofs_iput(inode);
}
static int erofs_write_dir_file(struct erofs_inode *dir)
@@ -267,18 +329,25 @@
struct erofs_dentry *head = list_first_entry(&dir->i_subdirs,
struct erofs_dentry,
d_child);
+ struct erofs_sb_info *sbi = dir->sbi;
struct erofs_dentry *d;
int ret;
unsigned int q, used, blkno;
q = used = blkno = 0;
+ /* allocate dir main data */
+ ret = __allocate_inode_bh_data(dir, erofs_blknr(sbi, dir->i_size), DIRA);
+ if (ret)
+ return ret;
+
list_for_each_entry(d, &dir->i_subdirs, d_child) {
const unsigned int len = strlen(d->name) +
sizeof(struct erofs_dirent);
- if (used + len > EROFS_BLKSIZ) {
- ret = write_dirblock(q, head, d,
+ erofs_d_invalidate(d);
+ if (used + len > erofs_blksiz(sbi)) {
+ ret = write_dirblock(sbi, q, head, d,
dir->u.i_blkaddr + blkno);
if (ret)
return ret;
@@ -291,13 +360,13 @@
q += sizeof(struct erofs_dirent);
}
- DBG_BUGON(used > EROFS_BLKSIZ);
- if (used == EROFS_BLKSIZ) {
- DBG_BUGON(dir->i_size % EROFS_BLKSIZ);
+ DBG_BUGON(used > erofs_blksiz(sbi));
+ if (used == erofs_blksiz(sbi)) {
+ DBG_BUGON(dir->i_size % erofs_blksiz(sbi));
DBG_BUGON(dir->idata_size);
- return write_dirblock(q, head, d, dir->u.i_blkaddr + blkno);
+ return write_dirblock(sbi, q, head, d, dir->u.i_blkaddr + blkno);
}
- DBG_BUGON(used != dir->i_size % EROFS_BLKSIZ);
+ DBG_BUGON(used != dir->i_size % erofs_blksiz(sbi));
if (used) {
/* fill tail-end dir block */
dir->idata = malloc(used);
@@ -309,25 +378,26 @@
return 0;
}
-static int erofs_write_file_from_buffer(struct erofs_inode *inode, char *buf)
+int erofs_write_file_from_buffer(struct erofs_inode *inode, char *buf)
{
- const unsigned int nblocks = erofs_blknr(inode->i_size);
+ struct erofs_sb_info *sbi = inode->sbi;
+ const unsigned int nblocks = erofs_blknr(sbi, inode->i_size);
int ret;
inode->datalayout = EROFS_INODE_FLAT_INLINE;
- ret = __allocate_inode_bh_data(inode, nblocks);
+ ret = __allocate_inode_bh_data(inode, nblocks, DATA);
if (ret)
return ret;
if (nblocks)
- blk_write(buf, inode->u.i_blkaddr, nblocks);
- inode->idata_size = inode->i_size % EROFS_BLKSIZ;
+ blk_write(sbi, buf, inode->u.i_blkaddr, nblocks);
+ inode->idata_size = inode->i_size % erofs_blksiz(sbi);
if (inode->idata_size) {
inode->idata = malloc(inode->idata_size);
if (!inode->idata)
return -ENOMEM;
- memcpy(inode->idata, buf + blknr_to_addr(nblocks),
+ memcpy(inode->idata, buf + erofs_pos(sbi, nblocks),
inode->idata_size);
}
return 0;
@@ -345,31 +415,32 @@
{
int ret;
unsigned int nblocks, i;
+ struct erofs_sb_info *sbi = inode->sbi;
inode->datalayout = EROFS_INODE_FLAT_INLINE;
- nblocks = inode->i_size / EROFS_BLKSIZ;
+ nblocks = inode->i_size / erofs_blksiz(sbi);
- ret = __allocate_inode_bh_data(inode, nblocks);
+ ret = __allocate_inode_bh_data(inode, nblocks, DATA);
if (ret)
return ret;
for (i = 0; i < nblocks; ++i) {
- char buf[EROFS_BLKSIZ];
+ char buf[EROFS_MAX_BLOCK_SIZE];
- ret = read(fd, buf, EROFS_BLKSIZ);
- if (ret != EROFS_BLKSIZ) {
+ ret = read(fd, buf, erofs_blksiz(sbi));
+ if (ret != erofs_blksiz(sbi)) {
if (ret < 0)
return -errno;
return -EAGAIN;
}
- ret = blk_write(buf, inode->u.i_blkaddr + i, 1);
+ ret = blk_write(sbi, buf, inode->u.i_blkaddr + i, 1);
if (ret)
return ret;
}
/* read the tail-end data */
- inode->idata_size = inode->i_size % EROFS_BLKSIZ;
+ inode->idata_size = inode->i_size % erofs_blksiz(sbi);
if (inode->idata_size) {
inode->idata = malloc(inode->idata_size);
if (!inode->idata)
@@ -386,14 +457,11 @@
return 0;
}
-int erofs_write_file(struct erofs_inode *inode)
+int erofs_write_file(struct erofs_inode *inode, int fd, u64 fpos)
{
- int ret, fd;
+ int ret;
- if (!inode->i_size) {
- inode->datalayout = EROFS_INODE_FLAT_PLAIN;
- return 0;
- }
+ DBG_BUGON(!inode->i_size);
if (cfg.c_chunkbits) {
inode->u.chunkbits = cfg.c_chunkbits;
@@ -401,29 +469,27 @@
inode->u.chunkformat = 0;
if (cfg.c_force_chunkformat == FORCE_INODE_CHUNK_INDEXES)
inode->u.chunkformat = EROFS_CHUNK_FORMAT_INDEXES;
- return erofs_blob_write_chunked_file(inode);
+ return erofs_blob_write_chunked_file(inode, fd, fpos);
}
- if (cfg.c_compr_alg_master && erofs_file_is_compressible(inode)) {
- ret = erofs_write_compressed_file(inode);
-
+ if (cfg.c_compr_alg[0] && erofs_file_is_compressible(inode)) {
+ ret = erofs_write_compressed_file(inode, fd);
if (!ret || ret != -ENOSPC)
return ret;
+
+ ret = lseek(fd, fpos, SEEK_SET);
+ if (ret < 0)
+ return -errno;
}
/* fallback to all data uncompressed */
- fd = open(inode->i_srcpath, O_RDONLY | O_BINARY);
- if (fd < 0)
- return -errno;
-
- ret = write_uncompressed_file_from_fd(inode, fd);
- close(fd);
- return ret;
+ return write_uncompressed_file_from_fd(inode, fd);
}
static bool erofs_bh_flush_write_inode(struct erofs_buffer_head *bh)
{
struct erofs_inode *const inode = bh->fsprivate;
+ struct erofs_sb_info *sbi = inode->sbi;
const u16 icount = EROFS_INODE_XATTR_ICOUNT(inode->xattr_isize);
erofs_off_t off = erofs_btell(bh, false);
union {
@@ -510,18 +576,18 @@
BUG_ON(1);
}
- ret = dev_write(&u, off, inode->inode_isize);
+ ret = dev_write(sbi, &u, off, inode->inode_isize);
if (ret)
return false;
off += inode->inode_isize;
if (inode->xattr_isize) {
- char *xattrs = erofs_export_xattr_ibody(&inode->i_xattrs,
- inode->xattr_isize);
+ char *xattrs = erofs_export_xattr_ibody(inode);
+
if (IS_ERR(xattrs))
return false;
- ret = dev_write(xattrs, off, inode->xattr_isize);
+ ret = dev_write(sbi, xattrs, off, inode->xattr_isize);
free(xattrs);
if (ret)
return false;
@@ -536,8 +602,8 @@
return false;
} else {
/* write compression metadata */
- off = Z_EROFS_VLE_EXTENT_ALIGN(off);
- ret = dev_write(inode->compressmeta, off,
+ off = roundup(off, 8);
+ ret = dev_write(sbi, inode->compressmeta, off,
inode->extent_isize);
if (ret)
return false;
@@ -556,6 +622,7 @@
static int erofs_prepare_tail_block(struct erofs_inode *inode)
{
+ struct erofs_sb_info *sbi = inode->sbi;
struct erofs_buffer_head *bh;
int ret;
@@ -563,23 +630,16 @@
return 0;
bh = inode->bh_data;
- if (!bh) {
- bh = erofs_balloc(DATA, EROFS_BLKSIZ, 0, 0);
- if (IS_ERR(bh))
- return PTR_ERR(bh);
- bh->op = &erofs_skip_write_bhops;
-
- /* get blkaddr of bh */
- ret = erofs_mapbh(bh->block);
- DBG_BUGON(ret < 0);
- inode->u.i_blkaddr = bh->block->blkaddr;
-
- inode->bh_data = bh;
- return 0;
+ if (bh) {
+ /* expend a block as the tail block (should be successful) */
+ ret = erofs_bh_balloon(bh, erofs_blksiz(sbi));
+ if (ret != erofs_blksiz(sbi)) {
+ DBG_BUGON(1);
+ return -EIO;
+ }
+ } else {
+ inode->lazy_tailblock = true;
}
- /* expend a block as the tail block (should be successful) */
- ret = erofs_bh_balloon(bh, EROFS_BLKSIZ);
- DBG_BUGON(ret != EROFS_BLKSIZ);
return 0;
}
@@ -592,15 +652,14 @@
inodesize = inode->inode_isize + inode->xattr_isize;
if (inode->extent_isize)
- inodesize = Z_EROFS_VLE_EXTENT_ALIGN(inodesize) +
- inode->extent_isize;
+ inodesize = roundup(inodesize, 8) + inode->extent_isize;
/* TODO: tailpacking inline of chunk-based format isn't finalized */
if (inode->datalayout == EROFS_INODE_CHUNK_BASED)
goto noinline;
if (!is_inode_layout_compression(inode)) {
- if (cfg.c_noinline_data && S_ISREG(inode->i_mode)) {
+ if (!cfg.c_inline_data && S_ISREG(inode->i_mode)) {
inode->datalayout = EROFS_INODE_FLAT_PLAIN;
goto noinline;
}
@@ -637,7 +696,7 @@
erofs_dbg("Inline %scompressed data (%u bytes) to %s",
inode->compressed_idata ? "" : "un",
inode->idata_size, inode->i_srcpath);
- erofs_sb_set_ztailpacking();
+ erofs_sb_set_ztailpacking(inode->sbi);
} else {
inode->datalayout = EROFS_INODE_FLAT_INLINE;
erofs_dbg("Inline tail-end data (%u bytes) to %s",
@@ -665,7 +724,7 @@
const erofs_off_t off = erofs_btell(bh, false);
int ret;
- ret = dev_write(inode->idata, off, inode->idata_size);
+ ret = dev_write(inode->sbi, inode->idata, off, inode->idata_size);
if (ret)
return false;
@@ -683,6 +742,7 @@
static int erofs_write_tail_end(struct erofs_inode *inode)
{
+ struct erofs_sb_info *sbi = inode->sbi;
struct erofs_buffer_head *bh, *ibh;
bh = inode->bh_data;
@@ -702,25 +762,47 @@
int ret;
erofs_off_t pos, zero_pos;
- erofs_mapbh(bh->block);
- pos = erofs_btell(bh, true) - EROFS_BLKSIZ;
+ if (!bh) {
+ bh = erofs_balloc(DATA, erofs_blksiz(sbi), 0, 0);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
+ bh->op = &erofs_skip_write_bhops;
+
+ /* get blkaddr of bh */
+ ret = erofs_mapbh(bh->block);
+ inode->u.i_blkaddr = bh->block->blkaddr;
+ inode->bh_data = bh;
+ } else {
+ if (inode->lazy_tailblock) {
+ /* expend a tail block (should be successful) */
+ ret = erofs_bh_balloon(bh, erofs_blksiz(sbi));
+ if (ret != erofs_blksiz(sbi)) {
+ DBG_BUGON(1);
+ return -EIO;
+ }
+ inode->lazy_tailblock = false;
+ }
+ ret = erofs_mapbh(bh->block);
+ }
+ DBG_BUGON(ret < 0);
+ pos = erofs_btell(bh, true) - erofs_blksiz(sbi);
/* 0'ed data should be padded at head for 0padding conversion */
- if (erofs_sb_has_lz4_0padding() && inode->compressed_idata) {
+ if (erofs_sb_has_lz4_0padding(sbi) && inode->compressed_idata) {
zero_pos = pos;
- pos += EROFS_BLKSIZ - inode->idata_size;
+ pos += erofs_blksiz(sbi) - inode->idata_size;
} else {
/* pad 0'ed data for the other cases */
zero_pos = pos + inode->idata_size;
}
- ret = dev_write(inode->idata, pos, inode->idata_size);
+ ret = dev_write(sbi, inode->idata, pos, inode->idata_size);
if (ret)
return ret;
- DBG_BUGON(inode->idata_size > EROFS_BLKSIZ);
- if (inode->idata_size < EROFS_BLKSIZ) {
- ret = dev_fillzero(zero_pos,
- EROFS_BLKSIZ - inode->idata_size,
+ DBG_BUGON(inode->idata_size > erofs_blksiz(sbi));
+ if (inode->idata_size < erofs_blksiz(sbi)) {
+ ret = dev_fillzero(sbi, zero_pos,
+ erofs_blksiz(sbi) - inode->idata_size,
false);
if (ret)
return ret;
@@ -729,7 +811,7 @@
free(inode->idata);
inode->idata = NULL;
- erofs_droid_blocklist_write_tail_end(inode, erofs_blknr(pos));
+ erofs_droid_blocklist_write_tail_end(inode, erofs_blknr(sbi, pos));
}
out:
/* now bh_data can drop directly */
@@ -750,20 +832,22 @@
return true;
if (inode->i_size > UINT_MAX)
return true;
+ if (erofs_is_packed_inode(inode))
+ return false;
if (inode->i_uid > USHRT_MAX)
return true;
if (inode->i_gid > USHRT_MAX)
return true;
if (inode->i_nlink > USHRT_MAX)
return true;
- if ((inode->i_mtime != sbi.build_time ||
- inode->i_mtime_nsec != sbi.build_time_nsec) &&
+ if ((inode->i_mtime != inode->sbi->build_time ||
+ inode->i_mtime_nsec != inode->sbi->build_time_nsec) &&
!cfg.c_ignore_mtime)
return true;
return false;
}
-static u32 erofs_new_encode_dev(dev_t dev)
+u32 erofs_new_encode_dev(dev_t dev)
{
const unsigned int major = major(dev);
const unsigned int minor = minor(dev);
@@ -773,7 +857,7 @@
#ifdef WITH_ANDROID
int erofs_droid_inode_fsconfig(struct erofs_inode *inode,
- struct stat64 *st,
+ struct stat *st,
const char *path)
{
/* filesystem_config does not preserve file type bits */
@@ -785,6 +869,9 @@
inode->capabilities = 0;
if (!cfg.fs_config_file && !cfg.mount_point)
return 0;
+ /* avoid loading special inodes */
+ if (path == EROFS_PACKED_INODE)
+ return 0;
if (!cfg.mount_point ||
/* have to drop the mountpoint for rootdir of canned fsconfig */
@@ -818,37 +905,59 @@
}
#else
static int erofs_droid_inode_fsconfig(struct erofs_inode *inode,
- struct stat64 *st,
+ struct stat *st,
const char *path)
{
return 0;
}
#endif
-static int erofs_fill_inode(struct erofs_inode *inode,
- struct stat64 *st,
- const char *path)
+int __erofs_fill_inode(struct erofs_inode *inode, struct stat *st,
+ const char *path)
{
int err = erofs_droid_inode_fsconfig(inode, st, path);
+ struct erofs_sb_info *sbi = inode->sbi;
if (err)
return err;
- inode->i_mode = st->st_mode;
+
inode->i_uid = cfg.c_uid == -1 ? st->st_uid : cfg.c_uid;
inode->i_gid = cfg.c_gid == -1 ? st->st_gid : cfg.c_gid;
+
+ if (inode->i_uid + cfg.c_uid_offset < 0)
+ erofs_err("uid overflow @ %s", path);
+ inode->i_uid += cfg.c_uid_offset;
+
+ if (inode->i_gid + cfg.c_gid_offset < 0)
+ erofs_err("gid overflow @ %s", path);
+ inode->i_gid += cfg.c_gid_offset;
+
inode->i_mtime = st->st_mtime;
inode->i_mtime_nsec = ST_MTIM_NSEC(st);
switch (cfg.c_timeinherit) {
case TIMESTAMP_CLAMPING:
- if (inode->i_mtime < sbi.build_time)
+ if (inode->i_mtime < sbi->build_time)
break;
case TIMESTAMP_FIXED:
- inode->i_mtime = sbi.build_time;
- inode->i_mtime_nsec = sbi.build_time_nsec;
+ inode->i_mtime = sbi->build_time;
+ inode->i_mtime_nsec = sbi->build_time_nsec;
default:
break;
}
+
+ return 0;
+}
+
+static int erofs_fill_inode(struct erofs_inode *inode, struct stat *st,
+ const char *path)
+{
+ int err = __erofs_fill_inode(inode, st, path);
+
+ if (err)
+ return err;
+
+ inode->i_mode = st->st_mode;
inode->i_nlink = 1; /* fix up later if needed */
switch (inode->i_mode & S_IFMT) {
@@ -868,11 +977,14 @@
return -EINVAL;
}
- strncpy(inode->i_srcpath, path, sizeof(inode->i_srcpath) - 1);
- inode->i_srcpath[sizeof(inode->i_srcpath) - 1] = '\0';
+ inode->i_srcpath = strdup(path);
+ if (!inode->i_srcpath)
+ return -ENOMEM;
- inode->dev = st->st_dev;
- inode->i_ino[1] = st->st_ino;
+ if (!S_ISDIR(inode->i_mode)) {
+ inode->dev = st->st_dev;
+ inode->i_ino[1] = st->st_ino;
+ }
if (erofs_should_use_inode_extended(inode)) {
if (cfg.c_force_inodeversion == FORCE_INODE_COMPACT) {
@@ -885,13 +997,11 @@
inode->inode_isize = sizeof(struct erofs_inode_compact);
}
- list_add(&inode->i_hash,
- &inode_hashtable[(st->st_ino ^ st->st_dev) %
- NR_INODE_HASHTABLE]);
+ erofs_insert_ihash(inode, st->st_dev, st->st_ino);
return 0;
}
-static struct erofs_inode *erofs_new_inode(void)
+struct erofs_inode *erofs_new_inode(void)
{
struct erofs_inode *inode;
@@ -899,9 +1009,12 @@
if (!inode)
return ERR_PTR(-ENOMEM);
+ inode->sbi = &sbi;
inode->i_ino[0] = sbi.inos++; /* inode serial number */
inode->i_count = 1;
+ inode->datalayout = EROFS_INODE_FLAT_PLAIN;
+ init_list_head(&inode->i_hash);
init_list_head(&inode->i_subdirs);
init_list_head(&inode->i_xattrs);
return inode;
@@ -910,7 +1023,7 @@
/* get the inode from the (source) path */
static struct erofs_inode *erofs_iget_from_path(const char *path, bool is_src)
{
- struct stat64 st;
+ struct stat st;
struct erofs_inode *inode;
int ret;
@@ -918,7 +1031,7 @@
if (!is_src)
return ERR_PTR(-EINVAL);
- ret = lstat64(path, &st);
+ ret = lstat(path, &st);
if (ret)
return ERR_PTR(-errno);
@@ -940,10 +1053,9 @@
ret = erofs_fill_inode(inode, &st, path);
if (ret) {
- free(inode);
+ erofs_iput(inode);
return ERR_PTR(ret);
}
-
return inode;
}
@@ -951,87 +1063,72 @@
{
const erofs_off_t rootnid_maxoffset = 0xffff << EROFS_ISLOTBITS;
struct erofs_buffer_head *const bh = rootdir->bh;
+ struct erofs_sb_info *sbi = rootdir->sbi;
erofs_off_t off, meta_offset;
erofs_mapbh(bh->block);
off = erofs_btell(bh, false);
if (off > rootnid_maxoffset)
- meta_offset = round_up(off - rootnid_maxoffset, EROFS_BLKSIZ);
+ meta_offset = round_up(off - rootnid_maxoffset, erofs_blksiz(sbi));
else
meta_offset = 0;
- sbi.meta_blkaddr = erofs_blknr(meta_offset);
+ sbi->meta_blkaddr = erofs_blknr(sbi, meta_offset);
rootdir->nid = (off - meta_offset) >> EROFS_ISLOTBITS;
}
-erofs_nid_t erofs_lookupnid(struct erofs_inode *inode)
-{
- struct erofs_buffer_head *const bh = inode->bh;
- erofs_off_t off, meta_offset;
-
- if (!bh)
- return inode->nid;
-
- erofs_mapbh(bh->block);
- off = erofs_btell(bh, false);
-
- meta_offset = blknr_to_addr(sbi.meta_blkaddr);
- DBG_BUGON(off < meta_offset);
- return inode->nid = (off - meta_offset) >> EROFS_ISLOTBITS;
-}
-
-static void erofs_d_invalidate(struct erofs_dentry *d)
-{
- struct erofs_inode *const inode = d->inode;
-
- d->nid = erofs_lookupnid(inode);
- erofs_iput(inode);
-}
-
-static struct erofs_inode *erofs_mkfs_build_tree(struct erofs_inode *dir)
+static int erofs_mkfs_build_tree(struct erofs_inode *dir, struct list_head *dirs)
{
int ret;
DIR *_dir;
struct dirent *dp;
struct erofs_dentry *d;
- unsigned int nr_subdirs;
+ unsigned int nr_subdirs, i_nlink;
+
+ ret = erofs_scan_file_xattrs(dir);
+ if (ret < 0)
+ return ret;
ret = erofs_prepare_xattr_ibody(dir);
if (ret < 0)
- return ERR_PTR(ret);
+ return ret;
if (!S_ISDIR(dir->i_mode)) {
if (S_ISLNK(dir->i_mode)) {
char *const symlink = malloc(dir->i_size);
if (!symlink)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
ret = readlink(dir->i_srcpath, symlink, dir->i_size);
if (ret < 0) {
free(symlink);
- return ERR_PTR(-errno);
+ return -errno;
}
-
ret = erofs_write_file_from_buffer(dir, symlink);
free(symlink);
- if (ret)
- return ERR_PTR(ret);
+ } else if (dir->i_size) {
+ int fd = open(dir->i_srcpath, O_RDONLY | O_BINARY);
+ if (fd < 0)
+ return -errno;
+
+ ret = erofs_write_file(dir, fd, 0);
+ close(fd);
} else {
- ret = erofs_write_file(dir);
- if (ret)
- return ERR_PTR(ret);
+ ret = 0;
}
+ if (ret)
+ return ret;
erofs_prepare_inode_buffer(dir);
erofs_write_tail_end(dir);
- return dir;
+ return 0;
}
_dir = opendir(dir->i_srcpath);
if (!_dir) {
erofs_err("failed to opendir at %s: %s",
dir->i_srcpath, erofs_strerror(errno));
- return ERR_PTR(-errno);
+ return -errno;
}
nr_subdirs = 0;
@@ -1045,8 +1142,7 @@
if (!dp)
break;
- if (is_dot_dotdot(dp->d_name) ||
- !strncmp(dp->d_name, "lost+found", strlen("lost+found")))
+ if (is_dot_dotdot(dp->d_name))
continue;
/* skip if it's a exclude file */
@@ -1059,10 +1155,6 @@
goto err_closedir;
}
nr_subdirs++;
-
- /* to count i_nlink for directories */
- d->type = (dp->d_type == DT_DIR ?
- EROFS_FT_DIR : EROFS_FT_UNKNOWN);
}
if (errno) {
@@ -1073,21 +1165,24 @@
ret = erofs_prepare_dir_file(dir, nr_subdirs);
if (ret)
- goto err;
+ return ret;
ret = erofs_prepare_inode_buffer(dir);
if (ret)
- goto err;
+ return ret;
+ dir->bh->op = &erofs_skip_write_bhops;
if (IS_ROOT(dir))
erofs_fixup_meta_blkaddr(dir);
+ i_nlink = 0;
list_for_each_entry(d, &dir->i_subdirs, d_child) {
- char buf[PATH_MAX], *trimmed;
+ char buf[PATH_MAX];
unsigned char ftype;
+ struct erofs_inode *inode;
if (is_dot_dotdot(d->name)) {
- erofs_d_invalidate(d);
+ ++i_nlink;
continue;
}
@@ -1098,57 +1193,247 @@
goto fail;
}
- trimmed = erofs_trim_for_progressinfo(erofs_fspath(buf),
- sizeof("Processing ...") - 1);
- erofs_update_progressinfo("Processing %s ...", trimmed);
- free(trimmed);
- d->inode = erofs_mkfs_build_tree_from_path(dir, buf);
- if (IS_ERR(d->inode)) {
- ret = PTR_ERR(d->inode);
+ inode = erofs_iget_from_path(buf, true);
+
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
fail:
d->inode = NULL;
d->type = EROFS_FT_UNKNOWN;
- goto err;
+ return ret;
}
- ftype = erofs_mode_to_ftype(d->inode->i_mode);
- DBG_BUGON(ftype == EROFS_FT_DIR && d->type != ftype);
+ /* a hardlink to the existed inode */
+ if (inode->i_parent) {
+ ++inode->i_nlink;
+ } else {
+ inode->i_parent = dir;
+ erofs_igrab(inode);
+ list_add_tail(&inode->i_subdirs, dirs);
+ }
+ ftype = erofs_mode_to_ftype(inode->i_mode);
+ i_nlink += (ftype == EROFS_FT_DIR);
+ d->inode = inode;
d->type = ftype;
-
- erofs_d_invalidate(d);
- erofs_info("add file %s/%s (nid %llu, type %u)",
- dir->i_srcpath, d->name, (unsigned long long)d->nid,
- d->type);
+ erofs_info("file %s/%s dumped (type %u)",
+ dir->i_srcpath, d->name, d->type);
}
- erofs_write_dir_file(dir);
- erofs_write_tail_end(dir);
- return dir;
+ /*
+ * if there're too many subdirs as compact form, set nlink=1
+ * rather than upgrade to use extented form instead.
+ */
+ if (i_nlink > USHRT_MAX &&
+ dir->inode_isize == sizeof(struct erofs_inode_compact))
+ dir->i_nlink = 1;
+ else
+ dir->i_nlink = i_nlink;
+ return 0;
err_closedir:
closedir(_dir);
-err:
- return ERR_PTR(ret);
+ return ret;
}
-struct erofs_inode *erofs_mkfs_build_tree_from_path(struct erofs_inode *parent,
- const char *path)
+struct erofs_inode *erofs_mkfs_build_tree_from_path(const char *path)
{
- struct erofs_inode *const inode = erofs_iget_from_path(path, true);
+ LIST_HEAD(dirs);
+ struct erofs_inode *inode, *root, *dumpdir;
+ root = erofs_iget_from_path(path, true);
+ if (IS_ERR(root))
+ return root;
+
+ (void)erofs_igrab(root);
+ root->i_parent = root; /* rootdir mark */
+ list_add(&root->i_subdirs, &dirs);
+
+ dumpdir = NULL;
+ do {
+ int err;
+ char *trimmed;
+
+ inode = list_first_entry(&dirs, struct erofs_inode, i_subdirs);
+ list_del(&inode->i_subdirs);
+ init_list_head(&inode->i_subdirs);
+
+ trimmed = erofs_trim_for_progressinfo(
+ erofs_fspath(inode->i_srcpath),
+ sizeof("Processing ...") - 1);
+ erofs_update_progressinfo("Processing %s ...", trimmed);
+ free(trimmed);
+
+ err = erofs_mkfs_build_tree(inode, &dirs);
+ if (err) {
+ root = ERR_PTR(err);
+ break;
+ }
+
+ if (S_ISDIR(inode->i_mode)) {
+ inode->next_dirwrite = dumpdir;
+ dumpdir = inode;
+ } else {
+ erofs_iput(inode);
+ }
+ } while (!list_empty(&dirs));
+
+ while (dumpdir) {
+ inode = dumpdir;
+ erofs_write_dir_file(inode);
+ erofs_write_tail_end(inode);
+ inode->bh->op = &erofs_write_inode_bhops;
+ dumpdir = inode->next_dirwrite;
+ erofs_iput(inode);
+ }
+ return root;
+}
+
+struct erofs_inode *erofs_mkfs_build_special_from_fd(int fd, const char *name)
+{
+ struct stat st;
+ struct erofs_inode *inode;
+ int ret;
+
+ ret = lseek(fd, 0, SEEK_SET);
+ if (ret < 0)
+ return ERR_PTR(-errno);
+
+ ret = fstat(fd, &st);
+ if (ret)
+ return ERR_PTR(-errno);
+
+ inode = erofs_new_inode();
if (IS_ERR(inode))
return inode;
- /* a hardlink to the existed inode */
- if (inode->i_parent) {
- ++inode->i_nlink;
- return inode;
+ if (name == EROFS_PACKED_INODE) {
+ st.st_uid = st.st_gid = 0;
+ st.st_nlink = 0;
}
- /* a completely new inode is found */
- if (parent)
- inode->i_parent = parent;
- else
- inode->i_parent = inode; /* rootdir mark */
+ ret = erofs_fill_inode(inode, &st, name);
+ if (ret) {
+ free(inode);
+ return ERR_PTR(ret);
+ }
- return erofs_mkfs_build_tree(inode);
+ if (name == EROFS_PACKED_INODE) {
+ inode->sbi->packed_nid = EROFS_PACKED_NID_UNALLOCATED;
+ inode->nid = inode->sbi->packed_nid;
+ }
+
+ ret = erofs_write_compressed_file(inode, fd);
+ if (ret == -ENOSPC) {
+ ret = lseek(fd, 0, SEEK_SET);
+ if (ret < 0)
+ return ERR_PTR(-errno);
+
+ ret = write_uncompressed_file_from_fd(inode, fd);
+ }
+
+ if (ret) {
+ DBG_BUGON(ret == -ENOSPC);
+ return ERR_PTR(ret);
+ }
+ erofs_prepare_inode_buffer(inode);
+ erofs_write_tail_end(inode);
+ return inode;
+}
+
+int erofs_rebuild_dump_tree(struct erofs_inode *dir)
+{
+ struct erofs_dentry *d, *n;
+ unsigned int nr_subdirs;
+ int ret;
+
+ if (erofs_should_use_inode_extended(dir)) {
+ if (cfg.c_force_inodeversion == FORCE_INODE_COMPACT) {
+ erofs_err("file %s cannot be in compact form",
+ dir->i_srcpath);
+ return -EINVAL;
+ }
+ dir->inode_isize = sizeof(struct erofs_inode_extended);
+ } else {
+ dir->inode_isize = sizeof(struct erofs_inode_compact);
+ }
+
+ /* strip all unnecessary overlayfs xattrs when ovlfs_strip is enabled */
+ if (cfg.c_ovlfs_strip)
+ erofs_clear_opaque_xattr(dir);
+ else if (dir->whiteouts)
+ erofs_set_origin_xattr(dir);
+
+ ret = erofs_prepare_xattr_ibody(dir);
+ if (ret < 0)
+ return ret;
+
+ if (!S_ISDIR(dir->i_mode)) {
+ if (dir->bh)
+ return 0;
+ if (S_ISLNK(dir->i_mode)) {
+ ret = erofs_write_file_from_buffer(dir, dir->i_link);
+ free(dir->i_link);
+ dir->i_link = NULL;
+ } else if (dir->with_diskbuf) {
+ u64 fpos;
+
+ ret = erofs_diskbuf_getfd(dir->i_diskbuf, &fpos);
+ if (ret >= 0)
+ ret = erofs_write_file(dir, ret, fpos);
+ erofs_diskbuf_close(dir->i_diskbuf);
+ free(dir->i_diskbuf);
+ dir->i_diskbuf = NULL;
+ dir->with_diskbuf = false;
+ } else {
+ ret = 0;
+ }
+ if (ret)
+ return ret;
+ ret = erofs_prepare_inode_buffer(dir);
+ if (ret)
+ return ret;
+ erofs_write_tail_end(dir);
+ return 0;
+ }
+
+ nr_subdirs = 0;
+ list_for_each_entry_safe(d, n, &dir->i_subdirs, d_child) {
+ if (cfg.c_ovlfs_strip && erofs_inode_is_whiteout(d->inode)) {
+ erofs_dbg("remove whiteout %s", d->inode->i_srcpath);
+ list_del(&d->d_child);
+ erofs_d_invalidate(d);
+ free(d);
+ continue;
+ }
+ ++nr_subdirs;
+ }
+
+ ret = erofs_prepare_dir_layout(dir, nr_subdirs);
+ if (ret)
+ return ret;
+
+ ret = erofs_prepare_inode_buffer(dir);
+ if (ret)
+ return ret;
+ dir->bh->op = &erofs_skip_write_bhops;
+
+ if (IS_ROOT(dir))
+ erofs_fixup_meta_blkaddr(dir);
+
+ list_for_each_entry(d, &dir->i_subdirs, d_child) {
+ struct erofs_inode *inode;
+
+ if (is_dot_dotdot(d->name))
+ continue;
+
+ inode = erofs_igrab(d->inode);
+ ret = erofs_rebuild_dump_tree(inode);
+ dir->i_nlink += (erofs_mode_to_ftype(inode->i_mode) == EROFS_FT_DIR);
+ erofs_iput(inode);
+ if (ret)
+ return ret;
+ }
+ erofs_write_dir_file(dir);
+ erofs_write_tail_end(dir);
+ dir->bh->op = &erofs_write_inode_bhops;
+ return 0;
}
diff --git a/lib/io.c b/lib/io.c
index 9c663c5..c92f16c 100644
--- a/lib/io.c
+++ b/lib/io.c
@@ -10,6 +10,7 @@
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
+#include <stdlib.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include "erofs/io.h"
@@ -19,16 +20,13 @@
#ifdef HAVE_LINUX_FALLOC_H
#include <linux/falloc.h>
#endif
-
+#ifdef HAVE_SYS_STATFS_H
+#include <sys/statfs.h>
+#endif
#define EROFS_MODNAME "erofs_io"
#include "erofs/print.h"
-static const char *erofs_devname;
-int erofs_devfd = -1;
-static u64 erofs_devsz;
-static unsigned int erofs_nblobs, erofs_blobfd[256];
-
-int dev_get_blkdev_size(int fd, u64 *bytes)
+static int dev_get_blkdev_size(int fd, u64 *bytes)
{
errno = ENOTSUP;
#ifdef BLKGETSIZE64
@@ -48,19 +46,25 @@
return -errno;
}
-void dev_close(void)
+void dev_close(struct erofs_sb_info *sbi)
{
- close(erofs_devfd);
- erofs_devname = NULL;
- erofs_devfd = -1;
- erofs_devsz = 0;
+ close(sbi->devfd);
+ free(sbi->devname);
+ sbi->devname = NULL;
+ sbi->devfd = -1;
+ sbi->devsz = 0;
}
-int dev_open(const char *dev)
+int dev_open(struct erofs_sb_info *sbi, const char *dev)
{
struct stat st;
int fd, ret;
+#if defined(HAVE_SYS_STATFS_H) && defined(HAVE_FSTATFS)
+ bool again = false;
+
+repeat:
+#endif
fd = open(dev, O_RDWR | O_CREAT | O_BINARY, 0644);
if (fd < 0) {
erofs_err("failed to open(%s).", dev);
@@ -76,23 +80,46 @@
switch (st.st_mode & S_IFMT) {
case S_IFBLK:
- ret = dev_get_blkdev_size(fd, &erofs_devsz);
+ ret = dev_get_blkdev_size(fd, &sbi->devsz);
if (ret) {
erofs_err("failed to get block device size(%s).", dev);
close(fd);
return ret;
}
- erofs_devsz = round_down(erofs_devsz, EROFS_BLKSIZ);
+ sbi->devsz = round_down(sbi->devsz, erofs_blksiz(sbi));
break;
case S_IFREG:
- ret = ftruncate(fd, 0);
- if (ret) {
- erofs_err("failed to ftruncate(%s).", dev);
- close(fd);
- return -errno;
+ if (st.st_size) {
+#if defined(HAVE_SYS_STATFS_H) && defined(HAVE_FSTATFS)
+ struct statfs stfs;
+
+ if (again)
+ return -ENOTEMPTY;
+
+ /*
+ * fses like EXT4 and BTRFS will flush dirty blocks
+ * after truncate(0) even after the writeback happens
+ * (see kernel commit 7d8f9f7d150d and ccd2506bd431),
+ * which is NOT our intention. Let's work around this.
+ */
+ if (!fstatfs(fd, &stfs) && (stfs.f_type == 0xEF53 ||
+ stfs.f_type == 0x9123683E)) {
+ close(fd);
+ unlink(dev);
+ again = true;
+ goto repeat;
+ }
+#endif
+ ret = ftruncate(fd, 0);
+ if (ret) {
+ erofs_err("failed to ftruncate(%s).", dev);
+ close(fd);
+ return -errno;
+ }
}
/* INT64_MAX is the limit of kernel vfs */
- erofs_devsz = INT64_MAX;
+ sbi->devsz = INT64_MAX;
+ sbi->devblksz = st.st_blksize;
break;
default:
erofs_err("bad file type (%s, %o).", dev, st.st_mode);
@@ -100,23 +127,27 @@
return -EINVAL;
}
- erofs_devname = dev;
- erofs_devfd = fd;
+ sbi->devname = strdup(dev);
+ if (!sbi->devname) {
+ close(fd);
+ return -ENOMEM;
+ }
+ sbi->devfd = fd;
erofs_info("successfully to open %s", dev);
return 0;
}
-void blob_closeall(void)
+void blob_closeall(struct erofs_sb_info *sbi)
{
unsigned int i;
- for (i = 0; i < erofs_nblobs; ++i)
- close(erofs_blobfd[i]);
- erofs_nblobs = 0;
+ for (i = 0; i < sbi->nblobs; ++i)
+ close(sbi->blobfd[i]);
+ sbi->nblobs = 0;
}
-int blob_open_ro(const char *dev)
+int blob_open_ro(struct erofs_sb_info *sbi, const char *dev)
{
int fd = open(dev, O_RDONLY | O_BINARY);
@@ -125,14 +156,14 @@
return -errno;
}
- erofs_blobfd[erofs_nblobs] = fd;
- erofs_info("successfully to open blob%u %s", erofs_nblobs, dev);
- ++erofs_nblobs;
+ sbi->blobfd[sbi->nblobs] = fd;
+ erofs_info("successfully to open blob%u %s", sbi->nblobs, dev);
+ ++sbi->nblobs;
return 0;
}
/* XXX: temporary soluation. Disk I/O implementation needs to be refactored. */
-int dev_open_ro(const char *dev)
+int dev_open_ro(struct erofs_sb_info *sbi, const char *dev)
{
int fd = open(dev, O_RDONLY | O_BINARY);
@@ -141,18 +172,17 @@
return -errno;
}
- erofs_devfd = fd;
- erofs_devname = dev;
- erofs_devsz = INT64_MAX;
+ sbi->devname = strdup(dev);
+ if (!sbi->devname) {
+ close(fd);
+ return -ENOMEM;
+ }
+ sbi->devfd = fd;
+ sbi->devsz = INT64_MAX;
return 0;
}
-u64 dev_length(void)
-{
- return erofs_devsz;
-}
-
-int dev_write(const void *buf, u64 offset, size_t len)
+int dev_write(struct erofs_sb_info *sbi, const void *buf, u64 offset, size_t len)
{
int ret;
@@ -164,60 +194,60 @@
return -EINVAL;
}
- if (offset >= erofs_devsz || len > erofs_devsz ||
- offset > erofs_devsz - len) {
+ if (offset >= sbi->devsz || len > sbi->devsz ||
+ offset > sbi->devsz - len) {
erofs_err("Write posion[%" PRIu64 ", %zd] is too large beyond the end of device(%" PRIu64 ").",
- offset, len, erofs_devsz);
+ offset, len, sbi->devsz);
return -EINVAL;
}
#ifdef HAVE_PWRITE64
- ret = pwrite64(erofs_devfd, buf, len, (off64_t)offset);
+ ret = pwrite64(sbi->devfd, buf, len, (off64_t)offset);
#else
- ret = pwrite(erofs_devfd, buf, len, (off_t)offset);
+ ret = pwrite(sbi->devfd, buf, len, (off_t)offset);
#endif
if (ret != (int)len) {
if (ret < 0) {
erofs_err("Failed to write data into device - %s:[%" PRIu64 ", %zd].",
- erofs_devname, offset, len);
+ sbi->devname, offset, len);
return -errno;
}
erofs_err("Writing data into device - %s:[%" PRIu64 ", %zd] - was truncated.",
- erofs_devname, offset, len);
+ sbi->devname, offset, len);
return -ERANGE;
}
return 0;
}
-int dev_fillzero(u64 offset, size_t len, bool padding)
+int dev_fillzero(struct erofs_sb_info *sbi, u64 offset, size_t len, bool padding)
{
- static const char zero[EROFS_BLKSIZ] = {0};
+ static const char zero[EROFS_MAX_BLOCK_SIZE] = {0};
int ret;
if (cfg.c_dry_run)
return 0;
#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE)
- if (!padding && fallocate(erofs_devfd, FALLOC_FL_PUNCH_HOLE |
+ if (!padding && fallocate(sbi->devfd, FALLOC_FL_PUNCH_HOLE |
FALLOC_FL_KEEP_SIZE, offset, len) >= 0)
return 0;
#endif
- while (len > EROFS_BLKSIZ) {
- ret = dev_write(zero, offset, EROFS_BLKSIZ);
+ while (len > erofs_blksiz(sbi)) {
+ ret = dev_write(sbi, zero, offset, erofs_blksiz(sbi));
if (ret)
return ret;
- len -= EROFS_BLKSIZ;
- offset += EROFS_BLKSIZ;
+ len -= erofs_blksiz(sbi);
+ offset += erofs_blksiz(sbi);
}
- return dev_write(zero, offset, len);
+ return dev_write(sbi, zero, offset, len);
}
-int dev_fsync(void)
+int dev_fsync(struct erofs_sb_info *sbi)
{
int ret;
- ret = fsync(erofs_devfd);
+ ret = fsync(sbi->devfd);
if (ret) {
erofs_err("Could not fsync device!!!");
return -EIO;
@@ -225,66 +255,81 @@
return 0;
}
-int dev_resize(unsigned int blocks)
+int dev_resize(struct erofs_sb_info *sbi, unsigned int blocks)
{
int ret;
struct stat st;
u64 length;
- if (cfg.c_dry_run || erofs_devsz != INT64_MAX)
+ if (cfg.c_dry_run || sbi->devsz != INT64_MAX)
return 0;
- ret = fstat(erofs_devfd, &st);
+ ret = fstat(sbi->devfd, &st);
if (ret) {
erofs_err("failed to fstat.");
return -errno;
}
- length = (u64)blocks * EROFS_BLKSIZ;
+ length = (u64)blocks * erofs_blksiz(sbi);
if (st.st_size == length)
return 0;
if (st.st_size > length)
- return ftruncate(erofs_devfd, length);
+ return ftruncate(sbi->devfd, length);
length = length - st.st_size;
#if defined(HAVE_FALLOCATE)
- if (fallocate(erofs_devfd, 0, st.st_size, length) >= 0)
+ if (fallocate(sbi->devfd, 0, st.st_size, length) >= 0)
return 0;
#endif
- return dev_fillzero(st.st_size, length, true);
+ return dev_fillzero(sbi, st.st_size, length, true);
}
-int dev_read(int device_id, void *buf, u64 offset, size_t len)
+int dev_read(struct erofs_sb_info *sbi, int device_id,
+ void *buf, u64 offset, size_t len)
{
- int ret, fd;
+ int read_count, fd;
if (cfg.c_dry_run)
return 0;
+ offset += cfg.c_offset;
+
if (!buf) {
erofs_err("buf is NULL");
return -EINVAL;
}
if (!device_id) {
- fd = erofs_devfd;
+ fd = sbi->devfd;
} else {
- if (device_id > erofs_nblobs) {
+ if (device_id > sbi->nblobs) {
erofs_err("invalid device id %d", device_id);
return -ENODEV;
}
- fd = erofs_blobfd[device_id - 1];
+ fd = sbi->blobfd[device_id - 1];
}
+ while (len > 0) {
#ifdef HAVE_PREAD64
- ret = pread64(fd, buf, len, (off64_t)offset);
+ read_count = pread64(fd, buf, len, (off64_t)offset);
#else
- ret = pread(fd, buf, len, (off_t)offset);
+ read_count = pread(fd, buf, len, (off_t)offset);
#endif
- if (ret != (int)len) {
- erofs_err("Failed to read data from device - %s:[%" PRIu64 ", %zd].",
- erofs_devname, offset, len);
- return -errno;
+ if (read_count < 1) {
+ if (!read_count) {
+ erofs_info("Reach EOF of device - %s:[%" PRIu64 ", %zd].",
+ sbi->devname, offset, len);
+ memset(buf, 0, len);
+ return 0;
+ } else if (errno != EINTR) {
+ erofs_err("Failed to read data from device - %s:[%" PRIu64 ", %zd].",
+ sbi->devname, offset, len);
+ return -errno;
+ }
+ }
+ offset += read_count;
+ len -= read_count;
+ buf += read_count;
}
return 0;
}
@@ -373,7 +418,7 @@
length, 0);
if (ret >= 0)
goto out;
- if (errno != ENOSYS) {
+ if (errno != ENOSYS && errno != EXDEV) {
ret = -errno;
out:
*off_in = off64_in;
diff --git a/lib/kite_deflate.c b/lib/kite_deflate.c
new file mode 100644
index 0000000..8667954
--- /dev/null
+++ b/lib/kite_deflate.c
@@ -0,0 +1,1271 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+/*
+ * erofs-utils/lib/kite_deflate.c
+ *
+ * Copyright (C) 2023, Alibaba Cloud
+ * Copyright (C) 2023, Gao Xiang <xiang@kernel.org>
+ */
+#include "erofs/defs.h"
+#include "erofs/print.h"
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdio.h>
+
+unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2,
+ unsigned long sz);
+
+#ifdef TEST
+#define kite_dbg(x, ...) fprintf(stderr, x "\n", ##__VA_ARGS__)
+#else
+#define kite_dbg(x, ...)
+#endif
+
+#define kHistorySize32 (1U << 15)
+
+#define kNumLenSymbols32 256
+#define kNumLenSymbolsMax kNumLenSymbols32
+
+#define kSymbolEndOfBlock 256
+#define kSymbolMatch (kSymbolEndOfBlock + 1)
+#define kNumLenSlots 29
+#define kMainTableSize (kSymbolMatch + kNumLenSlots)
+
+#define kFixedLenTableSize (kSymbolMatch + 31)
+#define FixedDistTableSize 32
+
+#define kMainTableSize (kSymbolMatch + kNumLenSlots)
+#define kDistTableSize32 30
+
+#define kNumLitLenCodesMin 257
+#define kNumDistCodesMin 1
+
+#define kNumLensCodesMin 4
+#define kLensTableSize 19
+
+#define kMatchMinLen 3
+#define kMatchMaxLen32 kNumLenSymbols32 + kMatchMinLen - 1
+
+#define kTableDirectLevels 16
+#define kBitLensRepNumber_3_6 kTableDirectLevels
+#define kBitLens0Number_3_10 (kBitLensRepNumber_3_6 + 1)
+#define kBitLens0Number_11_138 (kBitLens0Number_3_10 + 1)
+
+static u32 kstaticHuff_mainCodes[kFixedLenTableSize];
+static const u8 kstaticHuff_litLenLevels[kFixedLenTableSize] = {
+ [0 ... 143] = 8, [144 ... 255] = 9,
+ [256 ... 279] = 7, [280 ... 287] = 8,
+};
+static u32 kstaticHuff_distCodes[kFixedLenTableSize];
+
+const u8 kLenStart32[kNumLenSlots] =
+ {0,1,2,3,4,5,6,7,8,10,12,14,16,20,24,28,32,40,48,56,64,80,96,112,128,160,192,224, 255};
+
+const u8 kLenExtraBits32[kNumLenSlots] =
+ {0,0,0,0,0,0,0,0,1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5,
+ 5, 5, 5, 0};
+
+/* First normalized distance for each code (0 = distance of 1) */
+const u32 kDistStart[kDistTableSize32] =
+ {0,1,2,3,4,6,8,12,16,24,32,48,64,96,128,192,256,384,512,768,
+ 1024,1536,2048,3072,4096,6144,8192,12288,16384,24576};
+
+/* extra bits for each distance code */
+const u8 kDistExtraBits[kDistTableSize32] =
+ {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
+
+const u8 kCodeLengthAlphabetOrder[kLensTableSize] =
+ {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+const u8 kLevelExtraBits[3] = {2, 3, 7};
+
+#define kStored 0
+#define kFixedHuffman 1
+#define kDynamicHuffman 2
+
+struct kite_deflate_symbol {
+ u16 len, dist;
+};
+
+struct kite_deflate_table {
+ u32 mainCodes[kMainTableSize];
+ u8 litLenLevels[kMainTableSize];
+ u32 distCodes[kDistTableSize32];
+ u8 distLevels[kDistTableSize32];
+ u32 levelCodes[kLensTableSize];
+ u8 levelLens[kLensTableSize];
+
+ u8 numdistlens, numblcodes;
+ u16 numlitlens;
+};
+
+struct kite_deflate {
+ struct kite_deflate_table *tab;
+ const u8 *in;
+ u8 *out;
+
+ u32 inlen, outlen;
+ u32 pos_in, pos_out;
+ u32 inflightbits;
+ u8 bitpos;
+ u8 numHuffBits;
+ u32 symbols;
+
+ u32 costbits, startpos;
+ u8 encode_mode;
+ bool freq_changed, lastblock;
+
+ /* Previous match for lazy matching */
+ bool prev_valid;
+ u16 prev_longest;
+
+ u32 mainFreqs[kMainTableSize];
+ u32 distFreqs[kDistTableSize32];
+ struct kite_deflate_table tables[2];
+
+ /* don't reset the following fields */
+ struct kite_matchfinder *mf;
+ struct kite_deflate_symbol *sym;
+ u32 max_symbols;
+ bool lazy_search;
+};
+
+#define ZLIB_DISTANCE_TOO_FAR 4096
+
+static u8 g_LenSlots[kNumLenSymbolsMax];
+
+#define kNumLogBits 9 // do not change it
+static u8 g_FastPos[1 << kNumLogBits];
+
+static void writebits(struct kite_deflate *s, unsigned int v, u8 bits)
+{
+ unsigned int rem = sizeof(s->inflightbits) * 8 - s->bitpos;
+
+ s->inflightbits |= (v << s->bitpos) & (!rem - 1);
+ if (bits > rem) {
+ u8 *out = s->out + s->pos_out;
+
+ out[0] = s->inflightbits & 0xff;
+ out[1] = (s->inflightbits >> 8) & 0xff;
+ out[2] = (s->inflightbits >> 16) & 0xff;
+ out[3] = (s->inflightbits >> 24) & 0xff;
+ s->pos_out += 4;
+ DBG_BUGON(s->pos_out > s->outlen);
+ s->inflightbits = v >> rem;
+ s->bitpos = bits - rem;
+ return;
+ }
+ s->bitpos += bits;
+}
+
+static void flushbits(struct kite_deflate *s)
+{
+ u8 *out = s->out + s->pos_out;
+
+ if (!s->bitpos)
+ return;
+ out[0] = s->inflightbits & 0xff;
+ if (s->bitpos >= 8) {
+ out[1] = (s->inflightbits >> 8) & 0xff;
+ if (s->bitpos >= 16) {
+ out[2] = (s->inflightbits >> 16) & 0xff;
+ if (s->bitpos >= 24)
+ out[3] = (s->inflightbits >> 24) & 0xff;
+ }
+ }
+ s->pos_out += round_up(s->bitpos, 8) >> 3;
+ DBG_BUGON(s->pos_out > s->outlen);
+ s->bitpos = 0;
+ s->inflightbits = 0;
+}
+
+#define kMaxLen 16
+
+static void deflate_genhuffcodes(const u8 *lens, u32 *p, unsigned int nr_codes,
+ const u32 *bl_count)
+{
+ u32 nextCodes[kMaxLen + 1]; /* next code value for each bit length */
+ unsigned int code = 0; /* running code value */
+ unsigned int bits, k;
+
+ for (bits = 1; bits <= kMaxLen; ++bits) {
+ code = (code + bl_count[bits - 1]) << 1;
+ nextCodes[bits] = code;
+ }
+
+ DBG_BUGON(code + bl_count[kMaxLen] != 1 << kMaxLen);
+
+ for (k = 0; k < nr_codes; ++k)
+ p[k] = nextCodes[lens[k]]++;
+}
+
+static u32 deflate_reversebits_one(u32 code, u8 bits)
+{
+ unsigned int x = code;
+
+ x = ((x & 0x5555) << 1) | ((x & 0xAAAA) >> 1);
+ x = ((x & 0x3333) << 2) | ((x & 0xCCCC) >> 2);
+ x = ((x & 0x0F0F) << 4) | ((x & 0xF0F0) >> 4);
+
+ return (((x & 0x00FF) << 8) | ((x & 0xFF00) >> 8)) >> (16 - bits);
+}
+
+static void Huffman_ReverseBits(u32 *codes, const u8 *lens, unsigned int n)
+{
+ while (n) {
+ u32 code = *codes;
+
+ *codes++ = deflate_reversebits_one(code, *lens++);
+ --n;
+ }
+}
+
+static void kite_deflate_init_once(void)
+{
+ static const u32 static_bl_count[kMaxLen + 1] = {
+ [7] = 279 - 256 + 1,
+ [8] = (143 + 1) + (287 - 280 + 1),
+ [9] = 255 - 144 + 1,
+ };
+ unsigned int i, c, j, k;
+
+ if (kstaticHuff_distCodes[31])
+ return;
+ deflate_genhuffcodes(kstaticHuff_litLenLevels, kstaticHuff_mainCodes,
+ kFixedLenTableSize, static_bl_count);
+ Huffman_ReverseBits(kstaticHuff_mainCodes, kstaticHuff_litLenLevels,
+ kFixedLenTableSize);
+
+ for (i = 0; i < ARRAY_SIZE(kstaticHuff_distCodes); ++i)
+ kstaticHuff_distCodes[i] = deflate_reversebits_one(i, 5);
+
+ for (i = 0; i < kNumLenSlots; i++) {
+ c = kLenStart32[i];
+ j = 1 << kLenExtraBits32[i];
+
+ for (k = 0; k < j; k++, c++)
+ g_LenSlots[c] = (u8)i;
+ }
+
+ c = 0;
+ for (i = 0; i < /*kFastSlots*/ kNumLogBits * 2; i++) {
+ k = 1 << kDistExtraBits[i];
+ for (j = 0; j < k; j++)
+ g_FastPos[c++] = i;
+ }
+}
+
+static void kite_deflate_scanlens(unsigned int numlens, u8 *lens, u32 *freqs)
+{
+ int n; /* iterates over all tree elements */
+ int prevlen = -1; /* last emitted length */
+ int curlen; /* length of current code */
+ int nextlen = lens[0]; /* length of next code */
+ int count = 0; /* repeat count of the current code */
+ int max_count = 7; /* max repeat count */
+ int min_count = 4; /* min repeat count */
+
+ if (!nextlen)
+ max_count = 138, min_count = 3;
+
+ for (n = 0; n < numlens; n++) {
+ curlen = nextlen;
+ nextlen = n + 1 < numlens ? lens[n + 1] : -1;
+ ++count;
+
+ if (count < max_count && curlen == nextlen)
+ continue;
+ if (count < min_count) {
+ freqs[curlen] += count;
+ } else if (curlen != 0) {
+ if (curlen != prevlen)
+ freqs[curlen]++;
+ freqs[kBitLensRepNumber_3_6]++;
+ } else if (count <= 10) {
+ freqs[kBitLens0Number_3_10]++;
+ } else {
+ freqs[kBitLens0Number_11_138]++;
+ }
+
+ count = 0;
+ prevlen = curlen;
+ if (!nextlen)
+ max_count = 138, min_count = 3;
+ else if (curlen == nextlen)
+ max_count = 6, min_count = 3;
+ else
+ max_count = 7, min_count = 4;
+ }
+}
+
+static void kite_deflate_sendtree(struct kite_deflate *s, const u8 *lens,
+ unsigned int numlens)
+{
+ int n; /* iterates over all tree elements */
+ int prevlen = -1; /* last emitted length */
+ int curlen; /* length of current code */
+ int nextlen = lens[0]; /* length of next code */
+ int count = 0; /* repeat count of the current code */
+ int max_count = 7; /* max repeat count */
+ int min_count = 4; /* min repeat count */
+ const u8 *bl_lens = s->tab->levelLens;
+ const u32 *bl_codes = s->tab->levelCodes;
+
+ if (!nextlen)
+ max_count = 138, min_count = 3;
+
+ for (n = 0; n < numlens; n++) {
+ curlen = nextlen;
+ nextlen = n + 1 < numlens ? lens[n + 1] : -1;
+ ++count;
+
+ if (count < max_count && curlen == nextlen)
+ continue;
+ if (count < min_count) {
+ do {
+ writebits(s, bl_codes[curlen], bl_lens[curlen]);
+ } while (--count);
+ } else if (curlen) {
+ if (curlen != prevlen) {
+ writebits(s, bl_codes[curlen], bl_lens[curlen]);
+ count--;
+ }
+ writebits(s, bl_codes[kBitLensRepNumber_3_6],
+ bl_lens[kBitLensRepNumber_3_6]);
+ writebits(s, count - 3, 2);
+ } else if (count <= 10) {
+ writebits(s, bl_codes[kBitLens0Number_3_10],
+ bl_lens[kBitLens0Number_3_10]);
+ writebits(s, count - 3, 3);
+ } else {
+ writebits(s, bl_codes[kBitLens0Number_11_138],
+ bl_lens[kBitLens0Number_11_138]);
+ writebits(s, count - 11, 7);
+ }
+
+ count = 0;
+ prevlen = curlen;
+ if (!nextlen)
+ max_count = 138, min_count = 3;
+ else if (curlen == nextlen)
+ max_count = 6, min_count = 3;
+ else
+ max_count = 7, min_count = 4;
+ }
+}
+
+static void kite_deflate_setfixedtrees(struct kite_deflate *s)
+{
+ writebits(s, (kFixedHuffman << 1) + s->lastblock, 3);
+}
+
+static void kite_deflate_sendtrees(struct kite_deflate *s)
+{
+ struct kite_deflate_table *t = s->tab;
+ unsigned int i;
+
+ writebits(s, (kDynamicHuffman << 1) + s->lastblock, 3);
+ writebits(s, t->numlitlens - kNumLitLenCodesMin, 5);
+ writebits(s, t->numdistlens - kNumDistCodesMin, 5);
+ writebits(s, t->numblcodes - kNumLensCodesMin, 4);
+
+ for (i = 0; i < t->numblcodes; i++)
+ writebits(s, t->levelLens[kCodeLengthAlphabetOrder[i]], 3);
+
+ Huffman_ReverseBits(t->levelCodes, t->levelLens, kLensTableSize);
+ kite_deflate_sendtree(s, t->litLenLevels, t->numlitlens);
+ kite_deflate_sendtree(s, t->distLevels, t->numdistlens);
+}
+
+static inline unsigned int deflateDistSlot(unsigned int pos)
+{
+ const unsigned int zz = (kNumLogBits - 1) &
+ ((((1U << kNumLogBits) - 1) - pos) >> (31 - 3));
+
+ return g_FastPos[pos >> zz] + (zz * 2);
+}
+
+static void kite_deflate_writeblock(struct kite_deflate *s, bool fixed)
+{
+ int i;
+ u32 *mainCodes, *distCodes;
+ const u8 *litLenLevels, *distLevels;
+
+ if (!fixed) {
+ struct kite_deflate_table *t = s->tab;
+
+ mainCodes = t->mainCodes; distCodes = t->distCodes;
+ litLenLevels = t->litLenLevels; distLevels = t->distLevels;
+
+ Huffman_ReverseBits(mainCodes, litLenLevels, kMainTableSize);
+ Huffman_ReverseBits(distCodes, distLevels, kDistTableSize32);
+ } else {
+ mainCodes = kstaticHuff_mainCodes;
+ distCodes = kstaticHuff_distCodes;
+
+ litLenLevels = kstaticHuff_litLenLevels;
+ distLevels = NULL;
+ }
+
+ for (i = 0; i < s->symbols; ++i) {
+ struct kite_deflate_symbol *sym = &s->sym[i];
+
+ if (sym->len < kMatchMinLen) { /* literal */
+ writebits(s, mainCodes[sym->dist],
+ litLenLevels[sym->dist]);
+ } else {
+ unsigned int lenSlot, distSlot;
+ unsigned int lc = sym->len - kMatchMinLen;
+
+ lenSlot = g_LenSlots[lc];
+ writebits(s, mainCodes[kSymbolMatch + lenSlot],
+ litLenLevels[kSymbolMatch + lenSlot]);
+ writebits(s, lc - kLenStart32[lenSlot],
+ kLenExtraBits32[lenSlot]);
+
+ distSlot = deflateDistSlot(sym->dist - 1);
+ writebits(s, distCodes[distSlot],
+ fixed ? 5 : distLevels[distSlot]);
+ writebits(s, sym->dist - 1 - kDistStart[distSlot],
+ kDistExtraBits[distSlot]);
+ }
+ }
+ writebits(s, mainCodes[kSymbolEndOfBlock],
+ litLenLevels[kSymbolEndOfBlock]);
+}
+
+static u32 Huffman_GetPrice(const u32 *freqs, const u8 *lens, u32 num)
+{
+ u32 price = 0;
+
+ while (num) {
+ price += (*lens++) * (*freqs++);
+ --num;
+ }
+ return price;
+}
+
+static u32 Huffman_GetPriceEx(const u32 *freqs, const u8 *lens, u32 num,
+ const u8 *extraBits, u32 extraBase)
+{
+ return Huffman_GetPrice(freqs, lens, num) +
+ Huffman_GetPrice(freqs + extraBase, extraBits, num - extraBase);
+}
+
+/* Adapted from C/HuffEnc.c (7zip) for now */
+#define HeapSortDown(p, k, size, temp) \
+ { for (;;) { \
+ size_t s = (k << 1); \
+ if (s > size) break; \
+ if (s < size && p[s + 1] > p[s]) s++; \
+ if (temp >= p[s]) break; \
+ p[k] = p[s]; k = s; \
+ } p[k] = temp; }
+
+static void HeapSort(u32 *p, size_t size)
+{
+ if (size <= 1)
+ return;
+ p--;
+ {
+ size_t i = size / 2;
+ do
+ {
+ u32 temp = p[i];
+ size_t k = i;
+ HeapSortDown(p, k, size, temp)
+ }
+ while (--i != 0);
+ }
+ /*
+ do
+ {
+ size_t k = 1;
+ UInt32 temp = p[size];
+ p[size--] = p[1];
+ HeapSortDown(p, k, size, temp)
+ }
+ while (size > 1);
+ */
+ while (size > 3)
+ {
+ u32 temp = p[size];
+ size_t k = (p[3] > p[2]) ? 3 : 2;
+ p[size--] = p[1];
+ p[1] = p[k];
+ HeapSortDown(p, k, size, temp)
+ }
+ {
+ u32 temp = p[size];
+ p[size] = p[1];
+ if (size > 2 && p[2] < temp)
+ {
+ p[1] = p[2];
+ p[2] = temp;
+ }
+ else
+ p[1] = temp;
+ }
+}
+
+#define NUM_BITS 10
+#define MASK (((unsigned)1 << NUM_BITS) - 1)
+
/*
 * Build an optimal length-limited Huffman code (7-Zip HuffEnc flavor).
 *
 * @freqs:      per-symbol frequencies (input)
 * @p:          scratch buffer; on return it holds the canonical code for
 *              each symbol (assigned by deflate_genhuffcodes())
 * @lens:       resulting code length per symbol; 0 = symbol unused
 * @numSymbols: alphabet size
 * @maxLen:     maximum permitted code length
 *
 * Active symbols are packed as (freq << NUM_BITS) | symbol so that sorting
 * the raw u32 values orders them by frequency (symbol index breaks ties).
 */
static void Huffman_Generate(const u32 *freqs, u32 *p, u8 *lens,
			     unsigned int numSymbols, unsigned int maxLen)
{
	u32 num, i;

	num = 0;
	/* if (maxLen > 10) maxLen = 10; */

	/* collect used symbols; unused symbols get length 0 immediately */
	for (i = 0; i < numSymbols; i++) {
		u32 freq = freqs[i];

		if (!freq)
			lens[i] = 0;
		else
			p[num++] = i | (freq << NUM_BITS);
	}
	HeapSort(p, num);

	/*
	 * Degenerate alphabet (zero or one used symbols): still emit two
	 * 1-bit codes so the decoder always sees a well-formed tree.
	 */
	if (num < 2) {
		unsigned int minCode = 0, maxCode = 1;

		if (num == 1) {
			maxCode = (unsigned int)p[0] & MASK;
			if (!maxCode)
				maxCode++;
		}
		p[minCode] = 0;
		p[maxCode] = 1;
		lens[minCode] = lens[maxCode] = 1;
		return;
	}

	{
		u32 b, e, i;

		/*
		 * In-place tree construction over the frequency-sorted array:
		 * `i` scans remaining leaves, `b` scans already-built internal
		 * nodes, `e` is the slot of the next internal node.  Each
		 * entry keeps its symbol in the low NUM_BITS and records its
		 * parent index in the high bits.
		 */
		i = b = e = 0;
		do {
			u32 n, m, freq;

			n = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
			freq = (p[n] & ~MASK);
			p[n] = (p[n] & MASK) | (e << NUM_BITS);
			m = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
			freq += (p[m] & ~MASK);
			p[m] = (p[m] & MASK) | (e << NUM_BITS);
			p[e] = (p[e] & MASK) | freq;
			e++;
		} while (num - e > 1);

		{
			u32 lenCounters[kMaxLen + 1];

			for (i = 0; i <= kMaxLen; i++)
				lenCounters[i] = 0;

			/* walk parents root-to-leaves: depth = parent depth + 1 */
			p[--e] &= MASK;
			lenCounters[1] = 2;
			while (e > 0) {
				u32 len = (p[p[--e] >> NUM_BITS] >> NUM_BITS) + 1;

				p[e] = (p[e] & MASK) | (len << NUM_BITS);
				/*
				 * Length limiting: an overlong code is demoted
				 * by stealing a slot from the deepest level
				 * still populated below maxLen.
				 */
				if (len >= maxLen)
					for (len = maxLen - 1; lenCounters[len] == 0; len--);
				lenCounters[len]--;
				lenCounters[(size_t)len + 1] += 2;
			}

			/*
			 * Hand out lengths: lowest-frequency symbols sit at
			 * the front of p[] and receive the longest codes.
			 */
			{
				u32 len;

				i = 0;
				for (len = maxLen; len != 0; len--) {
					u32 k;
					for (k = lenCounters[len]; k != 0; k--)
						lens[p[i++] & MASK] = (u8)len;
				}
			}
			deflate_genhuffcodes(lens, p, numSymbols, lenCounters);
		}
	}
}
+
/*
 * (Re)build the dynamic Huffman tables for the current block and recompute
 * s->costbits, the exact bit cost of encoding it as a dynamic block.
 * Does nothing unless symbol frequencies changed since the last call.
 */
static void kite_deflate_fixdynblock(struct kite_deflate *s)
{
	struct kite_deflate_table *t = s->tab;
	unsigned int numlitlens, numdistlens, numblcodes;
	u32 levelFreqs[kLensTableSize] = {0};
	u32 opt_mainlen;

	if (!s->freq_changed)
		return;

	/* in order to match zlib */
	s->numHuffBits = kMaxLen;
//	s->numHuffBits = (s->symbols > 18000 ? 12 :
//		(s->symbols > 7000 ? 11 : (s->symbols > 2000 ? 10 : 9)));

	Huffman_Generate(s->mainFreqs, t->mainCodes, t->litLenLevels,
			 kMainTableSize, s->numHuffBits);
	Huffman_Generate(s->distFreqs, t->distCodes, t->distLevels,
			 kDistTableSize32, s->numHuffBits);

	/* code lengths for the literal/length alphabet (trim trailing zeros) */
	numlitlens = kMainTableSize;
	while (numlitlens > kNumLitLenCodesMin &&
	       !t->litLenLevels[numlitlens - 1])
		--numlitlens;

	/* code lengths for the distance alphabet (trim trailing zeros) */
	numdistlens = kDistTableSize32;
	while (numdistlens > kNumDistCodesMin &&
	       !t->distLevels[numdistlens - 1])
		--numdistlens;

	/* build the code-length ("level") alphabet covering both sets */
	kite_deflate_scanlens(numlitlens, t->litLenLevels, levelFreqs);
	kite_deflate_scanlens(numdistlens, t->distLevels, levelFreqs);
	Huffman_Generate(levelFreqs, t->levelCodes, t->levelLens,
			 kLensTableSize, 7);
	numblcodes = kLensTableSize;
	while (numblcodes > kNumLensCodesMin &&
	       !t->levelLens[kCodeLengthAlphabetOrder[numblcodes - 1]])
		--numblcodes;

	t->numlitlens = numlitlens;
	t->numdistlens = numdistlens;
	t->numblcodes = numblcodes;

	opt_mainlen = Huffman_GetPriceEx(s->mainFreqs, t->litLenLevels,
			kMainTableSize, kLenExtraBits32, kSymbolMatch) +
		Huffman_GetPriceEx(s->distFreqs, t->distLevels,
			kDistTableSize32, kDistExtraBits, 0);
	/*
	 * Header cost: 3 block-header bits + 5 (HLIT) + 5 (HDIST) +
	 * 4 (HCLEN) + 3 bits per transmitted code-length code (RFC 1951).
	 */
	s->costbits = 3 + 5 + 5 + 4 + 3 * numblcodes +
		Huffman_GetPriceEx(levelFreqs, t->levelLens,
			kLensTableSize, kLevelExtraBits, kTableDirectLevels) +
		opt_mainlen;
	s->freq_changed = false;
}
+
+
/*
 * An array entry used by the LZ-based encoder to hold the length-distance
 * pairs found by the LZ matchfinder.  For literals, len < kMatchMinLen and
 * dist carries the literal byte itself.
 */
struct kite_match {
	unsigned int len;
	unsigned int dist;
};
+
/* Hash-chain LZ77 matchfinder state (3-byte hash, zlib-like tuning). */
struct kite_matchfinder {
	/* pointer to buffer with data to be compressed */
	const u8 *buffer;

	/* indicate the first byte that doesn't contain valid input data */
	const u8 *end;

	/* LZ matchfinder hash chain representation */
	u32 *hash, *chain;

	/* position bias advanced on every reset to invalidate stale hashes */
	u32 base;

	/* indicate the next byte to run through the match finder */
	u32 offset;

	/* current slot within the circular chain table */
	u32 cyclic_pos;

	/* maximum length of a match that the matchfinder will try to find. */
	u16 nice_len;

	/* the total sliding window size */
	u16 wsiz;

	/* how many rounds a matchfinder searches on a hash chain for */
	u16 depth;

	/* do not perform lazy search no less than this match length */
	u16 max_lazy;

	/* reduce lazy search no less than this match length */
	u8 good_len;

	/* current match for lazy matching */
	struct kite_match *matches;
	/* double buffer: matches at position p vs. p + 1 (lazy evaluation) */
	struct kite_match matches_matrix[2][4];
};
+
/*
 * This mysterious table is just the CRC of each possible byte. It can be
 * computed using the standard bit-at-a-time methods. The polynomial can
 * be seen in entry 128, 0x8408. This corresponds to x^0 + x^5 + x^12.
 * Add the implicit x^16, and you have the standard CRC-CCITT.
 *
 * Here it is used to mix the third byte into the matchfinder's 3-byte
 * hash (see kite_mf_getmatches_hc3()).
 */
u16 const crc_ccitt_table[256] __attribute__((__aligned__(128))) = {
	0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf,
	0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7,
	0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e,
	0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876,
	0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd,
	0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5,
	0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c,
	0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974,
	0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb,
	0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3,
	0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a,
	0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72,
	0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9,
	0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1,
	0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738,
	0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70,
	0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7,
	0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff,
	0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036,
	0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e,
	0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5,
	0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd,
	0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134,
	0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c,
	0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3,
	0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb,
	0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232,
	0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a,
	0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1,
	0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9,
	0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330,
	0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78
};
+
/*
 * Search the hash chain for matches at the current position and advance
 * the matchfinder by one byte.
 *
 * @depth:   maximum number of chain links to follow (0 = only index this
 *           position; used when skipping over an already-accepted match)
 * @bestlen: only record matches strictly longer than this
 *
 * Matches of strictly increasing length are appended to mf->matches[1..];
 * returns the number of matches recorded.
 */
int kite_mf_getmatches_hc3(struct kite_matchfinder *mf, u16 depth, u16 bestlen)
{
	const u8 *cur = mf->buffer + mf->offset;
	const u8 *qbase = mf->buffer - mf->base;
	u32 curMatch;
	unsigned int v, hv, i, k, p, wsiz;

	if (mf->end - cur < bestlen + 1)
		return 0;

	/* 3-byte hash: 16-bit word at cur, xor'ed with CRC of the 3rd byte */
	v = get_unaligned((u16 *)cur);
	hv = v ^ crc_ccitt_table[cur[2]];
	curMatch = mf->hash[hv];
	p = mf->base + mf->offset;	/* biased absolute position */
	mf->hash[hv] = p;
	mf->chain[mf->cyclic_pos] = curMatch;
	wsiz = mf->wsiz;
	k = 1;

	if (depth) {
		unsigned int wpos = wsiz + mf->cyclic_pos;

		/* hv is reused here as the maximum length worth matching */
		hv = min_t(unsigned int, mf->nice_len, mf->end - cur);
		DBG_BUGON(hv > kMatchMaxLen32);
		do {
			unsigned int diff = p - curMatch;
			const u8 *q;

			if (diff >= wsiz)
				break;	/* candidate left the window */

			q = qbase + curMatch;
			curMatch = mf->chain[(wpos - diff) & (wsiz - 1)];
			/*
			 * Cheap rejects first: the leading 16-bit word, the
			 * bytes straddling the current best length, then the
			 * full middle comparison.
			 */
			if (v == get_unaligned((u16 *)q) && (bestlen < 3 || (
			     get_unaligned((u16 *)(cur + bestlen - 1)) ==
				get_unaligned((u16 *)(q + bestlen - 1)) &&
			     !memcmp(cur + 3, q + 3, bestlen - 3)))) {
				DBG_BUGON(cur[2] != q[2]);
				/* extend the match past bestlen + 1 */
				i = erofs_memcmp2(cur + bestlen + 1,
						  q + bestlen + 1, hv - bestlen - 1);
				bestlen += 1 + i;

				/* cap at 4 entries; overwrite the last slot */
				k -= (k >= ARRAY_SIZE(mf->matches_matrix[0]));
				mf->matches[k++] = (struct kite_match) {
					.len = bestlen,
					.dist = diff,
				};
				if (bestlen >= hv)
					break;
			}
		} while (--depth);
	}
	mf->offset++;
	mf->cyclic_pos = (mf->cyclic_pos + 1) & (wsiz - 1);
	return k - 1;
}
+
/* let's align with zlib; indexed by compression level 1..9 (0 rejected) */
static const struct kite_matchfinder_cfg {
	u16 good_length;	/* reduce lazy search above this match length */
	u16 max_lazy;		/* do not perform lazy search above this match length */
	u16 nice_length;	/* quit search above this match length */
	u16 depth;
	bool lazy_search;
} kite_mfcfg[10] = {
/*        good lazy nice depth */
/* 0 */ {0,   0,   0,    0, false},	/* store only [unsupported] */
/* 1 */ {4,   4,   8,    4, false},	/* maximum speed, no lazy matches */
/* 2 */ {4,   5,  16,    8, false},
/* 3 */ {4,   6,  32,   32, false},

/* 4 */ {4,   4,  16,   16, true},	/* lazy matches */
/* 5 */ {8,  16,  32,   32, true},
/* 6 */ {8,  16, 128,  128, true},
/* 7 */ {8,  32, 128,  256, true},
/* 8 */ {32, 128, 258, 1024, true},
/* 9 */ {32, 258, 258, 4096, true},	/* maximum compression */
};
+
+static int kite_mf_init(struct kite_matchfinder *mf, int wsiz, int level)
+{
+ const struct kite_matchfinder_cfg *cfg;
+
+ if (!level || level >= ARRAY_SIZE(kite_mfcfg))
+ return -EINVAL;
+ cfg = &kite_mfcfg[level];
+
+ if (wsiz > kHistorySize32 || (1 << ilog2(wsiz)) != wsiz)
+ return -EINVAL;
+
+ mf->hash = calloc(0x10000, sizeof(mf->hash[0]));
+ if (!mf->hash)
+ return -ENOMEM;
+
+ mf->chain = malloc(sizeof(mf->chain[0]) * wsiz);
+ if (!mf->chain) {
+ free(mf->hash);
+ mf->hash = NULL;
+ return -ENOMEM;
+ }
+ mf->wsiz = wsiz;
+
+ mf->good_len = cfg->good_length;
+ mf->nice_len = cfg->nice_length;
+ mf->depth = cfg->depth;
+ mf->max_lazy = cfg->max_lazy;
+ return cfg->lazy_search;
+}
+
/*
 * Rebind the matchfinder to a new input buffer without clearing the
 * 64K-entry hash table.
 */
static void kite_mf_reset(struct kite_matchfinder *mf,
			  const void *buffer, const void *end)
{
	mf->buffer = buffer;
	mf->end = end;

	/*
	 * Set the initial value as max_distance + 1. This would avoid hash
	 * zero initialization: stale hash entries now always appear at least
	 * a full window away and get rejected by the `diff >= wsiz` check.
	 * NOTE: must read the old mf->offset before it is cleared below.
	 */
	mf->base += mf->offset + kHistorySize32 + 1;

	mf->offset = 0;
	mf->cyclic_pos = 0;

	/* matches[0] doubles as the pending-literal slot for lazy matching */
	mf->matches = mf->matches_matrix[0];
	mf->matches_matrix[0][0].len =
		mf->matches_matrix[1][0].len = kMatchMinLen - 1;
}
+
/*
 * Account one literal (lenSlot holds the literal byte) or one match
 * (lenSlot/distSlot) into the running frequency tables and the estimated
 * block bit cost.
 *
 * Returns false — rolling all bookkeeping back — when appending this
 * symbol would make the block exceed the remaining output space.
 */
static bool deflate_count_code(struct kite_deflate *s, bool literal,
			       unsigned int lenSlot, unsigned int distSlot)
{
	struct kite_deflate_table *t = s->tab;
	unsigned int lenbase = (literal ? 0 : kSymbolMatch);
	u64 rem = (s->outlen - s->pos_out) * 8 - s->bitpos;	/* bits left */
	bool recalc = false;
	unsigned int bits;

	s->freq_changed = true;
	++s->mainFreqs[lenbase + lenSlot];
	if (!literal)
		++s->distFreqs[distSlot];

	/* encode_mode 1: price with the fixed (static) Huffman tables */
	if (s->encode_mode == 1) {
		if (literal) {
			bits = kstaticHuff_litLenLevels[lenSlot];
			goto out;
		}
		bits = kstaticHuff_litLenLevels[kSymbolMatch + lenSlot] +
			kLenExtraBits32[lenSlot] + 5 + kDistExtraBits[distSlot];
		goto out;
	}

	/* XXX: more ideas to be done later */
	/*
	 * A symbol with no code in the current dynamic table forces a table
	 * rebuild in the spare table (s->tables holds two of them).
	 */
	recalc |= (!literal && !t->distLevels[distSlot]);
	recalc |= !t->litLenLevels[lenbase + lenSlot];
	if (recalc) {
		kite_dbg("recalc %c lS %u dS %u", literal ? 'l' : 'm',
			 lenSlot, distSlot);
		s->tab = s->tables + (s->tab == s->tables);	/* flip tables */
		kite_deflate_fixdynblock(s);
		bits = 0;
		goto out;
	}

	if (literal) {
		bits = t->litLenLevels[lenSlot];
		goto out;
	}

	bits = t->distLevels[distSlot] + kDistExtraBits[distSlot] +
		t->litLenLevels[kSymbolMatch + lenSlot] +
		kLenExtraBits32[lenSlot];
out:
	if (rem < s->costbits + bits) {
		/* does not fit: undo frequency updates and the table flip */
		--s->mainFreqs[lenbase + lenSlot];
		if (!literal)
			--s->distFreqs[distSlot];
		if (recalc)
			s->tab = s->tables + (s->tab == s->tables);
		return false;
	}
	s->costbits += bits;
	return true;
}
+
/*
 * Record one symbol into the current block: a literal when
 * match->len < kMatchMinLen (match->dist then carries the literal byte),
 * otherwise a length/distance pair.
 *
 * Returns true when the block is full and must be terminated; in that
 * case the symbol was NOT added and s->encode_mode is left at the cheaper
 * of the fixed/dynamic encodings just tried.
 */
static bool kite_deflate_tally(struct kite_deflate *s,
			       struct kite_match *match)
{
	struct kite_deflate_symbol *sym = s->sym + s->symbols;
	u32 fixedcost = ~0;
	bool hassp;

	*sym = (struct kite_deflate_symbol) {
		.len = match->len,
		.dist = match->dist,
	};

retry:
	if (sym->len < kMatchMinLen) {
		hassp = deflate_count_code(s, true, sym->dist, 0);
	} else {
		unsigned int lc = sym->len - kMatchMinLen;
		unsigned int lenSlot = g_LenSlots[lc];
		unsigned int distSlot = deflateDistSlot(sym->dist - 1);

		hassp = deflate_count_code(s, false, lenSlot, distSlot);
	}

	if (!hassp) {
		/* out of space with fixed trees: retry priced dynamically */
		if (s->encode_mode == 1) {
			fixedcost = s->costbits;
			s->encode_mode = 2;
			goto retry;
		}
		s->lastblock = true;
		if (fixedcost <= s->costbits)
			s->encode_mode = 1;
		return true;
	}
	++s->symbols;
	return false;
}
+
/*
 * Emit the pending input range as stored (uncompressed) blocks of at most
 * 65535 bytes each, per the DEFLATE stored-block format (LEN + ~LEN).
 */
static void kite_deflate_writestore(struct kite_deflate *s)
{
	/* fb: nothing has been written yet (very first block of the stream) */
	bool fb = !s->startpos && !s->bitpos;
	unsigned int totalsiz = s->pos_in - s->prev_valid - s->startpos;

	do {
		unsigned int len = min_t(unsigned int, totalsiz, 65535);

		totalsiz -= len;
		/*
		 * 3-bit header: BFINAL (only on the last chunk) + BTYPE=00.
		 * NOTE(review): on the first block one extra high bit is
		 * emitted (3 + fb bits with bit 3 set) — presumably
		 * format-specific; confirm against the decoder side.
		 */
		writebits(s, (fb << 3) | (kStored << 1) |
			  (s->lastblock && !totalsiz), 3 + fb);
		flushbits(s);
		writebits(s, len, 16);
		writebits(s, len ^ 0xffff, 16);	/* one's complement check */
		flushbits(s);
		memcpy(s->out + s->pos_out, s->in + s->startpos, len);
		s->pos_out += len;
		s->startpos += len;
	} while (totalsiz);
}
+
/*
 * Pick the cheapest encoding (fixed / dynamic / stored) for the block
 * being closed, and mark the stream finished when the output buffer
 * cannot hold another block.
 */
static void kite_deflate_endblock(struct kite_deflate *s)
{
	if (s->encode_mode == 1) {
		u32 fixedcost = s->costbits;
		unsigned int storelen, storeblocks, storecost;

		kite_deflate_fixdynblock(s);
		if (fixedcost > s->costbits)
			s->encode_mode = 2;
		else
			s->costbits = fixedcost;

		/* also compare against emitting plain stored blocks */
		storelen = s->pos_in - s->prev_valid - s->startpos;
		storeblocks = max(DIV_ROUND_UP(storelen, 65535), 1U);
		storecost = (8 - s->bitpos) + storeblocks - 1 +
			storeblocks * 32 + storelen * 8;
		if (s->costbits > storecost) {
			s->costbits = storecost;
			s->encode_mode = 0;
		}
	}

	/* no room left after this block: finish the stream here */
	s->lastblock |= (s->costbits + s->bitpos >=
			 (s->outlen - s->pos_out) * 8);
}
+
+static void kite_deflate_startblock(struct kite_deflate *s)
+{
+ memset(s->mainFreqs, 0, sizeof(s->mainFreqs));
+ memset(s->distFreqs, 0, sizeof(s->distFreqs));
+ memset(s->tables, 0, sizeof(s->tables[0]));
+ s->symbols = 0;
+ s->mainFreqs[kSymbolEndOfBlock]++;
+ s->encode_mode = 1;
+ s->tab = s->tables;
+ s->costbits = 3 + kstaticHuff_litLenLevels[kSymbolEndOfBlock];
+}
+
+static bool kite_deflate_commitblock(struct kite_deflate *s)
+{
+ if (s->encode_mode == 1) {
+ kite_deflate_setfixedtrees(s);
+ kite_deflate_writeblock(s, true);
+ } else if (s->encode_mode == 2) {
+ kite_deflate_sendtrees(s);
+ kite_deflate_writeblock(s, false);
+ } else {
+ kite_deflate_writestore(s);
+ }
+ s->startpos = s->pos_in - s->prev_valid;
+ return s->lastblock;
+}
+
/*
 * Greedy (non-lazy) compression loop for the lower levels: emit the
 * longest match at each position, or a literal when none is usable.
 * Returns true once the final block has been committed.
 */
static bool kite_deflate_fast(struct kite_deflate *s)
{
	struct kite_matchfinder *mf = s->mf;

	kite_deflate_startblock(s);
	while (1) {
		int matches = kite_mf_getmatches_hc3(mf, mf->depth,
						     kMatchMinLen - 1);

		if (matches) {
			unsigned int len = mf->matches[matches].len;
			unsigned int dist = mf->matches[matches].dist;

			/* minimal matches that are too far cost more than literals */
			if (len == kMatchMinLen && dist > ZLIB_DISTANCE_TOO_FAR)
				goto nomatch;

			kite_dbg("%u matches found: longest [%u,%u] of distance %u",
				 matches, s->pos_in, s->pos_in + len - 1, dist);

			if (kite_deflate_tally(s, mf->matches + matches))
				break;
			s->pos_in += len;
			/* skip the rest bytes */
			while (--len)
				(void)kite_mf_getmatches_hc3(mf, 0, 0);
		} else {
nomatch:
			/* the literal byte is carried in matches[0].dist */
			mf->matches[0].dist = s->in[s->pos_in];
			if (isprint(s->in[s->pos_in]))
				kite_dbg("literal %c pos_in %u", s->in[s->pos_in], s->pos_in);
			else
				kite_dbg("literal %x pos_in %u", s->in[s->pos_in], s->pos_in);

			if (kite_deflate_tally(s, mf->matches))
				break;
			++s->pos_in;
		}

		s->lastblock |= (s->pos_in >= s->inlen);
		if (s->pos_in >= s->inlen || s->symbols >= s->max_symbols) {
			kite_deflate_endblock(s);
			break;
		}
	}
	return kite_deflate_commitblock(s);
}
+
/*
 * Lazy-matching compression loop for the higher levels: before emitting
 * a match found at position p, also search at p + 1 and prefer the longer
 * of the two (zlib-style lazy evaluation).
 * Returns true once the final block has been committed.
 */
static bool kite_deflate_slow(struct kite_deflate *s)
{
	struct kite_matchfinder *mf = s->mf;
	bool flush = false;

	kite_deflate_startblock(s);
	while (1) {
		struct kite_match *prev_matches = mf->matches;
		unsigned int len = kMatchMinLen - 1;
		int matches;
		unsigned int len0;

		/* flip the double buffer: current position vs. previous one */
		mf->matches = mf->matches_matrix[
			mf->matches == mf->matches_matrix[0]];
		mf->matches[0].dist = s->in[s->pos_in];

		len0 = prev_matches[s->prev_longest].len;
		if (len0 < mf->max_lazy) {
			/* halve the search depth once matches are "good" */
			matches = kite_mf_getmatches_hc3(mf, mf->depth >>
					(len0 >= mf->good_len), len0);
			if (matches) {
				len = mf->matches[matches].len;
				if (len == kMatchMinLen &&
				    mf->matches[matches].dist > ZLIB_DISTANCE_TOO_FAR) {
					matches = 0;
					len = kMatchMinLen - 1;
				}
			}
		} else {
			matches = 0;
			(void)kite_mf_getmatches_hc3(mf, 0, 0);
		}

		if (len < len0) {
			/* the previous match wins: emit it, skip its bytes */
			if (kite_deflate_tally(s,
					prev_matches + s->prev_longest))
				break;

			s->pos_in += --len0;
			/* skip the rest bytes */
			while (--len0)
				(void)kite_mf_getmatches_hc3(mf, 0, 0);
			s->prev_valid = false;
			s->prev_longest = 0;
		} else {
			/* current position at least as good: hold it back */
			if (!s->prev_valid)
				s->prev_valid = true;
			else if (kite_deflate_tally(s, prev_matches))
				break;
			++s->pos_in;
			s->prev_longest = matches;
		}

		s->lastblock |= (s->pos_in >= s->inlen);
		if (s->pos_in >= s->inlen) {
			flush = true;
			break;
		}
		if (s->symbols >= s->max_symbols) {
			kite_deflate_endblock(s);
			break;
		}
	}

	/* a held-back symbol may still be pending at end of input */
	if (flush && s->prev_valid) {
		(void)kite_deflate_tally(s, mf->matches + s->prev_longest);
		s->prev_valid = false;
	}
	return kite_deflate_commitblock(s);
}
+
+void kite_deflate_end(struct kite_deflate *s)
+{
+ if (s->mf) {
+ if (s->mf->hash)
+ free(s->mf->hash);
+ if (s->mf->chain)
+ free(s->mf->chain);
+ free(s->mf);
+ }
+ if (s->sym)
+ free(s->sym);
+ free(s);
+}
+
+struct kite_deflate *kite_deflate_init(int level, unsigned int dict_size)
+{
+ struct kite_deflate *s;
+ int err;
+
+ kite_deflate_init_once();
+ s = calloc(1, sizeof(*s));
+ if (!s)
+ return ERR_PTR(-ENOMEM);
+
+ s->max_symbols = 16384;
+ s->sym = malloc(sizeof(s->sym[0]) * s->max_symbols);
+ if (!s->sym) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ s->mf = malloc(sizeof(*s->mf));
+ if (!s->mf) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ if (!dict_size)
+ dict_size = kHistorySize32;
+
+ err = kite_mf_init(s->mf, dict_size, level);
+ if (err < 0)
+ goto err_out;
+
+ s->lazy_search = err;
+ return s;
+err_out:
+ if (s->mf)
+ free(s->mf);
+ if (s->sym)
+ free(s->sym);
+ free(s);
+ return ERR_PTR(err);
+}
+
/*
 * Compress as much of `in` as fits into `target_dstsize` output bytes.
 *
 * On return, *srcsize holds the number of input bytes actually consumed;
 * the compressed length is returned.
 */
int kite_deflate_destsize(struct kite_deflate *s, const u8 *in, u8 *out,
			  unsigned int *srcsize, unsigned int target_dstsize)
{
	/* reset all per-run state up to the frequency tables */
	memset(s, 0, offsetof(struct kite_deflate, mainFreqs));
	s->in = in;
	s->inlen = *srcsize;
	s->out = out;
	s->outlen = target_dstsize;
	kite_mf_reset(s->mf, in, in + s->inlen);

	if (s->lazy_search)
		while (!kite_deflate_slow(s));
	else
		while (!kite_deflate_fast(s));
	flushbits(s);

	*srcsize = s->startpos;
	return s->pos_out;
}
+
+#if TEST
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+int main(int argc, char *argv[])
+{
+ int fd;
+ u64 filelength;
+ u8 out[1048576], *buf;
+ int dstsize = 4096;
+ unsigned int srcsize, outsize;
+ struct kite_deflate *s;
+
+ fd = open(argv[1], O_RDONLY);
+ if (fd < 0)
+ return -errno;
+ if (argc > 2)
+ dstsize = atoi(argv[2]);
+ filelength = lseek(fd, 0, SEEK_END);
+
+ s = kite_deflate_init(9, 0);
+ if (IS_ERR(s))
+ return PTR_ERR(s);
+
+ filelength = lseek(fd, 0, SEEK_END);
+ buf = mmap(NULL, filelength, PROT_READ, MAP_SHARED, fd, 0);
+ if (buf == MAP_FAILED)
+ return -errno;
+ close(fd);
+
+ srcsize = filelength;
+ outsize = kite_deflate_destsize(s, buf, out, &srcsize, dstsize);
+ fd = open("out.txt", O_WRONLY | O_CREAT | O_TRUNC, 0644);
+ write(fd, out, outsize);
+ close(fd);
+ kite_deflate_end(s);
+ return 0;
+}
+#endif
diff --git a/lib/liberofs_uuid.h b/lib/liberofs_uuid.h
new file mode 100644
index 0000000..63b358a
--- /dev/null
+++ b/lib/liberofs_uuid.h
@@ -0,0 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
#ifndef __EROFS_LIB_UUID_H
#define __EROFS_LIB_UUID_H

/* Generate a new 16-byte UUID into `out`. */
void erofs_uuid_generate(unsigned char *out);

/* Format the 16-byte UUID `buf` into `out` as lowercase text. */
void erofs_uuid_unparse_lower(const unsigned char *buf, char *out);

/* Parse textual UUID `in` into 16 bytes `uu`; 0 on success, non-zero
 * otherwise (see the lib implementation for exact error codes). */
int erofs_uuid_parse(const char *in, unsigned char *uu);

#endif
diff --git a/lib/namei.c b/lib/namei.c
index 7b69a59..294d7a3 100644
--- a/lib/namei.c
+++ b/lib/namei.c
@@ -26,11 +26,15 @@
{
int ret, ifmt;
char buf[sizeof(struct erofs_inode_extended)];
+ struct erofs_sb_info *sbi = vi->sbi;
struct erofs_inode_compact *dic;
struct erofs_inode_extended *die;
- const erofs_off_t inode_loc = iloc(vi->nid);
+ erofs_off_t inode_loc;
- ret = dev_read(0, buf, inode_loc, sizeof(*dic));
+ DBG_BUGON(!sbi);
+ inode_loc = erofs_iloc(vi);
+
+ ret = dev_read(sbi, 0, buf, inode_loc, sizeof(*dic));
if (ret < 0)
return -EIO;
@@ -47,7 +51,8 @@
case EROFS_INODE_LAYOUT_EXTENDED:
vi->inode_isize = sizeof(struct erofs_inode_extended);
- ret = dev_read(0, buf + sizeof(*dic), inode_loc + sizeof(*dic),
+ ret = dev_read(sbi, 0, buf + sizeof(*dic),
+ inode_loc + sizeof(*dic),
sizeof(*die) - sizeof(*dic));
if (ret < 0)
return -EIO;
@@ -55,6 +60,7 @@
die = (struct erofs_inode_extended *)buf;
vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount);
vi->i_mode = le16_to_cpu(die->i_mode);
+ vi->i_ino[0] = le32_to_cpu(die->i_ino);
switch (vi->i_mode & S_IFMT) {
case S_IFREG:
@@ -90,6 +96,7 @@
vi->inode_isize = sizeof(struct erofs_inode_compact);
vi->xattr_isize = erofs_xattr_ibody_size(dic->i_xattr_icount);
vi->i_mode = le16_to_cpu(dic->i_mode);
+ vi->i_ino[0] = le32_to_cpu(dic->i_ino);
switch (vi->i_mode & S_IFMT) {
case S_IFREG:
@@ -114,8 +121,8 @@
vi->i_gid = le16_to_cpu(dic->i_gid);
vi->i_nlink = le16_to_cpu(dic->i_nlink);
- vi->i_mtime = sbi.build_time;
- vi->i_mtime_nsec = sbi.build_time_nsec;
+ vi->i_mtime = sbi->build_time;
+ vi->i_mtime_nsec = sbi->build_time_nsec;
vi->i_size = le32_to_cpu(dic->i_size);
if (vi->datalayout == EROFS_INODE_CHUNK_BASED)
@@ -134,10 +141,11 @@
vi->u.chunkformat, vi->nid | 0ULL);
return -EOPNOTSUPP;
}
- vi->u.chunkbits = LOG_BLOCK_SIZE +
+ vi->u.chunkbits = sbi->blkszbits +
(vi->u.chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK);
- } else if (erofs_inode_is_data_compressed(vi->datalayout))
+ } else if (erofs_inode_is_data_compressed(vi->datalayout)) {
return z_erofs_fill_inode(vi);
+ }
return 0;
bogusimode:
erofs_err("bogus i_mode (%o) @ nid %llu", vi->i_mode, vi->nid | 0ULL);
@@ -182,17 +190,18 @@
}
struct nameidata {
+ struct erofs_sb_info *sbi;
erofs_nid_t nid;
unsigned int ftype;
};
-int erofs_namei(struct nameidata *nd,
- const char *name, unsigned int len)
+int erofs_namei(struct nameidata *nd, const char *name, unsigned int len)
{
erofs_nid_t nid = nd->nid;
int ret;
- char buf[EROFS_BLKSIZ];
- struct erofs_inode vi = { .nid = nid };
+ char buf[EROFS_MAX_BLOCK_SIZE];
+ struct erofs_sb_info *sbi = nd->sbi;
+ struct erofs_inode vi = { .sbi = sbi, .nid = nid };
erofs_off_t offset;
ret = erofs_read_inode_from_disk(&vi);
@@ -202,7 +211,7 @@
offset = 0;
while (offset < vi.i_size) {
erofs_off_t maxsize = min_t(erofs_off_t,
- vi.i_size - offset, EROFS_BLKSIZ);
+ vi.i_size - offset, erofs_blksiz(sbi));
struct erofs_dirent *de = (void *)buf;
unsigned int nameoff;
@@ -212,7 +221,7 @@
nameoff = le16_to_cpu(de->nameoff);
if (nameoff < sizeof(struct erofs_dirent) ||
- nameoff >= EROFS_BLKSIZ) {
+ nameoff >= erofs_blksiz(sbi)) {
erofs_err("invalid de[0].nameoff %u @ nid %llu",
nameoff, nid | 0ULL);
return -EFSCORRUPTED;
@@ -234,7 +243,7 @@
static int link_path_walk(const char *name, struct nameidata *nd)
{
- nd->nid = sbi.root_nid;
+ nd->nid = nd->sbi->root_nid;
while (*name == '/')
name++;
@@ -253,7 +262,6 @@
if (ret)
return ret;
- name = p;
/* Skip until no more slashes. */
for (name = p; *name == '/'; ++name)
;
@@ -264,7 +272,7 @@
int erofs_ilookup(const char *path, struct erofs_inode *vi)
{
int ret;
- struct nameidata nd;
+ struct nameidata nd = { .sbi = vi->sbi };
ret = link_path_walk(path, &nd);
if (ret)
diff --git a/lib/rb_tree.c b/lib/rb_tree.c
new file mode 100644
index 0000000..28800a9
--- /dev/null
+++ b/lib/rb_tree.c
@@ -0,0 +1,512 @@
+// SPDX-License-Identifier: Unlicense
+//
+// Based on Julienne Walker's <http://eternallyconfuzzled.com/> rb_tree
+// implementation.
+//
+// Modified by Mirek Rusin <http://github.com/mirek/rb_tree>.
+//
+// This is free and unencumbered software released into the public domain.
+//
+// Anyone is free to copy, modify, publish, use, compile, sell, or
+// distribute this software, either in source code form or as a compiled
+// binary, for any purpose, commercial or non-commercial, and by any
+// means.
+//
+// In jurisdictions that recognize copyright laws, the author or authors
+// of this software dedicate any and all copyright interest in the
+// software to the public domain. We make this dedication for the benefit
+// of the public at large and to the detriment of our heirs and
+// successors. We intend this dedication to be an overt act of
+// relinquishment in perpetuity of all present and future rights to this
+// software under copyright law.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+// For more information, please refer to <http://unlicense.org/>
+//
+
+#include "rb_tree.h"
+
+// rb_node
+
+struct rb_node *
+rb_node_alloc () {
+ return malloc(sizeof(struct rb_node));
+}
+
+struct rb_node *
+rb_node_init (struct rb_node *self, void *value) {
+ if (self) {
+ self->red = 1;
+ self->link[0] = self->link[1] = NULL;
+ self->value = value;
+ }
+ return self;
+}
+
/* Allocate and initialize a node in one step; NULL on failure. */
struct rb_node *
rb_node_create (void *value) {
    struct rb_node *node = rb_node_alloc();
    return rb_node_init(node, value);
}
+
/* Release a node; free(NULL) is a no-op, so no guard is needed. */
void
rb_node_dealloc (struct rb_node *self) {
    free(self);
}
+
+static int
+rb_node_is_red (const struct rb_node *self) {
+ return self ? self->red : 0;
+}
+
+static struct rb_node *
+rb_node_rotate (struct rb_node *self, int dir) {
+ struct rb_node *result = NULL;
+ if (self) {
+ result = self->link[!dir];
+ self->link[!dir] = result->link[dir];
+ result->link[dir] = self;
+ self->red = 1;
+ result->red = 0;
+ }
+ return result;
+}
+
+static struct rb_node *
+rb_node_rotate2 (struct rb_node *self, int dir) {
+ struct rb_node *result = NULL;
+ if (self) {
+ self->link[!dir] = rb_node_rotate(self->link[!dir], !dir);
+ result = rb_node_rotate(self, dir);
+ }
+ return result;
+}
+
+// rb_tree - default callbacks
+
+int
+rb_tree_node_cmp_ptr_cb (struct rb_tree *self, struct rb_node *a, struct rb_node *b) {
+ return (a->value > b->value) - (a->value < b->value);
+}
+
/* Convenience callback: free the node itself (user data untouched). */
void
rb_tree_node_dealloc_cb (struct rb_tree *self, struct rb_node *node) {
    if (self != NULL && node != NULL)
        rb_node_dealloc(node);
}
+
+// rb_tree
+
+struct rb_tree *
+rb_tree_alloc () {
+ return malloc(sizeof(struct rb_tree));
+}
+
+struct rb_tree *
+rb_tree_init (struct rb_tree *self, rb_tree_node_cmp_f node_cmp_cb) {
+ if (self) {
+ self->root = NULL;
+ self->size = 0;
+ self->cmp = node_cmp_cb ? node_cmp_cb : rb_tree_node_cmp_ptr_cb;
+ }
+ return self;
+}
+
+struct rb_tree *
+rb_tree_create (rb_tree_node_cmp_f node_cb) {
+ return rb_tree_init(rb_tree_alloc(), node_cb);
+}
+
/*
 * Destroy a whole tree.  If `node_cb` is given, it is invoked on every
 * node (use rb_tree_node_dealloc_cb to free nodes created by
 * rb_tree_insert); the tree object itself is always freed.
 */
void
rb_tree_dealloc (struct rb_tree *self, rb_tree_node_f node_cb) {
    if (self) {
        if (node_cb) {
            struct rb_node *node = self->root;
            struct rb_node *save = NULL;

            // Rotate away the left links so that
            // we can treat this like the destruction
            // of a linked list
            while (node) {
                if (node->link[0] == NULL) {

                    // No left links, just kill the node and move on
                    save = node->link[1];
                    node_cb(self, node);
                    node = NULL;
                } else {

                    // Rotate away the left link and check again
                    save = node->link[0];
                    node->link[0] = save->link[1];
                    save->link[1] = node;
                }
                node = save;
            }
        }
        free(self);
    }
}
+
/*
 * Debug validator: recursively checks red-black invariants below `root`
 * (no red node with a red child, BST ordering, equal black heights).
 * Returns the subtree's black height, or 0 when a violation was found
 * (violations are also reported on stdout).
 */
int
rb_tree_test (struct rb_tree *self, struct rb_node *root) {
    int lh, rh;

    if ( root == NULL )
        return 1;
    else {
        struct rb_node *ln = root->link[0];
        struct rb_node *rn = root->link[1];

        /* Consecutive red links */
        if (rb_node_is_red(root)) {
            if (rb_node_is_red(ln) || rb_node_is_red(rn)) {
                printf("Red violation");
                return 0;
            }
        }

        lh = rb_tree_test(self, ln);
        rh = rb_tree_test(self, rn);

        /* Invalid binary search tree */
        if ( ( ln != NULL && self->cmp(self, ln, root) >= 0 )
            || ( rn != NULL && self->cmp(self, rn, root) <= 0))
        {
            puts ( "Binary tree violation" );
            return 0;
        }

        /* Black height mismatch */
        if ( lh != 0 && rh != 0 && lh != rh ) {
            puts ( "Black violation" );
            return 0;
        }

        /* Only count black links */
        if ( lh != 0 && rh != 0 )
            return rb_node_is_red ( root ) ? lh : lh + 1;
        else
            return 0;
    }
}
+
+void *
+rb_tree_find(struct rb_tree *self, void *value) {
+ void *result = NULL;
+ if (self) {
+ struct rb_node node = { .value = value };
+ struct rb_node *it = self->root;
+ int cmp = 0;
+ while (it) {
+ if ((cmp = self->cmp(self, it, &node))) {
+
+ // If the tree supports duplicates, they should be
+ // chained to the right subtree for this to work
+ it = it->link[cmp < 0];
+ } else {
+ break;
+ }
+ }
+ result = it ? it->value : NULL;
+ }
+ return result;
+}
+
// Allocate a node wrapping `value` and insert it into the tree.
// Returns 1 on success, 0 otherwise (allocation or insertion failure).
int
rb_tree_insert (struct rb_tree *self, void *value) {
    struct rb_node *node = rb_node_create(value);
    return rb_tree_insert_node(self, node);
}
+
// Returns 1 on success, 0 otherwise.
//
// Top-down red-black insertion (Julienne Walker style): walk down from a
// false root, fixing red violations with color flips and rotations along
// the way so that no bottom-up rebalancing pass is needed.
//
// NOTE(review): when an equal node is already present, the loop breaks
// without linking `node`, yet size is still incremented, 1 is returned
// and `node` leaks — confirm callers never insert duplicates.
int
rb_tree_insert_node (struct rb_tree *self, struct rb_node *node) {
    if (self && node) {
        if (self->root == NULL) {
            self->root = node;
        } else {
            struct rb_node head = { 0 }; // False tree root
            struct rb_node *g, *t;       // Grandparent & parent
            struct rb_node *p, *q;       // Iterator & parent
            int dir = 0, last = 0;

            // Set up our helpers
            t = &head;
            g = p = NULL;
            q = t->link[1] = self->root;

            // Search down the tree for a place to insert
            while (1) {
                if (q == NULL) {

                    // Insert node at the first null link.
                    p->link[dir] = q = node;
                } else if (rb_node_is_red(q->link[0]) && rb_node_is_red(q->link[1])) {

                    // Simple red violation: color flip
                    q->red = 1;
                    q->link[0]->red = 0;
                    q->link[1]->red = 0;
                }

                if (rb_node_is_red(q) && rb_node_is_red(p)) {

                    // Hard red violation: rotations necessary
                    int dir2 = t->link[1] == g;
                    if (q == p->link[last]) {
                        t->link[dir2] = rb_node_rotate(g, !last);
                    } else {
                        t->link[dir2] = rb_node_rotate2(g, !last);
                    }
                }

                // Stop working if we inserted a node. This
                // check also disallows duplicates in the tree
                if (self->cmp(self, q, node) == 0) {
                    break;
                }

                last = dir;
                dir = self->cmp(self, q, node) < 0;

                // Move the helpers down
                if (g != NULL) {
                    t = g;
                }

                g = p, p = q;
                q = q->link[dir];
            }

            // Update the root (it may be different)
            self->root = head.link[1];
        }

        // Make the root black for simplified logic
        self->root->red = 0;
        ++self->size;
        return 1;
    }
    return 0;
}
+
+// Returns 1 if the value was removed, 0 otherwise. Optional node callback
+// can be provided to dealloc node and/or user data. Use rb_tree_node_dealloc
+// default callback to deallocate node created by rb_tree_insert(...).
+int
+rb_tree_remove_with_cb (struct rb_tree *self, void *value, rb_tree_node_f node_cb) {
+ if (self->root != NULL) {
+ struct rb_node head = {0}; // False tree root
+ struct rb_node node = { .value = value }; // Value wrapper node
+ struct rb_node *q, *p, *g; // Helpers
+ struct rb_node *f = NULL; // Found item
+ int dir = 1;
+
+ // Set up our helpers
+ q = &head;
+ g = p = NULL;
+ q->link[1] = self->root;
+
+ // Search and push a red node down
+ // to fix red violations as we go
+ while (q->link[dir] != NULL) {
+ int last = dir;
+
+ // Move the helpers down
+ g = p, p = q;
+ q = q->link[dir];
+ dir = self->cmp(self, q, &node) < 0;
+
+ // Save the node with matching value and keep
+ // going; we'll do removal tasks at the end
+ if (self->cmp(self, q, &node) == 0) {
+ f = q;
+ }
+
+ // Push the red node down with rotations and color flips
+ if (!rb_node_is_red(q) && !rb_node_is_red(q->link[dir])) {
+ if (rb_node_is_red(q->link[!dir])) {
+ p = p->link[last] = rb_node_rotate(q, dir);
+ } else if (!rb_node_is_red(q->link[!dir])) {
+ struct rb_node *s = p->link[!last];
+ if (s) {
+ if (!rb_node_is_red(s->link[!last]) && !rb_node_is_red(s->link[last])) {
+
+ // Color flip
+ p->red = 0;
+ s->red = 1;
+ q->red = 1;
+ } else {
+ int dir2 = g->link[1] == p;
+ if (rb_node_is_red(s->link[last])) {
+ g->link[dir2] = rb_node_rotate2(p, last);
+ } else if (rb_node_is_red(s->link[!last])) {
+ g->link[dir2] = rb_node_rotate(p, last);
+ }
+
+ // Ensure correct coloring
+ q->red = g->link[dir2]->red = 1;
+ g->link[dir2]->link[0]->red = 0;
+ g->link[dir2]->link[1]->red = 0;
+ }
+ }
+ }
+ }
+ }
+
+ // Replace and remove the saved node
+ if (f) {
+ void *tmp = f->value;
+ f->value = q->value;
+ q->value = tmp;
+
+ p->link[p->link[1] == q] = q->link[q->link[0] == NULL];
+
+ if (node_cb) {
+ node_cb(self, q);
+ }
+ q = NULL;
+ }
+
+ // Update the root (it may be different)
+ self->root = head.link[1];
+
+ // Make the root black for simplified logic
+ if (self->root != NULL) {
+ self->root->red = 0;
+ }
+
+ --self->size;
+ }
+ return 1;
+}
+
+int
+rb_tree_remove (struct rb_tree *self, void *value) {
+ int result = 0;
+ if (self) {
+ result = rb_tree_remove_with_cb(self, value, rb_tree_node_dealloc_cb);
+ }
+ return result;
+}
+
+size_t
+rb_tree_size (struct rb_tree *self) {
+ size_t result = 0;
+ if (self) {
+ result = self->size;
+ }
+ return result;
+}
+
+// rb_iter
+
+struct rb_iter *
+rb_iter_alloc () {
+ return malloc(sizeof(struct rb_iter));
+}
+
+struct rb_iter *
+rb_iter_init (struct rb_iter *self) {
+ if (self) {
+ self->tree = NULL;
+ self->node = NULL;
+ self->top = 0;
+ }
+ return self;
+}
+
/* Allocate and initialize an iterator in one step; NULL on failure. */
struct rb_iter *
rb_iter_create () {
    struct rb_iter *it = rb_iter_alloc();
    return rb_iter_init(it);
}
+
/* Release an iterator; free(NULL) is a no-op, so no guard is needed. */
void
rb_iter_dealloc (struct rb_iter *self) {
    free(self);
}
+
// Internal function, init traversal object, dir determines whether
// to begin traversal at the smallest or largest valued node.
// Returns the first value in that order, or NULL for an empty tree.
static void *
rb_iter_start (struct rb_iter *self, struct rb_tree *tree, int dir) {
    void *result = NULL;
    if (self) {
        self->tree = tree;
        self->node = tree->root;
        self->top = 0;

        // Save the path for later traversal
        // NOTE(review): assumes the tree height fits RB_ITER_MAX_HEIGHT;
        // no bounds check is performed on self->path here.
        if (self->node != NULL) {
            while (self->node->link[dir] != NULL) {
                self->path[self->top++] = self->node;
                self->node = self->node->link[dir];
            }
        }

        result = self->node == NULL ? NULL : self->node->value;
    }
    return result;
}
+
// Traverse a red black tree in the user-specified direction (0 asc, 1 desc).
// Uses the saved ancestor path instead of parent pointers; returns the next
// value in order, or NULL once the traversal is exhausted.
static void *
rb_iter_move (struct rb_iter *self, int dir) {
    if (self->node->link[dir] != NULL) {

        // Continue down this branch
        self->path[self->top++] = self->node;
        self->node = self->node->link[dir];
        while ( self->node->link[!dir] != NULL ) {
            self->path[self->top++] = self->node;
            self->node = self->node->link[!dir];
        }
    } else {

        // Move to the next branch
        struct rb_node *last = NULL;
        do {
            if (self->top == 0) {
                self->node = NULL;
                break;
            }
            last = self->node;
            self->node = self->path[--self->top];
        } while (last == self->node->link[dir]);
    }
    return self->node == NULL ? NULL : self->node->value;
}
+
// Begin an ascending traversal at the smallest-valued node.
void *
rb_iter_first (struct rb_iter *self, struct rb_tree *tree) {
    return rb_iter_start(self, tree, 0);
}
+
// Begin a descending traversal at the largest-valued node.
void *
rb_iter_last (struct rb_iter *self, struct rb_tree *tree) {
    return rb_iter_start(self, tree, 1);
}
+
// Step to the next value in ascending order; NULL when exhausted.
void *
rb_iter_next (struct rb_iter *self) {
    return rb_iter_move(self, 1);
}
+
// Step to the previous value (descending order); NULL when exhausted.
void *
rb_iter_prev (struct rb_iter *self) {
    return rb_iter_move(self, 0);
}
diff --git a/lib/rb_tree.h b/lib/rb_tree.h
new file mode 100644
index 0000000..67ec0a7
--- /dev/null
+++ b/lib/rb_tree.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: Unlicense */
+//
+// Based on Julienne Walker's <http://eternallyconfuzzled.com/> rb_tree
+// implementation.
+//
+// Modified by Mirek Rusin <http://github.com/mirek/rb_tree>.
+//
+// This is free and unencumbered software released into the public domain.
+//
+// Anyone is free to copy, modify, publish, use, compile, sell, or
+// distribute this software, either in source code form or as a compiled
+// binary, for any purpose, commercial or non-commercial, and by any
+// means.
+//
+// In jurisdictions that recognize copyright laws, the author or authors
+// of this software dedicate any and all copyright interest in the
+// software to the public domain. We make this dedication for the benefit
+// of the public at large and to the detriment of our heirs and
+// successors. We intend this dedication to be an overt act of
+// relinquishment in perpetuity of all present and future rights to this
+// software under copyright law.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+// For more information, please refer to <http://unlicense.org/>
+//
+
+#ifndef __RB_TREE_H__
+#define __RB_TREE_H__ 1
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#ifndef RB_ITER_MAX_HEIGHT
+#define RB_ITER_MAX_HEIGHT 64 // Tallest allowable tree to iterate
+#endif
+
+struct rb_node;
+struct rb_tree;
+
+typedef int (*rb_tree_node_cmp_f) (struct rb_tree *self, struct rb_node *a, struct rb_node *b);
+typedef void (*rb_tree_node_f) (struct rb_tree *self, struct rb_node *node);
+
+struct rb_node {
+    int red;                 // Color red (1), black (0)
+    struct rb_node *link[2]; // Link left [0] and right [1]
+    void *value;             // User provided, used indirectly via rb_tree_node_cmp_f.
+};
+
+struct rb_tree {
+    struct rb_node *root;
+    rb_tree_node_cmp_f cmp;
+    size_t size;
+    void *info; // User provided, not used by rb_tree.
+};
+
+struct rb_iter {
+    struct rb_tree *tree;
+    struct rb_node *node;                     // Current node
+    struct rb_node *path[RB_ITER_MAX_HEIGHT]; // Traversal path
+    size_t top;                               // Top of stack
+    void *info;                               // User provided, not used by rb_iter.
+};
+
+int rb_tree_node_cmp_ptr_cb (struct rb_tree *self, struct rb_node *a, struct rb_node *b);
+void rb_tree_node_dealloc_cb (struct rb_tree *self, struct rb_node *node);
+
+// Nullary functions take an explicit (void) prototype: in C, empty
+// parentheses in a declaration mean "unspecified parameters", so callers
+// passing stray arguments would not be diagnosed otherwise.
+struct rb_node *rb_node_alloc (void);
+struct rb_node *rb_node_create (void *value);
+struct rb_node *rb_node_init (struct rb_node *self, void *value);
+void rb_node_dealloc (struct rb_node *self);
+
+struct rb_tree *rb_tree_alloc (void);
+struct rb_tree *rb_tree_create (rb_tree_node_cmp_f cmp);
+struct rb_tree *rb_tree_init (struct rb_tree *self, rb_tree_node_cmp_f cmp);
+void rb_tree_dealloc (struct rb_tree *self, rb_tree_node_f node_cb);
+void *rb_tree_find (struct rb_tree *self, void *value);
+int rb_tree_insert (struct rb_tree *self, void *value);
+int rb_tree_remove (struct rb_tree *self, void *value);
+size_t rb_tree_size (struct rb_tree *self);
+
+int rb_tree_insert_node (struct rb_tree *self, struct rb_node *node);
+int rb_tree_remove_with_cb (struct rb_tree *self, void *value, rb_tree_node_f node_cb);
+
+int rb_tree_test (struct rb_tree *self, struct rb_node *root);
+
+struct rb_iter *rb_iter_alloc (void);
+struct rb_iter *rb_iter_init (struct rb_iter *self);
+struct rb_iter *rb_iter_create (void);
+void rb_iter_dealloc (struct rb_iter *self);
+void *rb_iter_first (struct rb_iter *self, struct rb_tree *tree);
+void *rb_iter_last (struct rb_iter *self, struct rb_tree *tree);
+void *rb_iter_next (struct rb_iter *self);
+void *rb_iter_prev (struct rb_iter *self);
+
+#endif
diff --git a/lib/rebuild.c b/lib/rebuild.c
new file mode 100644
index 0000000..5993730
--- /dev/null
+++ b/lib/rebuild.c
@@ -0,0 +1,409 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <config.h>
+#if defined(HAVE_SYS_SYSMACROS_H)
+#include <sys/sysmacros.h>
+#endif
+#include "erofs/print.h"
+#include "erofs/inode.h"
+#include "erofs/rebuild.h"
+#include "erofs/io.h"
+#include "erofs/dir.h"
+#include "erofs/xattr.h"
+#include "erofs/blobchunk.h"
+#include "erofs/internal.h"
+
+#ifdef HAVE_LINUX_AUFS_TYPE_H
+#include <linux/aufs_type.h>
+#else
+#define AUFS_WH_PFX ".wh."
+#define AUFS_DIROPQ_NAME AUFS_WH_PFX ".opq"
+#define AUFS_WH_DIROPQ AUFS_WH_PFX AUFS_DIROPQ_NAME
+#endif
+
+/*
+ * Create an in-memory directory inode (mode 0755, owned by the calling
+ * user, timestamped with the image build time) and link it into @dir
+ * under the name @s.  Returns the new dentry or an ERR_PTR().
+ */
+static struct erofs_dentry *erofs_rebuild_mkdir(struct erofs_inode *dir,
+						const char *s)
+{
+	struct erofs_inode *inode;
+	struct erofs_dentry *d;
+
+	inode = erofs_new_inode();
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
+
+	inode->i_mode = S_IFDIR | 0755;
+	inode->i_parent = dir;
+	inode->i_uid = getuid();
+	inode->i_gid = getgid();
+	inode->i_mtime = inode->sbi->build_time;
+	inode->i_mtime_nsec = inode->sbi->build_time_nsec;
+	erofs_init_empty_dir(inode);
+
+	d = erofs_d_alloc(dir, s);
+	/* NOTE(review): if erofs_d_alloc() fails, the freshly created
+	 * `inode` is not released here — confirm whether callers reclaim it */
+	if (!IS_ERR(d)) {
+		d->type = EROFS_FT_DIR;
+		d->inode = inode;
+	}
+	return d;
+}
+
+/*
+ * Resolve @path (relative to @pwd) component by component in the in-memory
+ * tree, creating intermediate directories on demand.  Each '/' is
+ * temporarily overwritten with '\0' while the component is matched and
+ * restored afterwards, so @path must be writable.
+ *
+ * When @aufs is set, the final component is checked for aufs markers:
+ * *@opq is set for a ".wh..wh..opq" opaque marker, *@whout for a ".wh."
+ * whiteout prefix (which is stripped before lookup).  When @to_head is
+ * set, every existing directory visited is moved to the head of its
+ * parent's i_subdirs list.
+ *
+ * Returns the dentry of the last component, or an ERR_PTR() on failure.
+ */
+struct erofs_dentry *erofs_rebuild_get_dentry(struct erofs_inode *pwd,
+		char *path, bool aufs, bool *whout, bool *opq, bool to_head)
+{
+	struct erofs_dentry *d = NULL;
+	unsigned int len = strlen(path);
+	char *s = path;
+
+	*whout = false;
+	*opq = false;
+
+	while (s < path + len) {
+		char *slash = memchr(s, '/', path + len - s);
+
+		if (slash) {
+			if (s == slash) {
+				while (*++s == '/');	/* skip '//...' */
+				continue;
+			}
+			*slash = '\0';
+		}
+
+		/* memcmp against "." / ".." includes the terminating NUL,
+		 * so these only match exact component names */
+		if (!memcmp(s, ".", 2)) {
+			/* null */
+		} else if (!memcmp(s, "..", 3)) {
+			pwd = pwd->i_parent;
+		} else {
+			struct erofs_inode *inode = NULL;
+
+			/* aufs markers are only meaningful on the last component */
+			if (aufs && !slash) {
+				if (!memcmp(s, AUFS_WH_DIROPQ, sizeof(AUFS_WH_DIROPQ))) {
+					*opq = true;
+					break;
+				}
+				if (!memcmp(s, AUFS_WH_PFX, sizeof(AUFS_WH_PFX) - 1)) {
+					s += sizeof(AUFS_WH_PFX) - 1;
+					*whout = true;
+				}
+			}
+
+			list_for_each_entry(d, &pwd->i_subdirs, d_child) {
+				if (!strcmp(d->name, s)) {
+					/* a non-dir can never be an intermediate component */
+					if (d->type != EROFS_FT_DIR && slash)
+						return ERR_PTR(-EIO);
+					inode = d->inode;
+					break;
+				}
+			}
+
+			if (inode) {
+				if (to_head) {
+					list_del(&d->d_child);
+					list_add(&d->d_child, &pwd->i_subdirs);
+				}
+				pwd = inode;
+			} else if (!slash) {
+				/* last component missing: create a placeholder dentry */
+				d = erofs_d_alloc(pwd, s);
+				if (IS_ERR(d))
+					return d;
+				d->type = EROFS_FT_UNKNOWN;
+				d->inode = pwd;
+			} else {
+				/* intermediate component missing: make the directory */
+				d = erofs_rebuild_mkdir(pwd, s);
+				if (IS_ERR(d))
+					return d;
+				pwd = d->inode;
+			}
+		}
+		if (slash) {
+			*slash = '/';
+			s = slash + 1;
+		} else {
+			break;
+		}
+	}
+	return d;
+}
+
+/*
+ * Convert a source inode's data mapping into chunk-based indexes that
+ * reference unhashed blob chunks, so the rebuilt image can point back at
+ * the source device data.  Returns 0 or a negative errno.
+ */
+static int erofs_rebuild_fixup_inode_index(struct erofs_inode *inode)
+{
+	int ret;
+	unsigned int count, unit, chunkbits, i;
+	struct erofs_inode_chunk_index *idx;
+	erofs_off_t chunksize;
+	erofs_blk_t blkaddr;
+
+	/* TODO: fill data map in other layouts */
+	if (inode->datalayout != EROFS_INODE_CHUNK_BASED &&
+	    inode->datalayout != EROFS_INODE_FLAT_PLAIN) {
+		erofs_err("%s: unsupported datalayout %d", inode->i_srcpath, inode->datalayout);
+		return -EOPNOTSUPP;
+	}
+
+	if (inode->sbi->extra_devices) {
+		chunkbits = inode->u.chunkbits;
+		if (chunkbits < sbi.blkszbits) {
+			erofs_err("%s: chunk size %u is too small to fit the target block size %u",
+				  inode->i_srcpath, 1U << chunkbits, 1U << sbi.blkszbits);
+			return -EINVAL;
+		}
+	} else {
+		/* one chunk covering the whole file, clamped to the format limit */
+		chunkbits = ilog2(inode->i_size - 1) + 1;
+		if (chunkbits < sbi.blkszbits)
+			chunkbits = sbi.blkszbits;
+		if (chunkbits - sbi.blkszbits > EROFS_CHUNK_FORMAT_BLKBITS_MASK)
+			chunkbits = EROFS_CHUNK_FORMAT_BLKBITS_MASK + sbi.blkszbits;
+	}
+	chunksize = 1ULL << chunkbits;
+	count = DIV_ROUND_UP(inode->i_size, chunksize);
+
+	unit = sizeof(struct erofs_inode_chunk_index);
+	inode->extent_isize = count * unit;
+	/*
+	 * Fix: reserve room for all `count` slots.  The previous
+	 * max(sizeof(*idx), sizeof(void *)) allocated a single entry only,
+	 * so the store loop below overflowed the heap for count > 1.
+	 */
+	idx = malloc(max(inode->extent_isize, sizeof(void *)));
+	if (!idx)
+		return -ENOMEM;
+	inode->chunkindexes = idx;
+
+	for (i = 0; i < count; i++) {
+		struct erofs_blobchunk *chunk;
+		struct erofs_map_blocks map = {
+			.index = UINT_MAX,
+		};
+
+		/* widen before shifting: `i << chunkbits` is evaluated in
+		 * 32-bit arithmetic and overflows for offsets >= 4GiB */
+		map.m_la = (erofs_off_t)i << chunkbits;
+		ret = erofs_map_blocks(inode, &map, 0);
+		if (ret)
+			goto err;
+
+		blkaddr = erofs_blknr(&sbi, map.m_pa);
+		chunk = erofs_get_unhashed_chunk(inode->dev, blkaddr, 0);
+		if (IS_ERR(chunk)) {
+			ret = PTR_ERR(chunk);
+			goto err;
+		}
+		/* stash the chunk pointer in place of the on-disk index for now */
+		*(void **)idx++ = chunk;
+	}
+	inode->datalayout = EROFS_INODE_CHUNK_BASED;
+	inode->u.chunkformat = EROFS_CHUNK_FORMAT_INDEXES;
+	inode->u.chunkformat |= chunkbits - sbi.blkszbits;
+	return 0;
+err:
+	free(inode->chunkindexes);
+	inode->chunkindexes = NULL;
+	return ret;
+}
+
+/*
+ * Finish populating a source inode by file type: encode device numbers,
+ * initialize directories, load symlink targets, and build chunk indexes
+ * for non-empty regular files.  Returns 0 or a negative errno.
+ */
+static int erofs_rebuild_fill_inode(struct erofs_inode *inode)
+{
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFCHR:
+		/* 0,0 char devices act as overlayfs whiteouts */
+		if (erofs_inode_is_whiteout(inode))
+			inode->i_parent->whiteouts = true;
+		/* fallthrough */
+	case S_IFBLK:
+	case S_IFIFO:
+	case S_IFSOCK:
+		inode->i_size = 0;
+		erofs_dbg("\tdev: %d %d", major(inode->u.i_rdev),
+			  minor(inode->u.i_rdev));
+		inode->u.i_rdev = erofs_new_encode_dev(inode->u.i_rdev);
+		return 0;
+	case S_IFDIR:
+		return erofs_init_empty_dir(inode);
+	case S_IFLNK: {
+		int ret;
+
+		inode->i_link = malloc(inode->i_size + 1);
+		if (!inode->i_link)
+			return -ENOMEM;
+		ret = erofs_pread(inode, inode->i_link, inode->i_size, 0);
+		/* fix: only i_size bytes were filled, but the buffer is
+		 * printed with %s below — terminate it explicitly */
+		inode->i_link[inode->i_size] = '\0';
+		erofs_dbg("\tsymlink: %s -> %s", inode->i_srcpath, inode->i_link);
+		return ret;
+	}
+	case S_IFREG:
+		if (inode->i_size)
+			return erofs_rebuild_fixup_inode_index(inode);
+		return 0;
+	default:
+		break;
+	}
+	return -EINVAL;
+}
+
+/*
+ * @parent: parent directory in inode tree
+ * @ctx.dir: parent directory when iterating erofs_iterate_dir()
+ */
+struct erofs_rebuild_dir_context {
+	/* must remain the first member: the dirent callback downcasts
+	 * the erofs_dir_context pointer back to this wrapper */
+	struct erofs_dir_context ctx;
+	struct erofs_inode *parent;
+};
+
+/*
+ * erofs_iterate_dir() callback: merge one on-disk dirent of a source
+ * filesystem into the in-memory tree under rctx->parent.  Entries that
+ * already exist in upper layers win; existing directories are merged
+ * recursively.  Returns 0 or a negative errno.
+ */
+static int erofs_rebuild_dirent_iter(struct erofs_dir_context *ctx)
+{
+	/* ctx is the first member of the wrapper struct, so this cast is safe */
+	struct erofs_rebuild_dir_context *rctx = (void *)ctx;
+	struct erofs_inode *parent = rctx->parent;
+	struct erofs_inode *dir = ctx->dir;
+	struct erofs_inode *inode, *candidate;
+	struct erofs_inode src;
+	struct erofs_dentry *d;
+	char *path, *dname;
+	bool dumb;
+	int ret;
+
+	/* "." and ".." need no rebuilding */
+	if (ctx->dot_dotdot)
+		return 0;
+
+	ret = asprintf(&path, "%s/%.*s", rctx->parent->i_srcpath,
+		       ctx->de_namelen, ctx->dname);
+	if (ret < 0)
+		return ret;
+
+	erofs_dbg("parsing %s", path);
+	dname = path + strlen(parent->i_srcpath) + 1;
+
+	d = erofs_rebuild_get_dentry(parent, dname, false,
+				     &dumb, &dumb, false);
+	if (IS_ERR(d)) {
+		ret = PTR_ERR(d);
+		goto out;
+	}
+
+	ret = 0;
+	if (d->type != EROFS_FT_UNKNOWN) {
+		/*
+		 * bail out if the file exists in the upper layers. (Note that
+		 * extended attributes won't be merged too even for dirs.)
+		 */
+		if (!S_ISDIR(d->inode->i_mode) || d->inode->opaque)
+			goto out;
+
+		/* merge directory entries */
+		src = (struct erofs_inode) {
+			.sbi = dir->sbi,
+			.nid = ctx->de_nid
+		};
+		ret = erofs_read_inode_from_disk(&src);
+		if (ret || !S_ISDIR(src.i_mode))
+			goto out;
+		parent = d->inode;
+		inode = dir = &src;
+	} else {
+		u64 nid;
+
+		DBG_BUGON(parent != d->inode);
+		inode = erofs_new_inode();
+		if (IS_ERR(inode)) {
+			ret = PTR_ERR(inode);
+			goto out;
+		}
+
+		/* reuse i_ino[0] to read nid in source fs */
+		nid = inode->i_ino[0];
+		inode->sbi = dir->sbi;
+		inode->nid = ctx->de_nid;
+		ret = erofs_read_inode_from_disk(inode);
+		if (ret)
+			goto out;
+
+		/* restore nid in new generated fs */
+		inode->i_ino[1] = inode->i_ino[0];
+		inode->i_ino[0] = nid;
+		inode->dev = inode->sbi->dev;
+
+		/* an already-hashed inode with the same nid means a hardlink */
+		if (S_ISREG(inode->i_mode) && inode->i_nlink > 1 &&
+		    (candidate = erofs_iget(inode->dev, ctx->de_nid))) {
+			/* hardlink file */
+			erofs_iput(inode);
+			inode = candidate;
+			if (S_ISDIR(inode->i_mode)) {
+				erofs_err("hardlink directory not supported");
+				ret = -EISDIR;
+				goto out;
+			}
+			inode->i_nlink++;
+			erofs_dbg("\thardlink: %s -> %s", path, inode->i_srcpath);
+		} else {
+			ret = erofs_read_xattrs_from_disk(inode);
+			if (ret) {
+				erofs_iput(inode);
+				goto out;
+			}
+
+			inode->i_parent = d->inode;
+			/* ownership of `path` moves to the inode */
+			inode->i_srcpath = path;
+			path = NULL;
+			inode->i_ino[1] = inode->nid;
+			inode->i_nlink = 1;
+
+			ret = erofs_rebuild_fill_inode(inode);
+			if (ret) {
+				erofs_iput(inode);
+				goto out;
+			}
+
+			erofs_insert_ihash(inode, inode->dev, inode->i_ino[1]);
+			parent = dir = inode;
+		}
+
+		d->inode = inode;
+		d->type = erofs_mode_to_ftype(inode->i_mode);
+	}
+
+	if (S_ISDIR(inode->i_mode)) {
+		/* recurse with the (possibly merged) directory as cwd */
+		struct erofs_rebuild_dir_context nctx = *rctx;
+
+		nctx.parent = parent;
+		nctx.ctx.dir = dir;
+		ret = erofs_iterate_dir(&nctx.ctx, false);
+		if (ret)
+			goto out;
+	}
+
+	/* reset sbi, nid after subdirs are all loaded for the final dump */
+	inode->sbi = &sbi;
+	inode->nid = 0;
+out:
+	free(path);
+	return ret;
+}
+
+/*
+ * Load the whole inode tree of the EROFS image described by @sbi and merge
+ * it under @root via erofs_rebuild_dirent_iter().
+ * Returns 0 or a negative errno.
+ */
+int erofs_rebuild_load_tree(struct erofs_inode *root, struct erofs_sb_info *sbi)
+{
+	struct erofs_inode inode = {};
+	struct erofs_rebuild_dir_context ctx;
+	int ret;
+
+	if (!sbi->devname) {
+		erofs_err("failed to find a device for rebuilding");
+		return -EINVAL;
+	}
+
+	ret = erofs_read_superblock(sbi);
+	if (ret) {
+		erofs_err("failed to read superblock of %s", sbi->devname);
+		return ret;
+	}
+
+	inode.nid = sbi->root_nid;
+	inode.sbi = sbi;
+	ret = erofs_read_inode_from_disk(&inode);
+	if (ret) {
+		erofs_err("failed to read root inode of %s", sbi->devname);
+		return ret;
+	}
+	inode.i_srcpath = strdup("/");
+	/* fix: strdup() can fail; the iterator dereferences i_srcpath */
+	if (!inode.i_srcpath)
+		return -ENOMEM;
+
+	ctx = (struct erofs_rebuild_dir_context) {
+		.ctx.dir = &inode,
+		.ctx.cb = erofs_rebuild_dirent_iter,
+		.parent = root,
+	};
+	ret = erofs_iterate_dir(&ctx.ctx, false);
+	free(inode.i_srcpath);
+	return ret;
+}
diff --git a/lib/rolling_hash.h b/lib/rolling_hash.h
new file mode 100644
index 0000000..448db34
--- /dev/null
+++ b/lib/rolling_hash.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+/*
+ * Copyright (C) 2022 Alibaba Cloud
+ */
+#ifndef __ROLLING_HASH_H__
+#define __ROLLING_HASH_H__
+
+#include <erofs/defs.h>
+
+#define PRIME_NUMBER 4294967295LL
+#define RADIX 256
+
+/* Compute the initial rolling hash of a window via Horner's method,
+ * consuming the bytes front-to-back, or back-to-front when `backwards`
+ * is set. */
+static inline long long erofs_rolling_hash_init(u8 *input,
+						int len, bool backwards)
+{
+	long long hash = 0;
+	int pos;
+
+	if (backwards) {
+		for (pos = len - 1; pos >= 0; --pos)
+			hash = (RADIX * hash + input[pos]) % PRIME_NUMBER;
+	} else {
+		for (pos = 0; pos < len; ++pos)
+			hash = (RADIX * hash + input[pos]) % PRIME_NUMBER;
+	}
+	return hash;
+}
+
+/* RM = R ^ (M-1) % Q */
+/*
+ * Slide the window one byte: subtract `to_remove` (the outgoing byte,
+ * weighted by RM) and append `to_add`.
+ *
+ * NOTE: the intermediate value of "hash" can go negative, so an unsigned
+ * type cannot be used for it; signed "long long" is used instead, and
+ * PRIME_NUMBER (4294967295 == UINT32_MAX here) requires 64-bit
+ * intermediates anyway.  The result is normalized back into
+ * [0, PRIME_NUMBER) before returning.
+ */
+static inline long long erofs_rolling_hash_advance(long long old_hash,
+						   unsigned long long RM,
+						   u8 to_remove, u8 to_add)
+{
+	long long hash = old_hash;
+	long long to_remove_val = (to_remove * RM) % PRIME_NUMBER;
+
+	hash = RADIX * (old_hash - to_remove_val) % PRIME_NUMBER;
+	hash = (hash + to_add) % PRIME_NUMBER;
+
+	/* We might get negative value of hash, converting it to positive */
+	if (hash < 0)
+		hash += PRIME_NUMBER;
+	return hash;
+}
+
+/* Precompute RM = RADIX^(window_size - 1) mod PRIME_NUMBER, the weight
+ * of the oldest byte used by erofs_rolling_hash_advance(). */
+static inline long long erofs_rollinghash_calc_rm(int window_size)
+{
+	long long rm = 1;
+	int rounds = window_size - 1;
+
+	while (rounds-- > 0)
+		rm = (rm * RADIX) % PRIME_NUMBER;
+	return rm;
+}
+#endif
diff --git a/lib/sha256.c b/lib/sha256.c
index dd0e058..9bb7fbb 100644
--- a/lib/sha256.c
+++ b/lib/sha256.c
@@ -1,49 +1,45 @@
+// SPDX-License-Identifier: Unlicense
/*
* sha256.c --- The sha256 algorithm
*
- * Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
- * (copied from libtomcrypt and then relicensed under GPLv2)
- *
- * %Begin-Header%
- * This file may be redistributed under the terms of the GNU Library
- * General Public License, version 2.
- * %End-Header%
+ * (copied from LibTomCrypt with adaption.)
*/
-#include "erofs/defs.h"
+#include "sha256.h"
#include <string.h>
-static const __u32 K[64] = {
- 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, 0x3956c25bUL,
- 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, 0xd807aa98UL, 0x12835b01UL,
- 0x243185beUL, 0x550c7dc3UL, 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL,
- 0xc19bf174UL, 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL,
- 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, 0x983e5152UL,
- 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, 0xc6e00bf3UL, 0xd5a79147UL,
- 0x06ca6351UL, 0x14292967UL, 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL,
- 0x53380d13UL, 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL,
- 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, 0xd192e819UL,
- 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, 0x19a4c116UL, 0x1e376c08UL,
- 0x2748774cUL, 0x34b0bcb5UL, 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL,
- 0x682e6ff3UL, 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL,
- 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL
+/* This is based on SHA256 implementation in LibTomCrypt that was released into
+ * public domain by Tom St Denis. */
+/* the K array */
+static const unsigned long K[64] = {
+ 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, 0x3956c25bUL,
+ 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, 0xd807aa98UL, 0x12835b01UL,
+ 0x243185beUL, 0x550c7dc3UL, 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL,
+ 0xc19bf174UL, 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL,
+ 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, 0x983e5152UL,
+ 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, 0xc6e00bf3UL, 0xd5a79147UL,
+ 0x06ca6351UL, 0x14292967UL, 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL,
+ 0x53380d13UL, 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL,
+ 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, 0xd192e819UL,
+ 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, 0x19a4c116UL, 0x1e376c08UL,
+ 0x2748774cUL, 0x34b0bcb5UL, 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL,
+ 0x682e6ff3UL, 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL,
+ 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL
};
-
/* Various logical functions */
+#define RORc(x, y) \
+( ((((unsigned long) (x) & 0xFFFFFFFFUL) >> (unsigned long) ((y) & 31)) | \
+ ((unsigned long) (x) << (unsigned long) (32 - ((y) & 31)))) & 0xFFFFFFFFUL)
#define Ch(x,y,z) (z ^ (x & (y ^ z)))
#define Maj(x,y,z) (((x | y) & z) | (x & y))
-#define S(x, n) RORc((x),(n))
+#define S(x, n) RORc((x), (n))
#define R(x, n) (((x)&0xFFFFFFFFUL)>>(n))
#define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22))
#define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25))
#define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3))
#define Gamma1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10))
-#define RORc(x, y) ( ((((__u32)(x)&0xFFFFFFFFUL)>>(__u32)((y)&31)) | ((__u32)(x)<<(__u32)(32-((y)&31)))) & 0xFFFFFFFFUL)
-
-#define RND(a,b,c,d,e,f,g,h,i) \
- t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \
- t1 = Sigma0(a) + Maj(a, b, c); \
- d += t0; \
- h = t0 + t1;
+#ifndef MIN
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+#endif
#define STORE64H(x, y) \
do { \
@@ -61,145 +57,149 @@
(y)[2] = (unsigned char)(((x)>>8)&255); (y)[3] = (unsigned char)((x)&255); } while(0)
#define LOAD32H(x, y) \
- do { x = ((__u32)((y)[0] & 255)<<24) | \
- ((__u32)((y)[1] & 255)<<16) | \
- ((__u32)((y)[2] & 255)<<8) | \
- ((__u32)((y)[3] & 255)); } while(0)
+ do { x = ((u32)((y)[0] & 255)<<24) | \
+ ((u32)((y)[1] & 255)<<16) | \
+ ((u32)((y)[2] & 255)<<8) | \
+ ((u32)((y)[3] & 255)); } while(0)
-struct sha256_state {
- __u64 length;
- __u32 state[8], curlen;
- unsigned char buf[64];
-};
-
-/* This is a highly simplified version from libtomcrypt */
-struct hash_state {
- struct sha256_state sha256;
-};
-
-static void sha256_compress(struct hash_state * md, const unsigned char *buf)
+/* compress 512-bits */
+static int sha256_compress(struct sha256_state *md, unsigned char *buf)
{
- __u32 S[8], W[64], t0, t1;
- __u32 t;
- int i;
-
- /* copy state into S */
- for (i = 0; i < 8; i++) {
- S[i] = md->sha256.state[i];
- }
-
- /* copy the state into 512-bits into W[0..15] */
- for (i = 0; i < 16; i++) {
- LOAD32H(W[i], buf + (4*i));
- }
-
- /* fill W[16..63] */
- for (i = 16; i < 64; i++) {
- W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16];
- }
-
- /* Compress */
- for (i = 0; i < 64; ++i) {
- RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i);
- t = S[7]; S[7] = S[6]; S[6] = S[5]; S[5] = S[4];
- S[4] = S[3]; S[3] = S[2]; S[2] = S[1]; S[1] = S[0]; S[0] = t;
- }
-
- /* feedback */
- for (i = 0; i < 8; i++) {
- md->sha256.state[i] = md->sha256.state[i] + S[i];
- }
+ u32 S[8], W[64], t0, t1;
+ u32 t;
+ int i;
+ /* copy state into S */
+ for (i = 0; i < 8; i++) {
+ S[i] = md->state[i];
+ }
+ /* copy the state into 512-bits into W[0..15] */
+ for (i = 0; i < 16; i++)
+ LOAD32H(W[i], buf + (4 * i));
+ /* fill W[16..63] */
+ for (i = 16; i < 64; i++) {
+ W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) +
+ W[i - 16];
+ }
+ /* Compress */
+#define RND(a,b,c,d,e,f,g,h,i) \
+ t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \
+ t1 = Sigma0(a) + Maj(a, b, c); \
+ d += t0; \
+ h = t0 + t1;
+ for (i = 0; i < 64; ++i) {
+ RND(S[0], S[1], S[2], S[3], S[4], S[5], S[6], S[7], i);
+ t = S[7]; S[7] = S[6]; S[6] = S[5]; S[5] = S[4];
+ S[4] = S[3]; S[3] = S[2]; S[2] = S[1]; S[1] = S[0]; S[0] = t;
+ }
+ /* feedback */
+ for (i = 0; i < 8; i++) {
+ md->state[i] = md->state[i] + S[i];
+ }
+ return 0;
}
-
-static void sha256_init(struct hash_state * md)
+/* Initialize the hash state */
+void erofs_sha256_init(struct sha256_state *md)
{
- md->sha256.curlen = 0;
- md->sha256.length = 0;
- md->sha256.state[0] = 0x6A09E667UL;
- md->sha256.state[1] = 0xBB67AE85UL;
- md->sha256.state[2] = 0x3C6EF372UL;
- md->sha256.state[3] = 0xA54FF53AUL;
- md->sha256.state[4] = 0x510E527FUL;
- md->sha256.state[5] = 0x9B05688CUL;
- md->sha256.state[6] = 0x1F83D9ABUL;
- md->sha256.state[7] = 0x5BE0CD19UL;
+ md->curlen = 0;
+ md->length = 0;
+ md->state[0] = 0x6A09E667UL;
+ md->state[1] = 0xBB67AE85UL;
+ md->state[2] = 0x3C6EF372UL;
+ md->state[3] = 0xA54FF53AUL;
+ md->state[4] = 0x510E527FUL;
+ md->state[5] = 0x9B05688CUL;
+ md->state[6] = 0x1F83D9ABUL;
+ md->state[7] = 0x5BE0CD19UL;
}
-
-#define MIN(x, y) ( ((x)<(y))?(x):(y) )
-#define SHA256_BLOCKSIZE 64
-static void sha256_process(struct hash_state * md, const unsigned char *in, unsigned long inlen)
+/**
+ Process a block of memory though the hash
+ @param md The hash state
+ @param in The data to hash
+ @param inlen The length of the data (octets)
+ @return CRYPT_OK if successful
+*/
+int erofs_sha256_process(struct sha256_state *md,
+ const unsigned char *in, unsigned long inlen)
{
- unsigned long n;
-
- while (inlen > 0) {
- if (md->sha256.curlen == 0 && inlen >= SHA256_BLOCKSIZE) {
- sha256_compress(md, in);
- md->sha256.length += SHA256_BLOCKSIZE * 8;
- in += SHA256_BLOCKSIZE;
- inlen -= SHA256_BLOCKSIZE;
- } else {
- n = MIN(inlen, (SHA256_BLOCKSIZE - md->sha256.curlen));
- memcpy(md->sha256.buf + md->sha256.curlen, in, (size_t)n);
- md->sha256.curlen += n;
- in += n;
- inlen -= n;
- if (md->sha256.curlen == SHA256_BLOCKSIZE) {
- sha256_compress(md, md->sha256.buf);
- md->sha256.length += 8*SHA256_BLOCKSIZE;
- md->sha256.curlen = 0;
- }
- }
- }
+ unsigned long n;
+#define block_size 64
+ if (md->curlen > sizeof(md->buf))
+ return -1;
+ while (inlen > 0) {
+ if (md->curlen == 0 && inlen >= block_size) {
+ if (sha256_compress(md, (unsigned char *) in) < 0)
+ return -1;
+ md->length += block_size * 8;
+ in += block_size;
+ inlen -= block_size;
+ } else {
+ n = MIN(inlen, (block_size - md->curlen));
+ memcpy(md->buf + md->curlen, in, n);
+ md->curlen += n;
+ in += n;
+ inlen -= n;
+ if (md->curlen == block_size) {
+ if (sha256_compress(md, md->buf) < 0)
+ return -1;
+ md->length += 8 * block_size;
+ md->curlen = 0;
+ }
+ }
+ }
+ return 0;
}
-
-static void sha256_done(struct hash_state * md, unsigned char *out)
+/**
+ Terminate the hash to get the digest
+ @param md The hash state
+ @param out [out] The destination of the hash (32 bytes)
+ @return CRYPT_OK if successful
+*/
+int erofs_sha256_done(struct sha256_state *md, unsigned char *out)
{
- int i;
-
- /* increase the length of the message */
- md->sha256.length += md->sha256.curlen * 8;
-
- /* append the '1' bit */
- md->sha256.buf[md->sha256.curlen++] = (unsigned char)0x80;
-
- /* if the length is currently above 56 bytes we append zeros
- * then compress. Then we can fall back to padding zeros and length
- * encoding like normal.
- */
- if (md->sha256.curlen > 56) {
- while (md->sha256.curlen < 64) {
- md->sha256.buf[md->sha256.curlen++] = (unsigned char)0;
- }
- sha256_compress(md, md->sha256.buf);
- md->sha256.curlen = 0;
- }
-
- /* pad upto 56 bytes of zeroes */
- while (md->sha256.curlen < 56) {
- md->sha256.buf[md->sha256.curlen++] = (unsigned char)0;
- }
-
- /* store length */
- STORE64H(md->sha256.length, md->sha256.buf+56);
- sha256_compress(md, md->sha256.buf);
-
- /* copy output */
- for (i = 0; i < 8; i++) {
- STORE32H(md->sha256.state[i], out+(4*i));
- }
+ int i;
+ if (md->curlen >= sizeof(md->buf))
+ return -1;
+ /* increase the length of the message */
+ md->length += md->curlen * 8;
+ /* append the '1' bit */
+ md->buf[md->curlen++] = (unsigned char) 0x80;
+ /* if the length is currently above 56 bytes we append zeros
+ * then compress. Then we can fall back to padding zeros and length
+ * encoding like normal.
+ */
+ if (md->curlen > 56) {
+ while (md->curlen < 64) {
+ md->buf[md->curlen++] = (unsigned char) 0;
+ }
+ sha256_compress(md, md->buf);
+ md->curlen = 0;
+ }
+ /* pad upto 56 bytes of zeroes */
+ while (md->curlen < 56) {
+ md->buf[md->curlen++] = (unsigned char) 0;
+ }
+ /* store length */
+ STORE64H(md->length, md->buf+56);
+ sha256_compress(md, md->buf);
+ /* copy output */
+ for (i = 0; i < 8; i++)
+ STORE32H(md->state[i], out + (4 * i));
+ return 0;
}
void erofs_sha256(const unsigned char *in, unsigned long in_size,
unsigned char out[32])
{
- struct hash_state md;
+ struct sha256_state md;
- sha256_init(&md);
- sha256_process(&md, in, in_size);
- sha256_done(&md, out);
+ erofs_sha256_init(&md);
+ erofs_sha256_process(&md, in, in_size);
+ erofs_sha256_done(&md, out);
}
#ifdef UNITTEST
+#include <stdio.h>
+
static const struct {
char *msg;
unsigned char hash[32];
diff --git a/lib/sha256.h b/lib/sha256.h
new file mode 100644
index 0000000..dd39970
--- /dev/null
+++ b/lib/sha256.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+#ifndef __EROFS_LIB_SHA256_H
+#define __EROFS_LIB_SHA256_H
+
+#include "erofs/defs.h"
+
+/* Incremental SHA-256 context (LibTomCrypt-derived implementation). */
+struct sha256_state {
+	u64 length;		/* total message length processed, in bits */
+	u32 state[8], curlen;	/* chaining values; bytes pending in buf[] */
+	u8 buf[64];		/* partially filled 512-bit input block */
+};
+
+/* Reset @md to the SHA-256 initial state. */
+void erofs_sha256_init(struct sha256_state *md);
+/* Absorb @inlen bytes from @in; returns 0, or -1 on a corrupt state. */
+int erofs_sha256_process(struct sha256_state *md,
+			const unsigned char *in, unsigned long inlen);
+/* Finalize and write the 32-byte digest to @out; returns 0 or -1. */
+int erofs_sha256_done(struct sha256_state *md, unsigned char *out);
+
+/* One-shot convenience wrapper: out = SHA-256(in[0..in_size)). */
+void erofs_sha256(const unsigned char *in, unsigned long in_size,
+		  unsigned char out[32]);
+
+#endif
diff --git a/lib/super.c b/lib/super.c
index f486eb7..f952f7e 100644
--- a/lib/super.c
+++ b/lib/super.c
@@ -6,6 +6,7 @@
#include <stdlib.h>
#include "erofs/io.h"
#include "erofs/print.h"
+#include "erofs/xattr.h"
static bool check_layout_compatibility(struct erofs_sb_info *sbi,
struct erofs_super_block *dsb)
@@ -31,12 +32,13 @@
sbi->total_blocks = sbi->primarydevice_blocks;
- if (!erofs_sb_has_device_table())
+ if (!erofs_sb_has_device_table(sbi))
ondisk_extradevs = 0;
else
ondisk_extradevs = le16_to_cpu(dsb->extra_devices);
- if (ondisk_extradevs != sbi->extra_devices) {
+ if (sbi->extra_devices &&
+ ondisk_extradevs != sbi->extra_devices) {
erofs_err("extra devices don't match (ondisk %u, given %u)",
ondisk_extradevs, sbi->extra_devices);
return -EINVAL;
@@ -44,32 +46,40 @@
if (!ondisk_extradevs)
return 0;
+ sbi->extra_devices = ondisk_extradevs;
sbi->device_id_mask = roundup_pow_of_two(ondisk_extradevs + 1) - 1;
sbi->devs = calloc(ondisk_extradevs, sizeof(*sbi->devs));
+ if (!sbi->devs)
+ return -ENOMEM;
pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE;
for (i = 0; i < ondisk_extradevs; ++i) {
struct erofs_deviceslot dis;
int ret;
- ret = dev_read(0, &dis, pos, sizeof(dis));
- if (ret < 0)
+ ret = dev_read(sbi, 0, &dis, pos, sizeof(dis));
+ if (ret < 0) {
+ free(sbi->devs);
+ sbi->devs = NULL;
return ret;
+ }
- sbi->devs[i].mapped_blkaddr = dis.mapped_blkaddr;
- sbi->total_blocks += dis.blocks;
+ sbi->devs[i].mapped_blkaddr = le32_to_cpu(dis.mapped_blkaddr);
+ sbi->devs[i].blocks = le32_to_cpu(dis.blocks);
+ memcpy(sbi->devs[i].tag, dis.tag, sizeof(dis.tag));
+ sbi->total_blocks += sbi->devs[i].blocks;
pos += EROFS_DEVT_SLOT_SIZE;
}
return 0;
}
-int erofs_read_superblock(void)
+int erofs_read_superblock(struct erofs_sb_info *sbi)
{
- char data[EROFS_BLKSIZ];
+ u8 data[EROFS_MAX_BLOCK_SIZE];
struct erofs_super_block *dsb;
- unsigned int blkszbits;
int ret;
- ret = blk_read(0, data, 0, 1);
+ sbi->blkszbits = ilog2(EROFS_MAX_BLOCK_SIZE);
+ ret = blk_read(sbi, 0, data, 0, erofs_blknr(sbi, sizeof(data)));
if (ret < 0) {
erofs_err("cannot read erofs superblock: %d", ret);
return -EIO;
@@ -82,30 +92,57 @@
return ret;
}
- sbi.feature_compat = le32_to_cpu(dsb->feature_compat);
+ sbi->feature_compat = le32_to_cpu(dsb->feature_compat);
- blkszbits = dsb->blkszbits;
- /* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */
- if (blkszbits != LOG_BLOCK_SIZE) {
- erofs_err("blksize %d isn't supported on this platform",
- 1 << blkszbits);
+ sbi->blkszbits = dsb->blkszbits;
+ if (sbi->blkszbits < 9 ||
+ sbi->blkszbits > ilog2(EROFS_MAX_BLOCK_SIZE)) {
+ erofs_err("blksize %llu isn't supported on this platform",
+ erofs_blksiz(sbi) | 0ULL);
+ return ret;
+ } else if (!check_layout_compatibility(sbi, dsb)) {
return ret;
}
- if (!check_layout_compatibility(&sbi, dsb))
+ sbi->primarydevice_blocks = le32_to_cpu(dsb->blocks);
+ sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr);
+ sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
+ sbi->xattr_prefix_start = le32_to_cpu(dsb->xattr_prefix_start);
+ sbi->xattr_prefix_count = dsb->xattr_prefix_count;
+ sbi->islotbits = EROFS_ISLOTBITS;
+ sbi->root_nid = le16_to_cpu(dsb->root_nid);
+ sbi->packed_nid = le64_to_cpu(dsb->packed_nid);
+ sbi->inos = le64_to_cpu(dsb->inos);
+ sbi->checksum = le32_to_cpu(dsb->checksum);
+ sbi->extslots = dsb->sb_extslots;
+
+ sbi->build_time = le64_to_cpu(dsb->build_time);
+ sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec);
+
+ memcpy(&sbi->uuid, dsb->uuid, sizeof(dsb->uuid));
+
+ if (erofs_sb_has_compr_cfgs(sbi))
+ sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs);
+ else
+ sbi->lz4_max_distance = le16_to_cpu(dsb->u1.lz4_max_distance);
+
+ ret = erofs_init_devices(sbi, dsb);
+ if (ret)
return ret;
- sbi.primarydevice_blocks = le32_to_cpu(dsb->blocks);
- sbi.meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr);
- sbi.xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
- sbi.islotbits = EROFS_ISLOTBITS;
- sbi.root_nid = le16_to_cpu(dsb->root_nid);
- sbi.inos = le64_to_cpu(dsb->inos);
- sbi.checksum = le32_to_cpu(dsb->checksum);
+ ret = erofs_xattr_prefixes_init(sbi);
+ if (ret && sbi->devs) {
+ free(sbi->devs);
+ sbi->devs = NULL;
+ }
+ return ret;
+}
- sbi.build_time = le64_to_cpu(dsb->build_time);
- sbi.build_time_nsec = le32_to_cpu(dsb->build_time_nsec);
-
- memcpy(&sbi.uuid, dsb->uuid, sizeof(dsb->uuid));
- return erofs_init_devices(&sbi, dsb);
+void erofs_put_super(struct erofs_sb_info *sbi)
+{
+ if (sbi->devs) {
+ free(sbi->devs);
+ sbi->devs = NULL;
+ }
+ erofs_xattr_prefixes_cleanup(sbi);
}
diff --git a/lib/tar.c b/lib/tar.c
new file mode 100644
index 0000000..8204939
--- /dev/null
+++ b/lib/tar.c
@@ -0,0 +1,976 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#if defined(HAVE_ZLIB)
+#include <zlib.h>
+#endif
+#include "erofs/print.h"
+#include "erofs/cache.h"
+#include "erofs/diskbuf.h"
+#include "erofs/inode.h"
+#include "erofs/list.h"
+#include "erofs/tar.h"
+#include "erofs/io.h"
+#include "erofs/xattr.h"
+#include "erofs/blobchunk.h"
+#include "erofs/rebuild.h"
+
+/* This file is a tape/volume header. Ignore it on extraction. */
+#define GNUTYPE_VOLHDR 'V'
+
/*
 * On-disk layout of a ustar tar header (one 512-byte record).
 * Numeric fields are ASCII octal unless the GNU base-256 encoding is
 * used; byte offsets within the record are noted on each field.
 */
struct tar_header {
	char name[100];		/* 0-99 */
	char mode[8];		/* 100-107 */
	char uid[8];		/* 108-115 */
	char gid[8];		/* 116-123 */
	char size[12];		/* 124-135 */
	char mtime[12];		/* 136-147 */
	char chksum[8];		/* 148-155 */
	char typeflag;		/* 156-156 */
	char linkname[100];	/* 157-256 */
	char magic[6];		/* 257-262 */
	char version[2];	/* 263-264 */
	char uname[32];		/* 265-296 */
	char gname[32];		/* 297-328 */
	char devmajor[8];	/* 329-336 */
	char devminor[8];	/* 337-344 */
	char prefix[155];	/* 345-499 */
	char padding[12];	/* 500-511 (pad to exactly the 512 byte) */
};
+
+s64 erofs_read_from_fd(int fd, void *buf, u64 bytes)
+{
+ s64 i = 0;
+
+ while (bytes) {
+ int len = bytes > INT_MAX ? INT_MAX : bytes;
+ int ret;
+
+ ret = read(fd, buf + i, len);
+ if (ret < 1) {
+ if (ret == 0) {
+ break;
+ } else if (errno != EINTR) {
+ erofs_err("failed to read : %s\n",
+ strerror(errno));
+ return -errno;
+ }
+ }
+ bytes -= ret;
+ i += ret;
+ }
+ return i;
+}
+
+void erofs_iostream_close(struct erofs_iostream *ios)
+{
+ free(ios->buffer);
+ if (ios->decoder == EROFS_IOS_DECODER_GZIP) {
+#if defined(HAVE_ZLIB)
+ gzclose(ios->handler);
+#endif
+ return;
+ }
+ close(ios->fd);
+}
+
/*
 * Bind an iostream to an open descriptor @fd.
 *
 * For EROFS_IOS_DECODER_GZIP the fd is handed to zlib (gzdopen takes
 * ownership) and the stream size is unknown (ios->sz = 0); otherwise the
 * size is probed with lseek(SEEK_END) so later skips can seek directly.
 * A readahead buffer is allocated, halving the size on failure down to
 * a 1 KiB floor.  Returns 0 or a negative errno.
 */
int erofs_iostream_open(struct erofs_iostream *ios, int fd, int decoder)
{
	s64 fsz;

	ios->tail = ios->head = 0;
	ios->decoder = decoder;
	if (decoder == EROFS_IOS_DECODER_GZIP) {
#if defined(HAVE_ZLIB)
		ios->handler = gzdopen(fd, "r");
		if (!ios->handler)
			return -ENOMEM;
		ios->sz = fsz = 0;
		ios->bufsize = 32768;
#else
		return -EOPNOTSUPP;
#endif
	} else {
		ios->fd = fd;
		fsz = lseek(fd, 0, SEEK_END);
		if (fsz <= 0) {
			/* unseekable (pipe) or empty input: feof only if empty */
			ios->feof = !fsz;
			ios->sz = 0;
		} else {
			ios->feof = false;
			ios->sz = fsz;
			if (lseek(fd, 0, SEEK_SET))
				return -EIO;
#ifdef HAVE_POSIX_FADVISE
			/* advisory only; a failure here is not fatal */
			if (posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL))
				erofs_warn("failed to fadvise: %s, ignored.",
					   erofs_strerror(errno));
#endif
		}
		ios->bufsize = 16384;
	}

	/* shrink the readahead buffer on allocation pressure */
	do {
		ios->buffer = malloc(ios->bufsize);
		if (ios->buffer)
			break;
		ios->bufsize >>= 1;
	} while (ios->bufsize >= 1024);

	if (!ios->buffer)
		return -ENOMEM;
	return 0;
}
+
/*
 * Expose up to @bytes readahead bytes through *@buf without copying.
 *
 * If the window already holds @bytes, a pointer into the internal buffer
 * is returned directly.  Otherwise the remaining bytes are slid to the
 * front and the buffer is refilled from the backend (zlib or raw fd).
 * Returns the number of bytes available at *@buf — possibly fewer than
 * @bytes near EOF — or a negative errno.  The returned pointer is only
 * valid until the next call on @ios.
 */
int erofs_iostream_read(struct erofs_iostream *ios, void **buf, u64 bytes)
{
	unsigned int rabytes = ios->tail - ios->head;
	int ret;

	if (rabytes >= bytes) {
		/* fast path: request fully satisfied from readahead */
		*buf = ios->buffer + ios->head;
		ios->head += bytes;
		return bytes;
	}

	if (ios->head) {
		/* compact: move the unread tail to the buffer start */
		memmove(ios->buffer, ios->buffer + ios->head, rabytes);
		ios->head = 0;
		ios->tail = rabytes;
	}

	if (!ios->feof) {
		if (ios->decoder == EROFS_IOS_DECODER_GZIP) {
#if defined(HAVE_ZLIB)
			ret = gzread(ios->handler, ios->buffer + rabytes,
				     ios->bufsize - rabytes);
			if (!ret) {
				int errnum;
				const char *errstr;

				/* 0 may mean clean stream end or an error */
				errstr = gzerror(ios->handler, &errnum);
				if (errnum != Z_STREAM_END) {
					erofs_err("failed to gzread: %s", errstr);
					return -EIO;
				}
				ios->feof = true;
			}
			ios->tail += ret;
#else
			return -EOPNOTSUPP;
#endif
		} else {
			ret = erofs_read_from_fd(ios->fd, ios->buffer + rabytes,
						 ios->bufsize - rabytes);
			if (ret < 0)
				return ret;
			ios->tail += ret;
			/* a short raw read can only mean end of input */
			if (ret < ios->bufsize - rabytes)
				ios->feof = true;
		}
	}
	*buf = ios->buffer;
	ret = min_t(int, ios->tail, bytes);
	ios->head = ret;
	return ret;
}
+
+int erofs_iostream_bread(struct erofs_iostream *ios, void *buf, u64 bytes)
+{
+ u64 rem = bytes;
+ void *src;
+ int ret;
+
+ do {
+ ret = erofs_iostream_read(ios, &src, rem);
+ if (ret < 0)
+ return ret;
+ memcpy(buf, src, ret);
+ rem -= ret;
+ } while (rem && ret);
+
+ return bytes - rem;
+}
+
/*
 * Skip @sz bytes of the stream.
 *
 * Consumes readahead first; for seekable inputs (ios->sz != 0) the rest
 * is skipped with a single lseek.  Returns 0 on success or, when the
 * end of input is reached early, the number of bytes that could not be
 * skipped (a positive value); a negative errno signals a read failure.
 */
int erofs_iostream_lskip(struct erofs_iostream *ios, u64 sz)
{
	unsigned int rabytes = ios->tail - ios->head;
	int ret;
	void *dummy;

	if (rabytes >= sz) {
		/* entirely covered by the current readahead window */
		ios->head += sz;
		return 0;
	}

	sz -= rabytes;
	ios->head = ios->tail = 0;
	if (ios->feof)
		return sz;

	if (ios->sz) {
		s64 cur = lseek(ios->fd, sz, SEEK_CUR);

		/* seeking past the known size: report the overshoot */
		if (cur > ios->sz)
			return cur - ios->sz;
		return 0;
	}

	/* unseekable (e.g. gzip/pipe) input: drain via dummy reads */
	do {
		ret = erofs_iostream_read(ios, &dummy, sz);
		if (ret < 0)
			return ret;
		sz -= ret;
	} while (!(ios->feof || !ret || !sz));

	return sz;
}
+
/*
 * Parse a fixed-width ASCII octal tar header field.
 *
 * Header fields are not guaranteed to be NUL-terminated, so the bytes
 * are first copied into a bounded, terminated scratch buffer; the old
 * code copied them but then ran strtol() on the raw field, reading past
 * @len into the following header bytes (and the endp/inp comparison
 * checked the wrong buffer).  On malformed input, errno is set to
 * EINVAL and the (possibly partial) value is returned; callers must
 * check errno.
 */
static long long tarerofs_otoi(const char *ptr, int len)
{
	char inp[32];
	char *endp;
	long long val;

	if (len >= (int)sizeof(inp)) {
		/* no tar numeric field is this wide (max is 12 bytes) */
		errno = ERANGE;
		return 0;
	}
	memcpy(inp, ptr, len);
	inp[len] = '\0';

	errno = 0;
	/* strtoll, so 12-byte size fields survive 32-bit longs */
	val = strtoll(inp, &endp, 8);
	if ((!val && endp == inp) ||
	    (*endp && *endp != ' '))
		errno = EINVAL;
	return val;
}
+
/*
 * Decode a tar numeric field that may use either encoding.
 * Failures are reported via errno (see tarerofs_otoi).
 */
static long long tarerofs_parsenum(const char *ptr, int len)
{
	/*
	 * For fields containing numbers or timestamps that are out of range
	 * for the basic format, the GNU format uses a base-256 representation
	 * instead of an ASCII octal number: a leading 0x80 byte marks the
	 * encoding and the remaining bytes hold a big-endian binary value.
	 * Note the low bits of the marker byte are discarded here, capping
	 * values at 8*(len-1) bits — ample for the 12-byte size field.
	 */
	if (*(char *)ptr == '\200') {
		long long res = 0;

		while (--len)
			res = (res << 8) + (u8)*(++ptr);
		return res;
	}
	return tarerofs_otoi(ptr, len);
}
+
/*
 * One extended attribute captured from a PAX header.
 * @kv:      "name<sep>value" buffer; the byte at @namelen is a separator
 *           (overwritten with NUL when the xattr is applied)
 * @len:     total size of @kv in bytes
 * @namelen: length of the name part only
 */
struct tarerofs_xattr_item {
	struct list_head list;
	char *kv;
	unsigned int len, namelen;
};
+
+int tarerofs_insert_xattr(struct list_head *xattrs,
+ char *kv, int namelen, int len, bool skip)
+{
+ struct tarerofs_xattr_item *item;
+ char *nv;
+
+ DBG_BUGON(namelen >= len);
+ list_for_each_entry(item, xattrs, list) {
+ if (!strncmp(item->kv, kv, namelen + 1)) {
+ if (skip)
+ return 0;
+ goto found;
+ }
+ }
+
+ item = malloc(sizeof(*item));
+ if (!item)
+ return -ENOMEM;
+ item->kv = NULL;
+ item->namelen = namelen;
+ namelen = 0;
+ list_add_tail(&item->list, xattrs);
+found:
+ nv = realloc(item->kv, len);
+ if (!nv)
+ return -ENOMEM;
+ item->kv = nv;
+ item->len = len;
+ memcpy(nv + namelen, kv + namelen, len - namelen);
+ return 0;
+}
+
+int tarerofs_merge_xattrs(struct list_head *dst, struct list_head *src)
+{
+ struct tarerofs_xattr_item *item;
+
+ list_for_each_entry(item, src, list) {
+ int ret;
+
+ ret = tarerofs_insert_xattr(dst, item->kv, item->namelen,
+ item->len, true);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+void tarerofs_remove_xattrs(struct list_head *xattrs)
+{
+ struct tarerofs_xattr_item *item, *n;
+
+ list_for_each_entry_safe(item, n, xattrs, list) {
+ DBG_BUGON(!item->kv);
+ free(item->kv);
+ list_del(&item->list);
+ free(item);
+ }
+}
+
/*
 * Apply every recorded xattr in @xattrs to @inode.
 *
 * -ENODATA from erofs_setxattr() (an unsupported attribute name) is
 * logged and skipped; any other error aborts.  Returns 0 on success.
 */
int tarerofs_apply_xattrs(struct erofs_inode *inode, struct list_head *xattrs)
{
	struct tarerofs_xattr_item *item;
	int ret;

	list_for_each_entry(item, xattrs, list) {
		/* value begins one past the separator byte at kv[namelen] */
		const char *v = item->kv + item->namelen + 1;
		unsigned int vsz = item->len - item->namelen - 1;

		/* skip malformed entries (no room for separator + value);
		 * note the comparison is against namelen - 1, so an entry
		 * with len == namelen + 1 (empty value) is still applied */
		if (item->len <= item->namelen - 1) {
			DBG_BUGON(item->len < item->namelen - 1);
			continue;
		}
		/* terminate the name in place so kv doubles as a C string */
		item->kv[item->namelen] = '\0';
		erofs_dbg("Recording xattr(%s)=\"%s\" (of %u bytes) to file %s",
			  item->kv, v, vsz, inode->i_srcpath);
		ret = erofs_setxattr(inode, item->kv, v, vsz);
		if (ret == -ENODATA)
			erofs_err("Failed to set xattr(%s)=%s to file %s",
				  item->kv, v, inode->i_srcpath);
		else if (ret)
			return ret;
	}
	return 0;
}
+
+static const char lookup_table[65] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
+
+static int base64_decode(const char *src, int len, u8 *dst)
+{
+ int i, bits = 0, ac = 0;
+ const char *p;
+ u8 *cp = dst;
+
+ if(!(len % 4)) {
+ /* Check for and ignore any end padding */
+ if (src[len - 2] == '=' && src[len - 1] == '=')
+ len -= 2;
+ else if (src[len - 1] == '=')
+ --len;
+ }
+
+ for (i = 0; i < len; i++) {
+ p = strchr(lookup_table, src[i]);
+ if (p == NULL || src[i] == 0)
+ return -2;
+ ac += (p - lookup_table) << bits;
+ bits += 6;
+ if (bits >= 8) {
+ *cp++ = ac & 0xff;
+ ac >>= 8;
+ bits -= 8;
+ }
+ }
+ if (ac)
+ return -1;
+ return cp - dst;
+}
+
/*
 * Parse a PAX extended header ('x' or 'g' record) of @size bytes.
 *
 * Each record is "LEN NAME=VALUE\n" where LEN counts the whole record.
 * Recognized keywords update @eh (path, linkpath, mtime, size, uid,
 * gid); SCHILY.xattr.* and LIBARCHIVE.xattr.* entries are collected
 * into eh->xattrs.  Unknown keywords are logged and ignored.
 * Returns 0 or a negative errno.
 * NOTE(review): a short erofs_iostream_bread() here returns the
 * positive byte count as "ret" — nonzero so callers treat it as
 * failure, but it is not a negative errno; confirm intentional.
 */
int tarerofs_parse_pax_header(struct erofs_iostream *ios,
			      struct erofs_pax_header *eh, u32 size)
{
	char *buf, *p;
	int ret;

	buf = malloc(size);
	if (!buf)
		return -ENOMEM;
	p = buf;

	ret = erofs_iostream_bread(ios, buf, size);
	if (ret != size)
		goto out;

	while (p < buf + size) {
		char *kv, *value;
		int len, n;
		/* extended records are of the format: "LEN NAME=VALUE\n" */
		ret = sscanf(p, "%d %n", &len, &n);
		if (ret < 1 || len <= n || len > buf + size - p) {
			ret = -EIO;
			goto out;
		}
		kv = p + n;
		p += len;
		len -= n;

		/* records must be newline-terminated per POSIX pax */
		if (p[-1] != '\n') {
			ret = -EIO;
			goto out;
		}
		p[-1] = '\0';

		value = memchr(kv, '=', p - kv);
		if (!value) {
			ret = -EIO;
			goto out;
		} else {
			long long lln;

			value++;

			if (!strncmp(kv, "path=", sizeof("path=") - 1)) {
				/*
				 * Strip trailing slashes from the path.
				 * NOTE(review): strdup() is not checked for
				 * NULL here, and an empty value leaves j == 0
				 * so eh->path[j - 1] underflows — confirm
				 * inputs can never carry an empty path=.
				 */
				int j = p - 1 - value;
				free(eh->path);
				eh->path = strdup(value);
				while (eh->path[j - 1] == '/')
					eh->path[--j] = '\0';
			} else if (!strncmp(kv, "linkpath=",
					sizeof("linkpath=") - 1)) {
				free(eh->link);
				eh->link = strdup(value);
			} else if (!strncmp(kv, "mtime=",
					sizeof("mtime=") - 1)) {
				/* seconds, optionally followed by ".nsec" */
				ret = sscanf(value, "%lld %n", &lln, &n);
				if(ret < 1) {
					ret = -EIO;
					goto out;
				}
				eh->st.st_mtime = lln;
				if (value[n] == '.') {
					ret = sscanf(value + n + 1, "%d", &n);
					if (ret < 1) {
						ret = -EIO;
						goto out;
					}
#if ST_MTIM_NSEC
					ST_MTIM_NSEC(&eh->st) = n;
#endif
				}
				eh->use_mtime = true;
			} else if (!strncmp(kv, "size=",
					sizeof("size=") - 1)) {
				ret = sscanf(value, "%lld %n", &lln, &n);
				if(ret < 1 || value[n] != '\0') {
					ret = -EIO;
					goto out;
				}
				eh->st.st_size = lln;
				eh->use_size = true;
			} else if (!strncmp(kv, "uid=", sizeof("uid=") - 1)) {
				ret = sscanf(value, "%lld %n", &lln, &n);
				if(ret < 1 || value[n] != '\0') {
					ret = -EIO;
					goto out;
				}
				eh->st.st_uid = lln;
				eh->use_uid = true;
			} else if (!strncmp(kv, "gid=", sizeof("gid=") - 1)) {
				ret = sscanf(value, "%lld %n", &lln, &n);
				if(ret < 1 || value[n] != '\0') {
					ret = -EIO;
					goto out;
				}
				eh->st.st_gid = lln;
				eh->use_gid = true;
			} else if (!strncmp(kv, "SCHILY.xattr.",
				   sizeof("SCHILY.xattr.") - 1)) {
				char *key = kv + sizeof("SCHILY.xattr.") - 1;

				--len; /* p[-1] == '\0' */
				ret = tarerofs_insert_xattr(&eh->xattrs, key,
						value - key - 1,
						len - (key - kv), false);
				if (ret)
					goto out;
			} else if (!strncmp(kv, "LIBARCHIVE.xattr.",
				   sizeof("LIBARCHIVE.xattr.") - 1)) {
				char *key;
				key = kv + sizeof("LIBARCHIVE.xattr.") - 1;

				--len; /* p[-1] == '\0' */
				/* the value is base64; decode it in place */
				ret = base64_decode(value, len - (value - kv),
						    (u8 *)value);
				if (ret < 0) {
					ret = -EFSCORRUPTED;
					goto out;
				}

				ret = tarerofs_insert_xattr(&eh->xattrs, key,
						value - key - 1,
						value - key + ret, false);
				if (ret)
					goto out;
			} else {
				erofs_info("unrecognized pax keyword \"%s\", ignoring", kv);
			}
		}
	}
	ret = 0;
out:
	free(buf);
	return ret;
}
+
/*
 * Detach @inode's subtree from the rebuild hierarchy by dropping link
 * counts: one for the inode itself and, for directories, one on the
 * parent (the ".." back reference) after releasing all children.
 */
void tarerofs_remove_inode(struct erofs_inode *inode)
{
	struct erofs_dentry *d;

	--inode->i_nlink;
	if (!S_ISDIR(inode->i_mode))
		return;

	/* remove all subdirs */
	list_for_each_entry(d, &inode->i_subdirs, d_child) {
		if (!is_dot_dotdot(d->name))
			tarerofs_remove_inode(d->inode);
		erofs_iput(d->inode);
		d->inode = NULL;
	}
	--inode->i_parent->i_nlink;
}
+
+static int tarerofs_write_file_data(struct erofs_inode *inode,
+ struct erofs_tarfile *tar)
+{
+ unsigned int j;
+ void *buf;
+ int fd, nread;
+ u64 off;
+
+ if (!inode->i_diskbuf) {
+ inode->i_diskbuf = calloc(1, sizeof(*inode->i_diskbuf));
+ if (!inode->i_diskbuf)
+ return -ENOSPC;
+ } else {
+ erofs_diskbuf_close(inode->i_diskbuf);
+ }
+
+ fd = erofs_diskbuf_reserve(inode->i_diskbuf, 0, &off);
+ if (fd < 0)
+ return -EBADF;
+
+ for (j = inode->i_size; j; ) {
+ nread = erofs_iostream_read(&tar->ios, &buf, j);
+ if (nread < 0)
+ break;
+ if (write(fd, buf, nread) != nread) {
+ nread = -EIO;
+ break;
+ }
+ j -= nread;
+ }
+ erofs_diskbuf_commit(inode->i_diskbuf, inode->i_size);
+ inode->with_diskbuf = true;
+ return 0;
+}
+
+static int tarerofs_write_file_index(struct erofs_inode *inode,
+ struct erofs_tarfile *tar, erofs_off_t data_offset)
+{
+ int ret;
+
+ ret = tarerofs_write_chunkes(inode, data_offset);
+ if (ret)
+ return ret;
+ if (erofs_iostream_lskip(&tar->ios, inode->i_size))
+ return -EIO;
+ return 0;
+}
+
/*
 * Consume one logical entry from the tar stream and merge it into the
 * in-memory tree rooted at @root.
 *
 * Handles 512-byte record alignment, checksum validation (both the
 * unsigned and the historical signed variant), PAX 'x'/'g' extended
 * headers, GNU 'L'/'K' long-name records, volume headers, hard links,
 * whiteouts/opaque markers (overlayfs-style layers) and regular data
 * (copied or indexed).  Returns 0 to continue, 1 at end-of-archive
 * (two zero blocks), or a negative errno.
 */
int tarerofs_parse_tar(struct erofs_inode *root, struct erofs_tarfile *tar)
{
	char path[PATH_MAX];
	/* start from a copy of the global PAX state; eh is per-entry */
	struct erofs_pax_header eh = tar->global;
	struct erofs_sb_info *sbi = root->sbi;
	bool whout, opq, e = false;
	struct stat st;
	erofs_off_t tar_offset, data_offset;

	struct tar_header *th;
	struct erofs_dentry *d;
	struct erofs_inode *inode;
	unsigned int j, csum, cksum;
	int ckksum, ret, rem;

	/* own private copies so the global strings survive free() below */
	if (eh.path)
		eh.path = strdup(eh.path);
	if (eh.link)
		eh.link = strdup(eh.link);
	init_list_head(&eh.xattrs);

restart:
	/* tar records are 512-byte aligned; skip any padding first */
	rem = tar->offset & 511;
	if (rem) {
		if (erofs_iostream_lskip(&tar->ios, 512 - rem)) {
			ret = -EIO;
			goto out;
		}
		tar->offset += 512 - rem;
	}

	tar_offset = tar->offset;
	ret = erofs_iostream_read(&tar->ios, (void **)&th, sizeof(*th));
	if (ret != sizeof(*th)) {
		erofs_err("failed to read header block @ %llu", tar_offset);
		ret = -EIO;
		goto out;
	}
	tar->offset += sizeof(*th);
	if (*th->name == '\0') {
		if (e) {	/* end of tar 2 empty blocks */
			ret = 1;
			goto out;
		}
		e = true;	/* empty jump to next block */
		goto restart;
	}

	/* chksum field itself treated as ' ' */
	csum = tarerofs_otoi(th->chksum, sizeof(th->chksum));
	if (errno) {
		erofs_err("invalid chksum @ %llu", tar_offset);
		ret = -EBADMSG;
		goto out;
	}
	/* compute both the unsigned (cksum) and legacy signed (ckksum)
	 * checksums over the header, excluding the chksum field itself */
	cksum = 0;
	for (j = 0; j < 8; ++j)
		cksum += (unsigned int)' ';
	ckksum = cksum;
	for (j = 0; j < 148; ++j) {
		cksum += (unsigned int)((u8*)th)[j];
		ckksum += (int)((char*)th)[j];
	}
	for (j = 156; j < 500; ++j) {
		cksum += (unsigned int)((u8*)th)[j];
		ckksum += (int)((char*)th)[j];
	}
	if (csum != cksum && csum != ckksum) {
		erofs_err("chksum mismatch @ %llu", tar_offset);
		ret = -EBADMSG;
		goto out;
	}

	if (th->typeflag == GNUTYPE_VOLHDR) {
		if (th->size[0])
			erofs_warn("GNUTYPE_VOLHDR with non-zeroed size @ %llu",
				   tar_offset);
		/* anyway, strncpy could cause some GCC warning here */
		memcpy(sbi->volume_name, th->name, sizeof(sbi->volume_name));
		goto restart;
	}

	/* 5 bytes only: accepts both "ustar\0" (POSIX) and "ustar " (GNU) */
	if (memcmp(th->magic, "ustar", 5)) {
		erofs_err("invalid tar magic @ %llu", tar_offset);
		ret = -EIO;
		goto out;
	}

	st.st_mode = tarerofs_otoi(th->mode, sizeof(th->mode));
	if (errno)
		goto invalid_tar;

	/* PAX-supplied values override the octal header fields */
	if (eh.use_uid) {
		st.st_uid = eh.st.st_uid;
	} else {
		st.st_uid = tarerofs_parsenum(th->uid, sizeof(th->uid));
		if (errno)
			goto invalid_tar;
	}

	if (eh.use_gid) {
		st.st_gid = eh.st.st_gid;
	} else {
		st.st_gid = tarerofs_parsenum(th->gid, sizeof(th->gid));
		if (errno)
			goto invalid_tar;
	}

	if (eh.use_size) {
		st.st_size = eh.st.st_size;
	} else {
		st.st_size = tarerofs_parsenum(th->size, sizeof(th->size));
		if (errno)
			goto invalid_tar;
	}

	if (eh.use_mtime) {
		st.st_mtime = eh.st.st_mtime;
#if ST_MTIM_NSEC
		ST_MTIM_NSEC(&st) = ST_MTIM_NSEC(&eh.st);
#endif
	} else {
		st.st_mtime = tarerofs_parsenum(th->mtime, sizeof(th->mtime));
		if (errno)
			goto invalid_tar;
	}

	/* no PAX/GNU long name: build "prefix/name" from the ustar fields */
	if (th->typeflag <= '7' && !eh.path) {
		eh.path = path;
		j = 0;
		if (*th->prefix) {
			memcpy(path, th->prefix, sizeof(th->prefix));
			path[sizeof(th->prefix)] = '\0';
			j = strlen(path);
			if (path[j - 1] != '/') {
				path[j] = '/';
				path[++j] = '\0';
			}
		}
		memcpy(path + j, th->name, sizeof(th->name));
		path[j + sizeof(th->name)] = '\0';
		j = strlen(path);
		while (path[j - 1] == '/')
			path[--j] = '\0';
	}

	data_offset = tar->offset;
	tar->offset += st.st_size;
	switch(th->typeflag) {
	case '0':
	case '7':
	case '1':
		st.st_mode |= S_IFREG;
		break;
	case '2':
		st.st_mode |= S_IFLNK;
		break;
	case '3':
		st.st_mode |= S_IFCHR;
		break;
	case '4':
		st.st_mode |= S_IFBLK;
		break;
	case '5':
		st.st_mode |= S_IFDIR;
		break;
	case '6':
		st.st_mode |= S_IFIFO;
		break;
	case 'g':
		/* global PAX header: update tar->global and re-seed eh */
		ret = tarerofs_parse_pax_header(&tar->ios, &tar->global,
						st.st_size);
		if (ret)
			goto out;
		if (tar->global.path) {
			free(eh.path);
			eh.path = strdup(tar->global.path);
		}
		if (tar->global.link) {
			free(eh.link);
			eh.link = strdup(tar->global.link);
		}
		goto restart;
	case 'x':
		/* per-entry PAX header: applies to the next real entry */
		ret = tarerofs_parse_pax_header(&tar->ios, &eh, st.st_size);
		if (ret)
			goto out;
		goto restart;
	case 'L':
		/*
		 * GNU long name record.
		 * NOTE(review): unlike case 'K' below, there is no
		 * PATH_MAX bound and no malloc() NULL-check here —
		 * confirm whether that asymmetry is intentional.
		 */
		free(eh.path);
		eh.path = malloc(st.st_size + 1);
		if (st.st_size != erofs_iostream_bread(&tar->ios, eh.path,
						       st.st_size))
			goto invalid_tar;
		eh.path[st.st_size] = '\0';
		goto restart;
	case 'K':
		/* GNU long link-target record */
		free(eh.link);
		eh.link = malloc(st.st_size + 1);
		if (st.st_size > PATH_MAX || st.st_size !=
			erofs_iostream_bread(&tar->ios, eh.link, st.st_size))
			goto invalid_tar;
		eh.link[st.st_size] = '\0';
		goto restart;
	default:
		erofs_info("unrecognized typeflag %xh @ %llu - ignoring",
			   th->typeflag, tar_offset);
		(void)erofs_iostream_lskip(&tar->ios, st.st_size);
		ret = 0;
		goto out;
	}

	st.st_rdev = 0;
	if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode)) {
		int major, minor;

		major = tarerofs_parsenum(th->devmajor, sizeof(th->devmajor));
		if (errno) {
			erofs_err("invalid device major @ %llu", tar_offset);
			goto out;
		}

		minor = tarerofs_parsenum(th->devminor, sizeof(th->devminor));
		if (errno) {
			erofs_err("invalid device minor @ %llu", tar_offset);
			goto out;
		}

		/* pack major/minor in the (extended) Linux dev_t layout */
		st.st_rdev = (major << 8) | (minor & 0xff) | ((minor & ~0xff) << 12);
	} else if (th->typeflag == '1' || th->typeflag == '2') {
		if (!eh.link)
			eh.link = strndup(th->linkname, sizeof(th->linkname));
	}

	/* indexed (no-copy) mode needs block-aligned payload in the tar */
	if (tar->index_mode && !tar->mapfile &&
	    erofs_blkoff(sbi, data_offset)) {
		erofs_err("invalid tar data alignment @ %llu", tar_offset);
		ret = -EIO;
		goto out;
	}

	erofs_dbg("parsing %s (mode %05o)", eh.path, st.st_mode);

	d = erofs_rebuild_get_dentry(root, eh.path, tar->aufs, &whout, &opq, true);
	if (IS_ERR(d)) {
		ret = PTR_ERR(d);
		goto out;
	}

	if (!d) {
		/* some tarballs include '.' which indicates the root directory */
		if (!S_ISDIR(st.st_mode)) {
			ret = -ENOTDIR;
			goto out;
		}
		inode = root;
	} else if (opq) {
		/* opaque marker: tag the existing directory and stop here */
		DBG_BUGON(d->type == EROFS_FT_UNKNOWN);
		DBG_BUGON(!d->inode);
		ret = erofs_set_opaque_xattr(d->inode);
		goto out;
	} else if (th->typeflag == '1') {	/* hard link cases */
		struct erofs_dentry *d2;
		bool dumb;

		if (S_ISDIR(st.st_mode)) {
			ret = -EISDIR;
			goto out;
		}

		/* the link name replaces whatever was at this dentry */
		if (d->type != EROFS_FT_UNKNOWN) {
			tarerofs_remove_inode(d->inode);
			erofs_iput(d->inode);
		}
		d->inode = NULL;

		d2 = erofs_rebuild_get_dentry(root, eh.link, tar->aufs,
					      &dumb, &dumb, false);
		if (IS_ERR(d2)) {
			ret = PTR_ERR(d2);
			goto out;
		}
		if (d2->type == EROFS_FT_UNKNOWN) {
			ret = -ENOENT;
			goto out;
		}
		if (S_ISDIR(d2->inode->i_mode)) {
			ret = -EISDIR;
			goto out;
		}
		/* share the target inode and bump its link count */
		inode = erofs_igrab(d2->inode);
		d->inode = inode;
		d->type = d2->type;
		++inode->i_nlink;
		ret = 0;
		goto out;
	} else if (d->type != EROFS_FT_UNKNOWN) {
		/* existing entry: reuse only if both old and new are dirs */
		if (d->type != EROFS_FT_DIR || !S_ISDIR(st.st_mode)) {
			struct erofs_inode *parent = d->inode->i_parent;

			tarerofs_remove_inode(d->inode);
			erofs_iput(d->inode);
			d->inode = parent;
			goto new_inode;
		}
		inode = d->inode;
	} else {
new_inode:
		inode = erofs_new_inode();
		if (IS_ERR(inode)) {
			ret = PTR_ERR(inode);
			goto out;
		}
		inode->i_parent = d->inode;
		d->inode = inode;
		d->type = erofs_mode_to_ftype(st.st_mode);
	}

	if (whout) {
		/* overlayfs whiteout: represented as a 0/0 char device */
		inode->i_mode = (inode->i_mode & ~S_IFMT) | S_IFCHR;
		inode->u.i_rdev = EROFS_WHITEOUT_DEV;
		d->type = EROFS_FT_CHRDEV;

		/*
		 * Mark the parent directory as copied-up to avoid exposing
		 * whiteouts if mounted. See kernel commit b79e05aaa166
		 * ("ovl: no direct iteration for dir with origin xattr")
		 */
		inode->i_parent->whiteouts = true;
	} else {
		inode->i_mode = st.st_mode;
		if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode))
			inode->u.i_rdev = erofs_new_encode_dev(st.st_rdev);
	}

	inode->i_srcpath = strdup(eh.path);
	if (!inode->i_srcpath) {
		ret = -ENOMEM;
		goto out;
	}

	ret = __erofs_fill_inode(inode, &st, eh.path);
	if (ret)
		goto out;
	inode->i_size = st.st_size;

	if (!S_ISDIR(inode->i_mode)) {
		if (S_ISLNK(inode->i_mode)) {
			/* symlink target comes from eh.link, not the payload */
			inode->i_size = strlen(eh.link);
			inode->i_link = malloc(inode->i_size + 1);
			memcpy(inode->i_link, eh.link, inode->i_size + 1);
		} else if (inode->i_size) {
			if (tar->index_mode)
				ret = tarerofs_write_file_index(inode, tar,
								data_offset);
			else
				ret = tarerofs_write_file_data(inode, tar);
			if (ret)
				goto out;
		}
		inode->i_nlink++;
	} else if (!inode->i_nlink) {
		ret = erofs_init_empty_dir(inode);
		if (ret)
			goto out;
	}

	/* per-entry xattrs win over global ones (merge skips duplicates) */
	ret = tarerofs_merge_xattrs(&eh.xattrs, &tar->global.xattrs);
	if (ret)
		goto out;

	ret = tarerofs_apply_xattrs(inode, &eh.xattrs);

out:
	/* eh.path may alias the stack buffer when built from ustar fields */
	if (eh.path != path)
		free(eh.path);
	free(eh.link);
	tarerofs_remove_xattrs(&eh.xattrs);
	return ret;

invalid_tar:
	erofs_err("invalid tar @ %llu", tar_offset);
	ret = -EIO;
	goto out;
}
diff --git a/lib/uuid.c b/lib/uuid.c
new file mode 100644
index 0000000..ec0f9d9
--- /dev/null
+++ b/lib/uuid.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+/*
+ * Copyright (C) 2023 Norbert Lange <nolange79@gmail.com>
+ */
+
+#include <string.h>
+#include <errno.h>
+
+#include "erofs/config.h"
+#include "erofs/defs.h"
+#include "liberofs_uuid.h"
+
+#ifdef HAVE_LIBUUID
+#include <uuid.h>
+#else
+
+#include <stdlib.h>
+#ifdef HAVE_SYS_RANDOM_H
+#include <sys/random.h>
+#else
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#endif
+
/* Flags to be used, will be modified if kernel does not support them */
static unsigned int erofs_grnd_flag =
#ifdef GRND_INSECURE
	GRND_INSECURE;
#else
	0x0004;		/* GRND_INSECURE value for pre-5.6 UAPI headers */
#endif

/*
 * Fill @out with @size random bytes, retrying on EINTR.  When @insecure
 * is set, GRND_INSECURE is tried first and transparently dropped on
 * kernels that reject it.  Returns 0 or -errno.
 * NOTE(review): a short positive read would leave errno stale; fine for
 * the 16-byte requests made here (getrandom() only returns short above
 * 256 bytes) — revisit if reused with larger sizes.
 */
static int s_getrandom(void *out, unsigned size, bool insecure)
{
	unsigned int kflags = erofs_grnd_flag;
	unsigned int flags = insecure ? kflags : 0;

	for (;;)
	{
#ifdef HAVE_SYS_RANDOM_H
		ssize_t r = getrandom(out, size, flags);
#else
		ssize_t r = (ssize_t)syscall(__NR_getrandom, out, size, flags);
#endif
		int err;

		if (r == size)
			break;
		err = errno;
		if (err != EINTR) {
			if (err == EINVAL && kflags) {
				/* Kernel likely does not support GRND_INSECURE */
				erofs_grnd_flag = 0;
				kflags = 0;
				/*
				 * Also clear the flags actually passed to the
				 * syscall; the old code retried with the very
				 * flags the kernel had just rejected, so the
				 * fallback never took effect.
				 */
				flags = 0;
				continue;
			}
			return -err;
		}
	}
	return 0;
}
+#endif
+
+void erofs_uuid_generate(unsigned char *out)
+{
+#ifdef HAVE_LIBUUID
+ uuid_t new_uuid;
+
+ do {
+ uuid_generate(new_uuid);
+ } while (uuid_is_null(new_uuid));
+#else
+ unsigned char new_uuid[16];
+ int res __maybe_unused;
+
+ res = s_getrandom(new_uuid, sizeof(new_uuid), true);
+ BUG_ON(res != 0);
+
+ // UID type + version bits
+ new_uuid[0] = (new_uuid[4 + 2] & 0x0f) | 0x40;
+ new_uuid[1] = (new_uuid[4 + 2 + 2] & 0x3f) | 0x80;
+#endif
+ memcpy(out, new_uuid, sizeof(new_uuid));
+}
+
/*
 * Parse the canonical textual UUID form
 * "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" into 16 binary bytes.
 * Returns 0 on success or -EINVAL on malformed input; @uu is only
 * written on success.
 */
int erofs_uuid_parse(const char *in, unsigned char *uu) {
#ifdef HAVE_LIBUUID
	return uuid_parse((char *)in, uu);
#else
	unsigned char new_uuid[16];
	/* bit i set: a '-' separator follows the i-th output byte */
	unsigned int hypens = ((1U << 3) | (1U << 5) | (1U << 7) | (1U << 9));
	int i;

	for (i = 0; i < sizeof(new_uuid); hypens >>= 1, i++)
	{
		char c[3];
		char *endptr;
		unsigned long val;

		/*
		 * Require two literal hex digits.  Checking in[0]/in[1]
		 * against NUL first prevents reading past the string
		 * terminator (the old code copied in[1] unconditionally),
		 * and the digit check rejects the leading whitespace or
		 * sign that strtoul() would otherwise accept.
		 */
		if (!in[0] || !in[1] ||
		    !strchr("0123456789abcdefABCDEF", in[0]) ||
		    !strchr("0123456789abcdefABCDEF", in[1]))
			return -EINVAL;

		c[0] = in[0];
		c[1] = in[1];
		c[2] = '\0';
		endptr = c;
		val = strtoul(c, &endptr, 16);
		if (endptr - c != 2)
			return -EINVAL;

		in += 2;

		if ((hypens & 1U) != 0) {
			if (*in++ != '-')
				return -EINVAL;
		}
		new_uuid[i] = (unsigned char)val;
	}

	if (*in != '\0')
		return -EINVAL;
	memcpy(uu, new_uuid, sizeof(new_uuid));
	return 0;
#endif
}
diff --git a/lib/uuid_unparse.c b/lib/uuid_unparse.c
new file mode 100644
index 0000000..3255c4b
--- /dev/null
+++ b/lib/uuid_unparse.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+/*
+ * Copyright (C) 2023 Norbert Lange <nolange79@gmail.com>
+ */
+
+#include <stdio.h>
+
+#include "erofs/config.h"
+#include "liberofs_uuid.h"
+
/*
 * Format 16 UUID bytes from @buf as the canonical lowercase textual
 * form "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" (36 chars + NUL) in @out.
 */
void erofs_uuid_unparse_lower(const unsigned char *buf, char *out) {
	static const unsigned char groups[] = { 4, 2, 2, 2, 6 };
	unsigned int g, i = 0;

	for (g = 0; g < sizeof(groups); g++) {
		unsigned int k;

		if (g)
			*out++ = '-';
		for (k = 0; k < groups[g]; k++)
			out += sprintf(out, "%02x", buf[i++]);
	}
}
diff --git a/lib/xattr.c b/lib/xattr.c
index 71ffe3e..6c8ebf4 100644
--- a/lib/xattr.c
+++ b/lib/xattr.c
@@ -17,15 +17,84 @@
#include "erofs/xattr.h"
#include "erofs/cache.h"
#include "erofs/io.h"
+#include "erofs/fragments.h"
+#include "erofs/xxhash.h"
#include "liberofs_private.h"
+#ifndef XATTR_SYSTEM_PREFIX
+#define XATTR_SYSTEM_PREFIX "system."
+#endif
+#ifndef XATTR_SYSTEM_PREFIX_LEN
+#define XATTR_SYSTEM_PREFIX_LEN (sizeof(XATTR_SYSTEM_PREFIX) - 1)
+#endif
+#ifndef XATTR_USER_PREFIX
+#define XATTR_USER_PREFIX "user."
+#endif
+#ifndef XATTR_USER_PREFIX_LEN
+#define XATTR_USER_PREFIX_LEN (sizeof(XATTR_USER_PREFIX) - 1)
+#endif
+#ifndef XATTR_SECURITY_PREFIX
+#define XATTR_SECURITY_PREFIX "security."
+#endif
+#ifndef XATTR_SECURITY_PREFIX_LEN
+#define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1)
+#endif
+#ifndef XATTR_TRUSTED_PREFIX
+#define XATTR_TRUSTED_PREFIX "trusted."
+#endif
+#ifndef XATTR_TRUSTED_PREFIX_LEN
+#define XATTR_TRUSTED_PREFIX_LEN (sizeof(XATTR_TRUSTED_PREFIX) - 1)
+#endif
+#ifndef XATTR_NAME_POSIX_ACL_ACCESS
+#define XATTR_NAME_POSIX_ACL_ACCESS "system.posix_acl_access"
+#endif
+#ifndef XATTR_NAME_POSIX_ACL_DEFAULT
+#define XATTR_NAME_POSIX_ACL_DEFAULT "system.posix_acl_default"
+#endif
+#ifndef XATTR_NAME_SECURITY_SELINUX
+#define XATTR_NAME_SECURITY_SELINUX "security.selinux"
+#endif
+#ifndef XATTR_NAME_SECURITY_CAPABILITY
+#define XATTR_NAME_SECURITY_CAPABILITY "security.capability"
+#endif
+#ifndef OVL_XATTR_NAMESPACE
+#define OVL_XATTR_NAMESPACE "overlay."
+#endif
+#ifndef OVL_XATTR_OPAQUE_POSTFIX
+#define OVL_XATTR_OPAQUE_POSTFIX "opaque"
+#endif
+#ifndef OVL_XATTR_ORIGIN_POSTFIX
+#define OVL_XATTR_ORIGIN_POSTFIX "origin"
+#endif
+#ifndef OVL_XATTR_TRUSTED_PREFIX
+#define OVL_XATTR_TRUSTED_PREFIX XATTR_TRUSTED_PREFIX OVL_XATTR_NAMESPACE
+#endif
+#ifndef OVL_XATTR_OPAQUE
+#define OVL_XATTR_OPAQUE OVL_XATTR_TRUSTED_PREFIX OVL_XATTR_OPAQUE_POSTFIX
+#endif
+#ifndef OVL_XATTR_ORIGIN
+#define OVL_XATTR_ORIGIN OVL_XATTR_TRUSTED_PREFIX OVL_XATTR_ORIGIN_POSTFIX
+#endif
+
#define EA_HASHTABLE_BITS 16
+/* one extra byte for the trailing `\0` of attribute name */
+#define EROFS_XATTR_KSIZE(kvlen) (kvlen[0] + 1)
+#define EROFS_XATTR_KVSIZE(kvlen) (EROFS_XATTR_KSIZE(kvlen) + kvlen[1])
+
+/*
+ * @base_index: the index of the matched predefined short prefix
+ * @prefix: the index of the matched long prefix, if any;
+ * same as base_index otherwise
+ * @prefix_len: the length of the matched long prefix if any;
+ * the length of the matched predefined short prefix otherwise
+ */
struct xattr_item {
+ struct xattr_item *next_shared_xattr;
const char *kvbuf;
unsigned int hash[2], len[2], count;
int shared_xattr_id;
- u8 prefix;
+ unsigned int prefix, base_index, prefix_len;
struct hlist_node node;
};
@@ -36,12 +105,12 @@
static DECLARE_HASHTABLE(ea_hashtable, EA_HASHTABLE_BITS);
-static LIST_HEAD(shared_xattrs_list);
-static unsigned int shared_xattrs_count, shared_xattrs_size;
+static struct xattr_item *shared_xattrs_list;
+static unsigned int shared_xattrs_count;
static struct xattr_prefix {
const char *prefix;
- u16 prefix_len;
+ unsigned int prefix_len;
} xattr_types[] = {
[EROFS_XATTR_INDEX_USER] = {
XATTR_USER_PREFIX,
@@ -61,73 +130,17 @@
}
};
-static unsigned int BKDRHash(char *str, unsigned int len)
-{
- const unsigned int seed = 131313;
- unsigned int hash = 0;
/*
 * A registered long xattr name prefix.
 * @type:       the long prefix string and its length
 * @index:      prefix index assigned to this long prefix
 * @base_index: index of the predefined short prefix it extends
 * @base_len:   length of that predefined short prefix
 */
struct ea_type_node {
	struct list_head list;
	struct xattr_prefix type;
	unsigned int index, base_index, base_len;
};
- while (len) {
- hash = hash * seed + (*str++);
- --len;
- }
- return hash;
-}
+static LIST_HEAD(ea_name_prefixes);
+static unsigned int ea_prefix_count;
-static unsigned int xattr_item_hash(u8 prefix, char *buf,
- unsigned int len[2], unsigned int hash[2])
-{
- hash[0] = BKDRHash(buf, len[0]); /* key */
- hash[1] = BKDRHash(buf + len[0], len[1]); /* value */
-
- return prefix ^ hash[0] ^ hash[1];
-}
-
-static unsigned int put_xattritem(struct xattr_item *item)
-{
- if (item->count > 1)
- return --item->count;
- free(item);
- return 0;
-}
-
-static struct xattr_item *get_xattritem(u8 prefix, char *kvbuf,
- unsigned int len[2])
-{
- struct xattr_item *item;
- unsigned int hash[2], hkey;
-
- hkey = xattr_item_hash(prefix, kvbuf, len, hash);
-
- hash_for_each_possible(ea_hashtable, item, node, hkey) {
- if (prefix == item->prefix &&
- item->len[0] == len[0] && item->len[1] == len[1] &&
- item->hash[0] == hash[0] && item->hash[1] == hash[1] &&
- !memcmp(kvbuf, item->kvbuf, len[0] + len[1])) {
- free(kvbuf);
- ++item->count;
- return item;
- }
- }
-
- item = malloc(sizeof(*item));
- if (!item) {
- free(kvbuf);
- return ERR_PTR(-ENOMEM);
- }
- INIT_HLIST_NODE(&item->node);
- item->count = 1;
- item->kvbuf = kvbuf;
- item->len[0] = len[0];
- item->len[1] = len[1];
- item->hash[0] = hash[0];
- item->hash[1] = hash[1];
- item->shared_xattr_id = -1;
- item->prefix = prefix;
- hash_add(ea_hashtable, &item->node, hkey);
- return item;
-}
-
-static bool match_prefix(const char *key, u8 *index, u16 *len)
+static bool match_prefix(const char *key, unsigned int *index,
+ unsigned int *len)
{
struct xattr_prefix *p;
@@ -141,21 +154,98 @@
return false;
}
/* Classic BKDR string hash over @len bytes (131313 is the usual seed). */
static unsigned int BKDRHash(char *str, unsigned int len)
{
	unsigned int acc = 0;
	unsigned int i;

	for (i = 0; i < len; i++)
		acc = acc * 131313u + str[i];
	return acc;
}
+
/*
 * Hash the key and value halves of @buf (key occupies the first len[0]
 * bytes, the value the len[1] bytes right after it).  Both sub-hashes
 * are stored into @hash and their XOR is returned as the table key.
 */
static unsigned int xattr_item_hash(char *buf, unsigned int len[2],
				    unsigned int hash[2])
{
	hash[0] = BKDRHash(buf, len[0]); /* key */
	hash[1] = BKDRHash(buf + len[0], len[1]); /* value */
	return hash[0] ^ hash[1];
}
+
+static unsigned int put_xattritem(struct xattr_item *item)
+{
+ if (item->count > 1)
+ return --item->count;
+ free(item);
+ return 0;
+}
+
/*
 * Look up or create the deduplicated xattr_item for @kvbuf.
 *
 * Takes ownership of @kvbuf: it is freed on dedup hit or on failure,
 * and kept (referenced by the new item) otherwise.  The name must start
 * with a known predefined prefix; a matching registered long prefix
 * (ea_name_prefixes) further overrides item->prefix/prefix_len.
 * Returns the refcounted item or ERR_PTR(-ENOMEM/-ENODATA).
 */
static struct xattr_item *get_xattritem(char *kvbuf, unsigned int len[2])
{
	struct xattr_item *item;
	struct ea_type_node *tnode;
	unsigned int hash[2], hkey;

	hkey = xattr_item_hash(kvbuf, len, hash);
	hash_for_each_possible(ea_hashtable, item, node, hkey) {
		/* compare lengths and sub-hashes before the full memcmp */
		if (item->len[0] == len[0] && item->len[1] == len[1] &&
		    item->hash[0] == hash[0] && item->hash[1] == hash[1] &&
		    !memcmp(kvbuf, item->kvbuf, len[0] + len[1])) {
			free(kvbuf);
			++item->count;
			return item;
		}
	}

	item = malloc(sizeof(*item));
	if (!item) {
		free(kvbuf);
		return ERR_PTR(-ENOMEM);
	}

	/* an unknown name prefix means the xattr cannot be stored */
	if (!match_prefix(kvbuf, &item->base_index, &item->prefix_len)) {
		free(item);
		free(kvbuf);
		return ERR_PTR(-ENODATA);
	}
	DBG_BUGON(len[0] < item->prefix_len);

	INIT_HLIST_NODE(&item->node);
	item->count = 1;
	item->kvbuf = kvbuf;
	item->len[0] = len[0];
	item->len[1] = len[1];
	item->hash[0] = hash[0];
	item->hash[1] = hash[1];
	item->shared_xattr_id = -1;
	item->prefix = item->base_index;

	/* prefer a registered long prefix over the short predefined one */
	list_for_each_entry(tnode, &ea_name_prefixes, list) {
		if (item->base_index == tnode->base_index &&
		    !strncmp(tnode->type.prefix, kvbuf,
			     tnode->type.prefix_len)) {
			item->prefix = tnode->index;
			item->prefix_len = tnode->type.prefix_len;
			break;
		}
	}
	hash_add(ea_hashtable, &item->node, hkey);
	return item;
}
+
static struct xattr_item *parse_one_xattr(const char *path, const char *key,
unsigned int keylen)
{
ssize_t ret;
- u8 prefix;
- u16 prefixlen;
unsigned int len[2];
char *kvbuf;
erofs_dbg("parse xattr [%s] of %s", path, key);
- if (!match_prefix(key, &prefix, &prefixlen))
- return ERR_PTR(-ENODATA);
-
- DBG_BUGON(keylen < prefixlen);
+ /* length of the key */
+ len[0] = keylen;
/* determine length of the value */
#ifdef HAVE_LGETXATTR
@@ -170,19 +260,18 @@
len[1] = ret;
/* allocate key-value buffer */
- len[0] = keylen - prefixlen;
-
- kvbuf = malloc(len[0] + len[1]);
+ kvbuf = malloc(EROFS_XATTR_KVSIZE(len));
if (!kvbuf)
return ERR_PTR(-ENOMEM);
- memcpy(kvbuf, key + prefixlen, len[0]);
+ memcpy(kvbuf, key, EROFS_XATTR_KSIZE(len));
if (len[1]) {
/* copy value to buffer */
#ifdef HAVE_LGETXATTR
- ret = lgetxattr(path, key, kvbuf + len[0], len[1]);
+ ret = lgetxattr(path, key, kvbuf + EROFS_XATTR_KSIZE(len),
+ len[1]);
#elif defined(__APPLE__)
- ret = getxattr(path, key, kvbuf + len[0], len[1], 0,
- XATTR_NOFOLLOW);
+ ret = getxattr(path, key, kvbuf + EROFS_XATTR_KSIZE(len),
+ len[1], 0, XATTR_NOFOLLOW);
#else
free(kvbuf);
return ERR_PTR(-EOPNOTSUPP);
@@ -197,7 +286,7 @@
len[1] = ret;
}
}
- return get_xattritem(prefix, kvbuf, len);
+ return get_xattritem(kvbuf, len);
}
static struct xattr_item *erofs_get_selabel_xattr(const char *srcpath,
@@ -210,12 +299,10 @@
unsigned int len[2];
char *kvbuf, *fspath;
-#ifdef WITH_ANDROID
if (cfg.mount_point)
ret = asprintf(&fspath, "/%s/%s", cfg.mount_point,
erofs_fspath(srcpath));
else
-#endif
ret = asprintf(&fspath, "/%s", erofs_fspath(srcpath));
if (ret <= 0)
return ERR_PTR(-ENOMEM);
@@ -234,16 +321,17 @@
return NULL;
}
- len[0] = sizeof("selinux") - 1;
+ len[0] = sizeof(XATTR_NAME_SECURITY_SELINUX) - 1;
len[1] = strlen(secontext);
- kvbuf = malloc(len[0] + len[1] + 1);
+ kvbuf = malloc(EROFS_XATTR_KVSIZE(len));
if (!kvbuf) {
freecon(secontext);
return ERR_PTR(-ENOMEM);
}
- sprintf(kvbuf, "selinux%s", secontext);
+ sprintf(kvbuf, "%s", XATTR_NAME_SECURITY_SELINUX);
+ memcpy(kvbuf + EROFS_XATTR_KSIZE(len), secontext, len[1]);
freecon(secontext);
- return get_xattritem(EROFS_XATTR_INDEX_SECURITY, kvbuf, len);
+ return get_xattritem(kvbuf, len);
}
#endif
return NULL;
@@ -263,18 +351,8 @@
static int shared_xattr_add(struct xattr_item *item)
{
- struct inode_xattr_node *node = malloc(sizeof(*node));
-
- if (!node)
- return -ENOMEM;
-
- init_list_head(&node->list);
- node->item = item;
- list_add(&node->list, &shared_xattrs_list);
-
- shared_xattrs_size += sizeof(struct erofs_xattr_entry);
- shared_xattrs_size = EROFS_XATTR_ALIGN(shared_xattrs_size +
- item->len[0] + item->len[1]);
+ item->next_shared_xattr = shared_xattrs_list;
+ shared_xattrs_list = item;
return ++shared_xattrs_count;
}
@@ -299,6 +377,18 @@
if (cfg.sehnd && !strcmp(key, XATTR_SECURITY_PREFIX "selinux"))
return true;
#endif
+
+ /* skip xattrs with unidentified "system." prefix */
+ if (!strncmp(key, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) {
+ if (!strcmp(key, XATTR_NAME_POSIX_ACL_ACCESS) ||
+ !strcmp(key, XATTR_NAME_POSIX_ACL_DEFAULT)) {
+ return false;
+ } else {
+ erofs_warn("skip unidentified xattr: %s", key);
+ return true;
+ }
+ }
+
return false;
}
@@ -317,7 +407,7 @@
unsigned int keylen;
struct xattr_item *item;
- if (kllen < 0 && errno != ENODATA) {
+ if (kllen < 0 && errno != ENODATA && errno != EOPNOTSUPP) {
erofs_err("llistxattr to get the size of names for %s failed",
path);
return -errno;
@@ -384,6 +474,59 @@
return ret;
}
+int erofs_setxattr(struct erofs_inode *inode, char *key,
+ const void *value, size_t size)
+{
+ char *kvbuf;
+ unsigned int len[2];
+ struct xattr_item *item;
+
+ len[0] = strlen(key);
+ len[1] = size;
+
+ kvbuf = malloc(EROFS_XATTR_KVSIZE(len));
+ if (!kvbuf)
+ return -ENOMEM;
+
+ memcpy(kvbuf, key, EROFS_XATTR_KSIZE(len));
+ memcpy(kvbuf + EROFS_XATTR_KSIZE(len), value, size);
+
+ item = get_xattritem(kvbuf, len);
+ if (IS_ERR(item))
+ return PTR_ERR(item);
+ DBG_BUGON(!item);
+
+ return erofs_xattr_add(&inode->i_xattrs, item);
+}
+
+static void erofs_removexattr(struct erofs_inode *inode, const char *key)
+{
+ struct inode_xattr_node *node, *n;
+
+ list_for_each_entry_safe(node, n, &inode->i_xattrs, list) {
+ if (!strcmp(node->item->kvbuf, key)) {
+ list_del(&node->list);
+ put_xattritem(node->item);
+ free(node);
+ }
+ }
+}
+
+int erofs_set_opaque_xattr(struct erofs_inode *inode)
+{
+ return erofs_setxattr(inode, OVL_XATTR_OPAQUE, "y", 1);
+}
+
+void erofs_clear_opaque_xattr(struct erofs_inode *inode)
+{
+ erofs_removexattr(inode, OVL_XATTR_OPAQUE);
+}
+
+int erofs_set_origin_xattr(struct erofs_inode *inode)
+{
+ return erofs_setxattr(inode, OVL_XATTR_ORIGIN, NULL, 0);
+}
+
#ifdef WITH_ANDROID
static int erofs_droid_xattr_set_caps(struct erofs_inode *inode)
{
@@ -396,26 +539,25 @@
if (!capabilities)
return 0;
- len[0] = sizeof("capability") - 1;
+ len[0] = sizeof(XATTR_NAME_SECURITY_CAPABILITY) - 1;
len[1] = sizeof(caps);
- kvbuf = malloc(len[0] + len[1]);
+ kvbuf = malloc(EROFS_XATTR_KVSIZE(len));
if (!kvbuf)
return -ENOMEM;
- memcpy(kvbuf, "capability", len[0]);
+ sprintf(kvbuf, "%s", XATTR_NAME_SECURITY_CAPABILITY);
caps.magic_etc = VFS_CAP_REVISION_2 | VFS_CAP_FLAGS_EFFECTIVE;
caps.data[0].permitted = (u32) capabilities;
caps.data[0].inheritable = 0;
caps.data[1].permitted = (u32) (capabilities >> 32);
caps.data[1].inheritable = 0;
- memcpy(kvbuf + len[0], &caps, len[1]);
+ memcpy(kvbuf + EROFS_XATTR_KSIZE(len), &caps, len[1]);
- item = get_xattritem(EROFS_XATTR_INDEX_SECURITY, kvbuf, len);
+ item = get_xattritem(kvbuf, len);
if (IS_ERR(item))
return PTR_ERR(item);
- if (!item)
- return 0;
+ DBG_BUGON(!item);
return erofs_xattr_add(&inode->i_xattrs, item);
}
@@ -426,10 +568,9 @@
}
#endif
-int erofs_prepare_xattr_ibody(struct erofs_inode *inode)
+int erofs_scan_file_xattrs(struct erofs_inode *inode)
{
int ret;
- struct inode_xattr_node *node;
struct list_head *ixattrs = &inode->i_xattrs;
/* check if xattr is disabled */
@@ -440,24 +581,109 @@
if (ret < 0)
return ret;
- ret = erofs_droid_xattr_set_caps(inode);
- if (ret < 0)
- return ret;
+ return erofs_droid_xattr_set_caps(inode);
+}
- if (list_empty(ixattrs))
+int erofs_read_xattrs_from_disk(struct erofs_inode *inode)
+{
+ ssize_t kllen;
+ char *keylst, *key;
+ int ret;
+
+ init_list_head(&inode->i_xattrs);
+ kllen = erofs_listxattr(inode, NULL, 0);
+ if (kllen < 0)
+ return kllen;
+ if (kllen <= 1)
return 0;
+ keylst = malloc(kllen);
+ if (!keylst)
+ return -ENOMEM;
+
+ ret = erofs_listxattr(inode, keylst, kllen);
+ if (ret < 0)
+ goto out;
+
+ for (key = keylst; key < keylst + kllen; key += strlen(key) + 1) {
+ void *value = NULL;
+ size_t size = 0;
+
+ if (!strcmp(key, OVL_XATTR_OPAQUE)) {
+ if (!S_ISDIR(inode->i_mode)) {
+ erofs_dbg("file %s: opaque xattr on non-dir",
+ inode->i_srcpath);
+ ret = -EINVAL;
+ goto out;
+ }
+ inode->opaque = true;
+ }
+
+ ret = erofs_getxattr(inode, key, NULL, 0);
+ if (ret < 0)
+ goto out;
+ if (ret) {
+ size = ret;
+ value = malloc(size);
+ if (!value) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = erofs_getxattr(inode, key, value, size);
+ if (ret < 0) {
+ free(value);
+ goto out;
+ }
+ DBG_BUGON(ret != size);
+ } else if (S_ISDIR(inode->i_mode) &&
+ !strcmp(key, OVL_XATTR_ORIGIN)) {
+ ret = 0;
+ inode->whiteouts = true;
+ continue;
+ }
+
+ ret = erofs_setxattr(inode, key, value, size);
+ free(value);
+ if (ret)
+ break;
+ }
+out:
+ free(keylst);
+ return ret;
+}
+
+static inline unsigned int erofs_next_xattr_align(unsigned int pos,
+ struct xattr_item *item)
+{
+ return EROFS_XATTR_ALIGN(pos + sizeof(struct erofs_xattr_entry) +
+ item->len[0] + item->len[1] - item->prefix_len);
+}
+
+int erofs_prepare_xattr_ibody(struct erofs_inode *inode)
+{
+ int ret;
+ struct inode_xattr_node *node;
+ struct list_head *ixattrs = &inode->i_xattrs;
+ unsigned int h_shared_count;
+
+ if (list_empty(ixattrs)) {
+ inode->xattr_isize = 0;
+ return 0;
+ }
+
/* get xattr ibody size */
+ h_shared_count = 0;
ret = sizeof(struct erofs_xattr_ibody_header);
list_for_each_entry(node, ixattrs, list) {
- const struct xattr_item *item = node->item;
+ struct xattr_item *item = node->item;
- if (item->shared_xattr_id >= 0) {
+ if (item->shared_xattr_id >= 0 && h_shared_count < UCHAR_MAX) {
+ ++h_shared_count;
ret += sizeof(__le32);
continue;
}
- ret += sizeof(struct erofs_xattr_entry);
- ret = EROFS_XATTR_ALIGN(ret + item->len[0] + item->len[1]);
+ ret = erofs_next_xattr_align(ret, item);
}
inode->xattr_isize = ret;
return ret;
@@ -467,7 +693,7 @@
{
int ret;
DIR *_dir;
- struct stat64 st;
+ struct stat st;
_dir = opendir(path);
if (!_dir) {
@@ -502,7 +728,7 @@
goto fail;
}
- ret = lstat64(buf, &st);
+ ret = lstat(buf, &st);
if (ret) {
ret = -errno;
goto fail;
@@ -545,32 +771,17 @@
if (sharedxattrs)
return;
- shared_xattrs_size = shared_xattrs_count = 0;
+ shared_xattrs_count = 0;
}
-static bool erofs_bh_flush_write_shared_xattrs(struct erofs_buffer_head *bh)
-{
- void *buf = bh->fsprivate;
- int err = dev_write(buf, erofs_btell(bh, false), shared_xattrs_size);
-
- if (err)
- return false;
- free(buf);
- return erofs_bh_flush_generic_end(bh);
-}
-
-static struct erofs_bhops erofs_write_shared_xattrs_bhops = {
- .flush = erofs_bh_flush_write_shared_xattrs,
-};
-
-static int comp_xattr_item(const void *a, const void *b)
+static int comp_shared_xattr_item(const void *a, const void *b)
{
const struct xattr_item *ia, *ib;
unsigned int la, lb;
int ret;
- ia = (*((const struct inode_xattr_node **)a))->item;
- ib = (*((const struct inode_xattr_node **)b))->item;
+ ia = *((const struct xattr_item **)a);
+ ib = *((const struct xattr_item **)b);
la = ia->len[0] + ia->len[1];
lb = ib->len[0] + ib->len[1];
@@ -581,21 +792,88 @@
return la > lb;
}
-int erofs_build_shared_xattrs_from_path(const char *path)
+int erofs_xattr_write_name_prefixes(struct erofs_sb_info *sbi, FILE *f)
+{
+ struct ea_type_node *tnode;
+ off_t offset;
+
+ if (!ea_prefix_count)
+ return 0;
+ offset = ftello(f);
+ if (offset < 0)
+ return -errno;
+ if (offset > UINT32_MAX)
+ return -EOVERFLOW;
+
+ offset = round_up(offset, 4);
+ if (fseek(f, offset, SEEK_SET))
+ return -errno;
+ sbi->xattr_prefix_start = (u32)offset >> 2;
+ sbi->xattr_prefix_count = ea_prefix_count;
+
+ list_for_each_entry(tnode, &ea_name_prefixes, list) {
+ union {
+ struct {
+ __le16 size;
+ struct erofs_xattr_long_prefix prefix;
+ } s;
+ u8 data[EROFS_NAME_LEN + 2 +
+ sizeof(struct erofs_xattr_long_prefix)];
+ } u;
+ int len, infix_len;
+
+ u.s.prefix.base_index = tnode->base_index;
+ infix_len = tnode->type.prefix_len - tnode->base_len;
+ memcpy(u.s.prefix.infix, tnode->type.prefix + tnode->base_len,
+ infix_len);
+ len = sizeof(struct erofs_xattr_long_prefix) + infix_len;
+ u.s.size = cpu_to_le16(len);
+ if (fwrite(&u.s, sizeof(__le16) + len, 1, f) != 1)
+ return -EIO;
+ offset = round_up(offset + sizeof(__le16) + len, 4);
+ if (fseek(f, offset, SEEK_SET))
+ return -errno;
+ }
+ erofs_sb_set_fragments(sbi);
+ erofs_sb_set_xattr_prefixes(sbi);
+ return 0;
+}
+
+static void erofs_write_xattr_entry(char *buf, struct xattr_item *item)
+{
+ struct erofs_xattr_entry entry = {
+ .e_name_index = item->prefix,
+ .e_name_len = item->len[0] - item->prefix_len,
+ .e_value_size = cpu_to_le16(item->len[1]),
+ };
+
+ memcpy(buf, &entry, sizeof(entry));
+ buf += sizeof(struct erofs_xattr_entry);
+ memcpy(buf, item->kvbuf + item->prefix_len,
+ item->len[0] - item->prefix_len);
+ buf += item->len[0] - item->prefix_len;
+ memcpy(buf, item->kvbuf + item->len[0] + 1, item->len[1]);
+
+ erofs_dbg("writing xattr %d %s (%d %s)", item->base_index, item->kvbuf,
+ item->prefix, item->kvbuf + item->prefix_len);
+}
+
+int erofs_build_shared_xattrs_from_path(struct erofs_sb_info *sbi, const char *path)
{
int ret;
struct erofs_buffer_head *bh;
- struct inode_xattr_node *node, *n, **sorted_n;
+ struct xattr_item *item, *n, **sorted_n;
char *buf;
unsigned int p, i;
erofs_off_t off;
+ erofs_off_t shared_xattrs_size = 0;
/* check if xattr or shared xattr is disabled */
if (cfg.c_inline_xattr_tolerance < 0 ||
cfg.c_inline_xattr_tolerance == INT_MAX)
return 0;
- if (shared_xattrs_size || shared_xattrs_count) {
+ if (shared_xattrs_count) {
DBG_BUGON(1);
return -EINVAL;
}
@@ -604,15 +882,34 @@
if (ret)
return ret;
- if (!shared_xattrs_size)
+ if (!shared_xattrs_count)
goto out;
- buf = calloc(1, shared_xattrs_size);
- if (!buf)
+ sorted_n = malloc((shared_xattrs_count + 1) * sizeof(n));
+ if (!sorted_n)
return -ENOMEM;
+ i = 0;
+ while (shared_xattrs_list) {
+ item = shared_xattrs_list;
+ sorted_n[i++] = item;
+ shared_xattrs_list = item->next_shared_xattr;
+ shared_xattrs_size = erofs_next_xattr_align(shared_xattrs_size,
+ item);
+ }
+ DBG_BUGON(i != shared_xattrs_count);
+ sorted_n[i] = NULL;
+ qsort(sorted_n, shared_xattrs_count, sizeof(n), comp_shared_xattr_item);
+
+ buf = calloc(1, shared_xattrs_size);
+ if (!buf) {
+ free(sorted_n);
+ return -ENOMEM;
+ }
+
bh = erofs_balloc(XATTR, shared_xattrs_size, 0, 0);
if (IS_ERR(bh)) {
+ free(sorted_n);
free(buf);
return PTR_ERR(bh);
}
@@ -621,51 +918,33 @@
erofs_mapbh(bh->block);
off = erofs_btell(bh, false);
- sbi.xattr_blkaddr = off / EROFS_BLKSIZ;
- off %= EROFS_BLKSIZ;
+ sbi->xattr_blkaddr = off / erofs_blksiz(sbi);
+ off %= erofs_blksiz(sbi);
p = 0;
-
- sorted_n = malloc(shared_xattrs_count * sizeof(n));
- if (!sorted_n)
- return -ENOMEM;
- i = 0;
- list_for_each_entry_safe(node, n, &shared_xattrs_list, list) {
- list_del(&node->list);
- sorted_n[i++] = node;
- }
- DBG_BUGON(i != shared_xattrs_count);
- qsort(sorted_n, shared_xattrs_count, sizeof(n), comp_xattr_item);
-
for (i = 0; i < shared_xattrs_count; i++) {
- struct inode_xattr_node *const tnode = sorted_n[i];
- struct xattr_item *const item = tnode->item;
- const struct erofs_xattr_entry entry = {
- .e_name_index = item->prefix,
- .e_name_len = item->len[0],
- .e_value_size = cpu_to_le16(item->len[1])
- };
-
- item->shared_xattr_id = (off + p) /
- sizeof(struct erofs_xattr_entry);
-
- memcpy(buf + p, &entry, sizeof(entry));
- p += sizeof(struct erofs_xattr_entry);
- memcpy(buf + p, item->kvbuf, item->len[0] + item->len[1]);
- p = EROFS_XATTR_ALIGN(p + item->len[0] + item->len[1]);
- free(tnode);
+ item = sorted_n[i];
+ erofs_write_xattr_entry(buf + p, item);
+ item->next_shared_xattr = sorted_n[i + 1];
+ item->shared_xattr_id = (off + p) / sizeof(__le32);
+ p = erofs_next_xattr_align(p, item);
}
-
+ shared_xattrs_list = sorted_n[0];
free(sorted_n);
- bh->fsprivate = buf;
- bh->op = &erofs_write_shared_xattrs_bhops;
+ bh->op = &erofs_drop_directly_bhops;
+ ret = dev_write(sbi, buf, erofs_btell(bh, false), shared_xattrs_size);
+ free(buf);
+ erofs_bdrop(bh, false);
out:
erofs_cleanxattrs(true);
- return 0;
+ return ret;
}
-char *erofs_export_xattr_ibody(struct list_head *ixattrs, unsigned int size)
+char *erofs_export_xattr_ibody(struct erofs_inode *inode)
{
+ struct list_head *ixattrs = &inode->i_xattrs;
+ unsigned int size = inode->xattr_isize;
struct inode_xattr_node *node, *n;
+ struct xattr_item *item;
struct erofs_xattr_ibody_header *header;
LIST_HEAD(ilst);
unsigned int p;
@@ -677,14 +956,35 @@
header = (struct erofs_xattr_ibody_header *)buf;
header->h_shared_count = 0;
+ if (cfg.c_xattr_name_filter) {
+ u32 name_filter = 0;
+ int hashbit;
+ unsigned int base_len;
+
+ list_for_each_entry(node, ixattrs, list) {
+ item = node->item;
+ base_len = xattr_types[item->base_index].prefix_len;
+ hashbit = xxh32(item->kvbuf + base_len,
+ item->len[0] - base_len,
+ EROFS_XATTR_FILTER_SEED + item->base_index) &
+ (EROFS_XATTR_FILTER_BITS - 1);
+ name_filter |= (1UL << hashbit);
+ }
+ name_filter = EROFS_XATTR_FILTER_DEFAULT & ~name_filter;
+
+ header->h_name_filter = cpu_to_le32(name_filter);
+ if (header->h_name_filter)
+ erofs_sb_set_xattr_filter(inode->sbi);
+ }
+
p = sizeof(struct erofs_xattr_ibody_header);
list_for_each_entry_safe(node, n, ixattrs, list) {
- struct xattr_item *const item = node->item;
-
+ item = node->item;
list_del(&node->list);
/* move inline xattrs to the onstack list */
- if (item->shared_xattr_id < 0) {
+ if (item->shared_xattr_id < 0 ||
+ header->h_shared_count >= UCHAR_MAX) {
list_add(&node->list, &ilst);
continue;
}
@@ -697,18 +997,9 @@
}
list_for_each_entry_safe(node, n, &ilst, list) {
- struct xattr_item *const item = node->item;
- const struct erofs_xattr_entry entry = {
- .e_name_index = item->prefix,
- .e_name_len = item->len[0],
- .e_value_size = cpu_to_le16(item->len[1])
- };
-
- memcpy(buf + p, &entry, sizeof(entry));
- p += sizeof(struct erofs_xattr_entry);
- memcpy(buf + p, item->kvbuf, item->len[0] + item->len[1]);
- p = EROFS_XATTR_ALIGN(p + item->len[0] + item->len[1]);
-
+ item = node->item;
+ erofs_write_xattr_entry(buf + p, item);
+ p = erofs_next_xattr_align(p, item);
list_del(&node->list);
free(node);
put_xattritem(item);
@@ -716,3 +1007,656 @@
DBG_BUGON(p > size);
return buf;
}
+
+struct xattr_iter {
+ char page[EROFS_MAX_BLOCK_SIZE];
+
+ void *kaddr;
+
+ erofs_blk_t blkaddr;
+ unsigned int ofs;
+ struct erofs_sb_info *sbi;
+};
+
+static int init_inode_xattrs(struct erofs_inode *vi)
+{
+ struct erofs_sb_info *sbi = vi->sbi;
+ struct xattr_iter it;
+ unsigned int i;
+ struct erofs_xattr_ibody_header *ih;
+ int ret = 0;
+
+ /* the most case is that xattrs of this inode are initialized. */
+ if (vi->flags & EROFS_I_EA_INITED)
+ return ret;
+
+ /*
+ * bypass all xattr operations if ->xattr_isize is not greater than
+ * sizeof(struct erofs_xattr_ibody_header), in detail:
+ * 1) it is not enough to contain erofs_xattr_ibody_header then
+ * ->xattr_isize should be 0 (it means no xattr);
+ * 2) it is just to contain erofs_xattr_ibody_header, which is on-disk
+ * undefined right now (maybe use later with some new sb feature).
+ */
+ if (vi->xattr_isize == sizeof(struct erofs_xattr_ibody_header)) {
+ erofs_err("xattr_isize %d of nid %llu is not supported yet",
+ vi->xattr_isize, vi->nid);
+ return -EOPNOTSUPP;
+ } else if (vi->xattr_isize < sizeof(struct erofs_xattr_ibody_header)) {
+ if (vi->xattr_isize) {
+ erofs_err("bogus xattr ibody @ nid %llu", vi->nid);
+ DBG_BUGON(1);
+ return -EFSCORRUPTED; /* xattr ondisk layout error */
+ }
+ return -ENOATTR;
+ }
+
+ it.blkaddr = erofs_blknr(sbi, erofs_iloc(vi) + vi->inode_isize);
+ it.ofs = erofs_blkoff(sbi, erofs_iloc(vi) + vi->inode_isize);
+
+ ret = blk_read(sbi, 0, it.page, it.blkaddr, 1);
+ if (ret < 0)
+ return -EIO;
+
+ it.kaddr = it.page;
+ ih = (struct erofs_xattr_ibody_header *)(it.kaddr + it.ofs);
+
+ vi->xattr_shared_count = ih->h_shared_count;
+ vi->xattr_shared_xattrs = malloc(vi->xattr_shared_count * sizeof(uint));
+ if (!vi->xattr_shared_xattrs)
+ return -ENOMEM;
+
+ /* let's skip ibody header */
+ it.ofs += sizeof(struct erofs_xattr_ibody_header);
+
+ for (i = 0; i < vi->xattr_shared_count; ++i) {
+ if (it.ofs >= erofs_blksiz(sbi)) {
+ /* cannot be unaligned */
+ DBG_BUGON(it.ofs != erofs_blksiz(sbi));
+
+ ret = blk_read(sbi, 0, it.page, ++it.blkaddr, 1);
+ if (ret < 0) {
+ free(vi->xattr_shared_xattrs);
+ vi->xattr_shared_xattrs = NULL;
+ return -EIO;
+ }
+
+ it.kaddr = it.page;
+ it.ofs = 0;
+ }
+ vi->xattr_shared_xattrs[i] =
+ le32_to_cpu(*(__le32 *)(it.kaddr + it.ofs));
+ it.ofs += sizeof(__le32);
+ }
+
+ vi->flags |= EROFS_I_EA_INITED;
+
+ return ret;
+}
+
+/*
+ * the general idea for these return values is
+ * if 0 is returned, go on processing the current xattr;
+ * 1 (> 0) is returned, skip this round to process the next xattr;
+ * -err (< 0) is returned, an error (maybe ENOXATTR) occurred
+ * and need to be handled
+ */
+struct xattr_iter_handlers {
+ int (*entry)(struct xattr_iter *_it, struct erofs_xattr_entry *entry);
+ int (*name)(struct xattr_iter *_it, unsigned int processed, char *buf,
+ unsigned int len);
+ int (*alloc_buffer)(struct xattr_iter *_it, unsigned int value_sz);
+ void (*value)(struct xattr_iter *_it, unsigned int processed, char *buf,
+ unsigned int len);
+};
+
+static inline int xattr_iter_fixup(struct xattr_iter *it)
+{
+ struct erofs_sb_info *sbi = it->sbi;
+ int ret;
+
+ if (it->ofs < erofs_blksiz(sbi))
+ return 0;
+
+ it->blkaddr += erofs_blknr(sbi, it->ofs);
+
+ ret = blk_read(sbi, 0, it->page, it->blkaddr, 1);
+ if (ret < 0)
+ return -EIO;
+
+ it->kaddr = it->page;
+ it->ofs = erofs_blkoff(sbi, it->ofs);
+ return 0;
+}
+
+static int inline_xattr_iter_pre(struct xattr_iter *it,
+ struct erofs_inode *vi)
+{
+ struct erofs_sb_info *sbi = vi->sbi;
+ unsigned int xattr_header_sz, inline_xattr_ofs;
+ int ret;
+
+ xattr_header_sz = inlinexattr_header_size(vi);
+ if (xattr_header_sz >= vi->xattr_isize) {
+ DBG_BUGON(xattr_header_sz > vi->xattr_isize);
+ return -ENOATTR;
+ }
+
+ inline_xattr_ofs = vi->inode_isize + xattr_header_sz;
+
+ it->blkaddr = erofs_blknr(sbi, erofs_iloc(vi) + inline_xattr_ofs);
+ it->ofs = erofs_blkoff(sbi, erofs_iloc(vi) + inline_xattr_ofs);
+
+ ret = blk_read(sbi, 0, it->page, it->blkaddr, 1);
+ if (ret < 0)
+ return -EIO;
+
+ it->kaddr = it->page;
+ return vi->xattr_isize - xattr_header_sz;
+}
+
+/*
+ * Regardless of success or failure, `xattr_foreach' will end up with
+ * `ofs' pointing to the next xattr item rather than an arbitrary position.
+ */
+static int xattr_foreach(struct xattr_iter *it,
+ const struct xattr_iter_handlers *op,
+ unsigned int *tlimit)
+{
+ struct erofs_sb_info *sbi = it->sbi;
+ struct erofs_xattr_entry entry;
+ unsigned int value_sz, processed, slice;
+ int err;
+
+ /* 0. fixup blkaddr, ofs, ipage */
+ err = xattr_iter_fixup(it);
+ if (err)
+ return err;
+
+ /*
+ * 1. read xattr entry to the memory,
+ * since we do EROFS_XATTR_ALIGN
+ * therefore entry should be in the page
+ */
+ entry = *(struct erofs_xattr_entry *)(it->kaddr + it->ofs);
+ if (tlimit) {
+ unsigned int entry_sz = erofs_xattr_entry_size(&entry);
+
+ /* xattr on-disk corruption: xattr entry beyond xattr_isize */
+ if (*tlimit < entry_sz) {
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
+ }
+ *tlimit -= entry_sz;
+ }
+
+ it->ofs += sizeof(struct erofs_xattr_entry);
+ value_sz = le16_to_cpu(entry.e_value_size);
+
+ /* handle entry */
+ err = op->entry(it, &entry);
+ if (err) {
+ it->ofs += entry.e_name_len + value_sz;
+ goto out;
+ }
+
+ /* 2. handle xattr name (ofs will finally be at the end of name) */
+ processed = 0;
+
+ while (processed < entry.e_name_len) {
+ if (it->ofs >= erofs_blksiz(sbi)) {
+ DBG_BUGON(it->ofs > erofs_blksiz(sbi));
+
+ err = xattr_iter_fixup(it);
+ if (err)
+ goto out;
+ it->ofs = 0;
+ }
+
+ slice = min_t(unsigned int, erofs_blksiz(sbi) - it->ofs,
+ entry.e_name_len - processed);
+
+ /* handle name */
+ err = op->name(it, processed, it->kaddr + it->ofs, slice);
+ if (err) {
+ it->ofs += entry.e_name_len - processed + value_sz;
+ goto out;
+ }
+
+ it->ofs += slice;
+ processed += slice;
+ }
+
+ /* 3. handle xattr value */
+ processed = 0;
+
+ if (op->alloc_buffer) {
+ err = op->alloc_buffer(it, value_sz);
+ if (err) {
+ it->ofs += value_sz;
+ goto out;
+ }
+ }
+
+ while (processed < value_sz) {
+ if (it->ofs >= erofs_blksiz(sbi)) {
+ DBG_BUGON(it->ofs > erofs_blksiz(sbi));
+
+ err = xattr_iter_fixup(it);
+ if (err)
+ goto out;
+ it->ofs = 0;
+ }
+
+ slice = min_t(unsigned int, erofs_blksiz(sbi) - it->ofs,
+ value_sz - processed);
+ op->value(it, processed, it->kaddr + it->ofs, slice);
+ it->ofs += slice;
+ processed += slice;
+ }
+
+out:
+ /* xattrs should be 4-byte aligned (on-disk constraint) */
+ it->ofs = EROFS_XATTR_ALIGN(it->ofs);
+ return err < 0 ? err : 0;
+}
+
+struct getxattr_iter {
+ struct xattr_iter it;
+
+ int buffer_size, index, infix_len;
+ char *buffer;
+ const char *name;
+ size_t len;
+};
+
+static int erofs_xattr_long_entrymatch(struct getxattr_iter *it,
+ struct erofs_xattr_entry *entry)
+{
+ struct erofs_sb_info *sbi = it->it.sbi;
+ struct erofs_xattr_prefix_item *pf = sbi->xattr_prefixes +
+ (entry->e_name_index & EROFS_XATTR_LONG_PREFIX_MASK);
+
+ if (pf >= sbi->xattr_prefixes + sbi->xattr_prefix_count)
+ return -ENOATTR;
+
+ if (it->index != pf->prefix->base_index ||
+ it->len != entry->e_name_len + pf->infix_len)
+ return -ENOATTR;
+
+ if (memcmp(it->name, pf->prefix->infix, pf->infix_len))
+ return -ENOATTR;
+
+ it->infix_len = pf->infix_len;
+ return 0;
+}
+
+static int xattr_entrymatch(struct xattr_iter *_it,
+ struct erofs_xattr_entry *entry)
+{
+ struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
+
+ /* should also match the infix for long name prefixes */
+ if (entry->e_name_index & EROFS_XATTR_LONG_PREFIX)
+ return erofs_xattr_long_entrymatch(it, entry);
+
+ if (it->index != entry->e_name_index ||
+ it->len != entry->e_name_len)
+ return -ENOATTR;
+ it->infix_len = 0;
+ return 0;
+}
+
+static int xattr_namematch(struct xattr_iter *_it,
+ unsigned int processed, char *buf, unsigned int len)
+{
+ struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
+
+ if (memcmp(buf, it->name + it->infix_len + processed, len))
+ return -ENOATTR;
+ return 0;
+}
+
+static int xattr_checkbuffer(struct xattr_iter *_it,
+ unsigned int value_sz)
+{
+ struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
+ int err = it->buffer_size < value_sz ? -ERANGE : 0;
+
+ it->buffer_size = value_sz;
+ return !it->buffer ? 1 : err;
+}
+
+static void xattr_copyvalue(struct xattr_iter *_it,
+ unsigned int processed,
+ char *buf, unsigned int len)
+{
+ struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
+
+ memcpy(it->buffer + processed, buf, len);
+}
+
+static const struct xattr_iter_handlers find_xattr_handlers = {
+ .entry = xattr_entrymatch,
+ .name = xattr_namematch,
+ .alloc_buffer = xattr_checkbuffer,
+ .value = xattr_copyvalue
+};
+
+static int inline_getxattr(struct erofs_inode *vi, struct getxattr_iter *it)
+{
+ int ret;
+ unsigned int remaining;
+
+ ret = inline_xattr_iter_pre(&it->it, vi);
+ if (ret < 0)
+ return ret;
+
+ remaining = ret;
+ while (remaining) {
+ ret = xattr_foreach(&it->it, &find_xattr_handlers, &remaining);
+ if (ret != -ENOATTR)
+ break;
+ }
+
+ return ret ? ret : it->buffer_size;
+}
+
+static int shared_getxattr(struct erofs_inode *vi, struct getxattr_iter *it)
+{
+ unsigned int i;
+ int ret = -ENOATTR;
+
+ for (i = 0; i < vi->xattr_shared_count; ++i) {
+ erofs_blk_t blkaddr =
+ xattrblock_addr(vi, vi->xattr_shared_xattrs[i]);
+
+ it->it.ofs = xattrblock_offset(vi, vi->xattr_shared_xattrs[i]);
+
+ if (!i || blkaddr != it->it.blkaddr) {
+ ret = blk_read(vi->sbi, 0, it->it.page, blkaddr, 1);
+ if (ret < 0)
+ return -EIO;
+
+ it->it.kaddr = it->it.page;
+ it->it.blkaddr = blkaddr;
+ }
+
+ ret = xattr_foreach(&it->it, &find_xattr_handlers, NULL);
+ if (ret != -ENOATTR)
+ break;
+ }
+
+ return ret ? ret : it->buffer_size;
+}
+
+int erofs_getxattr(struct erofs_inode *vi, const char *name, char *buffer,
+ size_t buffer_size)
+{
+ int ret;
+ unsigned int prefix, prefixlen;
+ struct getxattr_iter it;
+
+ if (!name)
+ return -EINVAL;
+
+ ret = init_inode_xattrs(vi);
+ if (ret)
+ return ret;
+
+ if (!match_prefix(name, &prefix, &prefixlen))
+ return -ENODATA;
+
+ it.it.sbi = vi->sbi;
+ it.index = prefix;
+ it.name = name + prefixlen;
+ it.len = strlen(it.name);
+ if (it.len > EROFS_NAME_LEN)
+ return -ERANGE;
+
+ it.buffer = buffer;
+ it.buffer_size = buffer_size;
+
+ ret = inline_getxattr(vi, &it);
+ if (ret == -ENOATTR)
+ ret = shared_getxattr(vi, &it);
+ return ret;
+}
+
+struct listxattr_iter {
+ struct xattr_iter it;
+
+ char *buffer;
+ int buffer_size, buffer_ofs;
+};
+
+static int xattr_entrylist(struct xattr_iter *_it,
+ struct erofs_xattr_entry *entry)
+{
+ struct listxattr_iter *it =
+ container_of(_it, struct listxattr_iter, it);
+ unsigned int base_index = entry->e_name_index;
+ unsigned int prefix_len, infix_len = 0;
+ const char *prefix, *infix = NULL;
+
+ if (entry->e_name_index & EROFS_XATTR_LONG_PREFIX) {
+ struct erofs_sb_info *sbi = _it->sbi;
+ struct erofs_xattr_prefix_item *pf = sbi->xattr_prefixes +
+ (entry->e_name_index & EROFS_XATTR_LONG_PREFIX_MASK);
+
+ if (pf >= sbi->xattr_prefixes + sbi->xattr_prefix_count)
+ return 1;
+ infix = pf->prefix->infix;
+ infix_len = pf->infix_len;
+ base_index = pf->prefix->base_index;
+ }
+
+ if (base_index >= ARRAY_SIZE(xattr_types))
+ return 1;
+ prefix = xattr_types[base_index].prefix;
+ prefix_len = xattr_types[base_index].prefix_len;
+
+ if (!it->buffer) {
+ it->buffer_ofs += prefix_len + infix_len +
+ entry->e_name_len + 1;
+ return 1;
+ }
+
+ if (it->buffer_ofs + prefix_len + infix_len
+ + entry->e_name_len + 1 > it->buffer_size)
+ return -ERANGE;
+
+ memcpy(it->buffer + it->buffer_ofs, prefix, prefix_len);
+ memcpy(it->buffer + it->buffer_ofs + prefix_len, infix, infix_len);
+ it->buffer_ofs += prefix_len + infix_len;
+ return 0;
+}
+
+static int xattr_namelist(struct xattr_iter *_it,
+ unsigned int processed, char *buf, unsigned int len)
+{
+ struct listxattr_iter *it =
+ container_of(_it, struct listxattr_iter, it);
+
+ memcpy(it->buffer + it->buffer_ofs, buf, len);
+ it->buffer_ofs += len;
+ return 0;
+}
+
+static int xattr_skipvalue(struct xattr_iter *_it,
+ unsigned int value_sz)
+{
+ struct listxattr_iter *it =
+ container_of(_it, struct listxattr_iter, it);
+
+ it->buffer[it->buffer_ofs++] = '\0';
+ return 1;
+}
+
+static const struct xattr_iter_handlers list_xattr_handlers = {
+ .entry = xattr_entrylist,
+ .name = xattr_namelist,
+ .alloc_buffer = xattr_skipvalue,
+ .value = NULL
+};
+
+static int inline_listxattr(struct erofs_inode *vi, struct listxattr_iter *it)
+{
+ int ret;
+ unsigned int remaining;
+
+ ret = inline_xattr_iter_pre(&it->it, vi);
+ if (ret < 0)
+ return ret;
+
+ remaining = ret;
+ while (remaining) {
+ ret = xattr_foreach(&it->it, &list_xattr_handlers, &remaining);
+ if (ret)
+ break;
+ }
+
+ return ret ? ret : it->buffer_ofs;
+}
+
+static int shared_listxattr(struct erofs_inode *vi, struct listxattr_iter *it)
+{
+ unsigned int i;
+ int ret = 0;
+
+ for (i = 0; i < vi->xattr_shared_count; ++i) {
+ erofs_blk_t blkaddr =
+ xattrblock_addr(vi, vi->xattr_shared_xattrs[i]);
+
+ it->it.ofs = xattrblock_offset(vi, vi->xattr_shared_xattrs[i]);
+ if (!i || blkaddr != it->it.blkaddr) {
+ ret = blk_read(vi->sbi, 0, it->it.page, blkaddr, 1);
+ if (ret < 0)
+ return -EIO;
+
+ it->it.kaddr = it->it.page;
+ it->it.blkaddr = blkaddr;
+ }
+
+ ret = xattr_foreach(&it->it, &list_xattr_handlers, NULL);
+ if (ret)
+ break;
+ }
+
+ return ret ? ret : it->buffer_ofs;
+}
+
+int erofs_listxattr(struct erofs_inode *vi, char *buffer, size_t buffer_size)
+{
+ int ret;
+ struct listxattr_iter it;
+
+ ret = init_inode_xattrs(vi);
+ if (ret == -ENOATTR)
+ return 0;
+ if (ret)
+ return ret;
+
+ it.it.sbi = vi->sbi;
+ it.buffer = buffer;
+ it.buffer_size = buffer_size;
+ it.buffer_ofs = 0;
+
+ ret = inline_listxattr(vi, &it);
+ if (ret < 0 && ret != -ENOATTR)
+ return ret;
+ return shared_listxattr(vi, &it);
+}
+
+int erofs_xattr_insert_name_prefix(const char *prefix)
+{
+ struct ea_type_node *tnode;
+
+ if (ea_prefix_count >= 0x80 || strlen(prefix) > UINT8_MAX)
+ return -EOVERFLOW;
+
+ tnode = calloc(1, sizeof(*tnode));
+ if (!tnode)
+ return -ENOMEM;
+
+ if (!match_prefix(prefix, &tnode->base_index, &tnode->base_len)) {
+ free(tnode);
+ return -ENODATA;
+ }
+
+ tnode->type.prefix_len = strlen(prefix);
+ tnode->type.prefix = strdup(prefix);
+ if (!tnode->type.prefix) {
+ free(tnode);
+ return -ENOMEM;
+ }
+
+ tnode->index = EROFS_XATTR_LONG_PREFIX | ea_prefix_count;
+ ea_prefix_count++;
+ init_list_head(&tnode->list);
+ list_add_tail(&tnode->list, &ea_name_prefixes);
+ return 0;
+}
+
+void erofs_xattr_cleanup_name_prefixes(void)
+{
+ struct ea_type_node *tnode, *n;
+
+ list_for_each_entry_safe(tnode, n, &ea_name_prefixes, list) {
+ list_del(&tnode->list);
+ free((void *)tnode->type.prefix);
+ free(tnode);
+ }
+}
+
+void erofs_xattr_prefixes_cleanup(struct erofs_sb_info *sbi)
+{
+ int i;
+
+ if (sbi->xattr_prefixes) {
+ for (i = 0; i < sbi->xattr_prefix_count; i++)
+ free(sbi->xattr_prefixes[i].prefix);
+ free(sbi->xattr_prefixes);
+ sbi->xattr_prefixes = NULL;
+ }
+}
+
+int erofs_xattr_prefixes_init(struct erofs_sb_info *sbi)
+{
+ erofs_off_t pos = (erofs_off_t)sbi->xattr_prefix_start << 2;
+ struct erofs_xattr_prefix_item *pfs;
+ erofs_nid_t nid = 0;
+ int ret = 0, i, len;
+ void *buf;
+
+ if (!sbi->xattr_prefix_count)
+ return 0;
+
+ if (sbi->packed_nid)
+ nid = sbi->packed_nid;
+
+ pfs = calloc(sbi->xattr_prefix_count, sizeof(*pfs));
+ if (!pfs)
+ return -ENOMEM;
+
+ for (i = 0; i < sbi->xattr_prefix_count; i++) {
+ buf = erofs_read_metadata(sbi, nid, &pos, &len);
+ if (IS_ERR(buf)) {
+ ret = PTR_ERR(buf);
+ goto out;
+ }
+ if (len < sizeof(*pfs->prefix) ||
+ len > EROFS_NAME_LEN + sizeof(*pfs->prefix)) {
+ free(buf);
+ ret = -EFSCORRUPTED;
+ goto out;
+ }
+ pfs[i].prefix = buf;
+ pfs[i].infix_len = len - sizeof(struct erofs_xattr_long_prefix);
+ }
+out:
+ sbi->xattr_prefixes = pfs;
+ if (ret)
+ erofs_xattr_prefixes_cleanup(sbi);
+ return ret;
+}
diff --git a/lib/xxhash.c b/lib/xxhash.c
new file mode 100644
index 0000000..7289c77
--- /dev/null
+++ b/lib/xxhash.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only
+/*
+ * The xxhash is copied from the linux kernel at:
+ * https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/xxhash.c
+ *
+ * The original copyright is:
+ *
+ * xxHash - Extremely Fast Hash algorithm
+ * Copyright (C) 2012-2016, Yann Collet.
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ *
+ * You can contact the author at:
+ * - xxHash homepage: https://cyan4973.github.io/xxHash/
+ * - xxHash source repository: https://github.com/Cyan4973/xxHash
+ */
+
+#include "erofs/defs.h"
+#include "erofs/xxhash.h"
+
+/*-*************************************
+ * Macros
+ **************************************/
+#define xxh_rotl32(x, r) ((x << r) | (x >> (32 - r)))
+
+/*-*************************************
+ * Constants
+ **************************************/
+static const uint32_t PRIME32_1 = 2654435761U;
+static const uint32_t PRIME32_2 = 2246822519U;
+static const uint32_t PRIME32_3 = 3266489917U;
+static const uint32_t PRIME32_4 = 668265263U;
+static const uint32_t PRIME32_5 = 374761393U;
+
+/*-***************************
+ * Simple Hash Functions
+ ****************************/
+static uint32_t xxh32_round(uint32_t seed, const uint32_t input)
+{
+ seed += input * PRIME32_2;
+ seed = xxh_rotl32(seed, 13);
+ seed *= PRIME32_1;
+ return seed;
+}
+
+uint32_t xxh32(const void *input, const size_t len, const uint32_t seed)
+{
+ const uint8_t *p = (const uint8_t *)input;
+ const uint8_t *b_end = p + len;
+ uint32_t h32;
+
+ if (len >= 16) {
+ const uint8_t *const limit = b_end - 16;
+ uint32_t v1 = seed + PRIME32_1 + PRIME32_2;
+ uint32_t v2 = seed + PRIME32_2;
+ uint32_t v3 = seed + 0;
+ uint32_t v4 = seed - PRIME32_1;
+
+ do {
+ v1 = xxh32_round(v1, get_unaligned_le32(p));
+ p += 4;
+ v2 = xxh32_round(v2, get_unaligned_le32(p));
+ p += 4;
+ v3 = xxh32_round(v3, get_unaligned_le32(p));
+ p += 4;
+ v4 = xxh32_round(v4, get_unaligned_le32(p));
+ p += 4;
+ } while (p <= limit);
+
+ h32 = xxh_rotl32(v1, 1) + xxh_rotl32(v2, 7) +
+ xxh_rotl32(v3, 12) + xxh_rotl32(v4, 18);
+ } else {
+ h32 = seed + PRIME32_5;
+ }
+
+ h32 += (uint32_t)len;
+
+ while (p + 4 <= b_end) {
+ h32 += get_unaligned_le32(p) * PRIME32_3;
+ h32 = xxh_rotl32(h32, 17) * PRIME32_4;
+ p += 4;
+ }
+
+ while (p < b_end) {
+ h32 += (*p) * PRIME32_5;
+ h32 = xxh_rotl32(h32, 11) * PRIME32_1;
+ p++;
+ }
+
+ h32 ^= h32 >> 15;
+ h32 *= PRIME32_2;
+ h32 ^= h32 >> 13;
+ h32 *= PRIME32_3;
+ h32 ^= h32 >> 16;
+
+ return h32;
+}
diff --git a/lib/zmap.c b/lib/zmap.c
index 95745c5..81fa22b 100644
--- a/lib/zmap.c
+++ b/lib/zmap.c
@@ -16,13 +16,15 @@
int z_erofs_fill_inode(struct erofs_inode *vi)
{
- if (!erofs_sb_has_big_pcluster() &&
- !erofs_sb_has_ztailpacking() &&
- vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
+ struct erofs_sb_info *sbi = vi->sbi;
+
+ if (!erofs_sb_has_big_pcluster(sbi) &&
+ !erofs_sb_has_ztailpacking(sbi) && !erofs_sb_has_fragments(sbi) &&
+ vi->datalayout == EROFS_INODE_COMPRESSED_FULL) {
vi->z_advise = 0;
vi->z_algorithmtype[0] = 0;
vi->z_algorithmtype[1] = 0;
- vi->z_logical_clusterbits = LOG_BLOCK_SIZE;
+ vi->z_logical_clusterbits = sbi->blkszbits;
vi->flags |= EROFS_I_Z_INITED;
}
@@ -35,20 +37,28 @@
erofs_off_t pos;
struct z_erofs_map_header *h;
char buf[sizeof(struct z_erofs_map_header)];
+ struct erofs_sb_info *sbi = vi->sbi;
if (vi->flags & EROFS_I_Z_INITED)
return 0;
- DBG_BUGON(!erofs_sb_has_big_pcluster() &&
- !erofs_sb_has_ztailpacking() &&
- vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY);
- pos = round_up(iloc(vi->nid) + vi->inode_isize + vi->xattr_isize, 8);
-
- ret = dev_read(0, buf, pos, sizeof(buf));
+ pos = round_up(erofs_iloc(vi) + vi->inode_isize + vi->xattr_isize, 8);
+ ret = dev_read(sbi, 0, buf, pos, sizeof(buf));
if (ret < 0)
return -EIO;
h = (struct z_erofs_map_header *)buf;
+ /*
+ * if the highest bit of the 8-byte map header is set, the whole file
+ * is stored in the packed inode. The rest bits keeps z_fragmentoff.
+ */
+ if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) {
+ vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
+ vi->fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63);
+ vi->z_tailextent_headlcn = 0;
+ goto out;
+ }
+
vi->z_advise = le16_to_cpu(h->h_advise);
vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
@@ -59,8 +69,8 @@
return -EOPNOTSUPP;
}
- vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7);
- if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION &&
+ vi->z_logical_clusterbits = sbi->blkszbits + (h->h_clusterbits & 7);
+ if (vi->datalayout == EROFS_INODE_COMPRESSED_COMPACT &&
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
erofs_err("big pcluster head1/2 of compact indexes should be consistent for nid %llu",
@@ -75,7 +85,7 @@
ret = z_erofs_do_map_blocks(vi, &map,
EROFS_GET_BLOCKS_FINDTAIL);
if (!map.m_plen ||
- erofs_blkoff(map.m_pa) + map.m_plen > EROFS_BLKSIZ) {
+ erofs_blkoff(sbi, map.m_pa) + map.m_plen > erofs_blksiz(sbi)) {
erofs_err("invalid tail-packing pclustersize %llu",
map.m_plen | 0ULL);
return -EFSCORRUPTED;
@@ -83,6 +93,17 @@
if (ret < 0)
return ret;
}
+ if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER &&
+ !(h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT)) {
+ struct erofs_map_blocks map = { .index = UINT_MAX };
+
+ vi->fragmentoff = le32_to_cpu(h->h_fragmentoff);
+ ret = z_erofs_do_map_blocks(vi, &map,
+ EROFS_GET_BLOCKS_FINDTAIL);
+ if (ret < 0)
+ return ret;
+ }
+out:
vi->flags |= EROFS_I_Z_INITED;
return 0;
}
@@ -97,8 +118,9 @@
u8 type, headtype;
u16 clusterofs;
u16 delta[2];
- erofs_blk_t pblk, compressedlcs;
+ erofs_blk_t pblk, compressedblks;
erofs_off_t nextpackoff;
+ bool partialref;
};
static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m,
@@ -111,7 +133,7 @@
if (map->index == eblk)
return 0;
- ret = blk_read(0, mpage, eblk, 1);
+ ret = blk_read(m->inode->sbi, 0, mpage, eblk, 1);
if (ret < 0)
return -EIO;
@@ -124,43 +146,45 @@
unsigned long lcn)
{
struct erofs_inode *const vi = m->inode;
- const erofs_off_t ibase = iloc(vi->nid);
- const erofs_off_t pos =
- Z_EROFS_VLE_LEGACY_INDEX_ALIGN(ibase + vi->inode_isize +
- vi->xattr_isize) +
- lcn * sizeof(struct z_erofs_vle_decompressed_index);
- struct z_erofs_vle_decompressed_index *di;
+ struct erofs_sb_info *sbi = vi->sbi;
+ const erofs_off_t ibase = erofs_iloc(vi);
+ const erofs_off_t pos = Z_EROFS_FULL_INDEX_ALIGN(ibase +
+ vi->inode_isize + vi->xattr_isize) +
+ lcn * sizeof(struct z_erofs_lcluster_index);
+ struct z_erofs_lcluster_index *di;
unsigned int advise, type;
int err;
- err = z_erofs_reload_indexes(m, erofs_blknr(pos));
+ err = z_erofs_reload_indexes(m, erofs_blknr(sbi, pos));
if (err)
return err;
- m->nextpackoff = pos + sizeof(struct z_erofs_vle_decompressed_index);
+ m->nextpackoff = pos + sizeof(struct z_erofs_lcluster_index);
m->lcn = lcn;
- di = m->kaddr + erofs_blkoff(pos);
+ di = m->kaddr + erofs_blkoff(sbi, pos);
advise = le16_to_cpu(di->di_advise);
- type = (advise >> Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT) &
- ((1 << Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) - 1);
+ type = (advise >> Z_EROFS_LI_LCLUSTER_TYPE_BIT) &
+ ((1 << Z_EROFS_LI_LCLUSTER_TYPE_BITS) - 1);
switch (type) {
- case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+ case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
m->clusterofs = 1 << vi->z_logical_clusterbits;
m->delta[0] = le16_to_cpu(di->di_u.delta[0]);
- if (m->delta[0] & Z_EROFS_VLE_DI_D0_CBLKCNT) {
+ if (m->delta[0] & Z_EROFS_LI_D0_CBLKCNT) {
if (!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) {
DBG_BUGON(1);
return -EFSCORRUPTED;
}
- m->compressedlcs = m->delta[0] &
- ~Z_EROFS_VLE_DI_D0_CBLKCNT;
+ m->compressedblks = m->delta[0] &
+ ~Z_EROFS_LI_D0_CBLKCNT;
m->delta[0] = 1;
}
m->delta[1] = le16_to_cpu(di->di_u.delta[1]);
break;
- case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
- case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+ case Z_EROFS_LCLUSTER_TYPE_PLAIN:
+ case Z_EROFS_LCLUSTER_TYPE_HEAD1:
+ if (advise & Z_EROFS_LI_PARTIAL_REF)
+ m->partialref = true;
m->clusterofs = le16_to_cpu(di->di_clusterofs);
m->pblk = le32_to_cpu(di->di_u.blkaddr);
break;
@@ -197,13 +221,13 @@
lo = decode_compactedbits(lclusterbits, lomask,
in, encodebits * i, &type);
- if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
+ if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD)
return d1;
++d1;
} while (++i < vcnt);
- /* vcnt - 1 (Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) item */
- if (!(lo & Z_EROFS_VLE_DI_D0_CBLKCNT))
+ /* vcnt - 1 (Z_EROFS_LCLUSTER_TYPE_NONHEAD) item */
+ if (!(lo & Z_EROFS_LI_D0_CBLKCNT))
d1 += lo - 1;
return d1;
}
@@ -220,7 +244,7 @@
u8 *in, type;
bool big_pcluster;
- if (1 << amortizedshift == 4)
+ if (1 << amortizedshift == 4 && lclusterbits <= 14)
vcnt = 2;
else if (1 << amortizedshift == 2 && lclusterbits == 12)
vcnt = 16;
@@ -232,7 +256,7 @@
(vcnt << amortizedshift);
big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
- eofs = erofs_blkoff(pos);
+ eofs = erofs_blkoff(vi->sbi, pos);
base = round_down(eofs, vcnt << amortizedshift);
in = m->kaddr + base;
@@ -241,19 +265,19 @@
lo = decode_compactedbits(lclusterbits, lomask,
in, encodebits * i, &type);
m->type = type;
- if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
+ if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
m->clusterofs = 1 << lclusterbits;
/* figure out lookahead_distance: delta[1] if needed */
if (lookahead)
m->delta[1] = get_compacted_la_distance(lclusterbits,
encodebits, vcnt, in, i);
- if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) {
+ if (lo & Z_EROFS_LI_D0_CBLKCNT) {
if (!big_pcluster) {
DBG_BUGON(1);
return -EFSCORRUPTED;
}
- m->compressedlcs = lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT;
+ m->compressedblks = lo & ~Z_EROFS_LI_D0_CBLKCNT;
m->delta[0] = 1;
return 0;
} else if (i + 1 != (int)vcnt) {
@@ -267,9 +291,9 @@
*/
lo = decode_compactedbits(lclusterbits, lomask,
in, encodebits * (i - 1), &type);
- if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
+ if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD)
lo = 0;
- else if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT)
+ else if (lo & Z_EROFS_LI_D0_CBLKCNT)
lo = 1;
m->delta[0] = lo + 1;
return 0;
@@ -283,7 +307,7 @@
--i;
lo = decode_compactedbits(lclusterbits, lomask,
in, encodebits * i, &type);
- if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
+ if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD)
i -= lo;
if (i >= 0)
@@ -295,13 +319,13 @@
--i;
lo = decode_compactedbits(lclusterbits, lomask,
in, encodebits * i, &type);
- if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
- if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) {
+ if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
+ if (lo & Z_EROFS_LI_D0_CBLKCNT) {
--i;
- nblk += lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT;
+ nblk += lo & ~Z_EROFS_LI_D0_CBLKCNT;
continue;
}
- if (lo == 1) {
+ if (lo <= 1) {
DBG_BUGON(1);
/* --i; ++nblk; continue; */
return -EFSCORRUPTED;
@@ -321,19 +345,16 @@
unsigned long lcn, bool lookahead)
{
struct erofs_inode *const vi = m->inode;
- const unsigned int lclusterbits = vi->z_logical_clusterbits;
- const erofs_off_t ebase = round_up(iloc(vi->nid) + vi->inode_isize +
+ struct erofs_sb_info *sbi = vi->sbi;
+ const erofs_off_t ebase = round_up(erofs_iloc(vi) + vi->inode_isize +
vi->xattr_isize, 8) +
sizeof(struct z_erofs_map_header);
- const unsigned int totalidx = DIV_ROUND_UP(vi->i_size, EROFS_BLKSIZ);
+ const unsigned int totalidx = BLK_ROUND_UP(sbi, vi->i_size);
unsigned int compacted_4b_initial, compacted_2b;
unsigned int amortizedshift;
erofs_off_t pos;
int err;
- if (lclusterbits != 12)
- return -EOPNOTSUPP;
-
if (lcn >= totalidx)
return -EINVAL;
@@ -366,7 +387,7 @@
amortizedshift = 2;
out:
pos += lcn * (1 << amortizedshift);
- err = z_erofs_reload_indexes(m, erofs_blknr(pos));
+ err = z_erofs_reload_indexes(m, erofs_blknr(sbi, pos));
if (err)
return err;
return unpack_compacted_index(m, amortizedshift, pos, lookahead);
@@ -377,10 +398,10 @@
{
const unsigned int datamode = m->inode->datalayout;
- if (datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY)
+ if (datamode == EROFS_INODE_COMPRESSED_FULL)
return legacy_load_cluster_from_disk(m, lcn);
- if (datamode == EROFS_INODE_FLAT_COMPRESSION)
+ if (datamode == EROFS_INODE_COMPRESSED_COMPACT)
return compacted_load_cluster_from_disk(m, lcn, lookahead);
return -EINVAL;
@@ -409,7 +430,7 @@
return err;
switch (m->type) {
- case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+ case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
if (!m->delta[0]) {
erofs_err("invalid lookback distance 0 @ nid %llu",
(unsigned long long)vi->nid);
@@ -417,8 +438,8 @@
return -EFSCORRUPTED;
}
return z_erofs_extent_lookback(m, m->delta[0]);
- case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
- case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+ case Z_EROFS_LCLUSTER_TYPE_PLAIN:
+ case Z_EROFS_LCLUSTER_TYPE_HEAD1:
m->headtype = m->type;
map->m_la = (lcn << lclusterbits) | m->clusterofs;
break;
@@ -435,22 +456,23 @@
unsigned int initial_lcn)
{
struct erofs_inode *const vi = m->inode;
+ struct erofs_sb_info *sbi = vi->sbi;
struct erofs_map_blocks *const map = m->map;
const unsigned int lclusterbits = vi->z_logical_clusterbits;
unsigned long lcn;
int err;
- DBG_BUGON(m->type != Z_EROFS_VLE_CLUSTER_TYPE_PLAIN &&
- m->type != Z_EROFS_VLE_CLUSTER_TYPE_HEAD);
+ DBG_BUGON(m->type != Z_EROFS_LCLUSTER_TYPE_PLAIN &&
+ m->type != Z_EROFS_LCLUSTER_TYPE_HEAD1);
- if (m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN ||
+ if (m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN ||
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) {
map->m_plen = 1 << lclusterbits;
return 0;
}
lcn = m->lcn + 1;
- if (m->compressedlcs)
+ if (m->compressedblks)
goto out;
err = z_erofs_load_cluster_from_disk(m, lcn, false);
@@ -459,28 +481,28 @@
/*
* If the 1st NONHEAD lcluster has already been handled initially w/o
- * valid compressedlcs, which means at least it mustn't be CBLKCNT, or
+ * valid compressedblks, which means at least it mustn't be CBLKCNT, or
* an internal implemenatation error is detected.
*
* The following code can also handle it properly anyway, but let's
* BUG_ON in the debugging mode only for developers to notice that.
*/
DBG_BUGON(lcn == initial_lcn &&
- m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD);
+ m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD);
switch (m->type) {
- case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
- case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+ case Z_EROFS_LCLUSTER_TYPE_PLAIN:
+ case Z_EROFS_LCLUSTER_TYPE_HEAD1:
/*
* if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
* rather than CBLKCNT, it's a 1 lcluster-sized pcluster.
*/
- m->compressedlcs = 1;
+ m->compressedblks = 1 << (lclusterbits - sbi->blkszbits);
break;
- case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+ case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
if (m->delta[0] != 1)
goto err_bonus_cblkcnt;
- if (m->compressedlcs)
+ if (m->compressedblks)
break;
/* fallthrough */
default:
@@ -490,7 +512,7 @@
return -EFSCORRUPTED;
}
out:
- map->m_plen = m->compressedlcs << lclusterbits;
+ map->m_plen = m->compressedblks << sbi->blkszbits;
return 0;
err_bonus_cblkcnt:
erofs_err("bogus CBLKCNT @ lcn %lu of nid %llu",
@@ -518,11 +540,11 @@
if (err)
return err;
- if (m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
+ if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
DBG_BUGON(!m->delta[1] &&
m->clusterofs != 1 << lclusterbits);
- } else if (m->type == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN ||
- m->type == Z_EROFS_VLE_CLUSTER_TYPE_HEAD) {
+ } else if (m->type == Z_EROFS_LCLUSTER_TYPE_PLAIN ||
+ m->type == Z_EROFS_LCLUSTER_TYPE_HEAD1) {
/* go on until the next HEAD lcluster */
if (lcn != headlcn)
break;
@@ -545,7 +567,9 @@
struct erofs_map_blocks *map,
int flags)
{
+ struct erofs_sb_info *sbi = vi->sbi;
bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER;
+ bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
struct z_erofs_maprecorder m = {
.inode = vi,
.map = map,
@@ -571,11 +595,18 @@
map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED;
end = (m.lcn + 1ULL) << lclusterbits;
switch (m.type) {
- case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
- case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+ case Z_EROFS_LCLUSTER_TYPE_PLAIN:
+ case Z_EROFS_LCLUSTER_TYPE_HEAD1:
if (endoff >= m.clusterofs) {
m.headtype = m.type;
map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
+ /*
+ * For ztailpacking files, in order to inline data more
+ * effectively, special EOF lclusters are now supported
+ * which can have three parts at most.
+ */
+ if (ztailpacking && end > vi->i_size)
+ end = vi->i_size;
break;
}
/* m.lcn should be >= 1 if endoff < m.clusterofs */
@@ -589,7 +620,7 @@
map->m_flags |= EROFS_MAP_FULL_MAPPED;
m.delta[0] = 1;
/* fallthrough */
- case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+ case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
/* get the correspoinding first chunk */
err = z_erofs_extent_lookback(&m, m.delta[0]);
if (err)
@@ -601,25 +632,43 @@
err = -EOPNOTSUPP;
goto out;
}
-
+ if (m.partialref)
+ map->m_flags |= EROFS_MAP_PARTIAL_REF;
map->m_llen = end - map->m_la;
- if (flags & EROFS_GET_BLOCKS_FINDTAIL)
+ if (flags & EROFS_GET_BLOCKS_FINDTAIL) {
vi->z_tailextent_headlcn = m.lcn;
+ /* for non-compact indexes, fragmentoff is 64 bits */
+ if (fragment && vi->datalayout == EROFS_INODE_COMPRESSED_FULL)
+ vi->fragmentoff |= (u64)m.pblk << 32;
+ }
if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) {
map->m_flags |= EROFS_MAP_META;
map->m_pa = vi->z_idataoff;
map->m_plen = vi->z_idata_size;
+ } else if (fragment && m.lcn == vi->z_tailextent_headlcn) {
+ map->m_flags |= EROFS_MAP_FRAGMENT;
} else {
- map->m_pa = blknr_to_addr(m.pblk);
+ map->m_pa = erofs_pos(sbi, m.pblk);
err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
if (err)
goto out;
}
- if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN)
- map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
- else
+ if (m.headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN) {
+ if (map->m_llen > map->m_plen) {
+ DBG_BUGON(1);
+ err = -EFSCORRUPTED;
+ goto out;
+ }
+ if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER)
+ map->m_algorithmformat =
+ Z_EROFS_COMPRESSION_INTERLACED;
+ else
+ map->m_algorithmformat =
+ Z_EROFS_COMPRESSION_SHIFTED;
+ } else {
map->m_algorithmformat = vi->z_algorithmtype[0];
+ }
if (flags & EROFS_GET_BLOCKS_FIEMAP) {
err = z_erofs_get_extent_decompressedlen(&m);
@@ -652,6 +701,15 @@
if (err)
goto out;
+ if ((vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) &&
+ !vi->z_tailextent_headlcn) {
+ map->m_la = 0;
+ map->m_llen = vi->i_size;
+ map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED |
+ EROFS_MAP_FRAGMENT;
+ goto out;
+ }
+
err = z_erofs_do_map_blocks(vi, map, flags);
out:
DBG_BUGON(err < 0 && err != -ENOMEM);
diff --git a/man/dump.erofs.1 b/man/dump.erofs.1
index 209e5f9..7316f4b 100644
--- a/man/dump.erofs.1
+++ b/man/dump.erofs.1
@@ -9,18 +9,28 @@
.SH DESCRIPTION
.B dump.erofs
is used to retrieve erofs metadata from \fIIMAGE\fP and demonstrate
+.br
1) overall disk statistics,
+.br
2) superblock information,
+.br
3) file information of the given inode NID,
+.br
4) file extent information of the given inode NID.
.SH OPTIONS
.TP
.BI "\-\-device=" path
Specify an extra device to be used together.
-You may give multiple `--device' options in the correct order.
+You may give multiple
+.B --device
+options in the correct order.
.TP
.BI "\-\-ls"
-List directory contents. An inode should be specified together.
+List directory contents.
+.I NID
+or
+.I path
+required.
.TP
.BI "\-\-nid=" NID
Specify an inode NID in order to print its file information.
@@ -29,16 +39,21 @@
Specify an inode path in order to print its file information.
.TP
.BI \-e
-Show the file extent information. An inode should be specified together.
+Show the file extent information.
+.I NID
+or
+.I path
+required.
.TP
.BI \-V
Print the version number and exit.
.TP
.BI \-s
-Show superblock information of the an EROFS-formatted image.
+Show superblock information.
+This is the default if no options are specified.
.TP
.BI \-S
-Show EROFS disk statistics, including file type/size distribution, number of (un)compressed files, compression ratio of the whole image, etc.
+Show image statistics, including file type/size distribution, number of (un)compressed files, compression ratio, etc.
.SH AUTHOR
Initial code was written by Wang Qi <mpiglet@outlook.com>, Guo Xuenan <guoxuenan@huawei.com>.
.PP
diff --git a/man/erofsfuse.1 b/man/erofsfuse.1
index 9db6827..8b99368 100644
--- a/man/erofsfuse.1
+++ b/man/erofsfuse.1
@@ -26,6 +26,9 @@
.BI "\-\-device=" path
Specify an extra device to be used together.
You may give multiple `--device' options in the correct order.
+.TP
+.BI "\-\-offset=" #
+Specify `--offset' bytes to skip when reading image file. The default is 0.
.SS "FUSE options:"
.TP
\fB-d -o\fR debug
diff --git a/man/fsck.erofs.1 b/man/fsck.erofs.1
index f3e9c3b..364219a 100644
--- a/man/fsck.erofs.1
+++ b/man/fsck.erofs.1
@@ -2,7 +2,7 @@
.\"
.TH FSCK.EROFS 1
.SH NAME
-fsck.erofs \- tool to check the EROFS filesystem's integrity
+fsck.erofs \- tool to check an EROFS filesystem's integrity
.SH SYNOPSIS
\fBfsck.erofs\fR [\fIOPTIONS\fR] \fIIMAGE\fR
.SH DESCRIPTION
@@ -22,15 +22,18 @@
non-compressed files.
.TP
.BI "\-\-device=" path
-Specify an extra device to be used together.
-You may give multiple `--device' options in the correct order.
+Specify an extra blob device to be used together.
+You may give multiple
+.B --device
+options in the correct order.
.TP
.B \-\-extract
-Check if all files are well encoded. This will induce more I/Os to read
-compressed file data, so it might take too much time depending on the image.
+Check if all files are well encoded. This read all compressed files,
+and hence create more I/O load,
+so it might take too much time depending on the image.
.TP
.B \-\-help
-Display this help and exit.
+Display help string and exit.
.SH AUTHOR
This version of \fBfsck.erofs\fR is written by
Daeho Jeong <daehojeong@google.com>.
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index d811f20..00ac2ac 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -20,55 +20,86 @@
from \fISOURCE\fR directory.
.SH OPTIONS
.TP
-.BI "\-z " compression-algorithm " [" ",#" "]"
-Set an algorithm for file compression, which can be set with an optional
-compression level separated by a comma.
+.BI "\-z " compression-algorithm \fR[\fP, # \fR][\fP: ... \fR]\fP
+Set a primary algorithm for data compression, which can be set with an optional
+compression level (1 to 12 for LZ4HC, 0 to 9 for LZMA and 100 to 109 for LZMA
+extreme compression) separated by a comma. Alternative algorithms could be
+specified and separated by colons.
+.TP
+.BI "\-b " block-size
+Set the fundamental block size of the filesystem in bytes. In other words,
+specify the smallest amount of data that can be accessed at a time. The
+default is the system page size. It cannot be less than 512 bytes.
.TP
.BI "\-C " max-pcluster-size
-Specify the maximum size of compress physical cluster in bytes. It may enable
-big pcluster feature if needed (Linux v5.13+).
+Specify the maximum size of compress physical cluster in bytes.
+This may cause the big pcluster feature to be enabled (Linux v5.13+).
.TP
.BI "\-d " #
Specify the level of debugging messages. The default is 2, which shows basic
warning messages.
.TP
.BI "\-x " #
-Specify the upper limit of an xattr which is still inlined. The default is 2.
-Disable storing xattrs if < 0.
+Limit how many xattrs will be inlined. The default is 2.
+Disables storing xattrs if < 0.
.TP
-.BI "\-E " extended-option " [,...]"
+.BI "\-E " extended-option \fR[\fP, ... \fR]\fP
Set extended options for the filesystem. Extended options are comma separated,
-and may take an argument using the equals ('=') sign.
+and may take an extra argument using the equals ('=') sign.
The following extended options are supported:
.RS 1.2i
.TP
+.BI all-fragments
+Forcely record the whole files into a special inode for better compression and
+it may take an argument as the pcluster size of the packed inode in bytes.
+(Linux v6.1+)
+.TP
+.BI dedupe
+Enable global compressed data deduplication to minimize duplicated data in
+the filesystem. May further reduce image size when used with
+.BR -E\ fragments .
+(Linux v6.1+)
+.TP
.BI force-inode-compact
-Forcely generate compact inodes (32-byte inodes) to output.
+Force generation of compact (32-byte) inodes.
.TP
.BI force-inode-extended
-Forcely generate extended inodes (64-byte inodes) to output.
+Force generation of extended (64-byte) inodes.
.TP
.BI force-inode-blockmap
-Forcely generate inode chunk format in 4-byte block address array.
+Force generation of inode chunk format as a 4-byte block address array.
.TP
.BI force-chunk-indexes
-Forcely generate inode chunk format in 8-byte chunk indexes (with device id).
+Forcely generate inode chunk format as an 8-byte chunk index (with device ID).
+.TP
+.BI fragments\fR[\fP= size \fR]\fP
+Pack the tail part (pcluster) of compressed files, or entire files, into a
+special inode for smaller image sizes, and it may take an argument as the
+pcluster size of the packed inode in bytes. (Linux v6.1+)
.TP
.BI legacy-compress
-Drop "inplace decompression" and "compacted indexes" support, which is used
-to generate compatible EROFS images for Linux v4.19 - 5.3.
+Disable "inplace decompression" and "compacted indexes",
+for compatibility with Linux pre-v5.4.
.TP
.BI noinline_data
Don't inline regular files to enable FSDAX for these files (Linux v5.15+).
.TP
+.B ^xattr-name-filter
+Turn off/on xattr name filter to optimize negative xattr lookups (Linux v6.6+).
+.TP
.BI ztailpacking
Pack the tail part (pcluster) of compressed files into its metadata to save
more space and the tail part I/O. (Linux v5.17+)
.RE
.TP
+.BI "\-L " volume-label
+Set the volume label for the filesystem to
+.IR volume-label .
+The maximum length of the volume label is 16 bytes.
+.TP
.BI "\-T " #
-Set all files to the given UNIX timestamp. Reproducible builds requires setting
-all to a specific one.
+Set all files to the given UNIX timestamp. Reproducible builds require setting
+all to a specific one. By default, the source file's modification time is used.
.TP
.BI "\-U " UUID
Set the universally unique identifier (UUID) of the filesystem to
@@ -80,53 +111,93 @@
Make all files owned by root.
.TP
.BI "\-\-blobdev " file
-Specify another extra blob device to store chunk-based data.
+Specify an extra blob device to store chunk-based data.
.TP
.BI "\-\-chunksize " #
Generate chunk-based files with #-byte chunks.
.TP
.BI "\-\-compress-hints " file
-If the optional
-.BI "\-\-compress-hints " file
-argument is given,
-.B mkfs.erofs
-uses it to apply the per-file compression strategy. Each line is defined by
-tokens separated by spaces in the following form:
+Apply a per-file compression strategy. Each line in
+.I file
+is defined by
+tokens separated by spaces in the following form. Optionally, instead of
+the given primary algorithm, alternative algorithms can be specified with
+\fIalgorithm-index\fR explicitly:
.RS 1.2i
-<pcluster-in-bytes> <match-pattern>
+<pcluster-size-in-bytes> [algorithm-index] <match-pattern>
.RE
+.IR match-pattern s
+are extended regular expressions, matched against absolute paths within
+the output filesystem, with no leading /.
.TP
.BI "\-\-exclude-path=" path
Ignore file that matches the exact literal path.
-You may give multiple `--exclude-path' options.
+You may give multiple
+.B --exclude-path
+options.
.TP
.BI "\-\-exclude-regex=" regex
-Ignore files that match the given regular expression.
-You may give multiple `--exclude-regex` options.
+Ignore files that match the given extended regular expression.
+You may give multiple
+.B --exclude-regex
+options.
.TP
.BI "\-\-file-contexts=" file
-Specify a \fIfile_contexts\fR file to setup / override selinux labels.
+Read SELinux label configuration/overrides from \fIfile\fR in the
+.BR selinux_file (5)
+format.
.TP
.BI "\-\-force-uid=" UID
-Set all file uids to \fIUID\fR.
+Set all file UIDs to \fIUID\fR.
.TP
.BI "\-\-force-gid=" GID
-Set all file gids to \fIGID\fR.
+Set all file GIDs to \fIGID\fR.
+.TP
+.BI "\-\-gid-offset=" GIDOFFSET
+Add \fIGIDOFFSET\fR to all file GIDs.
+When this option is used together with
+.BR --force-gid ,
+the final file gids are
+set to \fIGID\fR + \fIGID-OFFSET\fR.
+.TP
+.B \-\-gzip
+Filter tarball streams through gzip.
.TP
.B \-\-help
-Display this help and exit.
+Display help string and exit.
.TP
.B "\-\-ignore-mtime"
-File modification time is ignored whenever it would cause \fBmkfs.erofs\fR to
+Ignore the file modification time whenever it would cause \fBmkfs.erofs\fR to
use extended inodes over compact inodes. When not using a fixed timestamp, this
-can reduce total metadata size.
+can reduce total metadata size. Implied by
+.BR "-E force-inode-compact" .
.TP
.BI "\-\-max-extent-bytes " #
-Specify maximum decompressed extent size # in bytes.
+Specify maximum decompressed extent size in bytes.
.TP
.B "\-\-preserve-mtime"
-File modification time is preserved whenever \fBmkfs.erofs\fR decides to use
-extended inodes over compact inodes.
+Use extended inodes instead of compact inodes if the file modification time
+would overflow compact inodes. This is the default. Overrides
+.BR --ignore-mtime .
+.TP
+.B "\-\-tar=f"
+Generate a full EROFS image from a tarball.
+.TP
+.B "\-\-tar=i"
+Generate an meta-only EROFS image from a tarball.
+.TP
+.BI "\-\-uid-offset=" UIDOFFSET
+Add \fIUIDOFFSET\fR to all file UIDs.
+When this option is used together with
+.BR --force-uid ,
+the final file uids are
+set to \fIUID\fR + \fIUIDOFFSET\fR.
+.TP
+.BI "\-\-xattr-prefix=" PREFIX
+Specify a customized extended attribute namespace prefix for space saving,
+e.g. "trusted.overlay.". You may give multiple
+.B --xattr-prefix
+options (Linux v6.4+).
.SH AUTHOR
This version of \fBmkfs.erofs\fR is written by Li Guifu <blucerlee@gmail.com>,
Miao Xie <miaoxie@huawei.com> and Gao Xiang <xiang@kernel.org> with
diff --git a/mkfs/Makefile.am b/mkfs/Makefile.am
index 709d9bf..dd75485 100644
--- a/mkfs/Makefile.am
+++ b/mkfs/Makefile.am
@@ -2,8 +2,9 @@
AUTOMAKE_OPTIONS = foreign
bin_PROGRAMS = mkfs.erofs
-AM_CPPFLAGS = ${libuuid_CFLAGS} ${libselinux_CFLAGS}
+AM_CPPFLAGS = ${libselinux_CFLAGS}
mkfs_erofs_SOURCES = main.c
mkfs_erofs_CFLAGS = -Wall -I$(top_srcdir)/include
-mkfs_erofs_LDADD = ${libuuid_LIBS} $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
- ${liblz4_LIBS} ${liblzma_LIBS}
+mkfs_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
+ ${libuuid_LIBS} ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} \
+ ${libdeflate_LIBS}
diff --git a/mkfs/main.c b/mkfs/main.c
index d2c9830..6d2b700 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -15,19 +15,21 @@
#include "erofs/config.h"
#include "erofs/print.h"
#include "erofs/cache.h"
+#include "erofs/diskbuf.h"
#include "erofs/inode.h"
+#include "erofs/tar.h"
#include "erofs/io.h"
#include "erofs/compress.h"
+#include "erofs/dedupe.h"
#include "erofs/xattr.h"
#include "erofs/exclude.h"
#include "erofs/block_list.h"
#include "erofs/compress_hints.h"
#include "erofs/blobchunk.h"
+#include "erofs/fragments.h"
+#include "erofs/rebuild.h"
#include "../lib/liberofs_private.h"
-
-#ifdef HAVE_LIBUUID
-#include <uuid.h>
-#endif
+#include "../lib/liberofs_uuid.h"
#define EROFS_SUPER_END (EROFS_SUPER_OFFSET + sizeof(struct erofs_super_block))
@@ -43,6 +45,7 @@
{"all-root", no_argument, NULL, 7},
#ifndef NDEBUG
{"random-pclusterblks", no_argument, NULL, 8},
+ {"random-algorithms", no_argument, NULL, 18},
#endif
{"max-extent-bytes", required_argument, NULL, 9},
{"compress-hints", required_argument, NULL, 10},
@@ -51,41 +54,53 @@
{"blobdev", required_argument, NULL, 13},
{"ignore-mtime", no_argument, NULL, 14},
{"preserve-mtime", no_argument, NULL, 15},
-#ifdef WITH_ANDROID
+ {"uid-offset", required_argument, NULL, 16},
+ {"gid-offset", required_argument, NULL, 17},
+ {"tar", optional_argument, NULL, 20},
+ {"aufs", no_argument, NULL, 21},
{"mount-point", required_argument, NULL, 512},
+ {"xattr-prefix", required_argument, NULL, 19},
+#ifdef WITH_ANDROID
{"product-out", required_argument, NULL, 513},
{"fs-config-file", required_argument, NULL, 514},
{"block-list-file", required_argument, NULL, 515},
#endif
+ {"ovlfs-strip", optional_argument, NULL, 516},
+#ifdef HAVE_ZLIB
+ {"gzip", no_argument, NULL, 517},
+#endif
{0, 0, 0, 0},
};
static void print_available_compressors(FILE *f, const char *delim)
{
- unsigned int i = 0;
+ int i = 0;
+ bool comma = false;
const char *s;
- while ((s = z_erofs_list_available_compressors(i)) != NULL) {
- if (i++)
+ while ((s = z_erofs_list_available_compressors(&i)) != NULL) {
+ if (comma)
fputs(delim, f);
fputs(s, f);
+ comma = true;
}
fputc('\n', f);
}
static void usage(void)
{
- fputs("usage: [options] FILE DIRECTORY\n\n"
- "Generate erofs image from DIRECTORY to FILE, and [options] are:\n"
+ fputs("usage: [options] FILE SOURCE(s)\n"
+ "Generate EROFS image (FILE) from DIRECTORY, TARBALL and/or EROFS images. And [options] are:\n"
+ " -b# set block size to # (# = page size by default)\n"
" -d# set output message level to # (maximum 9)\n"
" -x# set xattr tolerance to # (< 0, disable xattrs; default 2)\n"
- " -zX[,Y] X=compressor (Y=compression level, optional)\n"
+ " -zX[,Y][:..] X=compressor (Y=compression level, optional)\n"
+ " alternative algorithms can be separated by colons(:)\n"
" -C# specify the size of compress physical cluster in bytes\n"
" -EX[,...] X=extended options\n"
+ " -L volume-label set the volume label (maximum 16)\n"
" -T# set a fixed UNIX timestamp # to all files\n"
-#ifdef HAVE_LIBUUID
" -UX use a given filesystem UUID\n"
-#endif
" --all-root make all files owned by root\n"
" --blobdev=X specify an extra device X to store chunked data\n"
" --chunksize=# generate chunk-based files with #-byte chunks\n"
@@ -97,17 +112,27 @@
#endif
" --force-uid=# set all file uids to # (# = UID)\n"
" --force-gid=# set all file gids to # (# = GID)\n"
+ " --uid-offset=# add offset # to all file uids (# = id offset)\n"
+ " --gid-offset=# add offset # to all file gids (# = id offset)\n"
+#ifdef HAVE_ZLIB
+ " --gzip try to filter the tarball stream through gzip\n"
+#endif
" --help display this help and exit\n"
" --ignore-mtime use build time instead of strict per-file modification time\n"
" --max-extent-bytes=# set maximum decompressed extent size # in bytes\n"
" --preserve-mtime keep per-file modification time strictly\n"
+ " --aufs replace aufs special files with overlayfs metadata\n"
+ " --tar=[fi] generate an image from tarball(s)\n"
+ " --ovlfs-strip=[01] strip overlayfs metadata in the target image (e.g. whiteouts)\n"
" --quiet quiet execution (do not write anything to standard output.)\n"
#ifndef NDEBUG
" --random-pclusterblks randomize pclusterblks for big pcluster (debugging only)\n"
+ " --random-algorithms randomize per-file algorithms (debugging only)\n"
#endif
+ " --xattr-prefix=X X=extra xattr name prefix\n"
+ " --mount-point=X X=prefix of target fs path (default: /)\n"
#ifdef WITH_ANDROID
"\nwith following android-specific options:\n"
- " --mount-point=X X=prefix of target fs path (default: /)\n"
" --product-out=X X=product_out directory\n"
" --fs-config-file=X X=fs_config file\n"
" --block-list-file=X X=block_list file\n"
@@ -116,6 +141,15 @@
print_available_compressors(stderr, ", ");
}
+static unsigned int pclustersize_packed, pclustersize_max;
+static struct erofs_tarfile erofstar = {
+ .global.xattrs = LIST_HEAD_INIT(erofstar.global.xattrs)
+};
+static bool tar_mode, rebuild_mode, gzip_supported;
+
+static unsigned int rebuild_src_count;
+static LIST_HEAD(rebuild_src_list);
+
static int parse_extended_opts(const char *opts)
{
#define MATCH_EXTENTED_OPT(opt, token, keylen) \
@@ -126,12 +160,13 @@
value = NULL;
for (token = opts; *token != '\0'; token = next) {
+ bool clear = false;
const char *p = strchr(token, ',');
next = NULL;
- if (p)
+ if (p) {
next = p + 1;
- else {
+ } else {
p = token + strlen(token);
next = p;
}
@@ -149,83 +184,147 @@
vallen = 0;
}
+ if (token[0] == '^') {
+ if (keylen < 2)
+ return -EINVAL;
+ ++token;
+ --keylen;
+ clear = true;
+ }
+
if (MATCH_EXTENTED_OPT("legacy-compress", token, keylen)) {
if (vallen)
return -EINVAL;
/* disable compacted indexes and 0padding */
cfg.c_legacy_compress = true;
- }
-
- if (MATCH_EXTENTED_OPT("force-inode-compact", token, keylen)) {
+ } else if (MATCH_EXTENTED_OPT("force-inode-compact", token, keylen)) {
if (vallen)
return -EINVAL;
cfg.c_force_inodeversion = FORCE_INODE_COMPACT;
cfg.c_ignore_mtime = true;
- }
-
- if (MATCH_EXTENTED_OPT("force-inode-extended", token, keylen)) {
+ } else if (MATCH_EXTENTED_OPT("force-inode-extended", token, keylen)) {
if (vallen)
return -EINVAL;
cfg.c_force_inodeversion = FORCE_INODE_EXTENDED;
- }
-
- if (MATCH_EXTENTED_OPT("nosbcrc", token, keylen)) {
+ } else if (MATCH_EXTENTED_OPT("nosbcrc", token, keylen)) {
if (vallen)
return -EINVAL;
- erofs_sb_clear_sb_chksum();
- }
-
- if (MATCH_EXTENTED_OPT("noinline_data", token, keylen)) {
+ erofs_sb_clear_sb_chksum(&sbi);
+ } else if (MATCH_EXTENTED_OPT("noinline_data", token, keylen)) {
if (vallen)
return -EINVAL;
- cfg.c_noinline_data = true;
- }
-
- if (MATCH_EXTENTED_OPT("force-inode-blockmap", token, keylen)) {
+ cfg.c_inline_data = false;
+ } else if (MATCH_EXTENTED_OPT("inline_data", token, keylen)) {
+ if (vallen)
+ return -EINVAL;
+ cfg.c_inline_data = !clear;
+ } else if (MATCH_EXTENTED_OPT("force-inode-blockmap", token, keylen)) {
if (vallen)
return -EINVAL;
cfg.c_force_chunkformat = FORCE_INODE_BLOCK_MAP;
- }
-
- if (MATCH_EXTENTED_OPT("force-chunk-indexes", token, keylen)) {
+ } else if (MATCH_EXTENTED_OPT("force-chunk-indexes", token, keylen)) {
if (vallen)
return -EINVAL;
cfg.c_force_chunkformat = FORCE_INODE_CHUNK_INDEXES;
- }
-
- if (MATCH_EXTENTED_OPT("ztailpacking", token, keylen)) {
+ } else if (MATCH_EXTENTED_OPT("ztailpacking", token, keylen)) {
if (vallen)
return -EINVAL;
- cfg.c_ztailpacking = true;
+ cfg.c_ztailpacking = !clear;
+ } else if (MATCH_EXTENTED_OPT("all-fragments", token, keylen)) {
+ cfg.c_all_fragments = true;
+ goto handle_fragment;
+ } else if (MATCH_EXTENTED_OPT("fragments", token, keylen)) {
+ char *endptr;
+ u64 i;
+
+handle_fragment:
+ cfg.c_fragments = true;
+ if (vallen) {
+ i = strtoull(value, &endptr, 0);
+ if (endptr - value != vallen) {
+ erofs_err("invalid pcluster size for the packed file %s",
+ next);
+ return -EINVAL;
+ }
+ pclustersize_packed = i;
+ }
+ } else if (MATCH_EXTENTED_OPT("dedupe", token, keylen)) {
+ if (vallen)
+ return -EINVAL;
+ cfg.c_dedupe = !clear;
+ } else if (MATCH_EXTENTED_OPT("xattr-name-filter", token, keylen)) {
+ if (vallen)
+ return -EINVAL;
+ cfg.c_xattr_name_filter = !clear;
+ } else {
+ erofs_err("unknown extended option %.*s",
+ p - token, token);
+ return -EINVAL;
}
}
return 0;
}
+static int mkfs_parse_compress_algs(char *algs)
+{
+ unsigned int i;
+ char *s;
+
+ for (s = strtok(algs, ":"), i = 0; s; s = strtok(NULL, ":"), ++i) {
+ const char *lv;
+
+ if (i >= EROFS_MAX_COMPR_CFGS - 1) {
+ erofs_err("too many algorithm types");
+ return -EINVAL;
+ }
+
+ lv = strchr(s, ',');
+ if (lv) {
+ cfg.c_compr_level[i] = atoi(lv + 1);
+ cfg.c_compr_alg[i] = strndup(s, lv - s);
+ } else {
+ cfg.c_compr_level[i] = -1;
+ cfg.c_compr_alg[i] = strdup(s);
+ }
+ }
+ return 0;
+}
+
+static void erofs_rebuild_cleanup(void)
+{
+ struct erofs_sb_info *src, *n;
+
+ list_for_each_entry_safe(src, n, &rebuild_src_list, list) {
+ list_del(&src->list);
+ erofs_put_super(src);
+ dev_close(src);
+ free(src);
+ }
+ rebuild_src_count = 0;
+}
+
static int mkfs_parse_options_cfg(int argc, char *argv[])
{
char *endptr;
- int opt, i;
+ int opt, i, err;
bool quiet = false;
- while ((opt = getopt_long(argc, argv, "C:E:T:U:d:x:z:",
+ while ((opt = getopt_long(argc, argv, "C:E:L:T:U:b:d:x:z:",
long_options, NULL)) != -1) {
switch (opt) {
case 'z':
- if (!optarg) {
- cfg.c_compr_alg_master = "(default)";
- break;
+ i = mkfs_parse_compress_algs(optarg);
+ if (i)
+ return i;
+ break;
+
+ case 'b':
+ i = atoi(optarg);
+ if (i < 512 || i > EROFS_MAX_BLOCK_SIZE) {
+ erofs_err("invalid block size %s", optarg);
+ return -EINVAL;
}
- /* get specified compression level */
- for (i = 0; optarg[i] != '\0'; ++i) {
- if (optarg[i] == ',') {
- cfg.c_compr_level_master =
- atoi(optarg + i + 1);
- optarg[i] = '\0';
- break;
- }
- }
- cfg.c_compr_alg_master = strndup(optarg, i);
+ sbi.blkszbits = ilog2(i);
break;
case 'd':
@@ -251,6 +350,17 @@
if (opt)
return opt;
break;
+
+ case 'L':
+ if (optarg == NULL ||
+ strlen(optarg) > sizeof(sbi.volume_name)) {
+ erofs_err("invalid volume label");
+ return -EINVAL;
+ }
+ strncpy(sbi.volume_name, optarg,
+ sizeof(sbi.volume_name));
+ break;
+
case 'T':
cfg.c_unix_timestamp = strtoull(optarg, &endptr, 0);
if (cfg.c_unix_timestamp == -1 || *endptr != '\0') {
@@ -259,14 +369,12 @@
}
cfg.c_timeinherit = TIMESTAMP_FIXED;
break;
-#ifdef HAVE_LIBUUID
case 'U':
- if (uuid_parse(optarg, sbi.uuid)) {
+ if (erofs_uuid_parse(optarg, sbi.uuid)) {
erofs_err("invalid UUID %s", optarg);
return -EINVAL;
}
break;
-#endif
case 2:
opt = erofs_parse_exclude_path(optarg, false);
if (opt) {
@@ -310,6 +418,9 @@
case 8:
cfg.c_random_pclusterblks = true;
break;
+ case 18:
+ cfg.c_random_algorithms = true;
+ break;
#endif
case 9:
cfg.c_max_decompressed_extent_bytes =
@@ -323,7 +434,6 @@
case 10:
cfg.c_compress_hints_file = optarg;
break;
-#ifdef WITH_ANDROID
case 512:
cfg.mount_point = optarg;
/* all trailing '/' should be deleted */
@@ -331,6 +441,7 @@
if (opt && optarg[opt - 1] == '/')
optarg[opt - 1] = '\0';
break;
+#ifdef WITH_ANDROID
case 513:
cfg.target_out_path = optarg;
break;
@@ -343,14 +454,12 @@
#endif
case 'C':
i = strtoull(optarg, &endptr, 0);
- if (*endptr != '\0' ||
- i < EROFS_BLKSIZ || i % EROFS_BLKSIZ) {
+ if (*endptr != '\0') {
erofs_err("invalid physical clustersize %s",
optarg);
return -EINVAL;
}
- cfg.c_pclusterblks_max = i / EROFS_BLKSIZ;
- cfg.c_pclusterblks_def = cfg.c_pclusterblks_max;
+ pclustersize_max = i;
break;
case 11:
i = strtol(optarg, &endptr, 0);
@@ -364,12 +473,7 @@
optarg);
return -EINVAL;
}
- if (i < EROFS_BLKSIZ) {
- erofs_err("chunksize %s must be larger than block size",
- optarg);
- return -EINVAL;
- }
- erofs_sb_set_chunked_file();
+ erofs_sb_set_chunked_file(&sbi);
break;
case 12:
quiet = true;
@@ -383,6 +487,53 @@
case 15:
cfg.c_ignore_mtime = false;
break;
+ case 16:
+ errno = 0;
+ cfg.c_uid_offset = strtoll(optarg, &endptr, 0);
+ if (errno || *endptr != '\0') {
+ erofs_err("invalid uid offset %s", optarg);
+ return -EINVAL;
+ }
+ break;
+ case 17:
+ errno = 0;
+ cfg.c_gid_offset = strtoll(optarg, &endptr, 0);
+ if (errno || *endptr != '\0') {
+ erofs_err("invalid gid offset %s", optarg);
+ return -EINVAL;
+ }
+ break;
+ case 19:
+ errno = 0;
+ opt = erofs_xattr_insert_name_prefix(optarg);
+ if (opt) {
+ erofs_err("failed to parse xattr name prefix: %s",
+ erofs_strerror(opt));
+ return opt;
+ }
+ cfg.c_extra_ea_name_prefixes = true;
+ break;
+ case 20:
+ if (optarg && (!strcmp(optarg, "i") ||
+ !strcmp(optarg, "0") || !memcmp(optarg, "0,", 2))) {
+ erofstar.index_mode = true;
+ if (!memcmp(optarg, "0,", 2))
+ erofstar.mapfile = strdup(optarg + 2);
+ }
+ tar_mode = true;
+ break;
+ case 21:
+ erofstar.aufs = true;
+ break;
+ case 516:
+ if (!optarg || !strcmp(optarg, "1"))
+ cfg.c_ovlfs_strip = true;
+ else
+ cfg.c_ovlfs_strip = false;
+ break;
+ case 517:
+ gzip_supported = true;
+ break;
case 1:
usage();
exit(0);
@@ -392,7 +543,7 @@
}
}
- if (cfg.c_blobdev_path && cfg.c_chunkbits < LOG_BLOCK_SIZE) {
+ if (cfg.c_blobdev_path && cfg.c_chunkbits < sbi.blkszbits) {
erofs_err("--blobdev must be used together with --chunksize");
return -EINVAL;
}
@@ -414,58 +565,152 @@
return -ENOMEM;
if (optind >= argc) {
- erofs_err("missing argument: DIRECTORY");
- return -EINVAL;
- }
+ if (!tar_mode) {
+ erofs_err("missing argument: SOURCE(s)");
+ return -EINVAL;
+ } else {
+ int dupfd;
- cfg.c_src_path = realpath(argv[optind++], NULL);
- if (!cfg.c_src_path) {
- erofs_err("failed to parse source directory: %s",
- erofs_strerror(-errno));
- return -ENOENT;
- }
+ dupfd = dup(STDIN_FILENO);
+ if (dupfd < 0) {
+ erofs_err("failed to duplicate STDIN_FILENO: %s",
+ strerror(errno));
+ return -errno;
+ }
+ err = erofs_iostream_open(&erofstar.ios, dupfd, gzip_supported);
+ if (err)
+ return err;
+ }
+ } else {
+ struct stat st;
- if (optind < argc) {
- erofs_err("unexpected argument: %s\n", argv[optind]);
- return -EINVAL;
+ cfg.c_src_path = realpath(argv[optind++], NULL);
+ if (!cfg.c_src_path) {
+ erofs_err("failed to parse source directory: %s",
+ erofs_strerror(-errno));
+ return -ENOENT;
+ }
+
+ if (tar_mode) {
+ int fd = open(cfg.c_src_path, O_RDONLY);
+
+ if (fd < 0) {
+ erofs_err("failed to open file: %s", cfg.c_src_path);
+ return -errno;
+ }
+ err = erofs_iostream_open(&erofstar.ios, fd, gzip_supported);
+ if (err)
+ return err;
+ } else {
+ err = lstat(cfg.c_src_path, &st);
+ if (err)
+ return -errno;
+ if (S_ISDIR(st.st_mode))
+ erofs_set_fs_root(cfg.c_src_path);
+ else
+ rebuild_mode = true;
+ }
+
+ if (rebuild_mode) {
+ char *srcpath = cfg.c_src_path;
+ struct erofs_sb_info *src;
+
+ do {
+ src = calloc(1, sizeof(struct erofs_sb_info));
+ if (!src) {
+ erofs_rebuild_cleanup();
+ return -ENOMEM;
+ }
+
+ err = dev_open_ro(src, srcpath);
+ if (err) {
+ free(src);
+ erofs_rebuild_cleanup();
+ return err;
+ }
+
+ /* extra device index starts from 1 */
+ src->dev = ++rebuild_src_count;
+ list_add(&src->list, &rebuild_src_list);
+ } while (optind < argc && (srcpath = argv[optind++]));
+ } else if (optind < argc) {
+ erofs_err("unexpected argument: %s\n", argv[optind]);
+ return -EINVAL;
+ }
}
if (quiet) {
cfg.c_dbg_lvl = EROFS_ERR;
cfg.c_showprogress = false;
}
+
+ if (cfg.c_compr_alg[0] && erofs_blksiz(&sbi) != getpagesize())
+ erofs_warn("Please note that subpage blocksize with compression isn't yet supported in kernel. "
+ "This compressed image will only work with bs = ps = %u bytes",
+ erofs_blksiz(&sbi));
+
+ if (pclustersize_max) {
+ if (pclustersize_max < erofs_blksiz(&sbi) ||
+ pclustersize_max % erofs_blksiz(&sbi)) {
+ erofs_err("invalid physical clustersize %u",
+ pclustersize_max);
+ return -EINVAL;
+ }
+ cfg.c_pclusterblks_max = pclustersize_max >> sbi.blkszbits;
+ cfg.c_pclusterblks_def = cfg.c_pclusterblks_max;
+ }
+ if (cfg.c_chunkbits && cfg.c_chunkbits < sbi.blkszbits) {
+ erofs_err("chunksize %u must be larger than block size",
+ 1u << cfg.c_chunkbits);
+ return -EINVAL;
+ }
+
+ if (pclustersize_packed) {
+ if (pclustersize_max < erofs_blksiz(&sbi) ||
+ pclustersize_max % erofs_blksiz(&sbi)) {
+ erofs_err("invalid pcluster size for the packed file %u",
+ pclustersize_packed);
+ return -EINVAL;
+ }
+ cfg.c_pclusterblks_packed = pclustersize_packed >> sbi.blkszbits;
+ }
return 0;
}
int erofs_mkfs_update_super_block(struct erofs_buffer_head *bh,
erofs_nid_t root_nid,
- erofs_blk_t *blocks)
+ erofs_blk_t *blocks,
+ erofs_nid_t packed_nid)
{
struct erofs_super_block sb = {
.magic = cpu_to_le32(EROFS_SUPER_MAGIC_V1),
- .blkszbits = LOG_BLOCK_SIZE,
+ .blkszbits = sbi.blkszbits,
.inos = cpu_to_le64(sbi.inos),
.build_time = cpu_to_le64(sbi.build_time),
.build_time_nsec = cpu_to_le32(sbi.build_time_nsec),
.blocks = 0,
- .meta_blkaddr = sbi.meta_blkaddr,
- .xattr_blkaddr = sbi.xattr_blkaddr,
+ .meta_blkaddr = cpu_to_le32(sbi.meta_blkaddr),
+ .xattr_blkaddr = cpu_to_le32(sbi.xattr_blkaddr),
+ .xattr_prefix_count = sbi.xattr_prefix_count,
+ .xattr_prefix_start = cpu_to_le32(sbi.xattr_prefix_start),
.feature_incompat = cpu_to_le32(sbi.feature_incompat),
.feature_compat = cpu_to_le32(sbi.feature_compat &
~EROFS_FEATURE_COMPAT_SB_CHKSUM),
.extra_devices = cpu_to_le16(sbi.extra_devices),
.devt_slotoff = cpu_to_le16(sbi.devt_slotoff),
};
- const unsigned int sb_blksize =
- round_up(EROFS_SUPER_END, EROFS_BLKSIZ);
+ const u32 sb_blksize = round_up(EROFS_SUPER_END, erofs_blksiz(&sbi));
char *buf;
+ int ret;
*blocks = erofs_mapbh(NULL);
sb.blocks = cpu_to_le32(*blocks);
sb.root_nid = cpu_to_le16(root_nid);
+ sb.packed_nid = cpu_to_le64(packed_nid);
memcpy(sb.uuid, sbi.uuid, sizeof(sb.uuid));
+ memcpy(sb.volume_name, sbi.volume_name, sizeof(sb.volume_name));
- if (erofs_sb_has_compr_cfgs())
- sb.u1.available_compr_algs = sbi.available_compr_algs;
+ if (erofs_sb_has_compr_cfgs(&sbi))
+ sb.u1.available_compr_algs = cpu_to_le16(sbi.available_compr_algs);
else
sb.u1.lz4_max_distance = cpu_to_le16(sbi.lz4_max_distance);
@@ -477,19 +722,21 @@
}
memcpy(buf + EROFS_SUPER_OFFSET, &sb, sizeof(sb));
- bh->fsprivate = buf;
- bh->op = &erofs_buf_write_bhops;
- return 0;
+ ret = dev_write(&sbi, buf, erofs_btell(bh, false), EROFS_SUPER_END);
+ free(buf);
+ erofs_bdrop(bh, false);
+ return ret;
}
static int erofs_mkfs_superblock_csum_set(void)
{
int ret;
- u8 buf[EROFS_BLKSIZ];
+ u8 buf[EROFS_MAX_BLOCK_SIZE];
u32 crc;
+ unsigned int len;
struct erofs_super_block *sb;
- ret = blk_read(0, buf, 0, 1);
+ ret = blk_read(&sbi, 0, buf, 0, erofs_blknr(&sbi, EROFS_SUPER_END) + 1);
if (ret) {
erofs_err("failed to read superblock to set checksum: %s",
erofs_strerror(ret));
@@ -510,12 +757,16 @@
/* turn on checksum feature */
sb->feature_compat = cpu_to_le32(le32_to_cpu(sb->feature_compat) |
EROFS_FEATURE_COMPAT_SB_CHKSUM);
- crc = erofs_crc32c(~0, (u8 *)sb, EROFS_BLKSIZ - EROFS_SUPER_OFFSET);
+ if (erofs_blksiz(&sbi) > EROFS_SUPER_OFFSET)
+ len = erofs_blksiz(&sbi) - EROFS_SUPER_OFFSET;
+ else
+ len = erofs_blksiz(&sbi);
+ crc = erofs_crc32c(~0, (u8 *)sb, len);
/* set up checksum field to erofs_super_block */
sb->checksum = cpu_to_le32(crc);
- ret = blk_write(buf, 0, 1);
+ ret = blk_write(&sbi, buf, 0, 1);
if (ret) {
erofs_err("failed to write checksummed superblock: %s",
erofs_strerror(ret));
@@ -530,16 +781,15 @@
{
cfg.c_showprogress = true;
cfg.c_legacy_compress = false;
- sbi.feature_incompat = EROFS_FEATURE_INCOMPAT_LZ4_0PADDING;
+ cfg.c_inline_data = true;
+ cfg.c_xattr_name_filter = true;
+ sbi.blkszbits = ilog2(min_t(u32, getpagesize(), EROFS_MAX_BLOCK_SIZE));
+ sbi.feature_incompat = EROFS_FEATURE_INCOMPAT_ZERO_PADDING;
sbi.feature_compat = EROFS_FEATURE_COMPAT_SB_CHKSUM |
EROFS_FEATURE_COMPAT_MTIME;
/* generate a default uuid first */
-#ifdef HAVE_LIBUUID
- do {
- uuid_generate(sbi.uuid);
- } while (uuid_is_null(sbi.uuid));
-#endif
+ erofs_uuid_generate(sbi.uuid);
}
/* https://reproducible-builds.org/specs/source-date-epoch/ for more details */
@@ -574,17 +824,111 @@
if (cfg.c_dbg_lvl >= EROFS_WARN)
printf("%s %s\n", basename(argv[0]), cfg.c_version);
}
+static struct erofs_inode *erofs_alloc_root_inode(void)
+{
+ struct erofs_inode *root;
+
+ root = erofs_new_inode();
+ if (IS_ERR(root))
+ return root;
+ root->i_srcpath = strdup("/");
+ root->i_mode = S_IFDIR | 0777;
+ root->i_parent = root;
+ root->i_mtime = root->sbi->build_time;
+ root->i_mtime_nsec = root->sbi->build_time_nsec;
+ erofs_init_empty_dir(root);
+ return root;
+}
+
+static int erofs_rebuild_load_trees(struct erofs_inode *root)
+{
+ struct erofs_sb_info *src;
+ unsigned int extra_devices = 0;
+ erofs_blk_t nblocks;
+ int ret, idx;
+
+ list_for_each_entry(src, &rebuild_src_list, list) {
+ ret = erofs_rebuild_load_tree(root, src);
+ if (ret) {
+ erofs_err("failed to load %s", src->devname);
+ return ret;
+ }
+ if (src->extra_devices > 1) {
+ erofs_err("%s: unsupported number of extra devices",
+ src->devname, src->extra_devices);
+ return -EOPNOTSUPP;
+ }
+ extra_devices += src->extra_devices;
+ }
+
+ if (extra_devices && extra_devices != rebuild_src_count) {
+ erofs_err("extra_devices(%u) is mismatched with source images(%u)",
+ extra_devices, rebuild_src_count);
+ return -EOPNOTSUPP;
+ }
+
+ ret = erofs_mkfs_init_devices(&sbi, rebuild_src_count);
+ if (ret)
+ return ret;
+
+ list_for_each_entry(src, &rebuild_src_list, list) {
+ u8 *tag = NULL;
+
+ if (extra_devices) {
+ nblocks = src->devs[0].blocks;
+ tag = src->devs[0].tag;
+ } else {
+ nblocks = src->primarydevice_blocks;
+ }
+ DBG_BUGON(src->dev < 1);
+ idx = src->dev - 1;
+ sbi.devs[idx].blocks = nblocks;
+ if (tag && *tag)
+ memcpy(sbi.devs[idx].tag, tag, sizeof(sbi.devs[0].tag));
+ else
+ /* convert UUID of the source image to a hex string */
+ sprintf((char *)sbi.devs[idx].tag,
+ "%04x%04x%04x%04x%04x%04x%04x%04x",
+ (src->uuid[0] << 8) | src->uuid[1],
+ (src->uuid[2] << 8) | src->uuid[3],
+ (src->uuid[4] << 8) | src->uuid[5],
+ (src->uuid[6] << 8) | src->uuid[7],
+ (src->uuid[8] << 8) | src->uuid[9],
+ (src->uuid[10] << 8) | src->uuid[11],
+ (src->uuid[12] << 8) | src->uuid[13],
+ (src->uuid[14] << 8) | src->uuid[15]);
+ }
+ return 0;
+}
+
+static void erofs_mkfs_showsummaries(erofs_blk_t nblocks)
+{
+ char uuid_str[37] = {};
+
+ if (!(cfg.c_dbg_lvl > EROFS_ERR && cfg.c_showprogress))
+ return;
+
+ erofs_uuid_unparse_lower(sbi.uuid, uuid_str);
+
+ fprintf(stdout, "------\nFilesystem UUID: %s\n"
+ "Filesystem total blocks: %u (of %u-byte blocks)\n"
+ "Filesystem total inodes: %llu\n"
+ "Filesystem total metadata blocks: %u\n"
+ "Filesystem total deduplicated bytes (of source files): %llu\n",
+ uuid_str, nblocks, 1U << sbi.blkszbits, sbi.inos | 0ULL,
+ erofs_total_metablocks(),
+ sbi.saved_by_deduplication | 0ULL);
+}
int main(int argc, char **argv)
{
int err = 0;
struct erofs_buffer_head *sb_bh;
- struct erofs_inode *root_inode;
- erofs_nid_t root_nid;
- struct stat64 st;
+ struct erofs_inode *root_inode, *packed_inode;
+ erofs_nid_t root_nid, packed_nid;
erofs_blk_t nblocks;
struct timeval t;
- char uuid_str[37] = "not available";
+ FILE *packedfile = NULL;
erofs_init_configure();
erofs_mkfs_default_options();
@@ -603,22 +947,6 @@
return 1;
}
- if (cfg.c_chunkbits) {
- err = erofs_blob_init(cfg.c_blobdev_path);
- if (err)
- return 1;
- }
-
- err = lstat64(cfg.c_src_path, &st);
- if (err)
- return 1;
- if (!S_ISDIR(st.st_mode)) {
- erofs_err("root of the filesystem is not a directory - %s",
- cfg.c_src_path);
- usage();
- return 1;
- }
-
if (cfg.c_unix_timestamp != -1) {
sbi.build_time = cfg.c_unix_timestamp;
sbi.build_time_nsec = 0;
@@ -627,12 +955,20 @@
sbi.build_time_nsec = t.tv_usec;
}
- err = dev_open(cfg.c_img_path);
+ err = dev_open(&sbi, cfg.c_img_path);
if (err) {
usage();
return 1;
}
+ if (tar_mode && !erofstar.index_mode) {
+ err = erofs_diskbuf_init(1);
+ if (err) {
+ erofs_err("failed to initialize diskbuf: %s",
+ strerror(-err));
+ goto exit;
+ }
+ }
#ifdef WITH_ANDROID
if (cfg.fs_config_file &&
load_canned_fs_config(cfg.fs_config_file) < 0) {
@@ -640,21 +976,64 @@
return 1;
}
- if (cfg.block_list_file && erofs_droid_blocklist_fopen() < 0) {
+ if (cfg.block_list_file &&
+ erofs_blocklist_open(cfg.block_list_file, false)) {
erofs_err("failed to open %s", cfg.block_list_file);
return 1;
}
#endif
erofs_show_config();
- if (erofs_sb_has_chunked_file())
- erofs_warn("EXPERIMENTAL chunked file feature in use. Use at your own risk!");
- if (cfg.c_ztailpacking)
- erofs_warn("EXPERIMENTAL compressed inline data feature in use. Use at your own risk!");
- erofs_set_fs_root(cfg.c_src_path);
+ if (cfg.c_fragments || cfg.c_extra_ea_name_prefixes) {
+ if (!cfg.c_pclusterblks_packed)
+ cfg.c_pclusterblks_packed = cfg.c_pclusterblks_def;
+
+ packedfile = erofs_packedfile_init();
+ if (IS_ERR(packedfile)) {
+ erofs_err("failed to initialize packedfile");
+ return 1;
+ }
+ }
+
+ if (cfg.c_fragments) {
+ err = z_erofs_fragments_init();
+ if (err) {
+ erofs_err("failed to initialize fragments");
+ return 1;
+ }
+ }
+
#ifndef NDEBUG
if (cfg.c_random_pclusterblks)
srand(time(NULL));
#endif
+ if (tar_mode && erofstar.index_mode) {
+ if (erofstar.mapfile) {
+ err = erofs_blocklist_open(erofstar.mapfile, true);
+ if (err) {
+ erofs_err("failed to open %s", erofstar.mapfile);
+ goto exit;
+ }
+ } else {
+ sbi.blkszbits = 9;
+ }
+ }
+
+ if (rebuild_mode) {
+ struct erofs_sb_info *src;
+
+ erofs_warn("EXPERIMENTAL rebuild mode in use. Use at your own risk!");
+
+ src = list_first_entry(&rebuild_src_list, struct erofs_sb_info, list);
+ if (!src)
+ goto exit;
+ err = erofs_read_superblock(src);
+ if (err) {
+ erofs_err("failed to read superblock of %s", src->devname);
+ goto exit;
+ }
+ sbi.blkszbits = src->blkszbits;
+ }
+
sb_bh = erofs_buffer_init();
if (IS_ERR(sb_bh)) {
err = PTR_ERR(sb_bh);
@@ -669,57 +1048,135 @@
goto exit;
}
- err = erofs_load_compress_hints();
+ /* make sure that the super block should be the very first blocks */
+ (void)erofs_mapbh(sb_bh->block);
+ if (erofs_btell(sb_bh, false) != 0) {
+ erofs_err("failed to reserve erofs_super_block");
+ goto exit;
+ }
+
+ err = erofs_load_compress_hints(&sbi);
if (err) {
erofs_err("failed to load compress hints %s",
cfg.c_compress_hints_file);
goto exit;
}
- err = z_erofs_compress_init(sb_bh);
+ err = z_erofs_compress_init(&sbi, sb_bh);
if (err) {
erofs_err("failed to initialize compressor: %s",
erofs_strerror(err));
goto exit;
}
- err = erofs_generate_devtable();
+ if (cfg.c_dedupe) {
+ if (!cfg.c_compr_alg[0]) {
+ erofs_err("Compression is not enabled. Turn on chunk-based data deduplication instead.");
+ cfg.c_chunkbits = sbi.blkszbits;
+ } else {
+ err = z_erofs_dedupe_init(erofs_blksiz(&sbi));
+ if (err) {
+ erofs_err("failed to initialize deduplication: %s",
+ erofs_strerror(err));
+ goto exit;
+ }
+ }
+ }
+
+ if (cfg.c_chunkbits) {
+ err = erofs_blob_init(cfg.c_blobdev_path);
+ if (err)
+ return 1;
+ }
+
+ if ((erofstar.index_mode && !erofstar.mapfile) || cfg.c_blobdev_path)
+ err = erofs_mkfs_init_devices(&sbi, 1);
if (err) {
erofs_err("failed to generate device table: %s",
erofs_strerror(err));
goto exit;
}
-#ifdef HAVE_LIBUUID
- uuid_unparse_lower(sbi.uuid, uuid_str);
-#endif
- erofs_info("filesystem UUID: %s", uuid_str);
erofs_inode_manager_init();
- err = erofs_build_shared_xattrs_from_path(cfg.c_src_path);
- if (err) {
- erofs_err("failed to build shared xattrs: %s",
- erofs_strerror(err));
- goto exit;
- }
+ if (tar_mode) {
+ root_inode = erofs_alloc_root_inode();
+ if (IS_ERR(root_inode)) {
+ err = PTR_ERR(root_inode);
+ goto exit;
+ }
- root_inode = erofs_mkfs_build_tree_from_path(NULL, cfg.c_src_path);
- if (IS_ERR(root_inode)) {
- err = PTR_ERR(root_inode);
- goto exit;
- }
+ while (!(err = tarerofs_parse_tar(root_inode, &erofstar)));
+ if (err < 0)
+ goto exit;
+
+ err = erofs_rebuild_dump_tree(root_inode);
+ if (err < 0)
+ goto exit;
+ } else if (rebuild_mode) {
+ root_inode = erofs_alloc_root_inode();
+ if (IS_ERR(root_inode)) {
+ err = PTR_ERR(root_inode);
+ goto exit;
+ }
+
+ err = erofs_rebuild_load_trees(root_inode);
+ if (err)
+ goto exit;
+ err = erofs_rebuild_dump_tree(root_inode);
+ if (err)
+ goto exit;
+ } else {
+ err = erofs_build_shared_xattrs_from_path(&sbi, cfg.c_src_path);
+ if (err) {
+ erofs_err("failed to build shared xattrs: %s",
+ erofs_strerror(err));
+ goto exit;
+ }
+
+ if (cfg.c_extra_ea_name_prefixes)
+ erofs_xattr_write_name_prefixes(&sbi, packedfile);
+
+ root_inode = erofs_mkfs_build_tree_from_path(cfg.c_src_path);
+ if (IS_ERR(root_inode)) {
+ err = PTR_ERR(root_inode);
+ goto exit;
+ }
+ }
root_nid = erofs_lookupnid(root_inode);
erofs_iput(root_inode);
- if (cfg.c_chunkbits) {
- erofs_info("total metadata: %u blocks", erofs_mapbh(NULL));
- err = erofs_blob_remap();
+ if (erofstar.index_mode || cfg.c_chunkbits || sbi.extra_devices) {
+ if (erofstar.index_mode && !erofstar.mapfile)
+ sbi.devs[0].blocks =
+ BLK_ROUND_UP(&sbi, erofstar.offset);
+ err = erofs_mkfs_dump_blobs(&sbi);
if (err)
goto exit;
}
- err = erofs_mkfs_update_super_block(sb_bh, root_nid, &nblocks);
+ packed_nid = 0;
+ if ((cfg.c_fragments || cfg.c_extra_ea_name_prefixes) &&
+ erofs_sb_has_fragments(&sbi)) {
+ erofs_update_progressinfo("Handling packed_file ...");
+ packed_inode = erofs_mkfs_build_packedfile();
+ if (IS_ERR(packed_inode)) {
+ err = PTR_ERR(packed_inode);
+ goto exit;
+ }
+ packed_nid = erofs_lookupnid(packed_inode);
+ erofs_iput(packed_inode);
+ }
+
+ /* flush all buffers except for the superblock */
+ if (!erofs_bflush(NULL)) {
+ err = -EIO;
+ goto exit;
+ }
+
+ err = erofs_mkfs_update_super_block(sb_bh, root_nid, &nblocks,
+ packed_nid);
if (err)
goto exit;
@@ -727,28 +1184,35 @@
if (!erofs_bflush(NULL))
err = -EIO;
else
- err = dev_resize(nblocks);
+ err = dev_resize(&sbi, nblocks);
- if (!err && erofs_sb_has_sb_chksum())
+ if (!err && erofs_sb_has_sb_chksum(&sbi))
err = erofs_mkfs_superblock_csum_set();
exit:
z_erofs_compress_exit();
-#ifdef WITH_ANDROID
- erofs_droid_blocklist_fclose();
-#endif
- dev_close();
+ z_erofs_dedupe_exit();
+ erofs_blocklist_close();
+ dev_close(&sbi);
erofs_cleanup_compress_hints();
erofs_cleanup_exclude_rules();
if (cfg.c_chunkbits)
erofs_blob_exit();
+ if (cfg.c_fragments)
+ z_erofs_fragments_exit();
+ erofs_packedfile_exit();
+ erofs_xattr_cleanup_name_prefixes();
+ erofs_rebuild_cleanup();
+ erofs_diskbuf_exit();
erofs_exit_configure();
+ if (tar_mode)
+ erofs_iostream_close(&erofstar.ios);
if (err) {
erofs_err("\tCould not format the device : %s\n",
erofs_strerror(err));
return 1;
- } else {
- erofs_update_progressinfo("Build completed.\n");
}
+ erofs_update_progressinfo("Build completed.\n");
+ erofs_mkfs_showsummaries(nblocks);
return 0;
}