Snap for 11819167 from f3f0f0a21769367202e97cdd7fa3842f39867fec to busytown-mac-infra-release

Change-Id: I4ba7ee10986726bc0bf08951debf446f12a1c586
diff --git a/.github/ISSUE_TEMPLATE.txt b/.github/ISSUE_TEMPLATE.txt
new file mode 100644
index 0000000..0e736fb
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE.txt
@@ -0,0 +1,9 @@
+Please **do not** send pull requests or open new issues on GitHub.
+
+In addition, the current erofs-utils repository is:
+git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git
+
+GitHub is not _the place_ for EROFS development, and some mirrors
+are unofficial and not frequently monitored.
+
+* Send bug reports and/or feedback to: linux-erofs@lists.ozlabs.org
diff --git a/.github/PULL_REQUEST_TEMPLATE.txt b/.github/PULL_REQUEST_TEMPLATE.txt
new file mode 100644
index 0000000..0e736fb
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.txt
@@ -0,0 +1,9 @@
+Please **do not** send pull requests or open new issues on GitHub.
+
+In addition, the current erofs-utils repository is:
+git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git
+
+GitHub is not _the place_ for EROFS development, and some mirrors
+are unofficial and not frequently monitored.
+
+* Send bug reports and/or feedback to: linux-erofs@lists.ozlabs.org
diff --git a/AUTHORS b/AUTHORS
index 6b41df8..bc67a65 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,7 +1,7 @@
 EROFS USERSPACE UTILITIES
 M: Li Guifu <bluce.lee@aliyun.com>
 M: Gao Xiang <xiang@kernel.org>
-M: Huang Jianan <huangjianan@oppo.com>
+M: Huang Jianan <jnhuang95@gmail.com>
 R: Chao Yu <chao@kernel.org>
 R: Miao Xie <miaoxie@huawei.com>
 R: Fang Wei <fangwei1@huawei.com>
diff --git a/Android.bp b/Android.bp
index fd7fc3d..bd7e06d 100644
--- a/Android.bp
+++ b/Android.bp
@@ -73,6 +73,8 @@
         "-DHAVE_SYS_IOCTL_H",
         "-DHAVE_LLISTXATTR",
         "-DHAVE_LGETXATTR",
+        "-D_FILE_OFFSET_BITS=64",
+        "-DEROFS_MAX_BLOCK_SIZE=16384",
     ],
 }
 
@@ -108,6 +110,9 @@
     srcs: [
         "lib/*.c",
     ],
+    exclude_srcs: [
+        "lib/compressor_libdeflate.c",
+    ],
     export_include_dirs: ["include"],
 
     target: {
@@ -123,7 +128,6 @@
     defaults: ["erofs-utils_defaults"],
 
     srcs: [
-        "lib/*.c",
         "mkfs/*.c",
     ],
     static_libs: [
@@ -158,7 +162,6 @@
     host_supported: true,
     recovery_available: true,
     srcs: [
-        "lib/*.c",
         "dump/*.c",
     ],
     static_libs: [
@@ -177,7 +180,6 @@
     host_supported: true,
     recovery_available: true,
     srcs: [
-        "lib/*.c",
         "fsck/*.c",
     ],
     static_libs: [
diff --git a/ChangeLog b/ChangeLog
index 97d7336..99220c8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,43 @@
+erofs-utils 1.7.1
+
+ * A quick maintenance release includes the following fixes:
+   - fix a build issue of cross-compilation with autoconf (Sandeep Dhavale);
+   - fix an invalid error code in lib/tar.c (Erik Sjölund);
+   - fix corrupted directories with hardlinks.
+
+ -- Gao Xiang <xiang@kernel.org>  Fri, 20 Oct 2023 00:00:00 +0800
+
+erofs-utils 1.7
+
+ * This release includes the following updates:
+   - support arbitrary valid block sizes in addition to page size;
+   - (mkfs.erofs) arrange on-disk meta with Breadth-First Traversal instead;
+   - support long xattr name prefixes (Jingbo Xu);
+   - support UUID functionality without libuuid (Norbert Lange);
+   - (mkfs.erofs, experimental) add DEFLATE algorithm support;
+   - (mkfs.erofs, experimental) support building images directly from tarballs;
+   - (dump.erofs) print more superblock fields (Guo Xuenan);
+   - (mkfs.erofs, experimental) introduce preliminary rebuild mode (Jingbo Xu);
+   - various bugfixes and cleanups (Sandeep Dhavale, Guo Xuenan, Yue Hu,
+           Weizhao Ouyang, Kelvin Zhang, Noboru Asai, Yifan Zhao and Li Yiyan);
+
+ -- Gao Xiang <xiang@kernel.org>  Thu, 21 Sep 2023 00:00:00 +0800
+
+erofs-utils 1.6
+
+ * This release includes the following updates:
+   - support fragments by using `-Efragments` (Yue Hu);
+   - support compressed data deduplication by using `-Ededupe` (Ziyang Zhang);
+   - (erofsfuse) support extended attributes (Huang Jianan);
+   - (mkfs.erofs) support multiple algorithms in a single image (Gao Xiang);
+   - (mkfs.erofs) support chunk-based sparse files (Gao Xiang);
+   - (mkfs.erofs) add volume-label setting support (Naoto Yamaguchi);
+   - (mkfs.erofs) add uid/gid offsetting support (Naoto Yamaguchi);
+   - (mkfs.erofs) pack files entirely by using `-Eall-fragments` (Gao Xiang);
+   - various bugfixes and cleanups;
+
+ -- Gao Xiang <xiang@kernel.org>  Sun, 12 Mar 2023 00:00:00 +0800
+
 erofs-utils 1.5
 
  * This release includes the following updates:
diff --git a/METADATA b/METADATA
index 56dfd1b..2b7423b 100644
--- a/METADATA
+++ b/METADATA
@@ -1,6 +1,6 @@
 # This project was upgraded with external_updater.
 # Usage: tools/external_updater/updater.sh update erofs-utils
-# For more info, check https://cs.android.com/android/platform/superproject/+/master:tools/external_updater/README.md
+# For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md
 
 name: "erofs-utils"
 description: "EROFS Utilities"
@@ -9,11 +9,11 @@
     type: GIT
     value: "https://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git"
   }
-  version: "v1.5"
+  version: "v1.7.1"
   license_type: RESTRICTED
   last_upgrade_date {
     year: 2023
-    month: 1
-    day: 18
+    month: 10
+    day: 23
   }
 }
diff --git a/README b/README
index 92b3128..e224b23 100644
--- a/README
+++ b/README
@@ -1,7 +1,7 @@
 erofs-utils
 ===========
 
-userspace tools for EROFS filesystem, currently including:
+Userspace tools for the EROFS filesystem, currently including:
 
   mkfs.erofs    filesystem formatter
   erofsfuse     FUSE daemon alternative
@@ -9,76 +9,60 @@
   fsck.erofs    filesystem compatibility & consistency checker as well
                 as extractor
 
-Dependencies & build
---------------------
 
- lz4 1.8.0+ for lz4 enabled [2], lz4 1.9.3+ highly recommended [4][5].
- XZ Utils 5.3.2alpha [6] or later versions for MicroLZMA enabled.
+EROFS filesystem overview
+-------------------------
 
- libfuse 2.6+ for erofsfuse enabled as a plus.
+EROFS stands for Enhanced Read-Only File System.  It aims to provide
+a generic read-only filesystem solution for various read-only use
+cases, rather than just focusing on saving storage space while
+ignoring the runtime performance side effects.
 
-How to build with lz4-1.9.0 or above
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Typically, EROFS can be considered for the following use scenarios:
+  - Firmware in performance-sensitive systems, such as the system
+    partitions of Android smartphones;
 
-To build, you can run the following commands in order:
+  - Mountable immutable images such as container images, with more
+    effective metadata & data access compared with tar, cpio or other
+    local filesystems (e.g. ext4, XFS, btrfs, etc.);
 
-::
+  - FSDAX-enabled rootfs for secure containers (Linux 5.15+);
 
-	$ ./autogen.sh
-	$ ./configure
-	$ make
+  - Live CDs which need a set of files compressed with another
+    high-performance algorithm to optimize startup time; other files,
+    for archival purposes only, do not need this;
 
-mkfs.erofs binary will be generated under mkfs folder.
+  - and more.
 
-* For lz4 < 1.9.2, there are some stability issues about
-  LZ4_compress_destSize(). (lz4hc isn't impacted) [3].
+Note that all EROFS metadata is uncompressed by design, so you can
+treat EROFS as a drop-in read-only replacement for ext4, XFS, btrfs,
+etc. without any compression-related dependencies; thanks to its
+reduced metadata, EROFS can also provide more effective filesystem
+access to users.
 
-** For lz4 = 1.9.2, there is a noticeable regression about
-   LZ4_decompress_safe_partial() [5], which impacts erofsfuse
-   functionality for legacy images (without 0PADDING).
+For more details of EROFS filesystem itself, please refer to:
+https://www.kernel.org/doc/html/next/filesystems/erofs.html
 
-How to build with lz4-1.8.0~1.8.3
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+For more details on how to build erofs-utils, see `docs/INSTALL.md`.
 
-For these old lz4 versions, lz4hc algorithm cannot be supported
-without lz4-static installed due to LZ4_compress_HC_destSize()
-unstable api usage, which means lz4 will only be available if
-lz4-static isn't found.
-
-On Fedora, lz4-static can be installed by using:
-
-	yum install lz4-static.x86_64
-
-However, it's still not recommended using those versions directly
-since there are serious bugs in these compressors, see [2] [3] [4]
-as well.
-
-How to build with liblzma
-~~~~~~~~~~~~~~~~~~~~~~~~~
-
-In order to enable LZMA support, build with the following commands:
-	$ ./configure --enable-lzma
-	$ make
-
-Additionally, you could specify liblzma build paths with:
-	--with-liblzma-incdir and --with-liblzma-libdir
+For more details about filesystem performance, see
+`docs/PERFORMANCE.md`.
 
 
 mkfs.erofs
 ----------
 
-two main kinds of EROFS images can be generated: (un)compressed.
+Two main kinds of EROFS images can be generated: compressed and uncompressed.
 
- - For uncompressed images, there will be none of compression
-   files in these images. However, it can decide whether the tail
-   block of a file should be inlined or not properly [1].
+ - For uncompressed images, no file is compressed in these images.
+   However, mkfs.erofs can still properly decide whether the tail
+   block of a file should be inlined or not [1].
 
- - For compressed images, it'll try to use specific algorithms
-   first for each regular file and see if storage space can be
-   saved with compression. If not, fallback to an uncompressed
-   file.
+ - For compressed images, it'll first try the given algorithms for
+   each regular file and see if storage space can be saved with
+   compression.  If not, it falls back to an uncompressed file.
 
-How to generate EROFS images (lz4 for Linux 5.3+, lzma for Linux 5.16+)
+How to generate EROFS images (LZ4 for Linux 5.3+, LZMA for Linux 5.16+)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Currently lz4(hc) and lzma are available for compression, e.g.
@@ -113,12 +97,55 @@
 please evaluate carefully in advance. Or make your own per-(sub)file
 compression strategies according to file access patterns if needed.
 
+How to generate EROFS images with multiple algorithms (Linux 5.16+)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It's possible to generate an EROFS image with files compressed by
+different algorithms for various purposes: for example, LZMA for
+archival purposes and LZ4 for runtime purposes.
+
+In order to use alternative algorithms, just specify two or more
+compression configurations together, separated by ':', like below:
+    -zlzma:lz4hc,12:lzma,9 -C32768
+
+Although mkfs still chooses the first one by default, you could write
+a compress-hints file like below:
+    4096  1 .*\.so$
+    32768 2 .*\.txt$
+    4096    sbin/.*$
+    16384 0 .*
+
+and specify it with `--compress-hints=` so that ".so" files will use
+"lz4hc,12" compression with 4k pclusters, ".txt" files will use
+"lzma,9" compression with 32k pclusters, files under "/sbin" will use
+the default "lzma" compression with 4k pclusters and other files will
+use "lzma" compression with 16k pclusters.
+
+Note that the largest pcluster size should be specified with the "-C"
+option (here 32k pcluster size), otherwise all larger pclusters will be
+limited.
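+
+As a rough sketch only (the image name "foo.erofs.img", the source
+directory "foo/" and the hints file name "compress-hints.txt" below
+are placeholders), the whole command line could look like:
+    $ mkfs.erofs -zlzma:lz4hc,12:lzma,9 -C32768 \
+          --compress-hints=compress-hints.txt foo.erofs.img foo/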
+
+How to generate well-compressed EROFS images
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Even though EROFS was not designed for such purposes in the beginning,
+it can still produce smaller images (though not always) than other
+approaches, with better performance (see `docs/PERFORMANCE.md`).  In
+order to build well-compressed EROFS images, try the following options:
+ -C1048576                     (5.13+)
+ -Eztailpacking                (5.16+)
+ -Efragments / -Eall-fragments ( 6.1+);
+ -Ededupe                      ( 6.1+).
+
+Also, EROFS uses lz4hc level 9 by default, whereas some other
+approaches use lz4hc level 12 by default.  So please explicitly
+specify `-zlz4hc,12` for comparison purposes.
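+
+For instance, a highly-compressed image could be built with a command
+along these lines (a sketch only; "foo.erofs.img" and "foo/" are
+placeholders):
+    $ mkfs.erofs -zlz4hc,12 -C1048576 -Eztailpacking -Eall-fragments \
+          -Ededupe foo.erofs.img foo/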
+
 How to generate legacy EROFS images (Linux 4.19+)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Decompression inplace and compacted indexes have been introduced in
-Linux upstream v5.3, which are not forward-compatible with older
-kernels.
+Linux v5.3, which are not forward-compatible with older kernels.
 
 In order to generate _legacy_ EROFS images for old kernels,
 consider adding "-E legacy-compress" to the command line, e.g.
@@ -153,25 +180,10 @@
 
 Therefore, NEVER use it if performance is the top concern.
 
-Note that extended attributes and ACLs aren't implemented yet due to
-the current Android use case vs limited time. If you are interested,
-contribution is, as always, welcome.
-
-How to build erofsfuse
-~~~~~~~~~~~~~~~~~~~~~~
-
-It's disabled by default as an experimental feature for now due to
-the extra libfuse dependency, to enable and build it manually:
-
-	$ ./configure --enable-fuse
-	$ make
-
-erofsfuse binary will be generated under fuse folder.
-
 How to mount an EROFS image with erofsfuse
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-As the other FUSE implementations, it's quite simple to mount with
+As with other FUSE implementations, it's quite easy to mount images using
 erofsfuse, e.g.:
  $ erofsfuse foo.erofs.img foo/
 
@@ -192,99 +204,24 @@
 EROFS filesystems. Note that extended attributes and ACLs are still
 unsupported when extracting images with fsck.erofs.
 
-Container images
-----------------
-
-EROFS filesystem is well-suitably used for container images with
-advanced features like chunk-based files, multi-devices (blobs)
-and new fscache backend for lazy pulling and cache management, etc.
-
-For example, CNCF Dragonfly Nydus image service [7] introduces an
-(EROFS-compatible) RAFS v6 image format to overcome flaws of the
-current OCIv1 tgz images so that:
-
- - Images can be downloaded on demand in chunks aka lazy pulling with
-   new fscache backend (5.19+) or userspace block devices (5.16+);
-
- - Finer chunk-based content-addressable data deduplication to minimize
-   storage, transmission and memory footprints;
-
- - Merged filesystem tree to remove all metadata of intermediate layers
-   as an option;
-
- - (e)stargz, zstd::chunked and other formats can be converted and run
-   on the fly;
-
- - and more.
-
-Apart from Dragonfly Nydus, a native user daemon is planned to be added
-to erofs-utils to parse EROFS, (e)stargz and zstd::chunked images from
-network too as a real part of EROFS filesystem project.
+Note that fragment extraction with fsck.erofs is currently slow and
+still needs to be optimized.  If you are interested, contribution is,
+as always, welcome.
 
 
 Contribution
 ------------
 
-erofs-utils is a part of EROFS filesystem project, feel free to send
-patches or feedback to:
+erofs-utils is part of the EROFS filesystem project, which is
+completely community-driven open source software.  If you are
+interested in EROFS, feel free to send feedback and/or patches to:
   linux-erofs mailing list   <linux-erofs@lists.ozlabs.org>
 
 
 Comments
 --------
 
-[1] According to the EROFS on-disk format, the tail block of files
-    could be inlined aggressively with its metadata in order to reduce
-    the I/O overhead and save the storage space (called tail-packing).
-
-[2] There was a bug until lz4-1.8.3, which can crash erofs-utils
-    randomly. Fortunately bugfix by our colleague Qiuyang Sun was
-    merged in lz4-1.9.0.
-
-    For more details, please refer to
-    https://github.com/lz4/lz4/commit/660d21272e4c8a0f49db5fc1e6853f08713dff82
-
-[3] There were many bugfixes merged into lz4-1.9.2 for
-    LZ4_compress_destSize(), and I once ran into some crashs due to
-    those issues. * Again lz4hc is not affected. *
-
-    [LZ4_compress_destSize] Allow 2 more bytes of match length
-    https://github.com/lz4/lz4/commit/690009e2c2f9e5dcb0d40e7c0c40610ce6006eda
-
-    [LZ4_compress_destSize] Fix rare data corruption bug
-    https://github.com/lz4/lz4/commit/6bc6f836a18d1f8fd05c8fc2b42f1d800bc25de1
-
-    [LZ4_compress_destSize] Fix overflow condition
-    https://github.com/lz4/lz4/commit/13a2d9e34ffc4170720ce417c73e396d0ac1471a
-
-    [LZ4_compress_destSize] Fix off-by-one error in fix
-    https://github.com/lz4/lz4/commit/7c32101c655d93b61fc212dcd512b87119dd7333
-
-    [LZ4_compress_destSize] Fix off-by-one error
-    https://github.com/lz4/lz4/commit/d7cad81093cd805110291f84d64d385557d0ffba
-
-    since upstream lz4 doesn't have stable branch for old versions, it's
-    preferred to use latest upstream lz4 library (although some regressions
-    could happen since new features are also introduced to latest upstream
-    version as well) or backport all stable bugfixes to old stable versions,
-    e.g. our unofficial lz4 fork: https://github.com/erofs/lz4
-
-[4] LZ4HC didn't compress long zeroed buffer properly with
-    LZ4_compress_HC_destSize()
-    https://github.com/lz4/lz4/issues/784
-
-    which has been resolved in
-    https://github.com/lz4/lz4/commit/e7fe105ac6ed02019d34731d2ba3aceb11b51bb1
-
-    and already included in lz4-1.9.3, see:
-    https://github.com/lz4/lz4/releases/tag/v1.9.3
-
-[5] LZ4_decompress_safe_partial is broken in 1.9.2
-    https://github.com/lz4/lz4/issues/783
-
-    which is also resolved in lz4-1.9.3.
-
-[6] https://tukaani.org/xz/xz-5.3.2alpha.tar.xz
-
-[7] https://nydus.dev
-    https://github.com/dragonflyoss/image-service
+[1] According to the EROFS on-disk format, the tail blocks of files
+    can be inlined aggressively with their metadata (called
+    tail-packing) in order to minimize extra I/Os and save storage
+    space.
diff --git a/VERSION b/VERSION
index ef7a460..8cf9ed8 100644
--- a/VERSION
+++ b/VERSION
@@ -1,2 +1,2 @@
-1.5
-2022-06-13
+1.7.1
+2023-10-20
diff --git a/configure.ac b/configure.ac
index a736ff0..a546310 100644
--- a/configure.ac
+++ b/configure.ac
@@ -29,6 +29,41 @@
   AC_MSG_ERROR([pkg-config is required. See pkg-config.freedesktop.org])
 fi
 
+dnl Check if the flag is supported by compiler
+dnl CC_CHECK_CFLAGS_SILENT([FLAG], [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND])
+AC_DEFUN([CC_CHECK_CFLAGS_SILENT], [
+  AC_CACHE_VAL(AS_TR_SH([cc_cv_cflags_$1]),
+    [ac_save_CFLAGS="$CFLAGS"
+     CFLAGS="$CFLAGS $1"
+     AC_LINK_IFELSE([AC_LANG_SOURCE([int main() { return 0; }])],
+       [eval "AS_TR_SH([cc_cv_cflags_$1])='yes'"],
+       [eval "AS_TR_SH([cc_cv_cflags_$1])='no'"])
+     CFLAGS="$ac_save_CFLAGS"
+    ])
+
+  AS_IF([eval test x$]AS_TR_SH([cc_cv_cflags_$1])[ = xyes],
+    [$2], [$3])
+])
+
+dnl Check if the flag is supported by compiler (cacheable)
+dnl CC_CHECK_CFLAG([FLAG], [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND])
+AC_DEFUN([CC_CHECK_CFLAG], [
+  AC_CACHE_CHECK([if $CC supports $1 flag],
+    AS_TR_SH([cc_cv_cflags_$1]),
+    CC_CHECK_CFLAGS_SILENT([$1]) dnl Don't execute actions here!
+  )
+
+  AS_IF([eval test x$]AS_TR_SH([cc_cv_cflags_$1])[ = xyes],
+    [$2], [$3])
+])
+
+dnl CC_CHECK_CFLAGS([FLAG1 FLAG2], [action-if-found], [action-if-not])
+AC_DEFUN([CC_CHECK_CFLAGS], [
+  for flag in $1; do
+    CC_CHECK_CFLAG($flag, [$2], [$3])
+  done
+])
+
 dnl EROFS_UTILS_PARSE_DIRECTORY
 dnl Input:  $1 = a string to a relative or absolute directory
 dnl Output: $2 = the variable to set with the absolute directory
@@ -59,6 +94,8 @@
  fi
 ])
 
+AC_ARG_VAR([MAX_BLOCK_SIZE], [The maximum block size which erofs-utils supports])
+
 AC_ARG_ENABLE([debug],
     [AS_HELP_STRING([--enable-debug],
                     [enable debugging mode @<:@default=no@:>@])],
@@ -71,6 +108,12 @@
     [enable_werror="$enableval"],
     [enable_werror="no"])
 
+AC_ARG_ENABLE([fuzzing],
+    [AS_HELP_STRING([--enable-fuzzing],
+                    [set up fuzzing mode @<:@default=no@:>@])],
+    [enable_fuzzing="$enableval"],
+    [enable_fuzzing="no"])
+
 AC_ARG_ENABLE(lz4,
    [AS_HELP_STRING([--disable-lz4], [disable LZ4 compression support @<:@default=enabled@:>@])],
    [enable_lz4="$enableval"], [enable_lz4="yes"])
@@ -79,6 +122,15 @@
    [AS_HELP_STRING([--enable-lzma], [enable LZMA compression support @<:@default=no@:>@])],
    [enable_lzma="$enableval"], [enable_lzma="no"])
 
+AC_ARG_WITH(zlib,
+   [AS_HELP_STRING([--without-zlib],
+      [Ignore presence of zlib inflate support @<:@default=enabled@:>@])])
+
+AC_ARG_WITH(libdeflate,
+   [AS_HELP_STRING([--with-libdeflate],
+      [Enable and build with libdeflate inflate support @<:@default=disabled@:>@])], [],
+      [with_libdeflate="no"])
+
 AC_ARG_ENABLE(fuse,
    [AS_HELP_STRING([--enable-fuse], [enable erofsfuse @<:@default=no@:>@])],
    [enable_fuse="$enableval"], [enable_fuse="no"])
@@ -124,6 +176,7 @@
 	fcntl.h
 	getopt.h
 	inttypes.h
+	linux/aufs_type.h
 	linux/falloc.h
 	linux/fs.h
 	linux/types.h
@@ -134,7 +187,10 @@
 	stdlib.h
 	string.h
 	sys/ioctl.h
+	sys/mman.h
+	sys/random.h
 	sys/stat.h
+	sys/statfs.h
 	sys/sysmacros.h
 	sys/time.h
 	unistd.h
@@ -190,8 +246,12 @@
 	llistxattr
 	memset
 	realpath
+	lseek64
+	ftello64
 	pread64
 	pwrite64
+	posix_fadvise
+	fstatfs
 	strdup
 	strerror
 	strrchr
@@ -199,6 +259,35 @@
 	tmpfile64
 	utimensat]))
 
+# Detect maximum block size if necessary
+AS_IF([test "x$MAX_BLOCK_SIZE" = "x"], [
+  AC_CACHE_CHECK([sysconf (_SC_PAGESIZE)], [erofs_cv_max_block_size],
+               AC_RUN_IFELSE([AC_LANG_PROGRAM(
+[[
+#include <unistd.h>
+#include <stdio.h>
+]],
+[[
+    int result;
+    FILE *f;
+
+    result = sysconf(_SC_PAGESIZE);
+    if (result < 0)
+	return 1;
+
+    f = fopen("conftest.out", "w");
+    if (!f)
+	return 1;
+
+    fprintf(f, "%d", result);
+    fclose(f);
+    return 0;
+]])],
+                             [erofs_cv_max_block_size=`cat conftest.out`],
+                             [erofs_cv_max_block_size=4096],
+                             [erofs_cv_max_block_size=4096]))
+], [erofs_cv_max_block_size=$MAX_BLOCK_SIZE])
+
 # Configure debug mode
 AS_IF([test "x$enable_debug" != "xno"], [], [
   dnl Turn off all assert checking.
@@ -319,11 +408,53 @@
   CPPFLAGS="${saved_CPPFLAGS}"
 fi
 
+# Configure zlib
+AS_IF([test "x$with_zlib" != "xno"], [
+  PKG_CHECK_MODULES([zlib], [zlib])
+  # Paranoia: don't trust the result reported by pkgconfig before trying out
+  saved_LIBS="$LIBS"
+  saved_CPPFLAGS=${CPPFLAGS}
+  CPPFLAGS="${zlib_CFLAGS} ${CPPFLAGS}"
+  LIBS="${zlib_LIBS} $LIBS"
+  AC_CHECK_LIB(z, inflate, [
+    have_zlib="yes" ], [
+    AC_MSG_ERROR([zlib doesn't work properly])])
+  LIBS="${saved_LIBS}"
+  CPPFLAGS="${saved_CPPFLAGS}"], [have_zlib="no"])
+
+# Configure libdeflate
+AS_IF([test "x$with_libdeflate" != "xno"], [
+  PKG_CHECK_MODULES([libdeflate], [libdeflate])
+  # Paranoia: don't trust the result reported by pkgconfig before trying out
+  saved_LIBS="$LIBS"
+  saved_CPPFLAGS=${CPPFLAGS}
+  CPPFLAGS="${libdeflate_CFLAGS} ${CPPFLAGS}"
+  LIBS="${libdeflate_LIBS} $LIBS"
+  AC_CHECK_LIB(deflate, libdeflate_deflate_decompress, [
+    have_libdeflate="yes" ], [
+    AC_MSG_ERROR([libdeflate doesn't work properly])])
+  LIBS="${saved_LIBS}"
+  CPPFLAGS="${saved_CPPFLAGS}"], [have_libdeflate="no"])
+
+# Enable 64-bit off_t
+CFLAGS+=" -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64"
+
+# Configure fuzzing mode
+AS_IF([test "x$enable_fuzzing" != "xyes"], [], [
+  CC_CHECK_CFLAGS(["-fsanitize=address,fuzzer-no-link"], [
+    CFLAGS="$CFLAGS -g -O1 -fsanitize=address,fuzzer-no-link"
+  ], [
+    AC_MSG_ERROR([Compiler doesn't support `-fsanitize=address,fuzzer-no-link`])
+  ])
+])
+AM_CONDITIONAL([ENABLE_FUZZING], [test "x${enable_fuzzing}" = "xyes"])
+
 # Set up needed symbols, conditionals and compiler/linker flags
 AM_CONDITIONAL([ENABLE_LZ4], [test "x${have_lz4}" = "xyes"])
 AM_CONDITIONAL([ENABLE_LZ4HC], [test "x${have_lz4hc}" = "xyes"])
 AM_CONDITIONAL([ENABLE_FUSE], [test "x${have_fuse}" = "xyes"])
 AM_CONDITIONAL([ENABLE_LIBLZMA], [test "x${have_liblzma}" = "xyes"])
+AM_CONDITIONAL([ENABLE_LIBDEFLATE], [test "x${have_libdeflate}" = "xyes"])
 
 if test "x$have_uuid" = "xyes"; then
   AC_DEFINE([HAVE_LIBUUID], 1, [Define to 1 if libuuid is found])
@@ -361,6 +492,21 @@
   AC_SUBST([liblzma_CFLAGS])
 fi
 
+if test "x$have_zlib" = "xyes"; then
+  AC_DEFINE([HAVE_ZLIB], 1, [Define to 1 if zlib is found])
+fi
+
+if test "x$have_libdeflate" = "xyes"; then
+  AC_DEFINE([HAVE_LIBDEFLATE], 1, [Define to 1 if libdeflate is found])
+fi
+
+# Dump maximum block size
+AS_IF([test "x$erofs_cv_max_block_size" = "x"],
+      [erofs_cv_max_block_size=4096], [])
+
+AC_DEFINE_UNQUOTED([EROFS_MAX_BLOCK_SIZE], [$erofs_cv_max_block_size],
+		   [The maximum block size which erofs-utils supports])
+
 AC_CONFIG_FILES([Makefile
 		 man/Makefile
 		 lib/Makefile
diff --git a/docs/INSTALL.md b/docs/INSTALL.md
new file mode 100644
index 0000000..2e818da
--- /dev/null
+++ b/docs/INSTALL.md
@@ -0,0 +1,71 @@
+This document describes how to configure and build erofs-utils from
+source.
+
+See the [README](../README) file in the top level directory about
+the brief overview of erofs-utils.
+
+## Dependencies & build
+
+LZ4 1.9.3+ for LZ4(HC) support [^1].
+
+[XZ Utils 5.3.2alpha+](https://tukaani.org/xz/xz-5.3.2alpha.tar.gz) for
+LZMA support; [XZ Utils 5.4+](https://tukaani.org/xz/xz-5.4.1.tar.gz) is
+highly recommended.
+
+libfuse 2.6+ for erofsfuse support.
+
+[^1]: It's not recommended to use LZ4 versions below 1.9.3, since the
+broken LZ4_compress_destSize() (fixed in v1.9.2),
+[LZ4_compress_HC_destSize()](https://github.com/lz4/lz4/commit/660d21272e4c8a0f49db5fc1e6853f08713dff82) or
+[LZ4_decompress_safe_partial()](https://github.com/lz4/lz4/issues/783)
+could cause unexpected crashes and make trouble for end users.
+
+## How to build with LZ4
+
+To build, the following commands can be used in order:
+
+``` sh
+$ ./autogen.sh
+$ ./configure
+$ make
+```
+
+`mkfs.erofs`, `dump.erofs` and `fsck.erofs` binaries will be
+generated under the corresponding folders.
+
+## How to build with liblzma
+
+In order to enable LZMA support, build with the following commands:
+
+``` sh
+$ ./configure --enable-lzma
+$ make
+```
+
+Additionally, you could specify liblzma target paths with
+`--with-liblzma-incdir` and `--with-liblzma-libdir` manually.
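+
+For example (the `/opt/xz` paths below are only placeholders for a
+custom liblzma installation):
+
+``` sh
+$ ./configure --enable-lzma \
+      --with-liblzma-incdir=/opt/xz/include \
+      --with-liblzma-libdir=/opt/xz/lib
+$ make
+```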
+
+## How to build erofsfuse
+
+It's disabled by default as an experimental feature for now due to
+the extra libfuse dependency.  To enable and build it manually:
+
+``` sh
+$ ./configure --enable-fuse
+$ make
+```
+
+`erofsfuse` binary will be generated under `fuse` folder.
+
+## How to install erofs-utils manually
+
+Use the following command to install erofs-utils binaries:
+
+``` sh
+# make install
+```
+
+By default, `make install` will install all the files in
+`/usr/local/bin`, `/usr/local/lib` etc.  You can specify an
+installation prefix other than `/usr/local` using `--prefix`,
+for instance `--prefix=$HOME`.
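+
+For example, a per-user installation could look like this (a sketch
+only, assuming a standard Autotools setup):
+
+``` sh
+$ ./configure --prefix=$HOME
+$ make
+$ make install
+```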
diff --git a/docs/PERFORMANCE.md b/docs/PERFORMANCE.md
new file mode 100644
index 0000000..5431856
--- /dev/null
+++ b/docs/PERFORMANCE.md
@@ -0,0 +1,201 @@
+# Test setup
+
+Processor: x86_64, Intel(R) Xeon(R) Platinum 8369B CPU @ 2.70GHz * 2 VCores
+
+Storage: Cloud disk, 3000 IOPS upper limit
+
+OS Kernel: Linux 6.2
+
+Software: LZ4 1.9.3, erofs-utils 1.6, squashfs-tools 4.5.1
+
+Disclaimer: Test results can vary with different hardware and/or data patterns. Therefore, the following results are **ONLY for reference**.
+
+# Benchmark on multiple files
+
+[Rootfs of Debian docker image](https://github.com/debuerreotype/docker-debian-artifacts/blob/dist-amd64/bullseye/rootfs.tar.xz?raw=true) is used as the dataset, which contains 7000+ files and directories.
+Note that the dataset may be replaced from time to time; the SHA1 of the snapshot "rootfs.tar.xz" used here is "aee9b01a530078dbef8f08521bfcabe65b244955".
+
+## Image size
+
+|   Size    | Filesystem | Cluster size | Build options                                                  |
+|-----------|------------|--------------|----------------------------------------------------------------|
+| 124669952 | erofs      | uncompressed | -T0 [^1]                                                       |
+| 124522496 | squashfs   | uncompressed | -noD -noI -noX -noF -no-xattrs -all-time 0 -no-duplicates [^2] |
+|  73601024 | squashfs   | 4096         | -b 4096 -comp lz4 -Xhc -no-xattrs -all-time 0                  |
+|  73121792 | erofs      | 4096         | -zlz4hc,12 [^3] -C4096 -Efragments -T0                         |
+|  67162112 | squashfs   | 16384        | -b 16384 -comp lz4 -Xhc -no-xattrs -all-time 0                 |
+|  65478656 | erofs      | 16384        | -zlz4hc,12 -C16384 -Efragments -T0                             |
+|  61456384 | squashfs   | 65536        | -b 65536 -comp lz4 -Xhc -no-xattrs -all-time 0                 |
+|  59834368 | erofs      | 65536        | -zlz4hc,12 -C65536 -Efragments -T0                             |
+|  59150336 | squashfs   | 131072       | -b 131072 -comp lz4 -Xhc -no-xattrs -all-time 0                |
+|  58515456 | erofs      | 131072       | -zlz4hc,12 -C131072 -Efragments -T0                            |
+
+[^1]: Forcibly reset all timestamps to match squashfs on-disk basic inodes for now.
+[^2]: Currently erofs-utils doesn't actively de-duplicate identical files although the on-disk format supports this.
+[^3]: Because squashfs uses level 12 for LZ4HC by default.
+
+## Sequential data access
+
+```bash
+hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "tar cf - . | cat > /dev/null"
+```
+
+| Filesystem | Cluster size | Time                            |
+|------------|--------------|---------------------------------|
+| squashfs   | 4096         | 10.257 s ±  0.031 s             |
+| erofs      | uncompressed |  1.111 s ±  0.022 s             |
+| squashfs   | uncompressed |  1.034 s ±  0.020 s             |
+| squashfs   | 131072       | 941.3 ms ±   7.5 ms             |
+| erofs      | 4096         | 848.1 ms ±  17.8 ms             |
+| erofs      | 131072       | 724.2 ms ±  11.0 ms             |
+
+## Sequential metadata access
+
+```bash
+hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "tar cf /dev/null ."
+```
+
+| Filesystem | Cluster size | Time                            |
+|------------|--------------|---------------------------------|
+| erofs      | uncompressed | 419.6 ms ±   8.2 ms             |
+| squashfs   | 4096         | 142.5 ms ±   5.4 ms             |
+| squashfs   | uncompressed | 129.2 ms ±   3.9 ms             |
+| squashfs   | 131072       | 125.4 ms ±   4.0 ms             |
+| erofs      | 4096         |  75.5 ms ±   3.5 ms             |
+| erofs      | 131072       |  65.8 ms ±   3.6 ms             |
+
+[ Note that erofs-utils currently doesn't perform very well for such cases due to the metadata arrangement when building.  It will be fixed in later versions. ]
+
+## Small random data access (~7%)
+
+```bash
+find mnt -type f -printf "%p\n" | sort -R | head -n 500 > list.txt
+hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "cat list.txt | xargs cat > /dev/null"
+```
+
+| Filesystem | Cluster size | Time                            |
+|------------|--------------|---------------------------------|
+| squashfs   | 4096         |  1.386 s ±  0.032 s             |
+| squashfs   | uncompressed |  1.083 s ±  0.044 s             |
+| squashfs   | 131072       |  1.067 s ±  0.046 s             |
+| erofs      | 4096         | 249.6 ms ±   6.5 ms             |
+| erofs      | uncompressed | 237.8 ms ±   6.3 ms             |
+| erofs      | 131072       | 189.6 ms ±   7.8 ms             |
+
+
+## Small random metadata access (~7%)
+
+```bash
+find mnt -type f -printf "%p\n" | sort -R | head -n 500 > list.txt
+hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "cat list.txt | xargs stat"
+```
+
+| Filesystem | Cluster size | Time                            |
+|------------|--------------|---------------------------------|
+| squashfs   | 4096         | 817.0 ms ±  34.5 ms             |
+| squashfs   | 131072       | 801.0 ms ±  40.1 ms             |
+| squashfs   | uncompressed | 741.3 ms ±  18.2 ms             |
+| erofs      | uncompressed | 197.8 ms ±   4.1 ms             |
+| erofs      | 4096         |  63.1 ms ±   2.0 ms             |
+| erofs      | 131072       |  60.7 ms ±   3.6 ms             |
+
+## Full random data access (~100%)
+
+```bash
+find mnt -type f -printf "%p\n" | sort -R > list.txt
+hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "cat list.txt | xargs cat > /dev/null"
+```
+
+| Filesystem | Cluster size | Time                            |
+|------------|--------------|---------------------------------|
+| squashfs   | 4096         | 20.668 s ±  0.040 s             |
+| squashfs   | uncompressed | 12.543 s ±  0.041 s             |
+| squashfs   | 131072       | 11.753 s ±  0.412 s             |
+| erofs      | uncompressed |  1.493 s ±  0.023 s             |
+| erofs      | 4096         |  1.223 s ±  0.013 s             |
+| erofs      | 131072       | 598.2 ms ±   6.6 ms             |
+
+## Full random metadata access (~100%)
+
+```bash
+find mnt -type f -printf "%p\n" | sort -R > list.txt
+hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "cat list.txt | xargs stat"
+```
+
+| Filesystem | Cluster size | Time                            |
+|------------|--------------|---------------------------------|
+| squashfs   | 131072       |  9.212 s ±  0.467 s             |
+| squashfs   | 4096         |  8.905 s ±  0.147 s             |
+| squashfs   | uncompressed |  7.961 s ±  0.045 s             |
+| erofs      | 4096         | 661.2 ms ±  14.9 ms             |
+| erofs      | uncompressed | 125.8 ms ±   6.6 ms             |
+| erofs      | 131072       | 119.6 ms ±   5.5 ms             |
+
+
+# FIO benchmark on a single large file
+
+`silesia.tar` (203M) is used for benchmarking; it can be generated by unzipping [silesia.zip](http://mattmahoney.net/dc/silesia.zip) and tarring the extracted files.
+
+## Image size
+
+|   Size    | Filesystem | Cluster size | Build options                                             |
+|-----------|------------|--------------|-----------------------------------------------------------|
+| 114339840 | squashfs   | 4096         | -b 4096 -comp lz4 -Xhc -no-xattrs                         |
+| 104972288 | erofs      | 4096         | -zlz4hc,12 -C4096                                         |
+|  98033664 | squashfs   | 16384        | -b 16384 -comp lz4 -Xhc -no-xattrs                        |
+|  89571328 | erofs      | 16384        | -zlz4hc,12 -C16384                                        |
+|  85143552 | squashfs   | 65536        | -b 65536 -comp lz4 -Xhc -no-xattrs                        |
+|  81211392 | squashfs   | 131072       | -b 131072 -comp lz4 -Xhc -no-xattrs                       |
+|  80519168 | erofs      | 65536        | -zlz4hc,12 -C65536                                        |
+|  78888960 | erofs      | 131072       | -zlz4hc,12 -C131072                                       |
+
+## Sequential I/Os
+
+```bash
+fio -filename=silesia.tar -bs=4k -rw=read -name=job1
+```
+
+| Filesystem | Cluster size | Bandwidth |
+|------------|--------------|-----------|
+| erofs      | 65536        | 624 MiB/s |
+| erofs      | 16384        | 600 MiB/s |
+| erofs      | 4096         | 569 MiB/s |
+| erofs      | 131072       | 535 MiB/s |
+| squashfs   | 131072       | 236 MiB/s |
+| squashfs   | 65536        | 157 MiB/s |
+| squashfs   | 16384        | 55.2MiB/s |
+| squashfs   | 4096         | 12.5MiB/s |
+
+## Full Random I/Os
+
+```bash
+fio -filename=silesia.tar -bs=4k -rw=randread -name=job1
+```
+
+| Filesystem | Cluster size | Bandwidth |
+|------------|--------------|-----------|
+| erofs      | 131072       | 242 MiB/s |
+| squashfs   | 131072       | 232 MiB/s |
+| erofs      | 65536        | 198 MiB/s |
+| squashfs   | 65536        | 150 MiB/s |
+| erofs      | 16384        | 96.4MiB/s |
+| squashfs   | 16384        | 49.5MiB/s |
+| erofs      | 4096         | 33.7MiB/s |
+| squashfs   | 4096         | 6817KiB/s |
+
+## Small Random I/Os (~5%)
+
+```bash
+fio -filename=silesia.tar -bs=4k -rw=randread --io_size=10m -name=job1
+```
+
+| Filesystem | Cluster size | Bandwidth |
+|------------|--------------|-----------|
+| erofs      | 131072       | 19.2MiB/s |
+| erofs      | 65536        | 16.9MiB/s |
+| squashfs   | 131072       | 15.1MiB/s |
+| erofs      | 16384        | 14.7MiB/s |
+| squashfs   | 65536        | 13.8MiB/s |
+| erofs      | 4096         | 13.0MiB/s |
+| squashfs   | 16384        | 11.7MiB/s |
+| squashfs   | 4096         | 4376KiB/s |
diff --git a/docs/compress-hints.example b/docs/compress-hints.example
new file mode 100644
index 0000000..4f481ff
--- /dev/null
+++ b/docs/compress-hints.example
@@ -0,0 +1,7 @@
+# https://github.com/debuerreotype/docker-debian-artifacts/blob/dist-amd64/bullseye/rootfs.tar.xz?raw=true
+# -zlzma:lz4hc,12:lzma,109 -C131072 --compress-hints=compress-hints.example  image size: 66M
+# -zlz4hc,12                                                                 image size: 76M
+4096     1 .*\.so.*$
+4096     1 bin/
+4096     1 sbin/
+131072   2 etc/
diff --git a/dump/Makefile.am b/dump/Makefile.am
index c2bef6d..aed20c2 100644
--- a/dump/Makefile.am
+++ b/dump/Makefile.am
@@ -7,4 +7,4 @@
 dump_erofs_SOURCES = main.c
 dump_erofs_CFLAGS = -Wall -I$(top_srcdir)/include
 dump_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
-	${libuuid_LIBS} ${liblz4_LIBS} ${liblzma_LIBS}
+	${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS}
diff --git a/dump/main.c b/dump/main.c
index 49ff2b7..5425b7b 100644
--- a/dump/main.c
+++ b/dump/main.c
@@ -14,11 +14,11 @@
 #include "erofs/inode.h"
 #include "erofs/io.h"
 #include "erofs/dir.h"
+#include "erofs/compress.h"
+#include "erofs/fragments.h"
 #include "../lib/liberofs_private.h"
+#include "../lib/liberofs_uuid.h"
 
-#ifdef HAVE_LIBUUID
-#include <uuid.h>
-#endif
 
 struct erofsdump_cfg {
 	unsigned int totalshow;
@@ -37,7 +37,7 @@
 static char *file_types[] = {
 	".txt", ".so", ".xml", ".apk",
 	".odex", ".vdex", ".oat", ".rc",
-	".otf", ".txt", "others",
+	".otf", "others",
 };
 #define OTHERFILETYPE	ARRAY_SIZE(file_types)
 /* (1 << FILE_MAX_SIZE_BITS)KB */
@@ -91,10 +91,16 @@
 static struct erofsdump_feature feature_lists[] = {
 	{ true, EROFS_FEATURE_COMPAT_SB_CHKSUM, "sb_csum" },
 	{ true, EROFS_FEATURE_COMPAT_MTIME, "mtime" },
-	{ false, EROFS_FEATURE_INCOMPAT_LZ4_0PADDING, "0padding" },
+	{ true, EROFS_FEATURE_COMPAT_XATTR_FILTER, "xattr_filter" },
+	{ false, EROFS_FEATURE_INCOMPAT_ZERO_PADDING, "0padding" },
+	{ false, EROFS_FEATURE_INCOMPAT_COMPR_CFGS, "compr_cfgs" },
 	{ false, EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER, "big_pcluster" },
 	{ false, EROFS_FEATURE_INCOMPAT_CHUNKED_FILE, "chunked_file" },
 	{ false, EROFS_FEATURE_INCOMPAT_DEVICE_TABLE, "device_table" },
+	{ false, EROFS_FEATURE_INCOMPAT_ZTAILPACKING, "ztailpacking" },
+	{ false, EROFS_FEATURE_INCOMPAT_FRAGMENTS, "fragments" },
+	{ false, EROFS_FEATURE_INCOMPAT_DEDUPE, "dedupe" },
+	{ false, EROFS_FEATURE_INCOMPAT_XATTR_PREFIXES, "xattr_prefixes" },
 };
 
 static int erofsdump_readdir(struct erofs_dir_context *ctx);
@@ -151,7 +157,7 @@
 			usage();
 			exit(0);
 		case 3:
-			err = blob_open_ro(optarg);
+			err = blob_open_ro(&sbi, optarg);
 			if (err)
 				return err;
 			++sbi.extra_devices;
@@ -196,10 +202,10 @@
 		stats.uncompressed_files++;
 		*size = inode->i_size;
 		break;
-	case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
-	case EROFS_INODE_FLAT_COMPRESSION:
+	case EROFS_INODE_COMPRESSED_FULL:
+	case EROFS_INODE_COMPRESSED_COMPACT:
 		stats.compressed_files++;
-		*size = inode->u.i_blocks * EROFS_BLKSIZ;
+		*size = inode->u.i_blocks * erofs_blksiz(inode->sbi);
 		break;
 	default:
 		erofs_err("unknown datalayout");
@@ -224,36 +230,23 @@
 	++stats.file_type_stat[type];
 }
 
-static void update_file_size_statatics(erofs_off_t occupied_size,
-		erofs_off_t original_size)
+static void update_file_size_statistics(erofs_off_t size, bool original)
 {
-	int occupied_size_mark, original_size_mark;
+	unsigned int *file_size = original ? stats.file_original_size :
+				  stats.file_comp_size;
+	int size_mark = 0;
 
-	original_size_mark = 0;
-	occupied_size_mark = 0;
-	occupied_size >>= 10;
-	original_size >>= 10;
+	size >>= 10;
 
-	while (occupied_size || original_size) {
-		if (occupied_size) {
-			occupied_size >>= 1;
-			occupied_size_mark++;
-		}
-		if (original_size) {
-			original_size >>= 1;
-			original_size_mark++;
-		}
+	while (size) {
+		size >>= 1;
+		size_mark++;
 	}
 
-	if (original_size_mark >= FILE_MAX_SIZE_BITS)
-		stats.file_original_size[FILE_MAX_SIZE_BITS]++;
+	if (size_mark >= FILE_MAX_SIZE_BITS)
+		file_size[FILE_MAX_SIZE_BITS]++;
 	else
-		stats.file_original_size[original_size_mark]++;
-
-	if (occupied_size_mark >= FILE_MAX_SIZE_BITS)
-		stats.file_comp_size[FILE_MAX_SIZE_BITS]++;
-	else
-		stats.file_comp_size[occupied_size_mark]++;
+		file_size[size_mark]++;
 }
 
 static int erofsdump_ls_dirent_iter(struct erofs_dir_context *ctx)
@@ -276,11 +269,37 @@
 	return erofsdump_readdir(ctx);
 }
 
+static int erofsdump_read_packed_inode(void)
+{
+	int err;
+	erofs_off_t occupied_size = 0;
+	struct erofs_inode vi = { .sbi = &sbi, .nid = sbi.packed_nid };
+
+	if (!(erofs_sb_has_fragments(&sbi) && sbi.packed_nid > 0))
+		return 0;
+
+	err = erofs_read_inode_from_disk(&vi);
+	if (err) {
+		erofs_err("failed to read packed file inode from disk");
+		return err;
+	}
+
+	err = erofsdump_get_occupied_size(&vi, &occupied_size);
+	if (err) {
+		erofs_err("failed to get the file size of packed inode");
+		return err;
+	}
+
+	stats.files_total_size += occupied_size;
+	update_file_size_statistics(occupied_size, false);
+	return 0;
+}
+
 static int erofsdump_readdir(struct erofs_dir_context *ctx)
 {
 	int err;
 	erofs_off_t occupied_size = 0;
-	struct erofs_inode vi = { .nid = ctx->de_nid };
+	struct erofs_inode vi = { .sbi = &sbi, .nid = ctx->de_nid };
 
 	err = erofs_read_inode_from_disk(&vi);
 	if (err) {
@@ -300,7 +319,8 @@
 		stats.files_total_origin_size += vi.i_size;
 		inc_file_extension_count(ctx->dname, ctx->de_namelen);
 		stats.files_total_size += occupied_size;
-		update_file_size_statatics(occupied_size, vi.i_size);
+		update_file_size_statistics(vi.i_size, true);
+		update_file_size_statistics(occupied_size, false);
 	}
 
 	/* XXXX: the dir depth should be restricted in order to avoid loops */
@@ -334,7 +354,7 @@
 	int err, i;
 	erofs_off_t size;
 	u16 access_mode;
-	struct erofs_inode inode = { .nid = dumpcfg.nid };
+	struct erofs_inode inode = { .sbi = &sbi, .nid = dumpcfg.nid };
 	char path[PATH_MAX];
 	char access_mode_str[] = "rwxrwxrwx";
 	char timebuf[128] = {0};
@@ -365,10 +385,10 @@
 		return;
 	}
 
-	err = erofs_get_pathname(inode.nid, path, sizeof(path));
+	err = erofs_get_pathname(inode.sbi, inode.nid, path, sizeof(path));
 	if (err < 0) {
-		erofs_err("file path not found @ nid %llu", inode.nid | 0ULL);
-		return;
+		strncpy(path, "(not found)", sizeof(path) - 1);
+		path[sizeof(path) - 1] = '\0';
 	}
 
 	strftime(timebuf, sizeof(timebuf),
@@ -377,7 +397,8 @@
 	for (i = 8; i >= 0; i--)
 		if (((access_mode >> i) & 1) == 0)
 			access_mode_str[8 - i] = '-';
-	fprintf(stdout, "File : %s\n", path);
+	fprintf(stdout, "Path : %s\n",
+		erofs_is_packed_inode(&inode) ? "(packed file)" : path);
 	fprintf(stdout, "Size: %" PRIu64"  On-disk size: %" PRIu64 "  %s\n",
 		inode.i_size, size,
 		file_category_types[erofs_mode_to_ftype(inode.i_mode)]);
@@ -387,7 +408,6 @@
 		inode.datalayout,
 		(double)(100 * size) / (double)(inode.i_size));
 	fprintf(stdout, "Inode size: %d   ", inode.inode_isize);
-	fprintf(stdout, "Extent size: %u   ", inode.extent_isize);
 	fprintf(stdout,	"Xattr size: %u\n", inode.xattr_isize);
 	fprintf(stdout, "Uid: %u   Gid: %u  ", inode.i_uid, inode.i_gid);
 	fprintf(stdout, "Access: %04o/%s\n", access_mode, access_mode_str);
@@ -430,19 +450,27 @@
 			.m_deviceid = map.m_deviceid,
 			.m_pa = map.m_pa,
 		};
-		err = erofs_map_dev(&sbi, &mdev);
+		err = erofs_map_dev(inode.sbi, &mdev);
 		if (err) {
 			erofs_err("failed to map device");
 			return;
 		}
 
-		fprintf(stdout, ext_fmt[!!mdev.m_deviceid], extent_count++,
-			map.m_la, map.m_la + map.m_llen, map.m_llen,
-			mdev.m_pa, mdev.m_pa + map.m_plen, map.m_plen,
-			mdev.m_deviceid);
+		if (map.m_flags & EROFS_MAP_FRAGMENT)
+			fprintf(stdout, ext_fmt[!!mdev.m_deviceid],
+				extent_count++,
+				map.m_la, map.m_la + map.m_llen, map.m_llen,
+				0, 0, 0, mdev.m_deviceid);
+		else
+			fprintf(stdout, ext_fmt[!!mdev.m_deviceid],
+				extent_count++,
+				map.m_la, map.m_la + map.m_llen, map.m_llen,
+				mdev.m_pa, mdev.m_pa + map.m_plen, map.m_plen,
+				mdev.m_deviceid);
 		map.m_la += map.m_llen;
 	}
-	fprintf(stdout, "%s: %d extents found\n", path, extent_count);
+	fprintf(stdout, "%s: %d extents found\n",
+		erofs_is_packed_inode(&inode) ? "(packed file)" : path, extent_count);
 }
 
 static void erofsdump_filesize_distribution(const char *title,
@@ -548,6 +576,11 @@
 		erofs_err("read dir failed");
 		return;
 	}
+	err = erofsdump_read_packed_inode();
+	if (err) {
+		erofs_err("failed to read packed inode");
+		return;
+	}
 	erofsdump_file_statistic();
 	erofsdump_filesize_distribution("Original",
 			stats.file_original_size,
@@ -558,10 +591,27 @@
 	erofsdump_filetype_distribution(file_types, OTHERFILETYPE);
 }
 
+static void erofsdump_print_supported_compressors(FILE *f, unsigned int mask)
+{
+	unsigned int i = 0;
+	bool comma = false;
+	const char *s;
+
+	while ((s = z_erofs_list_supported_algorithms(i++, &mask)) != NULL) {
+		if (*s == '\0')
+			continue;
+		if (comma)
+			fputs(", ", f);
+		fputs(s, f);
+		comma = true;
+	}
+	fputc('\n', f);
+}
+
 static void erofsdump_show_superblock(void)
 {
 	time_t time = sbi.build_time;
-	char uuid_str[37] = "not available";
+	char uuid_str[37];
 	int i = 0;
 
 	fprintf(stdout, "Filesystem magic number:                      0x%04X\n",
@@ -574,6 +624,19 @@
 			sbi.xattr_blkaddr);
 	fprintf(stdout, "Filesystem root nid:                          %llu\n",
 			sbi.root_nid | 0ULL);
+	if (erofs_sb_has_fragments(&sbi) && sbi.packed_nid > 0)
+		fprintf(stdout, "Filesystem packed nid:                        %llu\n",
+			sbi.packed_nid | 0ULL);
+	if (erofs_sb_has_compr_cfgs(&sbi)) {
+		fprintf(stdout, "Filesystem compr_algs:                        ");
+		erofsdump_print_supported_compressors(stdout,
+			sbi.available_compr_algs);
+	} else {
+		fprintf(stdout, "Filesystem lz4_max_distance:                  %u\n",
+			sbi.lz4_max_distance | 0U);
+	}
+	fprintf(stdout, "Filesystem sb_extslots:                       %u\n",
+			sbi.extslots | 0U);
 	fprintf(stdout, "Filesystem inode count:                       %llu\n",
 			sbi.inos | 0ULL);
 	fprintf(stdout, "Filesystem created:                           %s",
@@ -586,9 +649,7 @@
 		if (feat & feature_lists[i].flag)
 			fprintf(stdout, "%s ", feature_lists[i].name);
 	}
-#ifdef HAVE_LIBUUID
-	uuid_unparse_lower(sbi.uuid, uuid_str);
-#endif
+	erofs_uuid_unparse_lower(sbi.uuid, uuid_str);
 	fprintf(stdout, "\nFilesystem UUID:                              %s\n",
 			uuid_str);
 }
@@ -605,13 +666,13 @@
 		goto exit;
 	}
 
-	err = dev_open_ro(cfg.c_img_path);
+	err = dev_open_ro(&sbi, cfg.c_img_path);
 	if (err) {
 		erofs_err("failed to open image file");
 		goto exit;
 	}
 
-	err = erofs_read_superblock();
+	err = erofs_read_superblock(&sbi);
 	if (err) {
 		erofs_err("failed to read superblock");
 		goto exit_dev_close;
@@ -629,16 +690,18 @@
 
 	if (dumpcfg.show_extent && !dumpcfg.show_inode) {
 		usage();
-		goto exit_dev_close;
+		goto exit_put_super;
 	}
 
 	if (dumpcfg.show_inode)
 		erofsdump_show_fileinfo(dumpcfg.show_extent);
 
+exit_put_super:
+	erofs_put_super(&sbi);
 exit_dev_close:
-	dev_close();
+	dev_close(&sbi);
 exit:
-	blob_closeall();
+	blob_closeall(&sbi);
 	erofs_exit_configure();
 	return err;
 }
diff --git a/fsck/Makefile.am b/fsck/Makefile.am
index e6a1fb6..d024405 100644
--- a/fsck/Makefile.am
+++ b/fsck/Makefile.am
@@ -7,4 +7,13 @@
 fsck_erofs_SOURCES = main.c
 fsck_erofs_CFLAGS = -Wall -I$(top_srcdir)/include
 fsck_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
-	${libuuid_LIBS} ${liblz4_LIBS} ${liblzma_LIBS}
+	${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS}
+
+if ENABLE_FUZZING
+noinst_PROGRAMS   = fuzz_erofsfsck
+fuzz_erofsfsck_SOURCES = main.c
+fuzz_erofsfsck_CFLAGS = -Wall -I$(top_srcdir)/include -DFUZZING
+fuzz_erofsfsck_LDFLAGS = -fsanitize=address,fuzzer
+fuzz_erofsfsck_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
+	${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS}
+endif
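+
+# Illustrative (untested) fuzzing workflow, assuming clang and a
+# libFuzzer entry point compiled in under -DFUZZING; "corpus/" is a
+# placeholder directory of seed images:
+#   CC=clang ./configure --enable-fuzzing
+#   make
+#   fsck/fuzz_erofsfsck corpus/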
diff --git a/fsck/main.c b/fsck/main.c
index 5a2f659..3f86da4 100644
--- a/fsck/main.c
+++ b/fsck/main.c
@@ -49,15 +49,27 @@
 	{0, 0, 0, 0},
 };
 
+#define NR_HARDLINK_HASHTABLE	16384
+
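+/*
+ * Remember the path first used to extract each hard-linked inode
+ * (keyed by nid), so that later directory entries resolving to the
+ * same nid can be re-created with link(2) instead of being extracted
+ * again.
+ */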
+struct erofsfsck_hardlink_entry {
+	struct list_head list;
+	erofs_nid_t nid;
+	char *path;
+};
+
+static struct list_head erofsfsck_link_hashtable[NR_HARDLINK_HASHTABLE];
+
 static void print_available_decompressors(FILE *f, const char *delim)
 {
-	unsigned int i = 0;
+	int i = 0;
+	bool comma = false;
 	const char *s;
 
-	while ((s = z_erofs_list_available_compressors(i)) != NULL) {
-		if (i++)
+	while ((s = z_erofs_list_available_compressors(&i)) != NULL) {
+		if (comma)
 			fputs(delim, f);
 		fputs(s, f);
+		comma = true;
 	}
 	fputc('\n', f);
 }
@@ -131,6 +143,11 @@
 				while (len > 1 && optarg[len - 1] == '/')
 					len--;
 
+				if (len >= PATH_MAX) {
+					erofs_err("target directory name too long!");
+					return -ENAMETOOLONG;
+				}
+
 				fsckcfg.extract_path = malloc(PATH_MAX);
 				if (!fsckcfg.extract_path)
 					return -ENOMEM;
@@ -143,7 +160,7 @@
 			}
 			break;
 		case 3:
-			ret = blob_open_ro(optarg);
+			ret = blob_open_ro(&sbi, optarg);
 			if (ret)
 				return ret;
 			++sbi.extra_devices;
@@ -258,12 +275,13 @@
 
 static int erofs_check_sb_chksum(void)
 {
-	int ret;
-	u8 buf[EROFS_BLKSIZ];
+#ifndef FUZZING
+	u8 buf[EROFS_MAX_BLOCK_SIZE];
 	u32 crc;
 	struct erofs_super_block *sb;
+	int ret;
 
-	ret = blk_read(0, buf, 0, 1);
+	ret = blk_read(&sbi, 0, buf, 0, 1);
 	if (ret) {
 		erofs_err("failed to read superblock to check checksum: %d",
 			  ret);
@@ -273,18 +291,20 @@
 	sb = (struct erofs_super_block *)(buf + EROFS_SUPER_OFFSET);
 	sb->checksum = 0;
 
-	crc = erofs_crc32c(~0, (u8 *)sb, EROFS_BLKSIZ - EROFS_SUPER_OFFSET);
+	crc = erofs_crc32c(~0, (u8 *)sb, erofs_blksiz(&sbi) - EROFS_SUPER_OFFSET);
 	if (crc != sbi.checksum) {
 		erofs_err("superblock chksum doesn't match: saved(%08xh) calculated(%08xh)",
 			  sbi.checksum, crc);
 		fsckcfg.corrupted = true;
 		return -1;
 	}
+#endif
 	return 0;
 }
 
 static int erofs_verify_xattr(struct erofs_inode *inode)
 {
+	struct erofs_sb_info *sbi = inode->sbi;
 	unsigned int xattr_hdr_size = sizeof(struct erofs_xattr_ibody_header);
 	unsigned int xattr_entry_size = sizeof(struct erofs_xattr_entry);
 	erofs_off_t addr;
@@ -292,7 +312,7 @@
 	struct erofs_xattr_ibody_header *ih;
 	struct erofs_xattr_entry *entry;
 	int i, remaining = inode->xattr_isize, ret = 0;
-	char buf[EROFS_BLKSIZ];
+	char buf[EROFS_MAX_BLOCK_SIZE];
 
 	if (inode->xattr_isize == xattr_hdr_size) {
 		erofs_err("xattr_isize %d of nid %llu is not supported yet",
@@ -308,8 +328,8 @@
 		}
 	}
 
-	addr = iloc(inode->nid) + inode->inode_isize;
-	ret = dev_read(0, buf, addr, xattr_hdr_size);
+	addr = erofs_iloc(inode) + inode->inode_isize;
+	ret = dev_read(sbi, 0, buf, addr, xattr_hdr_size);
 	if (ret < 0) {
 		erofs_err("failed to read xattr header @ nid %llu: %d",
 			  inode->nid | 0ULL, ret);
@@ -318,12 +338,12 @@
 	ih = (struct erofs_xattr_ibody_header *)buf;
 	xattr_shared_count = ih->h_shared_count;
 
-	ofs = erofs_blkoff(addr) + xattr_hdr_size;
+	ofs = erofs_blkoff(sbi, addr) + xattr_hdr_size;
 	addr += xattr_hdr_size;
 	remaining -= xattr_hdr_size;
 	for (i = 0; i < xattr_shared_count; ++i) {
-		if (ofs >= EROFS_BLKSIZ) {
-			if (ofs != EROFS_BLKSIZ) {
+		if (ofs >= erofs_blksiz(sbi)) {
+			if (ofs != erofs_blksiz(sbi)) {
 				erofs_err("unaligned xattr entry in xattr shared area @ nid %llu",
 					  inode->nid | 0ULL);
 				ret = -EFSCORRUPTED;
@@ -339,7 +359,7 @@
 	while (remaining > 0) {
 		unsigned int entry_sz;
 
-		ret = dev_read(0, buf, addr, xattr_entry_size);
+		ret = dev_read(sbi, 0, buf, addr, xattr_entry_size);
 		if (ret) {
 			erofs_err("failed to read xattr entry @ nid %llu: %d",
 				  inode->nid | 0ULL, ret);
@@ -366,7 +386,6 @@
 	struct erofs_map_blocks map = {
 		.index = UINT_MAX,
 	};
-	struct erofs_map_dev mdev;
 	int ret = 0;
 	bool compressed;
 	erofs_off_t pos = 0;
@@ -383,8 +402,8 @@
 	case EROFS_INODE_CHUNK_BASED:
 		compressed = false;
 		break;
-	case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
-	case EROFS_INODE_FLAT_COMPRESSION:
+	case EROFS_INODE_COMPRESSED_FULL:
+	case EROFS_INODE_COMPRESSED_COMPACT:
 		compressed = true;
 		break;
 	default:
@@ -393,6 +412,8 @@
 	}
 
 	while (pos < inode->i_size) {
+		unsigned int alloc_rawsize;
+
 		map.m_la = pos;
 		if (compressed)
 			ret = z_erofs_map_blocks_iter(inode, &map,
@@ -421,72 +442,66 @@
 		if (!(map.m_flags & EROFS_MAP_MAPPED) || !fsckcfg.check_decomp)
 			continue;
 
-		if (map.m_plen > raw_size) {
-			raw_size = map.m_plen;
-			raw = realloc(raw, raw_size);
-			BUG_ON(!raw);
+		if (map.m_plen > Z_EROFS_PCLUSTER_MAX_SIZE) {
+			if (compressed) {
+				erofs_err("invalid pcluster size %" PRIu64 " @ offset %" PRIu64 " of nid %" PRIu64,
+					  map.m_plen, map.m_la,
+					  inode->nid | 0ULL);
+				ret = -EFSCORRUPTED;
+				goto out;
+			}
+			alloc_rawsize = Z_EROFS_PCLUSTER_MAX_SIZE;
+		} else {
+			alloc_rawsize = map.m_plen;
 		}
 
-		mdev = (struct erofs_map_dev) {
-			.m_deviceid = map.m_deviceid,
-			.m_pa = map.m_pa,
-		};
-		ret = erofs_map_dev(&sbi, &mdev);
-		if (ret) {
-			erofs_err("failed to map device of m_pa %" PRIu64 ", m_deviceid %u @ nid %llu: %d",
-				  map.m_pa, map.m_deviceid, inode->nid | 0ULL,
-				  ret);
-			goto out;
-		}
+		if (alloc_rawsize > raw_size) {
+			char *newraw = realloc(raw, alloc_rawsize);
 
-		if (compressed && map.m_llen > buffer_size) {
-			buffer_size = map.m_llen;
-			buffer = realloc(buffer, buffer_size);
-			BUG_ON(!buffer);
-		}
-
-		ret = dev_read(mdev.m_deviceid, raw, mdev.m_pa, map.m_plen);
-		if (ret < 0) {
-			erofs_err("failed to read data of m_pa %" PRIu64 ", m_plen %" PRIu64 " @ nid %llu: %d",
-				  mdev.m_pa, map.m_plen, inode->nid | 0ULL,
-				  ret);
-			goto out;
+			if (!newraw) {
+				ret = -ENOMEM;
+				goto out;
+			}
+			raw = newraw;
+			raw_size = alloc_rawsize;
 		}
 
 		if (compressed) {
-			struct z_erofs_decompress_req rq = {
-				.in = raw,
-				.out = buffer,
-				.decodedskip = 0,
-				.inputsize = map.m_plen,
-				.decodedlength = map.m_llen,
-				.alg = map.m_algorithmformat,
-				.partial_decoding = 0
-			};
-
-			ret = z_erofs_decompress(&rq);
-			if (ret < 0) {
-				erofs_err("failed to decompress data of m_pa %" PRIu64 ", m_plen %" PRIu64 " @ nid %llu: %s",
-					  mdev.m_pa, map.m_plen,
-					  inode->nid | 0ULL, strerror(-ret));
-				goto out;
+			if (map.m_llen > buffer_size) {
+				buffer_size = map.m_llen;
+				buffer = realloc(buffer, buffer_size);
+				BUG_ON(!buffer);
 			}
-		}
+			ret = z_erofs_read_one_data(inode, &map, raw, buffer,
+						    0, map.m_llen, false);
+			if (ret)
+				goto out;
 
-		if (outfd >= 0 && write(outfd, compressed ? buffer : raw,
-					map.m_llen) < 0) {
-			erofs_err("I/O error occurred when verifying data chunk @ nid %llu",
-				  inode->nid | 0ULL);
-			ret = -EIO;
-			goto out;
+			if (outfd >= 0 && write(outfd, buffer, map.m_llen) < 0)
+				goto fail_eio;
+		} else {
+			u64 p = 0;
+
+			do {
+				u64 count = min_t(u64, alloc_rawsize,
+						  map.m_llen);
+
+				ret = erofs_read_one_data(inode, &map, raw, p, count);
+				if (ret)
+					goto out;
+
+				if (outfd >= 0 && write(outfd, raw, count) < 0)
+					goto fail_eio;
+				map.m_llen -= count;
+				p += count;
+			} while (map.m_llen);
 		}
 	}
 
 	if (fsckcfg.print_comp_ratio) {
-		fsckcfg.logical_blocks +=
-			DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
-		fsckcfg.physical_blocks +=
-			DIV_ROUND_UP(pchunk_len, EROFS_BLKSIZ);
+		if (!erofs_is_packed_inode(inode))
+			fsckcfg.logical_blocks += BLK_ROUND_UP(inode->sbi, inode->i_size);
+		fsckcfg.physical_blocks += BLK_ROUND_UP(inode->sbi, pchunk_len);
 	}
 out:
 	if (raw)
@@ -494,6 +509,12 @@
 	if (buffer)
 		free(buffer);
 	return ret < 0 ? ret : 0;
+
+fail_eio:
+	erofs_err("I/O error occurred when verifying data chunk @ nid %llu",
+		  inode->nid | 0ULL);
+	ret = -EIO;
+	goto out;
 }
 
 static inline int erofs_extract_dir(struct erofs_inode *inode)
@@ -542,6 +563,63 @@
 	return 0;
 }
 
+static char *erofsfsck_hardlink_find(erofs_nid_t nid)
+{
+	struct list_head *head =
+			&erofsfsck_link_hashtable[nid % NR_HARDLINK_HASHTABLE];
+	struct erofsfsck_hardlink_entry *entry;
+
+	list_for_each_entry(entry, head, list)
+		if (entry->nid == nid)
+			return entry->path;
+	return NULL;
+}
+
+static int erofsfsck_hardlink_insert(erofs_nid_t nid, const char *path)
+{
+	struct erofsfsck_hardlink_entry *entry;
+
+	entry = malloc(sizeof(*entry));
+	if (!entry)
+		return -ENOMEM;
+
+	entry->nid = nid;
+	entry->path = strdup(path);
+	if (!entry->path) {
+		free(entry);
+		return -ENOMEM;
+	}
+
+	list_add_tail(&entry->list,
+		      &erofsfsck_link_hashtable[nid % NR_HARDLINK_HASHTABLE]);
+	return 0;
+}
+
+static void erofsfsck_hardlink_init(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < NR_HARDLINK_HASHTABLE; ++i)
+		init_list_head(&erofsfsck_link_hashtable[i]);
+}
+
+static void erofsfsck_hardlink_exit(void)
+{
+	struct erofsfsck_hardlink_entry *entry, *n;
+	struct list_head *head;
+	unsigned int i;
+
+	for (i = 0; i < NR_HARDLINK_HASHTABLE; ++i) {
+		head = &erofsfsck_link_hashtable[i];
+
+		list_for_each_entry_safe(entry, n, head, list) {
+			if (entry->path)
+				free(entry->path);
+			free(entry);
+		}
+	}
+}
+
 static inline int erofs_extract_file(struct erofs_inode *inode)
 {
 	bool tryagain = true;
@@ -679,28 +757,88 @@
 static int erofsfsck_dirent_iter(struct erofs_dir_context *ctx)
 {
 	int ret;
-	size_t prev_pos = fsckcfg.extract_pos;
+	size_t prev_pos, curr_pos;
 
 	if (ctx->dot_dotdot)
 		return 0;
 
-	if (fsckcfg.extract_path) {
-		size_t curr_pos = prev_pos;
+	prev_pos = fsckcfg.extract_pos;
+	curr_pos = prev_pos;
 
+	if (prev_pos + ctx->de_namelen >= PATH_MAX) {
+		erofs_err("unable to fsck since the path is too long (%u)",
+			  curr_pos + ctx->de_namelen);
+		return -EOPNOTSUPP;
+	}
+
+	if (fsckcfg.extract_path) {
 		fsckcfg.extract_path[curr_pos++] = '/';
 		strncpy(fsckcfg.extract_path + curr_pos, ctx->dname,
 			ctx->de_namelen);
 		curr_pos += ctx->de_namelen;
 		fsckcfg.extract_path[curr_pos] = '\0';
-		fsckcfg.extract_pos = curr_pos;
+	} else {
+		curr_pos += ctx->de_namelen;
 	}
-
+	fsckcfg.extract_pos = curr_pos;
 	ret = erofsfsck_check_inode(ctx->dir->nid, ctx->de_nid);
 
-	if (fsckcfg.extract_path) {
+	if (fsckcfg.extract_path)
 		fsckcfg.extract_path[prev_pos] = '\0';
-		fsckcfg.extract_pos = prev_pos;
+	fsckcfg.extract_pos = prev_pos;
+	return ret;
+}
+
+static int erofsfsck_extract_inode(struct erofs_inode *inode)
+{
+	int ret;
+	char *oldpath;
+
+	if (!fsckcfg.extract_path) {
+verify:
+		/* verify data chunk layout */
+		return erofs_verify_inode_data(inode, -1);
 	}
+
+	oldpath = erofsfsck_hardlink_find(inode->nid);
+	if (oldpath) {
+		if (link(oldpath, fsckcfg.extract_path) == -1) {
+			erofs_err("failed to extract hard link: %s (%s)",
+				  fsckcfg.extract_path, strerror(errno));
+			return -errno;
+		}
+		return 0;
+	}
+
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFDIR:
+		ret = erofs_extract_dir(inode);
+		break;
+	case S_IFREG:
+		if (erofs_is_packed_inode(inode))
+			goto verify;
+		ret = erofs_extract_file(inode);
+		break;
+	case S_IFLNK:
+		ret = erofs_extract_symlink(inode);
+		break;
+	case S_IFCHR:
+	case S_IFBLK:
+	case S_IFIFO:
+	case S_IFSOCK:
+		ret = erofs_extract_special(inode);
+		break;
+	default:
+		/* TODO */
+		goto verify;
+	}
+	if (ret && ret != -ECANCELED)
+		return ret;
+
+	/* record nid and old path for hardlink */
+	if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
+		ret = erofsfsck_hardlink_insert(inode->nid,
+						fsckcfg.extract_path);
 	return ret;
 }
 
@@ -712,6 +850,7 @@
 	erofs_dbg("check inode: nid(%llu)", nid | 0ULL);
 
 	inode.nid = nid;
+	inode.sbi = &sbi;
 	ret = erofs_read_inode_from_disk(&inode);
 	if (ret) {
 		if (ret == -EIO)
@@ -725,32 +864,7 @@
 	if (ret)
 		goto out;
 
-	if (fsckcfg.extract_path) {
-		switch (inode.i_mode & S_IFMT) {
-		case S_IFDIR:
-			ret = erofs_extract_dir(&inode);
-			break;
-		case S_IFREG:
-			ret = erofs_extract_file(&inode);
-			break;
-		case S_IFLNK:
-			ret = erofs_extract_symlink(&inode);
-			break;
-		case S_IFCHR:
-		case S_IFBLK:
-		case S_IFIFO:
-		case S_IFSOCK:
-			ret = erofs_extract_special(&inode);
-			break;
-		default:
-			/* TODO */
-			goto verify;
-		}
-	} else {
-verify:
-		/* verify data chunk layout */
-		ret = erofs_verify_inode_data(&inode, -1);
-	}
+	ret = erofsfsck_extract_inode(&inode);
 	if (ret && ret != -ECANCELED)
 		goto out;
 
@@ -766,7 +880,7 @@
 		ret = erofs_iterate_dir(&ctx, true);
 	}
 
-	if (!ret)
+	if (!ret && !erofs_is_packed_inode(&inode))
 		erofsfsck_set_attributes(&inode, fsckcfg.extract_path);
 
 	if (ret == -ECANCELED)
@@ -777,7 +891,11 @@
 	return ret;
 }
 
-int main(int argc, char **argv)
+#ifdef FUZZING
+int erofsfsck_fuzz_one(int argc, char *argv[])
+#else
+int main(int argc, char *argv[])
+#endif
 {
 	int err;
 
@@ -804,21 +922,36 @@
 		goto exit;
 	}
 
-	err = dev_open_ro(cfg.c_img_path);
+#ifdef FUZZING
+	cfg.c_dbg_lvl = -1;
+#endif
+
+	err = dev_open_ro(&sbi, cfg.c_img_path);
 	if (err) {
 		erofs_err("failed to open image file");
 		goto exit;
 	}
 
-	err = erofs_read_superblock();
+	err = erofs_read_superblock(&sbi);
 	if (err) {
 		erofs_err("failed to read superblock");
 		goto exit_dev_close;
 	}
 
-	if (erofs_sb_has_sb_chksum() && erofs_check_sb_chksum()) {
+	if (erofs_sb_has_sb_chksum(&sbi) && erofs_check_sb_chksum()) {
 		erofs_err("failed to verify superblock checksum");
-		goto exit_dev_close;
+		goto exit_put_super;
+	}
+
+	if (fsckcfg.extract_path)
+		erofsfsck_hardlink_init();
+
+	if (erofs_sb_has_fragments(&sbi) && sbi.packed_nid > 0) {
+		err = erofsfsck_check_inode(sbi.packed_nid, sbi.packed_nid);
+		if (err) {
+			erofs_err("failed to verify packed file");
+			goto exit_hardlink;
+		}
 	}
 
 	err = erofsfsck_check_inode(sbi.root_nid, sbi.root_nid);
@@ -843,10 +976,40 @@
 		}
 	}
 
+exit_hardlink:
+	if (fsckcfg.extract_path)
+		erofsfsck_hardlink_exit();
+exit_put_super:
+	erofs_put_super(&sbi);
 exit_dev_close:
-	dev_close();
+	dev_close(&sbi);
 exit:
-	blob_closeall();
+	blob_closeall(&sbi);
 	erofs_exit_configure();
 	return err ? 1 : 0;
 }
+
+#ifdef FUZZING
+int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size)
+{
+	int fd, ret;
+	char filename[] = "/tmp/erofsfsck_libfuzzer_XXXXXX";
+	char *argv[] = {
+		"fsck.erofs",
+		"--extract",
+		filename,
+	};
+
+	fd = mkstemp(filename);
+	if (fd < 0)
+		return -errno;
+	if (write(fd, Data, Size) != Size) {
+		close(fd);
+		return -EIO;
+	}
+	close(fd);
+	ret = erofsfsck_fuzz_one(ARRAY_SIZE(argv), argv);
+	unlink(filename);
+	return ret ? -1 : 0;
+}
+#endif
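
The FUZZING build above swaps main() for erofsfsck_fuzz_one() and exposes LLVMFuzzerTestOneInput(). A minimal standalone driver along the following lines could feed a single image file to that entry point when the harness is compiled with -DFUZZING but without libFuzzer supplying its own main(); the build wiring here is an assumption and not part of this patch.

/* hypothetical driver: link against the fsck.erofs objects built with -DFUZZING */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size);

int main(int argc, char *argv[])
{
	FILE *f;
	long sz;
	uint8_t *buf;
	int ret;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <erofs-image>\n", argv[0]);
		return 1;
	}
	f = fopen(argv[1], "rb");
	if (!f)
		return 1;
	fseek(f, 0, SEEK_END);
	sz = ftell(f);
	rewind(f);
	buf = malloc(sz);
	if (!buf || fread(buf, 1, sz, f) != (size_t)sz) {
		fclose(f);
		free(buf);
		return 1;
	}
	fclose(f);
	ret = LLVMFuzzerTestOneInput(buf, sz);	/* runs one fsck --extract pass */
	free(buf);
	return ret;
}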
diff --git a/fuse/Makefile.am b/fuse/Makefile.am
index 3179a2b..50be783 100644
--- a/fuse/Makefile.am
+++ b/fuse/Makefile.am
@@ -7,4 +7,4 @@
 erofsfuse_CFLAGS = -Wall -I$(top_srcdir)/include
 erofsfuse_CFLAGS += -DFUSE_USE_VERSION=26 ${libfuse_CFLAGS} ${libselinux_CFLAGS}
 erofsfuse_LDADD = $(top_builddir)/lib/liberofs.la ${libfuse_LIBS} ${liblz4_LIBS} \
-	${libselinux_LIBS} ${liblzma_LIBS}
+	${libselinux_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS}
diff --git a/fuse/main.c b/fuse/main.c
index f4c2476..821d98c 100644
--- a/fuse/main.c
+++ b/fuse/main.c
@@ -13,6 +13,7 @@
 #include "erofs/print.h"
 #include "erofs/io.h"
 #include "erofs/dir.h"
+#include "erofs/inode.h"
 
 struct erofsfuse_dir_context {
 	struct erofs_dir_context ctx;
@@ -24,11 +25,13 @@
 static int erofsfuse_fill_dentries(struct erofs_dir_context *ctx)
 {
 	struct erofsfuse_dir_context *fusectx = (void *)ctx;
+	struct stat st = {0};
 	char dname[EROFS_NAME_LEN + 1];
 
 	strncpy(dname, ctx->dname, ctx->de_namelen);
 	dname[ctx->de_namelen] = '\0';
-	fusectx->filler(fusectx->buf, dname, NULL, 0);
+	st.st_mode = erofs_ftype_to_dtype(ctx->de_ftype) << 12;
+	fusectx->filler(fusectx->buf, dname, &st, 0);
 	return 0;
 }
 
@@ -46,6 +49,7 @@
 	};
 	erofs_dbg("readdir:%s offset=%llu", path, (long long)offset);
 
+	dir.sbi = &sbi;
 	ret = erofs_ilookup(path, &dir);
 	if (ret)
 		return ret;
@@ -81,7 +85,7 @@
 
 static int erofsfuse_getattr(const char *path, struct stat *stbuf)
 {
-	struct erofs_inode vi = {};
+	struct erofs_inode vi = { .sbi = &sbi };
 	int ret;
 
 	erofs_dbg("getattr(%s)", path);
@@ -92,7 +96,7 @@
 	stbuf->st_mode  = vi.i_mode;
 	stbuf->st_nlink = vi.i_nlink;
 	stbuf->st_size  = vi.i_size;
-	stbuf->st_blocks = roundup(vi.i_size, EROFS_BLKSIZ) >> 9;
+	stbuf->st_blocks = roundup(vi.i_size, erofs_blksiz(vi.sbi)) >> 9;
 	stbuf->st_uid = vi.i_uid;
 	stbuf->st_gid = vi.i_gid;
 	if (S_ISBLK(vi.i_mode) || S_ISCHR(vi.i_mode))
@@ -112,6 +116,7 @@
 
 	erofs_dbg("path:%s size=%zd offset=%llu", path, size, (long long)offset);
 
+	vi.sbi = &sbi;
 	ret = erofs_ilookup(path, &vi);
 	if (ret)
 		return ret;
@@ -139,7 +144,45 @@
 	return 0;
 }
 
+static int erofsfuse_getxattr(const char *path, const char *name, char *value,
+			size_t size
+#ifdef __APPLE__
+			, uint32_t position)
+#else
+			)
+#endif
+{
+	int ret;
+	struct erofs_inode vi;
+
+	erofs_dbg("getxattr(%s): name=%s size=%llu", path, name, size);
+
+	vi.sbi = &sbi;
+	ret = erofs_ilookup(path, &vi);
+	if (ret)
+		return ret;
+
+	return erofs_getxattr(&vi, name, value, size);
+}
+
+static int erofsfuse_listxattr(const char *path, char *list, size_t size)
+{
+	int ret;
+	struct erofs_inode vi;
+
+	erofs_dbg("listxattr(%s): size=%llu", path, size);
+
+	vi.sbi = &sbi;
+	ret = erofs_ilookup(path, &vi);
+	if (ret)
+		return ret;
+
+	return erofs_listxattr(&vi, list, size);
+}
+
 static struct fuse_operations erofs_ops = {
+	.getxattr = erofsfuse_getxattr,
+	.listxattr = erofsfuse_listxattr,
 	.readlink = erofsfuse_readlink,
 	.getattr = erofsfuse_getattr,
 	.readdir = erofsfuse_readdir,
@@ -151,6 +194,7 @@
 static struct options {
 	const char *disk;
 	const char *mountpoint;
+	u64 offset;
 	unsigned int debug_lvl;
 	bool show_help;
 	bool odebug;
@@ -158,6 +202,7 @@
 
 #define OPTION(t, p) { t, offsetof(struct options, p), 1 }
 static const struct fuse_opt option_spec[] = {
+	OPTION("--offset=%lu", offset),
 	OPTION("--dbglevel=%u", debug_lvl),
 	OPTION("--help", show_help),
 	FUSE_OPT_KEY("--device=", 1),
@@ -170,6 +215,7 @@
 
 	fputs("usage: [options] IMAGE MOUNTPOINT\n\n"
 	      "Options:\n"
+	      "    --offset=#             skip # bytes when reading IMAGE\n"
 	      "    --dbglevel=#           set output message level to # (maximum 9)\n"
 	      "    --device=#             specify an extra device to be used together\n"
 #if FUSE_MAJOR_VERSION < 3
@@ -190,6 +236,7 @@
 static void erofsfuse_dumpcfg(void)
 {
 	erofs_dump("disk: %s\n", fusecfg.disk);
+	erofs_dump("offset: %llu\n", fusecfg.offset | 0ULL);
 	erofs_dump("mountpoint: %s\n", fusecfg.mountpoint);
 	erofs_dump("dbglevel: %u\n", cfg.c_dbg_lvl);
 }
@@ -201,7 +248,7 @@
 
 	switch (key) {
 	case 1:
-		ret = blob_open_ro(arg + sizeof("--device=") - 1);
+		ret = blob_open_ro(&sbi, arg + sizeof("--device=") - 1);
 		if (ret)
 			return -1;
 		++sbi.extra_devices;
@@ -279,23 +326,27 @@
 	if (fusecfg.odebug && cfg.c_dbg_lvl < EROFS_DBG)
 		cfg.c_dbg_lvl = EROFS_DBG;
 
+	cfg.c_offset = fusecfg.offset;
+
 	erofsfuse_dumpcfg();
-	ret = dev_open_ro(fusecfg.disk);
+	ret = dev_open_ro(&sbi, fusecfg.disk);
 	if (ret) {
 		fprintf(stderr, "failed to open: %s\n", fusecfg.disk);
 		goto err_fuse_free_args;
 	}
 
-	ret = erofs_read_superblock();
+	ret = erofs_read_superblock(&sbi);
 	if (ret) {
 		fprintf(stderr, "failed to read erofs super block\n");
 		goto err_dev_close;
 	}
 
 	ret = fuse_main(args.argc, args.argv, &erofs_ops, NULL);
+
+	erofs_put_super(&sbi);
 err_dev_close:
-	blob_closeall();
-	dev_close();
+	blob_closeall(&sbi);
+	dev_close(&sbi);
 err_fuse_free_args:
 	fuse_opt_free_args(&args);
 err:
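
With getxattr/listxattr wired into erofs_ops above, extended attributes of a mounted image become reachable through the regular Linux syscalls. A small client-side sketch of the usual size-probe-then-fetch pattern (the mountpoint path is hypothetical):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/xattr.h>

int main(void)
{
	const char *path = "/mnt/erofs/some/file";	/* example path only */
	ssize_t len = listxattr(path, NULL, 0);		/* probe required size */
	char *list, *name;

	if (len <= 0)
		return 0;
	list = malloc(len);
	if (!list)
		return 1;
	len = listxattr(path, list, len);
	if (len < 0) {
		free(list);
		return 1;
	}
	for (name = list; name < list + len; name += strlen(name) + 1)
		printf("%s (%zd bytes)\n", name,
		       getxattr(path, name, NULL, 0));
	free(list);
	return 0;
}

The new --offset= option follows the same pattern as --dbglevel=: it is parsed into fusecfg.offset and copied into cfg.c_offset, so e.g. "erofsfuse --offset=1048576 disk.img /mnt/erofs" would skip a leading megabyte of the image before looking for the superblock.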
diff --git a/include/erofs/blobchunk.h b/include/erofs/blobchunk.h
index 49cb7bf..89c8048 100644
--- a/include/erofs/blobchunk.h
+++ b/include/erofs/blobchunk.h
@@ -14,12 +14,16 @@
 
 #include "erofs/internal.h"
 
+struct erofs_blobchunk *erofs_get_unhashed_chunk(unsigned int device_id,
+		erofs_blk_t blkaddr, erofs_off_t sourceoffset);
 int erofs_blob_write_chunk_indexes(struct erofs_inode *inode, erofs_off_t off);
-int erofs_blob_write_chunked_file(struct erofs_inode *inode);
-int erofs_blob_remap(void);
+int erofs_blob_write_chunked_file(struct erofs_inode *inode, int fd,
+				  erofs_off_t startoff);
+int tarerofs_write_chunkes(struct erofs_inode *inode, erofs_off_t data_offset);
+int erofs_mkfs_dump_blobs(struct erofs_sb_info *sbi);
 void erofs_blob_exit(void);
 int erofs_blob_init(const char *blobfile_path);
-int erofs_generate_devtable(void);
+int erofs_mkfs_init_devices(struct erofs_sb_info *sbi, unsigned int devices);
 
 #ifdef __cplusplus
 }
diff --git a/include/erofs/block_list.h b/include/erofs/block_list.h
index 78fab44..9f9975e 100644
--- a/include/erofs/block_list.h
+++ b/include/erofs/block_list.h
@@ -13,9 +13,12 @@
 
 #include "internal.h"
 
+int erofs_blocklist_open(char *filename, bool srcmap);
+void erofs_blocklist_close(void);
+
+void tarerofs_blocklist_write(erofs_blk_t blkaddr, erofs_blk_t nblocks,
+			      erofs_off_t srcoff);
 #ifdef WITH_ANDROID
-int erofs_droid_blocklist_fopen(void);
-void erofs_droid_blocklist_fclose(void);
 void erofs_droid_blocklist_write(struct erofs_inode *inode,
 				 erofs_blk_t blk_start, erofs_blk_t nblocks);
 void erofs_droid_blocklist_write_tail_end(struct erofs_inode *inode,
diff --git a/include/erofs/cache.h b/include/erofs/cache.h
index de12399..de5584e 100644
--- a/include/erofs/cache.h
+++ b/include/erofs/cache.h
@@ -22,10 +22,12 @@
 #define META		1
 /* including inline xattrs, extent */
 #define INODE		2
+/* directory data */
+#define DIRA		3
 /* shared xattrs */
-#define XATTR		3
+#define XATTR		4
 /* device table */
-#define DEVT		4
+#define DEVT		5
 
 struct erofs_bhops {
 	bool (*preflush)(struct erofs_buffer_head *bh);
@@ -55,11 +57,14 @@
 static inline const int get_alignsize(int type, int *type_ret)
 {
 	if (type == DATA)
-		return EROFS_BLKSIZ;
+		return erofs_blksiz(&sbi);
 
 	if (type == INODE) {
 		*type_ret = META;
 		return sizeof(struct erofs_inode_compact);
+	} else if (type == DIRA) {
+		*type_ret = META;
+		return erofs_blksiz(&sbi);
 	} else if (type == XATTR) {
 		*type_ret = META;
 		return sizeof(struct erofs_xattr_entry);
@@ -75,7 +80,6 @@
 
 extern const struct erofs_bhops erofs_drop_directly_bhops;
 extern const struct erofs_bhops erofs_skip_write_bhops;
-extern const struct erofs_bhops erofs_buf_write_bhops;
 
 static inline erofs_off_t erofs_btell(struct erofs_buffer_head *bh, bool end)
 {
@@ -84,7 +88,7 @@
 	if (bb->blkaddr == NULL_ADDR)
 		return NULL_ADDR_UL;
 
-	return blknr_to_addr(bb->blkaddr) +
+	return erofs_pos(&sbi, bb->blkaddr) +
 		(end ? list_next_entry(bh, list)->off : bh->off);
 }
 
@@ -108,6 +112,7 @@
 bool erofs_bflush(struct erofs_buffer_block *bb);
 
 void erofs_bdrop(struct erofs_buffer_head *bh, bool tryrevoke);
+erofs_blk_t erofs_total_metablocks(void);
 
 #ifdef __cplusplus
 }
diff --git a/include/erofs/compress.h b/include/erofs/compress.h
index 24f6204..46cff03 100644
--- a/include/erofs/compress.h
+++ b/include/erofs/compress.h
@@ -14,16 +14,28 @@
 
 #include "internal.h"
 
-#define EROFS_CONFIG_COMPR_MAX_SZ           (3000 * 1024)
-#define EROFS_CONFIG_COMPR_MIN_SZ           (32   * 1024)
+#define EROFS_CONFIG_COMPR_MAX_SZ           (4000 * 1024)
 
 void z_erofs_drop_inline_pcluster(struct erofs_inode *inode);
-int erofs_write_compressed_file(struct erofs_inode *inode);
+int erofs_write_compressed_file(struct erofs_inode *inode, int fd);
 
-int z_erofs_compress_init(struct erofs_buffer_head *bh);
+int z_erofs_compress_init(struct erofs_sb_info *sbi,
+			  struct erofs_buffer_head *bh);
 int z_erofs_compress_exit(void);
 
-const char *z_erofs_list_available_compressors(unsigned int i);
+const char *z_erofs_list_supported_algorithms(int i, unsigned int *mask);
+const char *z_erofs_list_available_compressors(int *i);
+
+static inline bool erofs_is_packed_inode(struct erofs_inode *inode)
+{
+	erofs_nid_t packed_nid = inode->sbi->packed_nid;
+
+	if (inode->nid == EROFS_PACKED_NID_UNALLOCATED) {
+		DBG_BUGON(packed_nid != EROFS_PACKED_NID_UNALLOCATED);
+		return true;
+	}
+	return (packed_nid > 0 && inode->nid == packed_nid);
+}
 
 #ifdef __cplusplus
 }
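
z_erofs_list_available_compressors() now walks a cursor instead of taking a plain index, and z_erofs_list_supported_algorithms() reports the corresponding algorithm mask. Assuming the enumeration ends by returning NULL, a caller-side loop might look like the sketch below (the real usage/--help output in mkfs.erofs differs):

#include <stdio.h>
#include "erofs/compress.h"

static void list_compressors(void)
{
	const char *name;
	int i = 0;

	/* prints the compressor names built into liberofs */
	while ((name = z_erofs_list_available_compressors(&i)) != NULL)
		printf("%s\n", name);
}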
diff --git a/include/erofs/compress_hints.h b/include/erofs/compress_hints.h
index 659c5b6..9f0d8ae 100644
--- a/include/erofs/compress_hints.h
+++ b/include/erofs/compress_hints.h
@@ -20,11 +20,12 @@
 
 	regex_t reg;
 	unsigned int physical_clusterblks;
+	unsigned char algorithmtype;
 };
 
 bool z_erofs_apply_compress_hints(struct erofs_inode *inode);
 void erofs_cleanup_compress_hints(void);
-int erofs_load_compress_hints(void);
+int erofs_load_compress_hints(struct erofs_sb_info *sbi);
 
 #ifdef __cplusplus
 }
diff --git a/include/erofs/config.h b/include/erofs/config.h
index 0d0916c..e342722 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -32,6 +32,8 @@
 	TIMESTAMP_CLAMPING,
 };
 
+#define EROFS_MAX_COMPR_CFGS		64
+
 struct erofs_configure {
 	const char *c_version;
 	int c_dbg_lvl;
@@ -39,13 +41,20 @@
 	bool c_legacy_compress;
 #ifndef NDEBUG
 	bool c_random_pclusterblks;
+	bool c_random_algorithms;
 #endif
 	char c_timeinherit;
 	char c_chunkbits;
-	bool c_noinline_data;
+	bool c_inline_data;
 	bool c_ztailpacking;
+	bool c_fragments;
+	bool c_all_fragments;
+	bool c_dedupe;
 	bool c_ignore_mtime;
 	bool c_showprogress;
+	bool c_extra_ea_name_prefixes;
+	bool c_xattr_name_filter;
+	bool c_ovlfs_strip;
 
 #ifdef HAVE_LIBSELINUX
 	struct selabel_handle *sehnd;
@@ -55,24 +64,28 @@
 	char *c_src_path;
 	char *c_blobdev_path;
 	char *c_compress_hints_file;
-	char *c_compr_alg_master;
-	int c_compr_level_master;
+	char *c_compr_alg[EROFS_MAX_COMPR_CFGS];
+	int c_compr_level[EROFS_MAX_COMPR_CFGS];
 	char c_force_inodeversion;
 	char c_force_chunkformat;
 	/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
 	int c_inline_xattr_tolerance;
 
-	u32 c_pclusterblks_max, c_pclusterblks_def;
+	u32 c_pclusterblks_max, c_pclusterblks_def, c_pclusterblks_packed;
 	u32 c_max_decompressed_extent_bytes;
 	u32 c_dict_size;
 	u64 c_unix_timestamp;
 	u32 c_uid, c_gid;
+	const char *mount_point;
+	long long c_uid_offset, c_gid_offset;
 #ifdef WITH_ANDROID
-	char *mount_point;
 	char *target_out_path;
 	char *fs_config_file;
 	char *block_list_file;
 #endif
+
+	/* offset when reading multi-partition images */
+	u64 c_offset;
 };
 
 extern struct erofs_configure cfg;
diff --git a/include/erofs/decompress.h b/include/erofs/decompress.h
index 82bf7b8..0d55483 100644
--- a/include/erofs/decompress.h
+++ b/include/erofs/decompress.h
@@ -14,6 +14,7 @@
 #include "internal.h"
 
 struct z_erofs_decompress_req {
+	struct erofs_sb_info *sbi;
 	char *in, *out;
 
 	/*
@@ -23,6 +24,9 @@
 	unsigned int decodedskip;
 	unsigned int inputsize, decodedlength;
 
+	/* cut point of interlaced uncompressed data */
+	unsigned int interlaced_offset;
+
 	/* indicate the algorithm will be used for decompression */
 	unsigned int alg;
 	bool partial_decoding;
diff --git a/include/erofs/dedupe.h b/include/erofs/dedupe.h
new file mode 100644
index 0000000..153bd4c
--- /dev/null
+++ b/include/erofs/dedupe.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+/*
+ * Copyright (C) 2022 Alibaba Cloud
+ */
+#ifndef __EROFS_DEDUPE_H
+#define __EROFS_DEDUPE_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "internal.h"
+
+struct z_erofs_inmem_extent {
+	erofs_blk_t blkaddr;
+	unsigned int compressedblks;
+	unsigned int length;
+	bool raw, partial;
+};
+
+struct z_erofs_dedupe_ctx {
+	u8		*start, *end;
+	u8		*cur;
+	struct z_erofs_inmem_extent	e;
+};
+
+int z_erofs_dedupe_match(struct z_erofs_dedupe_ctx *ctx);
+int z_erofs_dedupe_insert(struct z_erofs_inmem_extent *e,
+			  void *original_data);
+void z_erofs_dedupe_commit(bool drop);
+int z_erofs_dedupe_init(unsigned int wsiz);
+void z_erofs_dedupe_exit(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
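
A rough call-order sketch for this dedupe API, inferred only from the declarations above; the matching rule and the exact meaning of the filled-in extent are assumptions, and lib/dedupe.c remains the real contract:

#include "erofs/dedupe.h"

/* returns 0 if [buf, buf + len) matched an already-written extent */
static int try_dedupe(u8 *buf, unsigned int len)
{
	struct z_erofs_dedupe_ctx ctx = {
		.start	= buf,
		.end	= buf + len,
		.cur	= buf,
	};

	if (!z_erofs_dedupe_match(&ctx))
		return 0;	/* ctx.e describes the duplicate extent */

	/* otherwise the caller compresses the data itself and later uses
	 * z_erofs_dedupe_insert() / z_erofs_dedupe_commit() to publish it */
	return 1;
}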
diff --git a/include/erofs/defs.h b/include/erofs/defs.h
index e5aa23c..fefa7e7 100644
--- a/include/erofs/defs.h
+++ b/include/erofs/defs.h
@@ -179,9 +179,29 @@
 #define __maybe_unused      __attribute__((__unused__))
 #endif
 
-static inline u32 get_unaligned_le32(const u8 *p)
+#define __packed __attribute__((__packed__))
+
+#define __get_unaligned_t(type, ptr) ({						\
+	const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr);	\
+	__pptr->x;								\
+})
+
+#define __put_unaligned_t(type, val, ptr) do {					\
+	struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr);		\
+	__pptr->x = (val);							\
+} while (0)
+
+#define get_unaligned(ptr)	__get_unaligned_t(typeof(*(ptr)), (ptr))
+#define put_unaligned(val, ptr) __put_unaligned_t(typeof(*(ptr)), (val), (ptr))
+
+static inline u32 get_unaligned_le32(const void *p)
 {
-	return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24;
+	return le32_to_cpu(__get_unaligned_t(__le32, p));
+}
+
+static inline void put_unaligned_le32(u32 val, void *p)
+{
+	__put_unaligned_t(__le32, cpu_to_le32(val), p);
 }
 
 /**
@@ -266,6 +286,11 @@
 	return x ? sizeof(x) * 8 - __builtin_clz(x) : 0;
 }
 
+static inline unsigned long lowbit(unsigned long n)
+{
+	return n & -n;
+}
+
 /**
  * __roundup_pow_of_two() - round up to nearest power of two
  * @n: value to round up
@@ -313,11 +338,6 @@
 #define ST_MTIM_NSEC(stbuf) 0
 #endif
 
-#ifdef __APPLE__
-#define stat64		stat
-#define lstat64		lstat
-#endif
-
 #ifdef __cplusplus
 }
 #endif
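
The unaligned-access helpers and lowbit() added above are self-contained, so their behavior can be checked with nothing but defs.h and its byte-order wrappers:

#include <assert.h>
#include "erofs/defs.h"

int main(void)
{
	unsigned char buf[6] = { 0xff, 0x78, 0x56, 0x34, 0x12, 0xff };

	/* 4-byte little-endian load from an odd (unaligned) address */
	assert(get_unaligned_le32(buf + 1) == 0x12345678);

	put_unaligned_le32(0xaabbccdd, buf + 1);
	assert(buf[1] == 0xdd && buf[4] == 0xaa);

	/* lowbit() isolates the least-significant set bit */
	assert(lowbit(0x18) == 0x8);
	return 0;
}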
diff --git a/include/erofs/dir.h b/include/erofs/dir.h
index 74bffb5..5460ac4 100644
--- a/include/erofs/dir.h
+++ b/include/erofs/dir.h
@@ -62,7 +62,8 @@
 /* Iterate over inodes that are in directory */
 int erofs_iterate_dir(struct erofs_dir_context *ctx, bool fsck);
 /* Get a full pathname of the inode NID */
-int erofs_get_pathname(erofs_nid_t nid, char *buf, size_t size);
+int erofs_get_pathname(struct erofs_sb_info *sbi, erofs_nid_t nid,
+		       char *buf, size_t size);
 
 #ifdef __cplusplus
 }
diff --git a/include/erofs/diskbuf.h b/include/erofs/diskbuf.h
new file mode 100644
index 0000000..29d9fe2
--- /dev/null
+++ b/include/erofs/diskbuf.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+#ifndef __EROFS_DISKBUF_H
+#define __EROFS_DISKBUF_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "erofs/defs.h"
+
+struct erofs_diskbuf {
+	void *sp;		/* internal stream pointer */
+	u64 offset;		/* internal offset */
+};
+
+int erofs_diskbuf_getfd(struct erofs_diskbuf *db, u64 *off);
+
+int erofs_diskbuf_reserve(struct erofs_diskbuf *db, int sid, u64 *off);
+void erofs_diskbuf_commit(struct erofs_diskbuf *db, u64 len);
+void erofs_diskbuf_close(struct erofs_diskbuf *db);
+
+int erofs_diskbuf_init(unsigned int nstrms);
+void erofs_diskbuf_exit(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/erofs/err.h b/include/erofs/err.h
index 08b0bdb..2ae9e21 100644
--- a/include/erofs/err.h
+++ b/include/erofs/err.h
@@ -33,6 +33,12 @@
 	return (long) ptr;
 }
 
+static inline void * ERR_CAST(const void *ptr)
+{
+	/* cast away the const */
+	return (void *) ptr;
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/erofs/fragments.h b/include/erofs/fragments.h
new file mode 100644
index 0000000..4c6f755
--- /dev/null
+++ b/include/erofs/fragments.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+/*
+ * Copyright (C), 2022, Coolpad Group Limited.
+ */
+#ifndef __EROFS_FRAGMENTS_H
+#define __EROFS_FRAGMENTS_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "erofs/internal.h"
+
+extern const char *erofs_frags_packedname;
+#define EROFS_PACKED_INODE	erofs_frags_packedname
+
+FILE *erofs_packedfile_init(void);
+void erofs_packedfile_exit(void);
+struct erofs_inode *erofs_mkfs_build_packedfile(void);
+
+int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 *tofcrc);
+int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd, u32 tofcrc);
+int z_erofs_pack_fragments(struct erofs_inode *inode, void *data,
+			   unsigned int len, u32 tofcrc);
+void z_erofs_fragments_commit(struct erofs_inode *inode);
+int z_erofs_fragments_init(void);
+void z_erofs_fragments_exit(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/erofs/hashmap.h b/include/erofs/hashmap.h
index 3d38578..d25092d 100644
--- a/include/erofs/hashmap.h
+++ b/include/erofs/hashmap.h
@@ -61,7 +61,7 @@
 /* hashmap functions */
 void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function,
 		  size_t initial_size);
-void hashmap_free(struct hashmap *map, int free_entries);
+int hashmap_free(struct hashmap *map);
 
 /* hashmap_entry functions */
 static inline void hashmap_entry_init(void *entry, unsigned int hash)
@@ -75,8 +75,7 @@
 void *hashmap_get(const struct hashmap *map, const void *key, const void *keydata);
 void *hashmap_get_next(const struct hashmap *map, const void *entry);
 void hashmap_add(struct hashmap *map, void *entry);
-void *hashmap_put(struct hashmap *map, void *entry);
-void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata);
+void *hashmap_remove(struct hashmap *map, const void *key);
 
 static inline void *hashmap_get_from_hash(const struct hashmap *map,
 					  unsigned int hash,
diff --git a/include/erofs/inode.h b/include/erofs/inode.h
index 79b39b0..bcfd98e 100644
--- a/include/erofs/inode.h
+++ b/include/erofs/inode.h
@@ -15,12 +15,30 @@
 
 #include "erofs/internal.h"
 
+static inline struct erofs_inode *erofs_igrab(struct erofs_inode *inode)
+{
+	++inode->i_count;
+	return inode;
+}
+
+u32 erofs_new_encode_dev(dev_t dev);
 unsigned char erofs_mode_to_ftype(umode_t mode);
+unsigned char erofs_ftype_to_dtype(unsigned int filetype);
 void erofs_inode_manager_init(void);
+void erofs_insert_ihash(struct erofs_inode *inode, dev_t dev, ino_t ino);
+struct erofs_inode *erofs_iget(dev_t dev, ino_t ino);
+struct erofs_inode *erofs_iget_by_nid(erofs_nid_t nid);
 unsigned int erofs_iput(struct erofs_inode *inode);
 erofs_nid_t erofs_lookupnid(struct erofs_inode *inode);
-struct erofs_inode *erofs_mkfs_build_tree_from_path(struct erofs_inode *parent,
-						    const char *path);
+struct erofs_dentry *erofs_d_alloc(struct erofs_inode *parent,
+				   const char *name);
+int erofs_rebuild_dump_tree(struct erofs_inode *dir);
+int erofs_init_empty_dir(struct erofs_inode *dir);
+int __erofs_fill_inode(struct erofs_inode *inode, struct stat *st,
+		       const char *path);
+struct erofs_inode *erofs_new_inode(void);
+struct erofs_inode *erofs_mkfs_build_tree_from_path(const char *path);
+struct erofs_inode *erofs_mkfs_build_special_from_fd(int fd, const char *name);
 
 #ifdef __cplusplus
 }
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 6a70f11..c1ff582 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -17,35 +17,20 @@
 
 typedef unsigned short umode_t;
 
-#define __packed __attribute__((__packed__))
-
 #include "erofs_fs.h"
 #include <fcntl.h>
+#include <sys/types.h> /* for off_t definition */
+#include <sys/stat.h> /* for S_ISCHR definition */
+#include <stdio.h>
 
 #ifndef PATH_MAX
 #define PATH_MAX        4096    /* # chars in a path name including nul */
 #endif
 
-#ifndef PAGE_SHIFT
-#define PAGE_SHIFT		(12)
+#ifndef EROFS_MAX_BLOCK_SIZE
+#define EROFS_MAX_BLOCK_SIZE	4096
 #endif
 
-#ifndef PAGE_SIZE
-#define PAGE_SIZE		(1U << PAGE_SHIFT)
-#endif
-
-/* no obvious reason to support explicit PAGE_SIZE != 4096 for now */
-#if PAGE_SIZE != 4096
-#warning EROFS may be incompatible on your platform
-#endif
-
-#ifndef PAGE_MASK
-#define PAGE_MASK		(~(PAGE_SIZE-1))
-#endif
-
-#define LOG_BLOCK_SIZE          (12)
-#define EROFS_BLKSIZ            (1U << LOG_BLOCK_SIZE)
-
 #define EROFS_ISLOTBITS		5
 #define EROFS_SLOTSIZE		(1U << EROFS_ISLOTBITS)
 
@@ -57,21 +42,33 @@
 #define NULL_ADDR	((unsigned int)-1)
 #define NULL_ADDR_UL	((unsigned long)-1)
 
-#define erofs_blknr(addr)       ((addr) / EROFS_BLKSIZ)
-#define erofs_blkoff(addr)      ((addr) % EROFS_BLKSIZ)
-#define blknr_to_addr(nr)       ((erofs_off_t)(nr) * EROFS_BLKSIZ)
+/* global sbi */
+extern struct erofs_sb_info sbi;
 
-#define BLK_ROUND_UP(addr)	DIV_ROUND_UP(addr, EROFS_BLKSIZ)
+#define erofs_blksiz(sbi)	(1u << (sbi)->blkszbits)
+#define erofs_blknr(sbi, addr)  ((addr) >> (sbi)->blkszbits)
+#define erofs_blkoff(sbi, addr) ((addr) & (erofs_blksiz(sbi) - 1))
+#define erofs_pos(sbi, nr)      ((erofs_off_t)(nr) << (sbi)->blkszbits)
+#define BLK_ROUND_UP(sbi, addr)	DIV_ROUND_UP(addr, erofs_blksiz(sbi))
 
 struct erofs_buffer_head;
 
 struct erofs_device_info {
+	u8 tag[64];
 	u32 blocks;
 	u32 mapped_blkaddr;
 };
 
+struct erofs_xattr_prefix_item {
+	struct erofs_xattr_long_prefix *prefix;
+	u8 infix_len;
+};
+
+#define EROFS_PACKED_NID_UNALLOCATED	-1
+
 struct erofs_sb_info {
 	struct erofs_device_info *devs;
+	char *devname;
 
 	u64 total_blocks;
 	u64 primarydevice_blocks;
@@ -84,7 +81,9 @@
 	u64 build_time;
 	u32 build_time_nsec;
 
+	u8  extslots;
 	unsigned char islotbits;
+	unsigned char blkszbits;
 
 	/* what we really care is nid, rather than ino.. */
 	erofs_nid_t root_nid;
@@ -92,6 +91,7 @@
 	u64 inos;
 
 	u8 uuid[16];
+	char volume_name[16];
 
 	u16 available_compr_algs;
 	u16 lz4_max_distance;
@@ -102,53 +102,74 @@
 		u16 devt_slotoff;		/* used for mkfs */
 		u16 device_id_mask;		/* used for others */
 	};
+	erofs_nid_t packed_nid;
+
+	u32 xattr_prefix_start;
+	u8 xattr_prefix_count;
+	struct erofs_xattr_prefix_item *xattr_prefixes;
+
+	int devfd, devblksz;
+	u64 devsz;
+	dev_t dev;
+	unsigned int nblobs;
+	unsigned int blobfd[256];
+
+	struct list_head list;
+
+	u64 saved_by_deduplication;
 };
 
-/* global sbi */
-extern struct erofs_sb_info sbi;
-
-static inline erofs_off_t iloc(erofs_nid_t nid)
-{
-	return blknr_to_addr(sbi.meta_blkaddr) + (nid << sbi.islotbits);
-}
+/* make sure that any user of the erofs headers has at least a 64-bit off_t type */
+extern int erofs_assert_largefile[sizeof(off_t)-8];
 
 #define EROFS_FEATURE_FUNCS(name, compat, feature) \
-static inline bool erofs_sb_has_##name(void) \
+static inline bool erofs_sb_has_##name(struct erofs_sb_info *sbi) \
 { \
-	return sbi.feature_##compat & EROFS_FEATURE_##feature; \
+	return sbi->feature_##compat & EROFS_FEATURE_##feature; \
 } \
-static inline void erofs_sb_set_##name(void) \
+static inline void erofs_sb_set_##name(struct erofs_sb_info *sbi) \
 { \
-	sbi.feature_##compat |= EROFS_FEATURE_##feature; \
+	sbi->feature_##compat |= EROFS_FEATURE_##feature; \
 } \
-static inline void erofs_sb_clear_##name(void) \
+static inline void erofs_sb_clear_##name(struct erofs_sb_info *sbi) \
 { \
-	sbi.feature_##compat &= ~EROFS_FEATURE_##feature; \
+	sbi->feature_##compat &= ~EROFS_FEATURE_##feature; \
 }
 
-EROFS_FEATURE_FUNCS(lz4_0padding, incompat, INCOMPAT_LZ4_0PADDING)
+EROFS_FEATURE_FUNCS(lz4_0padding, incompat, INCOMPAT_ZERO_PADDING)
 EROFS_FEATURE_FUNCS(compr_cfgs, incompat, INCOMPAT_COMPR_CFGS)
 EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER)
 EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE)
 EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE)
 EROFS_FEATURE_FUNCS(ztailpacking, incompat, INCOMPAT_ZTAILPACKING)
+EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS)
+EROFS_FEATURE_FUNCS(dedupe, incompat, INCOMPAT_DEDUPE)
+EROFS_FEATURE_FUNCS(xattr_prefixes, incompat, INCOMPAT_XATTR_PREFIXES)
 EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
+EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER)
 
 #define EROFS_I_EA_INITED	(1 << 0)
 #define EROFS_I_Z_INITED	(1 << 1)
 
+struct erofs_diskbuf;
+
 struct erofs_inode {
 	struct list_head i_hash, i_subdirs, i_xattrs;
 
 	union {
 		/* (erofsfuse) runtime flags */
 		unsigned int flags;
-		/* (mkfs.erofs) device ID containing source file */
-		u32 dev;
+
+		/* (mkfs.erofs) next pointer for directory dumping */
+		struct erofs_inode *next_dirwrite;
 	};
 	unsigned int i_count;
+	struct erofs_sb_info *sbi;
 	struct erofs_inode *i_parent;
 
+	/* (mkfs.erofs) device ID containing source file */
+	u32 dev;
+
 	umode_t i_mode;
 	erofs_off_t i_size;
 
@@ -169,17 +190,28 @@
 		};
 	} u;
 
-	char i_srcpath[PATH_MAX + 1];
-
+	char *i_srcpath;
+	union {
+		char *i_link;
+		struct erofs_diskbuf *i_diskbuf;
+	};
 	unsigned char datalayout;
 	unsigned char inode_isize;
 	/* inline tail-end packing size */
 	unsigned short idata_size;
 	bool compressed_idata;
+	bool lazy_tailblock;
+	bool with_diskbuf;
+	bool opaque;
+	/* OVL: non-merge dir that may contain whiteout entries */
+	bool whiteouts;
 
 	unsigned int xattr_isize;
 	unsigned int extent_isize;
 
+	unsigned int xattr_shared_count;
+	unsigned int *xattr_shared_xattrs;
+
 	erofs_nid_t nid;
 	struct erofs_buffer_head *bh;
 	struct erofs_buffer_head *bh_inline, *bh_data;
@@ -206,8 +238,18 @@
 #ifdef WITH_ANDROID
 	uint64_t capabilities;
 #endif
+	erofs_off_t fragmentoff;
+	unsigned int fragment_size;
 };
 
+static inline erofs_off_t erofs_iloc(struct erofs_inode *inode)
+{
+	struct erofs_sb_info *sbi = inode->sbi;
+
+	return erofs_pos(sbi, sbi->meta_blkaddr) +
+			(inode->nid << sbi->islotbits);
+}
+
 static inline bool is_inode_layout_compression(struct erofs_inode *inode)
 {
 	return erofs_inode_is_data_compressed(inode->datalayout);
@@ -276,6 +318,8 @@
 	BH_Mapped,
 	BH_Encoded,
 	BH_FullMapped,
+	BH_Fragment,
+	BH_Partialref,
 };
 
 /* Has a disk mapping */
@@ -286,9 +330,13 @@
 #define EROFS_MAP_ENCODED	(1 << BH_Encoded)
 /* The length of extent is full */
 #define EROFS_MAP_FULL_MAPPED	(1 << BH_FullMapped)
+/* Located in the special packed inode */
+#define EROFS_MAP_FRAGMENT	(1 << BH_Fragment)
+/* The extent refers to partial decompressed data */
+#define EROFS_MAP_PARTIAL_REF	(1 << BH_Partialref)
 
 struct erofs_map_blocks {
-	char mpage[EROFS_BLKSIZ];
+	char mpage[EROFS_MAX_BLOCK_SIZE];
 
 	erofs_off_t m_pa, m_la;
 	u64 m_plen, m_llen;
@@ -304,10 +352,12 @@
  * approach instead if possible since it's more metadata lightweight.)
  */
 #define EROFS_GET_BLOCKS_FIEMAP	0x0002
+/* Used to map tail extent for tailpacking inline or fragment pcluster */
 #define EROFS_GET_BLOCKS_FINDTAIL	0x0008
 
 enum {
 	Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
+	Z_EROFS_COMPRESSION_INTERLACED,
 	Z_EROFS_COMPRESSION_RUNTIME_MAX
 };
 
@@ -317,12 +367,12 @@
 };
 
 /* super.c */
-int erofs_read_superblock(void);
+int erofs_read_superblock(struct erofs_sb_info *sbi);
+void erofs_put_super(struct erofs_sb_info *sbi);
 
 /* namei.c */
 int erofs_read_inode_from_disk(struct erofs_inode *vi);
 int erofs_ilookup(const char *path, struct erofs_inode *vi);
-int erofs_read_inode_from_disk(struct erofs_inode *vi);
 
 /* data.c */
 int erofs_pread(struct erofs_inode *inode, char *buf,
@@ -330,6 +380,13 @@
 int erofs_map_blocks(struct erofs_inode *inode,
 		struct erofs_map_blocks *map, int flags);
 int erofs_map_dev(struct erofs_sb_info *sbi, struct erofs_map_dev *map);
+int erofs_read_one_data(struct erofs_inode *inode, struct erofs_map_blocks *map,
+			char *buffer, u64 offset, size_t len);
+int z_erofs_read_one_data(struct erofs_inode *inode,
+			struct erofs_map_blocks *map, char *raw, char *buffer,
+			erofs_off_t skip, erofs_off_t length, bool trimmed);
+void *erofs_read_metadata(struct erofs_sb_info *sbi, erofs_nid_t nid,
+			  erofs_off_t *offset, int *lengthp);
 
 static inline int erofs_get_occupied_size(const struct erofs_inode *inode,
 					  erofs_off_t *size)
@@ -341,16 +398,21 @@
 	case EROFS_INODE_CHUNK_BASED:
 		*size = inode->i_size;
 		break;
-	case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
-	case EROFS_INODE_FLAT_COMPRESSION:
-		*size = inode->u.i_blocks * EROFS_BLKSIZ;
+	case EROFS_INODE_COMPRESSED_FULL:
+	case EROFS_INODE_COMPRESSED_COMPACT:
+		*size = inode->u.i_blocks * erofs_blksiz(inode->sbi);
 		break;
 	default:
-		return -ENOTSUP;
+		return -EOPNOTSUPP;
 	}
 	return 0;
 }
 
+/* data.c */
+int erofs_getxattr(struct erofs_inode *vi, const char *name, char *buffer,
+		   size_t buffer_size);
+int erofs_listxattr(struct erofs_inode *vi, char *buffer, size_t buffer_size);
+
 /* zmap.c */
 int z_erofs_fill_inode(struct erofs_inode *vi);
 int z_erofs_map_blocks_iter(struct erofs_inode *vi,
@@ -375,6 +437,12 @@
 	return crc;
 }
 
+#define EROFS_WHITEOUT_DEV	0
+static inline bool erofs_inode_is_whiteout(struct erofs_inode *inode)
+{
+	return S_ISCHR(inode->i_mode) && inode->u.i_rdev == EROFS_WHITEOUT_DEV;
+}
+
 #ifdef __cplusplus
 }
 #endif
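
With the block geometry now carried per super block, the former EROFS_BLKSIZ arithmetic becomes the erofs_blksiz()/erofs_blknr()/erofs_blkoff()/erofs_pos() helpers above. A worked example with a 4KiB block size (blkszbits = 12); the numbers are illustrative only:

#include <assert.h>
#include "erofs/internal.h"

static void blkmath_demo(void)
{
	struct erofs_sb_info sb = {
		.blkszbits	= 12,
		.islotbits	= 5,
		.meta_blkaddr	= 2,
	};
	struct erofs_inode vi = { .sbi = &sb, .nid = 36 };

	assert(erofs_blksiz(&sb) == 4096);
	assert(erofs_blknr(&sb, 13000) == 3);		/* 13000 / 4096 */
	assert(erofs_blkoff(&sb, 13000) == 712);	/* 13000 % 4096 */
	assert(erofs_pos(&sb, 3) == 12288);
	assert(BLK_ROUND_UP(&sb, 13000) == 4);

	/* on-disk inode position: start of the meta area + 32-byte slots */
	assert(erofs_iloc(&vi) == 2 * 4096 + 36 * 32);
}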
diff --git a/include/erofs/io.h b/include/erofs/io.h
index 0f58c70..4db5716 100644
--- a/include/erofs/io.h
+++ b/include/erofs/io.h
@@ -22,36 +22,36 @@
 #define O_BINARY	0
 #endif
 
-void blob_closeall(void);
-int blob_open_ro(const char *dev);
-int dev_open(const char *devname);
-int dev_open_ro(const char *dev);
-void dev_close(void);
-int dev_write(const void *buf, u64 offset, size_t len);
-int dev_read(int device_id, void *buf, u64 offset, size_t len);
-int dev_fillzero(u64 offset, size_t len, bool padding);
-int dev_fsync(void);
-int dev_resize(erofs_blk_t nblocks);
-u64 dev_length(void);
-
-extern int erofs_devfd;
+void blob_closeall(struct erofs_sb_info *sbi);
+int blob_open_ro(struct erofs_sb_info *sbi, const char *dev);
+int dev_open(struct erofs_sb_info *sbi, const char *devname);
+int dev_open_ro(struct erofs_sb_info *sbi, const char *dev);
+void dev_close(struct erofs_sb_info *sbi);
+int dev_write(struct erofs_sb_info *sbi, const void *buf,
+	      u64 offset, size_t len);
+int dev_read(struct erofs_sb_info *sbi, int device_id,
+	     void *buf, u64 offset, size_t len);
+int dev_fillzero(struct erofs_sb_info *sbi, u64 offset,
+		 size_t len, bool padding);
+int dev_fsync(struct erofs_sb_info *sbi);
+int dev_resize(struct erofs_sb_info *sbi, erofs_blk_t nblocks);
 
 ssize_t erofs_copy_file_range(int fd_in, erofs_off_t *off_in,
 			      int fd_out, erofs_off_t *off_out,
 			      size_t length);
 
-static inline int blk_write(const void *buf, erofs_blk_t blkaddr,
-			    u32 nblocks)
+static inline int blk_write(struct erofs_sb_info *sbi, const void *buf,
+			    erofs_blk_t blkaddr, u32 nblocks)
 {
-	return dev_write(buf, blknr_to_addr(blkaddr),
-			 blknr_to_addr(nblocks));
+	return dev_write(sbi, buf, erofs_pos(sbi, blkaddr),
+			 erofs_pos(sbi, nblocks));
 }
 
-static inline int blk_read(int device_id, void *buf,
+static inline int blk_read(struct erofs_sb_info *sbi, int device_id, void *buf,
 			   erofs_blk_t start, u32 nblocks)
 {
-	return dev_read(device_id, buf, blknr_to_addr(start),
-			 blknr_to_addr(nblocks));
+	return dev_read(sbi, device_id, buf, erofs_pos(sbi, start),
+			erofs_pos(sbi, nblocks));
 }
 
 #ifdef __cplusplus
diff --git a/include/erofs/list.h b/include/erofs/list.h
index 3f5da1a..d7a9fee 100644
--- a/include/erofs/list.h
+++ b/include/erofs/list.h
@@ -70,6 +70,26 @@
 	return head->next == head;
 }
 
+static inline void __list_splice(struct list_head *list,
+		struct list_head *prev, struct list_head *next)
+{
+	struct list_head *first = list->next;
+	struct list_head *last = list->prev;
+
+	first->prev = prev;
+	prev->next = first;
+
+	last->next = next;
+	next->prev = last;
+}
+
+static inline void list_splice_tail(struct list_head *list,
+				    struct list_head *head)
+{
+	if (!list_empty(list))
+		__list_splice(list, head->prev, head);
+}
+
 #define list_entry(ptr, type, member) container_of(ptr, type, member)
 
 #define list_first_entry(ptr, type, member)                                    \
diff --git a/include/erofs/rebuild.h b/include/erofs/rebuild.h
new file mode 100644
index 0000000..e99ce74
--- /dev/null
+++ b/include/erofs/rebuild.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+#ifndef __EROFS_REBUILD_H
+#define __EROFS_REBUILD_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "internal.h"
+
+struct erofs_dentry *erofs_rebuild_get_dentry(struct erofs_inode *pwd,
+		char *path, bool aufs, bool *whout, bool *opq, bool to_head);
+
+int erofs_rebuild_load_tree(struct erofs_inode *root, struct erofs_sb_info *sbi);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/erofs/tar.h b/include/erofs/tar.h
new file mode 100644
index 0000000..a76f740
--- /dev/null
+++ b/include/erofs/tar.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+#ifndef __EROFS_TAR_H
+#define __EROFS_TAR_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#if defined(HAVE_ZLIB)
+#include <zlib.h>
+#endif
+#include <sys/stat.h>
+
+#include "internal.h"
+
+struct erofs_pax_header {
+	struct stat st;
+	struct list_head xattrs;
+	bool use_mtime;
+	bool use_size;
+	bool use_uid;
+	bool use_gid;
+	char *path, *link;
+};
+
+#define EROFS_IOS_DECODER_NONE		0
+#define EROFS_IOS_DECODER_GZIP		1
+
+struct erofs_iostream {
+	union {
+		int fd;			/* original fd */
+		void *handler;
+	};
+	u64 sz;
+	char *buffer;
+	unsigned int head, tail, bufsize;
+	int decoder;
+	bool feof;
+};
+
+struct erofs_tarfile {
+	struct erofs_pax_header global;
+	struct erofs_iostream ios;
+	char *mapfile;
+
+	int fd;
+	u64 offset;
+	bool index_mode, aufs;
+};
+
+void erofs_iostream_close(struct erofs_iostream *ios);
+int erofs_iostream_open(struct erofs_iostream *ios, int fd, int decoder);
+int tarerofs_parse_tar(struct erofs_inode *root, struct erofs_tarfile *tar);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/erofs/xattr.h b/include/erofs/xattr.h
index 226e984..0f76037 100644
--- a/include/erofs/xattr.h
+++ b/include/erofs/xattr.h
@@ -14,40 +14,52 @@
 
 #include "internal.h"
 
+#ifndef ENOATTR
+#define ENOATTR	ENODATA
+#endif
+
+static inline unsigned int inlinexattr_header_size(struct erofs_inode *vi)
+{
+	return sizeof(struct erofs_xattr_ibody_header) +
+		sizeof(u32) * vi->xattr_shared_count;
+}
+
+static inline erofs_blk_t xattrblock_addr(struct erofs_inode *vi,
+					  unsigned int xattr_id)
+{
+	return vi->sbi->xattr_blkaddr +
+		erofs_blknr(vi->sbi, xattr_id * sizeof(__u32));
+}
+
+static inline unsigned int xattrblock_offset(struct erofs_inode *vi,
+					     unsigned int xattr_id)
+{
+	return erofs_blkoff(vi->sbi, xattr_id * sizeof(__u32));
+}
+
 #define EROFS_INODE_XATTR_ICOUNT(_size)	({\
 	u32 __size = le16_to_cpu(_size); \
 	((__size) == 0) ? 0 : \
 	(_size - sizeof(struct erofs_xattr_ibody_header)) / \
 	sizeof(struct erofs_xattr_entry) + 1; })
 
-#ifndef XATTR_USER_PREFIX
-#define XATTR_USER_PREFIX	"user."
-#endif
-#ifndef XATTR_USER_PREFIX_LEN
-#define XATTR_USER_PREFIX_LEN (sizeof(XATTR_USER_PREFIX) - 1)
-#endif
-#ifndef XATTR_SECURITY_PREFIX
-#define XATTR_SECURITY_PREFIX	"security."
-#endif
-#ifndef XATTR_SECURITY_PREFIX_LEN
-#define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1)
-#endif
-#ifndef XATTR_TRUSTED_PREFIX
-#define XATTR_TRUSTED_PREFIX	"trusted."
-#endif
-#ifndef XATTR_TRUSTED_PREFIX_LEN
-#define XATTR_TRUSTED_PREFIX_LEN (sizeof(XATTR_TRUSTED_PREFIX) - 1)
-#endif
-#ifndef XATTR_NAME_POSIX_ACL_ACCESS
-#define XATTR_NAME_POSIX_ACL_ACCESS "system.posix_acl_access"
-#endif
-#ifndef XATTR_NAME_POSIX_ACL_DEFAULT
-#define XATTR_NAME_POSIX_ACL_DEFAULT "system.posix_acl_default"
-#endif
-
+int erofs_scan_file_xattrs(struct erofs_inode *inode);
 int erofs_prepare_xattr_ibody(struct erofs_inode *inode);
-char *erofs_export_xattr_ibody(struct list_head *ixattrs, unsigned int size);
-int erofs_build_shared_xattrs_from_path(const char *path);
+char *erofs_export_xattr_ibody(struct erofs_inode *inode);
+int erofs_build_shared_xattrs_from_path(struct erofs_sb_info *sbi, const char *path);
+
+int erofs_xattr_insert_name_prefix(const char *prefix);
+void erofs_xattr_cleanup_name_prefixes(void);
+int erofs_xattr_write_name_prefixes(struct erofs_sb_info *sbi, FILE *f);
+void erofs_xattr_prefixes_cleanup(struct erofs_sb_info *sbi);
+int erofs_xattr_prefixes_init(struct erofs_sb_info *sbi);
+
+int erofs_setxattr(struct erofs_inode *inode, char *key,
+		   const void *value, size_t size);
+int erofs_set_opaque_xattr(struct erofs_inode *inode);
+void erofs_clear_opaque_xattr(struct erofs_inode *inode);
+int erofs_set_origin_xattr(struct erofs_inode *inode);
+int erofs_read_xattrs_from_disk(struct erofs_inode *inode);
 
 #ifdef __cplusplus
 }
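
The new xattrblock_addr()/xattrblock_offset() helpers translate a shared xattr id into a block address plus an in-block offset, since each id names a 4-byte slot following sbi->xattr_blkaddr. A small illustration with a 4KiB block size (the values are made up):

#include <assert.h>
#include "erofs/xattr.h"

static void shared_xattr_demo(void)
{
	struct erofs_sb_info sb = { .blkszbits = 12, .xattr_blkaddr = 10 };
	struct erofs_inode vi = { .sbi = &sb };

	/* id 1030 -> byte offset 4120, i.e. one full block past xattr_blkaddr */
	assert(xattrblock_addr(&vi, 1030) == 10 + 1);
	assert(xattrblock_offset(&vi, 1030) == 4120 - 4096);
}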
diff --git a/include/erofs/xxhash.h b/include/erofs/xxhash.h
new file mode 100644
index 0000000..5441209
--- /dev/null
+++ b/include/erofs/xxhash.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0+ */
+#ifndef __EROFS_XXHASH_H
+#define __EROFS_XXHASH_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include <stdint.h>
+
+/**
+ * xxh32() - calculate the 32-bit hash of the input with a given seed.
+ *
+ * @input:  The data to hash.
+ * @length: The length of the data to hash.
+ * @seed:   The seed can be used to alter the result predictably.
+ *
+ * Return:  The 32-bit hash of the data.
+ */
+uint32_t xxh32(const void *input, size_t length, uint32_t seed);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
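
xxh32() is the only export of the bundled hasher; the on-disk xattr name filter declared later in erofs_fs.h seeds it with EROFS_XATTR_FILTER_SEED, while the sketch below simply uses an arbitrary seed to show the call:

#include <stdio.h>
#include <string.h>
#include "erofs/xxhash.h"

int main(void)
{
	const char *name = "selinux";
	uint32_t h = xxh32(name, strlen(name), 0);

	/* e.g. reduce the hash to one of 32 filter bits */
	printf("xxh32 = %#x, bit = %u\n", h, h % 32);
	return 0;
}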
diff --git a/include/erofs_fs.h b/include/erofs_fs.h
index 08f9761..eba6c26 100644
--- a/include/erofs_fs.h
+++ b/include/erofs_fs.h
@@ -3,7 +3,7 @@
  * EROFS (Enhanced ROM File System) on-disk format definition
  *
  * Copyright (C) 2017-2018 HUAWEI, Inc.
- *             http://www.huawei.com/
+ *             https://www.huawei.com/
  * Copyright (C) 2021, Alibaba Cloud
  */
 #ifndef __EROFS_FS_H
@@ -12,36 +12,42 @@
 #define EROFS_SUPER_MAGIC_V1    0xE0F5E1E2
 #define EROFS_SUPER_OFFSET      1024
 
-#define EROFS_FEATURE_COMPAT_SB_CHKSUM		0x00000001
-#define EROFS_FEATURE_COMPAT_MTIME		0x00000002
+#define EROFS_FEATURE_COMPAT_SB_CHKSUM          0x00000001
+#define EROFS_FEATURE_COMPAT_MTIME              0x00000002
+#define EROFS_FEATURE_COMPAT_XATTR_FILTER	0x00000004
 
 /*
  * Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should
  * be incompatible with this kernel version.
  */
-#define EROFS_FEATURE_INCOMPAT_LZ4_0PADDING	0x00000001
+#define EROFS_FEATURE_INCOMPAT_ZERO_PADDING	0x00000001
 #define EROFS_FEATURE_INCOMPAT_COMPR_CFGS	0x00000002
 #define EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER	0x00000002
 #define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE	0x00000004
 #define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE	0x00000008
+#define EROFS_FEATURE_INCOMPAT_COMPR_HEAD2	0x00000008
 #define EROFS_FEATURE_INCOMPAT_ZTAILPACKING	0x00000010
+#define EROFS_FEATURE_INCOMPAT_FRAGMENTS	0x00000020
+#define EROFS_FEATURE_INCOMPAT_DEDUPE		0x00000020
+#define EROFS_FEATURE_INCOMPAT_XATTR_PREFIXES	0x00000040
 #define EROFS_ALL_FEATURE_INCOMPAT		\
-	(EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \
+	(EROFS_FEATURE_INCOMPAT_ZERO_PADDING | \
 	 EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
 	 EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \
 	 EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \
 	 EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \
-	 EROFS_FEATURE_INCOMPAT_ZTAILPACKING)
+	 EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 | \
+	 EROFS_FEATURE_INCOMPAT_ZTAILPACKING | \
+	 EROFS_FEATURE_INCOMPAT_FRAGMENTS | \
+	 EROFS_FEATURE_INCOMPAT_DEDUPE | \
+	 EROFS_FEATURE_INCOMPAT_XATTR_PREFIXES)
 
 #define EROFS_SB_EXTSLOT_SIZE	16
 
 struct erofs_deviceslot {
-	union {
-		u8 uuid[16];		/* used for device manager later */
-		u8 userdata[64];	/* digest(sha256), etc. */
-	} u;
-	__le32 blocks;			/* total fs blocks of this device */
-	__le32 mapped_blkaddr;		/* map starting at mapped_blkaddr */
+	u8 tag[64];		/* digest(sha256), etc. */
+	__le32 blocks;		/* total fs blocks of this device */
+	__le32 mapped_blkaddr;	/* map starting at mapped_blkaddr */
 	u8 reserved[56];
 };
 #define EROFS_DEVT_SLOT_SIZE	sizeof(struct erofs_deviceslot)
@@ -51,14 +57,14 @@
 	__le32 magic;           /* file system magic number */
 	__le32 checksum;        /* crc32c(super_block) */
 	__le32 feature_compat;
-	__u8 blkszbits;         /* support block_size == PAGE_SIZE only */
+	__u8 blkszbits;         /* filesystem block size in bit shift */
 	__u8 sb_extslots;	/* superblock size = 128 + sb_extslots * 16 */
 
 	__le16 root_nid;	/* nid of root directory */
 	__le64 inos;            /* total valid ino # (== f_files - f_favail) */
 
-	__le64 build_time;      /* inode v1 time derivation */
-	__le32 build_time_nsec;	/* inode v1 time derivation in nano scale */
+	__le64 build_time;      /* compact inode time derivation */
+	__le32 build_time_nsec;	/* compact inode time derivation in ns scale */
 	__le32 blocks;          /* used for statfs */
 	__le32 meta_blkaddr;	/* start block address of metadata area */
 	__le32 xattr_blkaddr;	/* start block address of shared xattr area */
@@ -73,39 +79,39 @@
 	} __packed u1;
 	__le16 extra_devices;	/* # of devices besides the primary device */
 	__le16 devt_slotoff;	/* startoff = devt_slotoff * devt_slotsize */
-	__u8 reserved2[38];
+	__u8 dirblkbits;	/* directory block size in bit shift */
+	__u8 xattr_prefix_count;	/* # of long xattr name prefixes */
+	__le32 xattr_prefix_start;	/* start of long xattr prefixes */
+	__le64 packed_nid;	/* nid of the special packed inode */
+	__u8 xattr_filter_reserved; /* reserved for xattr name filter */
+	__u8 reserved2[23];
 };
 
 /*
- * erofs inode datalayout (i_format in on-disk inode):
- * 0 - inode plain without inline data A:
- * inode, [xattrs], ... | ... | no-holed data
- * 1 - inode VLE compression B (legacy):
- * inode, [xattrs], extents ... | ...
- * 2 - inode plain with inline data C:
- * inode, [xattrs], last_inline_data, ... | ... | no-holed data
- * 3 - inode compression D:
- * inode, [xattrs], map_header, extents ... | ...
- * 4 - inode chunk-based E:
- * inode, [xattrs], chunk indexes ... | ...
+ * EROFS inode datalayout (i_format in on-disk inode):
+ * 0 - uncompressed flat inode without tail-packing inline data:
+ * 1 - compressed inode with non-compact indexes:
+ * 2 - uncompressed flat inode with tail-packing inline data:
+ * 3 - compressed inode with compact indexes:
+ * 4 - chunk-based inode with (optional) multi-device support:
  * 5~7 - reserved
  */
 enum {
 	EROFS_INODE_FLAT_PLAIN			= 0,
-	EROFS_INODE_FLAT_COMPRESSION_LEGACY	= 1,
+	EROFS_INODE_COMPRESSED_FULL		= 1,
 	EROFS_INODE_FLAT_INLINE			= 2,
-	EROFS_INODE_FLAT_COMPRESSION		= 3,
+	EROFS_INODE_COMPRESSED_COMPACT		= 3,
 	EROFS_INODE_CHUNK_BASED			= 4,
 	EROFS_INODE_DATALAYOUT_MAX
 };
 
 static inline bool erofs_inode_is_data_compressed(unsigned int datamode)
 {
-	return datamode == EROFS_INODE_FLAT_COMPRESSION ||
-		datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY;
+	return datamode == EROFS_INODE_COMPRESSED_COMPACT ||
+		datamode == EROFS_INODE_COMPRESSED_FULL;
 }
 
-/* bit definitions of inode i_advise */
+/* bit definitions of inode i_format */
 #define EROFS_I_VERSION_BITS            1
 #define EROFS_I_DATALAYOUT_BITS         3
 
@@ -123,11 +129,30 @@
 #define EROFS_CHUNK_FORMAT_ALL	\
 	(EROFS_CHUNK_FORMAT_BLKBITS_MASK | EROFS_CHUNK_FORMAT_INDEXES)
 
+/* 32-byte on-disk inode */
+#define EROFS_INODE_LAYOUT_COMPACT	0
+/* 64-byte on-disk inode */
+#define EROFS_INODE_LAYOUT_EXTENDED	1
+
 struct erofs_inode_chunk_info {
 	__le16 format;		/* chunk blkbits, etc. */
 	__le16 reserved;
 };
 
+union erofs_inode_i_u {
+	/* total compressed blocks for compressed inodes */
+	__le32 compressed_blocks;
+
+	/* block address for uncompressed flat inodes */
+	__le32 raw_blkaddr;
+
+	/* for device files, used to indicate old/new device # */
+	__le32 rdev;
+
+	/* for chunk-based files, it contains the summary info */
+	struct erofs_inode_chunk_info c;
+};
+
 /* 32-byte reduced form of an ondisk inode */
 struct erofs_inode_compact {
 	__le16 i_format;	/* inode format hints */
@@ -138,28 +163,14 @@
 	__le16 i_nlink;
 	__le32 i_size;
 	__le32 i_reserved;
-	union {
-		/* file total compressed blocks for data mapping 1 */
-		__le32 compressed_blocks;
-		__le32 raw_blkaddr;
+	union erofs_inode_i_u i_u;
 
-		/* for device files, used to indicate old/new device # */
-		__le32 rdev;
-
-		/* for chunk-based files, it contains the summary info */
-		struct erofs_inode_chunk_info c;
-	} i_u;
-	__le32 i_ino;           /* only used for 32-bit stat compatibility */
+	__le32 i_ino;		/* only used for 32-bit stat compatibility */
 	__le16 i_uid;
 	__le16 i_gid;
 	__le32 i_reserved2;
 };
 
-/* 32 bytes on-disk inode */
-#define EROFS_INODE_LAYOUT_COMPACT	0
-/* 64 bytes on-disk inode */
-#define EROFS_INODE_LAYOUT_EXTENDED	1
-
 /* 64-byte complete form of an ondisk inode */
 struct erofs_inode_extended {
 	__le16 i_format;	/* inode format hints */
@@ -169,21 +180,9 @@
 	__le16 i_mode;
 	__le16 i_reserved;
 	__le64 i_size;
-	union {
-		/* file total compressed blocks for data mapping 1 */
-		__le32 compressed_blocks;
-		__le32 raw_blkaddr;
+	union erofs_inode_i_u i_u;
 
-		/* for device files, used to indicate old/new device # */
-		__le32 rdev;
-
-		/* for chunk-based files, it contains the summary info */
-		struct erofs_inode_chunk_info c;
-	} i_u;
-
-	/* only used for 32-bit stat compatibility */
-	__le32 i_ino;
-
+	__le32 i_ino;		/* only used for 32-bit stat compatibility */
 	__le32 i_uid;
 	__le32 i_gid;
 	__le64 i_mtime;
@@ -192,10 +191,6 @@
 	__u8   i_reserved2[16];
 };
 
-#define EROFS_MAX_SHARED_XATTRS         (128)
-/* h_shared_count between 129 ... 255 are special # */
-#define EROFS_SHARED_XATTR_EXTENT       (255)
-
 /*
  * inline xattrs (n == i_xattr_icount):
  * erofs_xattr_ibody_header(1) + (n - 1) * 4 bytes
@@ -208,7 +203,7 @@
  * for read-only fs, no need to introduce h_refcount
  */
 struct erofs_xattr_ibody_header {
-	__le32 h_reserved;
+	__le32 h_name_filter;		/* bit value 1 indicates not-present */
 	__u8   h_shared_count;
 	__u8   h_reserved2[7];
 	__le32 h_shared_xattrs[0];      /* shared xattr id array */
@@ -222,6 +217,17 @@
 #define EROFS_XATTR_INDEX_LUSTRE            5
 #define EROFS_XATTR_INDEX_SECURITY          6
 
+/*
+ * bit 7 of e_name_index is set when it refers to a long xattr name prefix,
+ * while the remaining lower bits represent the index of the prefix.
+ */
+#define EROFS_XATTR_LONG_PREFIX		0x80
+#define EROFS_XATTR_LONG_PREFIX_MASK	0x7f
+
+#define EROFS_XATTR_FILTER_BITS		32
+#define EROFS_XATTR_FILTER_DEFAULT	UINT32_MAX
+#define EROFS_XATTR_FILTER_SEED		0x25BBE08F
+
 /* xattr entry (for both inline & shared xattrs) */
 struct erofs_xattr_entry {
 	__u8   e_name_len;      /* length of name */
@@ -231,6 +237,12 @@
 	char   e_name[0];       /* attribute name */
 };
 
+/* long xattr name prefix */
+struct erofs_xattr_long_prefix {
+	__u8   base_index;	/* short xattr name prefix index */
+	char   infix[0];	/* infix apart from short prefix */
+};
+
 static inline unsigned int erofs_xattr_ibody_size(__le16 i_xattr_icount)
 {
 	if (!i_xattr_icount)
@@ -261,132 +273,6 @@
 	__le32 blkaddr;		/* start block address of this inode chunk */
 };
 
-/* maximum supported size of a physical compression cluster */
-#define Z_EROFS_PCLUSTER_MAX_SIZE	(1024 * 1024)
-
-/* available compression algorithm types (for h_algorithmtype) */
-enum {
-	Z_EROFS_COMPRESSION_LZ4		= 0,
-	Z_EROFS_COMPRESSION_LZMA	= 1,
-	Z_EROFS_COMPRESSION_MAX
-};
-#define Z_EROFS_ALL_COMPR_ALGS		(1 << (Z_EROFS_COMPRESSION_MAX - 1))
-
-/* 14 bytes (+ length field = 16 bytes) */
-struct z_erofs_lz4_cfgs {
-	__le16 max_distance;
-	__le16 max_pclusterblks;
-	u8 reserved[10];
-} __packed;
-
-/* 14 bytes (+ length field = 16 bytes) */
-struct z_erofs_lzma_cfgs {
-	__le32 dict_size;
-	__le16 format;
-	u8 reserved[8];
-} __packed;
-#define Z_EROFS_LZMA_MAX_DICT_SIZE	(8 * Z_EROFS_PCLUSTER_MAX_SIZE)
-
-/*
- * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
- *  e.g. for 4k logical cluster size,      4B        if compacted 2B is off;
- *                                  (4B) + 2B + (4B) if compacted 2B is on.
- * bit 1 : HEAD1 big pcluster (0 - off; 1 - on)
- * bit 2 : HEAD2 big pcluster (0 - off; 1 - on)
- * bit 3 : tailpacking inline pcluster (0 - off; 1 - on)
- */
-#define Z_EROFS_ADVISE_COMPACTED_2B		0x0001
-#define Z_EROFS_ADVISE_BIG_PCLUSTER_1		0x0002
-#define Z_EROFS_ADVISE_BIG_PCLUSTER_2		0x0004
-#define Z_EROFS_ADVISE_INLINE_PCLUSTER		0x0008
-
-struct z_erofs_map_header {
-	__le16	h_reserved1;
-	/* record the size of tailpacking data */
-	__le16  h_idata_size;
-	__le16	h_advise;
-	/*
-	 * bit 0-3 : algorithm type of head 1 (logical cluster type 01);
-	 * bit 4-7 : algorithm type of head 2 (logical cluster type 11).
-	 */
-	__u8	h_algorithmtype;
-	/*
-	 * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096;
-	 * bit 3-7 : reserved.
-	 */
-	__u8	h_clusterbits;
-};
-
-#define Z_EROFS_VLE_LEGACY_HEADER_PADDING       8
-
-/*
- * Fixed-sized output compression ondisk Logical Extent cluster type:
- *    0 - literal (uncompressed) cluster
- *    1 - compressed cluster (for the head logical cluster)
- *    2 - compressed cluster (for the other logical clusters)
- *
- * In detail,
- *    0 - literal (uncompressed) cluster,
- *        di_advise = 0
- *        di_clusterofs = the literal data offset of the cluster
- *        di_blkaddr = the blkaddr of the literal cluster
- *
- *    1 - compressed cluster (for the head logical cluster)
- *        di_advise = 1
- *        di_clusterofs = the decompressed data offset of the cluster
- *        di_blkaddr = the blkaddr of the compressed cluster
- *
- *    2 - compressed cluster (for the other logical clusters)
- *        di_advise = 2
- *        di_clusterofs =
- *           the decompressed data offset in its own head cluster
- *        di_u.delta[0] = distance to its corresponding head cluster
- *        di_u.delta[1] = distance to its corresponding tail cluster
- *                (di_advise could be 0, 1 or 2)
- */
-enum {
-	Z_EROFS_VLE_CLUSTER_TYPE_PLAIN		= 0,
-	Z_EROFS_VLE_CLUSTER_TYPE_HEAD		= 1,
-	Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD	= 2,
-	Z_EROFS_VLE_CLUSTER_TYPE_RESERVED	= 3,
-	Z_EROFS_VLE_CLUSTER_TYPE_MAX
-};
-
-#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS        2
-#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT         0
-
-/*
- * D0_CBLKCNT will be marked _only_ at the 1st non-head lcluster to store the
- * compressed block count of a compressed extent (in logical clusters, aka.
- * block count of a pcluster).
- */
-#define Z_EROFS_VLE_DI_D0_CBLKCNT		(1 << 11)
-
-struct z_erofs_vle_decompressed_index {
-	__le16 di_advise;
-	/* where to decompress in the head cluster */
-	__le16 di_clusterofs;
-
-	union {
-		/* for the head cluster */
-		__le32 blkaddr;
-		/*
-		 * for the rest clusters
-		 * eg. for 4k page-sized cluster, maximum 4K*64k = 256M)
-		 * [0] - pointing to the head cluster
-		 * [1] - pointing to the tail cluster
-		 */
-		__le16 delta[2];
-	} di_u;
-};
-
-#define Z_EROFS_VLE_LEGACY_INDEX_ALIGN(size) \
-	(round_up(size, sizeof(struct z_erofs_vle_decompressed_index)) + \
-	 sizeof(struct z_erofs_map_header) + Z_EROFS_VLE_LEGACY_HEADER_PADDING)
-
-#define Z_EROFS_VLE_EXTENT_ALIGN(size) round_up(size, \
-	sizeof(struct z_erofs_vle_decompressed_index))
-
 /* dirent sorts in alphabet order, thus we can do binary search */
 struct erofs_dirent {
 	__le64 nid;     /* node number */
@@ -410,9 +296,157 @@
 
 #define EROFS_NAME_LEN      255
 
+/* maximum supported size of a physical compression cluster */
+#define Z_EROFS_PCLUSTER_MAX_SIZE	(1024 * 1024)
+
+/* available compression algorithm types (for h_algorithmtype) */
+enum {
+	Z_EROFS_COMPRESSION_LZ4		= 0,
+	Z_EROFS_COMPRESSION_LZMA	= 1,
+	Z_EROFS_COMPRESSION_DEFLATE	= 2,
+	Z_EROFS_COMPRESSION_MAX
+};
+#define Z_EROFS_ALL_COMPR_ALGS		((1 << Z_EROFS_COMPRESSION_MAX) - 1)
+
+/* 14 bytes (+ length field = 16 bytes) */
+struct z_erofs_lz4_cfgs {
+	__le16 max_distance;
+	__le16 max_pclusterblks;
+	u8 reserved[10];
+} __packed;
+
+/* 14 bytes (+ length field = 16 bytes) */
+struct z_erofs_lzma_cfgs {
+	__le32 dict_size;
+	__le16 format;
+	u8 reserved[8];
+} __packed;
+
+#define Z_EROFS_LZMA_MAX_DICT_SIZE	(8 * Z_EROFS_PCLUSTER_MAX_SIZE)
+
+/* 6 bytes (+ length field = 8 bytes) */
+struct z_erofs_deflate_cfgs {
+	u8 windowbits;			/* 8..15 for DEFLATE */
+	u8 reserved[5];
+} __packed;
+
+/*
+ * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
+ *  e.g. for 4k logical cluster size,      4B        if compacted 2B is off;
+ *                                  (4B) + 2B + (4B) if compacted 2B is on.
+ * bit 1 : HEAD1 big pcluster (0 - off; 1 - on)
+ * bit 2 : HEAD2 big pcluster (0 - off; 1 - on)
+ * bit 3 : tailpacking inline pcluster (0 - off; 1 - on)
+ * bit 4 : interlaced plain pcluster (0 - off; 1 - on)
+ * bit 5 : fragment pcluster (0 - off; 1 - on)
+ */
+#define Z_EROFS_ADVISE_COMPACTED_2B		0x0001
+#define Z_EROFS_ADVISE_BIG_PCLUSTER_1		0x0002
+#define Z_EROFS_ADVISE_BIG_PCLUSTER_2		0x0004
+#define Z_EROFS_ADVISE_INLINE_PCLUSTER		0x0008
+#define Z_EROFS_ADVISE_INTERLACED_PCLUSTER	0x0010
+#define Z_EROFS_ADVISE_FRAGMENT_PCLUSTER	0x0020
+
+#define Z_EROFS_FRAGMENT_INODE_BIT              7
+struct z_erofs_map_header {
+	union {
+		/* fragment data offset in the packed inode */
+		__le32  h_fragmentoff;
+		struct {
+			__le16  h_reserved1;
+			/* indicates the encoded size of tailpacking data */
+			__le16  h_idata_size;
+		};
+	};
+	__le16	h_advise;
+	/*
+	 * bit 0-3 : algorithm type of head 1 (logical cluster type 01);
+	 * bit 4-7 : algorithm type of head 2 (logical cluster type 11).
+	 */
+	__u8	h_algorithmtype;
+	/*
+	 * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096;
+	 * bit 3-6 : reserved;
+	 * bit 7   : move the whole file into packed inode or not.
+	 */
+	__u8	h_clusterbits;
+};
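A minimal sketch (hypothetical helper, not part of the patch) showing how bit 7 of h_clusterbits marks an inode whose data is moved entirely into the packed inode:

static inline unsigned int z_erofs_map_header_is_fragment(const struct z_erofs_map_header *h)
{
	/* bit 7 of h_clusterbits: the whole file lives in the packed inode */
	return h->h_clusterbits & (1U << Z_EROFS_FRAGMENT_INODE_BIT);
}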
+
+/*
+ * On-disk logical cluster type:
+ *    0   - literal (uncompressed) lcluster
+ *    1,3 - compressed lcluster (for HEAD lclusters)
+ *    2   - compressed lcluster (for NONHEAD lclusters)
+ *
+ * In detail,
+ *    0 - literal (uncompressed) lcluster,
+ *        di_advise = 0
+ *        di_clusterofs = the literal data offset of the lcluster
+ *        di_blkaddr = the blkaddr of the literal pcluster
+ *
+ *    1,3 - compressed lcluster (for HEAD lclusters)
+ *        di_advise = 1 or 3
+ *        di_clusterofs = the decompressed data offset of the lcluster
+ *        di_blkaddr = the blkaddr of the compressed pcluster
+ *
+ *    2 - compressed lcluster (for NONHEAD lclusters)
+ *        di_advise = 2
+ *        di_clusterofs =
+ *           the decompressed data offset in its own HEAD lcluster
+ *        di_u.delta[0] = distance to this HEAD lcluster
+ *        di_u.delta[1] = distance to the next HEAD lcluster
+ */
+enum {
+	Z_EROFS_LCLUSTER_TYPE_PLAIN	= 0,
+	Z_EROFS_LCLUSTER_TYPE_HEAD1	= 1,
+	Z_EROFS_LCLUSTER_TYPE_NONHEAD	= 2,
+	Z_EROFS_LCLUSTER_TYPE_HEAD2	= 3,
+	Z_EROFS_LCLUSTER_TYPE_MAX
+};
+
+#define Z_EROFS_LI_LCLUSTER_TYPE_BITS        2
+#define Z_EROFS_LI_LCLUSTER_TYPE_BIT         0
+
+/* (noncompact only, HEAD) This pcluster refers to partial decompressed data */
+#define Z_EROFS_LI_PARTIAL_REF		(1 << 15)
+
+/*
+ * D0_CBLKCNT will be marked _only_ at the 1st non-head lcluster to store the
+ * compressed block count of a compressed extent (in logical clusters, aka.
+ * block count of a pcluster).
+ */
+#define Z_EROFS_LI_D0_CBLKCNT		(1 << 11)
+
+struct z_erofs_lcluster_index {
+	__le16 di_advise;
+	/* where to decompress in the head lcluster */
+	__le16 di_clusterofs;
+
+	union {
+		/* for the HEAD lclusters */
+		__le32 blkaddr;
+		/*
+		 * for the NONHEAD lclusters
+		 * [0] - distance to its HEAD lcluster
+		 * [1] - distance to the next HEAD lcluster
+		 */
+		__le16 delta[2];
+	} di_u;
+};
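For illustration only (the helper name is hypothetical), the lcluster type encoded in di_advise is recovered with the TYPE_BIT/TYPE_BITS constants above, matching what parse_legacy_indexes() does later in this patch:

static inline unsigned int z_erofs_lcluster_type(const struct z_erofs_lcluster_index *di)
{
	return (le16_to_cpu(di->di_advise) >> Z_EROFS_LI_LCLUSTER_TYPE_BIT) &
		((1 << Z_EROFS_LI_LCLUSTER_TYPE_BITS) - 1);
}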
+
+#define Z_EROFS_FULL_INDEX_ALIGN(end)	\
+	(round_up(end, 8) + sizeof(struct z_erofs_map_header) + 8)
+
 /* check the EROFS on-disk layout strictly at compile time */
 static inline void erofs_check_ondisk_layout_definitions(void)
 {
+	const union {
+		struct z_erofs_map_header h;
+		__le64 v;
+	} fmh __maybe_unused = {
+		.h.h_clusterbits = 1 << Z_EROFS_FRAGMENT_INODE_BIT,
+	};
+
 	BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128);
 	BUILD_BUG_ON(sizeof(struct erofs_inode_compact) != 32);
 	BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64);
@@ -421,15 +455,18 @@
 	BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_info) != 4);
 	BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) != 8);
 	BUILD_BUG_ON(sizeof(struct z_erofs_map_header) != 8);
-	BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8);
+	BUILD_BUG_ON(sizeof(struct z_erofs_lcluster_index) != 8);
 	BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12);
 	/* keep in sync between 2 index structures for better extendibility */
 	BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) !=
-		     sizeof(struct z_erofs_vle_decompressed_index));
+		     sizeof(struct z_erofs_lcluster_index));
 	BUILD_BUG_ON(sizeof(struct erofs_deviceslot) != 128);
 
-	BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) <
-		     Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1);
+	BUILD_BUG_ON(BIT(Z_EROFS_LI_LCLUSTER_TYPE_BITS) <
+		     Z_EROFS_LCLUSTER_TYPE_MAX - 1);
+	/* exclude old compiler versions like gcc 7.5.0 */
+	BUILD_BUG_ON(__builtin_constant_p(fmh.v) ?
+		     fmh.v != cpu_to_le64(1ULL << 63) : 0);
 }
 
 #endif
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 3fad357..483d410 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -9,6 +9,7 @@
       $(top_srcdir)/include/erofs/config.h \
       $(top_srcdir)/include/erofs/decompress.h \
       $(top_srcdir)/include/erofs/defs.h \
+      $(top_srcdir)/include/erofs/diskbuf.h \
       $(top_srcdir)/include/erofs/err.h \
       $(top_srcdir)/include/erofs/exclude.h \
       $(top_srcdir)/include/erofs/flex-array.h \
@@ -19,16 +20,23 @@
       $(top_srcdir)/include/erofs/io.h \
       $(top_srcdir)/include/erofs/list.h \
       $(top_srcdir)/include/erofs/print.h \
+      $(top_srcdir)/include/erofs/tar.h \
       $(top_srcdir)/include/erofs/trace.h \
       $(top_srcdir)/include/erofs/xattr.h \
       $(top_srcdir)/include/erofs/compress_hints.h \
+      $(top_srcdir)/include/erofs/fragments.h \
+      $(top_srcdir)/include/erofs/xxhash.h \
+      $(top_srcdir)/include/erofs/rebuild.h \
       $(top_srcdir)/lib/liberofs_private.h
 
 noinst_HEADERS += compressor.h
 liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
 		      namei.c data.c compress.c compressor.c zmap.c decompress.c \
-		      compress_hints.c hashmap.c sha256.c blobchunk.c dir.c
-liberofs_la_CFLAGS = -Wall -I$(top_srcdir)/include
+		      compress_hints.c hashmap.c sha256.c blobchunk.c dir.c \
+		      fragments.c rb_tree.c dedupe.c uuid_unparse.c uuid.c tar.c \
+		      block_list.c xxhash.c rebuild.c diskbuf.c
+
+liberofs_la_CFLAGS = -Wall ${libuuid_CFLAGS} -I$(top_srcdir)/include
 if ENABLE_LZ4
 liberofs_la_CFLAGS += ${LZ4_CFLAGS}
 liberofs_la_SOURCES += compressor_lz4.c
@@ -40,3 +48,8 @@
 liberofs_la_CFLAGS += ${liblzma_CFLAGS}
 liberofs_la_SOURCES += compressor_liblzma.c
 endif
+
+liberofs_la_SOURCES += kite_deflate.c compressor_deflate.c
+if ENABLE_LIBDEFLATE
+liberofs_la_SOURCES += compressor_libdeflate.c
+endif
diff --git a/lib/blobchunk.c b/lib/blobchunk.c
index 77b0c17..e4d0bad 100644
--- a/lib/blobchunk.c
+++ b/lib/blobchunk.c
@@ -10,80 +10,102 @@
 #include "erofs/block_list.h"
 #include "erofs/cache.h"
 #include "erofs/io.h"
+#include "sha256.h"
 #include <unistd.h>
 
-void erofs_sha256(const unsigned char *in, unsigned long in_size,
-		  unsigned char out[32]);
-
 struct erofs_blobchunk {
-	struct hashmap_entry ent;
+	union {
+		struct hashmap_entry ent;
+		struct list_head list;
+	};
 	char		sha256[32];
-	unsigned int	chunksize;
+	unsigned int	device_id;
+	union {
+		erofs_off_t	chunksize;
+		erofs_off_t	sourceoffset;
+	};
 	erofs_blk_t	blkaddr;
 };
 
 static struct hashmap blob_hashmap;
 static FILE *blobfile;
 static erofs_blk_t remapped_base;
+static erofs_off_t datablob_size;
 static bool multidev;
 static struct erofs_buffer_head *bh_devt;
+struct erofs_blobchunk erofs_holechunk = {
+	.blkaddr = EROFS_NULL_ADDR,
+};
+static LIST_HEAD(unhashed_blobchunks);
 
-static struct erofs_blobchunk *erofs_blob_getchunk(int fd,
-		unsigned int chunksize)
+struct erofs_blobchunk *erofs_get_unhashed_chunk(unsigned int device_id,
+		erofs_blk_t blkaddr, erofs_off_t sourceoffset)
 {
-	static u8 zeroed[EROFS_BLKSIZ];
-	u8 *chunkdata, sha256[32];
-	int ret;
-	unsigned int hash;
-	erofs_off_t blkpos;
 	struct erofs_blobchunk *chunk;
 
-	chunkdata = malloc(chunksize);
-	if (!chunkdata)
+	chunk = calloc(1, sizeof(struct erofs_blobchunk));
+	if (!chunk)
 		return ERR_PTR(-ENOMEM);
 
-	ret = read(fd, chunkdata, chunksize);
-	if (ret < chunksize) {
-		chunk = ERR_PTR(-EIO);
-		goto out;
-	}
-	erofs_sha256(chunkdata, chunksize, sha256);
+	chunk->device_id = device_id;
+	chunk->blkaddr = blkaddr;
+	chunk->sourceoffset = sourceoffset;
+	list_add_tail(&chunk->list, &unhashed_blobchunks);
+	return chunk;
+}
+
+static struct erofs_blobchunk *erofs_blob_getchunk(struct erofs_sb_info *sbi,
+						u8 *buf, erofs_off_t chunksize)
+{
+	static u8 zeroed[EROFS_MAX_BLOCK_SIZE];
+	struct erofs_blobchunk *chunk;
+	unsigned int hash, padding;
+	u8 sha256[32];
+	erofs_off_t blkpos;
+	int ret;
+
+	erofs_sha256(buf, chunksize, sha256);
 	hash = memhash(sha256, sizeof(sha256));
 	chunk = hashmap_get_from_hash(&blob_hashmap, hash, sha256);
 	if (chunk) {
 		DBG_BUGON(chunksize != chunk->chunksize);
-		goto out;
+		sbi->saved_by_deduplication += chunksize;
+		erofs_dbg("Found duplicated chunk at %u", chunk->blkaddr);
+		return chunk;
 	}
+
 	chunk = malloc(sizeof(struct erofs_blobchunk));
-	if (!chunk) {
-		chunk = ERR_PTR(-ENOMEM);
-		goto out;
-	}
+	if (!chunk)
+		return ERR_PTR(-ENOMEM);
 
 	chunk->chunksize = chunksize;
-	blkpos = ftell(blobfile);
-	DBG_BUGON(erofs_blkoff(blkpos));
-	chunk->blkaddr = erofs_blknr(blkpos);
 	memcpy(chunk->sha256, sha256, sizeof(sha256));
-	hashmap_entry_init(&chunk->ent, hash);
-	hashmap_add(&blob_hashmap, chunk);
+	blkpos = ftell(blobfile);
+	DBG_BUGON(erofs_blkoff(sbi, blkpos));
+
+	if (sbi->extra_devices)
+		chunk->device_id = 1;
+	else
+		chunk->device_id = 0;
+	chunk->blkaddr = erofs_blknr(sbi, blkpos);
 
 	erofs_dbg("Writing chunk (%u bytes) to %u", chunksize, chunk->blkaddr);
-	ret = fwrite(chunkdata, chunksize, 1, blobfile);
-	if (ret == 1 && erofs_blkoff(chunksize))
-		ret = fwrite(zeroed, EROFS_BLKSIZ - erofs_blkoff(chunksize),
-			     1, blobfile);
-	if (ret < 1) {
-		struct hashmap_entry key;
-
-		hashmap_entry_init(&key, hash);
-		hashmap_remove(&blob_hashmap, &key, sha256);
-		free(chunk);
-		chunk = ERR_PTR(-ENOSPC);
-		goto out;
+	ret = fwrite(buf, chunksize, 1, blobfile);
+	if (ret == 1) {
+		padding = erofs_blkoff(sbi, chunksize);
+		if (padding) {
+			padding = erofs_blksiz(sbi) - padding;
+			ret = fwrite(zeroed, padding, 1, blobfile);
+		}
 	}
-out:
-	free(chunkdata);
+
+	if (ret < 1) {
+		free(chunk);
+		return ERR_PTR(-ENOSPC);
+	}
+
+	hashmap_entry_init(&chunk->ent, hash);
+	hashmap_add(&blob_hashmap, chunk);
 	return chunk;
 }
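Worked example for the padding logic above, assuming a 4096-byte block size: a 5000-byte chunk leaves erofs_blkoff() == 904, so 4096 - 904 == 3192 zero bytes are written after the chunk data to keep the blob file block-aligned.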
 
@@ -106,158 +128,360 @@
 {
 	struct erofs_inode_chunk_index idx = {0};
 	erofs_blk_t extent_start = EROFS_NULL_ADDR;
-	erofs_blk_t extent_end, extents_blks;
+	erofs_blk_t extent_end, chunkblks;
+	erofs_off_t source_offset;
 	unsigned int dst, src, unit;
 	bool first_extent = true;
-	erofs_blk_t base_blkaddr = 0;
-
-	if (multidev) {
-		idx.device_id = 1;
-		DBG_BUGON(!(inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES));
-	} else {
-		base_blkaddr = remapped_base;
-	}
 
 	if (inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
 		unit = sizeof(struct erofs_inode_chunk_index);
 	else
 		unit = EROFS_BLOCK_MAP_ENTRY_SIZE;
 
+	chunkblks = 1U << (inode->u.chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK);
 	for (dst = src = 0; dst < inode->extent_isize;
 	     src += sizeof(void *), dst += unit) {
 		struct erofs_blobchunk *chunk;
 
 		chunk = *(void **)(inode->chunkindexes + src);
 
-		idx.blkaddr = base_blkaddr + chunk->blkaddr;
-		if (extent_start != EROFS_NULL_ADDR &&
-		    idx.blkaddr == extent_end + 1) {
-			extent_end = idx.blkaddr;
+		if (chunk->blkaddr == EROFS_NULL_ADDR) {
+			idx.blkaddr = EROFS_NULL_ADDR;
+		} else if (chunk->device_id) {
+			DBG_BUGON(!(inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES));
+			idx.blkaddr = chunk->blkaddr;
+			extent_start = EROFS_NULL_ADDR;
 		} else {
+			idx.blkaddr = remapped_base + chunk->blkaddr;
+		}
+
+		if (extent_start == EROFS_NULL_ADDR ||
+		    idx.blkaddr != extent_end) {
 			if (extent_start != EROFS_NULL_ADDR) {
+				tarerofs_blocklist_write(extent_start,
+						extent_end - extent_start,
+						source_offset);
 				erofs_droid_blocklist_write_extent(inode,
 					extent_start,
-					(extent_end - extent_start) + 1,
+					extent_end - extent_start,
 					first_extent, false);
 				first_extent = false;
 			}
 			extent_start = idx.blkaddr;
-			extent_end = idx.blkaddr;
+			source_offset = chunk->sourceoffset;
 		}
+		extent_end = idx.blkaddr + chunkblks;
+		idx.device_id = cpu_to_le16(chunk->device_id);
+		idx.blkaddr = cpu_to_le32(idx.blkaddr);
+
 		if (unit == EROFS_BLOCK_MAP_ENTRY_SIZE)
 			memcpy(inode->chunkindexes + dst, &idx.blkaddr, unit);
 		else
 			memcpy(inode->chunkindexes + dst, &idx, sizeof(idx));
 	}
 	off = roundup(off, unit);
-
-	if (extent_start == EROFS_NULL_ADDR)
-		extents_blks = 0;
-	else
-		extents_blks = (extent_end - extent_start) + 1;
-	erofs_droid_blocklist_write_extent(inode, extent_start, extents_blks,
+	if (extent_start != EROFS_NULL_ADDR)
+		tarerofs_blocklist_write(extent_start, extent_end - extent_start,
+					 source_offset);
+	erofs_droid_blocklist_write_extent(inode, extent_start,
+			extent_start == EROFS_NULL_ADDR ?
+					0 : extent_end - extent_start,
 					   first_extent, true);
 
-	return dev_write(inode->chunkindexes, off, inode->extent_isize);
+	return dev_write(inode->sbi, inode->chunkindexes, off, inode->extent_isize);
 }
 
-int erofs_blob_write_chunked_file(struct erofs_inode *inode)
+int erofs_blob_mergechunks(struct erofs_inode *inode, unsigned int chunkbits,
+			   unsigned int new_chunkbits)
 {
-	unsigned int chunksize = 1 << cfg.c_chunkbits;
-	unsigned int count = DIV_ROUND_UP(inode->i_size, chunksize);
-	struct erofs_inode_chunk_index *idx;
-	erofs_off_t pos, len;
-	unsigned int unit;
-	int fd, ret;
+	struct erofs_sb_info *sbi = inode->sbi;
+	unsigned int dst, src, unit, count;
 
-	inode->u.chunkformat |= inode->u.chunkbits - LOG_BLOCK_SIZE;
-	if (multidev)
-		inode->u.chunkformat |= EROFS_CHUNK_FORMAT_INDEXES;
+	if (new_chunkbits - sbi->blkszbits > EROFS_CHUNK_FORMAT_BLKBITS_MASK)
+		new_chunkbits = EROFS_CHUNK_FORMAT_BLKBITS_MASK + sbi->blkszbits;
+	if (chunkbits >= new_chunkbits)		/* no need to merge */
+		goto out;
 
 	if (inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
 		unit = sizeof(struct erofs_inode_chunk_index);
 	else
 		unit = EROFS_BLOCK_MAP_ENTRY_SIZE;
 
+	count = round_up(inode->i_size, 1ULL << new_chunkbits) >> new_chunkbits;
+	for (dst = src = 0; dst < count; ++dst) {
+		*((void **)inode->chunkindexes + dst) =
+			*((void **)inode->chunkindexes + src);
+		src += 1U << (new_chunkbits - chunkbits);
+	}
+
+	DBG_BUGON(count * unit >= inode->extent_isize);
 	inode->extent_isize = count * unit;
-	idx = malloc(count * max(sizeof(*idx), sizeof(void *)));
+	chunkbits = new_chunkbits;
+out:
+	inode->u.chunkformat = (chunkbits - sbi->blkszbits) |
+		(inode->u.chunkformat & ~EROFS_CHUNK_FORMAT_BLKBITS_MASK);
+	return 0;
+}
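Worked example for the merge above, assuming a 4 KiB block size: merging 64 KiB chunks (chunkbits == 16) into 256 KiB chunks (new_chunkbits == 18) for a 1 MiB file keeps every fourth chunk pointer (src advances by 1 << 2), shrinks count from 16 to round_up(1 MiB, 256 KiB) >> 18 == 4 indexes, and sets the low bits of u.chunkformat to 18 - 12 == 6.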
+
+int erofs_blob_write_chunked_file(struct erofs_inode *inode, int fd,
+				  erofs_off_t startoff)
+{
+	struct erofs_sb_info *sbi = inode->sbi;
+	unsigned int chunkbits = cfg.c_chunkbits;
+	unsigned int count, unit;
+	struct erofs_blobchunk *chunk, *lastch;
+	struct erofs_inode_chunk_index *idx;
+	erofs_off_t pos, len, chunksize;
+	erofs_blk_t lb, minextblks;
+	u8 *chunkdata;
+	int ret;
+
+#ifdef SEEK_DATA
+	/* if the file is fully sparsed, use one big chunk instead */
+	if (lseek(fd, startoff, SEEK_DATA) < 0 && errno == ENXIO) {
+		chunkbits = ilog2(inode->i_size - 1) + 1;
+		if (chunkbits < sbi->blkszbits)
+			chunkbits = sbi->blkszbits;
+	}
+#endif
+	if (chunkbits - sbi->blkszbits > EROFS_CHUNK_FORMAT_BLKBITS_MASK)
+		chunkbits = EROFS_CHUNK_FORMAT_BLKBITS_MASK + sbi->blkszbits;
+	chunksize = 1ULL << chunkbits;
+	count = DIV_ROUND_UP(inode->i_size, chunksize);
+
+	if (sbi->extra_devices)
+		inode->u.chunkformat |= EROFS_CHUNK_FORMAT_INDEXES;
+	if (inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
+		unit = sizeof(struct erofs_inode_chunk_index);
+	else
+		unit = EROFS_BLOCK_MAP_ENTRY_SIZE;
+
+	chunkdata = malloc(chunksize);
+	if (!chunkdata)
+		return -ENOMEM;
+
+	inode->extent_isize = count * unit;
+	inode->chunkindexes = malloc(count * max(sizeof(*idx), sizeof(void *)));
+	if (!inode->chunkindexes) {
+		ret = -ENOMEM;
+		goto err;
+	}
+	idx = inode->chunkindexes;
+
+	lastch = NULL;
+	minextblks = BLK_ROUND_UP(sbi, inode->i_size);
+	for (pos = 0; pos < inode->i_size; pos += len) {
+#ifdef SEEK_DATA
+		off_t offset = lseek(fd, pos + startoff, SEEK_DATA);
+
+		if (offset < 0) {
+			if (errno != ENXIO)
+				offset = pos;
+			else
+				offset = ((pos >> chunkbits) + 1) << chunkbits;
+		} else {
+			offset -= startoff;
+
+			if (offset != (offset & ~(chunksize - 1))) {
+				offset &= ~(chunksize - 1);
+				if (lseek(fd, offset + startoff, SEEK_SET) !=
+					  startoff + offset) {
+					ret = -EIO;
+					goto err;
+				}
+			}
+		}
+
+		if (offset > pos) {
+			len = 0;
+			do {
+				*(void **)idx++ = &erofs_holechunk;
+				pos += chunksize;
+			} while (pos < offset);
+			DBG_BUGON(pos != offset);
+			lastch = NULL;
+			continue;
+		}
+#endif
+
+		len = min_t(u64, inode->i_size - pos, chunksize);
+		ret = read(fd, chunkdata, len);
+		if (ret < len) {
+			ret = -EIO;
+			goto err;
+		}
+
+		chunk = erofs_blob_getchunk(sbi, chunkdata, len);
+		if (IS_ERR(chunk)) {
+			ret = PTR_ERR(chunk);
+			goto err;
+		}
+
+		if (lastch && (lastch->device_id != chunk->device_id ||
+		    erofs_pos(sbi, lastch->blkaddr) + lastch->chunksize !=
+		    erofs_pos(sbi, chunk->blkaddr))) {
+			lb = lowbit(pos >> sbi->blkszbits);
+			if (lb && lb < minextblks)
+				minextblks = lb;
+		}
+		*(void **)idx++ = chunk;
+		lastch = chunk;
+	}
+	inode->datalayout = EROFS_INODE_CHUNK_BASED;
+	free(chunkdata);
+	return erofs_blob_mergechunks(inode, chunkbits,
+				      ilog2(minextblks) + sbi->blkszbits);
+err:
+	free(inode->chunkindexes);
+	inode->chunkindexes = NULL;
+	free(chunkdata);
+	return ret;
+}
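Worked example for the SEEK_DATA handling above (assuming startoff == 0 and 1 MiB chunks, i.e. chunkbits == 20): if pos sits at 3 MiB inside a hole and the next data begins at 5.5 MiB, lseek() returns 5.5 MiB, which is rounded down to the 5 MiB chunk boundary; the loop then records two erofs_holechunk placeholders covering 3-5 MiB before normal chunk hashing resumes at 5 MiB.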
+
+int tarerofs_write_chunkes(struct erofs_inode *inode, erofs_off_t data_offset)
+{
+	struct erofs_sb_info *sbi = inode->sbi;
+	unsigned int chunkbits = ilog2(inode->i_size - 1) + 1;
+	unsigned int count, unit, device_id;
+	erofs_off_t chunksize, len, pos;
+	erofs_blk_t blkaddr;
+	struct erofs_inode_chunk_index *idx;
+
+	if (chunkbits < sbi->blkszbits)
+		chunkbits = sbi->blkszbits;
+	if (chunkbits - sbi->blkszbits > EROFS_CHUNK_FORMAT_BLKBITS_MASK)
+		chunkbits = EROFS_CHUNK_FORMAT_BLKBITS_MASK + sbi->blkszbits;
+
+	inode->u.chunkformat |= chunkbits - sbi->blkszbits;
+	if (sbi->extra_devices) {
+		device_id = 1;
+		inode->u.chunkformat |= EROFS_CHUNK_FORMAT_INDEXES;
+		unit = sizeof(struct erofs_inode_chunk_index);
+		DBG_BUGON(erofs_blkoff(sbi, data_offset));
+		blkaddr = erofs_blknr(sbi, data_offset);
+	} else {
+		device_id = 0;
+		unit = EROFS_BLOCK_MAP_ENTRY_SIZE;
+		DBG_BUGON(erofs_blkoff(sbi, datablob_size));
+		blkaddr = erofs_blknr(sbi, datablob_size);
+		datablob_size += round_up(inode->i_size, erofs_blksiz(sbi));
+	}
+	chunksize = 1ULL << chunkbits;
+	count = DIV_ROUND_UP(inode->i_size, chunksize);
+
+	inode->extent_isize = count * unit;
+	idx = calloc(count, max(sizeof(*idx), sizeof(void *)));
 	if (!idx)
 		return -ENOMEM;
 	inode->chunkindexes = idx;
 
-	fd = open(inode->i_srcpath, O_RDONLY | O_BINARY);
-	if (fd < 0) {
-		ret = -errno;
-		goto err;
-	}
-
 	for (pos = 0; pos < inode->i_size; pos += len) {
 		struct erofs_blobchunk *chunk;
 
-		len = min_t(u64, inode->i_size - pos, chunksize);
-		chunk = erofs_blob_getchunk(fd, len);
+		len = min_t(erofs_off_t, inode->i_size - pos, chunksize);
+
+		chunk = erofs_get_unhashed_chunk(device_id, blkaddr,
+						 data_offset);
 		if (IS_ERR(chunk)) {
-			ret = PTR_ERR(chunk);
-			close(fd);
-			goto err;
+			free(inode->chunkindexes);
+			inode->chunkindexes = NULL;
+			return PTR_ERR(chunk);
 		}
+
 		*(void **)idx++ = chunk;
+		blkaddr += erofs_blknr(sbi, len);
+		data_offset += len;
 	}
 	inode->datalayout = EROFS_INODE_CHUNK_BASED;
-	close(fd);
 	return 0;
-err:
-	free(inode->chunkindexes);
-	inode->chunkindexes = NULL;
-	return ret;
 }
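Worked example for the chunk sizing at the top of tarerofs_write_chunkes(), assuming a 4 KiB block size: a 6000-byte entry gives ilog2(5999) == 12, so chunkbits == 13 and chunksize == 8192 >= i_size; count is therefore 1 and the whole entry is described by a single unhashed chunk whose source offset is data_offset.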
 
-int erofs_blob_remap(void)
+int erofs_mkfs_dump_blobs(struct erofs_sb_info *sbi)
 {
 	struct erofs_buffer_head *bh;
 	ssize_t length;
 	erofs_off_t pos_in, pos_out;
 	ssize_t ret;
 
-	fflush(blobfile);
-	length = ftell(blobfile);
-	if (length < 0)
-		return -errno;
-	if (multidev) {
-		struct erofs_deviceslot dis = {
-			.blocks = erofs_blknr(length),
-		};
+	if (blobfile) {
+		fflush(blobfile);
+		length = ftell(blobfile);
+		if (length < 0)
+			return -errno;
 
+		if (sbi->extra_devices)
+			sbi->devs[0].blocks = erofs_blknr(sbi, length);
+		else
+			datablob_size = length;
+	}
+
+	if (sbi->extra_devices) {
+		unsigned int i, ret;
+		erofs_blk_t nblocks;
+
+		nblocks = erofs_mapbh(NULL);
 		pos_out = erofs_btell(bh_devt, false);
-		ret = dev_write(&dis, pos_out, sizeof(dis));
-		if (ret)
-			return ret;
+		i = 0;
+		do {
+			struct erofs_deviceslot dis = {
+				.mapped_blkaddr = cpu_to_le32(nblocks),
+				.blocks = cpu_to_le32(sbi->devs[i].blocks),
+			};
 
+			memcpy(dis.tag, sbi->devs[i].tag, sizeof(dis.tag));
+			ret = dev_write(sbi, &dis, pos_out, sizeof(dis));
+			if (ret)
+				return ret;
+			pos_out += sizeof(dis);
+			nblocks += sbi->devs[i].blocks;
+		} while (++i < sbi->extra_devices);
 		bh_devt->op = &erofs_drop_directly_bhops;
 		erofs_bdrop(bh_devt, false);
 		return 0;
 	}
-	bh = erofs_balloc(DATA, length, 0, 0);
+
+	bh = erofs_balloc(DATA, blobfile ? datablob_size : 0, 0, 0);
 	if (IS_ERR(bh))
 		return PTR_ERR(bh);
 
 	erofs_mapbh(bh->block);
+
 	pos_out = erofs_btell(bh, false);
-	pos_in = 0;
-	remapped_base = erofs_blknr(pos_out);
-	ret = erofs_copy_file_range(fileno(blobfile), &pos_in,
-				    erofs_devfd, &pos_out, length);
+	remapped_base = erofs_blknr(sbi, pos_out);
+	if (blobfile) {
+		pos_in = 0;
+		ret = erofs_copy_file_range(fileno(blobfile), &pos_in,
+				sbi->devfd, &pos_out, datablob_size);
+		ret = ret < datablob_size ? -EIO : 0;
+	} else {
+		ret = 0;
+	}
 	bh->op = &erofs_drop_directly_bhops;
 	erofs_bdrop(bh, false);
-	return ret < length ? -EIO : 0;
+	return ret;
 }
 
 void erofs_blob_exit(void)
 {
+	struct hashmap_iter iter;
+	struct hashmap_entry *e;
+	struct erofs_blobchunk *bc, *n;
+
 	if (blobfile)
 		fclose(blobfile);
 
-	hashmap_free(&blob_hashmap, 1);
+	while ((e = hashmap_iter_first(&blob_hashmap, &iter))) {
+		bc = container_of((struct hashmap_entry *)e,
+				  struct erofs_blobchunk, ent);
+		DBG_BUGON(hashmap_remove(&blob_hashmap, e) != e);
+		free(bc);
+	}
+	DBG_BUGON(hashmap_free(&blob_hashmap));
+
+	list_for_each_entry_safe(bc, n, &unhashed_blobchunks, list) {
+		list_del(&bc->list);
+		free(bc);
+	}
 }
 
 int erofs_blob_init(const char *blobfile_path)
@@ -280,22 +504,25 @@
 	return 0;
 }
 
-int erofs_generate_devtable(void)
+int erofs_mkfs_init_devices(struct erofs_sb_info *sbi, unsigned int devices)
 {
-	struct erofs_deviceslot dis;
-
-	if (!multidev)
+	if (!devices)
 		return 0;
 
-	bh_devt = erofs_balloc(DEVT, sizeof(dis), 0, 0);
-	if (IS_ERR(bh_devt))
-		return PTR_ERR(bh_devt);
+	sbi->devs = calloc(devices, sizeof(sbi->devs[0]));
+	if (!sbi->devs)
+		return -ENOMEM;
 
-	dis = (struct erofs_deviceslot) {};
+	bh_devt = erofs_balloc(DEVT,
+		sizeof(struct erofs_deviceslot) * devices, 0, 0);
+	if (IS_ERR(bh_devt)) {
+		free(sbi->devs);
+		return PTR_ERR(bh_devt);
+	}
 	erofs_mapbh(bh_devt->block);
 	bh_devt->op = &erofs_skip_write_bhops;
-	sbi.devt_slotoff = erofs_btell(bh_devt, false) / EROFS_DEVT_SLOT_SIZE;
-	sbi.extra_devices = 1;
-	erofs_sb_set_device_table();
+	sbi->devt_slotoff = erofs_btell(bh_devt, false) / EROFS_DEVT_SLOT_SIZE;
+	sbi->extra_devices = devices;
+	erofs_sb_set_device_table(sbi);
 	return 0;
 }
diff --git a/lib/block_list.c b/lib/block_list.c
index 896fb01..f47a746 100644
--- a/lib/block_list.c
+++ b/lib/block_list.c
@@ -3,7 +3,6 @@
  * Copyright (C), 2021, Coolpad Group Limited.
  * Created by Yue Hu <huyue2@yulong.com>
  */
-#ifdef WITH_ANDROID
 #include <stdio.h>
 #include <sys/stat.h>
 #include "erofs/block_list.h"
@@ -12,17 +11,19 @@
 #include "erofs/print.h"
 
 static FILE *block_list_fp;
+bool srcmap_enabled;
 
-int erofs_droid_blocklist_fopen(void)
+int erofs_blocklist_open(char *filename, bool srcmap)
 {
-	block_list_fp = fopen(cfg.block_list_file, "w");
+	block_list_fp = fopen(filename, "w");
 
 	if (!block_list_fp)
-		return -1;
+		return -errno;
+	srcmap_enabled = srcmap;
 	return 0;
 }
 
-void erofs_droid_blocklist_fclose(void)
+void erofs_blocklist_close(void)
 {
 	if (!block_list_fp)
 		return;
@@ -31,6 +32,18 @@
 	block_list_fp = NULL;
 }
 
+/* XXX: really needs to be cleaned up */
+void tarerofs_blocklist_write(erofs_blk_t blkaddr, erofs_blk_t nblocks,
+			      erofs_off_t srcoff)
+{
+	if (!block_list_fp || !nblocks || !srcmap_enabled)
+		return;
+
+	fprintf(block_list_fp, "%08x %8x %08" PRIx64 "\n",
+		blkaddr, nblocks, srcoff);
+}
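For example (illustrative values only), a 3-block extent at block 0x120 sourced from tar offset 0x4000 is emitted as:

00000120        3 00004000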
+
+#ifdef WITH_ANDROID
 static void blocklist_write(const char *path, erofs_blk_t blk_start,
 			    erofs_blk_t nblocks, bool first_extent,
 			    bool last_extent)
@@ -95,7 +108,7 @@
 		return;
 
 	/* XXX: another hack, which means it has been outputed before */
-	if (erofs_blknr(inode->i_size)) {
+	if (erofs_blknr(inode->sbi, inode->i_size)) {
 		if (blkaddr == NULL_ADDR)
 			fprintf(block_list_fp, "\n");
 		else
diff --git a/lib/cache.c b/lib/cache.c
index c735363..caca49b 100644
--- a/lib/cache.c
+++ b/lib/cache.c
@@ -14,10 +14,10 @@
 	.list = LIST_HEAD_INIT(blkh.list),
 	.blkaddr = NULL_ADDR,
 };
-static erofs_blk_t tail_blkaddr;
+static erofs_blk_t tail_blkaddr, erofs_metablkcnt;
 
 /* buckets for all mapped buffer blocks to boost up allocation */
-static struct list_head mapped_buckets[META + 1][EROFS_BLKSIZ];
+static struct list_head mapped_buckets[META + 1][EROFS_MAX_BLOCK_SIZE];
 /* last mapped buffer block to accelerate erofs_mapbh() */
 static struct erofs_buffer_block *last_mapped_block = &blkh;
 
@@ -39,29 +39,6 @@
 	.flush = erofs_bh_flush_skip_write,
 };
 
-int erofs_bh_flush_generic_write(struct erofs_buffer_head *bh, void *buf)
-{
-	struct erofs_buffer_head *nbh = list_next_entry(bh, list);
-	erofs_off_t offset = erofs_btell(bh, false);
-
-	DBG_BUGON(nbh->off < bh->off);
-	return dev_write(buf, offset, nbh->off - bh->off);
-}
-
-static bool erofs_bh_flush_buf_write(struct erofs_buffer_head *bh)
-{
-	int err = erofs_bh_flush_generic_write(bh, bh->fsprivate);
-
-	if (err)
-		return false;
-	free(bh->fsprivate);
-	return erofs_bh_flush_generic_end(bh);
-}
-
-const struct erofs_bhops erofs_buf_write_bhops = {
-	.flush = erofs_bh_flush_buf_write,
-};
-
 /* return buffer_head of erofs super block (with size 0) */
 struct erofs_buffer_head *erofs_buffer_init(void)
 {
@@ -86,7 +63,8 @@
 	if (bb->blkaddr == NULL_ADDR)
 		return;
 
-	bkt = mapped_buckets[bb->type] + bb->buffers.off % EROFS_BLKSIZ;
+	bkt = mapped_buckets[bb->type] +
+		(bb->buffers.off & (erofs_blksiz(&sbi) - 1));
 	list_del(&bb->mapped_list);
 	list_add_tail(&bb->mapped_list, bkt);
 }
@@ -99,10 +77,11 @@
 			   unsigned int extrasize,
 			   bool dryrun)
 {
+	const unsigned int blksiz = erofs_blksiz(&sbi);
+	const unsigned int blkmask = blksiz - 1;
 	const erofs_off_t alignedoffset = roundup(bb->buffers.off, alignsize);
-	const int oob = cmpsgn(roundup((bb->buffers.off - 1) % EROFS_BLKSIZ + 1,
-				       alignsize) + incr + extrasize,
-			       EROFS_BLKSIZ);
+	const int oob = cmpsgn(roundup(((bb->buffers.off - 1) & blkmask) + 1,
+				       alignsize) + incr + extrasize, blksiz);
 	bool tailupdate = false;
 	erofs_blk_t blkaddr;
 
@@ -114,7 +93,7 @@
 		blkaddr = bb->blkaddr;
 		if (blkaddr != NULL_ADDR) {
 			tailupdate = (tail_blkaddr == blkaddr +
-				      BLK_ROUND_UP(bb->buffers.off));
+				      DIV_ROUND_UP(bb->buffers.off, blksiz));
 			if (oob && !tailupdate)
 				return -EINVAL;
 		}
@@ -129,10 +108,11 @@
 		bb->buffers.off = alignedoffset + incr;
 		/* need to update the tail_blkaddr */
 		if (tailupdate)
-			tail_blkaddr = blkaddr + BLK_ROUND_UP(bb->buffers.off);
+			tail_blkaddr = blkaddr +
+					DIV_ROUND_UP(bb->buffers.off, blksiz);
 		erofs_bupdate_mapped(bb);
 	}
-	return (alignedoffset + incr - 1) % EROFS_BLKSIZ + 1;
+	return ((alignedoffset + incr - 1) & blkmask) + 1;
 }
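The modulo-to-mask rewrites in this hunk rely on the block size being a power of two; a tiny self-contained check (illustrative only, not part of the patch):

#include <assert.h>

int main(void)
{
	unsigned long long off = 123456789ULL;
	unsigned int blksiz = 4096;	/* any power-of-two block size */

	/* x % blksiz equals x & (blksiz - 1) when blksiz is a power of two */
	assert(off % blksiz == (off & (blksiz - 1ULL)));
	return 0;
}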
 
 int erofs_bh_balloon(struct erofs_buffer_head *bh, erofs_off_t incr)
@@ -152,16 +132,17 @@
 				  unsigned int alignsize,
 				  struct erofs_buffer_block **bbp)
 {
+	const unsigned int blksiz = erofs_blksiz(&sbi);
 	struct erofs_buffer_block *cur, *bb;
 	unsigned int used0, used_before, usedmax, used;
 	int ret;
 
-	used0 = (size + required_ext) % EROFS_BLKSIZ + inline_ext;
+	used0 = ((size + required_ext) & (blksiz - 1)) + inline_ext;
 	/* inline data should be in the same fs block */
-	if (used0 > EROFS_BLKSIZ)
+	if (used0 > blksiz)
 		return -ENOSPC;
 
-	if (!used0 || alignsize == EROFS_BLKSIZ) {
+	if (!used0 || alignsize == blksiz) {
 		*bbp = NULL;
 		return 0;
 	}
@@ -170,10 +151,10 @@
 	bb = NULL;
 
 	/* try to find a most-fit mapped buffer block first */
-	if (size + required_ext + inline_ext >= EROFS_BLKSIZ)
+	if (size + required_ext + inline_ext >= blksiz)
 		goto skip_mapped;
 
-	used_before = rounddown(EROFS_BLKSIZ -
+	used_before = rounddown(blksiz -
 				(size + required_ext + inline_ext), alignsize);
 	for (; used_before; --used_before) {
 		struct list_head *bt = mapped_buckets[type] + used_before;
@@ -191,7 +172,7 @@
 
 		DBG_BUGON(cur->type != type);
 		DBG_BUGON(cur->blkaddr == NULL_ADDR);
-		DBG_BUGON(used_before != cur->buffers.off % EROFS_BLKSIZ);
+		DBG_BUGON(used_before != (cur->buffers.off & (blksiz - 1)));
 
 		ret = __erofs_battach(cur, NULL, size, alignsize,
 				      required_ext + inline_ext, true);
@@ -202,7 +183,7 @@
 
 		/* should contain all data in the current block */
 		used = ret + required_ext + inline_ext;
-		DBG_BUGON(used > EROFS_BLKSIZ);
+		DBG_BUGON(used > blksiz);
 
 		bb = cur;
 		usedmax = used;
@@ -215,7 +196,7 @@
 	if (cur == &blkh)
 		cur = list_next_entry(cur, list);
 	for (; cur != &blkh; cur = list_next_entry(cur, list)) {
-		used_before = cur->buffers.off % EROFS_BLKSIZ;
+		used_before = cur->buffers.off & (blksiz - 1);
 
 		/* skip if buffer block is just full */
 		if (!used_before)
@@ -230,10 +211,10 @@
 		if (ret < 0)
 			continue;
 
-		used = (ret + required_ext) % EROFS_BLKSIZ + inline_ext;
+		used = ((ret + required_ext) & (blksiz - 1)) + inline_ext;
 
 		/* should contain inline data in current block */
-		if (used > EROFS_BLKSIZ)
+		if (used > blksiz)
 			continue;
 
 		/*
@@ -288,7 +269,10 @@
 		bb->blkaddr = NULL_ADDR;
 		bb->buffers.off = 0;
 		init_list_head(&bb->buffers.list);
-		list_add_tail(&bb->list, &blkh.list);
+		if (type == DATA)
+			list_add(&bb->list, &last_mapped_block->list);
+		else
+			list_add_tail(&bb->list, &blkh.list);
 		init_list_head(&bb->mapped_list);
 
 		bh = malloc(sizeof(struct erofs_buffer_head));
@@ -300,8 +284,10 @@
 
 	ret = __erofs_battach(bb, bh, size, alignsize,
 			      required_ext + inline_ext, false);
-	if (ret < 0)
+	if (ret < 0) {
+		free(bh);
 		return ERR_PTR(ret);
+	}
 	return bh;
 }
 
@@ -343,7 +329,7 @@
 		erofs_bupdate_mapped(bb);
 	}
 
-	blkaddr = bb->blkaddr + BLK_ROUND_UP(bb->buffers.off);
+	blkaddr = bb->blkaddr + BLK_ROUND_UP(&sbi, bb->buffers.off);
 	if (blkaddr > tail_blkaddr)
 		tail_blkaddr = blkaddr;
 
@@ -367,8 +353,21 @@
 	return tail_blkaddr;
 }
 
+static void erofs_bfree(struct erofs_buffer_block *bb)
+{
+	DBG_BUGON(!list_empty(&bb->buffers.list));
+
+	if (bb == last_mapped_block)
+		last_mapped_block = list_prev_entry(bb, list);
+
+	list_del(&bb->mapped_list);
+	list_del(&bb->list);
+	free(bb);
+}
+
 bool erofs_bflush(struct erofs_buffer_block *bb)
 {
+	const unsigned int blksiz = erofs_blksiz(&sbi);
 	struct erofs_buffer_block *p, *n;
 	erofs_blk_t blkaddr;
 
@@ -396,18 +395,15 @@
 		if (skip)
 			continue;
 
-		padding = EROFS_BLKSIZ - p->buffers.off % EROFS_BLKSIZ;
-		if (padding != EROFS_BLKSIZ)
-			dev_fillzero(blknr_to_addr(blkaddr) - padding,
+		padding = blksiz - (p->buffers.off & (blksiz - 1));
+		if (padding != blksiz)
+			dev_fillzero(&sbi, erofs_pos(&sbi, blkaddr) - padding,
 				     padding, true);
 
-		DBG_BUGON(!list_empty(&p->buffers.list));
-
+		if (p->type != DATA)
+			erofs_metablkcnt += BLK_ROUND_UP(&sbi, p->buffers.off);
 		erofs_dbg("block %u to %u flushed", p->blkaddr, blkaddr - 1);
-
-		list_del(&p->mapped_list);
-		list_del(&p->list);
-		free(p);
+		erofs_bfree(p);
 	}
 	return true;
 }
@@ -420,7 +416,7 @@
 
 	/* tail_blkaddr could be rolled back after revoking all bhs */
 	if (tryrevoke && blkaddr != NULL_ADDR &&
-	    tail_blkaddr == blkaddr + BLK_ROUND_UP(bb->buffers.off))
+	    tail_blkaddr == blkaddr + BLK_ROUND_UP(&sbi, bb->buffers.off))
 		rollback = true;
 
 	bh->op = &erofs_drop_directly_bhops;
@@ -429,13 +425,14 @@
 	if (!list_empty(&bb->buffers.list))
 		return;
 
-	if (bb == last_mapped_block)
-		last_mapped_block = list_prev_entry(bb, list);
-
-	list_del(&bb->mapped_list);
-	list_del(&bb->list);
-	free(bb);
-
+	if (!rollback && bb->type != DATA)
+		erofs_metablkcnt += BLK_ROUND_UP(&sbi, bb->buffers.off);
+	erofs_bfree(bb);
 	if (rollback)
 		tail_blkaddr = blkaddr;
 }
+
+erofs_blk_t erofs_total_metablocks(void)
+{
+	return erofs_metablkcnt;
+}
diff --git a/lib/compress.c b/lib/compress.c
index ee3b856..f6dc12a 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -15,59 +15,70 @@
 #include "erofs/io.h"
 #include "erofs/cache.h"
 #include "erofs/compress.h"
+#include "erofs/dedupe.h"
 #include "compressor.h"
 #include "erofs/block_list.h"
 #include "erofs/compress_hints.h"
+#include "erofs/fragments.h"
 
-static struct erofs_compress compresshandle;
-static unsigned int algorithmtype[2];
+/* compressing configuration specified by users */
+struct erofs_compress_cfg {
+	struct erofs_compress handle;
+	unsigned int algorithmtype;
+	bool enable;
+} erofs_ccfg[EROFS_MAX_COMPR_CFGS];
 
 struct z_erofs_vle_compress_ctx {
-	u8 *metacur;
-
 	u8 queue[EROFS_CONFIG_COMPR_MAX_SZ * 2];
+	struct z_erofs_inmem_extent e;	/* (lookahead) extent */
+
+	struct erofs_inode *inode;
+	struct erofs_compress_cfg *ccfg;
+
+	u8 *metacur;
 	unsigned int head, tail;
-	unsigned int compressedblks;
+	erofs_off_t remaining;
+	unsigned int pclustersize;
 	erofs_blk_t blkaddr;		/* pointing to the next blkaddr */
 	u16 clusterofs;
+
+	u32 tof_chksum;
+	bool fix_dedupedfrag;
+	bool fragemitted;
 };
 
-#define Z_EROFS_LEGACY_MAP_HEADER_SIZE	\
-	(sizeof(struct z_erofs_map_header) + Z_EROFS_VLE_LEGACY_HEADER_PADDING)
+#define Z_EROFS_LEGACY_MAP_HEADER_SIZE	Z_EROFS_FULL_INDEX_ALIGN(0)
 
-static unsigned int vle_compressmeta_capacity(erofs_off_t filesize)
+static void z_erofs_write_indexes_final(struct z_erofs_vle_compress_ctx *ctx)
 {
-	const unsigned int indexsize = BLK_ROUND_UP(filesize) *
-		sizeof(struct z_erofs_vle_decompressed_index);
-
-	return Z_EROFS_LEGACY_MAP_HEADER_SIZE + indexsize;
-}
-
-static void vle_write_indexes_final(struct z_erofs_vle_compress_ctx *ctx)
-{
-	const unsigned int type = Z_EROFS_VLE_CLUSTER_TYPE_PLAIN;
-	struct z_erofs_vle_decompressed_index di;
+	const unsigned int type = Z_EROFS_LCLUSTER_TYPE_PLAIN;
+	struct z_erofs_lcluster_index di;
 
 	if (!ctx->clusterofs)
 		return;
 
 	di.di_clusterofs = cpu_to_le16(ctx->clusterofs);
 	di.di_u.blkaddr = 0;
-	di.di_advise = cpu_to_le16(type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT);
+	di.di_advise = cpu_to_le16(type << Z_EROFS_LI_LCLUSTER_TYPE_BIT);
 
 	memcpy(ctx->metacur, &di, sizeof(di));
 	ctx->metacur += sizeof(di);
 }
 
-static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
-			      unsigned int count, bool raw)
+static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
 {
+	struct erofs_inode *inode = ctx->inode;
+	struct erofs_sb_info *sbi = inode->sbi;
 	unsigned int clusterofs = ctx->clusterofs;
-	unsigned int d0 = 0, d1 = (clusterofs + count) / EROFS_BLKSIZ;
-	struct z_erofs_vle_decompressed_index di;
-	unsigned int type;
-	__le16 advise;
+	unsigned int count = ctx->e.length;
+	unsigned int d0 = 0, d1 = (clusterofs + count) / erofs_blksiz(sbi);
+	struct z_erofs_lcluster_index di;
+	unsigned int type, advise;
 
+	if (!count)
+		return;
+
+	ctx->e.length = 0;	/* mark as written first */
 	di.di_clusterofs = cpu_to_le16(ctx->clusterofs);
 
 	/* whether the tail-end (un)compressed block or not */
@@ -76,13 +87,18 @@
 		 * A lcluster cannot have three parts with the middle one which
 		 * is well-compressed for !ztailpacking cases.
 		 */
-		DBG_BUGON(!raw && !cfg.c_ztailpacking);
-		type = raw ? Z_EROFS_VLE_CLUSTER_TYPE_PLAIN :
-			Z_EROFS_VLE_CLUSTER_TYPE_HEAD;
-		advise = cpu_to_le16(type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT);
+		DBG_BUGON(!ctx->e.raw && !cfg.c_ztailpacking && !cfg.c_fragments);
+		DBG_BUGON(ctx->e.partial);
+		type = ctx->e.raw ? Z_EROFS_LCLUSTER_TYPE_PLAIN :
+			Z_EROFS_LCLUSTER_TYPE_HEAD1;
+		advise = type << Z_EROFS_LI_LCLUSTER_TYPE_BIT;
+		di.di_advise = cpu_to_le16(advise);
 
-		di.di_advise = advise;
-		di.di_u.blkaddr = cpu_to_le32(ctx->blkaddr);
+		if (inode->datalayout == EROFS_INODE_COMPRESSED_FULL &&
+		    !ctx->e.compressedblks)
+			di.di_u.blkaddr = cpu_to_le32(inode->fragmentoff >> 32);
+		else
+			di.di_u.blkaddr = cpu_to_le32(ctx->e.blkaddr);
 		memcpy(ctx->metacur, &di, sizeof(di));
 		ctx->metacur += sizeof(di);
 
@@ -92,14 +108,15 @@
 	}
 
 	do {
+		advise = 0;
 		/* XXX: big pcluster feature should be per-inode */
-		if (d0 == 1 && erofs_sb_has_big_pcluster()) {
-			type = Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD;
-			di.di_u.delta[0] = cpu_to_le16(ctx->compressedblks |
-					Z_EROFS_VLE_DI_D0_CBLKCNT);
+		if (d0 == 1 && erofs_sb_has_big_pcluster(sbi)) {
+			type = Z_EROFS_LCLUSTER_TYPE_NONHEAD;
+			di.di_u.delta[0] = cpu_to_le16(ctx->e.compressedblks |
+						       Z_EROFS_LI_D0_CBLKCNT);
 			di.di_u.delta[1] = cpu_to_le16(d1);
 		} else if (d0) {
-			type = Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD;
+			type = Z_EROFS_LCLUSTER_TYPE_NONHEAD;
 
 			/*
 			 * If the |Z_EROFS_VLE_DI_D0_CBLKCNT| bit is set, parser
@@ -112,73 +129,194 @@
 			 * To solve this, we replace d0 with
 			 * Z_EROFS_VLE_DI_D0_CBLKCNT-1.
 			 */
-			if (d0 >= Z_EROFS_VLE_DI_D0_CBLKCNT)
+			if (d0 >= Z_EROFS_LI_D0_CBLKCNT)
 				di.di_u.delta[0] = cpu_to_le16(
-						Z_EROFS_VLE_DI_D0_CBLKCNT - 1);
+						Z_EROFS_LI_D0_CBLKCNT - 1);
 			else
 				di.di_u.delta[0] = cpu_to_le16(d0);
 			di.di_u.delta[1] = cpu_to_le16(d1);
 		} else {
-			type = raw ? Z_EROFS_VLE_CLUSTER_TYPE_PLAIN :
-				Z_EROFS_VLE_CLUSTER_TYPE_HEAD;
-			di.di_u.blkaddr = cpu_to_le32(ctx->blkaddr);
+			type = ctx->e.raw ? Z_EROFS_LCLUSTER_TYPE_PLAIN :
+				Z_EROFS_LCLUSTER_TYPE_HEAD1;
+
+			if (inode->datalayout == EROFS_INODE_COMPRESSED_FULL &&
+			    !ctx->e.compressedblks)
+				di.di_u.blkaddr = cpu_to_le32(inode->fragmentoff >> 32);
+			else
+				di.di_u.blkaddr = cpu_to_le32(ctx->e.blkaddr);
+
+			if (ctx->e.partial) {
+				DBG_BUGON(ctx->e.raw);
+				advise |= Z_EROFS_LI_PARTIAL_REF;
+			}
 		}
-		advise = cpu_to_le16(type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT);
-		di.di_advise = advise;
+		advise |= type << Z_EROFS_LI_LCLUSTER_TYPE_BIT;
+		di.di_advise = cpu_to_le16(advise);
 
 		memcpy(ctx->metacur, &di, sizeof(di));
 		ctx->metacur += sizeof(di);
 
-		count -= EROFS_BLKSIZ - clusterofs;
+		count -= erofs_blksiz(sbi) - clusterofs;
 		clusterofs = 0;
 
 		++d0;
 		--d1;
-	} while (clusterofs + count >= EROFS_BLKSIZ);
+	} while (clusterofs + count >= erofs_blksiz(sbi));
 
 	ctx->clusterofs = clusterofs + count;
 }
 
+static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
+				   unsigned int *len)
+{
+	struct erofs_inode *inode = ctx->inode;
+	const unsigned int lclustermask = (1 << inode->z_logical_clusterbits) - 1;
+	struct erofs_sb_info *sbi = inode->sbi;
+	int ret = 0;
+
+	/*
+	 * No need dedupe for packed inode since it is composed of
+	 * No need to dedupe the packed inode since it is composed of
+	 */
+	if (erofs_is_packed_inode(inode))
+		goto out;
+
+	do {
+		struct z_erofs_dedupe_ctx dctx = {
+			.start = ctx->queue + ctx->head - ({ int rc;
+				if (ctx->e.length <= erofs_blksiz(sbi))
+					rc = 0;
+				else if (ctx->e.length - erofs_blksiz(sbi) >= ctx->head)
+					rc = ctx->head;
+				else
+					rc = ctx->e.length - erofs_blksiz(sbi);
+				rc; }),
+			.end = ctx->queue + ctx->head + *len,
+			.cur = ctx->queue + ctx->head,
+		};
+		int delta;
+
+		if (z_erofs_dedupe_match(&dctx))
+			break;
+
+		delta = ctx->queue + ctx->head - dctx.cur;
+		/*
+		 * For big pcluster dedupe, leave at least two indices to store
+		 * CBLKCNT as the first step.  Even later, a one-block
+		 * decompression could be done as another try in practice.
+		 */
+		if (dctx.e.compressedblks > 1 &&
+		    ((ctx->clusterofs + ctx->e.length - delta) & lclustermask) +
+			dctx.e.length < 2 * (lclustermask + 1))
+			break;
+
+		if (delta) {
+			DBG_BUGON(delta < 0);
+			DBG_BUGON(!ctx->e.length);
+
+			/*
+			 * For big pcluster dedupe, if we decide to shorten the
+			 * previous big pcluster, make sure that the previous
+			 * CBLKCNT is still kept.
+			 */
+			if (ctx->e.compressedblks > 1 &&
+			    (ctx->clusterofs & lclustermask) + ctx->e.length
+				- delta < 2 * (lclustermask + 1))
+				break;
+			ctx->e.partial = true;
+			ctx->e.length -= delta;
+		}
+
+		/* fall back to noncompact indexes for deduplication */
+		inode->z_advise &= ~Z_EROFS_ADVISE_COMPACTED_2B;
+		inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
+		erofs_sb_set_dedupe(sbi);
+
+		sbi->saved_by_deduplication +=
+			dctx.e.compressedblks * erofs_blksiz(sbi);
+		erofs_dbg("Dedupe %u %scompressed data (delta %d) to %u of %u blocks",
+			  dctx.e.length, dctx.e.raw ? "un" : "",
+			  delta, dctx.e.blkaddr, dctx.e.compressedblks);
+		z_erofs_write_indexes(ctx);
+		ctx->e = dctx.e;
+		ctx->head += dctx.e.length - delta;
+		DBG_BUGON(*len < dctx.e.length - delta);
+		*len -= dctx.e.length - delta;
+
+		if (ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) {
+			const unsigned int qh_aligned =
+				round_down(ctx->head, erofs_blksiz(sbi));
+			const unsigned int qh_after = ctx->head - qh_aligned;
+
+			memmove(ctx->queue, ctx->queue + qh_aligned,
+				*len + qh_after);
+			ctx->head = qh_after;
+			ctx->tail = qh_after + *len;
+			ret = -EAGAIN;
+			break;
+		}
+	} while (*len);
+
+out:
+	z_erofs_write_indexes(ctx);
+	return ret;
+}
+
 static int write_uncompressed_extent(struct z_erofs_vle_compress_ctx *ctx,
 				     unsigned int *len, char *dst)
 {
 	int ret;
-	unsigned int count;
+	struct erofs_sb_info *sbi = ctx->inode->sbi;
+	unsigned int count, interlaced_offset, rightpart;
 
 	/* reset clusterofs to 0 if permitted */
-	if (!erofs_sb_has_lz4_0padding() && ctx->clusterofs &&
+	if (!erofs_sb_has_lz4_0padding(sbi) && ctx->clusterofs &&
 	    ctx->head >= ctx->clusterofs) {
 		ctx->head -= ctx->clusterofs;
 		*len += ctx->clusterofs;
 		ctx->clusterofs = 0;
 	}
 
-	/* write uncompressed data */
-	count = min(EROFS_BLKSIZ, *len);
+	count = min(erofs_blksiz(sbi), *len);
 
-	memcpy(dst, ctx->queue + ctx->head, count);
-	memset(dst + count, 0, EROFS_BLKSIZ - count);
+	/* write interlaced uncompressed data if needed */
+	if (ctx->inode->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER)
+		interlaced_offset = ctx->clusterofs;
+	else
+		interlaced_offset = 0;
+	rightpart = min(erofs_blksiz(sbi) - interlaced_offset, count);
+
+	memset(dst, 0, erofs_blksiz(sbi));
+
+	memcpy(dst + interlaced_offset, ctx->queue + ctx->head, rightpart);
+	memcpy(dst, ctx->queue + ctx->head + rightpart, count - rightpart);
 
 	erofs_dbg("Writing %u uncompressed data to block %u",
 		  count, ctx->blkaddr);
-	ret = blk_write(dst, ctx->blkaddr, 1);
+	ret = blk_write(sbi, dst, ctx->blkaddr, 1);
 	if (ret)
 		return ret;
 	return count;
 }
 
-static unsigned int z_erofs_get_max_pclusterblks(struct erofs_inode *inode)
+static unsigned int z_erofs_get_max_pclustersize(struct erofs_inode *inode)
 {
+	unsigned int pclusterblks;
+
+	if (erofs_is_packed_inode(inode))
+		pclusterblks = cfg.c_pclusterblks_packed;
 #ifndef NDEBUG
-	if (cfg.c_random_pclusterblks)
-		return 1 + rand() % cfg.c_pclusterblks_max;
+	else if (cfg.c_random_pclusterblks)
+		pclusterblks = 1 + rand() % cfg.c_pclusterblks_max;
 #endif
-	if (cfg.c_compress_hints_file) {
+	else if (cfg.c_compress_hints_file) {
 		z_erofs_apply_compress_hints(inode);
 		DBG_BUGON(!inode->z_physical_clusterblks);
-		return inode->z_physical_clusterblks;
+		pclusterblks = inode->z_physical_clusterblks;
+	} else {
+		pclusterblks = cfg.c_pclusterblks_def;
 	}
-	return cfg.c_pclusterblks_def;
+	return pclusterblks * erofs_blksiz(inode->sbi);
 }
 
 static int z_erofs_fill_inline_data(struct erofs_inode *inode, void *data,
@@ -197,23 +335,25 @@
 	return len;
 }
 
-static void tryrecompress_trailing(void *in, unsigned int *insize,
+static void tryrecompress_trailing(struct z_erofs_vle_compress_ctx *ctx,
+				   struct erofs_compress *ec,
+				   void *in, unsigned int *insize,
 				   void *out, int *compressedsize)
 {
+	struct erofs_sb_info *sbi = ctx->inode->sbi;
 	static char tmp[Z_EROFS_PCLUSTER_MAX_SIZE];
 	unsigned int count;
 	int ret = *compressedsize;
 
 	/* no need to recompress */
-	if (!(ret & (EROFS_BLKSIZ - 1)))
+	if (!(ret & (erofs_blksiz(sbi) - 1)))
 		return;
 
 	count = *insize;
-	ret = erofs_compress_destsize(&compresshandle,
-				      in, &count, (void *)tmp,
-				      rounddown(ret, EROFS_BLKSIZ), false);
+	ret = erofs_compress_destsize(ec, in, &count, (void *)tmp,
+				      rounddown(ret, erofs_blksiz(sbi)), false);
 	if (ret <= 0 || ret + (*insize - count) >=
-			roundup(*compressedsize, EROFS_BLKSIZ))
+			roundup(*compressedsize, erofs_blksiz(sbi)))
 		return;
 
 	/* replace the original compressed data if any gain */
@@ -222,34 +362,79 @@
 	*compressedsize = ret;
 }
 
-static int vle_compress_one(struct erofs_inode *inode,
-			    struct z_erofs_vle_compress_ctx *ctx,
-			    bool final)
+static bool z_erofs_fixup_deduped_fragment(struct z_erofs_vle_compress_ctx *ctx,
+					   unsigned int len)
 {
-	struct erofs_compress *const h = &compresshandle;
+	struct erofs_inode *inode = ctx->inode;
+	struct erofs_sb_info *sbi = inode->sbi;
+	const unsigned int newsize = ctx->remaining + len;
+
+	DBG_BUGON(!inode->fragment_size);
+
+	/* try to fix again if it gets larger (should be rare) */
+	if (inode->fragment_size < newsize) {
+		ctx->pclustersize = min(z_erofs_get_max_pclustersize(inode),
+					roundup(newsize - inode->fragment_size,
+						erofs_blksiz(sbi)));
+		return false;
+	}
+
+	inode->fragmentoff += inode->fragment_size - newsize;
+	inode->fragment_size = newsize;
+
+	erofs_dbg("Reducing fragment size to %u at %llu",
+		  inode->fragment_size, inode->fragmentoff | 0ULL);
+
+	/* it's the end */
+	DBG_BUGON(ctx->tail - ctx->head + ctx->remaining != newsize);
+	ctx->head = ctx->tail;
+	ctx->remaining = 0;
+	return true;
+}
+
+static int vle_compress_one(struct z_erofs_vle_compress_ctx *ctx)
+{
+	static char dstbuf[EROFS_CONFIG_COMPR_MAX_SZ + EROFS_MAX_BLOCK_SIZE];
+	struct erofs_inode *inode = ctx->inode;
+	struct erofs_sb_info *sbi = inode->sbi;
+	char *const dst = dstbuf + erofs_blksiz(sbi);
+	struct erofs_compress *const h = &ctx->ccfg->handle;
 	unsigned int len = ctx->tail - ctx->head;
-	unsigned int count;
+	bool is_packed_inode = erofs_is_packed_inode(inode);
+	bool final = !ctx->remaining;
 	int ret;
-	static char dstbuf[EROFS_CONFIG_COMPR_MAX_SZ + EROFS_BLKSIZ];
-	char *const dst = dstbuf + EROFS_BLKSIZ;
 
 	while (len) {
-		unsigned int pclustersize =
-			z_erofs_get_max_pclusterblks(inode) * EROFS_BLKSIZ;
-		bool may_inline = (cfg.c_ztailpacking && final);
-		bool raw;
+		bool may_packing = (cfg.c_fragments && final &&
+				   !is_packed_inode);
+		bool may_inline = (cfg.c_ztailpacking && final &&
+				  !may_packing);
+		bool fix_dedupedfrag = ctx->fix_dedupedfrag;
 
-		if (len <= pclustersize) {
-			if (!final)
+		if (z_erofs_compress_dedupe(ctx, &len) && !final)
+			break;
+
+		if (len <= ctx->pclustersize) {
+			if (!final || !len)
 				break;
-			if (!may_inline && len <= EROFS_BLKSIZ)
+			if (may_packing) {
+				if (inode->fragment_size && !fix_dedupedfrag) {
+					ctx->pclustersize =
+						roundup(len, erofs_blksiz(sbi));
+					goto fix_dedupedfrag;
+				}
+				ctx->e.length = len;
+				goto frag_packing;
+			}
+			if (!may_inline && len <= erofs_blksiz(sbi))
 				goto nocompression;
 		}
 
-		count = min(len, cfg.c_max_decompressed_extent_bytes);
+		ctx->e.length = min(len,
+				cfg.c_max_decompressed_extent_bytes);
 		ret = erofs_compress_destsize(h, ctx->queue + ctx->head,
-					      &count, dst, pclustersize,
-					      !(final && len == count));
+				&ctx->e.length, dst, ctx->pclustersize,
+				!(final && len == ctx->e.length));
 		if (ret <= 0) {
 			if (ret != -EAGAIN) {
 				erofs_err("failed to compress %s: %s",
@@ -257,29 +442,45 @@
 					  erofs_strerror(ret));
 			}
 
-			if (may_inline && len < EROFS_BLKSIZ)
+			if (may_inline && len < erofs_blksiz(sbi)) {
 				ret = z_erofs_fill_inline_data(inode,
 						ctx->queue + ctx->head,
 						len, true);
-			else
+			} else {
+				may_inline = false;
+				may_packing = false;
 nocompression:
 				ret = write_uncompressed_extent(ctx, &len, dst);
+			}
 
 			if (ret < 0)
 				return ret;
-			count = ret;
+			ctx->e.length = ret;
 
 			/*
 			 * XXX: For now, we have to leave `ctx->compressedblks
 			 * = 1' since there is no way to generate compressed
 			 * indexes after the time that ztailpacking is decided.
 			 */
-			ctx->compressedblks = 1;
-			raw = true;
+			ctx->e.compressedblks = 1;
+			ctx->e.raw = true;
+		} else if (may_packing && len == ctx->e.length &&
+			   ret < ctx->pclustersize &&
+			   (!inode->fragment_size || fix_dedupedfrag)) {
+frag_packing:
+			ret = z_erofs_pack_fragments(inode,
+						     ctx->queue + ctx->head,
+						     len, ctx->tof_chksum);
+			if (ret < 0)
+				return ret;
+			ctx->e.compressedblks = 0; /* indicate a fragment */
+			ctx->e.raw = false;
+			ctx->fragemitted = true;
+			fix_dedupedfrag = false;
 		/* tailpcluster should be less than 1 block */
-		} else if (may_inline && len == count &&
-			   ret < EROFS_BLKSIZ) {
-			if (ctx->clusterofs + len <= EROFS_BLKSIZ) {
+		} else if (may_inline && len == ctx->e.length &&
+			   ret < erofs_blksiz(sbi)) {
+			if (ctx->clusterofs + len <= erofs_blksiz(sbi)) {
 				inode->eof_tailraw = malloc(len);
 				if (!inode->eof_tailraw)
 					return -ENOMEM;
@@ -292,48 +493,73 @@
 			ret = z_erofs_fill_inline_data(inode, dst, ret, false);
 			if (ret < 0)
 				return ret;
-			ctx->compressedblks = 1;
-			raw = false;
+			ctx->e.compressedblks = 1;
+			ctx->e.raw = false;
 		} else {
 			unsigned int tailused, padding;
 
-			if (may_inline && len == count)
-				tryrecompress_trailing(ctx->queue + ctx->head,
-						       &count, dst, &ret);
+			/*
+			 * If there's space left for the last round when
+			 * deduping fragments, try to read the fragment and
+			 * recompress a little more to check whether it can be
+			 * filled up. Fix up the fragment if it succeeds.
+			 * Otherwise, just drop it and go to packing.
+			 */
+			if (may_packing && len == ctx->e.length &&
+			    (ret & (erofs_blksiz(sbi) - 1)) &&
+			    ctx->tail < sizeof(ctx->queue)) {
+				ctx->pclustersize = BLK_ROUND_UP(sbi, ret) *
+						erofs_blksiz(sbi);
+				goto fix_dedupedfrag;
+			}
 
-			tailused = ret & (EROFS_BLKSIZ - 1);
+			if (may_inline && len == ctx->e.length)
+				tryrecompress_trailing(ctx, h,
+						ctx->queue + ctx->head,
+						&ctx->e.length, dst, &ret);
+
+			tailused = ret & (erofs_blksiz(sbi) - 1);
 			padding = 0;
-			ctx->compressedblks = DIV_ROUND_UP(ret, EROFS_BLKSIZ);
-			DBG_BUGON(ctx->compressedblks * EROFS_BLKSIZ >= count);
+			ctx->e.compressedblks = BLK_ROUND_UP(sbi, ret);
+			DBG_BUGON(ctx->e.compressedblks * erofs_blksiz(sbi) >=
+				  ctx->e.length);
 
 			/* zero out garbage trailing data for non-0padding */
-			if (!erofs_sb_has_lz4_0padding())
+			if (!erofs_sb_has_lz4_0padding(sbi))
 				memset(dst + ret, 0,
-				       roundup(ret, EROFS_BLKSIZ) - ret);
+				       roundup(ret, erofs_blksiz(sbi)) - ret);
 			else if (tailused)
-				padding = EROFS_BLKSIZ - tailused;
+				padding = erofs_blksiz(sbi) - tailused;
 
 			/* write compressed data */
 			erofs_dbg("Writing %u compressed data to %u of %u blocks",
-				  count, ctx->blkaddr, ctx->compressedblks);
+				  ctx->e.length, ctx->blkaddr,
+				  ctx->e.compressedblks);
 
-			ret = blk_write(dst - padding, ctx->blkaddr,
-					ctx->compressedblks);
+			ret = blk_write(sbi, dst - padding, ctx->blkaddr,
+					ctx->e.compressedblks);
 			if (ret)
 				return ret;
-			raw = false;
+			ctx->e.raw = false;
+			may_inline = false;
+			may_packing = false;
 		}
+		ctx->e.partial = false;
+		ctx->e.blkaddr = ctx->blkaddr;
+		if (!may_inline && !may_packing && !is_packed_inode)
+			(void)z_erofs_dedupe_insert(&ctx->e,
+						    ctx->queue + ctx->head);
+		ctx->blkaddr += ctx->e.compressedblks;
+		ctx->head += ctx->e.length;
+		len -= ctx->e.length;
 
-		ctx->head += count;
-		/* write compression indexes for this pcluster */
-		vle_write_indexes(ctx, count, raw);
-
-		ctx->blkaddr += ctx->compressedblks;
-		len -= count;
+		if (fix_dedupedfrag &&
+		    z_erofs_fixup_deduped_fragment(ctx, len))
+			break;
 
 		if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) {
 			const unsigned int qh_aligned =
-				round_down(ctx->head, EROFS_BLKSIZ);
+				round_down(ctx->head, erofs_blksiz(sbi));
 			const unsigned int qh_after = ctx->head - qh_aligned;
 
 			memmove(ctx->queue, ctx->queue + qh_aligned,
@@ -344,6 +570,13 @@
 		}
 	}
 	return 0;
+
+fix_dedupedfrag:
+	DBG_BUGON(!inode->fragment_size);
+	ctx->remaining += inode->fragment_size;
+	ctx->e.length = 0;
+	ctx->fix_dedupedfrag = true;
+	return 0;
 }
 
 struct z_erofs_compressindex_vec {
@@ -358,18 +591,18 @@
 static void *parse_legacy_indexes(struct z_erofs_compressindex_vec *cv,
 				  unsigned int nr, void *metacur)
 {
-	struct z_erofs_vle_decompressed_index *const db = metacur;
+	struct z_erofs_lcluster_index *const db = metacur;
 	unsigned int i;
 
 	for (i = 0; i < nr; ++i, ++cv) {
-		struct z_erofs_vle_decompressed_index *const di = db + i;
+		struct z_erofs_lcluster_index *const di = db + i;
 		const unsigned int advise = le16_to_cpu(di->di_advise);
 
-		cv->clustertype = (advise >> Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT) &
-			((1 << Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) - 1);
+		cv->clustertype = (advise >> Z_EROFS_LI_LCLUSTER_TYPE_BIT) &
+			((1 << Z_EROFS_LI_LCLUSTER_TYPE_BITS) - 1);
 		cv->clusterofs = le16_to_cpu(di->di_clusterofs);
 
-		if (cv->clustertype == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
+		if (cv->clustertype == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
 			cv->u.delta[0] = le16_to_cpu(di->di_u.delta[0]);
 			cv->u.delta[1] = le16_to_cpu(di->di_u.delta[1]);
 		} else {
@@ -384,10 +617,10 @@
 				     erofs_blk_t *blkaddr_ret,
 				     unsigned int destsize,
 				     unsigned int logical_clusterbits,
-				     bool final, bool *dummy_head)
+				     bool final, bool *dummy_head,
+				     bool update_blkaddr)
 {
 	unsigned int vcnt, encodebits, pos, i, cblks;
-	bool update_blkaddr;
 	erofs_blk_t blkaddr;
 
 	if (destsize == 4)
@@ -398,21 +631,21 @@
 		return ERR_PTR(-EINVAL);
 	encodebits = (vcnt * destsize * 8 - 32) / vcnt;
 	blkaddr = *blkaddr_ret;
-	update_blkaddr = erofs_sb_has_big_pcluster();
 
 	pos = 0;
 	for (i = 0; i < vcnt; ++i) {
 		unsigned int offset, v;
 		u8 ch, rem;
 
-		if (cv[i].clustertype == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
-			if (cv[i].u.delta[0] & Z_EROFS_VLE_DI_D0_CBLKCNT) {
-				cblks = cv[i].u.delta[0] & ~Z_EROFS_VLE_DI_D0_CBLKCNT;
+		if (cv[i].clustertype == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
+			if (cv[i].u.delta[0] & Z_EROFS_LI_D0_CBLKCNT) {
+				cblks = cv[i].u.delta[0] & ~Z_EROFS_LI_D0_CBLKCNT;
 				offset = cv[i].u.delta[0];
 				blkaddr += cblks;
 				*dummy_head = false;
 			} else if (i + 1 == vcnt) {
-				offset = cv[i].u.delta[1];
+				offset = min_t(u16, cv[i].u.delta[1],
+						(1 << logical_clusterbits) - 1);
 			} else {
 				offset = cv[i].u.delta[0];
 			}
@@ -451,25 +684,37 @@
 					unsigned int legacymetasize,
 					void *compressmeta)
 {
-	const unsigned int mpos = Z_EROFS_VLE_EXTENT_ALIGN(inode->inode_isize +
-							   inode->xattr_isize) +
+	const unsigned int mpos = roundup(inode->inode_isize +
+					  inode->xattr_isize, 8) +
 				  sizeof(struct z_erofs_map_header);
 	const unsigned int totalidx = (legacymetasize -
 			Z_EROFS_LEGACY_MAP_HEADER_SIZE) /
-				sizeof(struct z_erofs_vle_decompressed_index);
+				sizeof(struct z_erofs_lcluster_index);
 	const unsigned int logical_clusterbits = inode->z_logical_clusterbits;
 	u8 *out, *in;
 	struct z_erofs_compressindex_vec cv[16];
+	struct erofs_sb_info *sbi = inode->sbi;
 	/* # of 8-byte units so that it can be aligned with 32 bytes */
 	unsigned int compacted_4b_initial, compacted_4b_end;
 	unsigned int compacted_2b;
 	bool dummy_head;
+	bool big_pcluster = erofs_sb_has_big_pcluster(sbi);
 
-	if (logical_clusterbits < LOG_BLOCK_SIZE || LOG_BLOCK_SIZE < 12)
+	if (logical_clusterbits < sbi->blkszbits || sbi->blkszbits < 12)
 		return -EINVAL;
-	if (logical_clusterbits > 14)	/* currently not supported */
-		return -ENOTSUP;
-	if (logical_clusterbits == 12) {
+	if (logical_clusterbits > 14) {
+		erofs_err("compact format is unsupported for lcluster size %u",
+			  1 << logical_clusterbits);
+		return -EOPNOTSUPP;
+	}
+
+	if (inode->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) {
+		if (logical_clusterbits != 12) {
+			erofs_err("compact 2B is unsupported for lcluster size %u",
+				  1 << logical_clusterbits);
+			return -EINVAL;
+		}
+
 		compacted_4b_initial = (32 - mpos % 32) / 4;
 		if (compacted_4b_initial == 32 / 4)
 			compacted_4b_initial = 0;
@@ -495,7 +740,7 @@
 
 	dummy_head = false;
 	/* prior to bigpcluster, blkaddr was bumped up once coming into HEAD */
-	if (!erofs_sb_has_big_pcluster()) {
+	if (!big_pcluster) {
 		--blkaddr;
 		dummy_head = true;
 	}
@@ -505,7 +750,7 @@
 		in = parse_legacy_indexes(cv, 2, in);
 		out = write_compacted_indexes(out, cv, &blkaddr,
 					      4, logical_clusterbits, false,
-					      &dummy_head);
+					      &dummy_head, big_pcluster);
 		compacted_4b_initial -= 2;
 	}
 	DBG_BUGON(compacted_4b_initial);
@@ -515,7 +760,7 @@
 		in = parse_legacy_indexes(cv, 16, in);
 		out = write_compacted_indexes(out, cv, &blkaddr,
 					      2, logical_clusterbits, false,
-					      &dummy_head);
+					      &dummy_head, big_pcluster);
 		compacted_2b -= 16;
 	}
 	DBG_BUGON(compacted_2b);
@@ -525,7 +770,7 @@
 		in = parse_legacy_indexes(cv, 2, in);
 		out = write_compacted_indexes(out, cv, &blkaddr,
 					      4, logical_clusterbits, false,
-					      &dummy_head);
+					      &dummy_head, big_pcluster);
 		compacted_4b_end -= 2;
 	}
 
@@ -535,7 +780,7 @@
 		in = parse_legacy_indexes(cv, 1, in);
 		out = write_compacted_indexes(out, cv, &blkaddr,
 					      4, logical_clusterbits, true,
-					      &dummy_head);
+					      &dummy_head, big_pcluster);
 	}
 	inode->extent_isize = out - (u8 *)compressmeta;
 	return 0;
@@ -544,15 +789,20 @@
 static void z_erofs_write_mapheader(struct erofs_inode *inode,
 				    void *compressmeta)
 {
+	struct erofs_sb_info *sbi = inode->sbi;
 	struct z_erofs_map_header h = {
 		.h_advise = cpu_to_le16(inode->z_advise),
-		.h_idata_size = cpu_to_le16(inode->idata_size),
 		.h_algorithmtype = inode->z_algorithmtype[1] << 4 |
 				   inode->z_algorithmtype[0],
 		/* lclustersize */
-		.h_clusterbits = inode->z_logical_clusterbits - 12,
+		.h_clusterbits = inode->z_logical_clusterbits - sbi->blkszbits,
 	};
 
+	if (inode->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)
+		h.h_fragmentoff = cpu_to_le32(inode->fragmentoff);
+	else
+		h.h_idata_size = cpu_to_le16(inode->idata_size);
+
 	memset(compressmeta, 0, Z_EROFS_LEGACY_MAP_HEADER_SIZE);
 	/* write out map header */
 	memcpy(compressmeta, &h, sizeof(struct z_erofs_map_header));
@@ -560,36 +810,38 @@
 
 void z_erofs_drop_inline_pcluster(struct erofs_inode *inode)
 {
-	const unsigned int type = Z_EROFS_VLE_CLUSTER_TYPE_PLAIN;
+	struct erofs_sb_info *sbi = inode->sbi;
+	const unsigned int type = Z_EROFS_LCLUSTER_TYPE_PLAIN;
 	struct z_erofs_map_header *h = inode->compressmeta;
 
 	h->h_advise = cpu_to_le16(le16_to_cpu(h->h_advise) &
 				  ~Z_EROFS_ADVISE_INLINE_PCLUSTER);
+	h->h_idata_size = 0;
 	if (!inode->eof_tailraw)
 		return;
 	DBG_BUGON(inode->compressed_idata != true);
 
 	/* patch the EOF lcluster to uncompressed type first */
-	if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
-		struct z_erofs_vle_decompressed_index *di =
+	if (inode->datalayout == EROFS_INODE_COMPRESSED_FULL) {
+		struct z_erofs_lcluster_index *di =
 			(inode->compressmeta + inode->extent_isize) -
-			sizeof(struct z_erofs_vle_decompressed_index);
+			sizeof(struct z_erofs_lcluster_index);
 		__le16 advise =
-			cpu_to_le16(type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT);
+			cpu_to_le16(type << Z_EROFS_LI_LCLUSTER_TYPE_BIT);
 
 		di->di_advise = advise;
-	} else if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION) {
+	} else if (inode->datalayout == EROFS_INODE_COMPRESSED_COMPACT) {
 		/* handle the last compacted 4B pack */
 		unsigned int eofs, base, pos, v, lo;
 		u8 *out;
 
 		eofs = inode->extent_isize -
-			(4 << (DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ) & 1));
+			(4 << (BLK_ROUND_UP(sbi, inode->i_size) & 1));
 		base = round_down(eofs, 8);
 		pos = 16 /* encodebits */ * ((eofs - base) / 4);
 		out = inode->compressmeta + base;
-		lo = get_unaligned_le32(out + pos / 8) & (EROFS_BLKSIZ - 1);
-		v = (type << LOG_BLOCK_SIZE) | lo;
+		lo = erofs_blkoff(sbi, get_unaligned_le32(out + pos / 8));
+		v = (type << sbi->blkszbits) | lo;
 		out[pos / 8] = v & 0xff;
 		out[pos / 8 + 1] = v >> 8;
 	} else {
@@ -604,72 +856,107 @@
 	inode->eof_tailraw = NULL;
 }
 
-int erofs_write_compressed_file(struct erofs_inode *inode)
+int erofs_write_compressed_file(struct erofs_inode *inode, int fd)
 {
 	struct erofs_buffer_head *bh;
 	static struct z_erofs_vle_compress_ctx ctx;
-	erofs_off_t remaining;
 	erofs_blk_t blkaddr, compressed_blocks;
 	unsigned int legacymetasize;
-	int ret, fd;
-	u8 *compressmeta = malloc(vle_compressmeta_capacity(inode->i_size));
+	int ret;
+	struct erofs_sb_info *sbi = inode->sbi;
+	u8 *compressmeta = malloc(BLK_ROUND_UP(sbi, inode->i_size) *
+				  sizeof(struct z_erofs_lcluster_index) +
+				  Z_EROFS_LEGACY_MAP_HEADER_SIZE);
 
 	if (!compressmeta)
 		return -ENOMEM;
 
-	fd = open(inode->i_srcpath, O_RDONLY | O_BINARY);
-	if (fd < 0) {
-		ret = -errno;
-		goto err_free_meta;
-	}
-
 	/* allocate main data buffer */
 	bh = erofs_balloc(DATA, 0, 0, 0);
 	if (IS_ERR(bh)) {
 		ret = PTR_ERR(bh);
-		goto err_close;
+		goto err_free_meta;
 	}
 
 	/* initialize per-file compression setting */
 	inode->z_advise = 0;
-	if (!cfg.c_legacy_compress) {
-		inode->z_advise |= Z_EROFS_ADVISE_COMPACTED_2B;
-		inode->datalayout = EROFS_INODE_FLAT_COMPRESSION;
+	inode->z_logical_clusterbits = sbi->blkszbits;
+	if (!cfg.c_legacy_compress && inode->z_logical_clusterbits <= 14) {
+		if (inode->z_logical_clusterbits <= 12)
+			inode->z_advise |= Z_EROFS_ADVISE_COMPACTED_2B;
+		inode->datalayout = EROFS_INODE_COMPRESSED_COMPACT;
 	} else {
-		inode->datalayout = EROFS_INODE_FLAT_COMPRESSION_LEGACY;
+		inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
 	}
 
-	if (erofs_sb_has_big_pcluster()) {
+	if (erofs_sb_has_big_pcluster(sbi)) {
 		inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1;
-		if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION)
+		if (inode->datalayout == EROFS_INODE_COMPRESSED_COMPACT)
 			inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
 	}
-	inode->z_algorithmtype[0] = algorithmtype[0];
-	inode->z_algorithmtype[1] = algorithmtype[1];
-	inode->z_logical_clusterbits = LOG_BLOCK_SIZE;
+	if (cfg.c_fragments && !cfg.c_dedupe)
+		inode->z_advise |= Z_EROFS_ADVISE_INTERLACED_PCLUSTER;
+
+#ifndef NDEBUG
+	if (cfg.c_random_algorithms) {
+		while (1) {
+			inode->z_algorithmtype[0] =
+				rand() % EROFS_MAX_COMPR_CFGS;
+			if (erofs_ccfg[inode->z_algorithmtype[0]].enable)
+				break;
+		}
+	}
+#endif
+	ctx.ccfg = &erofs_ccfg[inode->z_algorithmtype[0]];
+	inode->z_algorithmtype[0] = ctx.ccfg[0].algorithmtype;
+	inode->z_algorithmtype[1] = 0;
+
+	inode->idata_size = 0;
+	inode->fragment_size = 0;
+
+	/*
+	 * Handle tails in advance to avoid writing duplicated
+	 * parts into the packed inode.
+	 */
+	if (cfg.c_fragments && !erofs_is_packed_inode(inode)) {
+		ret = z_erofs_fragments_dedupe(inode, fd, &ctx.tof_chksum);
+		if (ret < 0)
+			goto err_bdrop;
+	}
 
 	blkaddr = erofs_mapbh(bh->block);	/* start_blkaddr */
+	ctx.inode = inode;
+	ctx.pclustersize = z_erofs_get_max_pclustersize(inode);
 	ctx.blkaddr = blkaddr;
 	ctx.metacur = compressmeta + Z_EROFS_LEGACY_MAP_HEADER_SIZE;
 	ctx.head = ctx.tail = 0;
 	ctx.clusterofs = 0;
-	remaining = inode->i_size;
-
-	while (remaining) {
-		const u64 readcount = min_t(u64, remaining,
-					    sizeof(ctx.queue) - ctx.tail);
-
-		ret = read(fd, ctx.queue + ctx.tail, readcount);
-		if (ret != readcount) {
-			ret = -errno;
-			goto err_bdrop;
-		}
-		remaining -= readcount;
-		ctx.tail += readcount;
-
-		ret = vle_compress_one(inode, &ctx, !remaining);
+	ctx.e.length = 0;
+	ctx.remaining = inode->i_size - inode->fragment_size;
+	ctx.fix_dedupedfrag = false;
+	ctx.fragemitted = false;
+	if (cfg.c_all_fragments && !erofs_is_packed_inode(inode) &&
+	    !inode->fragment_size) {
+		ret = z_erofs_pack_file_from_fd(inode, fd, ctx.tof_chksum);
 		if (ret)
 			goto err_free_idata;
+	} else {
+		while (ctx.remaining) {
+			const u64 rx = min_t(u64, ctx.remaining,
+					     sizeof(ctx.queue) - ctx.tail);
+
+			ret = read(fd, ctx.queue + ctx.tail, rx);
+			if (ret != rx) {
+				ret = -errno;
+				goto err_bdrop;
+			}
+			ctx.remaining -= rx;
+			ctx.tail += rx;
+
+			ret = vle_compress_one(&ctx);
+			if (ret)
+				goto err_free_idata;
+		}
 	}
 	DBG_BUGON(ctx.head != ctx.tail);
 
@@ -678,36 +965,65 @@
 	DBG_BUGON(compressed_blocks < !!inode->idata_size);
 	compressed_blocks -= !!inode->idata_size;
 
-	vle_write_indexes_final(&ctx);
+	/* generate an extent for the deduplicated fragment */
+	if (inode->fragment_size && !ctx.fragemitted) {
+		z_erofs_write_indexes(&ctx);
+		ctx.e.length = inode->fragment_size;
+		ctx.e.compressedblks = 0;
+		ctx.e.raw = false;
+		ctx.e.partial = false;
+		ctx.e.blkaddr = ctx.blkaddr;
+	}
+	z_erofs_fragments_commit(inode);
+
+	z_erofs_write_indexes(&ctx);
+	z_erofs_write_indexes_final(&ctx);
 	legacymetasize = ctx.metacur - compressmeta;
 	/* estimate if data compression saves space or not */
-	if (compressed_blocks * EROFS_BLKSIZ + inode->idata_size +
+	if (!inode->fragment_size &&
+	    compressed_blocks * erofs_blksiz(sbi) + inode->idata_size +
 	    legacymetasize >= inode->i_size) {
+		z_erofs_dedupe_commit(true);
 		ret = -ENOSPC;
 		goto err_free_idata;
 	}
+	z_erofs_dedupe_commit(false);
 	z_erofs_write_mapheader(inode, compressmeta);
 
-	close(fd);
+	if (!ctx.fragemitted)
+		sbi->saved_by_deduplication += inode->fragment_size;
+
+	/* if the entire file is a fragment, a simplified form is used. */
+	if (inode->i_size == inode->fragment_size) {
+		DBG_BUGON(inode->fragmentoff >> 63);
+		*(__le64 *)compressmeta =
+			cpu_to_le64(inode->fragmentoff | 1ULL << 63);
+		inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
+		legacymetasize = Z_EROFS_LEGACY_MAP_HEADER_SIZE;
+	}
+
 	if (compressed_blocks) {
-		ret = erofs_bh_balloon(bh, blknr_to_addr(compressed_blocks));
-		DBG_BUGON(ret != EROFS_BLKSIZ);
+		ret = erofs_bh_balloon(bh, erofs_pos(sbi, compressed_blocks));
+		DBG_BUGON(ret != erofs_blksiz(sbi));
 	} else {
-		DBG_BUGON(!inode->idata_size);
+		if (!cfg.c_fragments && !cfg.c_dedupe)
+			DBG_BUGON(!inode->idata_size);
 	}
 
 	erofs_info("compressed %s (%llu bytes) into %u blocks",
 		   inode->i_srcpath, (unsigned long long)inode->i_size,
 		   compressed_blocks);
 
-	if (inode->idata_size)
+	if (inode->idata_size) {
+		bh->op = &erofs_skip_write_bhops;
 		inode->bh_data = bh;
-	else
+	} else {
 		erofs_bdrop(bh, false);
+	}
 
 	inode->u.i_blocks = compressed_blocks;
 
-	if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
+	if (inode->datalayout == EROFS_INODE_COMPRESSED_FULL) {
 		inode->extent_isize = legacymetasize;
 	} else {
 		ret = z_erofs_convert_to_compacted_format(inode, blkaddr,
@@ -716,7 +1032,8 @@
 		DBG_BUGON(ret);
 	}
 	inode->compressmeta = compressmeta;
-	erofs_droid_blocklist_write(inode, blkaddr, compressed_blocks);
+	if (!erofs_is_packed_inode(inode))
+		erofs_droid_blocklist_write(inode, blkaddr, compressed_blocks);
 	return 0;
 
 err_free_idata:
@@ -726,28 +1043,18 @@
 	}
 err_bdrop:
 	erofs_bdrop(bh, true);	/* revoke buffer */
-err_close:
-	close(fd);
 err_free_meta:
 	free(compressmeta);
 	return ret;
 }
 
-static int erofs_get_compress_algorithm_id(const char *name)
-{
-	if (!strcmp(name, "lz4") || !strcmp(name, "lz4hc"))
-		return Z_EROFS_COMPRESSION_LZ4;
-	if (!strcmp(name, "lzma"))
-		return Z_EROFS_COMPRESSION_LZMA;
-	return -ENOTSUP;
-}
-
-int z_erofs_build_compr_cfgs(struct erofs_buffer_head *sb_bh)
+static int z_erofs_build_compr_cfgs(struct erofs_sb_info *sbi,
+				    struct erofs_buffer_head *sb_bh)
 {
 	struct erofs_buffer_head *bh = sb_bh;
 	int ret = 0;
 
-	if (sbi.available_compr_algs & (1 << Z_EROFS_COMPRESSION_LZ4)) {
+	if (sbi->available_compr_algs & (1 << Z_EROFS_COMPRESSION_LZ4)) {
 		struct {
 			__le16 size;
 			struct z_erofs_lz4_cfgs lz4;
@@ -755,7 +1062,7 @@
 			.size = cpu_to_le16(sizeof(struct z_erofs_lz4_cfgs)),
 			.lz4 = {
 				.max_distance =
-					cpu_to_le16(sbi.lz4_max_distance),
+					cpu_to_le16(sbi->lz4_max_distance),
 				.max_pclusterblks = cfg.c_pclusterblks_max,
 			}
 		};
@@ -766,12 +1073,12 @@
 			return PTR_ERR(bh);
 		}
 		erofs_mapbh(bh->block);
-		ret = dev_write(&lz4alg, erofs_btell(bh, false),
+		ret = dev_write(sbi, &lz4alg, erofs_btell(bh, false),
 				sizeof(lz4alg));
 		bh->op = &erofs_drop_directly_bhops;
 	}
 #ifdef HAVE_LIBLZMA
-	if (sbi.available_compr_algs & (1 << Z_EROFS_COMPRESSION_LZMA)) {
+	if (sbi->available_compr_algs & (1 << Z_EROFS_COMPRESSION_LZMA)) {
 		struct {
 			__le16 size;
 			struct z_erofs_lzma_cfgs lzma;
@@ -788,71 +1095,101 @@
 			return PTR_ERR(bh);
 		}
 		erofs_mapbh(bh->block);
-		ret = dev_write(&lzmaalg, erofs_btell(bh, false),
+		ret = dev_write(sbi, &lzmaalg, erofs_btell(bh, false),
 				sizeof(lzmaalg));
 		bh->op = &erofs_drop_directly_bhops;
 	}
 #endif
+	if (sbi->available_compr_algs & (1 << Z_EROFS_COMPRESSION_DEFLATE)) {
+		struct {
+			__le16 size;
+			struct z_erofs_deflate_cfgs z;
+		} __packed zalg = {
+			.size = cpu_to_le16(sizeof(struct z_erofs_deflate_cfgs)),
+			.z = {
+				.windowbits =
+					cpu_to_le32(ilog2(cfg.c_dict_size)),
+			}
+		};
+
+		bh = erofs_battach(bh, META, sizeof(zalg));
+		if (IS_ERR(bh)) {
+			DBG_BUGON(1);
+			return PTR_ERR(bh);
+		}
+		erofs_mapbh(bh->block);
+		ret = dev_write(sbi, &zalg, erofs_btell(bh, false),
+				sizeof(zalg));
+		bh->op = &erofs_drop_directly_bhops;
+	}
 	return ret;
 }
 
-int z_erofs_compress_init(struct erofs_buffer_head *sb_bh)
+int z_erofs_compress_init(struct erofs_sb_info *sbi, struct erofs_buffer_head *sb_bh)
 {
-	/* initialize for primary compression algorithm */
-	int ret = erofs_compressor_init(&compresshandle,
-					cfg.c_compr_alg_master);
+	int i, ret;
 
-	if (ret)
-		return ret;
+	for (i = 0; cfg.c_compr_alg[i]; ++i) {
+		struct erofs_compress *c = &erofs_ccfg[i].handle;
+
+		ret = erofs_compressor_init(sbi, c, cfg.c_compr_alg[i]);
+		if (ret)
+			return ret;
+
+		ret = erofs_compressor_setlevel(c, cfg.c_compr_level[i]);
+		if (ret)
+			return ret;
+
+		erofs_ccfg[i].algorithmtype =
+			z_erofs_get_compress_algorithm_id(c);
+		erofs_ccfg[i].enable = true;
+		sbi->available_compr_algs |= 1 << erofs_ccfg[i].algorithmtype;
+		if (erofs_ccfg[i].algorithmtype != Z_EROFS_COMPRESSION_LZ4)
+			erofs_sb_set_compr_cfgs(sbi);
+	}
 
 	/*
 	 * if primary algorithm is empty (e.g. compression off),
 	 * clear 0PADDING feature for old kernel compatibility.
 	 */
-	if (!cfg.c_compr_alg_master ||
-	    (cfg.c_legacy_compress && !strcmp(cfg.c_compr_alg_master, "lz4")))
-		erofs_sb_clear_lz4_0padding();
+	if (!cfg.c_compr_alg[0] ||
+	    (cfg.c_legacy_compress && !strncmp(cfg.c_compr_alg[0], "lz4", 3)))
+		erofs_sb_clear_lz4_0padding(sbi);
 
-	if (!cfg.c_compr_alg_master)
+	if (!cfg.c_compr_alg[0])
 		return 0;
 
-	ret = erofs_compressor_setlevel(&compresshandle,
-					cfg.c_compr_level_master);
-	if (ret)
-		return ret;
-
-	/* figure out primary algorithm */
-	ret = erofs_get_compress_algorithm_id(cfg.c_compr_alg_master);
-	if (ret < 0)
-		return ret;
-
-	algorithmtype[0] = ret;	/* primary algorithm (head 0) */
-	algorithmtype[1] = 0;	/* secondary algorithm (head 1) */
 	/*
 	 * if big pcluster is enabled, an extra CBLKCNT lcluster index needs
 	 * to be loaded in order to get those compressed block counts.
 	 */
 	if (cfg.c_pclusterblks_max > 1) {
 		if (cfg.c_pclusterblks_max >
-		    Z_EROFS_PCLUSTER_MAX_SIZE / EROFS_BLKSIZ) {
+		    Z_EROFS_PCLUSTER_MAX_SIZE / erofs_blksiz(sbi)) {
 			erofs_err("unsupported clusterblks %u (too large)",
 				  cfg.c_pclusterblks_max);
 			return -EINVAL;
 		}
-		erofs_sb_set_big_pcluster();
+		erofs_sb_set_big_pcluster(sbi);
+	}
+	if (cfg.c_pclusterblks_packed > cfg.c_pclusterblks_max) {
+		erofs_err("invalid physical cluster size for the packed file");
+		return -EINVAL;
 	}
 
-	if (ret != Z_EROFS_COMPRESSION_LZ4)
-		erofs_sb_set_compr_cfgs();
-
-	if (erofs_sb_has_compr_cfgs()) {
-		sbi.available_compr_algs |= 1 << ret;
-		return z_erofs_build_compr_cfgs(sb_bh);
-	}
+	if (erofs_sb_has_compr_cfgs(sbi))
+		return z_erofs_build_compr_cfgs(sbi, sb_bh);
 	return 0;
 }
 
 int z_erofs_compress_exit(void)
 {
-	return erofs_compressor_exit(&compresshandle);
+	int i, ret;
+
+	for (i = 0; cfg.c_compr_alg[i]; ++i) {
+		ret = erofs_compressor_exit(&erofs_ccfg[i].handle);
+		if (ret)
+			return ret;
+	}
+	return 0;
 }
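
Throughout this series, the fixed EROFS_BLKSIZ / LOG_BLOCK_SIZE constants give way to per-superblock helpers (erofs_blksiz(), erofs_blknr(), erofs_blkoff(), erofs_pos(), BLK_ROUND_UP()) driven by sbi->blkszbits. The real definitions live in the erofs-utils headers and are not part of these hunks; the standalone sketch below only illustrates what such helpers plausibly compute, assuming blkszbits holds log2 of the block size (all names here are stand-ins, not the real macros).

#include <stdio.h>
#include <stdint.h>

/* hypothetical stand-ins for the real helpers, for illustration only */
struct sb { unsigned char blkszbits; };	/* e.g. 12 for 4KiB, 14 for 16KiB blocks */

static uint32_t blksiz(const struct sb *sbi) { return 1U << sbi->blkszbits; }
static uint64_t pos(const struct sb *sbi, uint32_t blk) { return (uint64_t)blk << sbi->blkszbits; }
static uint32_t blknr(const struct sb *sbi, uint64_t addr) { return addr >> sbi->blkszbits; }
static uint32_t blkoff(const struct sb *sbi, uint64_t addr) { return addr & (blksiz(sbi) - 1); }
static uint32_t blk_round_up(const struct sb *sbi, uint64_t bytes)
{
	return (bytes + blksiz(sbi) - 1) >> sbi->blkszbits;	/* blocks needed to hold 'bytes' */
}

int main(void)
{
	struct sb sbi = { .blkszbits = 14 };	/* 16KiB blocks */

	printf("%u %u\n", blk_round_up(&sbi, 1), blk_round_up(&sbi, 16385));	/* 1 2 */
	printf("%u %u\n", blknr(&sbi, 40000), blkoff(&sbi, 40000));		/* 2 7232 */
	printf("%llu\n", (unsigned long long)pos(&sbi, 3));			/* 49152 */
	return 0;
}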
diff --git a/lib/compress_hints.c b/lib/compress_hints.c
index 92964eb..afc9f8f 100644
--- a/lib/compress_hints.c
+++ b/lib/compress_hints.c
@@ -20,57 +20,60 @@
 	erofs_err("invalid regex %s (%s)\n", s, str);
 }
 
-static int erofs_insert_compress_hints(const char *s, unsigned int blks)
+/* algorithmtype is actually ccfg # here */
+static int erofs_insert_compress_hints(const char *s, unsigned int blks,
+				       unsigned int algorithmtype)
 {
-	struct erofs_compress_hints *r;
+	struct erofs_compress_hints *ch;
 	int ret;
 
-	r = malloc(sizeof(struct erofs_compress_hints));
-	if (!r)
+	ch = malloc(sizeof(struct erofs_compress_hints));
+	if (!ch)
 		return -ENOMEM;
 
-	ret = regcomp(&r->reg, s, REG_EXTENDED|REG_NOSUB);
+	ret = regcomp(&ch->reg, s, REG_EXTENDED|REG_NOSUB);
 	if (ret) {
-		dump_regerror(ret, s, &r->reg);
-		goto err_out;
+		dump_regerror(ret, s, &ch->reg);
+		free(ch);
+		return ret;
 	}
-	r->physical_clusterblks = blks;
+	ch->physical_clusterblks = blks;
+	ch->algorithmtype = algorithmtype;
 
-	list_add_tail(&r->list, &compress_hints_head);
+	list_add_tail(&ch->list, &compress_hints_head);
 	erofs_info("compress hint %s (%u) is inserted", s, blks);
 	return ret;
-
-err_out:
-	free(r);
-	return ret;
 }
 
 bool z_erofs_apply_compress_hints(struct erofs_inode *inode)
 {
 	const char *s;
 	struct erofs_compress_hints *r;
-	unsigned int pclusterblks;
+	unsigned int pclusterblks, algorithmtype;
 
 	if (inode->z_physical_clusterblks)
 		return true;
 
 	s = erofs_fspath(inode->i_srcpath);
 	pclusterblks = cfg.c_pclusterblks_def;
+	algorithmtype = 0;
 
 	list_for_each_entry(r, &compress_hints_head, list) {
 		int ret = regexec(&r->reg, s, (size_t)0, NULL, 0);
 
 		if (!ret) {
 			pclusterblks = r->physical_clusterblks;
+			algorithmtype = r->algorithmtype;
 			break;
 		}
 		if (ret != REG_NOMATCH)
 			dump_regerror(ret, s, &r->reg);
 	}
 	inode->z_physical_clusterblks = pclusterblks;
+	inode->z_algorithmtype[0] = algorithmtype;
 
 	/* pclusterblks is 0 means this file shouldn't be compressed */
-	return !!pclusterblks;
+	return pclusterblks != 0;
 }
 
 void erofs_cleanup_compress_hints(void)
@@ -83,7 +86,7 @@
 	}
 }
 
-int erofs_load_compress_hints(void)
+int erofs_load_compress_hints(struct erofs_sb_info *sbi)
 {
 	char buf[PATH_MAX + 100];
 	FILE *f;
@@ -98,32 +101,53 @@
 		return -errno;
 
 	for (line = 1; fgets(buf, sizeof(buf), f); ++line) {
-		unsigned int pclustersize;
-		char *pattern;
+		unsigned int pclustersize, ccfg;
+		char *alg, *pattern;
+
+		if (*buf == '#' || *buf == '\n')
+			continue;
 
 		pclustersize = atoi(strtok(buf, "\t "));
+		alg = strtok(NULL, "\n\t ");
 		pattern = strtok(NULL, "\n");
+		if (!pattern) {
+			pattern = alg;
+			alg = NULL;
+		}
 		if (!pattern || *pattern == '\0') {
 			erofs_err("cannot find a match pattern at line %u",
 				  line);
 			ret = -EINVAL;
 			goto out;
 		}
-		if (pclustersize % EROFS_BLKSIZ) {
+		if (!alg || *alg == '\0') {
+			ccfg = 0;
+		} else {
+			ccfg = atoi(alg);
+			if (ccfg >= EROFS_MAX_COMPR_CFGS ||
+			    !cfg.c_compr_alg[ccfg]) {
+				erofs_err("invalid compression configuration \"%s\" at line %u",
+					  alg, line);
+				ret = -EINVAL;
+				goto out;
+			}
+		}
+
+		if (pclustersize % erofs_blksiz(sbi)) {
 			erofs_warn("invalid physical clustersize %u, "
 				   "use default pclusterblks %u",
 				   pclustersize, cfg.c_pclusterblks_def);
 			continue;
 		}
 		erofs_insert_compress_hints(pattern,
-					    pclustersize / EROFS_BLKSIZ);
+				pclustersize / erofs_blksiz(sbi), ccfg);
 
 		if (pclustersize > max_pclustersize)
 			max_pclustersize = pclustersize;
 	}
 
-	if (cfg.c_pclusterblks_max * EROFS_BLKSIZ < max_pclustersize) {
-		cfg.c_pclusterblks_max = max_pclustersize / EROFS_BLKSIZ;
+	if (cfg.c_pclusterblks_max * erofs_blksiz(sbi) < max_pclustersize) {
+		cfg.c_pclusterblks_max = max_pclustersize / erofs_blksiz(sbi);
 		erofs_warn("update max pclusterblks to %u", cfg.c_pclusterblks_max);
 	}
 out:
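
With the parsing above, each line of the compress-hints file now takes an optional middle field selecting a compression configuration by its index into cfg.c_compr_alg[], and lines starting with '#' as well as blank lines are skipped. A purely illustrative hints file under those assumptions, for a build configured with two compression algorithms (indexes 0 and 1) and a 4KiB block size, might look like this (the patterns and sizes are made up):

# <pclustersize> [<ccfg#>] <extended regex>
65536 1 \.so$
16384 0 ^lib/firmware/
4096 \.txt$

The pclustersize field must remain a multiple of the block size; a missing ccfg# falls back to configuration 0, matching the algorithmtype = 0 default in z_erofs_apply_compress_hints().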
diff --git a/lib/compressor.c b/lib/compressor.c
index a46bc39..93f5617 100644
--- a/lib/compressor.c
+++ b/lib/compressor.c
@@ -10,18 +10,71 @@
 
 #define EROFS_CONFIG_COMPR_DEF_BOUNDARY		(128)
 
-static const struct erofs_compressor *compressors[] = {
+static const struct erofs_algorithm {
+	char *name;
+	const struct erofs_compressor *c;
+	unsigned int id;
+
+	/* its name won't be shown as a supported algorithm */
+	bool optimisor;
+} erofs_algs[] = {
+	{ "lz4",
 #if LZ4_ENABLED
-#if LZ4HC_ENABLED
-		&erofs_compressor_lz4hc,
-#endif
 		&erofs_compressor_lz4,
+#else
+		NULL,
 #endif
+	  Z_EROFS_COMPRESSION_LZ4, false },
+
+#if LZ4HC_ENABLED
+	{ "lz4hc", &erofs_compressor_lz4hc,
+	  Z_EROFS_COMPRESSION_LZ4, true },
+#endif
+
+	{ "lzma",
 #if HAVE_LIBLZMA
 		&erofs_compressor_lzma,
+#else
+		NULL,
+#endif
+	  Z_EROFS_COMPRESSION_LZMA, false },
+
+	{ "deflate", &erofs_compressor_deflate,
+	  Z_EROFS_COMPRESSION_DEFLATE, false },
+
+#if HAVE_LIBDEFLATE
+	{ "libdeflate", &erofs_compressor_libdeflate,
+	  Z_EROFS_COMPRESSION_DEFLATE, true },
 #endif
 };
 
+int z_erofs_get_compress_algorithm_id(const struct erofs_compress *c)
+{
+	DBG_BUGON(!c->alg);
+	return c->alg->id;
+}
+
+const char *z_erofs_list_supported_algorithms(int i, unsigned int *mask)
+{
+	if (i >= ARRAY_SIZE(erofs_algs))
+		return NULL;
+	if (!erofs_algs[i].optimisor && (*mask & (1 << erofs_algs[i].id))) {
+		*mask ^= 1 << erofs_algs[i].id;
+		return erofs_algs[i].name;
+	}
+	return "";
+}
+
+const char *z_erofs_list_available_compressors(int *i)
+{
+	for (;*i < ARRAY_SIZE(erofs_algs); ++*i) {
+		if (!erofs_algs[*i].c)
+			continue;
+		return erofs_algs[(*i)++].name;
+	}
+	return NULL;
+}
+
 int erofs_compress_destsize(const struct erofs_compress *c,
 			    const void *src, unsigned int *srcsize,
 			    void *dst, unsigned int dstsize, bool inblocks)
@@ -30,18 +83,18 @@
 	int ret;
 
 	DBG_BUGON(!c->alg);
-	if (!c->alg->compress_destsize)
+	if (!c->alg->c->compress_destsize)
 		return -ENOTSUP;
 
 	uncompressed_capacity = *srcsize;
-	ret = c->alg->compress_destsize(c, src, srcsize, dst, dstsize);
+	ret = c->alg->c->compress_destsize(c, src, srcsize, dst, dstsize);
 	if (ret < 0)
 		return ret;
 
-	/* XXX: ret >= EROFS_BLKSIZ is a temporary hack for ztailpacking */
-	if (inblocks || ret >= EROFS_BLKSIZ ||
+	/* XXX: ret >= destsize_alignsize is a temporary hack for ztailpacking */
+	if (inblocks || ret >= c->destsize_alignsize ||
 	    uncompressed_capacity != *srcsize)
-		compressed_size = roundup(ret, EROFS_BLKSIZ);
+		compressed_size = roundup(ret, c->destsize_alignsize);
 	else
 		compressed_size = ret;
 	DBG_BUGON(c->compress_threshold < 100);
@@ -51,16 +104,11 @@
 	return ret;
 }
 
-const char *z_erofs_list_available_compressors(unsigned int i)
-{
-	return i >= ARRAY_SIZE(compressors) ? NULL : compressors[i]->name;
-}
-
 int erofs_compressor_setlevel(struct erofs_compress *c, int compression_level)
 {
 	DBG_BUGON(!c->alg);
-	if (c->alg->setlevel)
-		return c->alg->setlevel(c, compression_level);
+	if (c->alg->c->setlevel)
+		return c->alg->c->setlevel(c, compression_level);
 
 	if (compression_level >= 0)
 		return -EINVAL;
@@ -68,16 +116,19 @@
 	return 0;
 }
 
-int erofs_compressor_init(struct erofs_compress *c, char *alg_name)
+int erofs_compressor_init(struct erofs_sb_info *sbi,
+			  struct erofs_compress *c, char *alg_name)
 {
 	int ret, i;
 
+	c->sbi = sbi;
+
 	/* should be written in "minimum compression ratio * 100" */
 	c->compress_threshold = 100;
 
 	/* optimize for 4k size page */
-	c->destsize_alignsize = EROFS_BLKSIZ;
-	c->destsize_redzone_begin = EROFS_BLKSIZ - 16;
+	c->destsize_alignsize = erofs_blksiz(sbi);
+	c->destsize_redzone_begin = erofs_blksiz(sbi) - 16;
 	c->destsize_redzone_end = EROFS_CONFIG_COMPR_DEF_BOUNDARY;
 
 	if (!alg_name) {
@@ -86,13 +137,16 @@
 	}
 
 	ret = -EINVAL;
-	for (i = 0; i < ARRAY_SIZE(compressors); ++i) {
-		if (alg_name && strcmp(alg_name, compressors[i]->name))
+	for (i = 0; i < ARRAY_SIZE(erofs_algs); ++i) {
+		if (alg_name && strcmp(alg_name, erofs_algs[i].name))
 			continue;
 
-		ret = compressors[i]->init(c);
+		if (!erofs_algs[i].c)
+			continue;
+
+		ret = erofs_algs[i].c->init(c);
 		if (!ret) {
-			DBG_BUGON(!c->alg);
+			c->alg = &erofs_algs[i];
 			return 0;
 		}
 	}
@@ -102,7 +156,7 @@
 
 int erofs_compressor_exit(struct erofs_compress *c)
 {
-	if (c->alg && c->alg->exit)
-		return c->alg->exit(c);
+	if (c->alg && c->alg->c->exit)
+		return c->alg->c->exit(c);
 	return 0;
 }
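
z_erofs_list_available_compressors() now takes an int cursor that it advances itself, skipping algorithms whose backend was not built in, instead of being indexed directly by the caller. A hedged usage sketch (it assumes the erofs-utils headers that declare the function; the printing helper itself is hypothetical):

#include <stdio.h>

const char *z_erofs_list_available_compressors(int *i);	/* declared in the erofs-utils headers */

static void print_available_compressors(void)
{
	const char *name;
	int i = 0;

	/* the callee skips entries without a built-in backend and returns NULL at the end */
	while ((name = z_erofs_list_available_compressors(&i)) != NULL)
		printf(" %s", name);
	printf("\n");
}

z_erofs_list_supported_algorithms() works similarly, but filters by the superblock's available_compr_algs bitmask and returns "" for entries (such as the optimisor variants) that should be skipped silently.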
diff --git a/lib/compressor.h b/lib/compressor.h
index cf063f1..9fa01d1 100644
--- a/lib/compressor.h
+++ b/lib/compressor.h
@@ -12,8 +12,6 @@
 struct erofs_compress;
 
 struct erofs_compressor {
-	const char *name;
-
 	int default_level;
 	int best_level;
 
@@ -26,8 +24,11 @@
 				 void *dst, unsigned int dstsize);
 };
 
+struct erofs_algorithm;
+
 struct erofs_compress {
-	const struct erofs_compressor *alg;
+	struct erofs_sb_info *sbi;
+	const struct erofs_algorithm *alg;
 
 	unsigned int compress_threshold;
 	unsigned int compression_level;
@@ -44,13 +45,17 @@
 extern const struct erofs_compressor erofs_compressor_lz4;
 extern const struct erofs_compressor erofs_compressor_lz4hc;
 extern const struct erofs_compressor erofs_compressor_lzma;
+extern const struct erofs_compressor erofs_compressor_deflate;
+extern const struct erofs_compressor erofs_compressor_libdeflate;
 
+int z_erofs_get_compress_algorithm_id(const struct erofs_compress *c);
 int erofs_compress_destsize(const struct erofs_compress *c,
 			    const void *src, unsigned int *srcsize,
 			    void *dst, unsigned int dstsize, bool inblocks);
 
 int erofs_compressor_setlevel(struct erofs_compress *c, int compression_level);
-int erofs_compressor_init(struct erofs_compress *c, char *alg_name);
+int erofs_compressor_init(struct erofs_sb_info *sbi,
+		struct erofs_compress *c, char *alg_name);
 int erofs_compressor_exit(struct erofs_compress *c);
 
 #endif
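
The per-instance interface above (init with an sbi, setlevel, destsize compression, exit) replaces the old global compresshandle. The following is only a minimal sketch of driving one compressor through that interface; it assumes the erofs-utils internal headers, a valid sbi, and that cfg.c_dict_size has been set up for the DEFLATE backend, and the helper name is hypothetical.

/* sketch only: compress as much of 'src' as fits into one block-sized output */
static int try_compress_once(struct erofs_sb_info *sbi, const void *src,
			     unsigned int srcsize, void *dst, unsigned int dstsize)
{
	struct erofs_compress c = { 0 };
	int ret;

	ret = erofs_compressor_init(sbi, &c, "deflate");	/* any name listed in erofs_algs[] */
	if (ret)
		return ret;
	ret = erofs_compressor_setlevel(&c, -1);	/* a negative level selects the default */
	if (ret)
		goto out;
	/* on success, returns the compressed size and shrinks srcsize
	 * to the amount of input actually consumed */
	ret = erofs_compress_destsize(&c, src, &srcsize, dst, dstsize, false);
out:
	erofs_compressor_exit(&c);
	return ret;
}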
diff --git a/lib/compressor_deflate.c b/lib/compressor_deflate.c
new file mode 100644
index 0000000..4e5902e
--- /dev/null
+++ b/lib/compressor_deflate.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+/*
+ * Copyright (C) 2023, Alibaba Cloud
+ * Copyright (C) 2023, Gao Xiang <xiang@kernel.org>
+ */
+#include "erofs/internal.h"
+#include "erofs/print.h"
+#include "erofs/config.h"
+#include "compressor.h"
+
+void *kite_deflate_init(int level, unsigned int dict_size);
+void kite_deflate_end(void *s);
+int kite_deflate_destsize(void *s, const u8 *in, u8 *out,
+			  unsigned int *srcsize, unsigned int target_dstsize);
+
+static int deflate_compress_destsize(const struct erofs_compress *c,
+				     const void *src, unsigned int *srcsize,
+				     void *dst, unsigned int dstsize)
+{
+	int rc = kite_deflate_destsize(c->private_data, src, dst,
+				       srcsize, dstsize);
+
+	if (rc <= 0)
+		return -EFAULT;
+	return rc;
+}
+
+static int compressor_deflate_exit(struct erofs_compress *c)
+{
+	if (!c->private_data)
+		return -EINVAL;
+
+	kite_deflate_end(c->private_data);
+	return 0;
+}
+
+static int compressor_deflate_init(struct erofs_compress *c)
+{
+	c->private_data = NULL;
+
+	erofs_warn("EXPERIMENTAL DEFLATE algorithm in use. Use at your own risk!");
+	erofs_warn("*Carefully* check filesystem data correctness to avoid corruption!");
+	erofs_warn("Please send a report to <linux-erofs@lists.ozlabs.org> if something is wrong.");
+	return 0;
+}
+
+static int erofs_compressor_deflate_setlevel(struct erofs_compress *c,
+					     int compression_level)
+{
+	void *s;
+
+	if (c->private_data) {
+		kite_deflate_end(c->private_data);
+		c->private_data = NULL;
+	}
+
+	if (compression_level < 0)
+		compression_level = erofs_compressor_deflate.default_level;
+
+	s = kite_deflate_init(compression_level, cfg.c_dict_size);
+	if (IS_ERR(s))
+		return PTR_ERR(s);
+
+	c->private_data = s;
+	c->compression_level = compression_level;
+	return 0;
+}
+
+const struct erofs_compressor erofs_compressor_deflate = {
+	.default_level = 1,
+	.best_level = 9,
+	.init = compressor_deflate_init,
+	.exit = compressor_deflate_exit,
+	.setlevel = erofs_compressor_deflate_setlevel,
+	.compress_destsize = deflate_compress_destsize,
+};
diff --git a/lib/compressor_libdeflate.c b/lib/compressor_libdeflate.c
new file mode 100644
index 0000000..c0b019a
--- /dev/null
+++ b/lib/compressor_libdeflate.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+#include "erofs/internal.h"
+#include "erofs/print.h"
+#include "erofs/config.h"
+#include <libdeflate.h>
+#include "compressor.h"
+
+static int libdeflate_compress_destsize(const struct erofs_compress *c,
+				        const void *src, unsigned int *srcsize,
+				        void *dst, unsigned int dstsize)
+{
+	static size_t last_uncompressed_size = 0;
+	size_t l = 0; /* largest input that fits so far */
+	size_t l_csize = 0;
+	size_t r = *srcsize + 1; /* smallest input that doesn't fit so far */
+	size_t m;
+	u8 tmpbuf[dstsize + 9];
+
+	if (last_uncompressed_size)
+		m = last_uncompressed_size * 15 / 16;
+	else
+		m = dstsize * 4;
+	for (;;) {
+		size_t csize;
+
+		m = max(m, l + 1);
+		m = min(m, r - 1);
+
+		csize = libdeflate_deflate_compress(c->private_data, src, m,
+						    tmpbuf, dstsize + 9);
+		/*printf("Tried %zu => %zu\n", m, csize);*/
+		if (csize > 0 && csize <= dstsize) {
+			/* Fits */
+			memcpy(dst, tmpbuf, csize);
+			l = m;
+			l_csize = csize;
+			if (r <= l + 1 || csize +
+				(22 - 2*(int)c->compression_level) >= dstsize)
+				break;
+			/*
+			 * Estimate needed input prefix size based on current
+			 * compression ratio.
+			 */
+			m = (dstsize * m) / csize;
+		} else {
+			/* Doesn't fit */
+			r = m;
+			if (r <= l + 1)
+				break;
+			m = (l + r) / 2;
+		}
+	}
+
+	/*
+	 * Generic EROFS on-disk compressed data is padded with leading 0s
+	 * when it doesn't fill the pcluster (by no more than one block,
+	 * e.g. 4KB, even if the whole pcluster is 128KB), so the actual
+	 * compressed length can be told from the padding itself without
+	 * reserving extra compressed bytes or recording more metadata.
+	 *
+	 * DEFLATE streams can be used in the same way: if a stream starts
+	 * with a non-last stored block (a zero leading byte), set one of the
+	 * ignored header bits instead so that the byte becomes non-zero.
+	 * The stream is still valid according to the DEFLATE specification.
+	 */
+	if (l_csize && !((u8 *)dst)[0])
+	       ((u8 *)dst)[0] = 1 << (2 + 1);
+
+	/*printf("Choosing %zu => %zu\n", l, l_csize);*/
+	*srcsize = l;
+	last_uncompressed_size = l;
+	return l_csize;
+}
+
+static int compressor_libdeflate_exit(struct erofs_compress *c)
+{
+	if (!c->private_data)
+		return -EINVAL;
+
+	libdeflate_free_compressor(c->private_data);
+	return 0;
+}
+
+static int compressor_libdeflate_init(struct erofs_compress *c)
+{
+	c->private_data = NULL;
+
+	erofs_warn("EXPERIMENTAL libdeflate compressor in use. Use at your own risk!");
+	return 0;
+}
+
+static int erofs_compressor_libdeflate_setlevel(struct erofs_compress *c,
+						int compression_level)
+{
+	if (compression_level < 0)
+		compression_level = erofs_compressor_deflate.default_level;
+
+	libdeflate_free_compressor(c->private_data);
+	c->private_data = libdeflate_alloc_compressor(compression_level);
+	if (!c->private_data)
+		return -ENOMEM;
+	c->compression_level = compression_level;
+	return 0;
+}
+
+const struct erofs_compressor erofs_compressor_libdeflate = {
+	.default_level = 1,
+	.best_level = 12,
+	.init = compressor_libdeflate_init,
+	.exit = compressor_libdeflate_exit,
+	.setlevel = erofs_compressor_libdeflate_setlevel,
+	.compress_destsize = libdeflate_compress_destsize,
+};
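
libdeflate has no destsize mode of its own, so libdeflate_compress_destsize() above brackets the answer: l is the largest input size known to fit, r the smallest known not to fit, and the next guess comes either from the current compression ratio (after a fit) or from plain bisection (after a miss). Below is a standalone toy with a fake, monotone size model just to show the bracketing converging; the real code additionally seeds the first guess from the previous file and stops early once the output is close enough to dstsize.

#include <stdio.h>

static size_t fake_csize(size_t insize) { return insize / 3 + 40; }	/* stand-in for libdeflate */

int main(void)
{
	const size_t dstsize = 4096, total = 65536;
	size_t l = 0, r = total + 1, m = dstsize * 4;

	while (1) {
		size_t csize;

		m = m < l + 1 ? l + 1 : m;	/* keep the guess strictly inside (l, r) */
		m = m > r - 1 ? r - 1 : m;
		csize = fake_csize(m);
		if (csize <= dstsize) {
			l = m;				/* fits: raise the lower bound */
			if (r <= l + 1)
				break;
			m = dstsize * m / csize;	/* ratio-based estimate for the next try */
		} else {
			r = m;				/* doesn't fit: lower the upper bound */
			if (r <= l + 1)
				break;
			m = (l + r) / 2;		/* fall back to bisection */
		}
	}
	printf("largest fitting input: %zu bytes\n", l);	/* 12170 with this toy model */
	return 0;
}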
diff --git a/lib/compressor_liblzma.c b/lib/compressor_liblzma.c
index 4886d6a..0ed6f23 100644
--- a/lib/compressor_liblzma.c
+++ b/lib/compressor_liblzma.c
@@ -56,11 +56,16 @@
 					     int compression_level)
 {
 	struct erofs_liblzma_context *ctx = c->private_data;
+	u32 preset;
 
 	if (compression_level < 0)
-		compression_level = LZMA_PRESET_DEFAULT;
+		preset = LZMA_PRESET_DEFAULT;
+	else if (compression_level >= 100)
+		preset = (compression_level - 100) | LZMA_PRESET_EXTREME;
+	else
+		preset = compression_level;
 
-	if (lzma_lzma_preset(&ctx->opt, compression_level))
+	if (lzma_lzma_preset(&ctx->opt, preset))
 		return -EINVAL;
 
 	/* XXX: temporary hack */
@@ -83,7 +88,6 @@
 {
 	struct erofs_liblzma_context *ctx;
 
-	c->alg = &erofs_compressor_lzma;
 	ctx = malloc(sizeof(*ctx));
 	if (!ctx)
 		return -ENOMEM;
@@ -95,9 +99,8 @@
 }
 
 const struct erofs_compressor erofs_compressor_lzma = {
-	.name = "lzma",
 	.default_level = LZMA_PRESET_DEFAULT,
-	.best_level = LZMA_PRESET_EXTREME,
+	.best_level = 109,
 	.init = erofs_compressor_liblzma_init,
 	.exit = erofs_compressor_liblzma_exit,
 	.setlevel = erofs_compressor_liblzma_setlevel,
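
The liblzma setlevel change folds LZMA_PRESET_EXTREME into the numeric level: values below 100 are plain presets, values of 100 and up select preset (level - 100) with the extreme flag, so the new best_level of 109 means "preset 9, extreme". A small sketch of the same mapping; it only assumes <lzma.h>, and how a level reaches mkfs.erofs on the command line is out of scope here.

#include <lzma.h>
#include <stdio.h>

/* mirrors the preset selection in erofs_compressor_liblzma_setlevel() above */
static uint32_t level_to_preset(int level)
{
	if (level < 0)
		return LZMA_PRESET_DEFAULT;
	if (level >= 100)
		return (level - 100) | LZMA_PRESET_EXTREME;
	return level;
}

int main(void)
{
	printf("6 -> %#x, 106 -> %#x\n",
	       (unsigned int)level_to_preset(6),
	       (unsigned int)level_to_preset(106));	/* 0x6 vs 0x80000006 */
	return 0;
}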
diff --git a/lib/compressor_lz4.c b/lib/compressor_lz4.c
index b6f6e7e..6677693 100644
--- a/lib/compressor_lz4.c
+++ b/lib/compressor_lz4.c
@@ -32,13 +32,11 @@
 
 static int compressor_lz4_init(struct erofs_compress *c)
 {
-	c->alg = &erofs_compressor_lz4;
-	sbi.lz4_max_distance = LZ4_DISTANCE_MAX;
+	c->sbi->lz4_max_distance = LZ4_DISTANCE_MAX;
 	return 0;
 }
 
 const struct erofs_compressor erofs_compressor_lz4 = {
-	.name = "lz4",
 	.default_level = 0,
 	.best_level = 0,
 	.init = compressor_lz4_init,
diff --git a/lib/compressor_lz4hc.c b/lib/compressor_lz4hc.c
index eec1c84..b410e15 100644
--- a/lib/compressor_lz4hc.c
+++ b/lib/compressor_lz4hc.c
@@ -38,13 +38,11 @@
 
 static int compressor_lz4hc_init(struct erofs_compress *c)
 {
-	c->alg = &erofs_compressor_lz4hc;
-
 	c->private_data = LZ4_createStreamHC();
 	if (!c->private_data)
 		return -ENOMEM;
 
-	sbi.lz4_max_distance = LZ4_DISTANCE_MAX;
+	c->sbi->lz4_max_distance = LZ4_DISTANCE_MAX;
 	return 0;
 }
 
@@ -60,7 +58,6 @@
 }
 
 const struct erofs_compressor erofs_compressor_lz4hc = {
-	.name = "lz4hc",
 	.default_level = LZ4HC_CLEVEL_DEFAULT,
 	.best_level = LZ4HC_CLEVEL_MAX,
 	.init = compressor_lz4hc_init,
diff --git a/lib/config.c b/lib/config.c
index d478b07..2f3df37 100644
--- a/lib/config.c
+++ b/lib/config.c
@@ -25,7 +25,6 @@
 	cfg.c_version  = PACKAGE_VERSION;
 	cfg.c_dry_run  = false;
 	cfg.c_ignore_mtime = false;
-	cfg.c_compr_level_master = -1;
 	cfg.c_force_inodeversion = 0;
 	cfg.c_inline_xattr_tolerance = 2;
 	cfg.c_unix_timestamp = -1;
diff --git a/lib/data.c b/lib/data.c
index 6bc554d..a87053f 100644
--- a/lib/data.c
+++ b/lib/data.c
@@ -18,27 +18,29 @@
 	erofs_blk_t nblocks, lastblk;
 	u64 offset = map->m_la;
 	struct erofs_inode *vi = inode;
+	struct erofs_sb_info *sbi = inode->sbi;
 	bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);
 
 	trace_erofs_map_blocks_flatmode_enter(inode, map, flags);
 
-	nblocks = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
+	nblocks = BLK_ROUND_UP(sbi, inode->i_size);
 	lastblk = nblocks - tailendpacking;
 
 	/* there is no hole in flatmode */
 	map->m_flags = EROFS_MAP_MAPPED;
 
-	if (offset < blknr_to_addr(lastblk)) {
-		map->m_pa = blknr_to_addr(vi->u.i_blkaddr) + map->m_la;
-		map->m_plen = blknr_to_addr(lastblk) - offset;
+	if (offset < erofs_pos(sbi, lastblk)) {
+		map->m_pa = erofs_pos(sbi, vi->u.i_blkaddr) + map->m_la;
+		map->m_plen = erofs_pos(sbi, lastblk) - offset;
 	} else if (tailendpacking) {
 		/* 2 - inode inline B: inode, [xattrs], inline last blk... */
-		map->m_pa = iloc(vi->nid) + vi->inode_isize +
-			vi->xattr_isize + erofs_blkoff(map->m_la);
+		map->m_pa = erofs_iloc(vi) + vi->inode_isize +
+			vi->xattr_isize + erofs_blkoff(sbi, map->m_la);
 		map->m_plen = inode->i_size - offset;
 
 		/* inline data should be located in the same meta block */
-		if (erofs_blkoff(map->m_pa) + map->m_plen > EROFS_BLKSIZ) {
+		if (erofs_blkoff(sbi, map->m_pa) + map->m_plen >
+							erofs_blksiz(sbi)) {
 			erofs_err("inline data cross block boundary @ nid %" PRIu64,
 				  vi->nid);
 			DBG_BUGON(1);
@@ -65,8 +67,9 @@
 		struct erofs_map_blocks *map, int flags)
 {
 	struct erofs_inode *vi = inode;
+	struct erofs_sb_info *sbi = inode->sbi;
 	struct erofs_inode_chunk_index *idx;
-	u8 buf[EROFS_BLKSIZ];
+	u8 buf[EROFS_MAX_BLOCK_SIZE];
 	u64 chunknr;
 	unsigned int unit;
 	erofs_off_t pos;
@@ -89,39 +92,39 @@
 		unit = EROFS_BLOCK_MAP_ENTRY_SIZE;	/* block map */
 
 	chunknr = map->m_la >> vi->u.chunkbits;
-	pos = roundup(iloc(vi->nid) + vi->inode_isize +
+	pos = roundup(erofs_iloc(vi) + vi->inode_isize +
 		      vi->xattr_isize, unit) + unit * chunknr;
 
-	err = blk_read(0, buf, erofs_blknr(pos), 1);
+	err = blk_read(sbi, 0, buf, erofs_blknr(sbi, pos), 1);
 	if (err < 0)
 		return -EIO;
 
 	map->m_la = chunknr << vi->u.chunkbits;
 	map->m_plen = min_t(erofs_off_t, 1UL << vi->u.chunkbits,
-			    roundup(inode->i_size - map->m_la, EROFS_BLKSIZ));
+			roundup(inode->i_size - map->m_la, erofs_blksiz(sbi)));
 
 	/* handle block map */
 	if (!(vi->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) {
-		__le32 *blkaddr = (void *)buf + erofs_blkoff(pos);
+		__le32 *blkaddr = (void *)buf + erofs_blkoff(sbi, pos);
 
 		if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) {
 			map->m_flags = 0;
 		} else {
-			map->m_pa = blknr_to_addr(le32_to_cpu(*blkaddr));
+			map->m_pa = erofs_pos(sbi, le32_to_cpu(*blkaddr));
 			map->m_flags = EROFS_MAP_MAPPED;
 		}
 		goto out;
 	}
 	/* parse chunk indexes */
-	idx = (void *)buf + erofs_blkoff(pos);
+	idx = (void *)buf + erofs_blkoff(sbi, pos);
 	switch (le32_to_cpu(idx->blkaddr)) {
 	case EROFS_NULL_ADDR:
 		map->m_flags = 0;
 		break;
 	default:
 		map->m_deviceid = le16_to_cpu(idx->device_id) &
-			sbi.device_id_mask;
-		map->m_pa = blknr_to_addr(le32_to_cpu(idx->blkaddr));
+			sbi->device_id_mask;
+		map->m_pa = erofs_pos(sbi, le32_to_cpu(idx->blkaddr));
 		map->m_flags = EROFS_MAP_MAPPED;
 		break;
 	}
@@ -145,8 +148,8 @@
 			dif = sbi->devs + id;
 			if (!dif->mapped_blkaddr)
 				continue;
-			startoff = blknr_to_addr(dif->mapped_blkaddr);
-			length = blknr_to_addr(dif->blocks);
+			startoff = erofs_pos(sbi, dif->mapped_blkaddr);
+			length = erofs_pos(sbi, dif->blocks);
 
 			if (map->m_pa >= startoff &&
 			    map->m_pa < startoff + length) {
@@ -158,19 +161,39 @@
 	return 0;
 }
 
+int erofs_read_one_data(struct erofs_inode *inode, struct erofs_map_blocks *map,
+			char *buffer, u64 offset, size_t len)
+{
+	struct erofs_sb_info *sbi = inode->sbi;
+	struct erofs_map_dev mdev;
+	int ret;
+
+	mdev = (struct erofs_map_dev) {
+		.m_deviceid = map->m_deviceid,
+		.m_pa = map->m_pa,
+	};
+	ret = erofs_map_dev(sbi, &mdev);
+	if (ret)
+		return ret;
+
+	ret = dev_read(sbi, mdev.m_deviceid, buffer, mdev.m_pa + offset, len);
+	if (ret < 0)
+		return -EIO;
+	return 0;
+}
+
 static int erofs_read_raw_data(struct erofs_inode *inode, char *buffer,
 			       erofs_off_t size, erofs_off_t offset)
 {
 	struct erofs_map_blocks map = {
 		.index = UINT_MAX,
 	};
-	struct erofs_map_dev mdev;
 	int ret;
 	erofs_off_t ptr = offset;
 
 	while (ptr < offset + size) {
 		char *const estart = buffer + ptr - offset;
-		erofs_off_t eend;
+		erofs_off_t eend, moff = 0;
 
 		map.m_la = ptr;
 		ret = erofs_map_blocks(inode, &map, 0);
@@ -179,14 +202,6 @@
 
 		DBG_BUGON(map.m_plen != map.m_llen);
 
-		mdev = (struct erofs_map_dev) {
-			.m_deviceid = map.m_deviceid,
-			.m_pa = map.m_pa,
-		};
-		ret = erofs_map_dev(&sbi, &mdev);
-		if (ret)
-			return ret;
-
 		/* trim extent */
 		eend = min(offset + size, map.m_la + map.m_llen);
 		DBG_BUGON(ptr < map.m_la);
@@ -204,19 +219,77 @@
 		}
 
 		if (ptr > map.m_la) {
-			mdev.m_pa += ptr - map.m_la;
+			moff = ptr - map.m_la;
 			map.m_la = ptr;
 		}
 
-		ret = dev_read(mdev.m_deviceid, estart, mdev.m_pa,
-			       eend - map.m_la);
-		if (ret < 0)
-			return -EIO;
+		ret = erofs_read_one_data(inode, &map, estart, moff,
+					  eend - map.m_la);
+		if (ret)
+			return ret;
 		ptr = eend;
 	}
 	return 0;
 }
 
+int z_erofs_read_one_data(struct erofs_inode *inode,
+			struct erofs_map_blocks *map, char *raw, char *buffer,
+			erofs_off_t skip, erofs_off_t length, bool trimmed)
+{
+	struct erofs_sb_info *sbi = inode->sbi;
+	struct erofs_map_dev mdev;
+	int ret = 0;
+
+	if (map->m_flags & EROFS_MAP_FRAGMENT) {
+		struct erofs_inode packed_inode = {
+			.sbi = sbi,
+			.nid = sbi->packed_nid,
+		};
+
+		ret = erofs_read_inode_from_disk(&packed_inode);
+		if (ret) {
+			erofs_err("failed to read packed inode from disk");
+			return ret;
+		}
+
+		return erofs_pread(&packed_inode, buffer, length - skip,
+				   inode->fragmentoff + skip);
+	}
+
+	/* no device id here, thus it will always succeed */
+	mdev = (struct erofs_map_dev) {
+		.m_pa = map->m_pa,
+	};
+	ret = erofs_map_dev(sbi, &mdev);
+	if (ret) {
+		DBG_BUGON(1);
+		return ret;
+	}
+
+	ret = dev_read(sbi, mdev.m_deviceid, raw, mdev.m_pa, map->m_plen);
+	if (ret < 0)
+		return ret;
+
+	ret = z_erofs_decompress(&(struct z_erofs_decompress_req) {
+			.sbi = sbi,
+			.in = raw,
+			.out = buffer,
+			.decodedskip = skip,
+			.interlaced_offset =
+				map->m_algorithmformat == Z_EROFS_COMPRESSION_INTERLACED ?
+					erofs_blkoff(sbi, map->m_la) : 0,
+			.inputsize = map->m_plen,
+			.decodedlength = length,
+			.alg = map->m_algorithmformat,
+			.partial_decoding = trimmed ? true :
+				!(map->m_flags & EROFS_MAP_FULL_MAPPED) ||
+					(map->m_flags & EROFS_MAP_PARTIAL_REF),
+			 });
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
 static int z_erofs_read_data(struct erofs_inode *inode, char *buffer,
 			     erofs_off_t size, erofs_off_t offset)
 {
@@ -224,8 +297,7 @@
 	struct erofs_map_blocks map = {
 		.index = UINT_MAX,
 	};
-	struct erofs_map_dev mdev;
-	bool partial;
+	bool trimmed;
 	unsigned int bufsize = 0;
 	char *raw = NULL;
 	int ret = 0;
@@ -238,27 +310,17 @@
 		if (ret)
 			break;
 
-		/* no device id here, thus it will always succeed */
-		mdev = (struct erofs_map_dev) {
-			.m_pa = map.m_pa,
-		};
-		ret = erofs_map_dev(&sbi, &mdev);
-		if (ret) {
-			DBG_BUGON(1);
-			break;
-		}
-
 		/*
 		 * trim to the needed size if the returned extent is quite
 		 * larger than requested, and set up partial flag as well.
 		 */
 		if (end < map.m_la + map.m_llen) {
 			length = end - map.m_la;
-			partial = true;
+			trimmed = true;
 		} else {
 			DBG_BUGON(end != map.m_la + map.m_llen);
 			length = map.m_llen;
-			partial = !(map.m_flags & EROFS_MAP_FULL_MAPPED);
+			trimmed = false;
 		}
 
 		if (map.m_la < offset) {
@@ -270,7 +332,7 @@
 		}
 
 		if (!(map.m_flags & EROFS_MAP_MAPPED)) {
-			memset(buffer + end - offset, 0, length);
+			memset(buffer + end - offset, 0, length - skip);
 			end = map.m_la;
 			continue;
 		}
@@ -283,19 +345,9 @@
 				break;
 			}
 		}
-		ret = dev_read(mdev.m_deviceid, raw, mdev.m_pa, map.m_plen);
-		if (ret < 0)
-			break;
 
-		ret = z_erofs_decompress(&(struct z_erofs_decompress_req) {
-					.in = raw,
-					.out = buffer + end - offset,
-					.decodedskip = skip,
-					.inputsize = map.m_plen,
-					.decodedlength = length,
-					.alg = map.m_algorithmformat,
-					.partial_decoding = partial
-					 });
+		ret = z_erofs_read_one_data(inode, &map, raw,
+				buffer + end - offset, skip, length, trimmed);
 		if (ret < 0)
 			break;
 	}
@@ -312,11 +364,95 @@
 	case EROFS_INODE_FLAT_INLINE:
 	case EROFS_INODE_CHUNK_BASED:
 		return erofs_read_raw_data(inode, buf, count, offset);
-	case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
-	case EROFS_INODE_FLAT_COMPRESSION:
+	case EROFS_INODE_COMPRESSED_FULL:
+	case EROFS_INODE_COMPRESSED_COMPACT:
 		return z_erofs_read_data(inode, buf, count, offset);
 	default:
 		break;
 	}
 	return -EINVAL;
 }
+
+static void *erofs_read_metadata_nid(struct erofs_sb_info *sbi, erofs_nid_t nid,
+				     erofs_off_t *offset, int *lengthp)
+{
+	struct erofs_inode vi = { .sbi = sbi, .nid = nid };
+	__le16 __len;
+	int ret, len;
+	char *buffer;
+
+	ret = erofs_read_inode_from_disk(&vi);
+	if (ret)
+		return ERR_PTR(ret);
+
+	*offset = round_up(*offset, 4);
+	ret = erofs_pread(&vi, (void *)&__len, sizeof(__le16), *offset);
+	if (ret)
+		return ERR_PTR(ret);
+
+	len = le16_to_cpu(__len);
+	if (!len)
+		return ERR_PTR(-EFSCORRUPTED);
+
+	buffer = malloc(len);
+	if (!buffer)
+		return ERR_PTR(-ENOMEM);
+	*offset += sizeof(__le16);
+	*lengthp = len;
+
+	ret = erofs_pread(&vi, buffer, len, *offset);
+	if (ret) {
+		free(buffer);
+		return ERR_PTR(ret);
+	}
+	*offset += len;
+	return buffer;
+}
+
+static void *erofs_read_metadata_bdi(struct erofs_sb_info *sbi,
+				     erofs_off_t *offset, int *lengthp)
+{
+	int ret, len, i, cnt;
+	void *buffer;
+	u8 data[EROFS_MAX_BLOCK_SIZE];
+
+	*offset = round_up(*offset, 4);
+	ret = blk_read(sbi, 0, data, erofs_blknr(sbi, *offset), 1);
+	if (ret)
+		return ERR_PTR(ret);
+	len = le16_to_cpu(*(__le16 *)&data[erofs_blkoff(sbi, *offset)]);
+	if (!len)
+		return ERR_PTR(-EFSCORRUPTED);
+
+	buffer = malloc(len);
+	if (!buffer)
+		return ERR_PTR(-ENOMEM);
+	*offset += sizeof(__le16);
+	*lengthp = len;
+
+	for (i = 0; i < len; i += cnt) {
+		cnt = min_t(int, erofs_blksiz(sbi) - erofs_blkoff(sbi, *offset),
+			    len - i);
+		ret = blk_read(sbi, 0, data, erofs_blknr(sbi, *offset), 1);
+		if (ret) {
+			free(buffer);
+			return ERR_PTR(ret);
+		}
+		memcpy(buffer + i, data + erofs_blkoff(sbi, *offset), cnt);
+		*offset += cnt;
+	}
+	return buffer;
+}
+
+/*
+ * read variable-sized metadata; the offset will be aligned to a 4-byte boundary
+ *
+ * @nid is 0 if metadata is in meta inode
+ */
+void *erofs_read_metadata(struct erofs_sb_info *sbi, erofs_nid_t nid,
+			  erofs_off_t *offset, int *lengthp)
+{
+	if (nid)
+		return erofs_read_metadata_nid(sbi, nid, offset, lengthp);
+	return erofs_read_metadata_bdi(sbi, offset, lengthp);
+}
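
erofs_read_metadata() reads one variable-sized record: it aligns *offset to 4 bytes, loads an le16 length, then the payload, and leaves *offset just past the record. A hedged caller sketch that walks several consecutive records follows; the helper name and record semantics are illustrative, and it assumes the erofs-utils internal headers for IS_ERR()/PTR_ERR() and erofs_dbg().

static int walk_metadata_records(struct erofs_sb_info *sbi, erofs_nid_t nid,
				 erofs_off_t pos, unsigned int count)
{
	unsigned int i;

	for (i = 0; i < count; i++) {
		int len;
		/* nid == 0 means the records live in the meta area rather than an inode */
		void *rec = erofs_read_metadata(sbi, nid, &pos, &len);

		if (IS_ERR(rec))
			return PTR_ERR(rec);
		erofs_dbg("metadata record %u: %d bytes", i, len);
		free(rec);
	}
	return 0;
}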
diff --git a/lib/decompress.c b/lib/decompress.c
index 1661f91..fe8a40c 100644
--- a/lib/decompress.c
+++ b/lib/decompress.c
@@ -9,12 +9,160 @@
 #include "erofs/err.h"
 #include "erofs/print.h"
 
+#ifdef HAVE_LIBDEFLATE
+/* if libdeflate is available, use libdeflate instead. */
+#include <libdeflate.h>
+
+static int z_erofs_decompress_deflate(struct z_erofs_decompress_req *rq)
+{
+	struct erofs_sb_info *sbi = rq->sbi;
+	u8 *dest = (u8 *)rq->out;
+	u8 *src = (u8 *)rq->in;
+	u8 *buff = NULL;
+	size_t actual_out;
+	unsigned int inputmargin = 0;
+	struct libdeflate_decompressor *inf;
+	enum libdeflate_result ret;
+
+	while (!src[inputmargin & (erofs_blksiz(sbi) - 1)])
+		if (!(++inputmargin & (erofs_blksiz(sbi) - 1)))
+			break;
+
+	if (inputmargin >= rq->inputsize)
+		return -EFSCORRUPTED;
+
+	if (rq->decodedskip) {
+		buff = malloc(rq->decodedlength);
+		if (!buff)
+			return -ENOMEM;
+		dest = buff;
+	}
+
+	inf = libdeflate_alloc_decompressor();
+	if (!inf)
+		return -ENOMEM;
+
+	if (rq->partial_decoding) {
+		ret = libdeflate_deflate_decompress(inf, src + inputmargin,
+				rq->inputsize - inputmargin, dest,
+				rq->decodedlength, &actual_out);
+		if (ret && ret != LIBDEFLATE_INSUFFICIENT_SPACE) {
+			ret = -EIO;
+			goto out_inflate_end;
+		}
+
+		if (actual_out != rq->decodedlength) {
+			ret = -EIO;
+			goto out_inflate_end;
+		}
+	} else {
+		ret = libdeflate_deflate_decompress(inf, src + inputmargin,
+				rq->inputsize - inputmargin, dest,
+				rq->decodedlength, NULL);
+		if (ret) {
+			ret = -EIO;
+			goto out_inflate_end;
+		}
+	}
+
+	if (rq->decodedskip)
+		memcpy(rq->out, dest + rq->decodedskip,
+		       rq->decodedlength - rq->decodedskip);
+
+out_inflate_end:
+	libdeflate_free_decompressor(inf);
+	if (buff)
+		free(buff);
+	return ret;
+}
+#elif defined(HAVE_ZLIB)
+#include <zlib.h>
+
+/* report a zlib or i/o error */
+static int zerr(int ret)
+{
+	switch (ret) {
+	case Z_STREAM_ERROR:
+		return -EINVAL;
+	case Z_DATA_ERROR:
+		return -EIO;
+	case Z_MEM_ERROR:
+		return -ENOMEM;
+	case Z_ERRNO:
+	case Z_VERSION_ERROR:
+	default:
+		return -EFAULT;
+	}
+}
+
+static int z_erofs_decompress_deflate(struct z_erofs_decompress_req *rq)
+{
+	struct erofs_sb_info *sbi = rq->sbi;
+	u8 *dest = (u8 *)rq->out;
+	u8 *src = (u8 *)rq->in;
+	u8 *buff = NULL;
+	unsigned int inputmargin = 0;
+	z_stream strm;
+	int ret;
+
+	while (!src[inputmargin & (erofs_blksiz(sbi) - 1)])
+		if (!(++inputmargin & (erofs_blksiz(sbi) - 1)))
+			break;
+
+	if (inputmargin >= rq->inputsize)
+		return -EFSCORRUPTED;
+
+	if (rq->decodedskip) {
+		buff = malloc(rq->decodedlength);
+		if (!buff)
+			return -ENOMEM;
+		dest = buff;
+	}
+
+	/* allocate inflate state */
+	strm.zalloc = Z_NULL;
+	strm.zfree = Z_NULL;
+	strm.opaque = Z_NULL;
+	strm.avail_in = 0;
+	strm.next_in = Z_NULL;
+	ret = inflateInit2(&strm, -15);
+	if (ret != Z_OK) {
+		free(buff);
+		return zerr(ret);
+	}
+
+	strm.next_in = src + inputmargin;
+	strm.avail_in = rq->inputsize - inputmargin;
+	strm.next_out = dest;
+	strm.avail_out = rq->decodedlength;
+
+	ret = inflate(&strm, rq->partial_decoding ? Z_SYNC_FLUSH : Z_FINISH);
+	if (ret != Z_STREAM_END || strm.total_out != rq->decodedlength) {
+		if (ret != Z_OK || !rq->partial_decoding) {
+			ret = zerr(ret);
+			goto out_inflate_end;
+		}
+	}
+
+	if (rq->decodedskip)
+		memcpy(rq->out, dest + rq->decodedskip,
+		       rq->decodedlength - rq->decodedskip);
+
+out_inflate_end:
+	inflateEnd(&strm);
+	if (buff)
+		free(buff);
+	return ret;
+}
+#endif
+
 #ifdef HAVE_LIBLZMA
 #include <lzma.h>
 
 static int z_erofs_decompress_lzma(struct z_erofs_decompress_req *rq)
 {
 	int ret = 0;
+	struct erofs_sb_info *sbi = rq->sbi;
 	u8 *dest = (u8 *)rq->out;
 	u8 *src = (u8 *)rq->in;
 	u8 *buff = NULL;
@@ -22,8 +170,8 @@
 	lzma_stream strm;
 	lzma_ret ret2;
 
-	while (!src[inputmargin & ~PAGE_MASK])
-		if (!(++inputmargin & ~PAGE_MASK))
+	while (!src[inputmargin & (erofs_blksiz(sbi) - 1)])
+		if (!(++inputmargin & (erofs_blksiz(sbi) - 1)))
 			break;
 
 	if (inputmargin >= rq->inputsize)
@@ -81,12 +229,13 @@
 	char *buff = NULL;
 	bool support_0padding = false;
 	unsigned int inputmargin = 0;
+	struct erofs_sb_info *sbi = rq->sbi;
 
-	if (erofs_sb_has_lz4_0padding()) {
+	if (erofs_sb_has_lz4_0padding(sbi)) {
 		support_0padding = true;
 
-		while (!src[inputmargin & ~PAGE_MASK])
-			if (!(++inputmargin & ~PAGE_MASK))
+		while (!src[inputmargin & (erofs_blksiz(sbi) - 1)])
+			if (!(++inputmargin & (erofs_blksiz(sbi) - 1)))
 				break;
 
 		if (inputmargin >= rq->inputsize)
@@ -131,13 +280,32 @@
 
 int z_erofs_decompress(struct z_erofs_decompress_req *rq)
 {
-	if (rq->alg == Z_EROFS_COMPRESSION_SHIFTED) {
-		if (rq->inputsize > EROFS_BLKSIZ)
+	struct erofs_sb_info *sbi = rq->sbi;
+
+	if (rq->alg == Z_EROFS_COMPRESSION_INTERLACED) {
+		unsigned int count, rightpart, skip;
+
+		/* XXX: should support inputsize >= erofs_blksiz(sbi) later */
+		if (rq->inputsize > erofs_blksiz(sbi))
 			return -EFSCORRUPTED;
 
-		DBG_BUGON(rq->decodedlength > EROFS_BLKSIZ);
-		DBG_BUGON(rq->decodedlength < rq->decodedskip);
+		if (rq->decodedlength > erofs_blksiz(sbi))
+			return -EFSCORRUPTED;
 
+		if (rq->decodedlength < rq->decodedskip)
+			return -EFSCORRUPTED;
+
+		count = rq->decodedlength - rq->decodedskip;
+		skip = erofs_blkoff(sbi, rq->interlaced_offset + rq->decodedskip);
+		rightpart = min(erofs_blksiz(sbi) - skip, count);
+		memcpy(rq->out, rq->in + skip, rightpart);
+		memcpy(rq->out + rightpart, rq->in, count - rightpart);
+		return 0;
+	} else if (rq->alg == Z_EROFS_COMPRESSION_SHIFTED) {
+		if (rq->decodedlength > rq->inputsize)
+			return -EFSCORRUPTED;
+
+		DBG_BUGON(rq->decodedlength < rq->decodedskip);
 		memcpy(rq->out, rq->in + rq->decodedskip,
 		       rq->decodedlength - rq->decodedskip);
 		return 0;
@@ -151,5 +319,9 @@
 	if (rq->alg == Z_EROFS_COMPRESSION_LZMA)
 		return z_erofs_decompress_lzma(rq);
 #endif
+#if defined(HAVE_ZLIB) || defined(HAVE_LIBDEFLATE)
+	if (rq->alg == Z_EROFS_COMPRESSION_DEFLATE)
+		return z_erofs_decompress_deflate(rq);
+#endif
 	return -EOPNOTSUPP;
 }
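
For Z_EROFS_COMPRESSION_INTERLACED pclusters, the plain data is stored rotated so that it begins at its in-block offset and wraps around to the block start; the two memcpy() calls above undo that rotation. A standalone toy of just that copy, using an 8-byte "block" for readability:

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* logical data "ABCDEFGH" stored starting at in-block offset 5, wrapping */
	const char in[8] = { 'D', 'E', 'F', 'G', 'H', 'A', 'B', 'C' };
	char out[9] = { 0 };
	unsigned int blksz = 8, skip = 5, count = 8, rightpart;

	rightpart = (blksz - skip < count) ? blksz - skip : count;	/* min() as in the code */
	memcpy(out, in + skip, rightpart);		/* tail of the block: "ABC" */
	memcpy(out + rightpart, in, count - rightpart);	/* wrapped head: "DEFGH" */
	printf("%s\n", out);				/* prints ABCDEFGH */
	return 0;
}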
diff --git a/lib/dedupe.c b/lib/dedupe.c
new file mode 100644
index 0000000..17da452
--- /dev/null
+++ b/lib/dedupe.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+/*
+ * Copyright (C) 2022 Alibaba Cloud
+ */
+#include "erofs/dedupe.h"
+#include "erofs/print.h"
+#include "rb_tree.h"
+#include "rolling_hash.h"
+#include "sha256.h"
+
+unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2,
+			    unsigned long sz)
+{
+	const unsigned long *a1, *a2;
+	unsigned long n = sz;
+
+	if (sz < sizeof(long))
+		goto out_bytes;
+
+	if (((long)s1 & (sizeof(long) - 1)) ==
+			((long)s2 & (sizeof(long) - 1))) {
+		while ((long)s1 & (sizeof(long) - 1)) {
+			if (*s1 != *s2)
+				break;
+			++s1;
+			++s2;
+			--sz;
+		}
+
+		a1 = (const unsigned long *)s1;
+		a2 = (const unsigned long *)s2;
+		while (sz >= sizeof(long)) {
+			if (*a1 != *a2)
+				break;
+			++a1;
+			++a2;
+			sz -= sizeof(long);
+		}
+	} else {
+		a1 = (const unsigned long *)s1;
+		a2 = (const unsigned long *)s2;
+		do {
+			if (get_unaligned(a1) != get_unaligned(a2))
+				break;
+			++a1;
+			++a2;
+			sz -= sizeof(long);
+		} while (sz >= sizeof(long));
+	}
+	s1 = (const u8 *)a1;
+	s2 = (const u8 *)a2;
+out_bytes:
+	while (sz) {
+		if (*s1 != *s2)
+			break;
+		++s1;
+		++s2;
+		--sz;
+	}
+	return n - sz;
+}
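
Unlike memcmp(), erofs_memcmp2() above returns the number of leading bytes that are equal, comparing word-at-a-time when it can; the match-extension logic later in this file relies on that count. A trivial illustration, assuming the function above is linked in:

#include <assert.h>

unsigned long erofs_memcmp2(const unsigned char *s1, const unsigned char *s2,
			    unsigned long sz);	/* the function above */

int main(void)
{
	assert(erofs_memcmp2((const unsigned char *)"abcdXY",
			     (const unsigned char *)"abcdZZ", 6) == 4);
	assert(erofs_memcmp2((const unsigned char *)"same",
			     (const unsigned char *)"same", 4) == 4);
	return 0;
}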
+
+static unsigned int window_size, rollinghash_rm;
+static struct rb_tree *dedupe_tree, *dedupe_subtree;
+
+struct z_erofs_dedupe_item {
+	long long	hash;
+	u8		prefix_sha256[32];
+
+	erofs_blk_t	compressed_blkaddr;
+	unsigned int	compressed_blks;
+
+	int		original_length;
+	bool		partial, raw;
+	u8		extra_data[];
+};
+
+static int z_erofs_dedupe_rbtree_cmp(struct rb_tree *self,
+		struct rb_node *node_a, struct rb_node *node_b)
+{
+	struct z_erofs_dedupe_item *e_a = node_a->value;
+	struct z_erofs_dedupe_item *e_b = node_b->value;
+
+	return (e_a->hash > e_b->hash) - (e_a->hash < e_b->hash);
+}
+
+int z_erofs_dedupe_match(struct z_erofs_dedupe_ctx *ctx)
+{
+	struct z_erofs_dedupe_item e_find;
+	u8 *cur;
+	bool initial = true;
+
+	if (!dedupe_tree)
+		return -ENOENT;
+
+	if (ctx->cur > ctx->end - window_size)
+		cur = ctx->end - window_size;
+	else
+		cur = ctx->cur;
+
+	/* move backward byte-by-byte */
+	for (; cur >= ctx->start; --cur) {
+		struct z_erofs_dedupe_item *e;
+		unsigned int extra;
+		u8 sha256[32];
+
+		if (initial) {
+			/* initial try */
+			e_find.hash = erofs_rolling_hash_init(cur, window_size, true);
+			initial = false;
+		} else {
+			e_find.hash = erofs_rolling_hash_advance(e_find.hash,
+				rollinghash_rm, cur[window_size], cur[0]);
+		}
+
+		e = rb_tree_find(dedupe_tree, &e_find);
+		if (!e) {
+			e = rb_tree_find(dedupe_subtree, &e_find);
+			if (!e)
+				continue;
+		}
+
+		erofs_sha256(cur, window_size, sha256);
+		if (memcmp(sha256, e->prefix_sha256, sizeof(sha256)))
+			continue;
+
+		extra = min_t(unsigned int, ctx->end - cur - window_size,
+			      e->original_length - window_size);
+		extra = erofs_memcmp2(cur + window_size, e->extra_data, extra);
+		if (window_size + extra <= ctx->cur - cur)
+			continue;
+		ctx->cur = cur;
+		ctx->e.length = window_size + extra;
+		ctx->e.partial = e->partial ||
+			(window_size + extra < e->original_length);
+		ctx->e.raw = e->raw;
+		ctx->e.blkaddr = e->compressed_blkaddr;
+		ctx->e.compressedblks = e->compressed_blks;
+		return 0;
+	}
+	return -ENOENT;
+}
+
+int z_erofs_dedupe_insert(struct z_erofs_inmem_extent *e,
+			  void *original_data)
+{
+	struct z_erofs_dedupe_item *di;
+
+	if (!dedupe_subtree || e->length < window_size)
+		return 0;
+
+	di = malloc(sizeof(*di) + e->length - window_size);
+	if (!di)
+		return -ENOMEM;
+
+	di->original_length = e->length;
+	erofs_sha256(original_data, window_size, di->prefix_sha256);
+	di->hash = erofs_rolling_hash_init(original_data,
+			window_size, true);
+	memcpy(di->extra_data, original_data + window_size,
+	       e->length - window_size);
+	di->compressed_blkaddr = e->blkaddr;
+	di->compressed_blks = e->compressedblks;
+	di->partial = e->partial;
+	di->raw = e->raw;
+
+	/* drop it if another item with the same rolling hash already exists */
+	if (!rb_tree_insert(dedupe_subtree, di))
+		free(di);
+	return 0;
+}
+
+static void z_erofs_dedupe_node_free_cb(struct rb_tree *self,
+					struct rb_node *node)
+{
+	free(node->value);
+	rb_tree_node_dealloc_cb(self, node);
+}
+
+void z_erofs_dedupe_commit(bool drop)
+{
+	if (!dedupe_subtree)
+		return;
+	if (!drop) {
+		struct rb_iter iter;
+		struct z_erofs_dedupe_item *di;
+
+		di = rb_iter_first(&iter, dedupe_subtree);
+		while (di) {
+			if (!rb_tree_insert(dedupe_tree, di))
+				DBG_BUGON(1);
+			di = rb_iter_next(&iter);
+		}
+		/*rb_iter_dealloc(iter);*/
+		rb_tree_dealloc(dedupe_subtree, rb_tree_node_dealloc_cb);
+	} else {
+		rb_tree_dealloc(dedupe_subtree, z_erofs_dedupe_node_free_cb);
+	}
+	dedupe_subtree = rb_tree_create(z_erofs_dedupe_rbtree_cmp);
+}
+
+int z_erofs_dedupe_init(unsigned int wsiz)
+{
+	dedupe_tree = rb_tree_create(z_erofs_dedupe_rbtree_cmp);
+	if (!dedupe_tree)
+		return -ENOMEM;
+
+	dedupe_subtree = rb_tree_create(z_erofs_dedupe_rbtree_cmp);
+	if (!dedupe_subtree) {
+		rb_tree_dealloc(dedupe_tree, NULL);
+		return -ENOMEM;
+	}
+	window_size = wsiz;
+	rollinghash_rm = erofs_rollinghash_calc_rm(window_size);
+	return 0;
+}
+
+void z_erofs_dedupe_exit(void)
+{
+	z_erofs_dedupe_commit(true);
+	rb_tree_dealloc(dedupe_subtree, NULL);
+	rb_tree_dealloc(dedupe_tree, z_erofs_dedupe_node_free_cb);
+}
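
z_erofs_dedupe_match() above slides a fixed-size window backwards one byte at a time and relies on erofs_rolling_hash_init()/erofs_rolling_hash_advance() (from lib/rolling_hash.h, not part of this hunk) so that each step costs O(1) instead of rehashing the whole window. The sketch below is a generic Rabin-Karp-style rolling hash with the same calling shape; the constants, width and exact formula of the real helpers may differ:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define DEMO_PRIME	31	/* illustrative multiplier only */

/* h = p[0] + p[1]*P + ... + p[w-1]*P^(w-1), in wrapping 64-bit arithmetic */
static uint64_t demo_hash_init(const unsigned char *p, int window)
{
	uint64_t h = 0;
	int i;

	for (i = window - 1; i >= 0; i--)
		h = h * DEMO_PRIME + p[i];
	return h;
}

/* P^(window-1); computed once per window size, like rollinghash_rm above */
static uint64_t demo_calc_rm(int window)
{
	uint64_t rm = 1;

	while (--window > 0)
		rm *= DEMO_PRIME;
	return rm;
}

/* slide one byte backwards: drop `out' (the old last byte), prepend `in' */
static uint64_t demo_hash_retreat(uint64_t h, uint64_t rm,
				  unsigned char out, unsigned char in)
{
	return (h - out * rm) * DEMO_PRIME + in;
}

int main(void)
{
	const unsigned char buf[] = "deduplicate me, deduplicate me again";
	const int window = 8;
	uint64_t rm = demo_calc_rm(window);
	const unsigned char *cur = buf + strlen((const char *)buf) - window;
	uint64_t h = demo_hash_init(cur, window);

	/* move backward byte-by-byte, as z_erofs_dedupe_match() does */
	while (cur > buf) {
		--cur;
		h = demo_hash_retreat(h, rm, cur[window], cur[0]);
		assert(h == demo_hash_init(cur, window));
	}
	puts("incremental hash equals full recomputation at every offset");
	return 0;
}

Only when the cheap rolling hash hits an rb-tree entry does the code above fall back to the SHA-256 prefix check and the byte-exact erofs_memcmp2() extension.
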
diff --git a/lib/dir.c b/lib/dir.c
index e6b9283..1223cbc 100644
--- a/lib/dir.c
+++ b/lib/dir.c
@@ -4,11 +4,25 @@
 #include "erofs/print.h"
 #include "erofs/dir.h"
 
+/* filename should not have a '/' in the name string */
+static bool erofs_validate_filename(const char *dname, int size)
+{
+	char *name = (char *)dname;
+
+	while (name - dname < size && *name != '\0') {
+		if (*name == '/')
+			return false;
+		++name;
+	}
+	return true;
+}
+
 static int traverse_dirents(struct erofs_dir_context *ctx,
 			    void *dentry_blk, unsigned int lblk,
 			    unsigned int next_nameoff, unsigned int maxsize,
 			    bool fsck)
 {
+	struct erofs_sb_info *sbi = ctx->dir->sbi;
 	struct erofs_dirent *de = dentry_blk;
 	const struct erofs_dirent *end = dentry_blk + next_nameoff;
 	const char *prev_name = NULL;
@@ -41,7 +55,7 @@
 			break;
 		}
 
-		if (nameoff + de_namelen > maxsize ||
+		if (nameoff + de_namelen > maxsize || !de_namelen ||
 				de_namelen > EROFS_NAME_LEN) {
 			errmsg = "bogus dirent namelen";
 			break;
@@ -76,8 +90,8 @@
 					goto out;
 				}
 				ctx->flags |= EROFS_READDIR_DOTDOT_FOUND;
-				if (sbi.root_nid == ctx->dir->nid) {
-					ctx->pnid = sbi.root_nid;
+				if (sbi->root_nid == ctx->dir->nid) {
+					ctx->pnid = sbi->root_nid;
 					ctx->flags |= EROFS_READDIR_VALID_PNID;
 				}
 				if (fsck &&
@@ -101,6 +115,10 @@
 				}
 				break;
 			}
+		} else if (fsck &&
+			   !erofs_validate_filename(de_name, de_namelen)) {
+			errmsg = "corrupted dirent with illegal filename";
+			goto out;
 		}
 		ret = ctx->cb(ctx);
 		if (ret) {
@@ -123,9 +141,10 @@
 int erofs_iterate_dir(struct erofs_dir_context *ctx, bool fsck)
 {
 	struct erofs_inode *dir = ctx->dir;
+	struct erofs_sb_info *sbi = dir->sbi;
 	int err = 0;
 	erofs_off_t pos;
-	char buf[EROFS_BLKSIZ];
+	char buf[EROFS_MAX_BLOCK_SIZE];
 
 	if (!S_ISDIR(dir->i_mode))
 		return -ENOTDIR;
@@ -133,9 +152,9 @@
 	ctx->flags &= ~EROFS_READDIR_ALL_SPECIAL_FOUND;
 	pos = 0;
 	while (pos < dir->i_size) {
-		erofs_blk_t lblk = erofs_blknr(pos);
+		erofs_blk_t lblk = erofs_blknr(sbi, pos);
 		erofs_off_t maxsize = min_t(erofs_off_t,
-					dir->i_size - pos, EROFS_BLKSIZ);
+					dir->i_size - pos, erofs_blksiz(sbi));
 		const struct erofs_dirent *de = (const void *)buf;
 		unsigned int nameoff;
 
@@ -148,7 +167,7 @@
 
 		nameoff = le16_to_cpu(de->nameoff);
 		if (nameoff < sizeof(struct erofs_dirent) ||
-		    nameoff >= EROFS_BLKSIZ) {
+		    nameoff >= erofs_blksiz(sbi)) {
 			erofs_err("invalid de[0].nameoff %u @ nid %llu, lblk %u",
 				  nameoff, dir->nid | 0ULL, lblk);
 			return -EFSCORRUPTED;
@@ -203,7 +222,10 @@
 	}
 
 	if (ctx->de_ftype == EROFS_FT_DIR || ctx->de_ftype == EROFS_FT_UNKNOWN) {
-		struct erofs_inode dir = { .nid = ctx->de_nid };
+		struct erofs_inode dir = {
+			.sbi = ctx->dir->sbi,
+			.nid = ctx->de_nid
+		};
 
 		ret = erofs_read_inode_from_disk(&dir);
 		if (ret) {
@@ -212,10 +234,16 @@
 		}
 
 		if (S_ISDIR(dir.i_mode)) {
-			ctx->dir = &dir;
-			pathctx->pos = pos + len + 1;
-			ret = erofs_iterate_dir(ctx, false);
-			pathctx->pos = pos;
+			struct erofs_get_pathname_context nctx = {
+				.ctx.flags = 0,
+				.ctx.dir = &dir,
+				.ctx.cb = erofs_get_pathname_iter,
+				.target_nid = pathctx->target_nid,
+				.buf = pathctx->buf,
+				.size = pathctx->size,
+				.pos = pos + len + 1,
+			};
+			ret = erofs_iterate_dir(&nctx.ctx, false);
 			if (ret == EROFS_PATHNAME_FOUND) {
 				pathctx->buf[pos++] = '/';
 				strncpy(pathctx->buf + pos, dname, len);
@@ -229,10 +257,14 @@
 	return 0;
 }
 
-int erofs_get_pathname(erofs_nid_t nid, char *buf, size_t size)
+int erofs_get_pathname(struct erofs_sb_info *sbi, erofs_nid_t nid,
+		       char *buf, size_t size)
 {
 	int ret;
-	struct erofs_inode root = { .nid = sbi.root_nid };
+	struct erofs_inode root = {
+		.sbi = sbi,
+		.nid = sbi->root_nid,
+	};
 	struct erofs_get_pathname_context pathctx = {
 		.ctx.flags = 0,
 		.ctx.dir = &root,
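
For reference, the directory blocks walked by traverse_dirents()/erofs_iterate_dir() above begin with an array of 12-byte on-disk dirents; de[0].nameoff marks where the packed names start and therefore also implies how many dirents the block holds. A standalone sketch with simplified field types (the real struct erofs_dirent lives in include/erofs_fs.h):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct demo_dirent {
	uint64_t nid;		/* referenced inode number */
	uint16_t nameoff;	/* start of the name within this block */
	uint8_t  file_type;	/* EROFS_FT_* */
	uint8_t  reserved;
} __attribute__((packed));	/* 12 bytes, as on disk */

static void demo_list(const uint8_t *blk, unsigned int used)
{
	const struct demo_dirent *de = (const void *)blk;
	unsigned int nr = de[0].nameoff / sizeof(*de), i;

	for (i = 0; i < nr; i++) {
		/* a name ends where the next one starts (or the block ends) */
		unsigned int end = i + 1 < nr ? de[i + 1].nameoff : used;

		printf("nid %llu type %u: %.*s\n",
		       (unsigned long long)de[i].nid, de[i].file_type,
		       (int)(end - de[i].nameoff),
		       (const char *)blk + de[i].nameoff);
	}
}

int main(void)
{
	uint8_t blk[64] = {0};
	struct demo_dirent de[2] = {
		{ .nid = 36, .nameoff = 2 * sizeof(de[0]), .file_type = 2 },
		{ .nid = 37, .nameoff = 2 * sizeof(de[0]) + 1, .file_type = 1 },
	};

	memcpy(blk, de, sizeof(de));
	blk[de[0].nameoff] = '.';
	memcpy(blk + de[1].nameoff, "file", 4);
	demo_list(blk, de[1].nameoff + 4);
	return 0;
}

This also shows why the fsck checks added above matter: a bogus nameoff or a zero-length/'/'-containing name would make the name area overlap the dirent array or escape the directory.
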
diff --git a/lib/diskbuf.c b/lib/diskbuf.c
new file mode 100644
index 0000000..8205ba5
--- /dev/null
+++ b/lib/diskbuf.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+#include "erofs/diskbuf.h"
+#include "erofs/internal.h"
+#include "erofs/print.h"
+#include <stdio.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+/* A simple approach to avoid creating too many temporary files */
+static struct erofs_diskbufstrm {
+	u64 count;
+	u64 tailoffset, devpos;
+	int fd;
+	unsigned int alignsize;
+	bool locked;
+} *dbufstrm;
+
+int erofs_diskbuf_getfd(struct erofs_diskbuf *db, u64 *fpos)
+{
+	const struct erofs_diskbufstrm *strm = db->sp;
+	u64 offset;
+
+	if (!strm)
+		return -1;
+	offset = db->offset + strm->devpos;
+	if (lseek(strm->fd, offset, SEEK_SET) != offset)
+		return -E2BIG;
+	if (fpos)
+		*fpos = offset;
+	return strm->fd;
+}
+
+int erofs_diskbuf_reserve(struct erofs_diskbuf *db, int sid, u64 *off)
+{
+	struct erofs_diskbufstrm *strm = dbufstrm + sid;
+
+	if (strm->tailoffset & (strm->alignsize - 1)) {
+		strm->tailoffset = round_up(strm->tailoffset, strm->alignsize);
+		if (lseek(strm->fd, strm->tailoffset + strm->devpos,
+			  SEEK_SET) != strm->tailoffset + strm->devpos)
+			return -EIO;
+	}
+	db->offset = strm->tailoffset;
+	if (off)
+		*off = db->offset + strm->devpos;
+	db->sp = strm;
+	++strm->count;
+	strm->locked = true;	/* TODO: need a real lock for MT */
+	return strm->fd;
+}
+
+void erofs_diskbuf_commit(struct erofs_diskbuf *db, u64 len)
+{
+	struct erofs_diskbufstrm *strm = db->sp;
+
+	DBG_BUGON(!strm);
+	DBG_BUGON(!strm->locked);
+	DBG_BUGON(strm->tailoffset != db->offset);
+	strm->tailoffset += len;
+}
+
+void erofs_diskbuf_close(struct erofs_diskbuf *db)
+{
+	struct erofs_diskbufstrm *strm = db->sp;
+
+	DBG_BUGON(!strm);
+	DBG_BUGON(strm->count <= 1);
+	--strm->count;
+	db->sp = NULL;
+}
+
+int erofs_tmpfile(void)
+{
+#define	TRAILER		"tmp.XXXXXXXXXX"
+	char buf[PATH_MAX];
+	int fd;
+	umode_t u;
+
+	(void)snprintf(buf, sizeof(buf), "%s/" TRAILER,
+		       getenv("TMPDIR") ?: "/tmp");
+
+	fd = mkstemp(buf);
+	if (fd < 0)
+		return -errno;
+
+	unlink(buf);
+	u = umask(0);
+	(void)umask(u);
+	(void)fchmod(fd, 0666 & ~u);
+	return fd;
+}
+
+int erofs_diskbuf_init(unsigned int nstrms)
+{
+	struct erofs_diskbufstrm *strm;
+
+	strm = calloc(nstrms + 1, sizeof(*strm));
+	if (!strm)
+		return -ENOMEM;
+	strm[nstrms].fd = -1;
+	dbufstrm = strm;
+
+	for (; strm < dbufstrm + nstrms; ++strm) {
+		struct stat st;
+
+		/* try to use the devfd for regfiles on stream 0 */
+		if (strm == dbufstrm && sbi.devsz == INT64_MAX) {
+			strm->devpos = 1ULL << 40;
+			if (!ftruncate(sbi.devfd, strm->devpos << 1)) {
+				strm->fd = dup(sbi.devfd);
+				if (lseek(strm->fd, strm->devpos,
+					  SEEK_SET) != strm->devpos)
+					return -EIO;
+				goto setupone;
+			}
+		}
+		strm->devpos = 0;
+		strm->fd = erofs_tmpfile();
+		if (strm->fd < 0)
+			return -ENOSPC;
+setupone:
+		strm->tailoffset = 0;
+		strm->count = 1;
+		if (fstat(strm->fd, &st))
+			return -errno;
+		strm->alignsize = max_t(u32, st.st_blksize, getpagesize());
+	}
+	return 0;
+}
+
+void erofs_diskbuf_exit(void)
+{
+	struct erofs_diskbufstrm *strm;
+
+	if (!dbufstrm)
+		return;
+
+	for (strm = dbufstrm; strm->fd >= 0; ++strm) {
+		DBG_BUGON(strm->count != 1);
+
+		close(strm->fd);
+		strm->fd = -1;
+	}
+}
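
The diskbuf helpers above stage file data in temporary files (or, when the target is a regular file, well past the end of the output image) so that sources which cannot be re-read, such as tar streams, can be revisited later. A rough usage sketch, assuming erofs_diskbuf_init(1) has already set up stream 0 and that struct erofs_diskbuf comes from include/erofs/diskbuf.h (demo_stage()/demo_reload() are hypothetical callers, not part of the patch):

#include <unistd.h>
#include <errno.h>
#include "erofs/diskbuf.h"

/* Stage a buffer on stream 0 so it can be re-read later. */
static int demo_stage(struct erofs_diskbuf *db, const void *data, u64 len)
{
	u64 pos;
	int fd = erofs_diskbuf_reserve(db, 0, &pos);

	if (fd < 0)
		return fd;
	if (pwrite(fd, data, len, pos) != (ssize_t)len)
		return -errno;
	erofs_diskbuf_commit(db, len);	/* advance the stream tail */
	return 0;
}

/* Read the staged data back through the (fd, offset) pair. */
static int demo_reload(struct erofs_diskbuf *db, void *data, u64 len)
{
	u64 pos;
	int fd = erofs_diskbuf_getfd(db, &pos);

	if (fd < 0)
		return fd;
	if (pread(fd, data, len, pos) != (ssize_t)len)
		return -errno;
	return 0;
}

erofs_diskbuf_close() drops the reference once the staged data has been consumed, and erofs_diskbuf_exit() tears the backing streams down.
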
diff --git a/lib/fragments.c b/lib/fragments.c
new file mode 100644
index 0000000..d4f6be1
--- /dev/null
+++ b/lib/fragments.c
@@ -0,0 +1,351 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+/*
+ * Copyright (C), 2022, Coolpad Group Limited.
+ * Created by Yue Hu <huyue2@coolpad.com>
+ */
+#ifndef _LARGEFILE_SOURCE
+#define _LARGEFILE_SOURCE
+#endif
+#ifndef _LARGEFILE64_SOURCE
+#define _LARGEFILE64_SOURCE
+#endif
+#ifndef _FILE_OFFSET_BITS
+#define _FILE_OFFSET_BITS 64
+#endif
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include "erofs/err.h"
+#include "erofs/inode.h"
+#include "erofs/compress.h"
+#include "erofs/print.h"
+#include "erofs/internal.h"
+#include "erofs/fragments.h"
+
+struct erofs_fragment_dedupe_item {
+	struct list_head	list;
+	unsigned int		length;
+	erofs_off_t		pos;
+	u8			data[];
+};
+
+#define EROFS_TOF_HASHLEN		16
+
+#define FRAGMENT_HASHSIZE		65536
+#define FRAGMENT_HASH(c)		((c) & (FRAGMENT_HASHSIZE - 1))
+
+static struct list_head dupli_frags[FRAGMENT_HASHSIZE];
+static FILE *packedfile;
+const char *erofs_frags_packedname = "packed_file";
+
+#ifndef HAVE_LSEEK64
+#define erofs_lseek64 lseek
+#else
+#define erofs_lseek64 lseek64
+#endif
+
+static int z_erofs_fragments_dedupe_find(struct erofs_inode *inode, int fd,
+					 u32 crc)
+{
+	struct erofs_fragment_dedupe_item *cur, *di = NULL;
+	struct list_head *head;
+	u8 *data;
+	unsigned int length, e2, deduped;
+	erofs_off_t pos;
+	int ret;
+
+	head = &dupli_frags[FRAGMENT_HASH(crc)];
+	if (list_empty(head))
+		return 0;
+
+	/* XXX: no need to read this much for smaller files? */
+	if (inode->i_size < EROFS_CONFIG_COMPR_MAX_SZ)
+		length = inode->i_size;
+	else
+		length = EROFS_CONFIG_COMPR_MAX_SZ;
+
+	data = malloc(length);
+	if (!data)
+		return -ENOMEM;
+
+	if (erofs_lseek64(fd, inode->i_size - length, SEEK_SET) < 0) {
+		ret = -errno;
+		goto out;
+	}
+
+	ret = read(fd, data, length);
+	if (ret != length) {
+		ret = -errno;
+		goto out;
+	}
+
+	DBG_BUGON(length <= EROFS_TOF_HASHLEN);
+	e2 = length - EROFS_TOF_HASHLEN;
+	deduped = 0;
+
+	list_for_each_entry(cur, head, list) {
+		unsigned int e1, mn, i = 0;
+
+		DBG_BUGON(cur->length <= EROFS_TOF_HASHLEN);
+		e1 = cur->length - EROFS_TOF_HASHLEN;
+
+		if (memcmp(cur->data + e1, data + e2, EROFS_TOF_HASHLEN))
+			continue;
+
+		mn = min(e1, e2);
+		while (i < mn && cur->data[e1 - i - 1] == data[e2 - i - 1])
+			++i;
+
+		if (!di || i + EROFS_TOF_HASHLEN > deduped) {
+			deduped = i + EROFS_TOF_HASHLEN;
+			di = cur;
+
+			/* full match */
+			if (i == e2)
+				break;
+		}
+	}
+	if (!di)
+		goto out;
+
+	DBG_BUGON(di->length < deduped);
+	pos = di->pos + di->length - deduped;
+	/* let's read more to dedupe as long as we can */
+	if (deduped == di->length) {
+		fflush(packedfile);
+
+		while (deduped < inode->i_size && pos) {
+			char buf[2][16384];
+			unsigned int sz = min_t(unsigned int, pos,
+						sizeof(buf[0]));
+
+			if (pread(fileno(packedfile), buf[0], sz,
+				  pos - sz) != sz)
+				break;
+			if (pread(fd, buf[1], sz,
+				  inode->i_size - deduped - sz) != sz)
+				break;
+
+			if (memcmp(buf[0], buf[1], sz))
+				break;
+			pos -= sz;
+			deduped += sz;
+		}
+	}
+	inode->fragment_size = deduped;
+	inode->fragmentoff = pos;
+
+	erofs_dbg("Dedupe %u tail data at %llu", inode->fragment_size,
+		  inode->fragmentoff | 0ULL);
+out:
+	free(data);
+	return ret;
+}
+
+int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 *tofcrc)
+{
+	u8 data_to_hash[EROFS_TOF_HASHLEN];
+	int ret;
+
+	if (inode->i_size <= EROFS_TOF_HASHLEN)
+		return 0;
+
+	if (erofs_lseek64(fd, inode->i_size - EROFS_TOF_HASHLEN, SEEK_SET) < 0)
+		return -errno;
+
+	ret = read(fd, data_to_hash, EROFS_TOF_HASHLEN);
+	if (ret != EROFS_TOF_HASHLEN)
+		return -errno;
+
+	*tofcrc = erofs_crc32c(~0, data_to_hash, EROFS_TOF_HASHLEN);
+	ret = z_erofs_fragments_dedupe_find(inode, fd, *tofcrc);
+	if (ret < 0)
+		return ret;
+	ret = lseek(fd, 0, SEEK_SET);
+	if (ret < 0)
+		return -errno;
+	return 0;
+}
+
+static int z_erofs_fragments_dedupe_insert(void *data, unsigned int len,
+					   erofs_off_t pos, u32 crc)
+{
+	struct erofs_fragment_dedupe_item *di;
+
+	if (len <= EROFS_TOF_HASHLEN)
+		return 0;
+	if (len > EROFS_CONFIG_COMPR_MAX_SZ) {
+		data += len - EROFS_CONFIG_COMPR_MAX_SZ;
+		pos += len - EROFS_CONFIG_COMPR_MAX_SZ;
+		len = EROFS_CONFIG_COMPR_MAX_SZ;
+	}
+	di = malloc(sizeof(*di) + len);
+	if (!di)
+		return -ENOMEM;
+
+	memcpy(di->data, data, len);
+	di->length = len;
+	di->pos = pos;
+
+	list_add_tail(&di->list, &dupli_frags[FRAGMENT_HASH(crc)]);
+	return 0;
+}
+
+int z_erofs_fragments_init(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < FRAGMENT_HASHSIZE; ++i)
+		init_list_head(&dupli_frags[i]);
+	return 0;
+}
+
+void z_erofs_fragments_exit(void)
+{
+	struct erofs_fragment_dedupe_item *di, *n;
+	struct list_head *head;
+	unsigned int i;
+
+	for (i = 0; i < FRAGMENT_HASHSIZE; ++i) {
+		head = &dupli_frags[i];
+
+		list_for_each_entry_safe(di, n, head, list)
+			free(di);
+	}
+}
+
+void z_erofs_fragments_commit(struct erofs_inode *inode)
+{
+	if (!inode->fragment_size)
+		return;
+	/*
+	 * If the packed inode is larger than 4GiB, the full fragmentoff
+	 * will be recorded by switching to the noncompact layout anyway.
+	 */
+	if (inode->fragmentoff >> 32)
+		inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
+
+	inode->z_advise |= Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
+	erofs_sb_set_fragments(inode->sbi);
+}
+
+int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd,
+			      u32 tofcrc)
+{
+#ifdef HAVE_FTELLO64
+	off64_t offset = ftello64(packedfile);
+#else
+	off_t offset = ftello(packedfile);
+#endif
+	char *memblock;
+	int rc;
+
+	if (offset < 0)
+		return -errno;
+
+	inode->fragmentoff = (erofs_off_t)offset;
+	inode->fragment_size = inode->i_size;
+
+	memblock = mmap(NULL, inode->i_size, PROT_READ, MAP_SHARED, fd, 0);
+	if (memblock == MAP_FAILED || !memblock) {
+		unsigned long long remaining = inode->fragment_size;
+
+		memblock = NULL;
+		while (remaining) {
+			char buf[32768];
+			unsigned int sz = min_t(unsigned int, remaining,
+						sizeof(buf));
+
+			rc = read(fd, buf, sz);
+			if (rc != sz) {
+				if (rc < 0)
+					rc = -errno;
+				else
+					rc = -EAGAIN;
+				goto out;
+			}
+			if (fwrite(buf, sz, 1, packedfile) != 1) {
+				rc = -EIO;
+				goto out;
+			}
+			remaining -= sz;
+		}
+		rc = lseek(fd, 0, SEEK_SET);
+		if (rc < 0) {
+			rc = -errno;
+			goto out;
+		}
+	} else if (fwrite(memblock, inode->fragment_size, 1, packedfile) != 1) {
+		rc = -EIO;
+		goto out;
+	}
+
+	erofs_dbg("Recording %u fragment data at %lu", inode->fragment_size,
+		  inode->fragmentoff);
+
+	if (memblock)
+		rc = z_erofs_fragments_dedupe_insert(memblock,
+			inode->fragment_size, inode->fragmentoff, tofcrc);
+out:
+	if (memblock)
+		munmap(memblock, inode->i_size);
+	return rc;
+}
+
+int z_erofs_pack_fragments(struct erofs_inode *inode, void *data,
+			   unsigned int len, u32 tofcrc)
+{
+#ifdef HAVE_FTELLO64
+	off64_t offset = ftello64(packedfile);
+#else
+	off_t offset = ftello(packedfile);
+#endif
+	int ret;
+
+	if (offset < 0)
+		return -errno;
+
+	inode->fragmentoff = (erofs_off_t)offset;
+	inode->fragment_size = len;
+
+	if (fwrite(data, len, 1, packedfile) != 1)
+		return -EIO;
+
+	erofs_dbg("Recording %u fragment data at %lu", inode->fragment_size,
+		  inode->fragmentoff);
+
+	ret = z_erofs_fragments_dedupe_insert(data, len, inode->fragmentoff,
+					      tofcrc);
+	if (ret)
+		return ret;
+	return len;
+}
+
+struct erofs_inode *erofs_mkfs_build_packedfile(void)
+{
+	fflush(packedfile);
+
+	return erofs_mkfs_build_special_from_fd(fileno(packedfile),
+						EROFS_PACKED_INODE);
+}
+
+void erofs_packedfile_exit(void)
+{
+	if (packedfile)
+		fclose(packedfile);
+}
+
+FILE *erofs_packedfile_init(void)
+{
+#ifdef HAVE_TMPFILE64
+	packedfile = tmpfile64();
+#else
+	packedfile = tmpfile();
+#endif
+	if (!packedfile)
+		return ERR_PTR(-ENOMEM);
+	return packedfile;
+}
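
The fragment dedupe path above buckets candidates by the CRC32C of their last EROFS_TOF_HASHLEN (16) bytes and then extends the match backwards byte by byte. A tiny self-contained sketch of that backwards extension step (demo only; the real code additionally keeps reading further back through the packed file when an entire cached candidate matches):

#include <stdio.h>

/* longest common suffix of two buffers, the core of the matching loop above */
static unsigned int common_suffix_len(const unsigned char *a, unsigned int alen,
				      const unsigned char *b, unsigned int blen)
{
	unsigned int i = 0, mn = alen < blen ? alen : blen;

	while (i < mn && a[alen - i - 1] == b[blen - i - 1])
		++i;
	return i;
}

int main(void)
{
	const unsigned char x[] = "....shared-tail-data";
	const unsigned char y[] = "other-prefix/shared-tail-data";

	printf("%u bytes of tail data can be deduplicated\n",
	       common_suffix_len(x, sizeof(x) - 1, y, sizeof(y) - 1));
	return 0;
}

Here the two files share a 16-byte tail, so the second one only needs to record an offset/length pair into the packed inode instead of storing the fragment again.
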
diff --git a/lib/hashmap.c b/lib/hashmap.c
index e11bd8d..45916ae 100644
--- a/lib/hashmap.c
+++ b/lib/hashmap.c
@@ -149,20 +149,21 @@
 	alloc_table(map, size);
 }
 
-void hashmap_free(struct hashmap *map, int free_entries)
+int hashmap_free(struct hashmap *map)
 {
-	if (!map || !map->table)
-		return;
-	if (free_entries) {
+	if (map && map->table) {
 		struct hashmap_iter iter;
 		struct hashmap_entry *e;
 
 		hashmap_iter_init(map, &iter);
-		while ((e = hashmap_iter_next(&iter)))
-			free(e);
+		e = hashmap_iter_next(&iter);
+		if (e)
+			return -EBUSY;
+
+		free(map->table);
+		memset(map, 0, sizeof(*map));
 	}
-	free(map->table);
-	memset(map, 0, sizeof(*map));
+	return 0;
 }
 
 void *hashmap_get(const struct hashmap *map, const void *key, const void *keydata)
@@ -194,10 +195,13 @@
 		rehash(map, map->tablesize << HASHMAP_RESIZE_BITS);
 }
 
-void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata)
+void *hashmap_remove(struct hashmap *map, const void *entry)
 {
 	struct hashmap_entry *old;
-	struct hashmap_entry **e = find_entry_ptr(map, key, keydata);
+	struct hashmap_entry **e = &map->table[bucket(map, entry)];
+
+	while (*e && *e != entry)
+		e = &(*e)->next;
 
 	if (!*e)
 		return NULL;
@@ -214,14 +218,6 @@
 	return old;
 }
 
-void *hashmap_put(struct hashmap *map, void *entry)
-{
-	struct hashmap_entry *old = hashmap_remove(map, entry, NULL);
-
-	hashmap_add(map, entry);
-	return old;
-}
-
 void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter)
 {
 	iter->map = map;
diff --git a/lib/inode.c b/lib/inode.c
index f192510..8409ccd 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -16,6 +16,7 @@
 #endif
 #include <dirent.h>
 #include "erofs/print.h"
+#include "erofs/diskbuf.h"
 #include "erofs/inode.h"
 #include "erofs/cache.h"
 #include "erofs/io.h"
@@ -25,6 +26,7 @@
 #include "erofs/block_list.h"
 #include "erofs/compress_hints.h"
 #include "erofs/blobchunk.h"
+#include "erofs/fragments.h"
 #include "liberofs_private.h"
 
 #define S_SHIFT                 12
@@ -43,6 +45,25 @@
 	return erofs_ftype_by_mode[(mode & S_IFMT) >> S_SHIFT];
 }
 
+static const unsigned char erofs_dtype_by_ftype[EROFS_FT_MAX] = {
+	[EROFS_FT_UNKNOWN]	= DT_UNKNOWN,
+	[EROFS_FT_REG_FILE]	= DT_REG,
+	[EROFS_FT_DIR]		= DT_DIR,
+	[EROFS_FT_CHRDEV]	= DT_CHR,
+	[EROFS_FT_BLKDEV]	= DT_BLK,
+	[EROFS_FT_FIFO]		= DT_FIFO,
+	[EROFS_FT_SOCK]		= DT_SOCK,
+	[EROFS_FT_SYMLINK]	= DT_LNK
+};
+
+unsigned char erofs_ftype_to_dtype(unsigned int filetype)
+{
+	if (filetype >= EROFS_FT_MAX)
+		return DT_UNKNOWN;
+
+	return erofs_dtype_by_ftype[filetype];
+}
+
 #define NR_INODE_HASHTABLE	16384
 
 struct list_head inode_hashtable[NR_INODE_HASHTABLE];
@@ -55,10 +76,10 @@
 		init_list_head(&inode_hashtable[i]);
 }
 
-static struct erofs_inode *erofs_igrab(struct erofs_inode *inode)
+void erofs_insert_ihash(struct erofs_inode *inode, dev_t dev, ino_t ino)
 {
-	++inode->i_count;
-	return inode;
+	list_add(&inode->i_hash,
+		 &inode_hashtable[(ino ^ dev) % NR_INODE_HASHTABLE]);
 }
 
 /* get the inode from the (source) inode # */
@@ -99,6 +120,14 @@
 	if (inode->eof_tailraw)
 		free(inode->eof_tailraw);
 	list_del(&inode->i_hash);
+	if (inode->i_srcpath)
+		free(inode->i_srcpath);
+	if (inode->with_diskbuf) {
+		erofs_diskbuf_close(inode->i_diskbuf);
+		free(inode->i_diskbuf);
+	} else if (inode->i_link) {
+		free(inode->i_link);
+	}
 	free(inode);
 	return 0;
 }
@@ -120,7 +149,8 @@
 
 /* allocate main data for a inode */
 static int __allocate_inode_bh_data(struct erofs_inode *inode,
-				    unsigned long nblocks)
+				    unsigned long nblocks,
+				    int type)
 {
 	struct erofs_buffer_head *bh;
 	int ret;
@@ -132,7 +162,7 @@
 	}
 
 	/* allocate main data buffer */
-	bh = erofs_balloc(DATA, blknr_to_addr(nblocks), 0, 0);
+	bh = erofs_balloc(type, erofs_pos(inode->sbi, nblocks), 0, 0);
 	if (IS_ERR(bh))
 		return PTR_ERR(bh);
 
@@ -157,11 +187,49 @@
 	return strcmp(da->name, db->name);
 }
 
-int erofs_prepare_dir_file(struct erofs_inode *dir, unsigned int nr_subdirs)
+static int erofs_prepare_dir_layout(struct erofs_inode *dir,
+				    unsigned int nr_subdirs)
 {
+	struct erofs_sb_info *sbi = dir->sbi;
 	struct erofs_dentry *d, *n, **sorted_d;
-	unsigned int d_size, i_nlink, i;
-	int ret;
+	unsigned int i;
+	unsigned int d_size = 0;
+
+	sorted_d = malloc(nr_subdirs * sizeof(d));
+	if (!sorted_d)
+		return -ENOMEM;
+	i = 0;
+	list_for_each_entry_safe(d, n, &dir->i_subdirs, d_child) {
+		list_del(&d->d_child);
+		sorted_d[i++] = d;
+	}
+	DBG_BUGON(i != nr_subdirs);
+	qsort(sorted_d, nr_subdirs, sizeof(d), comp_subdir);
+	for (i = 0; i < nr_subdirs; i++)
+		list_add_tail(&sorted_d[i]->d_child, &dir->i_subdirs);
+	free(sorted_d);
+
+	/* let's calculate dir size */
+	list_for_each_entry(d, &dir->i_subdirs, d_child) {
+		int len = strlen(d->name) + sizeof(struct erofs_dirent);
+
+		if (erofs_blkoff(sbi, d_size) + len > erofs_blksiz(sbi))
+			d_size = round_up(d_size, erofs_blksiz(sbi));
+		d_size += len;
+	}
+	dir->i_size = d_size;
+
+	/* no compression for all dirs */
+	dir->datalayout = EROFS_INODE_FLAT_INLINE;
+
+	/* it will be used in erofs_prepare_inode_buffer */
+	dir->idata_size = d_size % erofs_blksiz(sbi);
+	return 0;
+}
+
+int erofs_init_empty_dir(struct erofs_inode *dir)
+{
+	struct erofs_dentry *d;
 
 	/* dot is pointed to the current dir inode */
 	d = erofs_d_alloc(dir, ".");
@@ -177,56 +245,21 @@
 	d->inode = erofs_igrab(dir->i_parent);
 	d->type = EROFS_FT_DIR;
 
-	/* sort subdirs */
-	nr_subdirs += 2;
-	sorted_d = malloc(nr_subdirs * sizeof(d));
-	if (!sorted_d)
-		return -ENOMEM;
-	i = 0;
-	list_for_each_entry_safe(d, n, &dir->i_subdirs, d_child) {
-		list_del(&d->d_child);
-		sorted_d[i++] = d;
-	}
-	DBG_BUGON(i != nr_subdirs);
-	qsort(sorted_d, nr_subdirs, sizeof(d), comp_subdir);
-	for (i = 0; i < nr_subdirs; i++)
-		list_add_tail(&sorted_d[i]->d_child, &dir->i_subdirs);
-	free(sorted_d);
+	dir->i_nlink = 2;
+	return 0;
+}
 
-	/* let's calculate dir size and update i_nlink */
-	d_size = 0;
-	i_nlink = 0;
-	list_for_each_entry(d, &dir->i_subdirs, d_child) {
-		int len = strlen(d->name) + sizeof(struct erofs_dirent);
+int erofs_prepare_dir_file(struct erofs_inode *dir, unsigned int nr_subdirs)
+{
+	int ret;
 
-		if (d_size % EROFS_BLKSIZ + len > EROFS_BLKSIZ)
-			d_size = round_up(d_size, EROFS_BLKSIZ);
-		d_size += len;
-
-		i_nlink += (d->type == EROFS_FT_DIR);
-	}
-	dir->i_size = d_size;
-	/*
-	 * if there're too many subdirs as compact form, set nlink=1
-	 * rather than upgrade to use extented form instead.
-	 */
-	if (i_nlink > USHRT_MAX &&
-	    dir->inode_isize == sizeof(struct erofs_inode_compact))
-		dir->i_nlink = 1;
-	else
-		dir->i_nlink = i_nlink;
-
-	/* no compression for all dirs */
-	dir->datalayout = EROFS_INODE_FLAT_INLINE;
-
-	/* allocate dir main data */
-	ret = __allocate_inode_bh_data(dir, erofs_blknr(d_size));
+	ret = erofs_init_empty_dir(dir);
 	if (ret)
 		return ret;
 
-	/* it will be used in erofs_prepare_inode_buffer */
-	dir->idata_size = d_size % EROFS_BLKSIZ;
-	return 0;
+	/* sort subdirs */
+	nr_subdirs += 2;
+	return erofs_prepare_dir_layout(dir, nr_subdirs);
 }
 
 static void fill_dirblock(char *buf, unsigned int size, unsigned int q,
@@ -253,13 +286,42 @@
 	memset(buf + q, 0, size - q);
 }
 
-static int write_dirblock(unsigned int q, struct erofs_dentry *head,
+static int write_dirblock(struct erofs_sb_info *sbi,
+			  unsigned int q, struct erofs_dentry *head,
 			  struct erofs_dentry *end, erofs_blk_t blkaddr)
 {
-	char buf[EROFS_BLKSIZ];
+	char buf[EROFS_MAX_BLOCK_SIZE];
 
-	fill_dirblock(buf, EROFS_BLKSIZ, q, head, end);
-	return blk_write(buf, blkaddr, 1);
+	fill_dirblock(buf, erofs_blksiz(sbi), q, head, end);
+	return blk_write(sbi, buf, blkaddr, 1);
+}
+
+erofs_nid_t erofs_lookupnid(struct erofs_inode *inode)
+{
+	struct erofs_buffer_head *const bh = inode->bh;
+	struct erofs_sb_info *sbi = inode->sbi;
+	erofs_off_t off, meta_offset;
+
+	if (!bh || (long long)inode->nid > 0)
+		return inode->nid;
+
+	erofs_mapbh(bh->block);
+	off = erofs_btell(bh, false);
+
+	meta_offset = erofs_pos(sbi, sbi->meta_blkaddr);
+	DBG_BUGON(off < meta_offset);
+	inode->nid = (off - meta_offset) >> EROFS_ISLOTBITS;
+	erofs_dbg("Assign nid %llu to file %s (mode %05o)",
+		  inode->nid, inode->i_srcpath, inode->i_mode);
+	return inode->nid;
+}
+
+static void erofs_d_invalidate(struct erofs_dentry *d)
+{
+	struct erofs_inode *const inode = d->inode;
+
+	d->nid = erofs_lookupnid(inode);
+	erofs_iput(inode);
 }
 
 static int erofs_write_dir_file(struct erofs_inode *dir)
@@ -267,18 +329,25 @@
 	struct erofs_dentry *head = list_first_entry(&dir->i_subdirs,
 						     struct erofs_dentry,
 						     d_child);
+	struct erofs_sb_info *sbi = dir->sbi;
 	struct erofs_dentry *d;
 	int ret;
 	unsigned int q, used, blkno;
 
 	q = used = blkno = 0;
 
+	/* allocate dir main data */
+	ret = __allocate_inode_bh_data(dir, erofs_blknr(sbi, dir->i_size), DIRA);
+	if (ret)
+		return ret;
+
 	list_for_each_entry(d, &dir->i_subdirs, d_child) {
 		const unsigned int len = strlen(d->name) +
 			sizeof(struct erofs_dirent);
 
-		if (used + len > EROFS_BLKSIZ) {
-			ret = write_dirblock(q, head, d,
+		erofs_d_invalidate(d);
+		if (used + len > erofs_blksiz(sbi)) {
+			ret = write_dirblock(sbi, q, head, d,
 					     dir->u.i_blkaddr + blkno);
 			if (ret)
 				return ret;
@@ -291,13 +360,13 @@
 		q += sizeof(struct erofs_dirent);
 	}
 
-	DBG_BUGON(used > EROFS_BLKSIZ);
-	if (used == EROFS_BLKSIZ) {
-		DBG_BUGON(dir->i_size % EROFS_BLKSIZ);
+	DBG_BUGON(used > erofs_blksiz(sbi));
+	if (used == erofs_blksiz(sbi)) {
+		DBG_BUGON(dir->i_size % erofs_blksiz(sbi));
 		DBG_BUGON(dir->idata_size);
-		return write_dirblock(q, head, d, dir->u.i_blkaddr + blkno);
+		return write_dirblock(sbi, q, head, d, dir->u.i_blkaddr + blkno);
 	}
-	DBG_BUGON(used != dir->i_size % EROFS_BLKSIZ);
+	DBG_BUGON(used != dir->i_size % erofs_blksiz(sbi));
 	if (used) {
 		/* fill tail-end dir block */
 		dir->idata = malloc(used);
@@ -309,25 +378,26 @@
 	return 0;
 }
 
-static int erofs_write_file_from_buffer(struct erofs_inode *inode, char *buf)
+int erofs_write_file_from_buffer(struct erofs_inode *inode, char *buf)
 {
-	const unsigned int nblocks = erofs_blknr(inode->i_size);
+	struct erofs_sb_info *sbi = inode->sbi;
+	const unsigned int nblocks = erofs_blknr(sbi, inode->i_size);
 	int ret;
 
 	inode->datalayout = EROFS_INODE_FLAT_INLINE;
 
-	ret = __allocate_inode_bh_data(inode, nblocks);
+	ret = __allocate_inode_bh_data(inode, nblocks, DATA);
 	if (ret)
 		return ret;
 
 	if (nblocks)
-		blk_write(buf, inode->u.i_blkaddr, nblocks);
-	inode->idata_size = inode->i_size % EROFS_BLKSIZ;
+		blk_write(sbi, buf, inode->u.i_blkaddr, nblocks);
+	inode->idata_size = inode->i_size % erofs_blksiz(sbi);
 	if (inode->idata_size) {
 		inode->idata = malloc(inode->idata_size);
 		if (!inode->idata)
 			return -ENOMEM;
-		memcpy(inode->idata, buf + blknr_to_addr(nblocks),
+		memcpy(inode->idata, buf + erofs_pos(sbi, nblocks),
 		       inode->idata_size);
 	}
 	return 0;
@@ -345,31 +415,32 @@
 {
 	int ret;
 	unsigned int nblocks, i;
+	struct erofs_sb_info *sbi = inode->sbi;
 
 	inode->datalayout = EROFS_INODE_FLAT_INLINE;
-	nblocks = inode->i_size / EROFS_BLKSIZ;
+	nblocks = inode->i_size / erofs_blksiz(sbi);
 
-	ret = __allocate_inode_bh_data(inode, nblocks);
+	ret = __allocate_inode_bh_data(inode, nblocks, DATA);
 	if (ret)
 		return ret;
 
 	for (i = 0; i < nblocks; ++i) {
-		char buf[EROFS_BLKSIZ];
+		char buf[EROFS_MAX_BLOCK_SIZE];
 
-		ret = read(fd, buf, EROFS_BLKSIZ);
-		if (ret != EROFS_BLKSIZ) {
+		ret = read(fd, buf, erofs_blksiz(sbi));
+		if (ret != erofs_blksiz(sbi)) {
 			if (ret < 0)
 				return -errno;
 			return -EAGAIN;
 		}
 
-		ret = blk_write(buf, inode->u.i_blkaddr + i, 1);
+		ret = blk_write(sbi, buf, inode->u.i_blkaddr + i, 1);
 		if (ret)
 			return ret;
 	}
 
 	/* read the tail-end data */
-	inode->idata_size = inode->i_size % EROFS_BLKSIZ;
+	inode->idata_size = inode->i_size % erofs_blksiz(sbi);
 	if (inode->idata_size) {
 		inode->idata = malloc(inode->idata_size);
 		if (!inode->idata)
@@ -386,14 +457,11 @@
 	return 0;
 }
 
-int erofs_write_file(struct erofs_inode *inode)
+int erofs_write_file(struct erofs_inode *inode, int fd, u64 fpos)
 {
-	int ret, fd;
+	int ret;
 
-	if (!inode->i_size) {
-		inode->datalayout = EROFS_INODE_FLAT_PLAIN;
-		return 0;
-	}
+	DBG_BUGON(!inode->i_size);
 
 	if (cfg.c_chunkbits) {
 		inode->u.chunkbits = cfg.c_chunkbits;
@@ -401,29 +469,27 @@
 		inode->u.chunkformat = 0;
 		if (cfg.c_force_chunkformat == FORCE_INODE_CHUNK_INDEXES)
 			inode->u.chunkformat = EROFS_CHUNK_FORMAT_INDEXES;
-		return erofs_blob_write_chunked_file(inode);
+		return erofs_blob_write_chunked_file(inode, fd, fpos);
 	}
 
-	if (cfg.c_compr_alg_master && erofs_file_is_compressible(inode)) {
-		ret = erofs_write_compressed_file(inode);
-
+	if (cfg.c_compr_alg[0] && erofs_file_is_compressible(inode)) {
+		ret = erofs_write_compressed_file(inode, fd);
 		if (!ret || ret != -ENOSPC)
 			return ret;
+
+		ret = lseek(fd, fpos, SEEK_SET);
+		if (ret < 0)
+			return -errno;
 	}
 
 	/* fallback to all data uncompressed */
-	fd = open(inode->i_srcpath, O_RDONLY | O_BINARY);
-	if (fd < 0)
-		return -errno;
-
-	ret = write_uncompressed_file_from_fd(inode, fd);
-	close(fd);
-	return ret;
+	return write_uncompressed_file_from_fd(inode, fd);
 }
 
 static bool erofs_bh_flush_write_inode(struct erofs_buffer_head *bh)
 {
 	struct erofs_inode *const inode = bh->fsprivate;
+	struct erofs_sb_info *sbi = inode->sbi;
 	const u16 icount = EROFS_INODE_XATTR_ICOUNT(inode->xattr_isize);
 	erofs_off_t off = erofs_btell(bh, false);
 	union {
@@ -510,18 +576,18 @@
 		BUG_ON(1);
 	}
 
-	ret = dev_write(&u, off, inode->inode_isize);
+	ret = dev_write(sbi, &u, off, inode->inode_isize);
 	if (ret)
 		return false;
 	off += inode->inode_isize;
 
 	if (inode->xattr_isize) {
-		char *xattrs = erofs_export_xattr_ibody(&inode->i_xattrs,
-							inode->xattr_isize);
+		char *xattrs = erofs_export_xattr_ibody(inode);
+
 		if (IS_ERR(xattrs))
 			return false;
 
-		ret = dev_write(xattrs, off, inode->xattr_isize);
+		ret = dev_write(sbi, xattrs, off, inode->xattr_isize);
 		free(xattrs);
 		if (ret)
 			return false;
@@ -536,8 +602,8 @@
 				return false;
 		} else {
 			/* write compression metadata */
-			off = Z_EROFS_VLE_EXTENT_ALIGN(off);
-			ret = dev_write(inode->compressmeta, off,
+			off = roundup(off, 8);
+			ret = dev_write(sbi, inode->compressmeta, off,
 					inode->extent_isize);
 			if (ret)
 				return false;
@@ -556,6 +622,7 @@
 
 static int erofs_prepare_tail_block(struct erofs_inode *inode)
 {
+	struct erofs_sb_info *sbi = inode->sbi;
 	struct erofs_buffer_head *bh;
 	int ret;
 
@@ -563,23 +630,16 @@
 		return 0;
 
 	bh = inode->bh_data;
-	if (!bh) {
-		bh = erofs_balloc(DATA, EROFS_BLKSIZ, 0, 0);
-		if (IS_ERR(bh))
-			return PTR_ERR(bh);
-		bh->op = &erofs_skip_write_bhops;
-
-		/* get blkaddr of bh */
-		ret = erofs_mapbh(bh->block);
-		DBG_BUGON(ret < 0);
-		inode->u.i_blkaddr = bh->block->blkaddr;
-
-		inode->bh_data = bh;
-		return 0;
+	if (bh) {
+		/* expand a block as the tail block (should be successful) */
+		ret = erofs_bh_balloon(bh, erofs_blksiz(sbi));
+		if (ret != erofs_blksiz(sbi)) {
+			DBG_BUGON(1);
+			return -EIO;
+		}
+	} else {
+		inode->lazy_tailblock = true;
 	}
-	/* expend a block as the tail block (should be successful) */
-	ret = erofs_bh_balloon(bh, EROFS_BLKSIZ);
-	DBG_BUGON(ret != EROFS_BLKSIZ);
 	return 0;
 }
 
@@ -592,15 +652,14 @@
 
 	inodesize = inode->inode_isize + inode->xattr_isize;
 	if (inode->extent_isize)
-		inodesize = Z_EROFS_VLE_EXTENT_ALIGN(inodesize) +
-			    inode->extent_isize;
+		inodesize = roundup(inodesize, 8) + inode->extent_isize;
 
 	/* TODO: tailpacking inline of chunk-based format isn't finalized */
 	if (inode->datalayout == EROFS_INODE_CHUNK_BASED)
 		goto noinline;
 
 	if (!is_inode_layout_compression(inode)) {
-		if (cfg.c_noinline_data && S_ISREG(inode->i_mode)) {
+		if (!cfg.c_inline_data && S_ISREG(inode->i_mode)) {
 			inode->datalayout = EROFS_INODE_FLAT_PLAIN;
 			goto noinline;
 		}
@@ -637,7 +696,7 @@
 			erofs_dbg("Inline %scompressed data (%u bytes) to %s",
 				  inode->compressed_idata ? "" : "un",
 				  inode->idata_size, inode->i_srcpath);
-			erofs_sb_set_ztailpacking();
+			erofs_sb_set_ztailpacking(inode->sbi);
 		} else {
 			inode->datalayout = EROFS_INODE_FLAT_INLINE;
 			erofs_dbg("Inline tail-end data (%u bytes) to %s",
@@ -665,7 +724,7 @@
 	const erofs_off_t off = erofs_btell(bh, false);
 	int ret;
 
-	ret = dev_write(inode->idata, off, inode->idata_size);
+	ret = dev_write(inode->sbi, inode->idata, off, inode->idata_size);
 	if (ret)
 		return false;
 
@@ -683,6 +742,7 @@
 
 static int erofs_write_tail_end(struct erofs_inode *inode)
 {
+	struct erofs_sb_info *sbi = inode->sbi;
 	struct erofs_buffer_head *bh, *ibh;
 
 	bh = inode->bh_data;
@@ -702,25 +762,47 @@
 		int ret;
 		erofs_off_t pos, zero_pos;
 
-		erofs_mapbh(bh->block);
-		pos = erofs_btell(bh, true) - EROFS_BLKSIZ;
+		if (!bh) {
+			bh = erofs_balloc(DATA, erofs_blksiz(sbi), 0, 0);
+			if (IS_ERR(bh))
+				return PTR_ERR(bh);
+			bh->op = &erofs_skip_write_bhops;
+
+			/* get blkaddr of bh */
+			ret = erofs_mapbh(bh->block);
+			inode->u.i_blkaddr = bh->block->blkaddr;
+			inode->bh_data = bh;
+		} else {
+			if (inode->lazy_tailblock) {
+				/* expand a tail block (should be successful) */
+				ret = erofs_bh_balloon(bh, erofs_blksiz(sbi));
+				if (ret != erofs_blksiz(sbi)) {
+					DBG_BUGON(1);
+					return -EIO;
+				}
+				inode->lazy_tailblock = false;
+			}
+			ret = erofs_mapbh(bh->block);
+		}
+		DBG_BUGON(ret < 0);
+		pos = erofs_btell(bh, true) - erofs_blksiz(sbi);
 
 		/* 0'ed data should be padded at head for 0padding conversion */
-		if (erofs_sb_has_lz4_0padding() && inode->compressed_idata) {
+		if (erofs_sb_has_lz4_0padding(sbi) && inode->compressed_idata) {
 			zero_pos = pos;
-			pos += EROFS_BLKSIZ - inode->idata_size;
+			pos += erofs_blksiz(sbi) - inode->idata_size;
 		} else {
 			/* pad 0'ed data for the other cases */
 			zero_pos = pos + inode->idata_size;
 		}
-		ret = dev_write(inode->idata, pos, inode->idata_size);
+		ret = dev_write(sbi, inode->idata, pos, inode->idata_size);
 		if (ret)
 			return ret;
 
-		DBG_BUGON(inode->idata_size > EROFS_BLKSIZ);
-		if (inode->idata_size < EROFS_BLKSIZ) {
-			ret = dev_fillzero(zero_pos,
-					   EROFS_BLKSIZ - inode->idata_size,
+		DBG_BUGON(inode->idata_size > erofs_blksiz(sbi));
+		if (inode->idata_size < erofs_blksiz(sbi)) {
+			ret = dev_fillzero(sbi, zero_pos,
+					   erofs_blksiz(sbi) - inode->idata_size,
 					   false);
 			if (ret)
 				return ret;
@@ -729,7 +811,7 @@
 		free(inode->idata);
 		inode->idata = NULL;
 
-		erofs_droid_blocklist_write_tail_end(inode, erofs_blknr(pos));
+		erofs_droid_blocklist_write_tail_end(inode, erofs_blknr(sbi, pos));
 	}
 out:
 	/* now bh_data can drop directly */
@@ -750,20 +832,22 @@
 		return true;
 	if (inode->i_size > UINT_MAX)
 		return true;
+	if (erofs_is_packed_inode(inode))
+		return false;
 	if (inode->i_uid > USHRT_MAX)
 		return true;
 	if (inode->i_gid > USHRT_MAX)
 		return true;
 	if (inode->i_nlink > USHRT_MAX)
 		return true;
-	if ((inode->i_mtime != sbi.build_time ||
-	     inode->i_mtime_nsec != sbi.build_time_nsec) &&
+	if ((inode->i_mtime != inode->sbi->build_time ||
+	     inode->i_mtime_nsec != inode->sbi->build_time_nsec) &&
 	    !cfg.c_ignore_mtime)
 		return true;
 	return false;
 }
 
-static u32 erofs_new_encode_dev(dev_t dev)
+u32 erofs_new_encode_dev(dev_t dev)
 {
 	const unsigned int major = major(dev);
 	const unsigned int minor = minor(dev);
@@ -773,7 +857,7 @@
 
 #ifdef WITH_ANDROID
 int erofs_droid_inode_fsconfig(struct erofs_inode *inode,
-			       struct stat64 *st,
+			       struct stat *st,
 			       const char *path)
 {
 	/* filesystem_config does not preserve file type bits */
@@ -785,6 +869,9 @@
 	inode->capabilities = 0;
 	if (!cfg.fs_config_file && !cfg.mount_point)
 		return 0;
+	/* avoid loading special inodes */
+	if (path == EROFS_PACKED_INODE)
+		return 0;
 
 	if (!cfg.mount_point ||
 	/* have to drop the mountpoint for rootdir of canned fsconfig */
@@ -818,37 +905,59 @@
 }
 #else
 static int erofs_droid_inode_fsconfig(struct erofs_inode *inode,
-				      struct stat64 *st,
+				      struct stat *st,
 				      const char *path)
 {
 	return 0;
 }
 #endif
 
-static int erofs_fill_inode(struct erofs_inode *inode,
-			    struct stat64 *st,
-			    const char *path)
+int __erofs_fill_inode(struct erofs_inode *inode, struct stat *st,
+		       const char *path)
 {
 	int err = erofs_droid_inode_fsconfig(inode, st, path);
+	struct erofs_sb_info *sbi = inode->sbi;
 
 	if (err)
 		return err;
-	inode->i_mode = st->st_mode;
+
 	inode->i_uid = cfg.c_uid == -1 ? st->st_uid : cfg.c_uid;
 	inode->i_gid = cfg.c_gid == -1 ? st->st_gid : cfg.c_gid;
+
+	if (inode->i_uid + cfg.c_uid_offset < 0)
+		erofs_err("uid overflow @ %s", path);
+	inode->i_uid += cfg.c_uid_offset;
+
+	if (inode->i_gid + cfg.c_gid_offset < 0)
+		erofs_err("gid overflow @ %s", path);
+	inode->i_gid += cfg.c_gid_offset;
+
 	inode->i_mtime = st->st_mtime;
 	inode->i_mtime_nsec = ST_MTIM_NSEC(st);
 
 	switch (cfg.c_timeinherit) {
 	case TIMESTAMP_CLAMPING:
-		if (inode->i_mtime < sbi.build_time)
+		if (inode->i_mtime < sbi->build_time)
 			break;
 	case TIMESTAMP_FIXED:
-		inode->i_mtime = sbi.build_time;
-		inode->i_mtime_nsec = sbi.build_time_nsec;
+		inode->i_mtime = sbi->build_time;
+		inode->i_mtime_nsec = sbi->build_time_nsec;
 	default:
 		break;
 	}
+
+	return 0;
+}
+
+static int erofs_fill_inode(struct erofs_inode *inode, struct stat *st,
+			    const char *path)
+{
+	int err = __erofs_fill_inode(inode, st, path);
+
+	if (err)
+		return err;
+
+	inode->i_mode = st->st_mode;
 	inode->i_nlink = 1;	/* fix up later if needed */
 
 	switch (inode->i_mode & S_IFMT) {
@@ -868,11 +977,14 @@
 		return -EINVAL;
 	}
 
-	strncpy(inode->i_srcpath, path, sizeof(inode->i_srcpath) - 1);
-	inode->i_srcpath[sizeof(inode->i_srcpath) - 1] = '\0';
+	inode->i_srcpath = strdup(path);
+	if (!inode->i_srcpath)
+		return -ENOMEM;
 
-	inode->dev = st->st_dev;
-	inode->i_ino[1] = st->st_ino;
+	if (!S_ISDIR(inode->i_mode)) {
+		inode->dev = st->st_dev;
+		inode->i_ino[1] = st->st_ino;
+	}
 
 	if (erofs_should_use_inode_extended(inode)) {
 		if (cfg.c_force_inodeversion == FORCE_INODE_COMPACT) {
@@ -885,13 +997,11 @@
 		inode->inode_isize = sizeof(struct erofs_inode_compact);
 	}
 
-	list_add(&inode->i_hash,
-		 &inode_hashtable[(st->st_ino ^ st->st_dev) %
-				  NR_INODE_HASHTABLE]);
+	erofs_insert_ihash(inode, st->st_dev, st->st_ino);
 	return 0;
 }
 
-static struct erofs_inode *erofs_new_inode(void)
+struct erofs_inode *erofs_new_inode(void)
 {
 	struct erofs_inode *inode;
 
@@ -899,9 +1009,12 @@
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 
+	inode->sbi = &sbi;
 	inode->i_ino[0] = sbi.inos++;	/* inode serial number */
 	inode->i_count = 1;
+	inode->datalayout = EROFS_INODE_FLAT_PLAIN;
 
+	init_list_head(&inode->i_hash);
 	init_list_head(&inode->i_subdirs);
 	init_list_head(&inode->i_xattrs);
 	return inode;
@@ -910,7 +1023,7 @@
 /* get the inode from the (source) path */
 static struct erofs_inode *erofs_iget_from_path(const char *path, bool is_src)
 {
-	struct stat64 st;
+	struct stat st;
 	struct erofs_inode *inode;
 	int ret;
 
@@ -918,7 +1031,7 @@
 	if (!is_src)
 		return ERR_PTR(-EINVAL);
 
-	ret = lstat64(path, &st);
+	ret = lstat(path, &st);
 	if (ret)
 		return ERR_PTR(-errno);
 
@@ -940,10 +1053,9 @@
 
 	ret = erofs_fill_inode(inode, &st, path);
 	if (ret) {
-		free(inode);
+		erofs_iput(inode);
 		return ERR_PTR(ret);
 	}
-
 	return inode;
 }
 
@@ -951,87 +1063,72 @@
 {
 	const erofs_off_t rootnid_maxoffset = 0xffff << EROFS_ISLOTBITS;
 	struct erofs_buffer_head *const bh = rootdir->bh;
+	struct erofs_sb_info *sbi = rootdir->sbi;
 	erofs_off_t off, meta_offset;
 
 	erofs_mapbh(bh->block);
 	off = erofs_btell(bh, false);
 
 	if (off > rootnid_maxoffset)
-		meta_offset = round_up(off - rootnid_maxoffset, EROFS_BLKSIZ);
+		meta_offset = round_up(off - rootnid_maxoffset, erofs_blksiz(sbi));
 	else
 		meta_offset = 0;
-	sbi.meta_blkaddr = erofs_blknr(meta_offset);
+	sbi->meta_blkaddr = erofs_blknr(sbi, meta_offset);
 	rootdir->nid = (off - meta_offset) >> EROFS_ISLOTBITS;
 }
 
-erofs_nid_t erofs_lookupnid(struct erofs_inode *inode)
-{
-	struct erofs_buffer_head *const bh = inode->bh;
-	erofs_off_t off, meta_offset;
-
-	if (!bh)
-		return inode->nid;
-
-	erofs_mapbh(bh->block);
-	off = erofs_btell(bh, false);
-
-	meta_offset = blknr_to_addr(sbi.meta_blkaddr);
-	DBG_BUGON(off < meta_offset);
-	return inode->nid = (off - meta_offset) >> EROFS_ISLOTBITS;
-}
-
-static void erofs_d_invalidate(struct erofs_dentry *d)
-{
-	struct erofs_inode *const inode = d->inode;
-
-	d->nid = erofs_lookupnid(inode);
-	erofs_iput(inode);
-}
-
-static struct erofs_inode *erofs_mkfs_build_tree(struct erofs_inode *dir)
+static int erofs_mkfs_build_tree(struct erofs_inode *dir, struct list_head *dirs)
 {
 	int ret;
 	DIR *_dir;
 	struct dirent *dp;
 	struct erofs_dentry *d;
-	unsigned int nr_subdirs;
+	unsigned int nr_subdirs, i_nlink;
+
+	ret = erofs_scan_file_xattrs(dir);
+	if (ret < 0)
+		return ret;
 
 	ret = erofs_prepare_xattr_ibody(dir);
 	if (ret < 0)
-		return ERR_PTR(ret);
+		return ret;
 
 	if (!S_ISDIR(dir->i_mode)) {
 		if (S_ISLNK(dir->i_mode)) {
 			char *const symlink = malloc(dir->i_size);
 
 			if (!symlink)
-				return ERR_PTR(-ENOMEM);
+				return -ENOMEM;
 			ret = readlink(dir->i_srcpath, symlink, dir->i_size);
 			if (ret < 0) {
 				free(symlink);
-				return ERR_PTR(-errno);
+				return -errno;
 			}
-
 			ret = erofs_write_file_from_buffer(dir, symlink);
 			free(symlink);
-			if (ret)
-				return ERR_PTR(ret);
+		} else if (dir->i_size) {
+			int fd = open(dir->i_srcpath, O_RDONLY | O_BINARY);
+			if (fd < 0)
+				return -errno;
+
+			ret = erofs_write_file(dir, fd, 0);
+			close(fd);
 		} else {
-			ret = erofs_write_file(dir);
-			if (ret)
-				return ERR_PTR(ret);
+			ret = 0;
 		}
+		if (ret)
+			return ret;
 
 		erofs_prepare_inode_buffer(dir);
 		erofs_write_tail_end(dir);
-		return dir;
+		return 0;
 	}
 
 	_dir = opendir(dir->i_srcpath);
 	if (!_dir) {
 		erofs_err("failed to opendir at %s: %s",
 			  dir->i_srcpath, erofs_strerror(errno));
-		return ERR_PTR(-errno);
+		return -errno;
 	}
 
 	nr_subdirs = 0;
@@ -1045,8 +1142,7 @@
 		if (!dp)
 			break;
 
-		if (is_dot_dotdot(dp->d_name) ||
-		    !strncmp(dp->d_name, "lost+found", strlen("lost+found")))
+		if (is_dot_dotdot(dp->d_name))
 			continue;
 
 		/* skip if it's a exclude file */
@@ -1059,10 +1155,6 @@
 			goto err_closedir;
 		}
 		nr_subdirs++;
-
-		/* to count i_nlink for directories */
-		d->type = (dp->d_type == DT_DIR ?
-			EROFS_FT_DIR : EROFS_FT_UNKNOWN);
 	}
 
 	if (errno) {
@@ -1073,21 +1165,24 @@
 
 	ret = erofs_prepare_dir_file(dir, nr_subdirs);
 	if (ret)
-		goto err;
+		return ret;
 
 	ret = erofs_prepare_inode_buffer(dir);
 	if (ret)
-		goto err;
+		return ret;
+	dir->bh->op = &erofs_skip_write_bhops;
 
 	if (IS_ROOT(dir))
 		erofs_fixup_meta_blkaddr(dir);
 
+	i_nlink = 0;
 	list_for_each_entry(d, &dir->i_subdirs, d_child) {
-		char buf[PATH_MAX], *trimmed;
+		char buf[PATH_MAX];
 		unsigned char ftype;
+		struct erofs_inode *inode;
 
 		if (is_dot_dotdot(d->name)) {
-			erofs_d_invalidate(d);
+			++i_nlink;
 			continue;
 		}
 
@@ -1098,57 +1193,247 @@
 			goto fail;
 		}
 
-		trimmed = erofs_trim_for_progressinfo(erofs_fspath(buf),
-					sizeof("Processing  ...") - 1);
-		erofs_update_progressinfo("Processing %s ...", trimmed);
-		free(trimmed);
-		d->inode = erofs_mkfs_build_tree_from_path(dir, buf);
-		if (IS_ERR(d->inode)) {
-			ret = PTR_ERR(d->inode);
+		inode = erofs_iget_from_path(buf, true);
+
+		if (IS_ERR(inode)) {
+			ret = PTR_ERR(inode);
 fail:
 			d->inode = NULL;
 			d->type = EROFS_FT_UNKNOWN;
-			goto err;
+			return ret;
 		}
 
-		ftype = erofs_mode_to_ftype(d->inode->i_mode);
-		DBG_BUGON(ftype == EROFS_FT_DIR && d->type != ftype);
+		/* a hardlink to an existing inode */
+		if (inode->i_parent) {
+			++inode->i_nlink;
+		} else {
+			inode->i_parent = dir;
+			erofs_igrab(inode);
+			list_add_tail(&inode->i_subdirs, dirs);
+		}
+		ftype = erofs_mode_to_ftype(inode->i_mode);
+		i_nlink += (ftype == EROFS_FT_DIR);
+		d->inode = inode;
 		d->type = ftype;
-
-		erofs_d_invalidate(d);
-		erofs_info("add file %s/%s (nid %llu, type %u)",
-			   dir->i_srcpath, d->name, (unsigned long long)d->nid,
-			   d->type);
+		erofs_info("file %s/%s dumped (type %u)",
+			   dir->i_srcpath, d->name, d->type);
 	}
-	erofs_write_dir_file(dir);
-	erofs_write_tail_end(dir);
-	return dir;
+	/*
+	 * if there are too many subdirs for the compact form, set nlink=1
+	 * rather than upgrading to the extended form.
+	 */
+	if (i_nlink > USHRT_MAX &&
+	    dir->inode_isize == sizeof(struct erofs_inode_compact))
+		dir->i_nlink = 1;
+	else
+		dir->i_nlink = i_nlink;
+	return 0;
 
 err_closedir:
 	closedir(_dir);
-err:
-	return ERR_PTR(ret);
+	return ret;
 }
 
-struct erofs_inode *erofs_mkfs_build_tree_from_path(struct erofs_inode *parent,
-						    const char *path)
+struct erofs_inode *erofs_mkfs_build_tree_from_path(const char *path)
 {
-	struct erofs_inode *const inode = erofs_iget_from_path(path, true);
+	LIST_HEAD(dirs);
+	struct erofs_inode *inode, *root, *dumpdir;
 
+	root = erofs_iget_from_path(path, true);
+	if (IS_ERR(root))
+		return root;
+
+	(void)erofs_igrab(root);
+	root->i_parent = root;	/* rootdir mark */
+	list_add(&root->i_subdirs, &dirs);
+
+	dumpdir = NULL;
+	do {
+		int err;
+		char *trimmed;
+
+		inode = list_first_entry(&dirs, struct erofs_inode, i_subdirs);
+		list_del(&inode->i_subdirs);
+		init_list_head(&inode->i_subdirs);
+
+		trimmed = erofs_trim_for_progressinfo(
+				erofs_fspath(inode->i_srcpath),
+				sizeof("Processing  ...") - 1);
+		erofs_update_progressinfo("Processing %s ...", trimmed);
+		free(trimmed);
+
+		err = erofs_mkfs_build_tree(inode, &dirs);
+		if (err) {
+			root = ERR_PTR(err);
+			break;
+		}
+
+		if (S_ISDIR(inode->i_mode)) {
+			inode->next_dirwrite = dumpdir;
+			dumpdir = inode;
+		} else {
+			erofs_iput(inode);
+		}
+	} while (!list_empty(&dirs));
+
+	while (dumpdir) {
+		inode = dumpdir;
+		erofs_write_dir_file(inode);
+		erofs_write_tail_end(inode);
+		inode->bh->op = &erofs_write_inode_bhops;
+		dumpdir = inode->next_dirwrite;
+		erofs_iput(inode);
+	}
+	return root;
+}
+
+struct erofs_inode *erofs_mkfs_build_special_from_fd(int fd, const char *name)
+{
+	struct stat st;
+	struct erofs_inode *inode;
+	int ret;
+
+	ret = lseek(fd, 0, SEEK_SET);
+	if (ret < 0)
+		return ERR_PTR(-errno);
+
+	ret = fstat(fd, &st);
+	if (ret)
+		return ERR_PTR(-errno);
+
+	inode = erofs_new_inode();
 	if (IS_ERR(inode))
 		return inode;
 
-	/* a hardlink to the existed inode */
-	if (inode->i_parent) {
-		++inode->i_nlink;
-		return inode;
+	if (name == EROFS_PACKED_INODE) {
+		st.st_uid = st.st_gid = 0;
+		st.st_nlink = 0;
 	}
 
-	/* a completely new inode is found */
-	if (parent)
-		inode->i_parent = parent;
-	else
-		inode->i_parent = inode;	/* rootdir mark */
+	ret = erofs_fill_inode(inode, &st, name);
+	if (ret) {
+		free(inode);
+		return ERR_PTR(ret);
+	}
 
-	return erofs_mkfs_build_tree(inode);
+	if (name == EROFS_PACKED_INODE) {
+		inode->sbi->packed_nid = EROFS_PACKED_NID_UNALLOCATED;
+		inode->nid = inode->sbi->packed_nid;
+	}
+
+	ret = erofs_write_compressed_file(inode, fd);
+	if (ret == -ENOSPC) {
+		ret = lseek(fd, 0, SEEK_SET);
+		if (ret < 0)
+			return ERR_PTR(-errno);
+
+		ret = write_uncompressed_file_from_fd(inode, fd);
+	}
+
+	if (ret) {
+		DBG_BUGON(ret == -ENOSPC);
+		return ERR_PTR(ret);
+	}
+	erofs_prepare_inode_buffer(inode);
+	erofs_write_tail_end(inode);
+	return inode;
+}
+
+int erofs_rebuild_dump_tree(struct erofs_inode *dir)
+{
+	struct erofs_dentry *d, *n;
+	unsigned int nr_subdirs;
+	int ret;
+
+	if (erofs_should_use_inode_extended(dir)) {
+		if (cfg.c_force_inodeversion == FORCE_INODE_COMPACT) {
+			erofs_err("file %s cannot be in compact form",
+				  dir->i_srcpath);
+			return -EINVAL;
+		}
+		dir->inode_isize = sizeof(struct erofs_inode_extended);
+	} else {
+		dir->inode_isize = sizeof(struct erofs_inode_compact);
+	}
+
+	/* strip all unnecessary overlayfs xattrs when ovlfs_strip is enabled */
+	if (cfg.c_ovlfs_strip)
+		erofs_clear_opaque_xattr(dir);
+	else if (dir->whiteouts)
+		erofs_set_origin_xattr(dir);
+
+	ret = erofs_prepare_xattr_ibody(dir);
+	if (ret < 0)
+		return ret;
+
+	if (!S_ISDIR(dir->i_mode)) {
+		if (dir->bh)
+			return 0;
+		if (S_ISLNK(dir->i_mode)) {
+			ret = erofs_write_file_from_buffer(dir, dir->i_link);
+			free(dir->i_link);
+			dir->i_link = NULL;
+		} else if (dir->with_diskbuf) {
+			u64 fpos;
+
+			ret = erofs_diskbuf_getfd(dir->i_diskbuf, &fpos);
+			if (ret >= 0)
+				ret = erofs_write_file(dir, ret, fpos);
+			erofs_diskbuf_close(dir->i_diskbuf);
+			free(dir->i_diskbuf);
+			dir->i_diskbuf = NULL;
+			dir->with_diskbuf = false;
+		} else {
+			ret = 0;
+		}
+		if (ret)
+			return ret;
+		ret = erofs_prepare_inode_buffer(dir);
+		if (ret)
+			return ret;
+		erofs_write_tail_end(dir);
+		return 0;
+	}
+
+	nr_subdirs = 0;
+	list_for_each_entry_safe(d, n, &dir->i_subdirs, d_child) {
+		if (cfg.c_ovlfs_strip && erofs_inode_is_whiteout(d->inode)) {
+			erofs_dbg("remove whiteout %s", d->inode->i_srcpath);
+			list_del(&d->d_child);
+			erofs_d_invalidate(d);
+			free(d);
+			continue;
+		}
+		++nr_subdirs;
+	}
+
+	ret = erofs_prepare_dir_layout(dir, nr_subdirs);
+	if (ret)
+		return ret;
+
+	ret = erofs_prepare_inode_buffer(dir);
+	if (ret)
+		return ret;
+	dir->bh->op = &erofs_skip_write_bhops;
+
+	if (IS_ROOT(dir))
+		erofs_fixup_meta_blkaddr(dir);
+
+	list_for_each_entry(d, &dir->i_subdirs, d_child) {
+		struct erofs_inode *inode;
+
+		if (is_dot_dotdot(d->name))
+			continue;
+
+		inode = erofs_igrab(d->inode);
+		ret = erofs_rebuild_dump_tree(inode);
+		dir->i_nlink += (erofs_mode_to_ftype(inode->i_mode) == EROFS_FT_DIR);
+		erofs_iput(inode);
+		if (ret)
+			return ret;
+	}
+	erofs_write_dir_file(dir);
+	erofs_write_tail_end(dir);
+	dir->bh->op = &erofs_write_inode_bhops;
+	return 0;
 }
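
erofs_lookupnid() and erofs_fixup_meta_blkaddr() above both rely on the fact that an nid is simply the index of a 32-byte metadata slot (EROFS_ISLOTBITS) counted from sbi->meta_blkaddr. A worked example with made-up numbers:

#include <stdio.h>

#define DEMO_ISLOTBITS	5	/* each on-disk inode slot is 32 bytes */

int main(void)
{
	unsigned long long blksz = 4096, meta_blkaddr = 2;
	unsigned long long off = 12416;		/* byte offset of an on-disk inode */
	unsigned long long meta_offset = meta_blkaddr * blksz;
	unsigned long long nid = (off - meta_offset) >> DEMO_ISLOTBITS;

	/* (12416 - 8192) / 32 = 132 */
	printf("nid = %llu\n", nid);
	return 0;
}

This is also why the rewritten erofs_mkfs_build_tree_from_path() can defer writing dirent blocks: directories are collected breadth-first on the `dirs` list, and their blocks are only flushed once every child inode has a mapped buffer and therefore a resolvable nid.
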
diff --git a/lib/io.c b/lib/io.c
index 9c663c5..c92f16c 100644
--- a/lib/io.c
+++ b/lib/io.c
@@ -10,6 +10,7 @@
 #ifndef _GNU_SOURCE
 #define _GNU_SOURCE
 #endif
+#include <stdlib.h>
 #include <sys/stat.h>
 #include <sys/ioctl.h>
 #include "erofs/io.h"
@@ -19,16 +20,13 @@
 #ifdef HAVE_LINUX_FALLOC_H
 #include <linux/falloc.h>
 #endif
-
+#ifdef HAVE_SYS_STATFS_H
+#include <sys/statfs.h>
+#endif
 #define EROFS_MODNAME	"erofs_io"
 #include "erofs/print.h"
 
-static const char *erofs_devname;
-int erofs_devfd = -1;
-static u64 erofs_devsz;
-static unsigned int erofs_nblobs, erofs_blobfd[256];
-
-int dev_get_blkdev_size(int fd, u64 *bytes)
+static int dev_get_blkdev_size(int fd, u64 *bytes)
 {
 	errno = ENOTSUP;
 #ifdef BLKGETSIZE64
@@ -48,19 +46,25 @@
 	return -errno;
 }
 
-void dev_close(void)
+void dev_close(struct erofs_sb_info *sbi)
 {
-	close(erofs_devfd);
-	erofs_devname = NULL;
-	erofs_devfd   = -1;
-	erofs_devsz   = 0;
+	close(sbi->devfd);
+	free(sbi->devname);
+	sbi->devname = NULL;
+	sbi->devfd   = -1;
+	sbi->devsz   = 0;
 }
 
-int dev_open(const char *dev)
+int dev_open(struct erofs_sb_info *sbi, const char *dev)
 {
 	struct stat st;
 	int fd, ret;
 
+#if defined(HAVE_SYS_STATFS_H) && defined(HAVE_FSTATFS)
+	bool again = false;
+
+repeat:
+#endif
 	fd = open(dev, O_RDWR | O_CREAT | O_BINARY, 0644);
 	if (fd < 0) {
 		erofs_err("failed to open(%s).", dev);
@@ -76,23 +80,46 @@
 
 	switch (st.st_mode & S_IFMT) {
 	case S_IFBLK:
-		ret = dev_get_blkdev_size(fd, &erofs_devsz);
+		ret = dev_get_blkdev_size(fd, &sbi->devsz);
 		if (ret) {
 			erofs_err("failed to get block device size(%s).", dev);
 			close(fd);
 			return ret;
 		}
-		erofs_devsz = round_down(erofs_devsz, EROFS_BLKSIZ);
+		sbi->devsz = round_down(sbi->devsz, erofs_blksiz(sbi));
 		break;
 	case S_IFREG:
-		ret = ftruncate(fd, 0);
-		if (ret) {
-			erofs_err("failed to ftruncate(%s).", dev);
-			close(fd);
-			return -errno;
+		if (st.st_size) {
+#if defined(HAVE_SYS_STATFS_H) && defined(HAVE_FSTATFS)
+			struct statfs stfs;
+
+			if (again)
+				return -ENOTEMPTY;
+
+			/*
+			 * fses like EXT4 and BTRFS will flush dirty blocks
+			 * after truncate(0) even after the writeback happens
+			 * (see kernel commit 7d8f9f7d150d and ccd2506bd431),
+			 * which is NOT our intention.  Let's work around this.
+			 */
+			if (!fstatfs(fd, &stfs) && (stfs.f_type == 0xEF53 ||
+					stfs.f_type == 0x9123683E)) {
+				close(fd);
+				unlink(dev);
+				again = true;
+				goto repeat;
+			}
+#endif
+			ret = ftruncate(fd, 0);
+			if (ret) {
+				erofs_err("failed to ftruncate(%s).", dev);
+				close(fd);
+				return -errno;
+			}
 		}
 		/* INT64_MAX is the limit of kernel vfs */
-		erofs_devsz = INT64_MAX;
+		sbi->devsz = INT64_MAX;
+		sbi->devblksz = st.st_blksize;
 		break;
 	default:
 		erofs_err("bad file type (%s, %o).", dev, st.st_mode);
@@ -100,23 +127,27 @@
 		return -EINVAL;
 	}
 
-	erofs_devname = dev;
-	erofs_devfd = fd;
+	sbi->devname = strdup(dev);
+	if (!sbi->devname) {
+		close(fd);
+		return -ENOMEM;
+	}
+	sbi->devfd = fd;
 
 	erofs_info("successfully to open %s", dev);
 	return 0;
 }
 
-void blob_closeall(void)
+void blob_closeall(struct erofs_sb_info *sbi)
 {
 	unsigned int i;
 
-	for (i = 0; i < erofs_nblobs; ++i)
-		close(erofs_blobfd[i]);
-	erofs_nblobs = 0;
+	for (i = 0; i < sbi->nblobs; ++i)
+		close(sbi->blobfd[i]);
+	sbi->nblobs = 0;
 }
 
-int blob_open_ro(const char *dev)
+int blob_open_ro(struct erofs_sb_info *sbi, const char *dev)
 {
 	int fd = open(dev, O_RDONLY | O_BINARY);
 
@@ -125,14 +156,14 @@
 		return -errno;
 	}
 
-	erofs_blobfd[erofs_nblobs] = fd;
-	erofs_info("successfully to open blob%u %s", erofs_nblobs, dev);
-	++erofs_nblobs;
+	sbi->blobfd[sbi->nblobs] = fd;
+	erofs_info("successfully to open blob%u %s", sbi->nblobs, dev);
+	++sbi->nblobs;
 	return 0;
 }
 
 /* XXX: temporary solution. Disk I/O implementation needs to be refactored. */
-int dev_open_ro(const char *dev)
+int dev_open_ro(struct erofs_sb_info *sbi, const char *dev)
 {
 	int fd = open(dev, O_RDONLY | O_BINARY);
 
@@ -141,18 +172,17 @@
 		return -errno;
 	}
 
-	erofs_devfd = fd;
-	erofs_devname = dev;
-	erofs_devsz = INT64_MAX;
+	sbi->devname = strdup(dev);
+	if (!sbi->devname) {
+		close(fd);
+		return -ENOMEM;
+	}
+	sbi->devfd = fd;
+	sbi->devsz = INT64_MAX;
 	return 0;
 }
 
-u64 dev_length(void)
-{
-	return erofs_devsz;
-}
-
-int dev_write(const void *buf, u64 offset, size_t len)
+int dev_write(struct erofs_sb_info *sbi, const void *buf, u64 offset, size_t len)
 {
 	int ret;
 
@@ -164,60 +194,60 @@
 		return -EINVAL;
 	}
 
-	if (offset >= erofs_devsz || len > erofs_devsz ||
-	    offset > erofs_devsz - len) {
+	if (offset >= sbi->devsz || len > sbi->devsz ||
+	    offset > sbi->devsz - len) {
 		erofs_err("Write posion[%" PRIu64 ", %zd] is too large beyond the end of device(%" PRIu64 ").",
-			  offset, len, erofs_devsz);
+			  offset, len, sbi->devsz);
 		return -EINVAL;
 	}
 
 #ifdef HAVE_PWRITE64
-	ret = pwrite64(erofs_devfd, buf, len, (off64_t)offset);
+	ret = pwrite64(sbi->devfd, buf, len, (off64_t)offset);
 #else
-	ret = pwrite(erofs_devfd, buf, len, (off_t)offset);
+	ret = pwrite(sbi->devfd, buf, len, (off_t)offset);
 #endif
 	if (ret != (int)len) {
 		if (ret < 0) {
 			erofs_err("Failed to write data into device - %s:[%" PRIu64 ", %zd].",
-				  erofs_devname, offset, len);
+				  sbi->devname, offset, len);
 			return -errno;
 		}
 
 		erofs_err("Writing data into device - %s:[%" PRIu64 ", %zd] - was truncated.",
-			  erofs_devname, offset, len);
+			  sbi->devname, offset, len);
 		return -ERANGE;
 	}
 	return 0;
 }
 
-int dev_fillzero(u64 offset, size_t len, bool padding)
+int dev_fillzero(struct erofs_sb_info *sbi, u64 offset, size_t len, bool padding)
 {
-	static const char zero[EROFS_BLKSIZ] = {0};
+	static const char zero[EROFS_MAX_BLOCK_SIZE] = {0};
 	int ret;
 
 	if (cfg.c_dry_run)
 		return 0;
 
 #if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE)
-	if (!padding && fallocate(erofs_devfd, FALLOC_FL_PUNCH_HOLE |
+	if (!padding && fallocate(sbi->devfd, FALLOC_FL_PUNCH_HOLE |
 				  FALLOC_FL_KEEP_SIZE, offset, len) >= 0)
 		return 0;
 #endif
-	while (len > EROFS_BLKSIZ) {
-		ret = dev_write(zero, offset, EROFS_BLKSIZ);
+	while (len > erofs_blksiz(sbi)) {
+		ret = dev_write(sbi, zero, offset, erofs_blksiz(sbi));
 		if (ret)
 			return ret;
-		len -= EROFS_BLKSIZ;
-		offset += EROFS_BLKSIZ;
+		len -= erofs_blksiz(sbi);
+		offset += erofs_blksiz(sbi);
 	}
-	return dev_write(zero, offset, len);
+	return dev_write(sbi, zero, offset, len);
 }
 
-int dev_fsync(void)
+int dev_fsync(struct erofs_sb_info *sbi)
 {
 	int ret;
 
-	ret = fsync(erofs_devfd);
+	ret = fsync(sbi->devfd);
 	if (ret) {
 		erofs_err("Could not fsync device!!!");
 		return -EIO;
@@ -225,66 +255,81 @@
 	return 0;
 }
 
-int dev_resize(unsigned int blocks)
+int dev_resize(struct erofs_sb_info *sbi, unsigned int blocks)
 {
 	int ret;
 	struct stat st;
 	u64 length;
 
-	if (cfg.c_dry_run || erofs_devsz != INT64_MAX)
+	if (cfg.c_dry_run || sbi->devsz != INT64_MAX)
 		return 0;
 
-	ret = fstat(erofs_devfd, &st);
+	ret = fstat(sbi->devfd, &st);
 	if (ret) {
 		erofs_err("failed to fstat.");
 		return -errno;
 	}
 
-	length = (u64)blocks * EROFS_BLKSIZ;
+	length = (u64)blocks * erofs_blksiz(sbi);
 	if (st.st_size == length)
 		return 0;
 	if (st.st_size > length)
-		return ftruncate(erofs_devfd, length);
+		return ftruncate(sbi->devfd, length);
 
 	length = length - st.st_size;
 #if defined(HAVE_FALLOCATE)
-	if (fallocate(erofs_devfd, 0, st.st_size, length) >= 0)
+	if (fallocate(sbi->devfd, 0, st.st_size, length) >= 0)
 		return 0;
 #endif
-	return dev_fillzero(st.st_size, length, true);
+	return dev_fillzero(sbi, st.st_size, length, true);
 }
 
-int dev_read(int device_id, void *buf, u64 offset, size_t len)
+int dev_read(struct erofs_sb_info *sbi, int device_id,
+	     void *buf, u64 offset, size_t len)
 {
-	int ret, fd;
+	int read_count, fd;
 
 	if (cfg.c_dry_run)
 		return 0;
 
+	offset += cfg.c_offset;
+
 	if (!buf) {
 		erofs_err("buf is NULL");
 		return -EINVAL;
 	}
 
 	if (!device_id) {
-		fd = erofs_devfd;
+		fd = sbi->devfd;
 	} else {
-		if (device_id > erofs_nblobs) {
+		if (device_id > sbi->nblobs) {
 			erofs_err("invalid device id %d", device_id);
 			return -ENODEV;
 		}
-		fd = erofs_blobfd[device_id - 1];
+		fd = sbi->blobfd[device_id - 1];
 	}
 
+	while (len > 0) {
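+		/*
+		 * pread() may be interrupted (EINTR) or return short reads;
+		 * on EOF the remaining bytes are zero-filled instead of
+		 * returning an error.
+		 */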
 #ifdef HAVE_PREAD64
-	ret = pread64(fd, buf, len, (off64_t)offset);
+		read_count = pread64(fd, buf, len, (off64_t)offset);
 #else
-	ret = pread(fd, buf, len, (off_t)offset);
+		read_count = pread(fd, buf, len, (off_t)offset);
 #endif
-	if (ret != (int)len) {
-		erofs_err("Failed to read data from device - %s:[%" PRIu64 ", %zd].",
-			  erofs_devname, offset, len);
-		return -errno;
+		if (read_count < 1) {
+			if (!read_count) {
+				erofs_info("Reach EOF of device - %s:[%" PRIu64 ", %zd].",
+					   sbi->devname, offset, len);
+				memset(buf, 0, len);
+				return 0;
+			} else if (errno != EINTR) {
+				erofs_err("Failed to read data from device - %s:[%" PRIu64 ", %zd].",
+					  sbi->devname, offset, len);
+				return -errno;
+			}
+		}
+		offset += read_count;
+		len -= read_count;
+		buf += read_count;
 	}
 	return 0;
 }
@@ -373,7 +418,7 @@
 			      length, 0);
 	if (ret >= 0)
 		goto out;
-	if (errno != ENOSYS) {
+	if (errno != ENOSYS && errno != EXDEV) {
 		ret = -errno;
 out:
 		*off_in = off64_in;
diff --git a/lib/kite_deflate.c b/lib/kite_deflate.c
new file mode 100644
index 0000000..8667954
--- /dev/null
+++ b/lib/kite_deflate.c
@@ -0,0 +1,1271 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+/*
+ * erofs-utils/lib/kite_deflate.c
+ *
+ * Copyright (C) 2023, Alibaba Cloud
+ * Copyright (C) 2023, Gao Xiang <xiang@kernel.org>
+ */
+#include "erofs/defs.h"
+#include "erofs/print.h"
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdio.h>
+
+unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2,
+			    unsigned long sz);
+
+#ifdef TEST
+#define kite_dbg(x, ...)	fprintf(stderr, x "\n", ##__VA_ARGS__)
+#else
+#define kite_dbg(x, ...)
+#endif
+
+#define kHistorySize32		(1U << 15)
+
+#define kNumLenSymbols32	256
+#define kNumLenSymbolsMax	kNumLenSymbols32
+
+#define kSymbolEndOfBlock	256
+#define kSymbolMatch		(kSymbolEndOfBlock + 1)
+#define kNumLenSlots		29
+#define kMainTableSize		(kSymbolMatch + kNumLenSlots)
+
+#define kFixedLenTableSize	(kSymbolMatch + 31)
+#define FixedDistTableSize	32
+
+#define kDistTableSize32	30
+
+#define kNumLitLenCodesMin	257
+#define kNumDistCodesMin	1
+
+#define kNumLensCodesMin	4
+#define kLensTableSize		19
+
+#define kMatchMinLen		3
+#define kMatchMaxLen32		(kNumLenSymbols32 + kMatchMinLen - 1)
+
+#define kTableDirectLevels      16
+#define kBitLensRepNumber_3_6   kTableDirectLevels
+#define kBitLens0Number_3_10    (kBitLensRepNumber_3_6 + 1)
+#define kBitLens0Number_11_138  (kBitLens0Number_3_10 + 1)
+
+static u32 kstaticHuff_mainCodes[kFixedLenTableSize];
+static const u8 kstaticHuff_litLenLevels[kFixedLenTableSize] = {
+	[0   ... 143] = 8, [144 ... 255] = 9,
+	[256 ... 279] = 7, [280 ... 287] = 8,
+};
+static u32 kstaticHuff_distCodes[kFixedLenTableSize];
+
+const u8 kLenStart32[kNumLenSlots] =
+	{0,1,2,3,4,5,6,7,8,10,12,14,16,20,24,28,32,40,48,56,64,80,96,112,128,160,192,224, 255};
+
+const u8 kLenExtraBits32[kNumLenSlots] =
+	{0,0,0,0,0,0,0,0,1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,  4,  5,
+	 5,  5,  5, 0};
+
+/* First normalized distance for each code (0 = distance of 1) */
+const u32 kDistStart[kDistTableSize32] =
+	{0,1,2,3,4,6,8,12,16,24,32,48,64,96,128,192,256,384,512,768,
+	 1024,1536,2048,3072,4096,6144,8192,12288,16384,24576};
+
+/* extra bits for each distance code */
+const u8 kDistExtraBits[kDistTableSize32] =
+	{0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
+
+const u8 kCodeLengthAlphabetOrder[kLensTableSize] =
+	{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+const u8 kLevelExtraBits[3] = {2, 3, 7};
+
+#define kStored			0
+#define kFixedHuffman		1
+#define kDynamicHuffman		2
+
+struct kite_deflate_symbol {
+	u16 len, dist;
+};
+
+struct kite_deflate_table {
+	u32  mainCodes[kMainTableSize];
+	u8   litLenLevels[kMainTableSize];
+	u32  distCodes[kDistTableSize32];
+	u8   distLevels[kDistTableSize32];
+	u32  levelCodes[kLensTableSize];
+	u8   levelLens[kLensTableSize];
+
+	u8   numdistlens, numblcodes;
+	u16  numlitlens;
+};
+
+struct kite_deflate {
+	struct kite_deflate_table  *tab;
+	const u8   *in;
+	u8         *out;
+
+	u32  inlen, outlen;
+	u32  pos_in, pos_out;
+	u32  inflightbits;
+	u8   bitpos;
+	u8   numHuffBits;
+	u32  symbols;
+
+	u32  costbits, startpos;
+	u8   encode_mode;
+	bool freq_changed, lastblock;
+
+	/* Previous match for lazy matching */
+	bool prev_valid;
+	u16 prev_longest;
+
+	u32  mainFreqs[kMainTableSize];
+	u32  distFreqs[kDistTableSize32];
+	struct kite_deflate_table tables[2];
+
+	/* don't reset the following fields */
+	struct kite_matchfinder *mf;
+	struct kite_deflate_symbol *sym;
+	u32 max_symbols;
+	bool lazy_search;
+};
+
+#define ZLIB_DISTANCE_TOO_FAR	4096
+
+static u8 g_LenSlots[kNumLenSymbolsMax];
+
+#define kNumLogBits 9		// do not change it
+static u8 g_FastPos[1 << kNumLogBits];
+
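+/*
+ * Bit I/O helpers: bits are accumulated LSB-first (DEFLATE bit order) into a
+ * 32-bit in-flight buffer and flushed to the output four bytes at a time
+ * once the pending bits no longer fit.
+ */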
+static void writebits(struct kite_deflate *s, unsigned int v, u8 bits)
+{
+	unsigned int rem = sizeof(s->inflightbits) * 8 - s->bitpos;
+
+	s->inflightbits |= (v << s->bitpos) & (!rem - 1);
+	if (bits > rem) {
+		u8 *out = s->out + s->pos_out;
+
+		out[0] = s->inflightbits & 0xff;
+		out[1] = (s->inflightbits >> 8) & 0xff;
+		out[2] = (s->inflightbits >> 16) & 0xff;
+		out[3] = (s->inflightbits >> 24) & 0xff;
+		s->pos_out += 4;
+		DBG_BUGON(s->pos_out > s->outlen);
+		s->inflightbits = v >> rem;
+		s->bitpos = bits - rem;
+		return;
+	}
+	s->bitpos += bits;
+}
+
+static void flushbits(struct kite_deflate *s)
+{
+	u8 *out = s->out + s->pos_out;
+
+	if (!s->bitpos)
+		return;
+	out[0] = s->inflightbits & 0xff;
+	if (s->bitpos >= 8) {
+		out[1] = (s->inflightbits >> 8) & 0xff;
+		if (s->bitpos >= 16) {
+			out[2] = (s->inflightbits >> 16) & 0xff;
+			if (s->bitpos >= 24)
+				out[3] = (s->inflightbits >> 24) & 0xff;
+		}
+	}
+	s->pos_out += round_up(s->bitpos, 8) >> 3;
+	DBG_BUGON(s->pos_out > s->outlen);
+	s->bitpos = 0;
+	s->inflightbits = 0;
+}
+
+#define kMaxLen 16
+
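+/*
+ * Assign canonical Huffman codes from the code lengths (RFC 1951, 3.2.2):
+ * codes of each bit length are consecutive, starting from a base value
+ * derived from the counts of all shorter code lengths.
+ */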
+static void deflate_genhuffcodes(const u8 *lens, u32 *p, unsigned int nr_codes,
+				 const u32 *bl_count)
+{
+	u32 nextCodes[kMaxLen + 1];	/* next code value for each bit length */
+	unsigned int code = 0;		/* running code value */
+	unsigned int bits, k;
+
+	for (bits = 1; bits <= kMaxLen; ++bits) {
+		code = (code + bl_count[bits - 1]) << 1;
+		nextCodes[bits] = code;
+	}
+
+	DBG_BUGON(code + bl_count[kMaxLen] != 1 << kMaxLen);
+
+	for (k = 0; k < nr_codes; ++k)
+		p[k] = nextCodes[lens[k]]++;
+}
+
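+/*
+ * DEFLATE transmits Huffman codes MSB-first while the bit writer above works
+ * LSB-first, so each code is bit-reversed once before being emitted.
+ */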
+static u32 deflate_reversebits_one(u32 code, u8 bits)
+{
+	unsigned int x = code;
+
+	x = ((x & 0x5555) << 1) | ((x & 0xAAAA) >> 1);
+	x = ((x & 0x3333) << 2) | ((x & 0xCCCC) >> 2);
+	x = ((x & 0x0F0F) << 4) | ((x & 0xF0F0) >> 4);
+
+	return (((x & 0x00FF) << 8) | ((x & 0xFF00) >> 8)) >> (16 - bits);
+}
+
+static void Huffman_ReverseBits(u32 *codes, const u8 *lens, unsigned int n)
+{
+	while (n) {
+		u32 code = *codes;
+
+		*codes++ = deflate_reversebits_one(code, *lens++);
+		--n;
+	}
+}
+
+static void kite_deflate_init_once(void)
+{
+	static const u32 static_bl_count[kMaxLen + 1] = {
+		[7] = 279 - 256 + 1,
+		[8] = (143 + 1) + (287 - 280 + 1),
+		[9] = 255 - 144 + 1,
+	};
+	unsigned int i, c, j, k;
+
+	if (kstaticHuff_distCodes[31])
+		return;
+	deflate_genhuffcodes(kstaticHuff_litLenLevels, kstaticHuff_mainCodes,
+			     kFixedLenTableSize, static_bl_count);
+	Huffman_ReverseBits(kstaticHuff_mainCodes, kstaticHuff_litLenLevels,
+			    kFixedLenTableSize);
+
+	for (i = 0; i < ARRAY_SIZE(kstaticHuff_distCodes); ++i)
+		kstaticHuff_distCodes[i] = deflate_reversebits_one(i, 5);
+
+	for (i = 0; i < kNumLenSlots; i++) {
+		c = kLenStart32[i];
+		j = 1 << kLenExtraBits32[i];
+
+		for (k = 0; k < j; k++, c++)
+			g_LenSlots[c] = (u8)i;
+	}
+
+	c = 0;
+	for (i = 0; i < /*kFastSlots*/ kNumLogBits * 2; i++) {
+		k = 1 << kDistExtraBits[i];
+		for (j = 0; j < k; j++)
+			g_FastPos[c++] = i;
+	}
+}
+
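+/*
+ * Run-length encode the code length sequence as zlib does: repeats of the
+ * previous length use symbol 16 (3-6 times), runs of zero lengths use
+ * symbols 17 (3-10) and 18 (11-138).  scanlens only gathers frequencies;
+ * kite_deflate_sendtree below emits the actual symbols.
+ */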
+static void kite_deflate_scanlens(unsigned int numlens, u8 *lens, u32 *freqs)
+{
+	int n;				/* iterates over all tree elements */
+	int prevlen = -1;		/* last emitted length */
+	int curlen;			/* length of current code */
+	int nextlen = lens[0];		/* length of next code */
+	int count = 0;			/* repeat count of the current code */
+	int max_count = 7;		/* max repeat count */
+	int min_count = 4;		/* min repeat count */
+
+	if (!nextlen)
+		max_count = 138, min_count = 3;
+
+	for (n = 0; n < numlens; n++) {
+		curlen = nextlen;
+		nextlen = n + 1 < numlens ? lens[n + 1] : -1;
+		++count;
+
+		if (count < max_count && curlen == nextlen)
+			continue;
+		if (count < min_count) {
+			freqs[curlen] += count;
+		} else if (curlen != 0) {
+			if (curlen != prevlen)
+				freqs[curlen]++;
+			freqs[kBitLensRepNumber_3_6]++;
+		} else if (count <= 10) {
+			freqs[kBitLens0Number_3_10]++;
+		} else {
+			freqs[kBitLens0Number_11_138]++;
+		}
+
+		count = 0;
+		prevlen = curlen;
+		if (!nextlen)
+			max_count = 138, min_count = 3;
+		else if (curlen == nextlen)
+			max_count = 6, min_count = 3;
+		else
+			max_count = 7, min_count = 4;
+	}
+}
+
+static void kite_deflate_sendtree(struct kite_deflate *s, const u8 *lens,
+				  unsigned int numlens)
+{
+	int n;				/* iterates over all tree elements */
+	int prevlen = -1;		/* last emitted length */
+	int curlen;			/* length of current code */
+	int nextlen = lens[0];		/* length of next code */
+	int count = 0;			/* repeat count of the current code */
+	int max_count = 7;		/* max repeat count */
+	int min_count = 4;		/* min repeat count */
+	const u8 *bl_lens = s->tab->levelLens;
+	const u32 *bl_codes = s->tab->levelCodes;
+
+	if (!nextlen)
+		max_count = 138, min_count = 3;
+
+	for (n = 0; n < numlens; n++) {
+		curlen = nextlen;
+		nextlen = n + 1 < numlens ? lens[n + 1] : -1;
+		++count;
+
+		if (count < max_count && curlen == nextlen)
+			continue;
+		if (count < min_count) {
+			do {
+				writebits(s, bl_codes[curlen], bl_lens[curlen]);
+			} while (--count);
+		} else if (curlen) {
+			if (curlen != prevlen) {
+				writebits(s, bl_codes[curlen], bl_lens[curlen]);
+				count--;
+			}
+			writebits(s, bl_codes[kBitLensRepNumber_3_6],
+				  bl_lens[kBitLensRepNumber_3_6]);
+			writebits(s, count - 3, 2);
+		} else if (count <= 10) {
+			writebits(s, bl_codes[kBitLens0Number_3_10],
+				  bl_lens[kBitLens0Number_3_10]);
+			writebits(s, count - 3, 3);
+		} else {
+			writebits(s, bl_codes[kBitLens0Number_11_138],
+				  bl_lens[kBitLens0Number_11_138]);
+			writebits(s, count - 11, 7);
+		}
+
+		count = 0;
+		prevlen = curlen;
+		if (!nextlen)
+			max_count = 138, min_count = 3;
+		else if (curlen == nextlen)
+			max_count = 6, min_count = 3;
+		else
+			max_count = 7, min_count = 4;
+	}
+}
+
+static void kite_deflate_setfixedtrees(struct kite_deflate *s)
+{
+	writebits(s, (kFixedHuffman << 1) + s->lastblock, 3);
+}
+
+static void kite_deflate_sendtrees(struct kite_deflate *s)
+{
+	struct kite_deflate_table *t = s->tab;
+	unsigned int i;
+
+	writebits(s, (kDynamicHuffman << 1) + s->lastblock, 3);
+	writebits(s, t->numlitlens - kNumLitLenCodesMin, 5);
+	writebits(s, t->numdistlens - kNumDistCodesMin,  5);
+	writebits(s, t->numblcodes - kNumLensCodesMin,   4);
+
+	for (i = 0; i < t->numblcodes; i++)
+		writebits(s, t->levelLens[kCodeLengthAlphabetOrder[i]], 3);
+
+	Huffman_ReverseBits(t->levelCodes, t->levelLens, kLensTableSize);
+	kite_deflate_sendtree(s, t->litLenLevels, t->numlitlens);
+	kite_deflate_sendtree(s, t->distLevels, t->numdistlens);
+}
+
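+/*
+ * Map (distance - 1) to its DEFLATE distance slot: values below 512 are
+ * looked up directly in g_FastPos, larger ones are looked up on (pos >> 8)
+ * with 16 added to the resulting slot.
+ */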
+static inline unsigned int deflateDistSlot(unsigned int pos)
+{
+	const unsigned int zz = (kNumLogBits - 1) &
+		((((1U << kNumLogBits) - 1) - pos) >> (31 - 3));
+
+	return g_FastPos[pos >> zz] + (zz * 2);
+}
+
+static void kite_deflate_writeblock(struct kite_deflate *s, bool fixed)
+{
+	int i;
+	u32 *mainCodes, *distCodes;
+	const u8 *litLenLevels, *distLevels;
+
+	if (!fixed) {
+		struct kite_deflate_table *t = s->tab;
+
+		mainCodes = t->mainCodes; distCodes = t->distCodes;
+		litLenLevels = t->litLenLevels;	distLevels = t->distLevels;
+
+		Huffman_ReverseBits(mainCodes, litLenLevels, kMainTableSize);
+		Huffman_ReverseBits(distCodes, distLevels, kDistTableSize32);
+	} else {
+		mainCodes = kstaticHuff_mainCodes;
+		distCodes = kstaticHuff_distCodes;
+
+		litLenLevels = kstaticHuff_litLenLevels;
+		distLevels = NULL;
+	}
+
+	for (i = 0; i < s->symbols; ++i) {
+		struct kite_deflate_symbol *sym = &s->sym[i];
+
+		if (sym->len < kMatchMinLen) {		/* literal */
+			writebits(s, mainCodes[sym->dist],
+				  litLenLevels[sym->dist]);
+		} else {
+			unsigned int lenSlot, distSlot;
+			unsigned int lc = sym->len - kMatchMinLen;
+
+			lenSlot = g_LenSlots[lc];
+			writebits(s, mainCodes[kSymbolMatch + lenSlot],
+				  litLenLevels[kSymbolMatch + lenSlot]);
+			writebits(s, lc - kLenStart32[lenSlot],
+				  kLenExtraBits32[lenSlot]);
+
+			distSlot = deflateDistSlot(sym->dist - 1);
+			writebits(s, distCodes[distSlot],
+				  fixed ? 5 : distLevels[distSlot]);
+			writebits(s, sym->dist - 1 - kDistStart[distSlot],
+				  kDistExtraBits[distSlot]);
+		}
+	}
+	writebits(s, mainCodes[kSymbolEndOfBlock],
+		  litLenLevels[kSymbolEndOfBlock]);
+}
+
+static u32 Huffman_GetPrice(const u32 *freqs, const u8 *lens, u32 num)
+{
+	u32 price = 0;
+
+	while (num) {
+		price += (*lens++) * (*freqs++);
+		--num;
+	}
+	return price;
+}
+
+static u32 Huffman_GetPriceEx(const u32 *freqs, const u8 *lens, u32 num,
+			      const u8 *extraBits, u32 extraBase)
+{
+	return Huffman_GetPrice(freqs, lens, num) +
+		Huffman_GetPrice(freqs + extraBase, extraBits, num - extraBase);
+}
+
+/* Adapted from C/HuffEnc.c (7zip) for now */
+#define HeapSortDown(p, k, size, temp) \
+  { for (;;) { \
+    size_t s = (k << 1); \
+    if (s > size) break; \
+    if (s < size && p[s + 1] > p[s]) s++; \
+    if (temp >= p[s]) break; \
+    p[k] = p[s]; k = s; \
+  } p[k] = temp; }
+
+static void HeapSort(u32 *p, size_t size)
+{
+  if (size <= 1)
+    return;
+  p--;
+  {
+    size_t i = size / 2;
+    do
+    {
+      u32 temp = p[i];
+      size_t k = i;
+      HeapSortDown(p, k, size, temp)
+    }
+    while (--i != 0);
+  }
+  /*
+  do
+  {
+    size_t k = 1;
+    UInt32 temp = p[size];
+    p[size--] = p[1];
+    HeapSortDown(p, k, size, temp)
+  }
+  while (size > 1);
+  */
+  while (size > 3)
+  {
+    u32 temp = p[size];
+    size_t k = (p[3] > p[2]) ? 3 : 2;
+    p[size--] = p[1];
+    p[1] = p[k];
+    HeapSortDown(p, k, size, temp)
+  }
+  {
+    u32 temp = p[size];
+    p[size] = p[1];
+    if (size > 2 && p[2] < temp)
+    {
+      p[1] = p[2];
+      p[2] = temp;
+    }
+    else
+      p[1] = temp;
+  }
+}
+
+#define NUM_BITS 10
+#define MASK (((unsigned)1 << NUM_BITS) - 1)
+
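+/*
+ * Build length-limited Huffman code lengths: symbols are heap-sorted by
+ * frequency (symbol index packed into the low NUM_BITS), merged into a tree
+ * in place, and any depth beyond maxLen is redistributed before canonical
+ * codes are assigned.
+ */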
+static void Huffman_Generate(const u32 *freqs, u32 *p, u8 *lens,
+			     unsigned int numSymbols, unsigned int maxLen)
+{
+	u32 num, i;
+
+	num = 0;
+	/* if (maxLen > 10) maxLen = 10; */
+
+	for (i = 0; i < numSymbols; i++) {
+		u32 freq = freqs[i];
+
+		if (!freq)
+			lens[i] = 0;
+		else
+			p[num++] = i | (freq << NUM_BITS);
+	}
+	HeapSort(p, num);
+
+	if (num < 2) {
+		unsigned int minCode = 0, maxCode = 1;
+
+		if (num == 1) {
+			maxCode = (unsigned int)p[0] & MASK;
+			if (!maxCode)
+				maxCode++;
+		}
+		p[minCode] = 0;
+		p[maxCode] = 1;
+		lens[minCode] = lens[maxCode] = 1;
+		return;
+	}
+
+	{
+		u32 b, e, i;
+
+		i = b = e = 0;
+		do {
+			u32 n, m, freq;
+
+			n = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
+			freq = (p[n] & ~MASK);
+			p[n] = (p[n] & MASK) | (e << NUM_BITS);
+			m = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
+			freq += (p[m] & ~MASK);
+			p[m] = (p[m] & MASK) | (e << NUM_BITS);
+			p[e] = (p[e] & MASK) | freq;
+			e++;
+		} while (num - e > 1);
+
+		{
+			u32 lenCounters[kMaxLen + 1];
+
+			for (i = 0; i <= kMaxLen; i++)
+				lenCounters[i] = 0;
+
+			p[--e] &= MASK;
+			lenCounters[1] = 2;
+			while (e > 0) {
+				u32 len = (p[p[--e] >> NUM_BITS] >> NUM_BITS) + 1;
+
+				p[e] = (p[e] & MASK) | (len << NUM_BITS);
+				if (len >= maxLen)
+					for (len = maxLen - 1; lenCounters[len] == 0; len--);
+				lenCounters[len]--;
+				lenCounters[(size_t)len + 1] += 2;
+			}
+
+			{
+				u32 len;
+
+				i = 0;
+				for (len = maxLen; len != 0; len--) {
+					u32 k;
+					for (k = lenCounters[len]; k != 0; k--)
+						lens[p[i++] & MASK] = (u8)len;
+				}
+			}
+			deflate_genhuffcodes(lens, p, numSymbols, lenCounters);
+		}
+	}
+}
+
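+/*
+ * Rebuild the dynamic Huffman tables from the current symbol frequencies and
+ * recompute the estimated block cost in bits, including the cost of the code
+ * length (tree description) data itself.
+ */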
+static void kite_deflate_fixdynblock(struct kite_deflate *s)
+{
+	struct kite_deflate_table *t = s->tab;
+	unsigned int numlitlens, numdistlens, numblcodes;
+	u32 levelFreqs[kLensTableSize] = {0};
+	u32 opt_mainlen;
+
+	if (!s->freq_changed)
+		return;
+
+	/* in order to match zlib */
+	s->numHuffBits = kMaxLen;
+//	s->numHuffBits = (s->symbols > 18000 ? 12 :
+//		(s->symbols > 7000 ? 11 : (s->symbols > 2000 ? 10 : 9)));
+
+	Huffman_Generate(s->mainFreqs, t->mainCodes, t->litLenLevels,
+			 kMainTableSize, s->numHuffBits);
+	Huffman_Generate(s->distFreqs, t->distCodes, t->distLevels,
+			 kDistTableSize32, s->numHuffBits);
+
+	/* code lengths for the literal/length alphabet */
+	numlitlens = kMainTableSize;
+	while (numlitlens > kNumLitLenCodesMin &&
+	       !t->litLenLevels[numlitlens - 1])
+		--numlitlens;
+
+	/* code lengths for the distance alphabet */
+	numdistlens = kDistTableSize32;
+	while (numdistlens > kNumDistCodesMin &&
+	       !t->distLevels[numdistlens - 1])
+		--numdistlens;
+
+	kite_deflate_scanlens(numlitlens, t->litLenLevels, levelFreqs);
+	kite_deflate_scanlens(numdistlens, t->distLevels, levelFreqs);
+	Huffman_Generate(levelFreqs, t->levelCodes, t->levelLens,
+			 kLensTableSize, 7);
+	numblcodes = kLensTableSize;
+	while (numblcodes > kNumLensCodesMin &&
+	       !t->levelLens[kCodeLengthAlphabetOrder[numblcodes - 1]])
+		--numblcodes;
+
+	t->numlitlens = numlitlens;
+	t->numdistlens = numdistlens;
+	t->numblcodes = numblcodes;
+
+	opt_mainlen = Huffman_GetPriceEx(s->mainFreqs, t->litLenLevels,
+			kMainTableSize, kLenExtraBits32, kSymbolMatch) +
+		Huffman_GetPriceEx(s->distFreqs, t->distLevels,
+			kDistTableSize32, kDistExtraBits, 0);
+	s->costbits = 3 + 5 + 5 + 4 + 3 * numblcodes +
+		Huffman_GetPriceEx(levelFreqs, t->levelLens,
+			kLensTableSize, kLevelExtraBits, kTableDirectLevels) +
+		opt_mainlen;
+	s->freq_changed = false;
+}
+
+/*
+ * An array used by the LZ-based encoder to hold the length-distance pairs
+ * found by the LZ match finder.
+ */
+struct kite_match {
+	unsigned int len;
+	unsigned int dist;
+};
+
+struct kite_matchfinder {
+	/* pointer to buffer with data to be compressed */
+	const u8 *buffer;
+
+	/* indicate the first byte that doesn't contain valid input data */
+	const u8 *end;
+
+	/* LZ matchfinder hash chain representation */
+	u32 *hash, *chain;
+
+	u32 base;
+
+	/* indicate the next byte to run through the match finder */
+	u32 offset;
+
+	u32 cyclic_pos;
+
+	/* maximum length of a match that the matchfinder will try to find. */
+	u16 nice_len;
+
+	/* the total sliding window size */
+	u16 wsiz;
+
+	/* how many hash chain entries the match finder examines per search */
+	u16 depth;
+
+	/* do not perform lazy search at or above this match length */
+	u16 max_lazy;
+
+	/* reduce lazy search at or above this match length */
+	u8  good_len;
+
+	/* current match for lazy matching */
+	struct kite_match *matches;
+	struct kite_match matches_matrix[2][4];
+};
+
+/*
+ * This mysterious table is just the CRC of each possible byte. It can be
+ * computed using the standard bit-at-a-time methods. The polynomial can
+ * be seen in entry 128, 0x8408. This corresponds to x^0 + x^5 + x^12.
+ * Add the implicit x^16, and you have the standard CRC-CCITT.
+ */
+u16 const crc_ccitt_table[256] __attribute__((__aligned__(128))) = {
+	0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf,
+	0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7,
+	0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e,
+	0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876,
+	0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd,
+	0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5,
+	0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c,
+	0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974,
+	0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb,
+	0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3,
+	0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a,
+	0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72,
+	0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9,
+	0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1,
+	0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738,
+	0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70,
+	0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7,
+	0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff,
+	0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036,
+	0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e,
+	0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5,
+	0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd,
+	0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134,
+	0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c,
+	0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3,
+	0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb,
+	0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232,
+	0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a,
+	0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1,
+	0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9,
+	0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330,
+	0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78
+};
+
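+/*
+ * HC3 hash-chain match finder: chain heads are indexed by a 3-byte hash (two
+ * raw bytes XORed with the CRC of the third byte); up to `depth` chain
+ * entries are examined and only matches longer than the current `bestlen`
+ * are recorded.
+ */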
+int kite_mf_getmatches_hc3(struct kite_matchfinder *mf, u16 depth, u16 bestlen)
+{
+	const u8 *cur = mf->buffer + mf->offset;
+	const u8 *qbase = mf->buffer - mf->base;
+	u32 curMatch;
+	unsigned int v, hv, i, k, p, wsiz;
+
+	if (mf->end - cur < bestlen + 1)
+		return 0;
+
+	v = get_unaligned((u16 *)cur);
+	hv = v ^ crc_ccitt_table[cur[2]];
+	curMatch = mf->hash[hv];
+	p = mf->base + mf->offset;
+	mf->hash[hv] = p;
+	mf->chain[mf->cyclic_pos] = curMatch;
+	wsiz = mf->wsiz;
+	k = 1;
+
+	if (depth) {
+		unsigned int wpos = wsiz + mf->cyclic_pos;
+
+		hv = min_t(unsigned int, mf->nice_len, mf->end - cur);
+		DBG_BUGON(hv > kMatchMaxLen32);
+		do {
+			unsigned int diff = p - curMatch;
+			const u8 *q;
+
+			if (diff >= wsiz)
+				break;
+
+			q = qbase + curMatch;
+			curMatch = mf->chain[(wpos - diff) & (wsiz - 1)];
+			if (v == get_unaligned((u16 *)q) && (bestlen < 3 || (
+			    get_unaligned((u16 *)(cur + bestlen - 1)) ==
+			    get_unaligned((u16 *)(q + bestlen - 1)) &&
+			    !memcmp(cur + 3, q + 3, bestlen - 3)))) {
+				DBG_BUGON(cur[2] != q[2]);
+				i = erofs_memcmp2(cur + bestlen + 1,
+					q + bestlen + 1, hv - bestlen - 1);
+				bestlen += 1 + i;
+
+				k -= (k >= ARRAY_SIZE(mf->matches_matrix[0]));
+				mf->matches[k++] = (struct kite_match) {
+					.len = bestlen,
+					.dist = diff,
+				};
+				if (bestlen >= hv)
+					break;
+			}
+		} while (--depth);
+	}
+	mf->offset++;
+	mf->cyclic_pos = (mf->cyclic_pos + 1) & (wsiz - 1);
+	return k - 1;
+}
+
+/* let's align with zlib */
+static const struct kite_matchfinder_cfg {
+	u16  good_length;	/* reduce lazy search above this match length */
+	u16  max_lazy;	/* do not perform lazy search above this match length */
+	u16  nice_length;	/* quit search above this match length */
+	u16  depth;
+	bool lazy_search;
+} kite_mfcfg[10] = {
+/*      good lazy nice depth */
+/* 0 */ {0,    0,  0,    0, false},	/* store only [unsupported] */
+/* 1 */ {4,    4,  8,    4, false},	/* maximum speed, no lazy matches */
+/* 2 */ {4,    5, 16,    8, false},
+/* 3 */ {4,    6, 32,   32, false},
+
+/* 4 */ {4,    4,  16,   16, true},	/* lazy matches */
+/* 5 */ {8,   16,  32,   32, true},
+/* 6 */ {8,   16, 128,  128, true},
+/* 7 */ {8,   32, 128,  256, true},
+/* 8 */ {32, 128, 258, 1024, true},
+/* 9 */ {32, 258, 258, 4096, true},	/* maximum compression */
+};
+
+static int kite_mf_init(struct kite_matchfinder *mf, int wsiz, int level)
+{
+	const struct kite_matchfinder_cfg *cfg;
+
+	if (!level || level >= ARRAY_SIZE(kite_mfcfg))
+		return -EINVAL;
+	cfg = &kite_mfcfg[level];
+
+	if (wsiz > kHistorySize32 || (1 << ilog2(wsiz)) != wsiz)
+		return -EINVAL;
+
+	mf->hash = calloc(0x10000, sizeof(mf->hash[0]));
+	if (!mf->hash)
+		return -ENOMEM;
+
+	mf->chain = malloc(sizeof(mf->chain[0]) * wsiz);
+	if (!mf->chain) {
+		free(mf->hash);
+		mf->hash = NULL;
+		return -ENOMEM;
+	}
+	mf->wsiz = wsiz;
+
+	mf->good_len = cfg->good_length;
+	mf->nice_len = cfg->nice_length;
+	mf->depth = cfg->depth;
+	mf->max_lazy = cfg->max_lazy;
+	return cfg->lazy_search;
+}
+
+static void kite_mf_reset(struct kite_matchfinder *mf,
+			  const void *buffer, const void *end)
+{
+	mf->buffer = buffer;
+	mf->end = end;
+
+	/*
+	 * Advance the base by at least max_distance + 1 so that stale (or
+	 * zero-initialized) hash entries always appear out-of-window and can
+	 * never match; this avoids resetting the hash table itself.
+	 */
+	mf->base += mf->offset + kHistorySize32 + 1;
+
+	mf->offset = 0;
+	mf->cyclic_pos = 0;
+
+	mf->matches = mf->matches_matrix[0];
+	mf->matches_matrix[0][0].len =
+		mf->matches_matrix[1][0].len = kMatchMinLen - 1;
+}
+
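+/*
+ * Account for one literal/match symbol: bump its frequency, estimate the
+ * extra bits it costs under the current encoding mode, and accept it only if
+ * the running block cost still fits into the remaining output space.
+ */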
+static bool deflate_count_code(struct kite_deflate *s, bool literal,
+			       unsigned int lenSlot, unsigned int distSlot)
+{
+	struct kite_deflate_table *t = s->tab;
+	unsigned int lenbase = (literal ? 0 : kSymbolMatch);
+	u64 rem = (s->outlen - s->pos_out) * 8 - s->bitpos;
+	bool recalc = false;
+	unsigned int bits;
+
+	s->freq_changed = true;
+	++s->mainFreqs[lenbase + lenSlot];
+	if (!literal)
+		++s->distFreqs[distSlot];
+
+	if (s->encode_mode == 1) {
+		if (literal) {
+			bits = kstaticHuff_litLenLevels[lenSlot];
+			goto out;
+		}
+		bits = kstaticHuff_litLenLevels[kSymbolMatch + lenSlot] +
+			kLenExtraBits32[lenSlot] + 5 + kDistExtraBits[distSlot];
+		goto out;
+	}
+
+	/* XXX: more ideas to be done later */
+	recalc |= (!literal && !t->distLevels[distSlot]);
+	recalc |= !t->litLenLevels[lenbase + lenSlot];
+	if (recalc) {
+		kite_dbg("recalc %c lS %u dS %u", literal ? 'l' : 'm',
+			 lenSlot, distSlot);
+		s->tab = s->tables + (s->tab == s->tables);
+		kite_deflate_fixdynblock(s);
+		bits = 0;
+		goto out;
+	}
+
+	if (literal) {
+		bits = t->litLenLevels[lenSlot];
+		goto out;
+	}
+
+	bits = t->distLevels[distSlot] + kDistExtraBits[distSlot] +
+		t->litLenLevels[kSymbolMatch + lenSlot] +
+		kLenExtraBits32[lenSlot];
+out:
+	if (rem < s->costbits + bits) {
+		--s->mainFreqs[lenbase + lenSlot];
+		if (!literal)
+			--s->distFreqs[distSlot];
+		if (recalc)
+			s->tab = s->tables + (s->tab == s->tables);
+		return false;
+	}
+	s->costbits += bits;
+	return true;
+}
+
+static bool kite_deflate_tally(struct kite_deflate *s,
+			       struct kite_match *match)
+{
+	struct kite_deflate_symbol *sym = s->sym + s->symbols;
+	u32 fixedcost = ~0;
+	bool hassp;
+
+	*sym = (struct kite_deflate_symbol) {
+		.len = match->len,
+		.dist = match->dist,
+	};
+
+retry:
+	if (sym->len < kMatchMinLen) {
+		hassp = deflate_count_code(s, true, sym->dist, 0);
+	} else {
+		unsigned int lc = sym->len - kMatchMinLen;
+		unsigned int lenSlot = g_LenSlots[lc];
+		unsigned int distSlot = deflateDistSlot(sym->dist - 1);
+
+		hassp = deflate_count_code(s, false, lenSlot, distSlot);
+	}
+
+	if (!hassp) {
+		if (s->encode_mode == 1) {
+			fixedcost = s->costbits;
+			s->encode_mode = 2;
+			goto retry;
+		}
+		s->lastblock = true;
+		if (fixedcost <= s->costbits)
+			s->encode_mode = 1;
+		return true;
+	}
+	++s->symbols;
+	return false;
+}
+
+static void kite_deflate_writestore(struct kite_deflate *s)
+{
+	bool fb = !s->startpos && !s->bitpos;
+	unsigned int totalsiz = s->pos_in - s->prev_valid - s->startpos;
+
+	do {
+		unsigned int len = min_t(unsigned int, totalsiz, 65535);
+
+		totalsiz -= len;
+		writebits(s, (fb << 3) | (kStored << 1) |
+			  (s->lastblock && !totalsiz), 3 + fb);
+		flushbits(s);
+		writebits(s, len, 16);
+		writebits(s, len ^ 0xffff, 16);
+		flushbits(s);
+		memcpy(s->out + s->pos_out, s->in + s->startpos, len);
+		s->pos_out += len;
+		s->startpos += len;
+	} while (totalsiz);
+}
+
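+/*
+ * Close the current block: when still in fixed-Huffman mode, compare the
+ * costs of fixed Huffman, dynamic Huffman and stored encodings and pick the
+ * cheapest; then mark this as the last block if the output budget is nearly
+ * exhausted.
+ */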
+static void kite_deflate_endblock(struct kite_deflate *s)
+{
+	if (s->encode_mode == 1) {
+		u32 fixedcost = s->costbits;
+		unsigned int storelen, storeblocks, storecost;
+
+		kite_deflate_fixdynblock(s);
+		if (fixedcost > s->costbits)
+			s->encode_mode = 2;
+		else
+			s->costbits = fixedcost;
+
+		storelen = s->pos_in - s->prev_valid - s->startpos;
+		storeblocks = max(DIV_ROUND_UP(storelen, 65535), 1U);
+		storecost = (8 - s->bitpos) + storeblocks - 1 +
+			storeblocks * 32 + storelen * 8;
+		if (s->costbits > storecost) {
+			s->costbits = storecost;
+			s->encode_mode = 0;
+		}
+	}
+
+	s->lastblock |= (s->costbits + s->bitpos >=
+			(s->outlen - s->pos_out) * 8);
+}
+
+static void kite_deflate_startblock(struct kite_deflate *s)
+{
+	memset(s->mainFreqs, 0, sizeof(s->mainFreqs));
+	memset(s->distFreqs, 0, sizeof(s->distFreqs));
+	memset(s->tables, 0, sizeof(s->tables[0]));
+	s->symbols = 0;
+	s->mainFreqs[kSymbolEndOfBlock]++;
+	s->encode_mode = 1;
+	s->tab = s->tables;
+	s->costbits = 3 + kstaticHuff_litLenLevels[kSymbolEndOfBlock];
+}
+
+static bool kite_deflate_commitblock(struct kite_deflate *s)
+{
+	if (s->encode_mode == 1) {
+		kite_deflate_setfixedtrees(s);
+		kite_deflate_writeblock(s, true);
+	} else if (s->encode_mode == 2) {
+		kite_deflate_sendtrees(s);
+		kite_deflate_writeblock(s, false);
+	} else {
+		kite_deflate_writestore(s);
+	}
+	s->startpos = s->pos_in - s->prev_valid;
+	return s->lastblock;
+}
+
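+/*
+ * Greedy parsing: record the longest match found at the current position (or
+ * a literal), then skip over the matched bytes while still feeding them to
+ * the match finder so the hash chains stay up to date.
+ */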
+static bool kite_deflate_fast(struct kite_deflate *s)
+{
+	struct kite_matchfinder *mf = s->mf;
+
+	kite_deflate_startblock(s);
+	while (1) {
+		int matches = kite_mf_getmatches_hc3(mf, mf->depth,
+				kMatchMinLen - 1);
+
+		if (matches) {
+			unsigned int len = mf->matches[matches].len;
+			unsigned int dist = mf->matches[matches].dist;
+
+			if (len == kMatchMinLen && dist > ZLIB_DISTANCE_TOO_FAR)
+				goto nomatch;
+
+			kite_dbg("%u matches found: longest [%u,%u] of distance %u",
+				 matches, s->pos_in, s->pos_in + len - 1, dist);
+
+			if (kite_deflate_tally(s, mf->matches + matches))
+				break;
+			s->pos_in += len;
+			/* skip the rest bytes */
+			while (--len)
+				(void)kite_mf_getmatches_hc3(mf, 0, 0);
+		} else {
+nomatch:
+			mf->matches[0].dist = s->in[s->pos_in];
+			if (isprint(s->in[s->pos_in]))
+				kite_dbg("literal %c pos_in %u", s->in[s->pos_in], s->pos_in);
+			else
+				kite_dbg("literal %x pos_in %u", s->in[s->pos_in], s->pos_in);
+
+			if (kite_deflate_tally(s, mf->matches))
+				break;
+			++s->pos_in;
+		}
+
+		s->lastblock |= (s->pos_in >= s->inlen);
+		if (s->pos_in >= s->inlen || s->symbols >= s->max_symbols) {
+			kite_deflate_endblock(s);
+			break;
+		}
+	}
+	return kite_deflate_commitblock(s);
+}
+
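+/*
+ * Lazy parsing (zlib-style): emitting a match is deferred by one byte; if
+ * the next position yields a longer match, the previous byte is output as a
+ * literal instead, otherwise the deferred match is emitted.
+ */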
+static bool kite_deflate_slow(struct kite_deflate *s)
+{
+	struct kite_matchfinder *mf = s->mf;
+	bool flush = false;
+
+	kite_deflate_startblock(s);
+	while (1) {
+		struct kite_match *prev_matches = mf->matches;
+		unsigned int len = kMatchMinLen - 1;
+		int matches;
+		unsigned int len0;
+
+		mf->matches = mf->matches_matrix[
+				mf->matches == mf->matches_matrix[0]];
+		mf->matches[0].dist = s->in[s->pos_in];
+
+		len0 = prev_matches[s->prev_longest].len;
+		if (len0 < mf->max_lazy) {
+			matches = kite_mf_getmatches_hc3(mf, mf->depth >>
+				(len0 >= mf->good_len), len0);
+			if (matches) {
+				len = mf->matches[matches].len;
+				if (len == kMatchMinLen &&
+				    mf->matches[matches].dist > ZLIB_DISTANCE_TOO_FAR) {
+					matches = 0;
+					len = kMatchMinLen - 1;
+				}
+			}
+		} else {
+			matches = 0;
+			(void)kite_mf_getmatches_hc3(mf, 0, 0);
+		}
+
+		if (len < len0) {
+			if (kite_deflate_tally(s,
+					prev_matches + s->prev_longest))
+				break;
+
+			s->pos_in += --len0;
+			/* skip the rest bytes */
+			while (--len0)
+				(void)kite_mf_getmatches_hc3(mf, 0, 0);
+			s->prev_valid = false;
+			s->prev_longest = 0;
+		} else {
+			if (!s->prev_valid)
+				s->prev_valid = true;
+			else if (kite_deflate_tally(s, prev_matches))
+				break;
+			++s->pos_in;
+			s->prev_longest = matches;
+		}
+
+		s->lastblock |= (s->pos_in >= s->inlen);
+		if (s->pos_in >= s->inlen) {
+			flush = true;
+			break;
+		}
+		if (s->symbols >= s->max_symbols) {
+			kite_deflate_endblock(s);
+			break;
+		}
+	}
+
+	if (flush && s->prev_valid) {
+		(void)kite_deflate_tally(s, mf->matches + s->prev_longest);
+		s->prev_valid = false;
+	}
+	return kite_deflate_commitblock(s);
+}
+
+void kite_deflate_end(struct kite_deflate *s)
+{
+	if (s->mf) {
+		if (s->mf->hash)
+			free(s->mf->hash);
+		if (s->mf->chain)
+			free(s->mf->chain);
+		free(s->mf);
+	}
+	if (s->sym)
+		free(s->sym);
+	free(s);
+}
+
+struct kite_deflate *kite_deflate_init(int level, unsigned int dict_size)
+{
+	struct kite_deflate *s;
+	int err;
+
+	kite_deflate_init_once();
+	s = calloc(1, sizeof(*s));
+	if (!s)
+		return ERR_PTR(-ENOMEM);
+
+	s->max_symbols = 16384;
+	s->sym = malloc(sizeof(s->sym[0]) * s->max_symbols);
+	if (!s->sym) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	s->mf = malloc(sizeof(*s->mf));
+	if (!s->mf) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	if (!dict_size)
+		dict_size = kHistorySize32;
+
+	err = kite_mf_init(s->mf, dict_size, level);
+	if (err < 0)
+		goto err_out;
+
+	s->lazy_search = err;
+	return s;
+err_out:
+	if (s->mf)
+		free(s->mf);
+	if (s->sym)
+		free(s->sym);
+	free(s);
+	return ERR_PTR(err);
+}
+
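+/*
+ * Compress as much of the input as fits into target_dstsize output bytes;
+ * on return, *srcsize holds the number of source bytes actually consumed
+ * and the compressed size is returned.
+ */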
+int kite_deflate_destsize(struct kite_deflate *s, const u8 *in, u8 *out,
+			   unsigned int *srcsize, unsigned int target_dstsize)
+{
+	memset(s, 0, offsetof(struct kite_deflate, mainFreqs));
+	s->in = in;
+	s->inlen = *srcsize;
+	s->out = out;
+	s->outlen = target_dstsize;
+	kite_mf_reset(s->mf, in, in + s->inlen);
+
+	if (s->lazy_search)
+		while (!kite_deflate_slow(s));
+	else
+		while (!kite_deflate_fast(s));
+	flushbits(s);
+
+	*srcsize = s->startpos;
+	return s->pos_out;
+}
+
+#ifdef TEST
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+int main(int argc, char *argv[])
+{
+	int fd;
+	u64 filelength;
+	u8 out[1048576], *buf;
+	int dstsize = 4096;
+	unsigned int srcsize, outsize;
+	struct kite_deflate *s;
+
+	fd = open(argv[1], O_RDONLY);
+	if (fd < 0)
+		return -errno;
+	if (argc > 2)
+		dstsize = atoi(argv[2]);
+	filelength = lseek(fd, 0, SEEK_END);
+
+	s = kite_deflate_init(9, 0);
+	if (IS_ERR(s))
+		return PTR_ERR(s);
+
+	buf = mmap(NULL, filelength, PROT_READ, MAP_SHARED, fd, 0);
+	if (buf == MAP_FAILED)
+		return -errno;
+	close(fd);
+
+	srcsize = filelength;
+	outsize = kite_deflate_destsize(s, buf, out, &srcsize, dstsize);
+	fd = open("out.txt", O_WRONLY | O_CREAT | O_TRUNC, 0644);
+	write(fd, out, outsize);
+	close(fd);
+	kite_deflate_end(s);
+	return 0;
+}
+#endif
diff --git a/lib/liberofs_uuid.h b/lib/liberofs_uuid.h
new file mode 100644
index 0000000..63b358a
--- /dev/null
+++ b/lib/liberofs_uuid.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+#ifndef __EROFS_LIB_UUID_H
+#define __EROFS_LIB_UUID_H
+
+void erofs_uuid_generate(unsigned char *out);
+void erofs_uuid_unparse_lower(const unsigned char *buf, char *out);
+int erofs_uuid_parse(const char *in, unsigned char *uu);
+
+#endif
diff --git a/lib/namei.c b/lib/namei.c
index 7b69a59..294d7a3 100644
--- a/lib/namei.c
+++ b/lib/namei.c
@@ -26,11 +26,15 @@
 {
 	int ret, ifmt;
 	char buf[sizeof(struct erofs_inode_extended)];
+	struct erofs_sb_info *sbi = vi->sbi;
 	struct erofs_inode_compact *dic;
 	struct erofs_inode_extended *die;
-	const erofs_off_t inode_loc = iloc(vi->nid);
+	erofs_off_t inode_loc;
 
-	ret = dev_read(0, buf, inode_loc, sizeof(*dic));
+	DBG_BUGON(!sbi);
+	inode_loc = erofs_iloc(vi);
+
+	ret = dev_read(sbi, 0, buf, inode_loc, sizeof(*dic));
 	if (ret < 0)
 		return -EIO;
 
@@ -47,7 +51,8 @@
 	case EROFS_INODE_LAYOUT_EXTENDED:
 		vi->inode_isize = sizeof(struct erofs_inode_extended);
 
-		ret = dev_read(0, buf + sizeof(*dic), inode_loc + sizeof(*dic),
+		ret = dev_read(sbi, 0, buf + sizeof(*dic),
+			       inode_loc + sizeof(*dic),
 			       sizeof(*die) - sizeof(*dic));
 		if (ret < 0)
 			return -EIO;
@@ -55,6 +60,7 @@
 		die = (struct erofs_inode_extended *)buf;
 		vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount);
 		vi->i_mode = le16_to_cpu(die->i_mode);
+		vi->i_ino[0] = le32_to_cpu(die->i_ino);
 
 		switch (vi->i_mode & S_IFMT) {
 		case S_IFREG:
@@ -90,6 +96,7 @@
 		vi->inode_isize = sizeof(struct erofs_inode_compact);
 		vi->xattr_isize = erofs_xattr_ibody_size(dic->i_xattr_icount);
 		vi->i_mode = le16_to_cpu(dic->i_mode);
+		vi->i_ino[0] = le32_to_cpu(dic->i_ino);
 
 		switch (vi->i_mode & S_IFMT) {
 		case S_IFREG:
@@ -114,8 +121,8 @@
 		vi->i_gid = le16_to_cpu(dic->i_gid);
 		vi->i_nlink = le16_to_cpu(dic->i_nlink);
 
-		vi->i_mtime = sbi.build_time;
-		vi->i_mtime_nsec = sbi.build_time_nsec;
+		vi->i_mtime = sbi->build_time;
+		vi->i_mtime_nsec = sbi->build_time_nsec;
 
 		vi->i_size = le32_to_cpu(dic->i_size);
 		if (vi->datalayout == EROFS_INODE_CHUNK_BASED)
@@ -134,10 +141,11 @@
 				  vi->u.chunkformat, vi->nid | 0ULL);
 			return -EOPNOTSUPP;
 		}
-		vi->u.chunkbits = LOG_BLOCK_SIZE +
+		vi->u.chunkbits = sbi->blkszbits +
 			(vi->u.chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK);
-	} else if (erofs_inode_is_data_compressed(vi->datalayout))
+	} else if (erofs_inode_is_data_compressed(vi->datalayout)) {
 		return z_erofs_fill_inode(vi);
+	}
 	return 0;
 bogusimode:
 	erofs_err("bogus i_mode (%o) @ nid %llu", vi->i_mode, vi->nid | 0ULL);
@@ -182,17 +190,18 @@
 }
 
 struct nameidata {
+	struct erofs_sb_info *sbi;
 	erofs_nid_t	nid;
 	unsigned int	ftype;
 };
 
-int erofs_namei(struct nameidata *nd,
-		const char *name, unsigned int len)
+int erofs_namei(struct nameidata *nd, const char *name, unsigned int len)
 {
 	erofs_nid_t nid = nd->nid;
 	int ret;
-	char buf[EROFS_BLKSIZ];
-	struct erofs_inode vi = { .nid = nid };
+	char buf[EROFS_MAX_BLOCK_SIZE];
+	struct erofs_sb_info *sbi = nd->sbi;
+	struct erofs_inode vi = { .sbi = sbi, .nid = nid };
 	erofs_off_t offset;
 
 	ret = erofs_read_inode_from_disk(&vi);
@@ -202,7 +211,7 @@
 	offset = 0;
 	while (offset < vi.i_size) {
 		erofs_off_t maxsize = min_t(erofs_off_t,
-					    vi.i_size - offset, EROFS_BLKSIZ);
+					    vi.i_size - offset, erofs_blksiz(sbi));
 		struct erofs_dirent *de = (void *)buf;
 		unsigned int nameoff;
 
@@ -212,7 +221,7 @@
 
 		nameoff = le16_to_cpu(de->nameoff);
 		if (nameoff < sizeof(struct erofs_dirent) ||
-		    nameoff >= EROFS_BLKSIZ) {
+		    nameoff >= erofs_blksiz(sbi)) {
 			erofs_err("invalid de[0].nameoff %u @ nid %llu",
 				  nameoff, nid | 0ULL);
 			return -EFSCORRUPTED;
@@ -234,7 +243,7 @@
 
 static int link_path_walk(const char *name, struct nameidata *nd)
 {
-	nd->nid = sbi.root_nid;
+	nd->nid = nd->sbi->root_nid;
 
 	while (*name == '/')
 		name++;
@@ -253,7 +262,6 @@
 		if (ret)
 			return ret;
 
-		name = p;
 		/* Skip until no more slashes. */
 		for (name = p; *name == '/'; ++name)
 			;
@@ -264,7 +272,7 @@
 int erofs_ilookup(const char *path, struct erofs_inode *vi)
 {
 	int ret;
-	struct nameidata nd;
+	struct nameidata nd = { .sbi = vi->sbi };
 
 	ret = link_path_walk(path, &nd);
 	if (ret)
diff --git a/lib/rb_tree.c b/lib/rb_tree.c
new file mode 100644
index 0000000..28800a9
--- /dev/null
+++ b/lib/rb_tree.c
@@ -0,0 +1,512 @@
+// SPDX-License-Identifier: Unlicense
+//
+// Based on Julienne Walker's <http://eternallyconfuzzled.com/> rb_tree
+// implementation.
+//
+// Modified by Mirek Rusin <http://github.com/mirek/rb_tree>.
+//
+// This is free and unencumbered software released into the public domain.
+//
+// Anyone is free to copy, modify, publish, use, compile, sell, or
+// distribute this software, either in source code form or as a compiled
+// binary, for any purpose, commercial or non-commercial, and by any
+// means.
+//
+// In jurisdictions that recognize copyright laws, the author or authors
+// of this software dedicate any and all copyright interest in the
+// software to the public domain. We make this dedication for the benefit
+// of the public at large and to the detriment of our heirs and
+// successors. We intend this dedication to be an overt act of
+// relinquishment in perpetuity of all present and future rights to this
+// software under copyright law.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+// For more information, please refer to <http://unlicense.org/>
+//
+
+#include "rb_tree.h"
+
+// rb_node
+
+struct rb_node *
+rb_node_alloc () {
+    return malloc(sizeof(struct rb_node));
+}
+
+struct rb_node *
+rb_node_init (struct rb_node *self, void *value) {
+    if (self) {
+        self->red = 1;
+        self->link[0] = self->link[1] = NULL;
+        self->value = value;
+    }
+    return self;
+}
+
+struct rb_node *
+rb_node_create (void *value) {
+    return rb_node_init(rb_node_alloc(), value);
+}
+
+void
+rb_node_dealloc (struct rb_node *self) {
+    if (self) {
+        free(self);
+    }
+}
+
+static int
+rb_node_is_red (const struct rb_node *self) {
+    return self ? self->red : 0;
+}
+
+static struct rb_node *
+rb_node_rotate (struct rb_node *self, int dir) {
+    struct rb_node *result = NULL;
+    if (self) {
+        result = self->link[!dir];
+        self->link[!dir] = result->link[dir];
+        result->link[dir] = self;
+        self->red = 1;
+        result->red = 0;
+    }
+    return result;
+}
+
+static struct rb_node *
+rb_node_rotate2 (struct rb_node *self, int dir) {
+    struct rb_node *result = NULL;
+    if (self) {
+        self->link[!dir] = rb_node_rotate(self->link[!dir], !dir);
+        result = rb_node_rotate(self, dir);
+    }
+    return result;
+}
+
+// rb_tree - default callbacks
+
+int
+rb_tree_node_cmp_ptr_cb (struct rb_tree *self, struct rb_node *a, struct rb_node *b) {
+    return (a->value > b->value) - (a->value < b->value);
+}
+
+void
+rb_tree_node_dealloc_cb (struct rb_tree *self, struct rb_node *node) {
+    if (self) {
+        if (node) {
+            rb_node_dealloc(node);
+        }
+    }
+}
+
+// rb_tree
+
+struct rb_tree *
+rb_tree_alloc () {
+    return malloc(sizeof(struct rb_tree));
+}
+
+struct rb_tree *
+rb_tree_init (struct rb_tree *self, rb_tree_node_cmp_f node_cmp_cb) {
+    if (self) {
+        self->root = NULL;
+        self->size = 0;
+        self->cmp = node_cmp_cb ? node_cmp_cb : rb_tree_node_cmp_ptr_cb;
+    }
+    return self;
+}
+
+struct rb_tree *
+rb_tree_create (rb_tree_node_cmp_f node_cb) {
+    return rb_tree_init(rb_tree_alloc(), node_cb);
+}
+
+void
+rb_tree_dealloc (struct rb_tree *self, rb_tree_node_f node_cb) {
+    if (self) {
+        if (node_cb) {
+            struct rb_node *node = self->root;
+            struct rb_node *save = NULL;
+
+            // Rotate away the left links so that
+            // we can treat this like the destruction
+            // of a linked list
+            while (node) {
+                if (node->link[0] == NULL) {
+
+                    // No left links, just kill the node and move on
+                    save = node->link[1];
+                    node_cb(self, node);
+                    node = NULL;
+                } else {
+
+                    // Rotate away the left link and check again
+                    save = node->link[0];
+                    node->link[0] = save->link[1];
+                    save->link[1] = node;
+                }
+                node = save;
+            }
+        }
+        free(self);
+    }
+}
+
+int
+rb_tree_test (struct rb_tree *self, struct rb_node *root) {
+    int lh, rh;
+
+    if ( root == NULL )
+        return 1;
+    else {
+        struct rb_node *ln = root->link[0];
+        struct rb_node *rn = root->link[1];
+
+        /* Consecutive red links */
+        if (rb_node_is_red(root)) {
+            if (rb_node_is_red(ln) || rb_node_is_red(rn)) {
+                printf("Red violation");
+                return 0;
+            }
+        }
+
+        lh = rb_tree_test(self, ln);
+        rh = rb_tree_test(self, rn);
+
+        /* Invalid binary search tree */
+        if ( ( ln != NULL && self->cmp(self, ln, root) >= 0 )
+            || ( rn != NULL && self->cmp(self, rn, root) <= 0))
+        {
+            puts ( "Binary tree violation" );
+            return 0;
+        }
+
+        /* Black height mismatch */
+        if ( lh != 0 && rh != 0 && lh != rh ) {
+            puts ( "Black violation" );
+            return 0;
+        }
+
+        /* Only count black links */
+        if ( lh != 0 && rh != 0 )
+            return rb_node_is_red ( root ) ? lh : lh + 1;
+        else
+            return 0;
+    }
+}
+
+void *
+rb_tree_find(struct rb_tree *self, void *value) {
+    void *result = NULL;
+    if (self) {
+        struct rb_node node = { .value = value };
+        struct rb_node *it = self->root;
+        int cmp = 0;
+        while (it) {
+            if ((cmp = self->cmp(self, it, &node))) {
+
+                // If the tree supports duplicates, they should be
+                // chained to the right subtree for this to work
+                it = it->link[cmp < 0];
+            } else {
+                break;
+            }
+        }
+        result = it ? it->value : NULL;
+    }
+    return result;
+}
+
+// Creates (malloc'ates)
+int
+rb_tree_insert (struct rb_tree *self, void *value) {
+    return rb_tree_insert_node(self, rb_node_create(value));
+}
+
+// Returns 1 on success, 0 otherwise.
+int
+rb_tree_insert_node (struct rb_tree *self, struct rb_node *node) {
+    if (self && node) {
+        if (self->root == NULL) {
+            self->root = node;
+        } else {
+            struct rb_node head = { 0 }; // False tree root
+            struct rb_node *g, *t;       // Grandparent & parent
+            struct rb_node *p, *q;       // Iterator & parent
+            int dir = 0, last = 0;
+
+            // Set up our helpers
+            t = &head;
+            g = p = NULL;
+            q = t->link[1] = self->root;
+
+            // Search down the tree for a place to insert
+            while (1) {
+                if (q == NULL) {
+
+                    // Insert node at the first null link.
+                    p->link[dir] = q = node;
+                } else if (rb_node_is_red(q->link[0]) && rb_node_is_red(q->link[1])) {
+
+                    // Simple red violation: color flip
+                    q->red = 1;
+                    q->link[0]->red = 0;
+                    q->link[1]->red = 0;
+                }
+
+                if (rb_node_is_red(q) && rb_node_is_red(p)) {
+
+                    // Hard red violation: rotations necessary
+                    int dir2 = t->link[1] == g;
+                    if (q == p->link[last]) {
+                        t->link[dir2] = rb_node_rotate(g, !last);
+                    } else {
+                        t->link[dir2] = rb_node_rotate2(g, !last);
+                    }
+                }
+
+                // Stop working if we inserted a node. This
+                // check also disallows duplicates in the tree
+                if (self->cmp(self, q, node) == 0) {
+                    break;
+                }
+
+                last = dir;
+                dir = self->cmp(self, q, node) < 0;
+
+                // Move the helpers down
+                if (g != NULL) {
+                    t = g;
+                }
+
+                g = p, p = q;
+                q = q->link[dir];
+            }
+
+            // Update the root (it may be different)
+            self->root = head.link[1];
+        }
+
+        // Make the root black for simplified logic
+        self->root->red = 0;
+        ++self->size;
+        return 1;
+    }
+    return 0;
+}
+
+// Returns 1 if the value was removed, 0 otherwise. Optional node callback
+// can be provided to dealloc node and/or user data. Use rb_tree_node_dealloc
+// default callback to deallocate node created by rb_tree_insert(...).
+int
+rb_tree_remove_with_cb (struct rb_tree *self, void *value, rb_tree_node_f node_cb) {
+    if (self->root != NULL) {
+        struct rb_node head = {0}; // False tree root
+        struct rb_node node = { .value = value }; // Value wrapper node
+        struct rb_node *q, *p, *g; // Helpers
+        struct rb_node *f = NULL;  // Found item
+        int dir = 1;
+
+        // Set up our helpers
+        q = &head;
+        g = p = NULL;
+        q->link[1] = self->root;
+
+        // Search and push a red node down
+        // to fix red violations as we go
+        while (q->link[dir] != NULL) {
+            int last = dir;
+
+            // Move the helpers down
+            g = p, p = q;
+            q = q->link[dir];
+            dir = self->cmp(self, q, &node) < 0;
+
+            // Save the node with matching value and keep
+            // going; we'll do removal tasks at the end
+            if (self->cmp(self, q, &node) == 0) {
+                f = q;
+            }
+
+            // Push the red node down with rotations and color flips
+            if (!rb_node_is_red(q) && !rb_node_is_red(q->link[dir])) {
+                if (rb_node_is_red(q->link[!dir])) {
+                    p = p->link[last] = rb_node_rotate(q, dir);
+                } else if (!rb_node_is_red(q->link[!dir])) {
+                    struct rb_node *s = p->link[!last];
+                    if (s) {
+                        if (!rb_node_is_red(s->link[!last]) && !rb_node_is_red(s->link[last])) {
+
+                            // Color flip
+                            p->red = 0;
+                            s->red = 1;
+                            q->red = 1;
+                        } else {
+                            int dir2 = g->link[1] == p;
+                            if (rb_node_is_red(s->link[last])) {
+                                g->link[dir2] = rb_node_rotate2(p, last);
+                            } else if (rb_node_is_red(s->link[!last])) {
+                                g->link[dir2] = rb_node_rotate(p, last);
+                            }
+
+                            // Ensure correct coloring
+                            q->red = g->link[dir2]->red = 1;
+                            g->link[dir2]->link[0]->red = 0;
+                            g->link[dir2]->link[1]->red = 0;
+                        }
+                    }
+                }
+            }
+        }
+
+        // Replace and remove the saved node
+        if (f) {
+            void *tmp = f->value;
+            f->value = q->value;
+            q->value = tmp;
+
+            p->link[p->link[1] == q] = q->link[q->link[0] == NULL];
+
+            if (node_cb) {
+                node_cb(self, q);
+            }
+            q = NULL;
+        }
+
+        // Update the root (it may be different)
+        self->root = head.link[1];
+
+        // Make the root black for simplified logic
+        if (self->root != NULL) {
+            self->root->red = 0;
+        }
+
+        --self->size;
+    }
+    return 1;
+}
+
+int
+rb_tree_remove (struct rb_tree *self, void *value) {
+    int result = 0;
+    if (self) {
+        result = rb_tree_remove_with_cb(self, value, rb_tree_node_dealloc_cb);
+    }
+    return result;
+}
+
+size_t
+rb_tree_size (struct rb_tree *self) {
+    size_t result = 0;
+    if (self) {
+        result = self->size;
+    }
+    return result;
+}
+
+// rb_iter
+
+struct rb_iter *
+rb_iter_alloc () {
+    return malloc(sizeof(struct rb_iter));
+}
+
+struct rb_iter *
+rb_iter_init (struct rb_iter *self) {
+    if (self) {
+        self->tree = NULL;
+        self->node = NULL;
+        self->top = 0;
+    }
+    return self;
+}
+
+struct rb_iter *
+rb_iter_create () {
+    return rb_iter_init(rb_iter_alloc());
+}
+
+void
+rb_iter_dealloc (struct rb_iter *self) {
+    if (self) {
+        free(self);
+    }
+}
+
+// Internal function, init traversal object, dir determines whether
+// to begin traversal at the smallest or largest valued node.
+static void *
+rb_iter_start (struct rb_iter *self, struct rb_tree *tree, int dir) {
+    void *result = NULL;
+    if (self) {
+        self->tree = tree;
+        self->node = tree->root;
+        self->top = 0;
+
+        // Save the path for later traversal
+        if (self->node != NULL) {
+            while (self->node->link[dir] != NULL) {
+                self->path[self->top++] = self->node;
+                self->node = self->node->link[dir];
+            }
+        }
+
+        result = self->node == NULL ? NULL : self->node->value;
+    }
+    return result;
+}
+
+// Traverse a red black tree in the user-specified direction (1 = next/ascending, 0 = prev/descending)
+static void *
+rb_iter_move (struct rb_iter *self, int dir) {
+    if (self->node->link[dir] != NULL) {
+
+        // Continue down this branch
+        self->path[self->top++] = self->node;
+        self->node = self->node->link[dir];
+        while ( self->node->link[!dir] != NULL ) {
+            self->path[self->top++] = self->node;
+            self->node = self->node->link[!dir];
+        }
+    } else {
+
+        // Move to the next branch
+        struct rb_node *last = NULL;
+        do {
+            if (self->top == 0) {
+                self->node = NULL;
+                break;
+            }
+            last = self->node;
+            self->node = self->path[--self->top];
+        } while (last == self->node->link[dir]);
+    }
+    return self->node == NULL ? NULL : self->node->value;
+}
+
+void *
+rb_iter_first (struct rb_iter *self, struct rb_tree *tree) {
+    return rb_iter_start(self, tree, 0);
+}
+
+void *
+rb_iter_last (struct rb_iter *self, struct rb_tree *tree) {
+    return rb_iter_start(self, tree, 1);
+}
+
+void *
+rb_iter_next (struct rb_iter *self) {
+    return rb_iter_move(self, 1);
+}
+
+void *
+rb_iter_prev (struct rb_iter *self) {
+    return rb_iter_move(self, 0);
+}
diff --git a/lib/rb_tree.h b/lib/rb_tree.h
new file mode 100644
index 0000000..67ec0a7
--- /dev/null
+++ b/lib/rb_tree.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: Unlicense */
+//
+// Based on Julienne Walker's <http://eternallyconfuzzled.com/> rb_tree
+// implementation.
+//
+// Modified by Mirek Rusin <http://github.com/mirek/rb_tree>.
+//
+// This is free and unencumbered software released into the public domain.
+//
+// Anyone is free to copy, modify, publish, use, compile, sell, or
+// distribute this software, either in source code form or as a compiled
+// binary, for any purpose, commercial or non-commercial, and by any
+// means.
+//
+// In jurisdictions that recognize copyright laws, the author or authors
+// of this software dedicate any and all copyright interest in the
+// software to the public domain. We make this dedication for the benefit
+// of the public at large and to the detriment of our heirs and
+// successors. We intend this dedication to be an overt act of
+// relinquishment in perpetuity of all present and future rights to this
+// software under copyright law.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+// For more information, please refer to <http://unlicense.org/>
+//
+
+#ifndef __RB_TREE_H__
+#define __RB_TREE_H__ 1
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#ifndef RB_ITER_MAX_HEIGHT
+#define RB_ITER_MAX_HEIGHT 64 // Tallest allowable tree to iterate
+#endif
+
+struct rb_node;
+struct rb_tree;
+
+typedef int  (*rb_tree_node_cmp_f) (struct rb_tree *self, struct rb_node *a, struct rb_node *b);
+typedef void (*rb_tree_node_f)     (struct rb_tree *self, struct rb_node *node);
+
+struct rb_node {
+    int             red;     // Color red (1), black (0)
+    struct rb_node *link[2]; // Link left [0] and right [1]
+    void           *value;   // User provided, used indirectly via rb_tree_node_cmp_f.
+};
+
+struct rb_tree {
+    struct rb_node    *root;
+    rb_tree_node_cmp_f cmp;
+    size_t             size;
+    void              *info; // User provided, not used by rb_tree.
+};
+
+struct rb_iter {
+    struct rb_tree *tree;
+    struct rb_node *node;                     // Current node
+    struct rb_node *path[RB_ITER_MAX_HEIGHT]; // Traversal path
+    size_t          top;                      // Top of stack
+    void           *info;                     // User provided, not used by rb_iter.
+};
+
+int             rb_tree_node_cmp_ptr_cb (struct rb_tree *self, struct rb_node *a, struct rb_node *b);
+void            rb_tree_node_dealloc_cb (struct rb_tree *self, struct rb_node *node);
+
+struct rb_node *rb_node_alloc           ();
+struct rb_node *rb_node_create          (void *value);
+struct rb_node *rb_node_init            (struct rb_node *self, void *value);
+void            rb_node_dealloc         (struct rb_node *self);
+
+struct rb_tree *rb_tree_alloc           ();
+struct rb_tree *rb_tree_create          (rb_tree_node_cmp_f cmp);
+struct rb_tree *rb_tree_init            (struct rb_tree *self, rb_tree_node_cmp_f cmp);
+void            rb_tree_dealloc         (struct rb_tree *self, rb_tree_node_f node_cb);
+void           *rb_tree_find            (struct rb_tree *self, void *value);
+int             rb_tree_insert          (struct rb_tree *self, void *value);
+int             rb_tree_remove          (struct rb_tree *self, void *value);
+size_t          rb_tree_size            (struct rb_tree *self);
+
+int             rb_tree_insert_node     (struct rb_tree *self, struct rb_node *node);
+int             rb_tree_remove_with_cb  (struct rb_tree *self, void *value, rb_tree_node_f node_cb);
+
+int             rb_tree_test            (struct rb_tree *self, struct rb_node *root);
+
+struct rb_iter *rb_iter_alloc           ();
+struct rb_iter *rb_iter_init            (struct rb_iter *self);
+struct rb_iter *rb_iter_create          ();
+void            rb_iter_dealloc         (struct rb_iter *self);
+void           *rb_iter_first           (struct rb_iter *self, struct rb_tree *tree);
+void           *rb_iter_last            (struct rb_iter *self, struct rb_tree *tree);
+void           *rb_iter_next            (struct rb_iter *self);
+void           *rb_iter_prev            (struct rb_iter *self);
+
+#endif
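
For reference, a minimal usage sketch of the rb_tree/rb_iter API declared above. The comparison callback, the sample keys and main() below are illustrative only and not part of erofs-utils; the tree stores opaque value pointers and orders them solely through the caller-supplied rb_tree_node_cmp_f:

    #include <stdio.h>
    #include "rb_tree.h"

    /* Order nodes by the int that each value pointer references. */
    static int cmp_int(struct rb_tree *self, struct rb_node *a, struct rb_node *b)
    {
        int x = *(int *)a->value, y = *(int *)b->value;

        return (x > y) - (x < y);
    }

    int main(void)
    {
        static int keys[] = { 42, 7, 19 };
        struct rb_tree *t = rb_tree_create(cmp_int);
        struct rb_iter *it = rb_iter_create();
        void *v;
        int i;

        for (i = 0; i < 3; i++)
            rb_tree_insert(t, &keys[i]);        /* allocates one rb_node per key */

        for (v = rb_iter_first(it, t); v; v = rb_iter_next(it))
            printf("%d\n", *(int *)v);          /* prints 7, 19, 42 (ascending) */

        rb_iter_dealloc(it);
        rb_tree_dealloc(t, rb_tree_node_dealloc_cb); /* frees the nodes, not the keys */
        return 0;
    }
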
diff --git a/lib/rebuild.c b/lib/rebuild.c
new file mode 100644
index 0000000..5993730
--- /dev/null
+++ b/lib/rebuild.c
@@ -0,0 +1,409 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <config.h>
+#if defined(HAVE_SYS_SYSMACROS_H)
+#include <sys/sysmacros.h>
+#endif
+#include "erofs/print.h"
+#include "erofs/inode.h"
+#include "erofs/rebuild.h"
+#include "erofs/io.h"
+#include "erofs/dir.h"
+#include "erofs/xattr.h"
+#include "erofs/blobchunk.h"
+#include "erofs/internal.h"
+
+#ifdef HAVE_LINUX_AUFS_TYPE_H
+#include <linux/aufs_type.h>
+#else
+#define AUFS_WH_PFX		".wh."
+#define AUFS_DIROPQ_NAME	AUFS_WH_PFX ".opq"
+#define AUFS_WH_DIROPQ		AUFS_WH_PFX AUFS_DIROPQ_NAME
+#endif
+
+static struct erofs_dentry *erofs_rebuild_mkdir(struct erofs_inode *dir,
+						const char *s)
+{
+	struct erofs_inode *inode;
+	struct erofs_dentry *d;
+
+	inode = erofs_new_inode();
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
+
+	inode->i_mode = S_IFDIR | 0755;
+	inode->i_parent = dir;
+	inode->i_uid = getuid();
+	inode->i_gid = getgid();
+	inode->i_mtime = inode->sbi->build_time;
+	inode->i_mtime_nsec = inode->sbi->build_time_nsec;
+	erofs_init_empty_dir(inode);
+
+	d = erofs_d_alloc(dir, s);
+	if (!IS_ERR(d)) {
+		d->type = EROFS_FT_DIR;
+		d->inode = inode;
+	}
+	return d;
+}
+
+struct erofs_dentry *erofs_rebuild_get_dentry(struct erofs_inode *pwd,
+		char *path, bool aufs, bool *whout, bool *opq, bool to_head)
+{
+	struct erofs_dentry *d = NULL;
+	unsigned int len = strlen(path);
+	char *s = path;
+
+	*whout = false;
+	*opq = false;
+
+	while (s < path + len) {
+		char *slash = memchr(s, '/', path + len - s);
+
+		if (slash) {
+			if (s == slash) {
+				while (*++s == '/');	/* skip '//...' */
+				continue;
+			}
+			*slash = '\0';
+		}
+
+		if (!memcmp(s, ".", 2)) {
+			/* "." component: stay in the current directory */
+		} else if (!memcmp(s, "..", 3)) {
+			pwd = pwd->i_parent;
+		} else {
+			struct erofs_inode *inode = NULL;
+
+			if (aufs && !slash) {
+				if (!memcmp(s, AUFS_WH_DIROPQ, sizeof(AUFS_WH_DIROPQ))) {
+					*opq = true;
+					break;
+				}
+				if (!memcmp(s, AUFS_WH_PFX, sizeof(AUFS_WH_PFX) - 1)) {
+					s += sizeof(AUFS_WH_PFX) - 1;
+					*whout = true;
+				}
+			}
+
+			list_for_each_entry(d, &pwd->i_subdirs, d_child) {
+				if (!strcmp(d->name, s)) {
+					if (d->type != EROFS_FT_DIR && slash)
+						return ERR_PTR(-EIO);
+					inode = d->inode;
+					break;
+				}
+			}
+
+			if (inode) {
+				if (to_head) {
+					list_del(&d->d_child);
+					list_add(&d->d_child, &pwd->i_subdirs);
+				}
+				pwd = inode;
+			} else if (!slash) {
+				d = erofs_d_alloc(pwd, s);
+				if (IS_ERR(d))
+					return d;
+				d->type = EROFS_FT_UNKNOWN;
+				d->inode = pwd;
+			} else {
+				d = erofs_rebuild_mkdir(pwd, s);
+				if (IS_ERR(d))
+					return d;
+				pwd = d->inode;
+			}
+		}
+		if (slash) {
+			*slash = '/';
+			s = slash + 1;
+		} else {
+			break;
+		}
+	}
+	return d;
+}
+
+static int erofs_rebuild_fixup_inode_index(struct erofs_inode *inode)
+{
+	int ret;
+	unsigned int count, unit, chunkbits, i;
+	struct erofs_inode_chunk_index *idx;
+	erofs_off_t chunksize;
+	erofs_blk_t blkaddr;
+
+	/* TODO: fill data map in other layouts */
+	if (inode->datalayout != EROFS_INODE_CHUNK_BASED &&
+	    inode->datalayout != EROFS_INODE_FLAT_PLAIN) {
+		erofs_err("%s: unsupported datalayout %d", inode->i_srcpath, inode->datalayout);
+		return -EOPNOTSUPP;
+	}
+
+	if (inode->sbi->extra_devices) {
+		chunkbits = inode->u.chunkbits;
+		if (chunkbits < sbi.blkszbits) {
+			erofs_err("%s: chunk size %u is too small to fit the target block size %u",
+				  inode->i_srcpath, 1U << chunkbits, 1U << sbi.blkszbits);
+			return -EINVAL;
+		}
+	} else {
+		chunkbits = ilog2(inode->i_size - 1) + 1;
+		if (chunkbits < sbi.blkszbits)
+			chunkbits = sbi.blkszbits;
+		if (chunkbits - sbi.blkszbits > EROFS_CHUNK_FORMAT_BLKBITS_MASK)
+			chunkbits = EROFS_CHUNK_FORMAT_BLKBITS_MASK + sbi.blkszbits;
+	}
+	chunksize = 1ULL << chunkbits;
+	count = DIV_ROUND_UP(inode->i_size, chunksize);
+
+	unit = sizeof(struct erofs_inode_chunk_index);
+	inode->extent_isize = count * unit;
+	idx = malloc(count * max(sizeof(*idx), sizeof(void *)));
+	if (!idx)
+		return -ENOMEM;
+	inode->chunkindexes = idx;
+
+	for (i = 0; i < count; i++) {
+		struct erofs_blobchunk *chunk;
+		struct erofs_map_blocks map = {
+			.index = UINT_MAX,
+		};
+
+		map.m_la = i << chunkbits;
+		ret = erofs_map_blocks(inode, &map, 0);
+		if (ret)
+			goto err;
+
+		blkaddr = erofs_blknr(&sbi, map.m_pa);
+		chunk = erofs_get_unhashed_chunk(inode->dev, blkaddr, 0);
+		if (IS_ERR(chunk)) {
+			ret = PTR_ERR(chunk);
+			goto err;
+		}
+		*(void **)idx++ = chunk;
+	}
+	inode->datalayout = EROFS_INODE_CHUNK_BASED;
+	inode->u.chunkformat = EROFS_CHUNK_FORMAT_INDEXES;
+	inode->u.chunkformat |= chunkbits - sbi.blkszbits;
+	return 0;
+err:
+	free(inode->chunkindexes);
+	inode->chunkindexes = NULL;
+	return ret;
+}
+
+static int erofs_rebuild_fill_inode(struct erofs_inode *inode)
+{
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFCHR:
+		if (erofs_inode_is_whiteout(inode))
+			inode->i_parent->whiteouts = true;
+		/* fallthrough */
+	case S_IFBLK:
+	case S_IFIFO:
+	case S_IFSOCK:
+		inode->i_size = 0;
+		erofs_dbg("\tdev: %d %d", major(inode->u.i_rdev),
+			  minor(inode->u.i_rdev));
+		inode->u.i_rdev = erofs_new_encode_dev(inode->u.i_rdev);
+		return 0;
+	case S_IFDIR:
+		return erofs_init_empty_dir(inode);
+	case S_IFLNK: {
+		int ret;
+
+		inode->i_link = malloc(inode->i_size + 1);
+		if (!inode->i_link)
+			return -ENOMEM;
+		ret = erofs_pread(inode, inode->i_link, inode->i_size, 0);
+		erofs_dbg("\tsymlink: %s -> %s", inode->i_srcpath, inode->i_link);
+		return ret;
+	}
+	case S_IFREG:
+		if (inode->i_size)
+			return erofs_rebuild_fixup_inode_index(inode);
+		return 0;
+	default:
+		break;
+	}
+	return -EINVAL;
+}
+
+/*
+ * @parent:  parent directory in inode tree
+ * @ctx.dir: parent directory while iterating with erofs_iterate_dir()
+ */
+struct erofs_rebuild_dir_context {
+	struct erofs_dir_context ctx;
+	struct erofs_inode *parent;
+};
+
+static int erofs_rebuild_dirent_iter(struct erofs_dir_context *ctx)
+{
+	struct erofs_rebuild_dir_context *rctx = (void *)ctx;
+	struct erofs_inode *parent = rctx->parent;
+	struct erofs_inode *dir = ctx->dir;
+	struct erofs_inode *inode, *candidate;
+	struct erofs_inode src;
+	struct erofs_dentry *d;
+	char *path, *dname;
+	bool dumb;
+	int ret;
+
+	if (ctx->dot_dotdot)
+		return 0;
+
+	ret = asprintf(&path, "%s/%.*s", rctx->parent->i_srcpath,
+		       ctx->de_namelen, ctx->dname);
+	if (ret < 0)
+		return ret;
+
+	erofs_dbg("parsing %s", path);
+	dname = path + strlen(parent->i_srcpath) + 1;
+
+	d = erofs_rebuild_get_dentry(parent, dname, false,
+				     &dumb, &dumb, false);
+	if (IS_ERR(d)) {
+		ret = PTR_ERR(d);
+		goto out;
+	}
+
+	ret = 0;
+	if (d->type != EROFS_FT_UNKNOWN) {
+		/*
+		 * bail out if the file exists in the upper layers.  (Note that
+		 * extended attributes won't be merged too even for dirs.)
+		 */
+		if (!S_ISDIR(d->inode->i_mode) || d->inode->opaque)
+			goto out;
+
+		/* merge directory entries */
+		src = (struct erofs_inode) {
+			.sbi = dir->sbi,
+			.nid = ctx->de_nid
+		};
+		ret = erofs_read_inode_from_disk(&src);
+		if (ret || !S_ISDIR(src.i_mode))
+			goto out;
+		parent = d->inode;
+		inode = dir = &src;
+	} else {
+		u64 nid;
+
+		DBG_BUGON(parent != d->inode);
+		inode = erofs_new_inode();
+		if (IS_ERR(inode)) {
+			ret = PTR_ERR(inode);
+			goto out;
+		}
+
+		/* reuse i_ino[0] to read nid in source fs */
+		nid = inode->i_ino[0];
+		inode->sbi = dir->sbi;
+		inode->nid = ctx->de_nid;
+		ret = erofs_read_inode_from_disk(inode);
+		if (ret)
+			goto out;
+
+		/* restore nid in new generated fs */
+		inode->i_ino[1] = inode->i_ino[0];
+		inode->i_ino[0] = nid;
+		inode->dev = inode->sbi->dev;
+
+		if (S_ISREG(inode->i_mode) && inode->i_nlink > 1 &&
+		    (candidate = erofs_iget(inode->dev, ctx->de_nid))) {
+			/* hardlink file */
+			erofs_iput(inode);
+			inode = candidate;
+			if (S_ISDIR(inode->i_mode)) {
+				erofs_err("hardlink directory not supported");
+				ret = -EISDIR;
+				goto out;
+			}
+			inode->i_nlink++;
+			erofs_dbg("\thardlink: %s -> %s", path, inode->i_srcpath);
+		} else {
+			ret = erofs_read_xattrs_from_disk(inode);
+			if (ret) {
+				erofs_iput(inode);
+				goto out;
+			}
+
+			inode->i_parent = d->inode;
+			inode->i_srcpath = path;
+			path = NULL;
+			inode->i_ino[1] = inode->nid;
+			inode->i_nlink = 1;
+
+			ret = erofs_rebuild_fill_inode(inode);
+			if (ret) {
+				erofs_iput(inode);
+				goto out;
+			}
+
+			erofs_insert_ihash(inode, inode->dev, inode->i_ino[1]);
+			parent = dir = inode;
+		}
+
+		d->inode = inode;
+		d->type = erofs_mode_to_ftype(inode->i_mode);
+	}
+
+	if (S_ISDIR(inode->i_mode)) {
+		struct erofs_rebuild_dir_context nctx = *rctx;
+
+		nctx.parent = parent;
+		nctx.ctx.dir = dir;
+		ret = erofs_iterate_dir(&nctx.ctx, false);
+		if (ret)
+			goto out;
+	}
+
+	/* reset sbi, nid after subdirs are all loaded for the final dump */
+	inode->sbi = &sbi;
+	inode->nid = 0;
+out:
+	free(path);
+	return ret;
+}
+
+int erofs_rebuild_load_tree(struct erofs_inode *root, struct erofs_sb_info *sbi)
+{
+	struct erofs_inode inode = {};
+	struct erofs_rebuild_dir_context ctx;
+	int ret;
+
+	if (!sbi->devname) {
+		erofs_err("failed to find a device for rebuilding");
+		return -EINVAL;
+	}
+
+	ret = erofs_read_superblock(sbi);
+	if (ret) {
+		erofs_err("failed to read superblock of %s", sbi->devname);
+		return ret;
+	}
+
+	inode.nid = sbi->root_nid;
+	inode.sbi = sbi;
+	ret = erofs_read_inode_from_disk(&inode);
+	if (ret) {
+		erofs_err("failed to read root inode of %s", sbi->devname);
+		return ret;
+	}
+	inode.i_srcpath = strdup("/");
+
+	ctx = (struct erofs_rebuild_dir_context) {
+		.ctx.dir = &inode,
+		.ctx.cb = erofs_rebuild_dirent_iter,
+		.parent = root,
+	};
+	ret = erofs_iterate_dir(&ctx.ctx, false);
+	free(inode.i_srcpath);
+	return ret;
+}
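
The chunk-size selection in erofs_rebuild_fixup_inode_index() above picks, for the single-device case, the smallest power-of-two chunk that covers the whole file in one chunk, clamped so it is never smaller than the block size. Below is a standalone sketch of just that calculation; the helper names and the 4KiB block size are assumptions for illustration, and the real code additionally clamps the upper bound via EROFS_CHUNK_FORMAT_BLKBITS_MASK and reuses the source chunk size when extra devices are present:

	#include <assert.h>

	/* floor(log2(v)), standing in for the ilog2() helper used above */
	static unsigned int ilog2_u64(unsigned long long v)
	{
		unsigned int n = 0;

		while (v >>= 1)
			n++;
		return n;
	}

	static unsigned int pick_chunkbits(unsigned long long i_size,
					   unsigned int blkszbits)
	{
		unsigned int chunkbits = ilog2_u64(i_size - 1) + 1;

		return chunkbits < blkszbits ? blkszbits : chunkbits;
	}

	int main(void)
	{
		assert(pick_chunkbits(100, 12) == 12);	/* tiny file -> one 4KiB chunk */
		assert(pick_chunkbits(5000, 12) == 13);	/* 5000 <= 8192 -> one 8KiB chunk */
		assert(pick_chunkbits(8192, 12) == 13);	/* exactly 8KiB still fits in one chunk */
		return 0;
	}
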
diff --git a/lib/rolling_hash.h b/lib/rolling_hash.h
new file mode 100644
index 0000000..448db34
--- /dev/null
+++ b/lib/rolling_hash.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+/*
+ * Copyright (C) 2022 Alibaba Cloud
+ */
+#ifndef __ROLLING_HASH_H__
+#define __ROLLING_HASH_H__
+
+#include <erofs/defs.h>
+
+#define PRIME_NUMBER	4294967295LL
+#define RADIX		256
+
+static inline long long erofs_rolling_hash_init(u8 *input,
+						int len, bool backwards)
+{
+	long long hash = 0;
+
+	if (!backwards) {
+		int i;
+
+		for (i = 0; i < len; ++i)
+			hash = (RADIX * hash + input[i]) % PRIME_NUMBER;
+	} else {
+		while (len)
+			hash = (RADIX * hash + input[--len]) % PRIME_NUMBER;
+	}
+	return hash;
+}
+
+/* RM = R ^ (M-1) % Q */
+/*
+ * NOTE: the intermediate value of "hash" can be negative, so unsigned types
+ * cannot be used for it; "long long" is used here since PRIME_NUMBER can be
+ * as large as ULONG_MAX.
+ */
+static inline long long erofs_rolling_hash_advance(long long old_hash,
+						   unsigned long long RM,
+						   u8 to_remove, u8 to_add)
+{
+	long long hash = old_hash;
+	long long to_remove_val = (to_remove * RM) % PRIME_NUMBER;
+
+	hash = RADIX * (old_hash - to_remove_val) % PRIME_NUMBER;
+	hash = (hash + to_add) % PRIME_NUMBER;
+
+	/* We might get negative value of hash, converting it to positive */
+	if (hash < 0)
+		hash += PRIME_NUMBER;
+	return hash;
+}
+
+static inline long long erofs_rollinghash_calc_rm(int window_size)
+{
+	int i;
+	long long RM = 1;
+
+	for (i = 0; i < window_size - 1; ++i)
+		RM = (RM * RADIX) % PRIME_NUMBER;
+	return RM;
+}
+#endif
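
The helpers above implement a Rabin-Karp style rolling hash: once a window hash is known, shifting the window by one byte only needs the byte leaving (weighted by RM = RADIX^(window-1) mod PRIME_NUMBER) and the byte entering. A standalone self-check of that identity (illustrative only; assumes the erofs-utils include path so that <erofs/defs.h> resolves):

	#include <assert.h>
	#include "rolling_hash.h"

	int main(void)
	{
		u8 data[] = "abcdefgh";
		const int w = 4;
		unsigned long long RM = erofs_rollinghash_calc_rm(w);
		long long h = erofs_rolling_hash_init(data, w, false);	/* hash of "abcd" */

		/* drop 'a' (weighted by RM), append 'e' -> hash of "bcde" */
		h = erofs_rolling_hash_advance(h, RM, data[0], data[w]);
		assert(h == erofs_rolling_hash_init(data + 1, w, false));
		return 0;
	}
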
diff --git a/lib/sha256.c b/lib/sha256.c
index dd0e058..9bb7fbb 100644
--- a/lib/sha256.c
+++ b/lib/sha256.c
@@ -1,49 +1,45 @@
+// SPDX-License-Identifier: Unlicense
 /*
  * sha256.c --- The sha256 algorithm
  *
- * Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
- * (copied from libtomcrypt and then relicensed under GPLv2)
- *
- * %Begin-Header%
- * This file may be redistributed under the terms of the GNU Library
- * General Public License, version 2.
- * %End-Header%
+ * (copied from LibTomCrypt with adaptation.)
  */
-#include "erofs/defs.h"
+#include "sha256.h"
 #include <string.h>
 
-static const __u32 K[64] = {
-    0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, 0x3956c25bUL,
-    0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, 0xd807aa98UL, 0x12835b01UL,
-    0x243185beUL, 0x550c7dc3UL, 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL,
-    0xc19bf174UL, 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL,
-    0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, 0x983e5152UL,
-    0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, 0xc6e00bf3UL, 0xd5a79147UL,
-    0x06ca6351UL, 0x14292967UL, 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL,
-    0x53380d13UL, 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL,
-    0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, 0xd192e819UL,
-    0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, 0x19a4c116UL, 0x1e376c08UL,
-    0x2748774cUL, 0x34b0bcb5UL, 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL,
-    0x682e6ff3UL, 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL,
-    0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL
+/* This is based on SHA256 implementation in LibTomCrypt that was released into
+ * public domain by Tom St Denis. */
+/* the K array */
+static const unsigned long K[64] = {
+	0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, 0x3956c25bUL,
+	0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, 0xd807aa98UL, 0x12835b01UL,
+	0x243185beUL, 0x550c7dc3UL, 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL,
+	0xc19bf174UL, 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL,
+	0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, 0x983e5152UL,
+	0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, 0xc6e00bf3UL, 0xd5a79147UL,
+	0x06ca6351UL, 0x14292967UL, 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL,
+	0x53380d13UL, 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL,
+	0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, 0xd192e819UL,
+	0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, 0x19a4c116UL, 0x1e376c08UL,
+	0x2748774cUL, 0x34b0bcb5UL, 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL,
+	0x682e6ff3UL, 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL,
+	0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL
 };
-
 /* Various logical functions */
+#define RORc(x, y) \
+( ((((unsigned long) (x) & 0xFFFFFFFFUL) >> (unsigned long) ((y) & 31)) | \
+   ((unsigned long) (x) << (unsigned long) (32 - ((y) & 31)))) & 0xFFFFFFFFUL)
 #define Ch(x,y,z)       (z ^ (x & (y ^ z)))
 #define Maj(x,y,z)      (((x | y) & z) | (x & y))
-#define S(x, n)         RORc((x),(n))
+#define S(x, n)         RORc((x), (n))
 #define R(x, n)         (((x)&0xFFFFFFFFUL)>>(n))
 #define Sigma0(x)       (S(x, 2) ^ S(x, 13) ^ S(x, 22))
 #define Sigma1(x)       (S(x, 6) ^ S(x, 11) ^ S(x, 25))
 #define Gamma0(x)       (S(x, 7) ^ S(x, 18) ^ R(x, 3))
 #define Gamma1(x)       (S(x, 17) ^ S(x, 19) ^ R(x, 10))
-#define RORc(x, y) ( ((((__u32)(x)&0xFFFFFFFFUL)>>(__u32)((y)&31)) | ((__u32)(x)<<(__u32)(32-((y)&31)))) & 0xFFFFFFFFUL)
-
-#define RND(a,b,c,d,e,f,g,h,i)                         \
-     t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i];   \
-     t1 = Sigma0(a) + Maj(a, b, c);                    \
-     d += t0;                                          \
-     h  = t0 + t1;
+#ifndef MIN
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+#endif
 
 #define STORE64H(x, y) \
 	do { \
@@ -61,145 +57,149 @@
        (y)[2] = (unsigned char)(((x)>>8)&255); (y)[3] = (unsigned char)((x)&255); } while(0)
 
 #define LOAD32H(x, y)                            \
-  do { x = ((__u32)((y)[0] & 255)<<24) | \
-           ((__u32)((y)[1] & 255)<<16) | \
-           ((__u32)((y)[2] & 255)<<8)  | \
-           ((__u32)((y)[3] & 255)); } while(0)
+  do { x = ((u32)((y)[0] & 255)<<24) | \
+           ((u32)((y)[1] & 255)<<16) | \
+           ((u32)((y)[2] & 255)<<8)  | \
+           ((u32)((y)[3] & 255)); } while(0)
 
-struct sha256_state {
-    __u64 length;
-    __u32 state[8], curlen;
-    unsigned char buf[64];
-};
-
-/* This is a highly simplified version from libtomcrypt */
-struct hash_state {
-	struct sha256_state sha256;
-};
-
-static void sha256_compress(struct hash_state * md, const unsigned char *buf)
+/* compress 512-bits */
+static int sha256_compress(struct sha256_state *md, unsigned char *buf)
 {
-    __u32 S[8], W[64], t0, t1;
-    __u32 t;
-    int i;
-
-    /* copy state into S */
-    for (i = 0; i < 8; i++) {
-        S[i] = md->sha256.state[i];
-    }
-
-    /* copy the state into 512-bits into W[0..15] */
-    for (i = 0; i < 16; i++) {
-        LOAD32H(W[i], buf + (4*i));
-    }
-
-    /* fill W[16..63] */
-    for (i = 16; i < 64; i++) {
-        W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16];
-    }
-
-    /* Compress */
-     for (i = 0; i < 64; ++i) {
-         RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i);
-         t = S[7]; S[7] = S[6]; S[6] = S[5]; S[5] = S[4];
-         S[4] = S[3]; S[3] = S[2]; S[2] = S[1]; S[1] = S[0]; S[0] = t;
-     }
-
-    /* feedback */
-    for (i = 0; i < 8; i++) {
-        md->sha256.state[i] = md->sha256.state[i] + S[i];
-    }
+	u32 S[8], W[64], t0, t1;
+	u32 t;
+	int i;
+	/* copy state into S */
+	for (i = 0; i < 8; i++) {
+		S[i] = md->state[i];
+	}
+	/* copy the state into 512-bits into W[0..15] */
+	for (i = 0; i < 16; i++)
+		LOAD32H(W[i], buf + (4 * i));
+	/* fill W[16..63] */
+	for (i = 16; i < 64; i++) {
+		W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) +
+			W[i - 16];
+	}
+	/* Compress */
+#define RND(a,b,c,d,e,f,g,h,i)                          \
+	t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i];	\
+	t1 = Sigma0(a) + Maj(a, b, c);			\
+	d += t0;					\
+	h  = t0 + t1;
+	for (i = 0; i < 64; ++i) {
+		RND(S[0], S[1], S[2], S[3], S[4], S[5], S[6], S[7], i);
+		t = S[7]; S[7] = S[6]; S[6] = S[5]; S[5] = S[4];
+		S[4] = S[3]; S[3] = S[2]; S[2] = S[1]; S[1] = S[0]; S[0] = t;
+	}
+	/* feedback */
+	for (i = 0; i < 8; i++) {
+		md->state[i] = md->state[i] + S[i];
+	}
+	return 0;
 }
-
-static void sha256_init(struct hash_state * md)
+/* Initialize the hash state */
+void erofs_sha256_init(struct sha256_state *md)
 {
-    md->sha256.curlen = 0;
-    md->sha256.length = 0;
-    md->sha256.state[0] = 0x6A09E667UL;
-    md->sha256.state[1] = 0xBB67AE85UL;
-    md->sha256.state[2] = 0x3C6EF372UL;
-    md->sha256.state[3] = 0xA54FF53AUL;
-    md->sha256.state[4] = 0x510E527FUL;
-    md->sha256.state[5] = 0x9B05688CUL;
-    md->sha256.state[6] = 0x1F83D9ABUL;
-    md->sha256.state[7] = 0x5BE0CD19UL;
+	md->curlen = 0;
+	md->length = 0;
+	md->state[0] = 0x6A09E667UL;
+	md->state[1] = 0xBB67AE85UL;
+	md->state[2] = 0x3C6EF372UL;
+	md->state[3] = 0xA54FF53AUL;
+	md->state[4] = 0x510E527FUL;
+	md->state[5] = 0x9B05688CUL;
+	md->state[6] = 0x1F83D9ABUL;
+	md->state[7] = 0x5BE0CD19UL;
 }
-
-#define MIN(x, y) ( ((x)<(y))?(x):(y) )
-#define SHA256_BLOCKSIZE 64
-static void sha256_process(struct hash_state * md, const unsigned char *in, unsigned long inlen)
+/**
+   Process a block of memory though the hash
+   @param md     The hash state
+   @param in     The data to hash
+   @param inlen  The length of the data (octets)
+   @return CRYPT_OK if successful
+*/
+int erofs_sha256_process(struct sha256_state *md,
+		const unsigned char *in, unsigned long inlen)
 {
-    unsigned long n;
-
-    while (inlen > 0) {
-	    if (md->sha256.curlen == 0 && inlen >= SHA256_BLOCKSIZE) {
-		    sha256_compress(md, in);
-		    md->sha256.length += SHA256_BLOCKSIZE * 8;
-		    in += SHA256_BLOCKSIZE;
-		    inlen -= SHA256_BLOCKSIZE;
-	    } else {
-		    n = MIN(inlen, (SHA256_BLOCKSIZE - md->sha256.curlen));
-		    memcpy(md->sha256.buf + md->sha256.curlen, in, (size_t)n);
-		    md->sha256.curlen += n;
-		    in += n;
-		    inlen -= n;
-		    if (md->sha256.curlen == SHA256_BLOCKSIZE) {
-			    sha256_compress(md, md->sha256.buf);
-			    md->sha256.length += 8*SHA256_BLOCKSIZE;
-			    md->sha256.curlen = 0;
-		    }
-	    }
-    }
+	unsigned long n;
+#define block_size 64
+	if (md->curlen >= sizeof(md->buf))
+		return -1;
+	while (inlen > 0) {
+		if (md->curlen == 0 && inlen >= block_size) {
+			if (sha256_compress(md, (unsigned char *) in) < 0)
+				return -1;
+			md->length += block_size * 8;
+			in += block_size;
+			inlen -= block_size;
+		} else {
+			n = MIN(inlen, (block_size - md->curlen));
+			memcpy(md->buf + md->curlen, in, n);
+			md->curlen += n;
+			in += n;
+			inlen -= n;
+			if (md->curlen == block_size) {
+				if (sha256_compress(md, md->buf) < 0)
+					return -1;
+				md->length += 8 * block_size;
+				md->curlen = 0;
+			}
+		}
+	}
+	return 0;
 }
-
-static void sha256_done(struct hash_state * md, unsigned char *out)
+/**
+   Terminate the hash to get the digest
+   @param md  The hash state
+   @param out [out] The destination of the hash (32 bytes)
+   @return CRYPT_OK if successful
+*/
+int erofs_sha256_done(struct sha256_state *md, unsigned char *out)
 {
-    int i;
-
-    /* increase the length of the message */
-    md->sha256.length += md->sha256.curlen * 8;
-
-    /* append the '1' bit */
-    md->sha256.buf[md->sha256.curlen++] = (unsigned char)0x80;
-
-    /* if the length is currently above 56 bytes we append zeros
-     * then compress.  Then we can fall back to padding zeros and length
-     * encoding like normal.
-     */
-    if (md->sha256.curlen > 56) {
-        while (md->sha256.curlen < 64) {
-            md->sha256.buf[md->sha256.curlen++] = (unsigned char)0;
-        }
-        sha256_compress(md, md->sha256.buf);
-        md->sha256.curlen = 0;
-    }
-
-    /* pad upto 56 bytes of zeroes */
-    while (md->sha256.curlen < 56) {
-        md->sha256.buf[md->sha256.curlen++] = (unsigned char)0;
-    }
-
-    /* store length */
-    STORE64H(md->sha256.length, md->sha256.buf+56);
-    sha256_compress(md, md->sha256.buf);
-
-    /* copy output */
-    for (i = 0; i < 8; i++) {
-        STORE32H(md->sha256.state[i], out+(4*i));
-    }
+	int i;
+	if (md->curlen >= sizeof(md->buf))
+		return -1;
+	/* increase the length of the message */
+	md->length += md->curlen * 8;
+	/* append the '1' bit */
+	md->buf[md->curlen++] = (unsigned char) 0x80;
+	/* if the length is currently above 56 bytes we append zeros
+	 * then compress.  Then we can fall back to padding zeros and length
+	 * encoding like normal.
+	 */
+	if (md->curlen > 56) {
+		while (md->curlen < 64) {
+			md->buf[md->curlen++] = (unsigned char) 0;
+		}
+		sha256_compress(md, md->buf);
+		md->curlen = 0;
+	}
+	/* pad up to 56 bytes of zeroes */
+	while (md->curlen < 56) {
+		md->buf[md->curlen++] = (unsigned char) 0;
+	}
+	/* store length */
+	STORE64H(md->length, md->buf+56);
+	sha256_compress(md, md->buf);
+	/* copy output */
+	for (i = 0; i < 8; i++)
+		STORE32H(md->state[i], out + (4 * i));
+	return 0;
 }
 
 void erofs_sha256(const unsigned char *in, unsigned long in_size,
 		  unsigned char out[32])
 {
-	struct hash_state md;
+	struct sha256_state md;
 
-	sha256_init(&md);
-	sha256_process(&md, in, in_size);
-	sha256_done(&md, out);
+	erofs_sha256_init(&md);
+	erofs_sha256_process(&md, in, in_size);
+	erofs_sha256_done(&md, out);
 }
 
 #ifdef UNITTEST
+#include <stdio.h>
+
 static const struct {
 	char *msg;
 	unsigned char hash[32];
diff --git a/lib/sha256.h b/lib/sha256.h
new file mode 100644
index 0000000..dd39970
--- /dev/null
+++ b/lib/sha256.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+#ifndef __EROFS_LIB_SHA256_H
+#define __EROFS_LIB_SHA256_H
+
+#include "erofs/defs.h"
+
+struct sha256_state {
+	u64 length;
+	u32 state[8], curlen;
+	u8 buf[64];
+};
+
+void erofs_sha256_init(struct sha256_state *md);
+int erofs_sha256_process(struct sha256_state *md,
+		const unsigned char *in, unsigned long inlen);
+int erofs_sha256_done(struct sha256_state *md, unsigned char *out);
+
+void erofs_sha256(const unsigned char *in, unsigned long in_size,
+		  unsigned char out[32]);
+
+#endif
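
A quick consistency sketch for the interface above: feeding the input through erofs_sha256_init()/erofs_sha256_process()/erofs_sha256_done() in arbitrary pieces must produce the same digest as the one-shot erofs_sha256() wrapper defined in lib/sha256.c (standalone, illustrative only):

	#include <assert.h>
	#include <string.h>
	#include "sha256.h"

	int main(void)
	{
		const unsigned char msg[] = "erofs";
		unsigned char d1[32], d2[32];
		struct sha256_state md;

		erofs_sha256_init(&md);
		erofs_sha256_process(&md, msg, 2);	/* "er" */
		erofs_sha256_process(&md, msg + 2, 3);	/* "ofs" */
		erofs_sha256_done(&md, d1);

		erofs_sha256(msg, 5, d2);		/* one-shot over "erofs" */
		assert(!memcmp(d1, d2, sizeof(d1)));
		return 0;
	}
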
diff --git a/lib/super.c b/lib/super.c
index f486eb7..f952f7e 100644
--- a/lib/super.c
+++ b/lib/super.c
@@ -6,6 +6,7 @@
 #include <stdlib.h>
 #include "erofs/io.h"
 #include "erofs/print.h"
+#include "erofs/xattr.h"
 
 static bool check_layout_compatibility(struct erofs_sb_info *sbi,
 				       struct erofs_super_block *dsb)
@@ -31,12 +32,13 @@
 
 	sbi->total_blocks = sbi->primarydevice_blocks;
 
-	if (!erofs_sb_has_device_table())
+	if (!erofs_sb_has_device_table(sbi))
 		ondisk_extradevs = 0;
 	else
 		ondisk_extradevs = le16_to_cpu(dsb->extra_devices);
 
-	if (ondisk_extradevs != sbi->extra_devices) {
+	if (sbi->extra_devices &&
+	    ondisk_extradevs != sbi->extra_devices) {
 		erofs_err("extra devices don't match (ondisk %u, given %u)",
 			  ondisk_extradevs, sbi->extra_devices);
 		return -EINVAL;
@@ -44,32 +46,40 @@
 	if (!ondisk_extradevs)
 		return 0;
 
+	sbi->extra_devices = ondisk_extradevs;
 	sbi->device_id_mask = roundup_pow_of_two(ondisk_extradevs + 1) - 1;
 	sbi->devs = calloc(ondisk_extradevs, sizeof(*sbi->devs));
+	if (!sbi->devs)
+		return -ENOMEM;
 	pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE;
 	for (i = 0; i < ondisk_extradevs; ++i) {
 		struct erofs_deviceslot dis;
 		int ret;
 
-		ret = dev_read(0, &dis, pos, sizeof(dis));
-		if (ret < 0)
+		ret = dev_read(sbi, 0, &dis, pos, sizeof(dis));
+		if (ret < 0) {
+			free(sbi->devs);
+			sbi->devs = NULL;
 			return ret;
+		}
 
-		sbi->devs[i].mapped_blkaddr = dis.mapped_blkaddr;
-		sbi->total_blocks += dis.blocks;
+		sbi->devs[i].mapped_blkaddr = le32_to_cpu(dis.mapped_blkaddr);
+		sbi->devs[i].blocks = le32_to_cpu(dis.blocks);
+		memcpy(sbi->devs[i].tag, dis.tag, sizeof(dis.tag));
+		sbi->total_blocks += sbi->devs[i].blocks;
 		pos += EROFS_DEVT_SLOT_SIZE;
 	}
 	return 0;
 }
 
-int erofs_read_superblock(void)
+int erofs_read_superblock(struct erofs_sb_info *sbi)
 {
-	char data[EROFS_BLKSIZ];
+	u8 data[EROFS_MAX_BLOCK_SIZE];
 	struct erofs_super_block *dsb;
-	unsigned int blkszbits;
 	int ret;
 
-	ret = blk_read(0, data, 0, 1);
+	sbi->blkszbits = ilog2(EROFS_MAX_BLOCK_SIZE);
+	ret = blk_read(sbi, 0, data, 0, erofs_blknr(sbi, sizeof(data)));
 	if (ret < 0) {
 		erofs_err("cannot read erofs superblock: %d", ret);
 		return -EIO;
@@ -82,30 +92,57 @@
 		return ret;
 	}
 
-	sbi.feature_compat = le32_to_cpu(dsb->feature_compat);
+	sbi->feature_compat = le32_to_cpu(dsb->feature_compat);
 
-	blkszbits = dsb->blkszbits;
-	/* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */
-	if (blkszbits != LOG_BLOCK_SIZE) {
-		erofs_err("blksize %d isn't supported on this platform",
-			  1 << blkszbits);
+	sbi->blkszbits = dsb->blkszbits;
+	if (sbi->blkszbits < 9 ||
+	    sbi->blkszbits > ilog2(EROFS_MAX_BLOCK_SIZE)) {
+		erofs_err("blksize %llu isn't supported on this platform",
+			  erofs_blksiz(sbi) | 0ULL);
+		return ret;
+	} else if (!check_layout_compatibility(sbi, dsb)) {
 		return ret;
 	}
 
-	if (!check_layout_compatibility(&sbi, dsb))
+	sbi->primarydevice_blocks = le32_to_cpu(dsb->blocks);
+	sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr);
+	sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
+	sbi->xattr_prefix_start = le32_to_cpu(dsb->xattr_prefix_start);
+	sbi->xattr_prefix_count = dsb->xattr_prefix_count;
+	sbi->islotbits = EROFS_ISLOTBITS;
+	sbi->root_nid = le16_to_cpu(dsb->root_nid);
+	sbi->packed_nid = le64_to_cpu(dsb->packed_nid);
+	sbi->inos = le64_to_cpu(dsb->inos);
+	sbi->checksum = le32_to_cpu(dsb->checksum);
+	sbi->extslots = dsb->sb_extslots;
+
+	sbi->build_time = le64_to_cpu(dsb->build_time);
+	sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec);
+
+	memcpy(&sbi->uuid, dsb->uuid, sizeof(dsb->uuid));
+
+	if (erofs_sb_has_compr_cfgs(sbi))
+		sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs);
+	else
+		sbi->lz4_max_distance = le16_to_cpu(dsb->u1.lz4_max_distance);
+
+	ret = erofs_init_devices(sbi, dsb);
+	if (ret)
 		return ret;
 
-	sbi.primarydevice_blocks = le32_to_cpu(dsb->blocks);
-	sbi.meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr);
-	sbi.xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
-	sbi.islotbits = EROFS_ISLOTBITS;
-	sbi.root_nid = le16_to_cpu(dsb->root_nid);
-	sbi.inos = le64_to_cpu(dsb->inos);
-	sbi.checksum = le32_to_cpu(dsb->checksum);
+	ret = erofs_xattr_prefixes_init(sbi);
+	if (ret && sbi->devs) {
+		free(sbi->devs);
+		sbi->devs = NULL;
+	}
+	return ret;
+}
 
-	sbi.build_time = le64_to_cpu(dsb->build_time);
-	sbi.build_time_nsec = le32_to_cpu(dsb->build_time_nsec);
-
-	memcpy(&sbi.uuid, dsb->uuid, sizeof(dsb->uuid));
-	return erofs_init_devices(&sbi, dsb);
+void erofs_put_super(struct erofs_sb_info *sbi)
+{
+	if (sbi->devs) {
+		free(sbi->devs);
+		sbi->devs = NULL;
+	}
+	erofs_xattr_prefixes_cleanup(sbi);
 }
diff --git a/lib/tar.c b/lib/tar.c
new file mode 100644
index 0000000..8204939
--- /dev/null
+++ b/lib/tar.c
@@ -0,0 +1,976 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#if defined(HAVE_ZLIB)
+#include <zlib.h>
+#endif
+#include "erofs/print.h"
+#include "erofs/cache.h"
+#include "erofs/diskbuf.h"
+#include "erofs/inode.h"
+#include "erofs/list.h"
+#include "erofs/tar.h"
+#include "erofs/io.h"
+#include "erofs/xattr.h"
+#include "erofs/blobchunk.h"
+#include "erofs/rebuild.h"
+
+/* This file is a tape/volume header.  Ignore it on extraction.  */
+#define GNUTYPE_VOLHDR 'V'
+
+struct tar_header {
+	char name[100];		/*   0-99 */
+	char mode[8];		/* 100-107 */
+	char uid[8];		/* 108-115 */
+	char gid[8];		/* 116-123 */
+	char size[12];		/* 124-135 */
+	char mtime[12];		/* 136-147 */
+	char chksum[8];		/* 148-155 */
+	char typeflag;		/* 156-156 */
+	char linkname[100];	/* 157-256 */
+	char magic[6];		/* 257-262 */
+	char version[2];	/* 263-264 */
+	char uname[32];		/* 265-296 */
+	char gname[32];		/* 297-328 */
+	char devmajor[8];	/* 329-336 */
+	char devminor[8];	/* 337-344 */
+	char prefix[155];	/* 345-499 */
+	char padding[12];	/* 500-512 (pad to exactly the 512 byte) */
+};
+
+s64 erofs_read_from_fd(int fd, void *buf, u64 bytes)
+{
+	s64 i = 0;
+
+	while (bytes) {
+		int len = bytes > INT_MAX ? INT_MAX : bytes;
+		int ret;
+
+		ret = read(fd, buf + i, len);
+		if (ret < 1) {
+			if (ret == 0)
+				break;
+			if (errno != EINTR) {
+				erofs_err("failed to read: %s",
+					  strerror(errno));
+				return -errno;
+			}
+			continue;	/* interrupted by a signal, retry */
+		}
+		bytes -= ret;
+		i += ret;
+	}
+	return i;
+}
+
+void erofs_iostream_close(struct erofs_iostream *ios)
+{
+	free(ios->buffer);
+	if (ios->decoder == EROFS_IOS_DECODER_GZIP) {
+#if defined(HAVE_ZLIB)
+		gzclose(ios->handler);
+#endif
+		return;
+	}
+	close(ios->fd);
+}
+
+int erofs_iostream_open(struct erofs_iostream *ios, int fd, int decoder)
+{
+	s64 fsz;
+
+	ios->tail = ios->head = 0;
+	ios->decoder = decoder;
+	if (decoder == EROFS_IOS_DECODER_GZIP) {
+#if defined(HAVE_ZLIB)
+		ios->handler = gzdopen(fd, "r");
+		if (!ios->handler)
+			return -ENOMEM;
+		ios->sz = fsz = 0;
+		ios->bufsize = 32768;
+#else
+		return -EOPNOTSUPP;
+#endif
+	} else {
+		ios->fd = fd;
+		fsz = lseek(fd, 0, SEEK_END);
+		if (fsz <= 0) {
+			ios->feof = !fsz;
+			ios->sz = 0;
+		} else {
+			ios->feof = false;
+			ios->sz = fsz;
+			if (lseek(fd, 0, SEEK_SET))
+				return -EIO;
+#ifdef HAVE_POSIX_FADVISE
+			if (posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL))
+				erofs_warn("failed to fadvise: %s, ignored.",
+					   erofs_strerror(errno));
+#endif
+		}
+		ios->bufsize = 16384;
+	}
+
+	do {
+		ios->buffer = malloc(ios->bufsize);
+		if (ios->buffer)
+			break;
+		ios->bufsize >>= 1;
+	} while (ios->bufsize >= 1024);
+
+	if (!ios->buffer)
+		return -ENOMEM;
+	return 0;
+}
+
+int erofs_iostream_read(struct erofs_iostream *ios, void **buf, u64 bytes)
+{
+	unsigned int rabytes = ios->tail - ios->head;
+	int ret;
+
+	if (rabytes >= bytes) {
+		*buf = ios->buffer + ios->head;
+		ios->head += bytes;
+		return bytes;
+	}
+
+	if (ios->head) {
+		memmove(ios->buffer, ios->buffer + ios->head, rabytes);
+		ios->head = 0;
+		ios->tail = rabytes;
+	}
+
+	if (!ios->feof) {
+		if (ios->decoder == EROFS_IOS_DECODER_GZIP) {
+#if defined(HAVE_ZLIB)
+			ret = gzread(ios->handler, ios->buffer + rabytes,
+				     ios->bufsize - rabytes);
+			if (!ret) {
+				int errnum;
+				const char *errstr;
+
+				errstr = gzerror(ios->handler, &errnum);
+				if (errnum != Z_STREAM_END) {
+					erofs_err("failed to gzread: %s", errstr);
+					return -EIO;
+				}
+				ios->feof = true;
+			}
+			ios->tail += ret;
+#else
+			return -EOPNOTSUPP;
+#endif
+		} else {
+			ret = erofs_read_from_fd(ios->fd, ios->buffer + rabytes,
+						 ios->bufsize - rabytes);
+			if (ret < 0)
+				return ret;
+			ios->tail += ret;
+			if (ret < ios->bufsize - rabytes)
+				ios->feof = true;
+		}
+	}
+	*buf = ios->buffer;
+	ret = min_t(int, ios->tail, bytes);
+	ios->head = ret;
+	return ret;
+}
+
+int erofs_iostream_bread(struct erofs_iostream *ios, void *buf, u64 bytes)
+{
+	u64 rem = bytes;
+	void *src;
+	int ret;
+
+	do {
+		ret = erofs_iostream_read(ios, &src, rem);
+		if (ret < 0)
+			return ret;
+		memcpy(buf, src, ret);
+		rem -= ret;
+	} while (rem && ret);
+
+	return bytes - rem;
+}
+
+int erofs_iostream_lskip(struct erofs_iostream *ios, u64 sz)
+{
+	unsigned int rabytes = ios->tail - ios->head;
+	int ret;
+	void *dummy;
+
+	if (rabytes >= sz) {
+		ios->head += sz;
+		return 0;
+	}
+
+	sz -= rabytes;
+	ios->head = ios->tail = 0;
+	if (ios->feof)
+		return sz;
+
+	if (ios->sz) {
+		s64 cur = lseek(ios->fd, sz, SEEK_CUR);
+
+		if (cur > ios->sz)
+			return cur - ios->sz;
+		return 0;
+	}
+
+	do {
+		ret = erofs_iostream_read(ios, &dummy, sz);
+		if (ret < 0)
+			return ret;
+		sz -= ret;
+	} while (!(ios->feof || !ret || !sz));
+
+	return sz;
+}
+
+static long long tarerofs_otoi(const char *ptr, int len)
+{
+	char inp[32];
+	char *endp = inp;
+	long long val;
+
+	memcpy(inp, ptr, len);
+	inp[len] = '\0';
+
+	errno = 0;
+	val = strtol(inp, &endp, 8);
+	if ((!val && endp == inp) ||
+	     (*endp && *endp != ' '))
+		errno = EINVAL;
+	return val;
+}
+
+static long long tarerofs_parsenum(const char *ptr, int len)
+{
+	/*
+	 * For fields containing numbers or timestamps that are out of range
+	 * for the basic format, the GNU format uses a base-256 representation
+	 * instead of an ASCII octal number.
+	 */
+	if (*(char *)ptr == '\200') {
+		long long res = 0;
+
+		while (--len)
+			res = (res << 8) + (u8)*(++ptr);
+		return res;
+	}
+	return tarerofs_otoi(ptr, len);
+}
+
+struct tarerofs_xattr_item {
+	struct list_head list;
+	char *kv;
+	unsigned int len, namelen;
+};
+
+int tarerofs_insert_xattr(struct list_head *xattrs,
+			  char *kv, int namelen, int len, bool skip)
+{
+	struct tarerofs_xattr_item *item;
+	char *nv;
+
+	DBG_BUGON(namelen >= len);
+	list_for_each_entry(item, xattrs, list) {
+		if (!strncmp(item->kv, kv, namelen + 1)) {
+			if (skip)
+				return 0;
+			goto found;
+		}
+	}
+
+	item = malloc(sizeof(*item));
+	if (!item)
+		return -ENOMEM;
+	item->kv = NULL;
+	item->namelen = namelen;
+	namelen = 0;
+	list_add_tail(&item->list, xattrs);
+found:
+	nv = realloc(item->kv, len);
+	if (!nv)
+		return -ENOMEM;
+	item->kv = nv;
+	item->len = len;
+	memcpy(nv + namelen, kv + namelen, len - namelen);
+	return 0;
+}
+
+int tarerofs_merge_xattrs(struct list_head *dst, struct list_head *src)
+{
+	struct tarerofs_xattr_item *item;
+
+	list_for_each_entry(item, src, list) {
+		int ret;
+
+		ret = tarerofs_insert_xattr(dst, item->kv, item->namelen,
+					    item->len, true);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+void tarerofs_remove_xattrs(struct list_head *xattrs)
+{
+	struct tarerofs_xattr_item *item, *n;
+
+	list_for_each_entry_safe(item, n, xattrs, list) {
+		DBG_BUGON(!item->kv);
+		free(item->kv);
+		list_del(&item->list);
+		free(item);
+	}
+}
+
+int tarerofs_apply_xattrs(struct erofs_inode *inode, struct list_head *xattrs)
+{
+	struct tarerofs_xattr_item *item;
+	int ret;
+
+	list_for_each_entry(item, xattrs, list) {
+		const char *v = item->kv + item->namelen + 1;
+		unsigned int vsz = item->len - item->namelen - 1;
+
+		if (item->len <= item->namelen - 1) {
+			DBG_BUGON(item->len < item->namelen - 1);
+			continue;
+		}
+		item->kv[item->namelen] = '\0';
+		erofs_dbg("Recording xattr(%s)=\"%s\" (of %u bytes) to file %s",
+			  item->kv, v, vsz, inode->i_srcpath);
+		ret = erofs_setxattr(inode, item->kv, v, vsz);
+		if (ret == -ENODATA)
+			erofs_err("Failed to set xattr(%s)=%s to file %s",
+				  item->kv, v, inode->i_srcpath);
+		else if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+static const char lookup_table[65] =
+	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
+
+static int base64_decode(const char *src, int len, u8 *dst)
+{
+	int i, bits = 0, ac = 0;
+	const char *p;
+	u8 *cp = dst;
+
+	if (!(len % 4)) {
+		/* Check for and ignore any end padding */
+		if (src[len - 2] == '=' && src[len - 1] == '=')
+			len -= 2;
+		else if (src[len - 1] == '=')
+			--len;
+	}
+
+	for (i = 0; i < len; i++) {
+		p = strchr(lookup_table, src[i]);
+		if (p == NULL || src[i] == 0)
+			return -2;
+		ac += (p - lookup_table) << bits;
+		bits += 6;
+		if (bits >= 8) {
+			*cp++ = ac & 0xff;
+			ac >>= 8;
+			bits -= 8;
+		}
+	}
+	if (ac)
+		return -1;
+	return cp - dst;
+}
+
+int tarerofs_parse_pax_header(struct erofs_iostream *ios,
+			      struct erofs_pax_header *eh, u32 size)
+{
+	char *buf, *p;
+	int ret;
+
+	buf = malloc(size);
+	if (!buf)
+		return -ENOMEM;
+	p = buf;
+
+	ret = erofs_iostream_bread(ios, buf, size);
+	if (ret != size)
+		goto out;
+
+	while (p < buf + size) {
+		char *kv, *value;
+		int len, n;
+		/* extended records are of the format: "LEN NAME=VALUE\n" */
+		ret = sscanf(p, "%d %n", &len, &n);
+		if (ret < 1 || len <= n || len > buf + size - p) {
+			ret = -EIO;
+			goto out;
+		}
+		kv = p + n;
+		p += len;
+		len -= n;
+
+		if (p[-1] != '\n') {
+			ret = -EIO;
+			goto out;
+		}
+		p[-1] = '\0';
+
+		value = memchr(kv, '=', p - kv);
+		if (!value) {
+			ret = -EIO;
+			goto out;
+		} else {
+			long long lln;
+
+			value++;
+
+			if (!strncmp(kv, "path=", sizeof("path=") - 1)) {
+				int j = p - 1 - value;
+				free(eh->path);
+				eh->path = strdup(value);
+				while (eh->path[j - 1] == '/')
+					eh->path[--j] = '\0';
+			} else if (!strncmp(kv, "linkpath=",
+					sizeof("linkpath=") - 1)) {
+				free(eh->link);
+				eh->link = strdup(value);
+			} else if (!strncmp(kv, "mtime=",
+					sizeof("mtime=") - 1)) {
+				ret = sscanf(value, "%lld %n", &lln, &n);
+				if (ret < 1) {
+					ret = -EIO;
+					goto out;
+				}
+				eh->st.st_mtime = lln;
+				if (value[n] == '.') {
+					ret = sscanf(value + n + 1, "%d", &n);
+					if (ret < 1) {
+						ret = -EIO;
+						goto out;
+					}
+#if ST_MTIM_NSEC
+					ST_MTIM_NSEC(&eh->st) = n;
+#endif
+				}
+				eh->use_mtime = true;
+			} else if (!strncmp(kv, "size=",
+					sizeof("size=") - 1)) {
+				ret = sscanf(value, "%lld %n", &lln, &n);
+				if (ret < 1 || value[n] != '\0') {
+					ret = -EIO;
+					goto out;
+				}
+				eh->st.st_size = lln;
+				eh->use_size = true;
+			} else if (!strncmp(kv, "uid=", sizeof("uid=") - 1)) {
+				ret = sscanf(value, "%lld %n", &lln, &n);
+				if (ret < 1 || value[n] != '\0') {
+					ret = -EIO;
+					goto out;
+				}
+				eh->st.st_uid = lln;
+				eh->use_uid = true;
+			} else if (!strncmp(kv, "gid=", sizeof("gid=") - 1)) {
+				ret = sscanf(value, "%lld %n", &lln, &n);
+				if (ret < 1 || value[n] != '\0') {
+					ret = -EIO;
+					goto out;
+				}
+				eh->st.st_gid = lln;
+				eh->use_gid = true;
+			} else if (!strncmp(kv, "SCHILY.xattr.",
+				   sizeof("SCHILY.xattr.") - 1)) {
+				char *key = kv + sizeof("SCHILY.xattr.") - 1;
+
+				--len; /* p[-1] == '\0' */
+				ret = tarerofs_insert_xattr(&eh->xattrs, key,
+						value - key - 1,
+						len - (key - kv), false);
+				if (ret)
+					goto out;
+			} else if (!strncmp(kv, "LIBARCHIVE.xattr.",
+				   sizeof("LIBARCHIVE.xattr.") - 1)) {
+				char *key;
+				key = kv + sizeof("LIBARCHIVE.xattr.") - 1;
+
+				--len; /* p[-1] == '\0' */
+				ret = base64_decode(value, len - (value - kv),
+						    (u8 *)value);
+				if (ret < 0) {
+					ret = -EFSCORRUPTED;
+					goto out;
+				}
+
+				ret = tarerofs_insert_xattr(&eh->xattrs, key,
+						value - key - 1,
+						value - key + ret, false);
+				if (ret)
+					goto out;
+			} else {
+				erofs_info("unrecognized pax keyword \"%s\", ignoring", kv);
+			}
+		}
+	}
+	ret = 0;
+out:
+	free(buf);
+	return ret;
+}
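
For a concrete picture of the record format parsed above: each pax extended record encodes its own total length in decimal, followed by a space, the keyword=value pair and a terminating newline, and the sscanf("%d %n") call recovers both the length and the offset where the keyword starts. A standalone sketch with a sample mtime record (the record contents are illustrative):

	#include <assert.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		/* 30 bytes total: "30 " + "mtime=1234567890.123456789" + '\n' */
		const char rec[] = "30 mtime=1234567890.123456789\n";
		int len = 0, n = 0;

		assert(sscanf(rec, "%d %n", &len, &n) == 1);
		assert(len == 30 && len == (int)strlen(rec));
		assert(!strncmp(rec + n, "mtime=", 6));	/* keyword begins at offset 3 */
		return 0;
	}
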
+
+void tarerofs_remove_inode(struct erofs_inode *inode)
+{
+	struct erofs_dentry *d;
+
+	--inode->i_nlink;
+	if (!S_ISDIR(inode->i_mode))
+		return;
+
+	/* remove all subdirectories */
+	list_for_each_entry(d, &inode->i_subdirs, d_child) {
+		if (!is_dot_dotdot(d->name))
+			tarerofs_remove_inode(d->inode);
+		erofs_iput(d->inode);
+		d->inode = NULL;
+	}
+	--inode->i_parent->i_nlink;
+}
+
+static int tarerofs_write_file_data(struct erofs_inode *inode,
+				    struct erofs_tarfile *tar)
+{
+	unsigned int j;
+	void *buf;
+	int fd, nread;
+	u64 off;
+
+	if (!inode->i_diskbuf) {
+		inode->i_diskbuf = calloc(1, sizeof(*inode->i_diskbuf));
+		if (!inode->i_diskbuf)
+			return -ENOSPC;
+	} else {
+		erofs_diskbuf_close(inode->i_diskbuf);
+	}
+
+	fd = erofs_diskbuf_reserve(inode->i_diskbuf, 0, &off);
+	if (fd < 0)
+		return -EBADF;
+
+	for (j = inode->i_size; j; ) {
+		nread = erofs_iostream_read(&tar->ios, &buf, j);
+		if (nread < 0)
+			break;
+		if (write(fd, buf, nread) != nread) {
+			nread = -EIO;
+			break;
+		}
+		j -= nread;
+	}
+	erofs_diskbuf_commit(inode->i_diskbuf, inode->i_size);
+	inode->with_diskbuf = true;
+	return 0;
+}
+
+static int tarerofs_write_file_index(struct erofs_inode *inode,
+		struct erofs_tarfile *tar, erofs_off_t data_offset)
+{
+	int ret;
+
+	ret = tarerofs_write_chunkes(inode, data_offset);
+	if (ret)
+		return ret;
+	if (erofs_iostream_lskip(&tar->ios, inode->i_size))
+		return -EIO;
+	return 0;
+}
+
+int tarerofs_parse_tar(struct erofs_inode *root, struct erofs_tarfile *tar)
+{
+	char path[PATH_MAX];
+	struct erofs_pax_header eh = tar->global;
+	struct erofs_sb_info *sbi = root->sbi;
+	bool whout, opq, e = false;
+	struct stat st;
+	erofs_off_t tar_offset, data_offset;
+
+	struct tar_header *th;
+	struct erofs_dentry *d;
+	struct erofs_inode *inode;
+	unsigned int j, csum, cksum;
+	int ckksum, ret, rem;
+
+	if (eh.path)
+		eh.path = strdup(eh.path);
+	if (eh.link)
+		eh.link = strdup(eh.link);
+	init_list_head(&eh.xattrs);
+
+restart:
+	rem = tar->offset & 511;
+	if (rem) {
+		if (erofs_iostream_lskip(&tar->ios, 512 - rem)) {
+			ret = -EIO;
+			goto out;
+		}
+		tar->offset += 512 - rem;
+	}
+
+	tar_offset = tar->offset;
+	ret = erofs_iostream_read(&tar->ios, (void **)&th, sizeof(*th));
+	if (ret != sizeof(*th)) {
+		erofs_err("failed to read header block @ %llu", tar_offset);
+		ret = -EIO;
+		goto out;
+	}
+	tar->offset += sizeof(*th);
+	if (*th->name == '\0') {
+		if (e) {	/* end of tar: two consecutive empty blocks */
+			ret = 1;
+			goto out;
+		}
+		e = true;	/* first empty block, check the next one */
+		goto restart;
+	}
+
+	/* chksum field itself treated as ' ' */
+	csum = tarerofs_otoi(th->chksum, sizeof(th->chksum));
+	if (errno) {
+		erofs_err("invalid chksum @ %llu", tar_offset);
+		ret = -EBADMSG;
+		goto out;
+	}
+	cksum = 0;
+	for (j = 0; j < 8; ++j)
+		cksum += (unsigned int)' ';
+	ckksum = cksum;
+	for (j = 0; j < 148; ++j) {
+		cksum += (unsigned int)((u8*)th)[j];
+		ckksum += (int)((char*)th)[j];
+	}
+	for (j = 156; j < 500; ++j) {
+		cksum += (unsigned int)((u8*)th)[j];
+		ckksum += (int)((char*)th)[j];
+	}
+	if (csum != cksum && csum != ckksum) {
+		erofs_err("chksum mismatch @ %llu", tar_offset);
+		ret = -EBADMSG;
+		goto out;
+	}
+
+	if (th->typeflag == GNUTYPE_VOLHDR) {
+		if (th->size[0])
+			erofs_warn("GNUTYPE_VOLHDR with non-zeroed size @ %llu",
+				   tar_offset);
+		/* memcpy is used here since strncpy would trigger GCC truncation warnings */
+		memcpy(sbi->volume_name, th->name, sizeof(sbi->volume_name));
+		goto restart;
+	}
+
+	if (memcmp(th->magic, "ustar", 5)) {
+		erofs_err("invalid tar magic @ %llu", tar_offset);
+		ret = -EIO;
+		goto out;
+	}
+
+	st.st_mode = tarerofs_otoi(th->mode, sizeof(th->mode));
+	if (errno)
+		goto invalid_tar;
+
+	if (eh.use_uid) {
+		st.st_uid = eh.st.st_uid;
+	} else {
+		st.st_uid = tarerofs_parsenum(th->uid, sizeof(th->uid));
+		if (errno)
+			goto invalid_tar;
+	}
+
+	if (eh.use_gid) {
+		st.st_gid = eh.st.st_gid;
+	} else {
+		st.st_gid = tarerofs_parsenum(th->gid, sizeof(th->gid));
+		if (errno)
+			goto invalid_tar;
+	}
+
+	if (eh.use_size) {
+		st.st_size = eh.st.st_size;
+	} else {
+		st.st_size = tarerofs_parsenum(th->size, sizeof(th->size));
+		if (errno)
+			goto invalid_tar;
+	}
+
+	if (eh.use_mtime) {
+		st.st_mtime = eh.st.st_mtime;
+#if ST_MTIM_NSEC
+		ST_MTIM_NSEC(&st) = ST_MTIM_NSEC(&eh.st);
+#endif
+	} else {
+		st.st_mtime = tarerofs_parsenum(th->mtime, sizeof(th->mtime));
+		if (errno)
+			goto invalid_tar;
+	}
+
+	if (th->typeflag <= '7' && !eh.path) {
+		eh.path = path;
+		j = 0;
+		if (*th->prefix) {
+			memcpy(path, th->prefix, sizeof(th->prefix));
+			path[sizeof(th->prefix)] = '\0';
+			j = strlen(path);
+			if (path[j - 1] != '/') {
+				path[j] = '/';
+				path[++j] = '\0';
+			}
+		}
+		memcpy(path + j, th->name, sizeof(th->name));
+		path[j + sizeof(th->name)] = '\0';
+		j = strlen(path);
+		while (path[j - 1] == '/')
+			path[--j] = '\0';
+	}
+
+	data_offset = tar->offset;
+	tar->offset += st.st_size;
+	switch(th->typeflag) {
+	case '0':
+	case '7':
+	case '1':
+		st.st_mode |= S_IFREG;
+		break;
+	case '2':
+		st.st_mode |= S_IFLNK;
+		break;
+	case '3':
+		st.st_mode |= S_IFCHR;
+		break;
+	case '4':
+		st.st_mode |= S_IFBLK;
+		break;
+	case '5':
+		st.st_mode |= S_IFDIR;
+		break;
+	case '6':
+		st.st_mode |= S_IFIFO;
+		break;
+	case 'g':
+		ret = tarerofs_parse_pax_header(&tar->ios, &tar->global,
+						st.st_size);
+		if (ret)
+			goto out;
+		if (tar->global.path) {
+			free(eh.path);
+			eh.path = strdup(tar->global.path);
+		}
+		if (tar->global.link) {
+			free(eh.link);
+			eh.link = strdup(tar->global.link);
+		}
+		goto restart;
+	case 'x':
+		ret = tarerofs_parse_pax_header(&tar->ios, &eh, st.st_size);
+		if (ret)
+			goto out;
+		goto restart;
+	case 'L':
+		free(eh.path);
+		eh.path = malloc(st.st_size + 1);
+		if (st.st_size != erofs_iostream_bread(&tar->ios, eh.path,
+						       st.st_size))
+			goto invalid_tar;
+		eh.path[st.st_size] = '\0';
+		goto restart;
+	case 'K':
+		free(eh.link);
+		eh.link = malloc(st.st_size + 1);
+		if (st.st_size > PATH_MAX || st.st_size !=
+		    erofs_iostream_bread(&tar->ios, eh.link, st.st_size))
+			goto invalid_tar;
+		eh.link[st.st_size] = '\0';
+		goto restart;
+	default:
+		erofs_info("unrecognized typeflag %xh @ %llu - ignoring",
+			   th->typeflag, tar_offset);
+		(void)erofs_iostream_lskip(&tar->ios, st.st_size);
+		ret = 0;
+		goto out;
+	}
+
+	st.st_rdev = 0;
+	if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode)) {
+		int major, minor;
+
+		major = tarerofs_parsenum(th->devmajor, sizeof(th->devmajor));
+		if (errno) {
+			erofs_err("invalid device major @ %llu", tar_offset);
+			goto out;
+		}
+
+		minor = tarerofs_parsenum(th->devminor, sizeof(th->devminor));
+		if (errno) {
+			erofs_err("invalid device minor @ %llu", tar_offset);
+			goto out;
+		}
+
+		st.st_rdev = (major << 8) | (minor & 0xff) | ((minor & ~0xff) << 12);
+	} else if (th->typeflag == '1' || th->typeflag == '2') {
+		if (!eh.link)
+			eh.link = strndup(th->linkname, sizeof(th->linkname));
+	}
+
+	if (tar->index_mode && !tar->mapfile &&
+	    erofs_blkoff(sbi, data_offset)) {
+		erofs_err("invalid tar data alignment @ %llu", tar_offset);
+		ret = -EIO;
+		goto out;
+	}
+
+	erofs_dbg("parsing %s (mode %05o)", eh.path, st.st_mode);
+
+	d = erofs_rebuild_get_dentry(root, eh.path, tar->aufs, &whout, &opq, true);
+	if (IS_ERR(d)) {
+		ret = PTR_ERR(d);
+		goto out;
+	}
+
+	if (!d) {
+		/* some tarballs include '.' which indicates the root directory */
+		if (!S_ISDIR(st.st_mode)) {
+			ret = -ENOTDIR;
+			goto out;
+		}
+		inode = root;
+	} else if (opq) {
+		DBG_BUGON(d->type == EROFS_FT_UNKNOWN);
+		DBG_BUGON(!d->inode);
+		ret = erofs_set_opaque_xattr(d->inode);
+		goto out;
+	} else if (th->typeflag == '1') {	/* hard link cases */
+		struct erofs_dentry *d2;
+		bool dumb;
+
+		if (S_ISDIR(st.st_mode)) {
+			ret = -EISDIR;
+			goto out;
+		}
+
+		if (d->type != EROFS_FT_UNKNOWN) {
+			tarerofs_remove_inode(d->inode);
+			erofs_iput(d->inode);
+		}
+		d->inode = NULL;
+
+		d2 = erofs_rebuild_get_dentry(root, eh.link, tar->aufs,
+					      &dumb, &dumb, false);
+		if (IS_ERR(d2)) {
+			ret = PTR_ERR(d2);
+			goto out;
+		}
+		if (d2->type == EROFS_FT_UNKNOWN) {
+			ret = -ENOENT;
+			goto out;
+		}
+		if (S_ISDIR(d2->inode->i_mode)) {
+			ret = -EISDIR;
+			goto out;
+		}
+		inode = erofs_igrab(d2->inode);
+		d->inode = inode;
+		d->type = d2->type;
+		++inode->i_nlink;
+		ret = 0;
+		goto out;
+	} else if (d->type != EROFS_FT_UNKNOWN) {
+		if (d->type != EROFS_FT_DIR || !S_ISDIR(st.st_mode)) {
+			struct erofs_inode *parent = d->inode->i_parent;
+
+			tarerofs_remove_inode(d->inode);
+			erofs_iput(d->inode);
+			d->inode = parent;
+			goto new_inode;
+		}
+		inode = d->inode;
+	} else {
+new_inode:
+		inode = erofs_new_inode();
+		if (IS_ERR(inode)) {
+			ret = PTR_ERR(inode);
+			goto out;
+		}
+		inode->i_parent = d->inode;
+		d->inode = inode;
+		d->type = erofs_mode_to_ftype(st.st_mode);
+	}
+
+	if (whout) {
+		inode->i_mode = (inode->i_mode & ~S_IFMT) | S_IFCHR;
+		inode->u.i_rdev = EROFS_WHITEOUT_DEV;
+		d->type = EROFS_FT_CHRDEV;
+
+		/*
+		 * Mark the parent directory as copied-up to avoid exposing
+		 * whiteouts if mounted.  See kernel commit b79e05aaa166
+		 * ("ovl: no direct iteration for dir with origin xattr")
+		 */
+		inode->i_parent->whiteouts = true;
+	} else {
+		inode->i_mode = st.st_mode;
+		if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode))
+			inode->u.i_rdev = erofs_new_encode_dev(st.st_rdev);
+	}
+
+	inode->i_srcpath = strdup(eh.path);
+	if (!inode->i_srcpath) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = __erofs_fill_inode(inode, &st, eh.path);
+	if (ret)
+		goto out;
+	inode->i_size = st.st_size;
+
+	if (!S_ISDIR(inode->i_mode)) {
+		if (S_ISLNK(inode->i_mode)) {
+			inode->i_size = strlen(eh.link);
+			inode->i_link = malloc(inode->i_size + 1);
+			memcpy(inode->i_link, eh.link, inode->i_size + 1);
+		} else if (inode->i_size) {
+			if (tar->index_mode)
+				ret = tarerofs_write_file_index(inode, tar,
+								data_offset);
+			else
+				ret = tarerofs_write_file_data(inode, tar);
+			if (ret)
+				goto out;
+		}
+		inode->i_nlink++;
+	} else if (!inode->i_nlink) {
+		ret = erofs_init_empty_dir(inode);
+		if (ret)
+			goto out;
+	}
+
+	ret = tarerofs_merge_xattrs(&eh.xattrs, &tar->global.xattrs);
+	if (ret)
+		goto out;
+
+	ret = tarerofs_apply_xattrs(inode, &eh.xattrs);
+
+out:
+	if (eh.path != path)
+		free(eh.path);
+	free(eh.link);
+	tarerofs_remove_xattrs(&eh.xattrs);
+	return ret;
+
+invalid_tar:
+	erofs_err("invalid tar @ %llu", tar_offset);
+	ret = -EIO;
+	goto out;
+}
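The st.st_rdev packing a few lines above effectively open-codes the traditional makedev(major, minor) layout for 32-bit device numbers: the low 8 bits and bits 20+ hold the minor, bits 8-19 hold the major. A minimal sketch of the inverse mapping, for illustration only (the helper name is hypothetical, not part of erofs-utils):

#include <stdint.h>

/* Hypothetical helper: split a makedev()-style 32-bit device number back
 * into major/minor, mirroring the packing used for st.st_rdev above. */
static inline void huge_decode_dev(uint32_t dev,
				   unsigned int *major, unsigned int *minor)
{
	*major = (dev & 0xfff00) >> 8;
	*minor = (dev & 0xff) | ((dev >> 12) & 0xfff00);
}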
diff --git a/lib/uuid.c b/lib/uuid.c
new file mode 100644
index 0000000..ec0f9d9
--- /dev/null
+++ b/lib/uuid.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+/*
+ * Copyright (C) 2023 Norbert Lange <nolange79@gmail.com>
+ */
+
+#include <string.h>
+#include <errno.h>
+
+#include "erofs/config.h"
+#include "erofs/defs.h"
+#include "liberofs_uuid.h"
+
+#ifdef HAVE_LIBUUID
+#include <uuid.h>
+#else
+
+#include <stdlib.h>
+#ifdef HAVE_SYS_RANDOM_H
+#include <sys/random.h>
+#else
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#endif
+
+/* Flags to be used; cleared at runtime if the kernel does not support them */
+static unsigned int erofs_grnd_flag =
+#ifdef GRND_INSECURE
+	GRND_INSECURE;
+#else
+	0x0004;
+#endif
+
+static int s_getrandom(void *out, unsigned size, bool insecure)
+{
+	unsigned int kflags = erofs_grnd_flag;
+	unsigned int flags = insecure ? kflags : 0;
+
+	for (;;)
+	{
+#ifdef HAVE_SYS_RANDOM_H
+		ssize_t r = getrandom(out, size, flags);
+#else
+		ssize_t r = (ssize_t)syscall(__NR_getrandom, out, size, flags);
+#endif
+		int err;
+
+		if (r == size)
+			break;
+		err = errno;
+		if (err != EINTR) {
+			if (err == EINVAL && kflags) {
+				// Kernel likely does not support GRND_INSECURE
+				erofs_grnd_flag = 0;
+				kflags = 0;
+				continue;
+			}
+			return -err;
+		}
+	}
+	return 0;
+}
+#endif
+
+void erofs_uuid_generate(unsigned char *out)
+{
+#ifdef HAVE_LIBUUID
+	uuid_t new_uuid;
+
+	do {
+		uuid_generate(new_uuid);
+	} while (uuid_is_null(new_uuid));
+#else
+	unsigned char new_uuid[16];
+	int res __maybe_unused;
+
+	res = s_getrandom(new_uuid, sizeof(new_uuid), true);
+	BUG_ON(res != 0);
+
+	// UUID version (byte 6) + RFC 4122 variant (byte 8) bits
+	new_uuid[4 + 2] = (new_uuid[4 + 2] & 0x0f) | 0x40;
+	new_uuid[4 + 2 + 2] = (new_uuid[4 + 2 + 2] & 0x3f) | 0x80;
+#endif
+	memcpy(out, new_uuid, sizeof(new_uuid));
+}
+
+int erofs_uuid_parse(const char *in, unsigned char *uu) {
+#ifdef HAVE_LIBUUID
+	return uuid_parse((char *)in, uu);
+#else
+	unsigned char new_uuid[16];
+	unsigned int hypens = ((1U << 3) | (1U << 5) | (1U << 7) | (1U << 9));
+	int i;
+
+	for (i = 0; i < sizeof(new_uuid); hypens >>= 1, i++)
+	{
+		char c[] = { in[0], in[1], '\0' };
+		char* endptr = c;
+		unsigned long val = strtoul(c, &endptr, 16);
+
+		if (endptr - c != 2)
+			return -EINVAL;
+
+		in += 2;
+
+		if ((hypens & 1U) != 0) {
+			if (*in++ != '-')
+				return -EINVAL;
+		}
+		new_uuid[i] = (unsigned char)val;
+	}
+
+	if (*in != '\0')
+		return -EINVAL;
+	memcpy(uu, new_uuid, sizeof(new_uuid));
+	return 0;
+#endif
+}
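For reference, the masking in erofs_uuid_generate() places the version nibble (4) in byte 6 and the RFC 4122 variant bits in byte 8, and the hypens bitmask in erofs_uuid_parse() marks the byte positions (3, 5, 7 and 9) after which a '-' separator is expected. A small sanity-check sketch, not part of the library:

#include <stdbool.h>

/* Hypothetical check: does a 16-byte buffer carry the version-4 markers? */
static bool uuid_looks_v4(const unsigned char u[16])
{
	return (u[6] & 0xf0) == 0x40 &&		/* version 4 */
	       (u[8] & 0xc0) == 0x80;		/* RFC 4122 variant */
}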
diff --git a/lib/uuid_unparse.c b/lib/uuid_unparse.c
new file mode 100644
index 0000000..3255c4b
--- /dev/null
+++ b/lib/uuid_unparse.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+/*
+ * Copyright (C) 2023 Norbert Lange <nolange79@gmail.com>
+ */
+
+#include <stdio.h>
+
+#include "erofs/config.h"
+#include "liberofs_uuid.h"
+
+void erofs_uuid_unparse_lower(const unsigned char *buf, char *out) {
+	sprintf(out, "%04x%04x-%04x-%04x-%04x-%04x%04x%04x",
+			(buf[0] << 8) | buf[1],
+			(buf[2] << 8) | buf[3],
+			(buf[4] << 8) | buf[5],
+			(buf[6] << 8) | buf[7],
+			(buf[8] << 8) | buf[9],
+			(buf[10] << 8) | buf[11],
+			(buf[12] << 8) | buf[13],
+			(buf[14] << 8) | buf[15]);
+}
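A minimal round-trip usage sketch of the UUID helpers above (assuming liberofs_uuid.h is on the include path; error handling kept to asserts):

#include <assert.h>
#include <stdio.h>
#include <string.h>
#include "liberofs_uuid.h"

int main(void)
{
	unsigned char uu[16], uu2[16];
	char str[37];		/* 36 characters plus the trailing NUL */

	erofs_uuid_generate(uu);
	erofs_uuid_unparse_lower(uu, str);
	printf("generated uuid: %s\n", str);

	assert(!erofs_uuid_parse(str, uu2));
	assert(!memcmp(uu, uu2, sizeof(uu)));
	return 0;
}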
diff --git a/lib/xattr.c b/lib/xattr.c
index 71ffe3e..6c8ebf4 100644
--- a/lib/xattr.c
+++ b/lib/xattr.c
@@ -17,15 +17,84 @@
 #include "erofs/xattr.h"
 #include "erofs/cache.h"
 #include "erofs/io.h"
+#include "erofs/fragments.h"
+#include "erofs/xxhash.h"
 #include "liberofs_private.h"
 
+#ifndef XATTR_SYSTEM_PREFIX
+#define XATTR_SYSTEM_PREFIX	"system."
+#endif
+#ifndef XATTR_SYSTEM_PREFIX_LEN
+#define XATTR_SYSTEM_PREFIX_LEN (sizeof(XATTR_SYSTEM_PREFIX) - 1)
+#endif
+#ifndef XATTR_USER_PREFIX
+#define XATTR_USER_PREFIX	"user."
+#endif
+#ifndef XATTR_USER_PREFIX_LEN
+#define XATTR_USER_PREFIX_LEN (sizeof(XATTR_USER_PREFIX) - 1)
+#endif
+#ifndef XATTR_SECURITY_PREFIX
+#define XATTR_SECURITY_PREFIX	"security."
+#endif
+#ifndef XATTR_SECURITY_PREFIX_LEN
+#define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1)
+#endif
+#ifndef XATTR_TRUSTED_PREFIX
+#define XATTR_TRUSTED_PREFIX	"trusted."
+#endif
+#ifndef XATTR_TRUSTED_PREFIX_LEN
+#define XATTR_TRUSTED_PREFIX_LEN (sizeof(XATTR_TRUSTED_PREFIX) - 1)
+#endif
+#ifndef XATTR_NAME_POSIX_ACL_ACCESS
+#define XATTR_NAME_POSIX_ACL_ACCESS "system.posix_acl_access"
+#endif
+#ifndef XATTR_NAME_POSIX_ACL_DEFAULT
+#define XATTR_NAME_POSIX_ACL_DEFAULT "system.posix_acl_default"
+#endif
+#ifndef XATTR_NAME_SECURITY_SELINUX
+#define XATTR_NAME_SECURITY_SELINUX "security.selinux"
+#endif
+#ifndef XATTR_NAME_SECURITY_CAPABILITY
+#define XATTR_NAME_SECURITY_CAPABILITY "security.capability"
+#endif
+#ifndef OVL_XATTR_NAMESPACE
+#define OVL_XATTR_NAMESPACE "overlay."
+#endif
+#ifndef OVL_XATTR_OPAQUE_POSTFIX
+#define OVL_XATTR_OPAQUE_POSTFIX "opaque"
+#endif
+#ifndef OVL_XATTR_ORIGIN_POSTFIX
+#define OVL_XATTR_ORIGIN_POSTFIX "origin"
+#endif
+#ifndef OVL_XATTR_TRUSTED_PREFIX
+#define OVL_XATTR_TRUSTED_PREFIX XATTR_TRUSTED_PREFIX OVL_XATTR_NAMESPACE
+#endif
+#ifndef OVL_XATTR_OPAQUE
+#define OVL_XATTR_OPAQUE OVL_XATTR_TRUSTED_PREFIX OVL_XATTR_OPAQUE_POSTFIX
+#endif
+#ifndef OVL_XATTR_ORIGIN
+#define OVL_XATTR_ORIGIN OVL_XATTR_TRUSTED_PREFIX OVL_XATTR_ORIGIN_POSTFIX
+#endif
+
 #define EA_HASHTABLE_BITS 16
 
+/* one extra byte for the trailing `\0` of attribute name */
+#define EROFS_XATTR_KSIZE(kvlen)	(kvlen[0] + 1)
+#define EROFS_XATTR_KVSIZE(kvlen)	(EROFS_XATTR_KSIZE(kvlen) + kvlen[1])
+
+/*
+ * @base_index:	the index of the matched predefined short prefix
+ * @prefix:	the index of the matched long prefix, if any;
+ *		same as base_index otherwise
+ * @prefix_len:	the length of the matched long prefix if any;
+ *		the length of the matched predefined short prefix otherwise
+ */
 struct xattr_item {
+	struct xattr_item *next_shared_xattr;
 	const char *kvbuf;
 	unsigned int hash[2], len[2], count;
 	int shared_xattr_id;
-	u8 prefix;
+	unsigned int prefix, base_index, prefix_len;
 	struct hlist_node node;
 };
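In the kvbuf layout described by EROFS_XATTR_KSIZE()/EROFS_XATTR_KVSIZE() above, each item stores the full attribute name (prefix included), a trailing NUL, and then the raw value bytes back to back. A small illustrative sketch of building such a buffer, which is essentially what erofs_setxattr() does later in this file:

#include <stdlib.h>
#include <string.h>

/* Illustrative only: build a "name\0value" buffer in the layout that
 * EROFS_XATTR_KSIZE()/EROFS_XATTR_KVSIZE() describe. */
static char *build_kvbuf(const char *key, const void *value, size_t size,
			 unsigned int len[2])
{
	char *kvbuf;

	len[0] = strlen(key);			/* full name, prefix included */
	len[1] = size;				/* raw value bytes */
	kvbuf = malloc(len[0] + 1 + size);
	if (!kvbuf)
		return NULL;
	memcpy(kvbuf, key, len[0] + 1);		/* name plus trailing NUL */
	memcpy(kvbuf + len[0] + 1, value, size);	/* value, unterminated */
	return kvbuf;
}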
 
@@ -36,12 +105,12 @@
 
 static DECLARE_HASHTABLE(ea_hashtable, EA_HASHTABLE_BITS);
 
-static LIST_HEAD(shared_xattrs_list);
-static unsigned int shared_xattrs_count, shared_xattrs_size;
+static struct xattr_item *shared_xattrs_list;
+static unsigned int shared_xattrs_count;
 
 static struct xattr_prefix {
 	const char *prefix;
-	u16 prefix_len;
+	unsigned int prefix_len;
 } xattr_types[] = {
 	[EROFS_XATTR_INDEX_USER] = {
 		XATTR_USER_PREFIX,
@@ -61,73 +130,17 @@
 	}
 };
 
-static unsigned int BKDRHash(char *str, unsigned int len)
-{
-	const unsigned int seed = 131313;
-	unsigned int hash = 0;
+struct ea_type_node {
+	struct list_head list;
+	struct xattr_prefix type;
+	unsigned int index, base_index, base_len;
+};
 
-	while (len) {
-		hash = hash * seed + (*str++);
-		--len;
-	}
-	return hash;
-}
+static LIST_HEAD(ea_name_prefixes);
+static unsigned int ea_prefix_count;
 
-static unsigned int xattr_item_hash(u8 prefix, char *buf,
-				    unsigned int len[2], unsigned int hash[2])
-{
-	hash[0] = BKDRHash(buf, len[0]);	/* key */
-	hash[1] = BKDRHash(buf + len[0], len[1]);	/* value */
-
-	return prefix ^ hash[0] ^ hash[1];
-}
-
-static unsigned int put_xattritem(struct xattr_item *item)
-{
-	if (item->count > 1)
-		return --item->count;
-	free(item);
-	return 0;
-}
-
-static struct xattr_item *get_xattritem(u8 prefix, char *kvbuf,
-					unsigned int len[2])
-{
-	struct xattr_item *item;
-	unsigned int hash[2], hkey;
-
-	hkey = xattr_item_hash(prefix, kvbuf, len, hash);
-
-	hash_for_each_possible(ea_hashtable, item, node, hkey) {
-		if (prefix == item->prefix &&
-		    item->len[0] == len[0] && item->len[1] == len[1] &&
-		    item->hash[0] == hash[0] && item->hash[1] == hash[1] &&
-		    !memcmp(kvbuf, item->kvbuf, len[0] + len[1])) {
-			free(kvbuf);
-			++item->count;
-			return item;
-		}
-	}
-
-	item = malloc(sizeof(*item));
-	if (!item) {
-		free(kvbuf);
-		return ERR_PTR(-ENOMEM);
-	}
-	INIT_HLIST_NODE(&item->node);
-	item->count = 1;
-	item->kvbuf = kvbuf;
-	item->len[0] = len[0];
-	item->len[1] = len[1];
-	item->hash[0] = hash[0];
-	item->hash[1] = hash[1];
-	item->shared_xattr_id = -1;
-	item->prefix = prefix;
-	hash_add(ea_hashtable, &item->node, hkey);
-	return item;
-}
-
-static bool match_prefix(const char *key, u8 *index, u16 *len)
+static bool match_prefix(const char *key, unsigned int *index,
+			 unsigned int *len)
 {
 	struct xattr_prefix *p;
 
@@ -141,21 +154,98 @@
 	return false;
 }
 
+static unsigned int BKDRHash(char *str, unsigned int len)
+{
+	const unsigned int seed = 131313;
+	unsigned int hash = 0;
+
+	while (len) {
+		hash = hash * seed + (*str++);
+		--len;
+	}
+	return hash;
+}
+
+static unsigned int xattr_item_hash(char *buf, unsigned int len[2],
+				    unsigned int hash[2])
+{
+	hash[0] = BKDRHash(buf, len[0]);	/* key */
+	hash[1] = BKDRHash(buf + len[0], len[1]);	/* value */
+	return hash[0] ^ hash[1];
+}
+
+static unsigned int put_xattritem(struct xattr_item *item)
+{
+	if (item->count > 1)
+		return --item->count;
+	free(item);
+	return 0;
+}
+
+static struct xattr_item *get_xattritem(char *kvbuf, unsigned int len[2])
+{
+	struct xattr_item *item;
+	struct ea_type_node *tnode;
+	unsigned int hash[2], hkey;
+
+	hkey = xattr_item_hash(kvbuf, len, hash);
+	hash_for_each_possible(ea_hashtable, item, node, hkey) {
+		if (item->len[0] == len[0] && item->len[1] == len[1] &&
+		    item->hash[0] == hash[0] && item->hash[1] == hash[1] &&
+		    !memcmp(kvbuf, item->kvbuf, len[0] + len[1])) {
+			free(kvbuf);
+			++item->count;
+			return item;
+		}
+	}
+
+	item = malloc(sizeof(*item));
+	if (!item) {
+		free(kvbuf);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	if (!match_prefix(kvbuf, &item->base_index, &item->prefix_len)) {
+		free(item);
+		free(kvbuf);
+		return ERR_PTR(-ENODATA);
+	}
+	DBG_BUGON(len[0] < item->prefix_len);
+
+	INIT_HLIST_NODE(&item->node);
+	item->count = 1;
+	item->kvbuf = kvbuf;
+	item->len[0] = len[0];
+	item->len[1] = len[1];
+	item->hash[0] = hash[0];
+	item->hash[1] = hash[1];
+	item->shared_xattr_id = -1;
+	item->prefix = item->base_index;
+
+	list_for_each_entry(tnode, &ea_name_prefixes, list) {
+		if (item->base_index == tnode->base_index &&
+		    !strncmp(tnode->type.prefix, kvbuf,
+			     tnode->type.prefix_len)) {
+			item->prefix = tnode->index;
+			item->prefix_len = tnode->type.prefix_len;
+			break;
+		}
+	}
+	hash_add(ea_hashtable, &item->node, hkey);
+	return item;
+}
+
 static struct xattr_item *parse_one_xattr(const char *path, const char *key,
 					  unsigned int keylen)
 {
 	ssize_t ret;
-	u8 prefix;
-	u16 prefixlen;
 	unsigned int len[2];
 	char *kvbuf;
 
 	erofs_dbg("parse xattr [%s] of %s", path, key);
 
-	if (!match_prefix(key, &prefix, &prefixlen))
-		return ERR_PTR(-ENODATA);
-
-	DBG_BUGON(keylen < prefixlen);
+	/* length of the key */
+	len[0] = keylen;
 
 	/* determine length of the value */
 #ifdef HAVE_LGETXATTR
@@ -170,19 +260,18 @@
 	len[1] = ret;
 
 	/* allocate key-value buffer */
-	len[0] = keylen - prefixlen;
-
-	kvbuf = malloc(len[0] + len[1]);
+	kvbuf = malloc(EROFS_XATTR_KVSIZE(len));
 	if (!kvbuf)
 		return ERR_PTR(-ENOMEM);
-	memcpy(kvbuf, key + prefixlen, len[0]);
+	memcpy(kvbuf, key, EROFS_XATTR_KSIZE(len));
 	if (len[1]) {
 		/* copy value to buffer */
 #ifdef HAVE_LGETXATTR
-		ret = lgetxattr(path, key, kvbuf + len[0], len[1]);
+		ret = lgetxattr(path, key, kvbuf + EROFS_XATTR_KSIZE(len),
+				len[1]);
 #elif defined(__APPLE__)
-		ret = getxattr(path, key, kvbuf + len[0], len[1], 0,
-			       XATTR_NOFOLLOW);
+		ret = getxattr(path, key, kvbuf + EROFS_XATTR_KSIZE(len),
+			       len[1], 0, XATTR_NOFOLLOW);
 #else
 		free(kvbuf);
 		return ERR_PTR(-EOPNOTSUPP);
@@ -197,7 +286,7 @@
 			len[1] = ret;
 		}
 	}
-	return get_xattritem(prefix, kvbuf, len);
+	return get_xattritem(kvbuf, len);
 }
 
 static struct xattr_item *erofs_get_selabel_xattr(const char *srcpath,
@@ -210,12 +299,10 @@
 		unsigned int len[2];
 		char *kvbuf, *fspath;
 
-#ifdef WITH_ANDROID
 		if (cfg.mount_point)
 			ret = asprintf(&fspath, "/%s/%s", cfg.mount_point,
 				       erofs_fspath(srcpath));
 		else
-#endif
 			ret = asprintf(&fspath, "/%s", erofs_fspath(srcpath));
 		if (ret <= 0)
 			return ERR_PTR(-ENOMEM);
@@ -234,16 +321,17 @@
 			return NULL;
 		}
 
-		len[0] = sizeof("selinux") - 1;
+		len[0] = sizeof(XATTR_NAME_SECURITY_SELINUX) - 1;
 		len[1] = strlen(secontext);
-		kvbuf = malloc(len[0] + len[1] + 1);
+		kvbuf = malloc(EROFS_XATTR_KVSIZE(len));
 		if (!kvbuf) {
 			freecon(secontext);
 			return ERR_PTR(-ENOMEM);
 		}
-		sprintf(kvbuf, "selinux%s", secontext);
+		sprintf(kvbuf, "%s", XATTR_NAME_SECURITY_SELINUX);
+		memcpy(kvbuf + EROFS_XATTR_KSIZE(len), secontext, len[1]);
 		freecon(secontext);
-		return get_xattritem(EROFS_XATTR_INDEX_SECURITY, kvbuf, len);
+		return get_xattritem(kvbuf, len);
 	}
 #endif
 	return NULL;
@@ -263,18 +351,8 @@
 
 static int shared_xattr_add(struct xattr_item *item)
 {
-	struct inode_xattr_node *node = malloc(sizeof(*node));
-
-	if (!node)
-		return -ENOMEM;
-
-	init_list_head(&node->list);
-	node->item = item;
-	list_add(&node->list, &shared_xattrs_list);
-
-	shared_xattrs_size += sizeof(struct erofs_xattr_entry);
-	shared_xattrs_size = EROFS_XATTR_ALIGN(shared_xattrs_size +
-					       item->len[0] + item->len[1]);
+	item->next_shared_xattr = shared_xattrs_list;
+	shared_xattrs_list = item;
 	return ++shared_xattrs_count;
 }
 
@@ -299,6 +377,18 @@
 	if (cfg.sehnd && !strcmp(key, XATTR_SECURITY_PREFIX "selinux"))
 		return true;
 #endif
+
+	/* skip xattrs with unidentified "system." prefix */
+	if (!strncmp(key, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) {
+		if (!strcmp(key, XATTR_NAME_POSIX_ACL_ACCESS) ||
+		    !strcmp(key, XATTR_NAME_POSIX_ACL_DEFAULT)) {
+			return false;
+		} else {
+			erofs_warn("skip unidentified xattr: %s", key);
+			return true;
+		}
+	}
+
 	return false;
 }
 
@@ -317,7 +407,7 @@
 	unsigned int keylen;
 	struct xattr_item *item;
 
-	if (kllen < 0 && errno != ENODATA) {
+	if (kllen < 0 && errno != ENODATA && errno != EOPNOTSUPP) {
 		erofs_err("llistxattr to get the size of names for %s failed",
 			  path);
 		return -errno;
@@ -384,6 +474,59 @@
 	return ret;
 }
 
+int erofs_setxattr(struct erofs_inode *inode, char *key,
+		   const void *value, size_t size)
+{
+	char *kvbuf;
+	unsigned int len[2];
+	struct xattr_item *item;
+
+	len[0] = strlen(key);
+	len[1] = size;
+
+	kvbuf = malloc(EROFS_XATTR_KVSIZE(len));
+	if (!kvbuf)
+		return -ENOMEM;
+
+	memcpy(kvbuf, key, EROFS_XATTR_KSIZE(len));
+	memcpy(kvbuf + EROFS_XATTR_KSIZE(len), value, size);
+
+	item = get_xattritem(kvbuf, len);
+	if (IS_ERR(item))
+		return PTR_ERR(item);
+	DBG_BUGON(!item);
+
+	return erofs_xattr_add(&inode->i_xattrs, item);
+}
+
+static void erofs_removexattr(struct erofs_inode *inode, const char *key)
+{
+	struct inode_xattr_node *node, *n;
+
+	list_for_each_entry_safe(node, n, &inode->i_xattrs, list) {
+		if (!strcmp(node->item->kvbuf, key)) {
+			list_del(&node->list);
+			put_xattritem(node->item);
+			free(node);
+		}
+	}
+}
+
+int erofs_set_opaque_xattr(struct erofs_inode *inode)
+{
+	return erofs_setxattr(inode, OVL_XATTR_OPAQUE, "y", 1);
+}
+
+void erofs_clear_opaque_xattr(struct erofs_inode *inode)
+{
+	erofs_removexattr(inode, OVL_XATTR_OPAQUE);
+}
+
+int erofs_set_origin_xattr(struct erofs_inode *inode)
+{
+	return erofs_setxattr(inode, OVL_XATTR_ORIGIN, NULL, 0);
+}
+
 #ifdef WITH_ANDROID
 static int erofs_droid_xattr_set_caps(struct erofs_inode *inode)
 {
@@ -396,26 +539,25 @@
 	if (!capabilities)
 		return 0;
 
-	len[0] = sizeof("capability") - 1;
+	len[0] = sizeof(XATTR_NAME_SECURITY_CAPABILITY) - 1;
 	len[1] = sizeof(caps);
 
-	kvbuf = malloc(len[0] + len[1]);
+	kvbuf = malloc(EROFS_XATTR_KVSIZE(len));
 	if (!kvbuf)
 		return -ENOMEM;
 
-	memcpy(kvbuf, "capability", len[0]);
+	sprintf(kvbuf, "%s", XATTR_NAME_SECURITY_CAPABILITY);
 	caps.magic_etc = VFS_CAP_REVISION_2 | VFS_CAP_FLAGS_EFFECTIVE;
 	caps.data[0].permitted = (u32) capabilities;
 	caps.data[0].inheritable = 0;
 	caps.data[1].permitted = (u32) (capabilities >> 32);
 	caps.data[1].inheritable = 0;
-	memcpy(kvbuf + len[0], &caps, len[1]);
+	memcpy(kvbuf + EROFS_XATTR_KSIZE(len), &caps, len[1]);
 
-	item = get_xattritem(EROFS_XATTR_INDEX_SECURITY, kvbuf, len);
+	item = get_xattritem(kvbuf, len);
 	if (IS_ERR(item))
 		return PTR_ERR(item);
-	if (!item)
-		return 0;
+	DBG_BUGON(!item);
 
 	return erofs_xattr_add(&inode->i_xattrs, item);
 }
@@ -426,10 +568,9 @@
 }
 #endif
 
-int erofs_prepare_xattr_ibody(struct erofs_inode *inode)
+int erofs_scan_file_xattrs(struct erofs_inode *inode)
 {
 	int ret;
-	struct inode_xattr_node *node;
 	struct list_head *ixattrs = &inode->i_xattrs;
 
 	/* check if xattr is disabled */
@@ -440,24 +581,109 @@
 	if (ret < 0)
 		return ret;
 
-	ret = erofs_droid_xattr_set_caps(inode);
-	if (ret < 0)
-		return ret;
+	return erofs_droid_xattr_set_caps(inode);
+}
 
-	if (list_empty(ixattrs))
+int erofs_read_xattrs_from_disk(struct erofs_inode *inode)
+{
+	ssize_t kllen;
+	char *keylst, *key;
+	int ret;
+
+	init_list_head(&inode->i_xattrs);
+	kllen = erofs_listxattr(inode, NULL, 0);
+	if (kllen < 0)
+		return kllen;
+	if (kllen <= 1)
 		return 0;
 
+	keylst = malloc(kllen);
+	if (!keylst)
+		return -ENOMEM;
+
+	ret = erofs_listxattr(inode, keylst, kllen);
+	if (ret < 0)
+		goto out;
+
+	for (key = keylst; key < keylst + kllen; key += strlen(key) + 1) {
+		void *value = NULL;
+		size_t size = 0;
+
+		if (!strcmp(key, OVL_XATTR_OPAQUE)) {
+			if (!S_ISDIR(inode->i_mode)) {
+				erofs_dbg("file %s: opaque xattr on non-dir",
+					  inode->i_srcpath);
+				ret = -EINVAL;
+				goto out;
+			}
+			inode->opaque = true;
+		}
+
+		ret = erofs_getxattr(inode, key, NULL, 0);
+		if (ret < 0)
+			goto out;
+		if (ret) {
+			size = ret;
+			value = malloc(size);
+			if (!value) {
+				ret = -ENOMEM;
+				goto out;
+			}
+
+			ret = erofs_getxattr(inode, key, value, size);
+			if (ret < 0) {
+				free(value);
+				goto out;
+			}
+			DBG_BUGON(ret != size);
+		} else if (S_ISDIR(inode->i_mode) &&
+			   !strcmp(key, OVL_XATTR_ORIGIN)) {
+			ret = 0;
+			inode->whiteouts = true;
+			continue;
+		}
+
+		ret = erofs_setxattr(inode, key, value, size);
+		free(value);
+		if (ret)
+			break;
+	}
+out:
+	free(keylst);
+	return ret;
+}
+
+static inline unsigned int erofs_next_xattr_align(unsigned int pos,
+						  struct xattr_item *item)
+{
+	return EROFS_XATTR_ALIGN(pos + sizeof(struct erofs_xattr_entry) +
+			item->len[0] + item->len[1] - item->prefix_len);
+}
+
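As a worked example of the size helper above (assuming the usual 4-byte struct erofs_xattr_entry): for "user.foo" = "bar", len[0] = 8, len[1] = 3 and prefix_len = 5, since the "user." prefix is encoded as a one-byte name index instead of being stored inline; one entry therefore takes 4 + 8 + 3 - 5 = 10 bytes, which EROFS_XATTR_ALIGN rounds up to 12.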
+int erofs_prepare_xattr_ibody(struct erofs_inode *inode)
+{
+	int ret;
+	struct inode_xattr_node *node;
+	struct list_head *ixattrs = &inode->i_xattrs;
+	unsigned int h_shared_count;
+
+	if (list_empty(ixattrs)) {
+		inode->xattr_isize = 0;
+		return 0;
+	}
+
 	/* get xattr ibody size */
+	h_shared_count = 0;
 	ret = sizeof(struct erofs_xattr_ibody_header);
 	list_for_each_entry(node, ixattrs, list) {
-		const struct xattr_item *item = node->item;
+		struct xattr_item *item = node->item;
 
-		if (item->shared_xattr_id >= 0) {
+		if (item->shared_xattr_id >= 0 && h_shared_count < UCHAR_MAX) {
+			++h_shared_count;
 			ret += sizeof(__le32);
 			continue;
 		}
-		ret += sizeof(struct erofs_xattr_entry);
-		ret = EROFS_XATTR_ALIGN(ret + item->len[0] + item->len[1]);
+		ret = erofs_next_xattr_align(ret, item);
 	}
 	inode->xattr_isize = ret;
 	return ret;
@@ -467,7 +693,7 @@
 {
 	int ret;
 	DIR *_dir;
-	struct stat64 st;
+	struct stat st;
 
 	_dir = opendir(path);
 	if (!_dir) {
@@ -502,7 +728,7 @@
 			goto fail;
 		}
 
-		ret = lstat64(buf, &st);
+		ret = lstat(buf, &st);
 		if (ret) {
 			ret = -errno;
 			goto fail;
@@ -545,32 +771,17 @@
 	if (sharedxattrs)
 		return;
 
-	shared_xattrs_size = shared_xattrs_count = 0;
+	shared_xattrs_count = 0;
 }
 
-static bool erofs_bh_flush_write_shared_xattrs(struct erofs_buffer_head *bh)
-{
-	void *buf = bh->fsprivate;
-	int err = dev_write(buf, erofs_btell(bh, false), shared_xattrs_size);
-
-	if (err)
-		return false;
-	free(buf);
-	return erofs_bh_flush_generic_end(bh);
-}
-
-static struct erofs_bhops erofs_write_shared_xattrs_bhops = {
-	.flush = erofs_bh_flush_write_shared_xattrs,
-};
-
-static int comp_xattr_item(const void *a, const void *b)
+static int comp_shared_xattr_item(const void *a, const void *b)
 {
 	const struct xattr_item *ia, *ib;
 	unsigned int la, lb;
 	int ret;
 
-	ia = (*((const struct inode_xattr_node **)a))->item;
-	ib = (*((const struct inode_xattr_node **)b))->item;
+	ia = *((const struct xattr_item **)a);
+	ib = *((const struct xattr_item **)b);
 	la = ia->len[0] + ia->len[1];
 	lb = ib->len[0] + ib->len[1];
 
@@ -581,21 +792,88 @@
 	return la > lb;
 }
 
-int erofs_build_shared_xattrs_from_path(const char *path)
+int erofs_xattr_write_name_prefixes(struct erofs_sb_info *sbi, FILE *f)
+{
+	struct ea_type_node *tnode;
+	off_t offset;
+
+	if (!ea_prefix_count)
+		return 0;
+	offset = ftello(f);
+	if (offset < 0)
+		return -errno;
+	if (offset > UINT32_MAX)
+		return -EOVERFLOW;
+
+	offset = round_up(offset, 4);
+	if (fseek(f, offset, SEEK_SET))
+		return -errno;
+	sbi->xattr_prefix_start = (u32)offset >> 2;
+	sbi->xattr_prefix_count = ea_prefix_count;
+
+	list_for_each_entry(tnode, &ea_name_prefixes, list) {
+		union {
+			struct {
+				__le16 size;
+				struct erofs_xattr_long_prefix prefix;
+			} s;
+			u8 data[EROFS_NAME_LEN + 2 +
+				sizeof(struct erofs_xattr_long_prefix)];
+		} u;
+		int len, infix_len;
+
+		u.s.prefix.base_index = tnode->base_index;
+		infix_len = tnode->type.prefix_len - tnode->base_len;
+		memcpy(u.s.prefix.infix, tnode->type.prefix + tnode->base_len,
+		       infix_len);
+		len = sizeof(struct erofs_xattr_long_prefix) + infix_len;
+		u.s.size = cpu_to_le16(len);
+		if (fwrite(&u.s, sizeof(__le16) + len, 1, f) != 1)
+			return -EIO;
+		offset = round_up(offset + sizeof(__le16) + len, 4);
+		if (fseek(f, offset, SEEK_SET))
+			return -errno;
+	}
+	erofs_sb_set_fragments(sbi);
+	erofs_sb_set_xattr_prefixes(sbi);
+	return 0;
+}
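Each record emitted above is a little-endian 16-bit length followed by struct erofs_xattr_long_prefix (the matched base prefix index plus the leftover "infix" characters), padded out to the next 4-byte boundary. For instance, registering "trusted.overlay." as a long name prefix would store the base index for "trusted." plus the 8-byte infix "overlay.", i.e. a 9-byte payload preceded by size = 9, assuming erofs_xattr_long_prefix is just the one-byte base_index followed by a flexible infix array.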
+
+static void erofs_write_xattr_entry(char *buf, struct xattr_item *item)
+{
+	struct erofs_xattr_entry entry = {
+		.e_name_index = item->prefix,
+		.e_name_len = item->len[0] - item->prefix_len,
+		.e_value_size = cpu_to_le16(item->len[1]),
+	};
+
+	memcpy(buf, &entry, sizeof(entry));
+	buf += sizeof(struct erofs_xattr_entry);
+	memcpy(buf, item->kvbuf + item->prefix_len,
+	       item->len[0] - item->prefix_len);
+	buf += item->len[0] - item->prefix_len;
+	memcpy(buf, item->kvbuf + item->len[0] + 1, item->len[1]);
+
+	erofs_dbg("writing xattr %d %s (%d %s)", item->base_index, item->kvbuf,
+			item->prefix, item->kvbuf + item->prefix_len);
+}
+
+int erofs_build_shared_xattrs_from_path(struct erofs_sb_info *sbi, const char *path)
 {
 	int ret;
 	struct erofs_buffer_head *bh;
-	struct inode_xattr_node *node, *n, **sorted_n;
+	struct xattr_item *item, *n, **sorted_n;
 	char *buf;
 	unsigned int p, i;
 	erofs_off_t off;
+	erofs_off_t shared_xattrs_size = 0;
 
 	/* check if xattr or shared xattr is disabled */
 	if (cfg.c_inline_xattr_tolerance < 0 ||
 	    cfg.c_inline_xattr_tolerance == INT_MAX)
 		return 0;
 
-	if (shared_xattrs_size || shared_xattrs_count) {
+	if (shared_xattrs_count) {
 		DBG_BUGON(1);
 		return -EINVAL;
 	}
@@ -604,15 +882,34 @@
 	if (ret)
 		return ret;
 
-	if (!shared_xattrs_size)
+	if (!shared_xattrs_count)
 		goto out;
 
-	buf = calloc(1, shared_xattrs_size);
-	if (!buf)
+	sorted_n = malloc((shared_xattrs_count + 1) * sizeof(n));
+	if (!sorted_n)
 		return -ENOMEM;
 
+	i = 0;
+	while (shared_xattrs_list) {
+		item = shared_xattrs_list;
+		sorted_n[i++] = item;
+		shared_xattrs_list = item->next_shared_xattr;
+		shared_xattrs_size = erofs_next_xattr_align(shared_xattrs_size,
+							    item);
+	}
+	DBG_BUGON(i != shared_xattrs_count);
+	sorted_n[i] = NULL;
+	qsort(sorted_n, shared_xattrs_count, sizeof(n), comp_shared_xattr_item);
+
+	buf = calloc(1, shared_xattrs_size);
+	if (!buf) {
+		free(sorted_n);
+		return -ENOMEM;
+	}
+
 	bh = erofs_balloc(XATTR, shared_xattrs_size, 0, 0);
 	if (IS_ERR(bh)) {
+		free(sorted_n);
 		free(buf);
 		return PTR_ERR(bh);
 	}
@@ -621,51 +918,33 @@
 	erofs_mapbh(bh->block);
 	off = erofs_btell(bh, false);
 
-	sbi.xattr_blkaddr = off / EROFS_BLKSIZ;
-	off %= EROFS_BLKSIZ;
+	sbi->xattr_blkaddr = off / erofs_blksiz(sbi);
+	off %= erofs_blksiz(sbi);
 	p = 0;
-
-	sorted_n = malloc(shared_xattrs_count * sizeof(n));
-	if (!sorted_n)
-		return -ENOMEM;
-	i = 0;
-	list_for_each_entry_safe(node, n, &shared_xattrs_list, list) {
-		list_del(&node->list);
-		sorted_n[i++] = node;
-	}
-	DBG_BUGON(i != shared_xattrs_count);
-	qsort(sorted_n, shared_xattrs_count, sizeof(n), comp_xattr_item);
-
 	for (i = 0; i < shared_xattrs_count; i++) {
-		struct inode_xattr_node *const tnode = sorted_n[i];
-		struct xattr_item *const item = tnode->item;
-		const struct erofs_xattr_entry entry = {
-			.e_name_index = item->prefix,
-			.e_name_len = item->len[0],
-			.e_value_size = cpu_to_le16(item->len[1])
-		};
-
-		item->shared_xattr_id = (off + p) /
-			sizeof(struct erofs_xattr_entry);
-
-		memcpy(buf + p, &entry, sizeof(entry));
-		p += sizeof(struct erofs_xattr_entry);
-		memcpy(buf + p, item->kvbuf, item->len[0] + item->len[1]);
-		p = EROFS_XATTR_ALIGN(p + item->len[0] + item->len[1]);
-		free(tnode);
+		item = sorted_n[i];
+		erofs_write_xattr_entry(buf + p, item);
+		item->next_shared_xattr = sorted_n[i + 1];
+		item->shared_xattr_id = (off + p) / sizeof(__le32);
+		p = erofs_next_xattr_align(p, item);
 	}
-
+	shared_xattrs_list = sorted_n[0];
 	free(sorted_n);
-	bh->fsprivate = buf;
-	bh->op = &erofs_write_shared_xattrs_bhops;
+	bh->op = &erofs_drop_directly_bhops;
+	ret = dev_write(sbi, buf, erofs_btell(bh, false), shared_xattrs_size);
+	free(buf);
+	erofs_bdrop(bh, false);
 out:
 	erofs_cleanxattrs(true);
-	return 0;
+	return ret;
 }
 
-char *erofs_export_xattr_ibody(struct list_head *ixattrs, unsigned int size)
+char *erofs_export_xattr_ibody(struct erofs_inode *inode)
 {
+	struct list_head *ixattrs = &inode->i_xattrs;
+	unsigned int size = inode->xattr_isize;
 	struct inode_xattr_node *node, *n;
+	struct xattr_item *item;
 	struct erofs_xattr_ibody_header *header;
 	LIST_HEAD(ilst);
 	unsigned int p;
@@ -677,14 +956,35 @@
 	header = (struct erofs_xattr_ibody_header *)buf;
 	header->h_shared_count = 0;
 
+	if (cfg.c_xattr_name_filter) {
+		u32 name_filter = 0;
+		int hashbit;
+		unsigned int base_len;
+
+		list_for_each_entry(node, ixattrs, list) {
+			item = node->item;
+			base_len = xattr_types[item->base_index].prefix_len;
+			hashbit = xxh32(item->kvbuf + base_len,
+					item->len[0] - base_len,
+					EROFS_XATTR_FILTER_SEED + item->base_index) &
+				  (EROFS_XATTR_FILTER_BITS - 1);
+			name_filter |= (1UL << hashbit);
+		}
+		name_filter = EROFS_XATTR_FILTER_DEFAULT & ~name_filter;
+
+		header->h_name_filter = cpu_to_le32(name_filter);
+		if (header->h_name_filter)
+			erofs_sb_set_xattr_filter(inode->sbi);
+	}
+
 	p = sizeof(struct erofs_xattr_ibody_header);
 	list_for_each_entry_safe(node, n, ixattrs, list) {
-		struct xattr_item *const item = node->item;
-
+		item = node->item;
 		list_del(&node->list);
 
 		/* move inline xattrs to the onstack list */
-		if (item->shared_xattr_id < 0) {
+		if (item->shared_xattr_id < 0 ||
+		    header->h_shared_count >= UCHAR_MAX) {
 			list_add(&node->list, &ilst);
 			continue;
 		}
@@ -697,18 +997,9 @@
 	}
 
 	list_for_each_entry_safe(node, n, &ilst, list) {
-		struct xattr_item *const item = node->item;
-		const struct erofs_xattr_entry entry = {
-			.e_name_index = item->prefix,
-			.e_name_len = item->len[0],
-			.e_value_size = cpu_to_le16(item->len[1])
-		};
-
-		memcpy(buf + p, &entry, sizeof(entry));
-		p += sizeof(struct erofs_xattr_entry);
-		memcpy(buf + p, item->kvbuf, item->len[0] + item->len[1]);
-		p = EROFS_XATTR_ALIGN(p + item->len[0] + item->len[1]);
-
+		item = node->item;
+		erofs_write_xattr_entry(buf + p, item);
+		p = erofs_next_xattr_align(p, item);
 		list_del(&node->list);
 		free(node);
 		put_xattritem(item);
@@ -716,3 +1007,656 @@
 	DBG_BUGON(p > size);
 	return buf;
 }
+
+struct xattr_iter {
+	char page[EROFS_MAX_BLOCK_SIZE];
+
+	void *kaddr;
+
+	erofs_blk_t blkaddr;
+	unsigned int ofs;
+	struct erofs_sb_info *sbi;
+};
+
+static int init_inode_xattrs(struct erofs_inode *vi)
+{
+	struct erofs_sb_info *sbi = vi->sbi;
+	struct xattr_iter it;
+	unsigned int i;
+	struct erofs_xattr_ibody_header *ih;
+	int ret = 0;
+
+	/* in most cases, xattrs of this inode have already been initialized */
+	if (vi->flags & EROFS_I_EA_INITED)
+		return ret;
+
+	/*
+	 * bypass all xattr operations if ->xattr_isize is not greater than
+	 * sizeof(struct erofs_xattr_ibody_header).  In detail:
+	 * 1) if it is too small to hold erofs_xattr_ibody_header, then
+	 *    ->xattr_isize must be 0 (which means no xattrs at all);
+	 * 2) if it is exactly sizeof(erofs_xattr_ibody_header), that layout is
+	 *    currently undefined on-disk (it may be used later with some new
+	 *    sb feature).
+	 */
+	if (vi->xattr_isize == sizeof(struct erofs_xattr_ibody_header)) {
+		erofs_err("xattr_isize %d of nid %llu is not supported yet",
+			  vi->xattr_isize, vi->nid);
+		return -EOPNOTSUPP;
+	} else if (vi->xattr_isize < sizeof(struct erofs_xattr_ibody_header)) {
+		if (vi->xattr_isize) {
+			erofs_err("bogus xattr ibody @ nid %llu", vi->nid);
+			DBG_BUGON(1);
+			return -EFSCORRUPTED;	/* xattr ondisk layout error */
+		}
+		return -ENOATTR;
+	}
+
+	it.blkaddr = erofs_blknr(sbi, erofs_iloc(vi) + vi->inode_isize);
+	it.ofs = erofs_blkoff(sbi, erofs_iloc(vi) + vi->inode_isize);
+
+	ret = blk_read(sbi, 0, it.page, it.blkaddr, 1);
+	if (ret < 0)
+		return -EIO;
+
+	it.kaddr = it.page;
+	ih = (struct erofs_xattr_ibody_header *)(it.kaddr + it.ofs);
+
+	vi->xattr_shared_count = ih->h_shared_count;
+	vi->xattr_shared_xattrs = malloc(vi->xattr_shared_count * sizeof(uint));
+	if (!vi->xattr_shared_xattrs)
+		return -ENOMEM;
+
+	/* let's skip ibody header */
+	it.ofs += sizeof(struct erofs_xattr_ibody_header);
+
+	for (i = 0; i < vi->xattr_shared_count; ++i) {
+		if (it.ofs >= erofs_blksiz(sbi)) {
+			/* cannot be unaligned */
+			DBG_BUGON(it.ofs != erofs_blksiz(sbi));
+
+			ret = blk_read(sbi, 0, it.page, ++it.blkaddr, 1);
+			if (ret < 0) {
+				free(vi->xattr_shared_xattrs);
+				vi->xattr_shared_xattrs = NULL;
+				return -EIO;
+			}
+
+			it.kaddr = it.page;
+			it.ofs = 0;
+		}
+		vi->xattr_shared_xattrs[i] =
+			le32_to_cpu(*(__le32 *)(it.kaddr + it.ofs));
+		it.ofs += sizeof(__le32);
+	}
+
+	vi->flags |= EROFS_I_EA_INITED;
+
+	return ret;
+}
+
+/*
+ * The general idea behind these return values:
+ * if    0 is returned, keep processing the current xattr;
+ *       1 (> 0) is returned, skip this xattr and move on to the next one;
+ *    -err (< 0) is returned, an error (e.g. -ENOATTR) occurred and needs
+ *                            to be handled by the caller
+ */
+struct xattr_iter_handlers {
+	int (*entry)(struct xattr_iter *_it, struct erofs_xattr_entry *entry);
+	int (*name)(struct xattr_iter *_it, unsigned int processed, char *buf,
+		    unsigned int len);
+	int (*alloc_buffer)(struct xattr_iter *_it, unsigned int value_sz);
+	void (*value)(struct xattr_iter *_it, unsigned int processed, char *buf,
+		      unsigned int len);
+};
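As an illustration of that protocol, a hypothetical handler set (not part of erofs-utils) that merely counts entries can short-circuit every round from ->entry:

/* Illustrative only: count xattr entries using the 0 / >0 / <0 protocol. */
struct countxattr_iter {
	struct xattr_iter it;
	unsigned int count;
};

static int xattr_entrycount(struct xattr_iter *_it,
			    struct erofs_xattr_entry *entry)
{
	struct countxattr_iter *it =
		container_of(_it, struct countxattr_iter, it);

	++it->count;
	return 1;	/* > 0: skip this xattr's name/value, move to the next */
}

static const struct xattr_iter_handlers count_xattr_handlers = {
	.entry = xattr_entrycount,
	/* .name, .alloc_buffer and .value are never reached here */
};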
+
+static inline int xattr_iter_fixup(struct xattr_iter *it)
+{
+	struct erofs_sb_info *sbi = it->sbi;
+	int ret;
+
+	if (it->ofs < erofs_blksiz(sbi))
+		return 0;
+
+	it->blkaddr += erofs_blknr(sbi, it->ofs);
+
+	ret = blk_read(sbi, 0, it->page, it->blkaddr, 1);
+	if (ret < 0)
+		return -EIO;
+
+	it->kaddr = it->page;
+	it->ofs = erofs_blkoff(sbi, it->ofs);
+	return 0;
+}
+
+static int inline_xattr_iter_pre(struct xattr_iter *it,
+				   struct erofs_inode *vi)
+{
+	struct erofs_sb_info *sbi = vi->sbi;
+	unsigned int xattr_header_sz, inline_xattr_ofs;
+	int ret;
+
+	xattr_header_sz = inlinexattr_header_size(vi);
+	if (xattr_header_sz >= vi->xattr_isize) {
+		DBG_BUGON(xattr_header_sz > vi->xattr_isize);
+		return -ENOATTR;
+	}
+
+	inline_xattr_ofs = vi->inode_isize + xattr_header_sz;
+
+	it->blkaddr = erofs_blknr(sbi, erofs_iloc(vi) + inline_xattr_ofs);
+	it->ofs = erofs_blkoff(sbi, erofs_iloc(vi) + inline_xattr_ofs);
+
+	ret = blk_read(sbi, 0, it->page, it->blkaddr, 1);
+	if (ret < 0)
+		return -EIO;
+
+	it->kaddr = it->page;
+	return vi->xattr_isize - xattr_header_sz;
+}
+
+/*
+ * Regardless of success or failure, `xattr_foreach' will end up with
+ * `ofs' pointing to the next xattr item rather than an arbitrary position.
+ */
+static int xattr_foreach(struct xattr_iter *it,
+			 const struct xattr_iter_handlers *op,
+			 unsigned int *tlimit)
+{
+	struct erofs_sb_info *sbi = it->sbi;
+	struct erofs_xattr_entry entry;
+	unsigned int value_sz, processed, slice;
+	int err;
+
+	/* 0. fixup blkaddr, ofs, ipage */
+	err = xattr_iter_fixup(it);
+	if (err)
+		return err;
+
+	/*
+	 * 1. read the xattr entry into memory; since entries are
+	 *    EROFS_XATTR_ALIGNed, the whole entry must reside within the page
+	 */
+	entry = *(struct erofs_xattr_entry *)(it->kaddr + it->ofs);
+	if (tlimit) {
+		unsigned int entry_sz = erofs_xattr_entry_size(&entry);
+
+		/* xattr on-disk corruption: xattr entry beyond xattr_isize */
+		if (*tlimit < entry_sz) {
+			DBG_BUGON(1);
+			return -EFSCORRUPTED;
+		}
+		*tlimit -= entry_sz;
+	}
+
+	it->ofs += sizeof(struct erofs_xattr_entry);
+	value_sz = le16_to_cpu(entry.e_value_size);
+
+	/* handle entry */
+	err = op->entry(it, &entry);
+	if (err) {
+		it->ofs += entry.e_name_len + value_sz;
+		goto out;
+	}
+
+	/* 2. handle xattr name (ofs will finally be at the end of name) */
+	processed = 0;
+
+	while (processed < entry.e_name_len) {
+		if (it->ofs >= erofs_blksiz(sbi)) {
+			DBG_BUGON(it->ofs > erofs_blksiz(sbi));
+
+			err = xattr_iter_fixup(it);
+			if (err)
+				goto out;
+			it->ofs = 0;
+		}
+
+		slice = min_t(unsigned int, erofs_blksiz(sbi) - it->ofs,
+			      entry.e_name_len - processed);
+
+		/* handle name */
+		err = op->name(it, processed, it->kaddr + it->ofs, slice);
+		if (err) {
+			it->ofs += entry.e_name_len - processed + value_sz;
+			goto out;
+		}
+
+		it->ofs += slice;
+		processed += slice;
+	}
+
+	/* 3. handle xattr value */
+	processed = 0;
+
+	if (op->alloc_buffer) {
+		err = op->alloc_buffer(it, value_sz);
+		if (err) {
+			it->ofs += value_sz;
+			goto out;
+		}
+	}
+
+	while (processed < value_sz) {
+		if (it->ofs >= erofs_blksiz(sbi)) {
+			DBG_BUGON(it->ofs > erofs_blksiz(sbi));
+
+			err = xattr_iter_fixup(it);
+			if (err)
+				goto out;
+			it->ofs = 0;
+		}
+
+		slice = min_t(unsigned int, erofs_blksiz(sbi) - it->ofs,
+			      value_sz - processed);
+		op->value(it, processed, it->kaddr + it->ofs, slice);
+		it->ofs += slice;
+		processed += slice;
+	}
+
+out:
+	/* xattrs should be 4-byte aligned (on-disk constraint) */
+	it->ofs = EROFS_XATTR_ALIGN(it->ofs);
+	return err < 0 ? err : 0;
+}
+
+struct getxattr_iter {
+	struct xattr_iter it;
+
+	int buffer_size, index, infix_len;
+	char *buffer;
+	const char *name;
+	size_t len;
+};
+
+static int erofs_xattr_long_entrymatch(struct getxattr_iter *it,
+				       struct erofs_xattr_entry *entry)
+{
+	struct erofs_sb_info *sbi = it->it.sbi;
+	struct erofs_xattr_prefix_item *pf = sbi->xattr_prefixes +
+		(entry->e_name_index & EROFS_XATTR_LONG_PREFIX_MASK);
+
+	if (pf >= sbi->xattr_prefixes + sbi->xattr_prefix_count)
+		return -ENOATTR;
+
+	if (it->index != pf->prefix->base_index ||
+	    it->len != entry->e_name_len + pf->infix_len)
+		return -ENOATTR;
+
+	if (memcmp(it->name, pf->prefix->infix, pf->infix_len))
+		return -ENOATTR;
+
+	it->infix_len = pf->infix_len;
+	return 0;
+}
+
+static int xattr_entrymatch(struct xattr_iter *_it,
+			    struct erofs_xattr_entry *entry)
+{
+	struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
+
+	/* should also match the infix for long name prefixes */
+	if (entry->e_name_index & EROFS_XATTR_LONG_PREFIX)
+		return erofs_xattr_long_entrymatch(it, entry);
+
+	if (it->index != entry->e_name_index ||
+	    it->len != entry->e_name_len)
+		return -ENOATTR;
+	it->infix_len = 0;
+	return 0;
+}
+
+static int xattr_namematch(struct xattr_iter *_it,
+			   unsigned int processed, char *buf, unsigned int len)
+{
+	struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
+
+	if (memcmp(buf, it->name + it->infix_len + processed, len))
+		return -ENOATTR;
+	return 0;
+}
+
+static int xattr_checkbuffer(struct xattr_iter *_it,
+			     unsigned int value_sz)
+{
+	struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
+	int err = it->buffer_size < value_sz ? -ERANGE : 0;
+
+	it->buffer_size = value_sz;
+	return !it->buffer ? 1 : err;
+}
+
+static void xattr_copyvalue(struct xattr_iter *_it,
+			    unsigned int processed,
+			    char *buf, unsigned int len)
+{
+	struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
+
+	memcpy(it->buffer + processed, buf, len);
+}
+
+static const struct xattr_iter_handlers find_xattr_handlers = {
+	.entry = xattr_entrymatch,
+	.name = xattr_namematch,
+	.alloc_buffer = xattr_checkbuffer,
+	.value = xattr_copyvalue
+};
+
+static int inline_getxattr(struct erofs_inode *vi, struct getxattr_iter *it)
+{
+	int ret;
+	unsigned int remaining;
+
+	ret = inline_xattr_iter_pre(&it->it, vi);
+	if (ret < 0)
+		return ret;
+
+	remaining = ret;
+	while (remaining) {
+		ret = xattr_foreach(&it->it, &find_xattr_handlers, &remaining);
+		if (ret != -ENOATTR)
+			break;
+	}
+
+	return ret ? ret : it->buffer_size;
+}
+
+static int shared_getxattr(struct erofs_inode *vi, struct getxattr_iter *it)
+{
+	unsigned int i;
+	int ret = -ENOATTR;
+
+	for (i = 0; i < vi->xattr_shared_count; ++i) {
+		erofs_blk_t blkaddr =
+			xattrblock_addr(vi, vi->xattr_shared_xattrs[i]);
+
+		it->it.ofs = xattrblock_offset(vi, vi->xattr_shared_xattrs[i]);
+
+		if (!i || blkaddr != it->it.blkaddr) {
+			ret = blk_read(vi->sbi, 0, it->it.page, blkaddr, 1);
+			if (ret < 0)
+				return -EIO;
+
+			it->it.kaddr = it->it.page;
+			it->it.blkaddr = blkaddr;
+		}
+
+		ret = xattr_foreach(&it->it, &find_xattr_handlers, NULL);
+		if (ret != -ENOATTR)
+			break;
+	}
+
+	return ret ? ret : it->buffer_size;
+}
+
+int erofs_getxattr(struct erofs_inode *vi, const char *name, char *buffer,
+		   size_t buffer_size)
+{
+	int ret;
+	unsigned int prefix, prefixlen;
+	struct getxattr_iter it;
+
+	if (!name)
+		return -EINVAL;
+
+	ret = init_inode_xattrs(vi);
+	if (ret)
+		return ret;
+
+	if (!match_prefix(name, &prefix, &prefixlen))
+		return -ENODATA;
+
+	it.it.sbi = vi->sbi;
+	it.index = prefix;
+	it.name = name + prefixlen;
+	it.len = strlen(it.name);
+	if (it.len > EROFS_NAME_LEN)
+		return -ERANGE;
+
+	it.buffer = buffer;
+	it.buffer_size = buffer_size;
+
+	ret = inline_getxattr(vi, &it);
+	if (ret == -ENOATTR)
+		ret = shared_getxattr(vi, &it);
+	return ret;
+}
+
+struct listxattr_iter {
+	struct xattr_iter it;
+
+	char *buffer;
+	int buffer_size, buffer_ofs;
+};
+
+static int xattr_entrylist(struct xattr_iter *_it,
+			   struct erofs_xattr_entry *entry)
+{
+	struct listxattr_iter *it =
+		container_of(_it, struct listxattr_iter, it);
+	unsigned int base_index = entry->e_name_index;
+	unsigned int prefix_len, infix_len = 0;
+	const char *prefix, *infix = NULL;
+
+	if (entry->e_name_index & EROFS_XATTR_LONG_PREFIX) {
+		struct erofs_sb_info *sbi = _it->sbi;
+		struct erofs_xattr_prefix_item *pf = sbi->xattr_prefixes +
+			(entry->e_name_index & EROFS_XATTR_LONG_PREFIX_MASK);
+
+		if (pf >= sbi->xattr_prefixes + sbi->xattr_prefix_count)
+			return 1;
+		infix = pf->prefix->infix;
+		infix_len = pf->infix_len;
+		base_index = pf->prefix->base_index;
+	}
+
+	if (base_index >= ARRAY_SIZE(xattr_types))
+		return 1;
+	prefix = xattr_types[base_index].prefix;
+	prefix_len = xattr_types[base_index].prefix_len;
+
+	if (!it->buffer) {
+		it->buffer_ofs += prefix_len + infix_len +
+					entry->e_name_len + 1;
+		return 1;
+	}
+
+	if (it->buffer_ofs + prefix_len + infix_len
+		+ entry->e_name_len + 1 > it->buffer_size)
+		return -ERANGE;
+
+	memcpy(it->buffer + it->buffer_ofs, prefix, prefix_len);
+	memcpy(it->buffer + it->buffer_ofs + prefix_len, infix, infix_len);
+	it->buffer_ofs += prefix_len + infix_len;
+	return 0;
+}
+
+static int xattr_namelist(struct xattr_iter *_it,
+			  unsigned int processed, char *buf, unsigned int len)
+{
+	struct listxattr_iter *it =
+		container_of(_it, struct listxattr_iter, it);
+
+	memcpy(it->buffer + it->buffer_ofs, buf, len);
+	it->buffer_ofs += len;
+	return 0;
+}
+
+static int xattr_skipvalue(struct xattr_iter *_it,
+			   unsigned int value_sz)
+{
+	struct listxattr_iter *it =
+		container_of(_it, struct listxattr_iter, it);
+
+	it->buffer[it->buffer_ofs++] = '\0';
+	return 1;
+}
+
+static const struct xattr_iter_handlers list_xattr_handlers = {
+	.entry = xattr_entrylist,
+	.name = xattr_namelist,
+	.alloc_buffer = xattr_skipvalue,
+	.value = NULL
+};
+
+static int inline_listxattr(struct erofs_inode *vi, struct listxattr_iter *it)
+{
+	int ret;
+	unsigned int remaining;
+
+	ret = inline_xattr_iter_pre(&it->it, vi);
+	if (ret < 0)
+		return ret;
+
+	remaining = ret;
+	while (remaining) {
+		ret = xattr_foreach(&it->it, &list_xattr_handlers, &remaining);
+		if (ret)
+			break;
+	}
+
+	return ret ? ret : it->buffer_ofs;
+}
+
+static int shared_listxattr(struct erofs_inode *vi, struct listxattr_iter *it)
+{
+	unsigned int i;
+	int ret = 0;
+
+	for (i = 0; i < vi->xattr_shared_count; ++i) {
+		erofs_blk_t blkaddr =
+			xattrblock_addr(vi, vi->xattr_shared_xattrs[i]);
+
+		it->it.ofs = xattrblock_offset(vi, vi->xattr_shared_xattrs[i]);
+		if (!i || blkaddr != it->it.blkaddr) {
+			ret = blk_read(vi->sbi, 0, it->it.page, blkaddr, 1);
+			if (ret < 0)
+				return -EIO;
+
+			it->it.kaddr = it->it.page;
+			it->it.blkaddr = blkaddr;
+		}
+
+		ret = xattr_foreach(&it->it, &list_xattr_handlers, NULL);
+		if (ret)
+			break;
+	}
+
+	return ret ? ret : it->buffer_ofs;
+}
+
+int erofs_listxattr(struct erofs_inode *vi, char *buffer, size_t buffer_size)
+{
+	int ret;
+	struct listxattr_iter it;
+
+	ret = init_inode_xattrs(vi);
+	if (ret == -ENOATTR)
+		return 0;
+	if (ret)
+		return ret;
+
+	it.it.sbi = vi->sbi;
+	it.buffer = buffer;
+	it.buffer_size = buffer_size;
+	it.buffer_ofs = 0;
+
+	ret = inline_listxattr(vi, &it);
+	if (ret < 0 && ret != -ENOATTR)
+		return ret;
+	return shared_listxattr(vi, &it);
+}
+
+int erofs_xattr_insert_name_prefix(const char *prefix)
+{
+	struct ea_type_node *tnode;
+
+	if (ea_prefix_count >= 0x80 || strlen(prefix) > UINT8_MAX)
+		return -EOVERFLOW;
+
+	tnode = calloc(1, sizeof(*tnode));
+	if (!tnode)
+		return -ENOMEM;
+
+	if (!match_prefix(prefix, &tnode->base_index, &tnode->base_len)) {
+		free(tnode);
+		return -ENODATA;
+	}
+
+	tnode->type.prefix_len = strlen(prefix);
+	tnode->type.prefix = strdup(prefix);
+	if (!tnode->type.prefix) {
+		free(tnode);
+		return -ENOMEM;
+	}
+
+	tnode->index = EROFS_XATTR_LONG_PREFIX | ea_prefix_count;
+	ea_prefix_count++;
+	init_list_head(&tnode->list);
+	list_add_tail(&tnode->list, &ea_name_prefixes);
+	return 0;
+}
+
+void erofs_xattr_cleanup_name_prefixes(void)
+{
+	struct ea_type_node *tnode, *n;
+
+	list_for_each_entry_safe(tnode, n, &ea_name_prefixes, list) {
+		list_del(&tnode->list);
+		free((void *)tnode->type.prefix);
+		free(tnode);
+	}
+}
+
+void erofs_xattr_prefixes_cleanup(struct erofs_sb_info *sbi)
+{
+	int i;
+
+	if (sbi->xattr_prefixes) {
+		for (i = 0; i < sbi->xattr_prefix_count; i++)
+			free(sbi->xattr_prefixes[i].prefix);
+		free(sbi->xattr_prefixes);
+		sbi->xattr_prefixes = NULL;
+	}
+}
+
+int erofs_xattr_prefixes_init(struct erofs_sb_info *sbi)
+{
+	erofs_off_t pos = (erofs_off_t)sbi->xattr_prefix_start << 2;
+	struct erofs_xattr_prefix_item *pfs;
+	erofs_nid_t nid = 0;
+	int ret = 0, i, len;
+	void *buf;
+
+	if (!sbi->xattr_prefix_count)
+		return 0;
+
+	if (sbi->packed_nid)
+		nid = sbi->packed_nid;
+
+	pfs = calloc(sbi->xattr_prefix_count, sizeof(*pfs));
+	if (!pfs)
+		return -ENOMEM;
+
+	for (i = 0; i < sbi->xattr_prefix_count; i++) {
+		buf = erofs_read_metadata(sbi, nid, &pos, &len);
+		if (IS_ERR(buf)) {
+			ret = PTR_ERR(buf);
+			goto out;
+		}
+		if (len < sizeof(*pfs->prefix) ||
+		    len > EROFS_NAME_LEN + sizeof(*pfs->prefix)) {
+			free(buf);
+			ret = -EFSCORRUPTED;
+			goto out;
+		}
+		pfs[i].prefix = buf;
+		pfs[i].infix_len = len - sizeof(struct erofs_xattr_long_prefix);
+	}
+out:
+	sbi->xattr_prefixes = pfs;
+	if (ret)
+		erofs_xattr_prefixes_cleanup(sbi);
+	return ret;
+}
diff --git a/lib/xxhash.c b/lib/xxhash.c
new file mode 100644
index 0000000..7289c77
--- /dev/null
+++ b/lib/xxhash.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only
+/*
+ * The xxhash implementation is copied from the Linux kernel at:
+ *	https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/xxhash.c
+ *
+ * The original copyright is:
+ *
+ * xxHash - Extremely Fast Hash algorithm
+ * Copyright (C) 2012-2016, Yann Collet.
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above
+ *     copyright notice, this list of conditions and the following disclaimer
+ *     in the documentation and/or other materials provided with the
+ *     distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ *
+ * You can contact the author at:
+ * - xxHash homepage: https://cyan4973.github.io/xxHash/
+ * - xxHash source repository: https://github.com/Cyan4973/xxHash
+ */
+
+#include "erofs/defs.h"
+#include "erofs/xxhash.h"
+
+/*-*************************************
+ * Macros
+ **************************************/
+#define xxh_rotl32(x, r) ((x << r) | (x >> (32 - r)))
+
+/*-*************************************
+ * Constants
+ **************************************/
+static const uint32_t PRIME32_1 = 2654435761U;
+static const uint32_t PRIME32_2 = 2246822519U;
+static const uint32_t PRIME32_3 = 3266489917U;
+static const uint32_t PRIME32_4 =  668265263U;
+static const uint32_t PRIME32_5 =  374761393U;
+
+/*-***************************
+ * Simple Hash Functions
+ ****************************/
+static uint32_t xxh32_round(uint32_t seed, const uint32_t input)
+{
+	seed += input * PRIME32_2;
+	seed = xxh_rotl32(seed, 13);
+	seed *= PRIME32_1;
+	return seed;
+}
+
+uint32_t xxh32(const void *input, const size_t len, const uint32_t seed)
+{
+	const uint8_t *p = (const uint8_t *)input;
+	const uint8_t *b_end = p + len;
+	uint32_t h32;
+
+	if (len >= 16) {
+		const uint8_t *const limit = b_end - 16;
+		uint32_t v1 = seed + PRIME32_1 + PRIME32_2;
+		uint32_t v2 = seed + PRIME32_2;
+		uint32_t v3 = seed + 0;
+		uint32_t v4 = seed - PRIME32_1;
+
+		do {
+			v1 = xxh32_round(v1, get_unaligned_le32(p));
+			p += 4;
+			v2 = xxh32_round(v2, get_unaligned_le32(p));
+			p += 4;
+			v3 = xxh32_round(v3, get_unaligned_le32(p));
+			p += 4;
+			v4 = xxh32_round(v4, get_unaligned_le32(p));
+			p += 4;
+		} while (p <= limit);
+
+		h32 = xxh_rotl32(v1, 1) + xxh_rotl32(v2, 7) +
+			xxh_rotl32(v3, 12) + xxh_rotl32(v4, 18);
+	} else {
+		h32 = seed + PRIME32_5;
+	}
+
+	h32 += (uint32_t)len;
+
+	while (p + 4 <= b_end) {
+		h32 += get_unaligned_le32(p) * PRIME32_3;
+		h32 = xxh_rotl32(h32, 17) * PRIME32_4;
+		p += 4;
+	}
+
+	while (p < b_end) {
+		h32 += (*p) * PRIME32_5;
+		h32 = xxh_rotl32(h32, 11) * PRIME32_1;
+		p++;
+	}
+
+	h32 ^= h32 >> 15;
+	h32 *= PRIME32_2;
+	h32 ^= h32 >> 13;
+	h32 *= PRIME32_3;
+	h32 ^= h32 >> 16;
+
+	return h32;
+}
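mkfs uses xxh32() above to build the per-inode xattr name filter in erofs_export_xattr_ibody() (lib/xattr.c): each attribute name, with its short prefix stripped, hashes to one bit of a 32-bit bitmap that is stored inverted, so lookups can cheaply rule out absent names. A hedged sketch of that mapping, assuming the erofs headers defining EROFS_XATTR_FILTER_SEED and EROFS_XATTR_FILTER_BITS are on the include path:

#include <string.h>
#include "erofs/internal.h"
#include "erofs/xxhash.h"

/* Illustrative only: which filter bit would a given xattr name occupy? */
static unsigned int xattr_filter_bit(unsigned int base_index,
				     const char *name_without_prefix)
{
	return xxh32(name_without_prefix, strlen(name_without_prefix),
		     EROFS_XATTR_FILTER_SEED + base_index) &
	       (EROFS_XATTR_FILTER_BITS - 1);
}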
diff --git a/lib/zmap.c b/lib/zmap.c
index 95745c5..81fa22b 100644
--- a/lib/zmap.c
+++ b/lib/zmap.c
@@ -16,13 +16,15 @@
 
 int z_erofs_fill_inode(struct erofs_inode *vi)
 {
-	if (!erofs_sb_has_big_pcluster() &&
-	    !erofs_sb_has_ztailpacking() &&
-	    vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
+	struct erofs_sb_info *sbi = vi->sbi;
+
+	if (!erofs_sb_has_big_pcluster(sbi) &&
+	    !erofs_sb_has_ztailpacking(sbi) && !erofs_sb_has_fragments(sbi) &&
+	    vi->datalayout == EROFS_INODE_COMPRESSED_FULL) {
 		vi->z_advise = 0;
 		vi->z_algorithmtype[0] = 0;
 		vi->z_algorithmtype[1] = 0;
-		vi->z_logical_clusterbits = LOG_BLOCK_SIZE;
+		vi->z_logical_clusterbits = sbi->blkszbits;
 
 		vi->flags |= EROFS_I_Z_INITED;
 	}
@@ -35,20 +37,28 @@
 	erofs_off_t pos;
 	struct z_erofs_map_header *h;
 	char buf[sizeof(struct z_erofs_map_header)];
+	struct erofs_sb_info *sbi = vi->sbi;
 
 	if (vi->flags & EROFS_I_Z_INITED)
 		return 0;
 
-	DBG_BUGON(!erofs_sb_has_big_pcluster() &&
-		  !erofs_sb_has_ztailpacking() &&
-		  vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY);
-	pos = round_up(iloc(vi->nid) + vi->inode_isize + vi->xattr_isize, 8);
-
-	ret = dev_read(0, buf, pos, sizeof(buf));
+	pos = round_up(erofs_iloc(vi) + vi->inode_isize + vi->xattr_isize, 8);
+	ret = dev_read(sbi, 0, buf, pos, sizeof(buf));
 	if (ret < 0)
 		return -EIO;
 
 	h = (struct z_erofs_map_header *)buf;
+	/*
+	 * If the highest bit of the 8-byte map header is set, the whole file
+	 * is stored in the packed inode; the remaining bits keep z_fragmentoff.
+	 */
+	if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) {
+		vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
+		vi->fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63);
+		vi->z_tailextent_headlcn = 0;
+		goto out;
+	}
+
 	vi->z_advise = le16_to_cpu(h->h_advise);
 	vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
 	vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
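Because the whole 8-byte map header doubles as the fragment offset when its top bit is set, the XOR in the branch above simply clears bit 63. A tiny sketch of that decode (the helper name is hypothetical):

#include <stdint.h>

/* Illustrative only: once bit 63 of the 8-byte map header is known to be set,
 * the remaining 63 bits are the fragment offset within the packed inode. */
static inline uint64_t map_header_fragmentoff(uint64_t h64)
{
	return h64 & ~(1ULL << 63);	/* equivalent to h64 ^ (1ULL << 63) here */
}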
@@ -59,8 +69,8 @@
 		return -EOPNOTSUPP;
 	}
 
-	vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7);
-	if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION &&
+	vi->z_logical_clusterbits = sbi->blkszbits + (h->h_clusterbits & 7);
+	if (vi->datalayout == EROFS_INODE_COMPRESSED_COMPACT &&
 	    !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^
 	    !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
 		erofs_err("big pcluster head1/2 of compact indexes should be consistent for nid %llu",
@@ -75,7 +85,7 @@
 		ret = z_erofs_do_map_blocks(vi, &map,
 					    EROFS_GET_BLOCKS_FINDTAIL);
 		if (!map.m_plen ||
-		    erofs_blkoff(map.m_pa) + map.m_plen > EROFS_BLKSIZ) {
+		    erofs_blkoff(sbi, map.m_pa) + map.m_plen > erofs_blksiz(sbi)) {
 			erofs_err("invalid tail-packing pclustersize %llu",
 				  map.m_plen | 0ULL);
 			return -EFSCORRUPTED;
@@ -83,6 +93,17 @@
 		if (ret < 0)
 			return ret;
 	}
+	if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER &&
+	    !(h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT)) {
+		struct erofs_map_blocks map = { .index = UINT_MAX };
+
+		vi->fragmentoff = le32_to_cpu(h->h_fragmentoff);
+		ret = z_erofs_do_map_blocks(vi, &map,
+					    EROFS_GET_BLOCKS_FINDTAIL);
+		if (ret < 0)
+			return ret;
+	}
+out:
 	vi->flags |= EROFS_I_Z_INITED;
 	return 0;
 }
@@ -97,8 +118,9 @@
 	u8  type, headtype;
 	u16 clusterofs;
 	u16 delta[2];
-	erofs_blk_t pblk, compressedlcs;
+	erofs_blk_t pblk, compressedblks;
 	erofs_off_t nextpackoff;
+	bool partialref;
 };
 
 static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m,
@@ -111,7 +133,7 @@
 	if (map->index == eblk)
 		return 0;
 
-	ret = blk_read(0, mpage, eblk, 1);
+	ret = blk_read(m->inode->sbi, 0, mpage, eblk, 1);
 	if (ret < 0)
 		return -EIO;
 
@@ -124,43 +146,45 @@
 					 unsigned long lcn)
 {
 	struct erofs_inode *const vi = m->inode;
-	const erofs_off_t ibase = iloc(vi->nid);
-	const erofs_off_t pos =
-		Z_EROFS_VLE_LEGACY_INDEX_ALIGN(ibase + vi->inode_isize +
-					       vi->xattr_isize) +
-		lcn * sizeof(struct z_erofs_vle_decompressed_index);
-	struct z_erofs_vle_decompressed_index *di;
+	struct erofs_sb_info *sbi = vi->sbi;
+	const erofs_off_t ibase = erofs_iloc(vi);
+	const erofs_off_t pos = Z_EROFS_FULL_INDEX_ALIGN(ibase +
+			vi->inode_isize + vi->xattr_isize) +
+		lcn * sizeof(struct z_erofs_lcluster_index);
+	struct z_erofs_lcluster_index *di;
 	unsigned int advise, type;
 	int err;
 
-	err = z_erofs_reload_indexes(m, erofs_blknr(pos));
+	err = z_erofs_reload_indexes(m, erofs_blknr(sbi, pos));
 	if (err)
 		return err;
 
-	m->nextpackoff = pos + sizeof(struct z_erofs_vle_decompressed_index);
+	m->nextpackoff = pos + sizeof(struct z_erofs_lcluster_index);
 	m->lcn = lcn;
-	di = m->kaddr + erofs_blkoff(pos);
+	di = m->kaddr + erofs_blkoff(sbi, pos);
 
 	advise = le16_to_cpu(di->di_advise);
-	type = (advise >> Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT) &
-		((1 << Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) - 1);
+	type = (advise >> Z_EROFS_LI_LCLUSTER_TYPE_BIT) &
+		((1 << Z_EROFS_LI_LCLUSTER_TYPE_BITS) - 1);
 	switch (type) {
-	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+	case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
 		m->clusterofs = 1 << vi->z_logical_clusterbits;
 		m->delta[0] = le16_to_cpu(di->di_u.delta[0]);
-		if (m->delta[0] & Z_EROFS_VLE_DI_D0_CBLKCNT) {
+		if (m->delta[0] & Z_EROFS_LI_D0_CBLKCNT) {
 			if (!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) {
 				DBG_BUGON(1);
 				return -EFSCORRUPTED;
 			}
-			m->compressedlcs = m->delta[0] &
-				~Z_EROFS_VLE_DI_D0_CBLKCNT;
+			m->compressedblks = m->delta[0] &
+				~Z_EROFS_LI_D0_CBLKCNT;
 			m->delta[0] = 1;
 		}
 		m->delta[1] = le16_to_cpu(di->di_u.delta[1]);
 		break;
-	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
-	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+	case Z_EROFS_LCLUSTER_TYPE_PLAIN:
+	case Z_EROFS_LCLUSTER_TYPE_HEAD1:
+		if (advise & Z_EROFS_LI_PARTIAL_REF)
+			m->partialref = true;
 		m->clusterofs = le16_to_cpu(di->di_clusterofs);
 		m->pblk = le32_to_cpu(di->di_u.blkaddr);
 		break;
@@ -197,13 +221,13 @@
 		lo = decode_compactedbits(lclusterbits, lomask,
 					  in, encodebits * i, &type);
 
-		if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
+		if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD)
 			return d1;
 		++d1;
 	} while (++i < vcnt);
 
-	/* vcnt - 1 (Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) item */
-	if (!(lo & Z_EROFS_VLE_DI_D0_CBLKCNT))
+	/* vcnt - 1 (Z_EROFS_LCLUSTER_TYPE_NONHEAD) item */
+	if (!(lo & Z_EROFS_LI_D0_CBLKCNT))
 		d1 += lo - 1;
 	return d1;
 }
@@ -220,7 +244,7 @@
 	u8 *in, type;
 	bool big_pcluster;
 
-	if (1 << amortizedshift == 4)
+	if (1 << amortizedshift == 4 && lclusterbits <= 14)
 		vcnt = 2;
 	else if (1 << amortizedshift == 2 && lclusterbits == 12)
 		vcnt = 16;
@@ -232,7 +256,7 @@
 			 (vcnt << amortizedshift);
 	big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
 	encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
-	eofs = erofs_blkoff(pos);
+	eofs = erofs_blkoff(vi->sbi, pos);
 	base = round_down(eofs, vcnt << amortizedshift);
 	in = m->kaddr + base;
 
@@ -241,19 +265,19 @@
 	lo = decode_compactedbits(lclusterbits, lomask,
 				  in, encodebits * i, &type);
 	m->type = type;
-	if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
+	if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
 		m->clusterofs = 1 << lclusterbits;
 
 		/* figure out lookahead_distance: delta[1] if needed */
 		if (lookahead)
 			m->delta[1] = get_compacted_la_distance(lclusterbits,
 						encodebits, vcnt, in, i);
-		if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) {
+		if (lo & Z_EROFS_LI_D0_CBLKCNT) {
 			if (!big_pcluster) {
 				DBG_BUGON(1);
 				return -EFSCORRUPTED;
 			}
-			m->compressedlcs = lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT;
+			m->compressedblks = lo & ~Z_EROFS_LI_D0_CBLKCNT;
 			m->delta[0] = 1;
 			return 0;
 		} else if (i + 1 != (int)vcnt) {
@@ -267,9 +291,9 @@
 		 */
 		lo = decode_compactedbits(lclusterbits, lomask,
 					  in, encodebits * (i - 1), &type);
-		if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
+		if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD)
 			lo = 0;
-		else if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT)
+		else if (lo & Z_EROFS_LI_D0_CBLKCNT)
 			lo = 1;
 		m->delta[0] = lo + 1;
 		return 0;
@@ -283,7 +307,7 @@
 			--i;
 			lo = decode_compactedbits(lclusterbits, lomask,
 						  in, encodebits * i, &type);
-			if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
+			if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD)
 				i -= lo;
 
 			if (i >= 0)
@@ -295,13 +319,13 @@
 			--i;
 			lo = decode_compactedbits(lclusterbits, lomask,
 						  in, encodebits * i, &type);
-			if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
-				if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) {
+			if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
+				if (lo & Z_EROFS_LI_D0_CBLKCNT) {
 					--i;
-					nblk += lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT;
+					nblk += lo & ~Z_EROFS_LI_D0_CBLKCNT;
 					continue;
 				}
-				if (lo == 1) {
+				if (lo <= 1) {
 					DBG_BUGON(1);
 					/* --i; ++nblk;	continue; */
 					return -EFSCORRUPTED;
@@ -321,19 +345,16 @@
 					    unsigned long lcn, bool lookahead)
 {
 	struct erofs_inode *const vi = m->inode;
-	const unsigned int lclusterbits = vi->z_logical_clusterbits;
-	const erofs_off_t ebase = round_up(iloc(vi->nid) + vi->inode_isize +
+	struct erofs_sb_info *sbi = vi->sbi;
+	const erofs_off_t ebase = round_up(erofs_iloc(vi) + vi->inode_isize +
 					   vi->xattr_isize, 8) +
 		sizeof(struct z_erofs_map_header);
-	const unsigned int totalidx = DIV_ROUND_UP(vi->i_size, EROFS_BLKSIZ);
+	const unsigned int totalidx = BLK_ROUND_UP(sbi, vi->i_size);
 	unsigned int compacted_4b_initial, compacted_2b;
 	unsigned int amortizedshift;
 	erofs_off_t pos;
 	int err;
 
-	if (lclusterbits != 12)
-		return -EOPNOTSUPP;
-
 	if (lcn >= totalidx)
 		return -EINVAL;
 
@@ -366,7 +387,7 @@
 	amortizedshift = 2;
 out:
 	pos += lcn * (1 << amortizedshift);
-	err = z_erofs_reload_indexes(m, erofs_blknr(pos));
+	err = z_erofs_reload_indexes(m, erofs_blknr(sbi, pos));
 	if (err)
 		return err;
 	return unpack_compacted_index(m, amortizedshift, pos, lookahead);
@@ -377,10 +398,10 @@
 {
 	const unsigned int datamode = m->inode->datalayout;
 
-	if (datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY)
+	if (datamode == EROFS_INODE_COMPRESSED_FULL)
 		return legacy_load_cluster_from_disk(m, lcn);
 
-	if (datamode == EROFS_INODE_FLAT_COMPRESSION)
+	if (datamode == EROFS_INODE_COMPRESSED_COMPACT)
 		return compacted_load_cluster_from_disk(m, lcn, lookahead);
 
 	return -EINVAL;
@@ -409,7 +430,7 @@
 		return err;
 
 	switch (m->type) {
-	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+	case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
 		if (!m->delta[0]) {
 			erofs_err("invalid lookback distance 0 @ nid %llu",
 				  (unsigned long long)vi->nid);
@@ -417,8 +438,8 @@
 			return -EFSCORRUPTED;
 		}
 		return z_erofs_extent_lookback(m, m->delta[0]);
-	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
-	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+	case Z_EROFS_LCLUSTER_TYPE_PLAIN:
+	case Z_EROFS_LCLUSTER_TYPE_HEAD1:
 		m->headtype = m->type;
 		map->m_la = (lcn << lclusterbits) | m->clusterofs;
 		break;
@@ -435,22 +456,23 @@
 					    unsigned int initial_lcn)
 {
 	struct erofs_inode *const vi = m->inode;
+	struct erofs_sb_info *sbi = vi->sbi;
 	struct erofs_map_blocks *const map = m->map;
 	const unsigned int lclusterbits = vi->z_logical_clusterbits;
 	unsigned long lcn;
 	int err;
 
-	DBG_BUGON(m->type != Z_EROFS_VLE_CLUSTER_TYPE_PLAIN &&
-		  m->type != Z_EROFS_VLE_CLUSTER_TYPE_HEAD);
+	DBG_BUGON(m->type != Z_EROFS_LCLUSTER_TYPE_PLAIN &&
+		  m->type != Z_EROFS_LCLUSTER_TYPE_HEAD1);
 
-	if (m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN ||
+	if (m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN ||
 	    !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) {
 		map->m_plen = 1 << lclusterbits;
 		return 0;
 	}
 
 	lcn = m->lcn + 1;
-	if (m->compressedlcs)
+	if (m->compressedblks)
 		goto out;
 
 	err = z_erofs_load_cluster_from_disk(m, lcn, false);
@@ -459,28 +481,28 @@
 
 	/*
 	 * If the 1st NONHEAD lcluster has already been handled initially w/o
-	 * valid compressedlcs, which means at least it mustn't be CBLKCNT, or
+	 * valid compressedblks, which means at least it mustn't be CBLKCNT, or
 	 * an internal implementation error is detected.
 	 *
 	 * The following code can also handle it properly anyway, but let's
 	 * BUG_ON in the debugging mode only for developers to notice that.
 	 */
 	DBG_BUGON(lcn == initial_lcn &&
-		  m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD);
+		  m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD);
 
 	switch (m->type) {
-	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
-	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+	case Z_EROFS_LCLUSTER_TYPE_PLAIN:
+	case Z_EROFS_LCLUSTER_TYPE_HEAD1:
 		/*
 		 * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
 		 * rather than CBLKCNT, it's a 1 lcluster-sized pcluster.
 		 */
-		m->compressedlcs = 1;
+		m->compressedblks = 1 << (lclusterbits - sbi->blkszbits);
 		break;
-	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+	case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
 		if (m->delta[0] != 1)
 			goto err_bonus_cblkcnt;
-		if (m->compressedlcs)
+		if (m->compressedblks)
 			break;
 		/* fallthrough */
 	default:
@@ -490,7 +512,7 @@
 		return -EFSCORRUPTED;
 	}
 out:
-	map->m_plen = m->compressedlcs << lclusterbits;
+	map->m_plen = m->compressedblks << sbi->blkszbits;
 	return 0;
 err_bonus_cblkcnt:
 	erofs_err("bogus CBLKCNT @ lcn %lu of nid %llu",
@@ -518,11 +540,11 @@
 		if (err)
 			return err;
 
-		if (m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
+		if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
 			DBG_BUGON(!m->delta[1] &&
 				  m->clusterofs != 1 << lclusterbits);
-		} else if (m->type == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN ||
-			   m->type == Z_EROFS_VLE_CLUSTER_TYPE_HEAD) {
+		} else if (m->type == Z_EROFS_LCLUSTER_TYPE_PLAIN ||
+			   m->type == Z_EROFS_LCLUSTER_TYPE_HEAD1) {
 			/* go on until the next HEAD lcluster */
 			if (lcn != headlcn)
 				break;
@@ -545,7 +567,9 @@
 				 struct erofs_map_blocks *map,
 				 int flags)
 {
+	struct erofs_sb_info *sbi = vi->sbi;
 	bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER;
+	bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
 	struct z_erofs_maprecorder m = {
 		.inode = vi,
 		.map = map,
@@ -571,11 +595,18 @@
 	map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED;
 	end = (m.lcn + 1ULL) << lclusterbits;
 	switch (m.type) {
-	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
-	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+	case Z_EROFS_LCLUSTER_TYPE_PLAIN:
+	case Z_EROFS_LCLUSTER_TYPE_HEAD1:
 		if (endoff >= m.clusterofs) {
 			m.headtype = m.type;
 			map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
+			/*
+			 * For ztailpacking files, in order to inline data more
+			 * effectively, special EOF lclusters are now supported
+			 * which can have three parts at most.
+			 */
+			if (ztailpacking && end > vi->i_size)
+				end = vi->i_size;
 			break;
 		}
 		/* m.lcn should be >= 1 if endoff < m.clusterofs */
@@ -589,7 +620,7 @@
 		map->m_flags |= EROFS_MAP_FULL_MAPPED;
 		m.delta[0] = 1;
 		/* fallthrough */
-	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+	case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
 		/* get the corresponding first chunk */
 		err = z_erofs_extent_lookback(&m, m.delta[0]);
 		if (err)
@@ -601,25 +632,43 @@
 		err = -EOPNOTSUPP;
 		goto out;
 	}
-
+	if (m.partialref)
+		map->m_flags |= EROFS_MAP_PARTIAL_REF;
 	map->m_llen = end - map->m_la;
-	if (flags & EROFS_GET_BLOCKS_FINDTAIL)
+	if (flags & EROFS_GET_BLOCKS_FINDTAIL) {
 		vi->z_tailextent_headlcn = m.lcn;
+		/* for non-compact indexes, fragmentoff is 64 bits */
+		if (fragment && vi->datalayout == EROFS_INODE_COMPRESSED_FULL)
+			vi->fragmentoff |= (u64)m.pblk << 32;
+	}
 	if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) {
 		map->m_flags |= EROFS_MAP_META;
 		map->m_pa = vi->z_idataoff;
 		map->m_plen = vi->z_idata_size;
+	} else if (fragment && m.lcn == vi->z_tailextent_headlcn) {
+		map->m_flags |= EROFS_MAP_FRAGMENT;
 	} else {
-		map->m_pa = blknr_to_addr(m.pblk);
+		map->m_pa = erofs_pos(sbi, m.pblk);
 		err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
 		if (err)
 			goto out;
 	}
 
-	if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN)
-		map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
-	else
+	if (m.headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN) {
+		if (map->m_llen > map->m_plen) {
+			DBG_BUGON(1);
+			err = -EFSCORRUPTED;
+			goto out;
+		}
+		if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER)
+			map->m_algorithmformat =
+				Z_EROFS_COMPRESSION_INTERLACED;
+		else
+			map->m_algorithmformat =
+				Z_EROFS_COMPRESSION_SHIFTED;
+	} else {
 		map->m_algorithmformat = vi->z_algorithmtype[0];
+	}
 
 	if (flags & EROFS_GET_BLOCKS_FIEMAP) {
 		err = z_erofs_get_extent_decompressedlen(&m);
@@ -652,6 +701,15 @@
 	if (err)
 		goto out;
 
+	if ((vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) &&
+	    !vi->z_tailextent_headlcn) {
+		map->m_la = 0;
+		map->m_llen = vi->i_size;
+		map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED |
+				EROFS_MAP_FRAGMENT;
+		goto out;
+	}
+
 	err = z_erofs_do_map_blocks(vi, map, flags);
 out:
 	DBG_BUGON(err < 0 && err != -ENOMEM);
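
The mapping changes above track compressed pcluster sizes in blocks ("compressedblks")
rather than in logical clusters ("compressedlcs"): map->m_plen is now derived as
compressedblks << blkszbits instead of compressedlcs << lclusterbits.  A minimal sketch
of the one-lcluster HEAD/PLAIN case handled above; the bit widths are made-up sample
values and the snippet is illustrative only, not part of the patch:

	#include <assert.h>

	int main(void)
	{
		unsigned int lclusterbits = 14;	/* 16KiB logical clusters (sample value) */
		unsigned int blkszbits = 12;	/* 4KiB filesystem blocks (sample value) */

		/* old accounting: a one-lcluster pcluster, counted in lclusters */
		unsigned long long plen_old = 1ULL << lclusterbits;

		/* new accounting: the same pcluster, counted in blocks as in
		 * m->compressedblks = 1 << (lclusterbits - blkszbits) above */
		unsigned long long compressedblks = 1ULL << (lclusterbits - blkszbits);
		unsigned long long plen_new = compressedblks << blkszbits;

		assert(plen_old == plen_new);	/* 16384 bytes either way */
		return 0;
	}
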
diff --git a/man/dump.erofs.1 b/man/dump.erofs.1
index 209e5f9..7316f4b 100644
--- a/man/dump.erofs.1
+++ b/man/dump.erofs.1
@@ -9,18 +9,28 @@
 .SH DESCRIPTION
 .B dump.erofs
 is used to retrieve erofs metadata from \fIIMAGE\fP and demonstrate
+.br
 1) overall disk statistics,
+.br
 2) superblock information,
+.br
 3) file information of the given inode NID,
+.br
 4) file extent information of the given inode NID.
 .SH OPTIONS
 .TP
 .BI "\-\-device=" path
 Specify an extra device to be used together.
-You may give multiple `--device' options in the correct order.
+You may give multiple
+.B --device
+options in the correct order.
 .TP
 .BI "\-\-ls"
-List directory contents. An inode should be specified together.
+List directory contents.
+.I NID
+or
+.I path
+is required.
 .TP
 .BI "\-\-nid=" NID
 Specify an inode NID in order to print its file information.
@@ -29,16 +39,21 @@
 Specify an inode path in order to print its file information.
 .TP
 .BI \-e
-Show the file extent information. An inode should be specified together.
+Show the file extent information.
+.I NID
+or
+.I path
+is required.
 .TP
 .BI \-V
 Print the version number and exit.
 .TP
 .BI \-s
-Show superblock information of the an EROFS-formatted image.
+Show superblock information.
+This is the default if no options are specified.
 .TP
 .BI \-S
-Show EROFS disk statistics, including file type/size distribution, number of (un)compressed files, compression ratio of the whole image, etc.
+Show image statistics, including file type/size distribution, number of (un)compressed files, compression ratio, etc.
 .SH AUTHOR
 Initial code was written by Wang Qi <mpiglet@outlook.com>, Guo Xuenan <guoxuenan@huawei.com>.
 .PP
diff --git a/man/erofsfuse.1 b/man/erofsfuse.1
index 9db6827..8b99368 100644
--- a/man/erofsfuse.1
+++ b/man/erofsfuse.1
@@ -26,6 +26,9 @@
 .BI "\-\-device=" path
 Specify an extra device to be used together.
 You may give multiple `--device' options in the correct order.
+.TP
+.BI "\-\-offset=" #
+Specify the number of bytes to skip when reading the image file. The default is 0.
 .SS "FUSE options:"
 .TP
 \fB-d -o\fR debug
diff --git a/man/fsck.erofs.1 b/man/fsck.erofs.1
index f3e9c3b..364219a 100644
--- a/man/fsck.erofs.1
+++ b/man/fsck.erofs.1
@@ -2,7 +2,7 @@
 .\"
 .TH FSCK.EROFS 1
 .SH NAME
-fsck.erofs \- tool to check the EROFS filesystem's integrity
+fsck.erofs \- tool to check an EROFS filesystem's integrity
 .SH SYNOPSIS
 \fBfsck.erofs\fR [\fIOPTIONS\fR] \fIIMAGE\fR
 .SH DESCRIPTION
@@ -22,15 +22,18 @@
 non-compressed files.
 .TP
 .BI "\-\-device=" path
-Specify an extra device to be used together.
-You may give multiple `--device' options in the correct order.
+Specify an extra blob device to be used together.
+You may give multiple
+.B --device
+options in the correct order.
 .TP
 .B \-\-extract
-Check if all files are well encoded. This will induce more I/Os to read
-compressed file data, so it might take too much time depending on the image.
+Check if all files are well encoded. This reads all compressed files,
+and hence creates more I/O load,
+so it might take a long time depending on the image.
 .TP
 .B \-\-help
-Display this help and exit.
+Display the help string and exit.
 .SH AUTHOR
 This version of \fBfsck.erofs\fR is written by
 Daeho Jeong <daehojeong@google.com>.
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index d811f20..00ac2ac 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -20,55 +20,86 @@
 from \fISOURCE\fR directory.
 .SH OPTIONS
 .TP
-.BI "\-z " compression-algorithm " [" ",#" "]"
-Set an algorithm for file compression, which can be set with an optional
-compression level separated by a comma.
+.BI "\-z " compression-algorithm \fR[\fP, # \fR][\fP: ... \fR]\fP
+Set the primary algorithm for data compression, optionally followed by a
+compression level (1 to 12 for LZ4HC, 0 to 9 for LZMA, and 100 to 109 for LZMA
+extreme compression) separated by a comma.  Alternative algorithms can be
+specified, separated by colons.
+.TP
+.BI "\-b " block-size
+Set the fundamental block size of the filesystem in bytes.  In other words,
+specify the smallest amount of data that can be accessed at a time.  The
+default is the system page size.  It cannot be less than 512 bytes.
 .TP
 .BI "\-C " max-pcluster-size
-Specify the maximum size of compress physical cluster in bytes. It may enable
-big pcluster feature if needed (Linux v5.13+).
+Specify the maximum size of a compressed physical cluster in bytes.
+This may cause the big pcluster feature to be enabled (Linux v5.13+).
 .TP
 .BI "\-d " #
 Specify the level of debugging messages. The default is 2, which shows basic
 warning messages.
 .TP
 .BI "\-x " #
-Specify the upper limit of an xattr which is still inlined. The default is 2.
-Disable storing xattrs if < 0.
+Limit how many xattrs will be inlined. The default is 2.
+A negative value disables storing xattrs.
 .TP
-.BI "\-E " extended-option " [,...]"
+.BI "\-E " extended-option \fR[\fP, ... \fR]\fP
 Set extended options for the filesystem. Extended options are comma separated,
-and may take an argument using the equals ('=') sign.
+and may take an extra argument using the equals ('=') sign.
 The following extended options are supported:
 .RS 1.2i
 .TP
+.BI all-fragments
+Forcibly record whole files into a special inode for better compression.
+It may take an argument as the pcluster size of the packed inode in bytes.
+(Linux v6.1+)
+.TP
+.BI dedupe
+Enable global compressed data deduplication to minimize duplicated data in
+the filesystem. May further reduce image size when used with
+.BR -E\ fragments .
+(Linux v6.1+)
+.TP
 .BI force-inode-compact
-Forcely generate compact inodes (32-byte inodes) to output.
+Force generation of compact (32-byte) inodes.
 .TP
 .BI force-inode-extended
-Forcely generate extended inodes (64-byte inodes) to output.
+Force generation of extended (64-byte) inodes.
 .TP
 .BI force-inode-blockmap
-Forcely generate inode chunk format in 4-byte block address array.
+Force generation of inode chunk format as a 4-byte block address array.
 .TP
 .BI force-chunk-indexes
-Forcely generate inode chunk format in 8-byte chunk indexes (with device id).
+Force generation of inode chunk format as 8-byte chunk indexes (with device ID).
+.TP
+.BI fragments\fR[\fP= size \fR]\fP
+Pack the tail part (pcluster) of compressed files, or entire files, into a
+special inode for smaller image sizes, and it may take an argument as the
+pcluster size of the packed inode in bytes. (Linux v6.1+)
 .TP
 .BI legacy-compress
-Drop "inplace decompression" and "compacted indexes" support, which is used
-to generate compatible EROFS images for Linux v4.19 - 5.3.
+Disable "inplace decompression" and "compacted indexes",
+for compatibility with Linux pre-v5.4.
 .TP
 .BI noinline_data
 Don't inline regular files to enable FSDAX for these files (Linux v5.15+).
 .TP
+.B ^xattr-name-filter
+Turn off/on the xattr name filter used to optimize negative xattr lookups (Linux v6.6+).
+.TP
 .BI ztailpacking
 Pack the tail part (pcluster) of compressed files into its metadata to save
 more space and the tail part I/O. (Linux v5.17+)
 .RE
 .TP
+.BI "\-L " volume-label
+Set the volume label for the filesystem to
+.IR volume-label .
+The maximum length of the volume label is 16 bytes.
+.TP
 .BI "\-T " #
-Set all files to the given UNIX timestamp. Reproducible builds requires setting
-all to a specific one.
+Set all files to the given UNIX timestamp. Reproducible builds require setting
+all to a specific one. By default, the source file's modification time is used.
 .TP
 .BI "\-U " UUID
 Set the universally unique identifier (UUID) of the filesystem to
@@ -80,53 +111,93 @@
 Make all files owned by root.
 .TP
 .BI "\-\-blobdev " file
-Specify another extra blob device to store chunk-based data.
+Specify an extra blob device to store chunk-based data.
 .TP
 .BI "\-\-chunksize " #
 Generate chunk-based files with #-byte chunks.
 .TP
 .BI "\-\-compress-hints " file
-If the optional
-.BI "\-\-compress-hints " file
-argument is given,
-.B mkfs.erofs
-uses it to apply the per-file compression strategy. Each line is defined by
-tokens separated by spaces in the following form:
+Apply a per-file compression strategy. Each line in
+.I file
+is defined by
+tokens separated by spaces in the following form.  Optionally, instead of
+the given primary algorithm, alternative algorithms can be specified with
+\fIalgorithm-index\fR explicitly:
 .RS 1.2i
-<pcluster-in-bytes> <match-pattern>
+<pcluster-size-in-bytes> [algorithm-index] <match-pattern>
 .RE
+.IR match-pattern s
+are extended regular expressions, matched against absolute paths within
+the output filesystem, with no leading /.
 .TP
 .BI "\-\-exclude-path=" path
 Ignore file that matches the exact literal path.
-You may give multiple `--exclude-path' options.
+You may give multiple
+.B --exclude-path
+options.
 .TP
 .BI "\-\-exclude-regex=" regex
-Ignore files that match the given regular expression.
-You may give multiple `--exclude-regex` options.
+Ignore files that match the given extended regular expression.
+You may give multiple
+.B --exclude-regex
+options.
 .TP
 .BI "\-\-file-contexts=" file
-Specify a \fIfile_contexts\fR file to setup / override selinux labels.
+Read SELinux label configuration/overrides from \fIfile\fR in the
+.BR selinux_file (5)
+format.
 .TP
 .BI "\-\-force-uid=" UID
-Set all file uids to \fIUID\fR.
+Set all file UIDs to \fIUID\fR.
 .TP
 .BI "\-\-force-gid=" GID
-Set all file gids to \fIGID\fR.
+Set all file GIDs to \fIGID\fR.
+.TP
+.BI "\-\-gid-offset=" GIDOFFSET
+Add \fIGIDOFFSET\fR to all file GIDs.
+When this option is used together with
+.BR --force-gid ,
+the final file GIDs are
+set to \fIGID\fR + \fIGIDOFFSET\fR.
+.TP
+.B \-\-gzip
+Filter tarball streams through gzip.
 .TP
 .B \-\-help
-Display this help and exit.
+Display the help string and exit.
 .TP
 .B "\-\-ignore-mtime"
-File modification time is ignored whenever it would cause \fBmkfs.erofs\fR to
+Ignore the file modification time whenever it would cause \fBmkfs.erofs\fR to
 use extended inodes over compact inodes. When not using a fixed timestamp, this
-can reduce total metadata size.
+can reduce total metadata size. Implied by
+.BR "-E force-inode-compact" .
 .TP
 .BI "\-\-max-extent-bytes " #
-Specify maximum decompressed extent size # in bytes.
+Specify maximum decompressed extent size in bytes.
 .TP
 .B "\-\-preserve-mtime"
-File modification time is preserved whenever \fBmkfs.erofs\fR decides to use
-extended inodes over compact inodes.
+Use extended inodes instead of compact inodes if the file modification time
+would overflow compact inodes. This is the default. Overrides
+.BR --ignore-mtime .
+.TP
+.B "\-\-tar=f"
+Generate a full EROFS image from a tarball.
+.TP
+.B "\-\-tar=i"
+Generate a metadata-only EROFS image from a tarball.
+.TP
+.BI "\-\-uid-offset=" UIDOFFSET
+Add \fIUIDOFFSET\fR to all file UIDs.
+When this option is used together with
+.BR --force-uid ,
+the final file UIDs are
+set to \fIUID\fR + \fIUIDOFFSET\fR.
+.TP
+.BI "\-\-xattr-prefix=" PREFIX
+Specify a customized extended attribute namespace prefix for space saving,
+e.g. "trusted.overlay.".  You may give multiple
+.B --xattr-prefix
+options (Linux v6.4+).
 .SH AUTHOR
 This version of \fBmkfs.erofs\fR is written by Li Guifu <blucerlee@gmail.com>,
 Miao Xie <miaoxie@huawei.com> and Gao Xiang <xiang@kernel.org> with
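
As a concrete illustration of the --compress-hints format documented above, a
hypothetical hints file might contain the following (sizes and patterns are
placeholders; patterns are extended regular expressions matched against paths
with no leading '/'):

	65536 lib/.*\.so$
	16384 1 firmware/.*

Here the first line would request 64KiB pclusters for shared libraries using the
primary -z algorithm, and the second would route matching files to the alternative
algorithm referenced by index 1 with 16KiB pclusters, assuming the index refers to
the colon-separated -z algorithm list.
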
diff --git a/mkfs/Makefile.am b/mkfs/Makefile.am
index 709d9bf..dd75485 100644
--- a/mkfs/Makefile.am
+++ b/mkfs/Makefile.am
@@ -2,8 +2,9 @@
 
 AUTOMAKE_OPTIONS = foreign
 bin_PROGRAMS     = mkfs.erofs
-AM_CPPFLAGS = ${libuuid_CFLAGS} ${libselinux_CFLAGS}
+AM_CPPFLAGS = ${libselinux_CFLAGS}
 mkfs_erofs_SOURCES = main.c
 mkfs_erofs_CFLAGS = -Wall -I$(top_srcdir)/include
-mkfs_erofs_LDADD = ${libuuid_LIBS} $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
-	${liblz4_LIBS} ${liblzma_LIBS}
+mkfs_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
+	${libuuid_LIBS} ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} \
+	${libdeflate_LIBS}
diff --git a/mkfs/main.c b/mkfs/main.c
index d2c9830..6d2b700 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -15,19 +15,21 @@
 #include "erofs/config.h"
 #include "erofs/print.h"
 #include "erofs/cache.h"
+#include "erofs/diskbuf.h"
 #include "erofs/inode.h"
+#include "erofs/tar.h"
 #include "erofs/io.h"
 #include "erofs/compress.h"
+#include "erofs/dedupe.h"
 #include "erofs/xattr.h"
 #include "erofs/exclude.h"
 #include "erofs/block_list.h"
 #include "erofs/compress_hints.h"
 #include "erofs/blobchunk.h"
+#include "erofs/fragments.h"
+#include "erofs/rebuild.h"
 #include "../lib/liberofs_private.h"
-
-#ifdef HAVE_LIBUUID
-#include <uuid.h>
-#endif
+#include "../lib/liberofs_uuid.h"
 
 #define EROFS_SUPER_END (EROFS_SUPER_OFFSET + sizeof(struct erofs_super_block))
 
@@ -43,6 +45,7 @@
 	{"all-root", no_argument, NULL, 7},
 #ifndef NDEBUG
 	{"random-pclusterblks", no_argument, NULL, 8},
+	{"random-algorithms", no_argument, NULL, 18},
 #endif
 	{"max-extent-bytes", required_argument, NULL, 9},
 	{"compress-hints", required_argument, NULL, 10},
@@ -51,41 +54,53 @@
 	{"blobdev", required_argument, NULL, 13},
 	{"ignore-mtime", no_argument, NULL, 14},
 	{"preserve-mtime", no_argument, NULL, 15},
-#ifdef WITH_ANDROID
+	{"uid-offset", required_argument, NULL, 16},
+	{"gid-offset", required_argument, NULL, 17},
+	{"tar", optional_argument, NULL, 20},
+	{"aufs", no_argument, NULL, 21},
 	{"mount-point", required_argument, NULL, 512},
+	{"xattr-prefix", required_argument, NULL, 19},
+#ifdef WITH_ANDROID
 	{"product-out", required_argument, NULL, 513},
 	{"fs-config-file", required_argument, NULL, 514},
 	{"block-list-file", required_argument, NULL, 515},
 #endif
+	{"ovlfs-strip", optional_argument, NULL, 516},
+#ifdef HAVE_ZLIB
+	{"gzip", no_argument, NULL, 517},
+#endif
 	{0, 0, 0, 0},
 };
 
 static void print_available_compressors(FILE *f, const char *delim)
 {
-	unsigned int i = 0;
+	int i = 0;
+	bool comma = false;
 	const char *s;
 
-	while ((s = z_erofs_list_available_compressors(i)) != NULL) {
-		if (i++)
+	while ((s = z_erofs_list_available_compressors(&i)) != NULL) {
+		if (comma)
 			fputs(delim, f);
 		fputs(s, f);
+		comma = true;
 	}
 	fputc('\n', f);
 }
 
 static void usage(void)
 {
-	fputs("usage: [options] FILE DIRECTORY\n\n"
-	      "Generate erofs image from DIRECTORY to FILE, and [options] are:\n"
+	fputs("usage: [options] FILE SOURCE(s)\n"
+	      "Generate EROFS image (FILE) from DIRECTORY, TARBALL and/or EROFS images.  And [options] are:\n"
+	      " -b#                   set block size to # (# = page size by default)\n"
 	      " -d#                   set output message level to # (maximum 9)\n"
 	      " -x#                   set xattr tolerance to # (< 0, disable xattrs; default 2)\n"
-	      " -zX[,Y]               X=compressor (Y=compression level, optional)\n"
+	      " -zX[,Y][:..]          X=compressor (Y=compression level, optional)\n"
+	      "                       alternative algorithms can be separated by colons(:)\n"
 	      " -C#                   specify the size of compress physical cluster in bytes\n"
 	      " -EX[,...]             X=extended options\n"
+	      " -L volume-label       set the volume label (maximum 16)\n"
 	      " -T#                   set a fixed UNIX timestamp # to all files\n"
-#ifdef HAVE_LIBUUID
 	      " -UX                   use a given filesystem UUID\n"
-#endif
 	      " --all-root            make all files owned by root\n"
 	      " --blobdev=X           specify an extra device X to store chunked data\n"
 	      " --chunksize=#         generate chunk-based files with #-byte chunks\n"
@@ -97,17 +112,27 @@
 #endif
 	      " --force-uid=#         set all file uids to # (# = UID)\n"
 	      " --force-gid=#         set all file gids to # (# = GID)\n"
+	      " --uid-offset=#        add offset # to all file uids (# = id offset)\n"
+	      " --gid-offset=#        add offset # to all file gids (# = id offset)\n"
+#ifdef HAVE_ZLIB
+	      " --gzip                try to filter the tarball stream through gzip\n"
+#endif
 	      " --help                display this help and exit\n"
 	      " --ignore-mtime        use build time instead of strict per-file modification time\n"
 	      " --max-extent-bytes=#  set maximum decompressed extent size # in bytes\n"
 	      " --preserve-mtime      keep per-file modification time strictly\n"
+	      " --aufs                replace aufs special files with overlayfs metadata\n"
+	      " --tar=[fi]            generate an image from tarball(s)\n"
+	      " --ovlfs-strip=[01]    strip overlayfs metadata in the target image (e.g. whiteouts)\n"
 	      " --quiet               quiet execution (do not write anything to standard output.)\n"
 #ifndef NDEBUG
 	      " --random-pclusterblks randomize pclusterblks for big pcluster (debugging only)\n"
+	      " --random-algorithms   randomize per-file algorithms (debugging only)\n"
 #endif
+	      " --xattr-prefix=X      X=extra xattr name prefix\n"
+	      " --mount-point=X       X=prefix of target fs path (default: /)\n"
 #ifdef WITH_ANDROID
 	      "\nwith following android-specific options:\n"
-	      " --mount-point=X       X=prefix of target fs path (default: /)\n"
 	      " --product-out=X       X=product_out directory\n"
 	      " --fs-config-file=X    X=fs_config file\n"
 	      " --block-list-file=X   X=block_list file\n"
@@ -116,6 +141,15 @@
 	print_available_compressors(stderr, ", ");
 }
 
+static unsigned int pclustersize_packed, pclustersize_max;
+static struct erofs_tarfile erofstar = {
+	.global.xattrs = LIST_HEAD_INIT(erofstar.global.xattrs)
+};
+static bool tar_mode, rebuild_mode, gzip_supported;
+
+static unsigned int rebuild_src_count;
+static LIST_HEAD(rebuild_src_list);
+
 static int parse_extended_opts(const char *opts)
 {
 #define MATCH_EXTENTED_OPT(opt, token, keylen) \
@@ -126,12 +160,13 @@
 
 	value = NULL;
 	for (token = opts; *token != '\0'; token = next) {
+		bool clear = false;
 		const char *p = strchr(token, ',');
 
 		next = NULL;
-		if (p)
+		if (p) {
 			next = p + 1;
-		else {
+		} else {
 			p = token + strlen(token);
 			next = p;
 		}
@@ -149,83 +184,147 @@
 			vallen = 0;
 		}
 
+		if (token[0] == '^') {
+			if (keylen < 2)
+				return -EINVAL;
+			++token;
+			--keylen;
+			clear = true;
+		}
+
 		if (MATCH_EXTENTED_OPT("legacy-compress", token, keylen)) {
 			if (vallen)
 				return -EINVAL;
 			/* disable compacted indexes and 0padding */
 			cfg.c_legacy_compress = true;
-		}
-
-		if (MATCH_EXTENTED_OPT("force-inode-compact", token, keylen)) {
+		} else if (MATCH_EXTENTED_OPT("force-inode-compact", token, keylen)) {
 			if (vallen)
 				return -EINVAL;
 			cfg.c_force_inodeversion = FORCE_INODE_COMPACT;
 			cfg.c_ignore_mtime = true;
-		}
-
-		if (MATCH_EXTENTED_OPT("force-inode-extended", token, keylen)) {
+		} else if (MATCH_EXTENTED_OPT("force-inode-extended", token, keylen)) {
 			if (vallen)
 				return -EINVAL;
 			cfg.c_force_inodeversion = FORCE_INODE_EXTENDED;
-		}
-
-		if (MATCH_EXTENTED_OPT("nosbcrc", token, keylen)) {
+		} else if (MATCH_EXTENTED_OPT("nosbcrc", token, keylen)) {
 			if (vallen)
 				return -EINVAL;
-			erofs_sb_clear_sb_chksum();
-		}
-
-		if (MATCH_EXTENTED_OPT("noinline_data", token, keylen)) {
+			erofs_sb_clear_sb_chksum(&sbi);
+		} else if (MATCH_EXTENTED_OPT("noinline_data", token, keylen)) {
 			if (vallen)
 				return -EINVAL;
-			cfg.c_noinline_data = true;
-		}
-
-		if (MATCH_EXTENTED_OPT("force-inode-blockmap", token, keylen)) {
+			cfg.c_inline_data = false;
+		} else if (MATCH_EXTENTED_OPT("inline_data", token, keylen)) {
+			if (vallen)
+				return -EINVAL;
+			cfg.c_inline_data = !clear;
+		} else if (MATCH_EXTENTED_OPT("force-inode-blockmap", token, keylen)) {
 			if (vallen)
 				return -EINVAL;
 			cfg.c_force_chunkformat = FORCE_INODE_BLOCK_MAP;
-		}
-
-		if (MATCH_EXTENTED_OPT("force-chunk-indexes", token, keylen)) {
+		} else if (MATCH_EXTENTED_OPT("force-chunk-indexes", token, keylen)) {
 			if (vallen)
 				return -EINVAL;
 			cfg.c_force_chunkformat = FORCE_INODE_CHUNK_INDEXES;
-		}
-
-		if (MATCH_EXTENTED_OPT("ztailpacking", token, keylen)) {
+		} else if (MATCH_EXTENTED_OPT("ztailpacking", token, keylen)) {
 			if (vallen)
 				return -EINVAL;
-			cfg.c_ztailpacking = true;
+			cfg.c_ztailpacking = !clear;
+		} else if (MATCH_EXTENTED_OPT("all-fragments", token, keylen)) {
+			cfg.c_all_fragments = true;
+			goto handle_fragment;
+		} else if (MATCH_EXTENTED_OPT("fragments", token, keylen)) {
+			char *endptr;
+			u64 i;
+
+handle_fragment:
+			cfg.c_fragments = true;
+			if (vallen) {
+				i = strtoull(value, &endptr, 0);
+				if (endptr - value != vallen) {
+					erofs_err("invalid pcluster size for the packed file %s",
+						  next);
+					return -EINVAL;
+				}
+				pclustersize_packed = i;
+			}
+		} else if (MATCH_EXTENTED_OPT("dedupe", token, keylen)) {
+			if (vallen)
+				return -EINVAL;
+			cfg.c_dedupe = !clear;
+		} else if (MATCH_EXTENTED_OPT("xattr-name-filter", token, keylen)) {
+			if (vallen)
+				return -EINVAL;
+			cfg.c_xattr_name_filter = !clear;
+		} else {
+			erofs_err("unknown extended option %.*s",
+				  p - token, token);
+			return -EINVAL;
 		}
 	}
 	return 0;
 }
 
+static int mkfs_parse_compress_algs(char *algs)
+{
+	unsigned int i;
+	char *s;
+
+	for (s = strtok(algs, ":"), i = 0; s; s = strtok(NULL, ":"), ++i) {
+		const char *lv;
+
+		if (i >= EROFS_MAX_COMPR_CFGS - 1) {
+			erofs_err("too many algorithm types");
+			return -EINVAL;
+		}
+
+		lv = strchr(s, ',');
+		if (lv) {
+			cfg.c_compr_level[i] = atoi(lv + 1);
+			cfg.c_compr_alg[i] = strndup(s, lv - s);
+		} else {
+			cfg.c_compr_level[i] = -1;
+			cfg.c_compr_alg[i] = strdup(s);
+		}
+	}
+	return 0;
+}
+
+static void erofs_rebuild_cleanup(void)
+{
+	struct erofs_sb_info *src, *n;
+
+	list_for_each_entry_safe(src, n, &rebuild_src_list, list) {
+		list_del(&src->list);
+		erofs_put_super(src);
+		dev_close(src);
+		free(src);
+	}
+	rebuild_src_count = 0;
+}
+
 static int mkfs_parse_options_cfg(int argc, char *argv[])
 {
 	char *endptr;
-	int opt, i;
+	int opt, i, err;
 	bool quiet = false;
 
-	while ((opt = getopt_long(argc, argv, "C:E:T:U:d:x:z:",
+	while ((opt = getopt_long(argc, argv, "C:E:L:T:U:b:d:x:z:",
 				  long_options, NULL)) != -1) {
 		switch (opt) {
 		case 'z':
-			if (!optarg) {
-				cfg.c_compr_alg_master = "(default)";
-				break;
+			i = mkfs_parse_compress_algs(optarg);
+			if (i)
+				return i;
+			break;
+
+		case 'b':
+			i = atoi(optarg);
+			if (i < 512 || i > EROFS_MAX_BLOCK_SIZE) {
+				erofs_err("invalid block size %s", optarg);
+				return -EINVAL;
 			}
-			/* get specified compression level */
-			for (i = 0; optarg[i] != '\0'; ++i) {
-				if (optarg[i] == ',') {
-					cfg.c_compr_level_master =
-						atoi(optarg + i + 1);
-					optarg[i] = '\0';
-					break;
-				}
-			}
-			cfg.c_compr_alg_master = strndup(optarg, i);
+			sbi.blkszbits = ilog2(i);
 			break;
 
 		case 'd':
@@ -251,6 +350,17 @@
 			if (opt)
 				return opt;
 			break;
+
+		case 'L':
+			if (optarg == NULL ||
+			    strlen(optarg) > sizeof(sbi.volume_name)) {
+				erofs_err("invalid volume label");
+				return -EINVAL;
+			}
+			strncpy(sbi.volume_name, optarg,
+				sizeof(sbi.volume_name));
+			break;
+
 		case 'T':
 			cfg.c_unix_timestamp = strtoull(optarg, &endptr, 0);
 			if (cfg.c_unix_timestamp == -1 || *endptr != '\0') {
@@ -259,14 +369,12 @@
 			}
 			cfg.c_timeinherit = TIMESTAMP_FIXED;
 			break;
-#ifdef HAVE_LIBUUID
 		case 'U':
-			if (uuid_parse(optarg, sbi.uuid)) {
+			if (erofs_uuid_parse(optarg, sbi.uuid)) {
 				erofs_err("invalid UUID %s", optarg);
 				return -EINVAL;
 			}
 			break;
-#endif
 		case 2:
 			opt = erofs_parse_exclude_path(optarg, false);
 			if (opt) {
@@ -310,6 +418,9 @@
 		case 8:
 			cfg.c_random_pclusterblks = true;
 			break;
+		case 18:
+			cfg.c_random_algorithms = true;
+			break;
 #endif
 		case 9:
 			cfg.c_max_decompressed_extent_bytes =
@@ -323,7 +434,6 @@
 		case 10:
 			cfg.c_compress_hints_file = optarg;
 			break;
-#ifdef WITH_ANDROID
 		case 512:
 			cfg.mount_point = optarg;
 			/* all trailing '/' should be deleted */
@@ -331,6 +441,7 @@
 			if (opt && optarg[opt - 1] == '/')
 				optarg[opt - 1] = '\0';
 			break;
+#ifdef WITH_ANDROID
 		case 513:
 			cfg.target_out_path = optarg;
 			break;
@@ -343,14 +454,12 @@
 #endif
 		case 'C':
 			i = strtoull(optarg, &endptr, 0);
-			if (*endptr != '\0' ||
-			    i < EROFS_BLKSIZ || i % EROFS_BLKSIZ) {
+			if (*endptr != '\0') {
 				erofs_err("invalid physical clustersize %s",
 					  optarg);
 				return -EINVAL;
 			}
-			cfg.c_pclusterblks_max = i / EROFS_BLKSIZ;
-			cfg.c_pclusterblks_def = cfg.c_pclusterblks_max;
+			pclustersize_max = i;
 			break;
 		case 11:
 			i = strtol(optarg, &endptr, 0);
@@ -364,12 +473,7 @@
 					  optarg);
 				return -EINVAL;
 			}
-			if (i < EROFS_BLKSIZ) {
-				erofs_err("chunksize %s must be larger than block size",
-					  optarg);
-				return -EINVAL;
-			}
-			erofs_sb_set_chunked_file();
+			erofs_sb_set_chunked_file(&sbi);
 			break;
 		case 12:
 			quiet = true;
@@ -383,6 +487,53 @@
 		case 15:
 			cfg.c_ignore_mtime = false;
 			break;
+		case 16:
+			errno = 0;
+			cfg.c_uid_offset = strtoll(optarg, &endptr, 0);
+			if (errno || *endptr != '\0') {
+				erofs_err("invalid uid offset %s", optarg);
+				return -EINVAL;
+			}
+			break;
+		case 17:
+			errno = 0;
+			cfg.c_gid_offset = strtoll(optarg, &endptr, 0);
+			if (errno || *endptr != '\0') {
+				erofs_err("invalid gid offset %s", optarg);
+				return -EINVAL;
+			}
+			break;
+		case 19:
+			errno = 0;
+			opt = erofs_xattr_insert_name_prefix(optarg);
+			if (opt) {
+				erofs_err("failed to parse xattr name prefix: %s",
+					  erofs_strerror(opt));
+				return opt;
+			}
+			cfg.c_extra_ea_name_prefixes = true;
+			break;
+		case 20:
+			if (optarg && (!strcmp(optarg, "i") ||
+				!strcmp(optarg, "0") || !memcmp(optarg, "0,", 2))) {
+				erofstar.index_mode = true;
+				if (!memcmp(optarg, "0,", 2))
+					erofstar.mapfile = strdup(optarg + 2);
+			}
+			tar_mode = true;
+			break;
+		case 21:
+			erofstar.aufs = true;
+			break;
+		case 516:
+			if (!optarg || !strcmp(optarg, "1"))
+				cfg.c_ovlfs_strip = true;
+			else
+				cfg.c_ovlfs_strip = false;
+			break;
+		case 517:
+			gzip_supported = true;
+			break;
 		case 1:
 			usage();
 			exit(0);
@@ -392,7 +543,7 @@
 		}
 	}
 
-	if (cfg.c_blobdev_path && cfg.c_chunkbits < LOG_BLOCK_SIZE) {
+	if (cfg.c_blobdev_path && cfg.c_chunkbits < sbi.blkszbits) {
 		erofs_err("--blobdev must be used together with --chunksize");
 		return -EINVAL;
 	}
@@ -414,58 +565,152 @@
 		return -ENOMEM;
 
 	if (optind >= argc) {
-		erofs_err("missing argument: DIRECTORY");
-		return -EINVAL;
-	}
+		if (!tar_mode) {
+			erofs_err("missing argument: SOURCE(s)");
+			return -EINVAL;
+		} else {
+			int dupfd;
 
-	cfg.c_src_path = realpath(argv[optind++], NULL);
-	if (!cfg.c_src_path) {
-		erofs_err("failed to parse source directory: %s",
-			  erofs_strerror(-errno));
-		return -ENOENT;
-	}
+			dupfd = dup(STDIN_FILENO);
+			if (dupfd < 0) {
+				erofs_err("failed to duplicate STDIN_FILENO: %s",
+					  strerror(errno));
+				return -errno;
+			}
+			err = erofs_iostream_open(&erofstar.ios, dupfd, gzip_supported);
+			if (err)
+				return err;
+		}
+	} else {
+		struct stat st;
 
-	if (optind < argc) {
-		erofs_err("unexpected argument: %s\n", argv[optind]);
-		return -EINVAL;
+		cfg.c_src_path = realpath(argv[optind++], NULL);
+		if (!cfg.c_src_path) {
+			erofs_err("failed to parse source directory: %s",
+				  erofs_strerror(-errno));
+			return -ENOENT;
+		}
+
+		if (tar_mode) {
+			int fd = open(cfg.c_src_path, O_RDONLY);
+
+			if (fd < 0) {
+				erofs_err("failed to open file: %s", cfg.c_src_path);
+				return -errno;
+			}
+			err = erofs_iostream_open(&erofstar.ios, fd, gzip_supported);
+			if (err)
+				return err;
+		} else {
+			err = lstat(cfg.c_src_path, &st);
+			if (err)
+				return -errno;
+			if (S_ISDIR(st.st_mode))
+				erofs_set_fs_root(cfg.c_src_path);
+			else
+				rebuild_mode = true;
+		}
+
+		if (rebuild_mode) {
+			char *srcpath = cfg.c_src_path;
+			struct erofs_sb_info *src;
+
+			do {
+				src = calloc(1, sizeof(struct erofs_sb_info));
+				if (!src) {
+					erofs_rebuild_cleanup();
+					return -ENOMEM;
+				}
+
+				err = dev_open_ro(src, srcpath);
+				if (err) {
+					free(src);
+					erofs_rebuild_cleanup();
+					return err;
+				}
+
+				/* extra device index starts from 1 */
+				src->dev = ++rebuild_src_count;
+				list_add(&src->list, &rebuild_src_list);
+			} while (optind < argc && (srcpath = argv[optind++]));
+		} else if (optind < argc) {
+			erofs_err("unexpected argument: %s\n", argv[optind]);
+			return -EINVAL;
+		}
 	}
 	if (quiet) {
 		cfg.c_dbg_lvl = EROFS_ERR;
 		cfg.c_showprogress = false;
 	}
+
+	if (cfg.c_compr_alg[0] && erofs_blksiz(&sbi) != getpagesize())
+		erofs_warn("Please note that subpage blocksize with compression isn't yet supported in kernel. "
+			   "This compressed image will only work with bs = ps = %u bytes",
+			   erofs_blksiz(&sbi));
+
+	if (pclustersize_max) {
+		if (pclustersize_max < erofs_blksiz(&sbi) ||
+		    pclustersize_max % erofs_blksiz(&sbi)) {
+			erofs_err("invalid physical clustersize %u",
+				  pclustersize_max);
+			return -EINVAL;
+		}
+		cfg.c_pclusterblks_max = pclustersize_max >> sbi.blkszbits;
+		cfg.c_pclusterblks_def = cfg.c_pclusterblks_max;
+	}
+	if (cfg.c_chunkbits && cfg.c_chunkbits < sbi.blkszbits) {
+		erofs_err("chunksize %u must be larger than block size",
+			  1u << cfg.c_chunkbits);
+		return -EINVAL;
+	}
+
+	if (pclustersize_packed) {
+		if (pclustersize_packed < erofs_blksiz(&sbi) ||
+		    pclustersize_packed % erofs_blksiz(&sbi)) {
+			erofs_err("invalid pcluster size for the packed file %u",
+				  pclustersize_packed);
+			return -EINVAL;
+		}
+		cfg.c_pclusterblks_packed = pclustersize_packed >> sbi.blkszbits;
+	}
 	return 0;
 }
 
 int erofs_mkfs_update_super_block(struct erofs_buffer_head *bh,
 				  erofs_nid_t root_nid,
-				  erofs_blk_t *blocks)
+				  erofs_blk_t *blocks,
+				  erofs_nid_t packed_nid)
 {
 	struct erofs_super_block sb = {
 		.magic     = cpu_to_le32(EROFS_SUPER_MAGIC_V1),
-		.blkszbits = LOG_BLOCK_SIZE,
+		.blkszbits = sbi.blkszbits,
 		.inos   = cpu_to_le64(sbi.inos),
 		.build_time = cpu_to_le64(sbi.build_time),
 		.build_time_nsec = cpu_to_le32(sbi.build_time_nsec),
 		.blocks = 0,
-		.meta_blkaddr  = sbi.meta_blkaddr,
-		.xattr_blkaddr = sbi.xattr_blkaddr,
+		.meta_blkaddr  = cpu_to_le32(sbi.meta_blkaddr),
+		.xattr_blkaddr = cpu_to_le32(sbi.xattr_blkaddr),
+		.xattr_prefix_count = sbi.xattr_prefix_count,
+		.xattr_prefix_start = cpu_to_le32(sbi.xattr_prefix_start),
 		.feature_incompat = cpu_to_le32(sbi.feature_incompat),
 		.feature_compat = cpu_to_le32(sbi.feature_compat &
 					      ~EROFS_FEATURE_COMPAT_SB_CHKSUM),
 		.extra_devices = cpu_to_le16(sbi.extra_devices),
 		.devt_slotoff = cpu_to_le16(sbi.devt_slotoff),
 	};
-	const unsigned int sb_blksize =
-		round_up(EROFS_SUPER_END, EROFS_BLKSIZ);
+	const u32 sb_blksize = round_up(EROFS_SUPER_END, erofs_blksiz(&sbi));
 	char *buf;
+	int ret;
 
 	*blocks         = erofs_mapbh(NULL);
 	sb.blocks       = cpu_to_le32(*blocks);
 	sb.root_nid     = cpu_to_le16(root_nid);
+	sb.packed_nid    = cpu_to_le64(packed_nid);
 	memcpy(sb.uuid, sbi.uuid, sizeof(sb.uuid));
+	memcpy(sb.volume_name, sbi.volume_name, sizeof(sb.volume_name));
 
-	if (erofs_sb_has_compr_cfgs())
-		sb.u1.available_compr_algs = sbi.available_compr_algs;
+	if (erofs_sb_has_compr_cfgs(&sbi))
+		sb.u1.available_compr_algs = cpu_to_le16(sbi.available_compr_algs);
 	else
 		sb.u1.lz4_max_distance = cpu_to_le16(sbi.lz4_max_distance);
 
@@ -477,19 +722,21 @@
 	}
 	memcpy(buf + EROFS_SUPER_OFFSET, &sb, sizeof(sb));
 
-	bh->fsprivate = buf;
-	bh->op = &erofs_buf_write_bhops;
-	return 0;
+	ret = dev_write(&sbi, buf, erofs_btell(bh, false), EROFS_SUPER_END);
+	free(buf);
+	erofs_bdrop(bh, false);
+	return ret;
 }
 
 static int erofs_mkfs_superblock_csum_set(void)
 {
 	int ret;
-	u8 buf[EROFS_BLKSIZ];
+	u8 buf[EROFS_MAX_BLOCK_SIZE];
 	u32 crc;
+	unsigned int len;
 	struct erofs_super_block *sb;
 
-	ret = blk_read(0, buf, 0, 1);
+	ret = blk_read(&sbi, 0, buf, 0, erofs_blknr(&sbi, EROFS_SUPER_END) + 1);
 	if (ret) {
 		erofs_err("failed to read superblock to set checksum: %s",
 			  erofs_strerror(ret));
@@ -510,12 +757,16 @@
 	/* turn on checksum feature */
 	sb->feature_compat = cpu_to_le32(le32_to_cpu(sb->feature_compat) |
 					 EROFS_FEATURE_COMPAT_SB_CHKSUM);
-	crc = erofs_crc32c(~0, (u8 *)sb, EROFS_BLKSIZ - EROFS_SUPER_OFFSET);
+	if (erofs_blksiz(&sbi) > EROFS_SUPER_OFFSET)
+		len = erofs_blksiz(&sbi) - EROFS_SUPER_OFFSET;
+	else
+		len = erofs_blksiz(&sbi);
+	crc = erofs_crc32c(~0, (u8 *)sb, len);
 
 	/* set up checksum field to erofs_super_block */
 	sb->checksum = cpu_to_le32(crc);
 
-	ret = blk_write(buf, 0, 1);
+	ret = blk_write(&sbi, buf, 0, 1);
 	if (ret) {
 		erofs_err("failed to write checksummed superblock: %s",
 			  erofs_strerror(ret));
@@ -530,16 +781,15 @@
 {
 	cfg.c_showprogress = true;
 	cfg.c_legacy_compress = false;
-	sbi.feature_incompat = EROFS_FEATURE_INCOMPAT_LZ4_0PADDING;
+	cfg.c_inline_data = true;
+	cfg.c_xattr_name_filter = true;
+	sbi.blkszbits = ilog2(min_t(u32, getpagesize(), EROFS_MAX_BLOCK_SIZE));
+	sbi.feature_incompat = EROFS_FEATURE_INCOMPAT_ZERO_PADDING;
 	sbi.feature_compat = EROFS_FEATURE_COMPAT_SB_CHKSUM |
 			     EROFS_FEATURE_COMPAT_MTIME;
 
 	/* generate a default uuid first */
-#ifdef HAVE_LIBUUID
-	do {
-		uuid_generate(sbi.uuid);
-	} while (uuid_is_null(sbi.uuid));
-#endif
+	erofs_uuid_generate(sbi.uuid);
 }
 
 /* https://reproducible-builds.org/specs/source-date-epoch/ for more details */
@@ -574,17 +824,111 @@
 	if (cfg.c_dbg_lvl >= EROFS_WARN)
 		printf("%s %s\n", basename(argv[0]), cfg.c_version);
 }
+static struct erofs_inode *erofs_alloc_root_inode(void)
+{
+	struct erofs_inode *root;
+
+	root = erofs_new_inode();
+	if (IS_ERR(root))
+		return root;
+	root->i_srcpath = strdup("/");
+	root->i_mode = S_IFDIR | 0777;
+	root->i_parent = root;
+	root->i_mtime = root->sbi->build_time;
+	root->i_mtime_nsec = root->sbi->build_time_nsec;
+	erofs_init_empty_dir(root);
+	return root;
+}
+
+static int erofs_rebuild_load_trees(struct erofs_inode *root)
+{
+	struct erofs_sb_info *src;
+	unsigned int extra_devices = 0;
+	erofs_blk_t nblocks;
+	int ret, idx;
+
+	list_for_each_entry(src, &rebuild_src_list, list) {
+		ret = erofs_rebuild_load_tree(root, src);
+		if (ret) {
+			erofs_err("failed to load %s", src->devname);
+			return ret;
+		}
+		if (src->extra_devices > 1) {
+			erofs_err("%s: unsupported number (%u) of extra devices",
+				  src->devname, src->extra_devices);
+			return -EOPNOTSUPP;
+		}
+		extra_devices += src->extra_devices;
+	}
+
+	if (extra_devices && extra_devices != rebuild_src_count) {
+		erofs_err("extra_devices(%u) is mismatched with source images(%u)",
+			  extra_devices, rebuild_src_count);
+		return -EOPNOTSUPP;
+	}
+
+	ret = erofs_mkfs_init_devices(&sbi, rebuild_src_count);
+	if (ret)
+		return ret;
+
+	list_for_each_entry(src, &rebuild_src_list, list) {
+		u8 *tag = NULL;
+
+		if (extra_devices) {
+			nblocks = src->devs[0].blocks;
+			tag = src->devs[0].tag;
+		} else {
+			nblocks = src->primarydevice_blocks;
+		}
+		DBG_BUGON(src->dev < 1);
+		idx = src->dev - 1;
+		sbi.devs[idx].blocks = nblocks;
+		if (tag && *tag)
+			memcpy(sbi.devs[idx].tag, tag, sizeof(sbi.devs[0].tag));
+		else
+			/* convert UUID of the source image to a hex string */
+			sprintf((char *)sbi.devs[idx].tag,
+				"%04x%04x%04x%04x%04x%04x%04x%04x",
+				(src->uuid[0] << 8) | src->uuid[1],
+				(src->uuid[2] << 8) | src->uuid[3],
+				(src->uuid[4] << 8) | src->uuid[5],
+				(src->uuid[6] << 8) | src->uuid[7],
+				(src->uuid[8] << 8) | src->uuid[9],
+				(src->uuid[10] << 8) | src->uuid[11],
+				(src->uuid[12] << 8) | src->uuid[13],
+				(src->uuid[14] << 8) | src->uuid[15]);
+	}
+	return 0;
+}
+
+static void erofs_mkfs_showsummaries(erofs_blk_t nblocks)
+{
+	char uuid_str[37] = {};
+
+	if (!(cfg.c_dbg_lvl > EROFS_ERR && cfg.c_showprogress))
+		return;
+
+	erofs_uuid_unparse_lower(sbi.uuid, uuid_str);
+
+	fprintf(stdout, "------\nFilesystem UUID: %s\n"
+		"Filesystem total blocks: %u (of %u-byte blocks)\n"
+		"Filesystem total inodes: %llu\n"
+		"Filesystem total metadata blocks: %u\n"
+		"Filesystem total deduplicated bytes (of source files): %llu\n",
+		uuid_str, nblocks, 1U << sbi.blkszbits, sbi.inos | 0ULL,
+		erofs_total_metablocks(),
+		sbi.saved_by_deduplication | 0ULL);
+}
 
 int main(int argc, char **argv)
 {
 	int err = 0;
 	struct erofs_buffer_head *sb_bh;
-	struct erofs_inode *root_inode;
-	erofs_nid_t root_nid;
-	struct stat64 st;
+	struct erofs_inode *root_inode, *packed_inode;
+	erofs_nid_t root_nid, packed_nid;
 	erofs_blk_t nblocks;
 	struct timeval t;
-	char uuid_str[37] = "not available";
+	FILE *packedfile = NULL;
 
 	erofs_init_configure();
 	erofs_mkfs_default_options();
@@ -603,22 +947,6 @@
 		return 1;
 	}
 
-	if (cfg.c_chunkbits) {
-		err = erofs_blob_init(cfg.c_blobdev_path);
-		if (err)
-			return 1;
-	}
-
-	err = lstat64(cfg.c_src_path, &st);
-	if (err)
-		return 1;
-	if (!S_ISDIR(st.st_mode)) {
-		erofs_err("root of the filesystem is not a directory - %s",
-			  cfg.c_src_path);
-		usage();
-		return 1;
-	}
-
 	if (cfg.c_unix_timestamp != -1) {
 		sbi.build_time      = cfg.c_unix_timestamp;
 		sbi.build_time_nsec = 0;
@@ -627,12 +955,20 @@
 		sbi.build_time_nsec = t.tv_usec;
 	}
 
-	err = dev_open(cfg.c_img_path);
+	err = dev_open(&sbi, cfg.c_img_path);
 	if (err) {
 		usage();
 		return 1;
 	}
 
+	if (tar_mode && !erofstar.index_mode) {
+		err = erofs_diskbuf_init(1);
+		if (err) {
+			erofs_err("failed to initialize diskbuf: %s",
+				   strerror(-err));
+			goto exit;
+		}
+	}
 #ifdef WITH_ANDROID
 	if (cfg.fs_config_file &&
 	    load_canned_fs_config(cfg.fs_config_file) < 0) {
@@ -640,21 +976,64 @@
 		return 1;
 	}
 
-	if (cfg.block_list_file && erofs_droid_blocklist_fopen() < 0) {
+	if (cfg.block_list_file &&
+	    erofs_blocklist_open(cfg.block_list_file, false)) {
 		erofs_err("failed to open %s", cfg.block_list_file);
 		return 1;
 	}
 #endif
 	erofs_show_config();
-	if (erofs_sb_has_chunked_file())
-		erofs_warn("EXPERIMENTAL chunked file feature in use. Use at your own risk!");
-	if (cfg.c_ztailpacking)
-		erofs_warn("EXPERIMENTAL compressed inline data feature in use. Use at your own risk!");
-	erofs_set_fs_root(cfg.c_src_path);
+	if (cfg.c_fragments || cfg.c_extra_ea_name_prefixes) {
+		if (!cfg.c_pclusterblks_packed)
+			cfg.c_pclusterblks_packed = cfg.c_pclusterblks_def;
+
+		packedfile = erofs_packedfile_init();
+		if (IS_ERR(packedfile)) {
+			erofs_err("failed to initialize packedfile");
+			return 1;
+		}
+	}
+
+	if (cfg.c_fragments) {
+		err = z_erofs_fragments_init();
+		if (err) {
+			erofs_err("failed to initialize fragments");
+			return 1;
+		}
+	}
+
 #ifndef NDEBUG
 	if (cfg.c_random_pclusterblks)
 		srand(time(NULL));
 #endif
+	if (tar_mode && erofstar.index_mode) {
+		if (erofstar.mapfile) {
+			err = erofs_blocklist_open(erofstar.mapfile, true);
+			if (err) {
+				erofs_err("failed to open %s", erofstar.mapfile);
+				goto exit;
+			}
+		} else {
+			sbi.blkszbits = 9;
+		}
+	}
+
+	if (rebuild_mode) {
+		struct erofs_sb_info *src;
+
+		erofs_warn("EXPERIMENTAL rebuild mode in use. Use at your own risk!");
+
+		src = list_first_entry(&rebuild_src_list, struct erofs_sb_info, list);
+		if (!src)
+			goto exit;
+		err = erofs_read_superblock(src);
+		if (err) {
+			erofs_err("failed to read superblock of %s", src->devname);
+			goto exit;
+		}
+		sbi.blkszbits = src->blkszbits;
+	}
+
 	sb_bh = erofs_buffer_init();
 	if (IS_ERR(sb_bh)) {
 		err = PTR_ERR(sb_bh);
@@ -669,57 +1048,135 @@
 		goto exit;
 	}
 
-	err = erofs_load_compress_hints();
+	/* make sure that the superblock is the very first block */
+	(void)erofs_mapbh(sb_bh->block);
+	if (erofs_btell(sb_bh, false) != 0) {
+		erofs_err("failed to reserve erofs_super_block");
+		goto exit;
+	}
+
+	err = erofs_load_compress_hints(&sbi);
 	if (err) {
 		erofs_err("failed to load compress hints %s",
 			  cfg.c_compress_hints_file);
 		goto exit;
 	}
 
-	err = z_erofs_compress_init(sb_bh);
+	err = z_erofs_compress_init(&sbi, sb_bh);
 	if (err) {
 		erofs_err("failed to initialize compressor: %s",
 			  erofs_strerror(err));
 		goto exit;
 	}
 
-	err = erofs_generate_devtable();
+	if (cfg.c_dedupe) {
+		if (!cfg.c_compr_alg[0]) {
+			erofs_err("Compression is not enabled.  Turn on chunk-based data deduplication instead.");
+			cfg.c_chunkbits = sbi.blkszbits;
+		} else {
+			err = z_erofs_dedupe_init(erofs_blksiz(&sbi));
+			if (err) {
+				erofs_err("failed to initialize deduplication: %s",
+					  erofs_strerror(err));
+				goto exit;
+			}
+		}
+	}
+
+	if (cfg.c_chunkbits) {
+		err = erofs_blob_init(cfg.c_blobdev_path);
+		if (err)
+			return 1;
+	}
+
+	if ((erofstar.index_mode && !erofstar.mapfile) || cfg.c_blobdev_path)
+		err = erofs_mkfs_init_devices(&sbi, 1);
 	if (err) {
 		erofs_err("failed to generate device table: %s",
 			  erofs_strerror(err));
 		goto exit;
 	}
-#ifdef HAVE_LIBUUID
-	uuid_unparse_lower(sbi.uuid, uuid_str);
-#endif
-	erofs_info("filesystem UUID: %s", uuid_str);
 
 	erofs_inode_manager_init();
 
-	err = erofs_build_shared_xattrs_from_path(cfg.c_src_path);
-	if (err) {
-		erofs_err("failed to build shared xattrs: %s",
-			  erofs_strerror(err));
-		goto exit;
-	}
+	if (tar_mode) {
+		root_inode = erofs_alloc_root_inode();
+		if (IS_ERR(root_inode)) {
+			err = PTR_ERR(root_inode);
+			goto exit;
+		}
 
-	root_inode = erofs_mkfs_build_tree_from_path(NULL, cfg.c_src_path);
-	if (IS_ERR(root_inode)) {
-		err = PTR_ERR(root_inode);
-		goto exit;
-	}
+		while (!(err = tarerofs_parse_tar(root_inode, &erofstar)));
 
+		if (err < 0)
+			goto exit;
+
+		err = erofs_rebuild_dump_tree(root_inode);
+		if (err < 0)
+			goto exit;
+	} else if (rebuild_mode) {
+		root_inode = erofs_alloc_root_inode();
+		if (IS_ERR(root_inode)) {
+			err = PTR_ERR(root_inode);
+			goto exit;
+		}
+
+		err = erofs_rebuild_load_trees(root_inode);
+		if (err)
+			goto exit;
+		err = erofs_rebuild_dump_tree(root_inode);
+		if (err)
+			goto exit;
+	} else {
+		err = erofs_build_shared_xattrs_from_path(&sbi, cfg.c_src_path);
+		if (err) {
+			erofs_err("failed to build shared xattrs: %s",
+				  erofs_strerror(err));
+			goto exit;
+		}
+
+		if (cfg.c_extra_ea_name_prefixes)
+			erofs_xattr_write_name_prefixes(&sbi, packedfile);
+
+		root_inode = erofs_mkfs_build_tree_from_path(cfg.c_src_path);
+		if (IS_ERR(root_inode)) {
+			err = PTR_ERR(root_inode);
+			goto exit;
+		}
+	}
 	root_nid = erofs_lookupnid(root_inode);
 	erofs_iput(root_inode);
 
-	if (cfg.c_chunkbits) {
-		erofs_info("total metadata: %u blocks", erofs_mapbh(NULL));
-		err = erofs_blob_remap();
+	if (erofstar.index_mode || cfg.c_chunkbits || sbi.extra_devices) {
+		if (erofstar.index_mode && !erofstar.mapfile)
+			sbi.devs[0].blocks =
+				BLK_ROUND_UP(&sbi, erofstar.offset);
+		err = erofs_mkfs_dump_blobs(&sbi);
 		if (err)
 			goto exit;
 	}
 
-	err = erofs_mkfs_update_super_block(sb_bh, root_nid, &nblocks);
+	packed_nid = 0;
+	if ((cfg.c_fragments || cfg.c_extra_ea_name_prefixes) &&
+	    erofs_sb_has_fragments(&sbi)) {
+		erofs_update_progressinfo("Handling packed_file ...");
+		packed_inode = erofs_mkfs_build_packedfile();
+		if (IS_ERR(packed_inode)) {
+			err = PTR_ERR(packed_inode);
+			goto exit;
+		}
+		packed_nid = erofs_lookupnid(packed_inode);
+		erofs_iput(packed_inode);
+	}
+
+	/* flush all buffers except for the superblock */
+	if (!erofs_bflush(NULL)) {
+		err = -EIO;
+		goto exit;
+	}
+
+	err = erofs_mkfs_update_super_block(sb_bh, root_nid, &nblocks,
+					    packed_nid);
 	if (err)
 		goto exit;
 
@@ -727,28 +1184,35 @@
 	if (!erofs_bflush(NULL))
 		err = -EIO;
 	else
-		err = dev_resize(nblocks);
+		err = dev_resize(&sbi, nblocks);
 
-	if (!err && erofs_sb_has_sb_chksum())
+	if (!err && erofs_sb_has_sb_chksum(&sbi))
 		err = erofs_mkfs_superblock_csum_set();
 exit:
 	z_erofs_compress_exit();
-#ifdef WITH_ANDROID
-	erofs_droid_blocklist_fclose();
-#endif
-	dev_close();
+	z_erofs_dedupe_exit();
+	erofs_blocklist_close();
+	dev_close(&sbi);
 	erofs_cleanup_compress_hints();
 	erofs_cleanup_exclude_rules();
 	if (cfg.c_chunkbits)
 		erofs_blob_exit();
+	if (cfg.c_fragments)
+		z_erofs_fragments_exit();
+	erofs_packedfile_exit();
+	erofs_xattr_cleanup_name_prefixes();
+	erofs_rebuild_cleanup();
+	erofs_diskbuf_exit();
 	erofs_exit_configure();
+	if (tar_mode)
+		erofs_iostream_close(&erofstar.ios);
 
 	if (err) {
 		erofs_err("\tCould not format the device : %s\n",
 			  erofs_strerror(err));
 		return 1;
-	} else {
-		erofs_update_progressinfo("Build completed.\n");
 	}
+	erofs_update_progressinfo("Build completed.\n");
+	erofs_mkfs_showsummaries(nblocks);
 	return 0;
 }
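
As a quick illustration of the multi-algorithm parsing introduced above in
mkfs_parse_compress_algs(), the following standalone sketch mimics its
strtok()/strchr() logic for a sample "-z" argument; it is illustrative only and
not part of the patch:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	int main(void)
	{
		char algs[] = "lz4hc,12:lzma";	/* as given to -z */
		char *s;
		int i = 0;

		for (s = strtok(algs, ":"); s; s = strtok(NULL, ":"), ++i) {
			const char *lv = strchr(s, ',');

			if (lv)	/* explicit level after the comma */
				printf("alg[%d] = %.*s, level = %d\n",
				       i, (int)(lv - s), s, atoi(lv + 1));
			else	/* no level given: mkfs records -1 (default) */
				printf("alg[%d] = %s, level = -1\n", i, s);
		}
		return 0;
	}

This reports alg[0] = lz4hc with level 12 and alg[1] = lzma with level -1,
mirroring how cfg.c_compr_alg[] and cfg.c_compr_level[] are filled in above.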