Merge tag 'v1.3' into update

erofs-utils: release 1.3
Change-Id: I4623efd3e4246d2acd2b6a1afe785321af8991b5
diff --git a/ChangeLog b/ChangeLog
index d8e89d9..6637bc3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+erofs-utils 1.3
+
+ * This release includes the following updates:
+   - support new big pcluster feature together with Linux 5.13+;
+   - optimize buffer allocation logic (Hu Weiwen);
+   - optimize build performance for large directories (Hu Weiwen);
+   - add support to override uid / gid (Hu Weiwen);
+   - add support to adjust lz4 history window size (Huang Jianan);
+   - add a manual for erofsfuse;
+   - add support to limit max decompressed extent size;
+   - various bugfixes and cleanups;
+
+ -- Gao Xiang <xiang@kernel.org>  Tue, 01 Jun 2021 00:00:00 +0800
+
 erofs-utils (1.2.1-1) unstable; urgency=medium
 
  * A quick maintenance release includes the following updates:
diff --git a/METADATA b/METADATA
index a1390e0..c4346bf 100644
--- a/METADATA
+++ b/METADATA
@@ -5,11 +5,11 @@
     type: GIT
     value: "https://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git"
   }
-  version: "v1.2.1"
+  version: "v1.3"
   license_type: RESTRICTED
   last_upgrade_date {
-    year: 2020
-    month: 12
-    day: 7
+    year: 2021
+    month: 7
+    day: 8
   }
 }
diff --git a/README b/README
index b57550b..af9cdf1 100644
--- a/README
+++ b/README
@@ -1,7 +1,7 @@
 erofs-utils
 ===========
 
-erofs-utils includes user-space tools for erofs filesystem.
+erofs-utils includes user-space tools for EROFS filesystem.
 Currently mkfs.erofs and erofsfuse (experimental) are available.
 
 Dependencies & build
@@ -50,7 +50,7 @@
 mkfs.erofs
 ----------
 
-two main kinds of erofs images can be generated: (un)compressed.
+two main kinds of EROFS images can be generated: (un)compressed.
 
  - For uncompressed images, there will be none of compression
    files in these images. However, it can decide whether the tail
@@ -61,8 +61,8 @@
    saved with compression. If not, fallback to an uncompressed
    file.
 
-How to generate erofs images
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+How to generate EROFS images (Linux 5.3+)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Currently lz4 and lz4hc are available for compression, e.g.
  $ mkfs.erofs -zlz4hc foo.erofs.img foo/
@@ -70,17 +70,42 @@
 Or leave all files uncompressed as an option:
  $ mkfs.erofs foo.erofs.img foo/
 
-How to generate legacy erofs images
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In addition, you could specify a higher compression level to get a
+(slightly) better compression ratio than the default level, e.g.
+ $ mkfs.erofs -zlz4hc,12 foo.erofs.img foo/
+
+How to generate EROFS big pcluster images (Linux 5.13+)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In order to get much better compression ratios (thus better sequential
+read performance for common storage devices), big pcluster feature has
+been introduced since linux-5.13, which is not forward-compatible with
+old kernels.
+
+In detail, -C is used to specify the maximum size of each big pcluster
+in bytes, e.g.
+ $ mkfs.erofs -zlz4hc -C65536 foo.erofs.img foo/
+
+So in that case, pcluster size can be 64KiB at most.
+
+Note that large pcluster size can cause bad random performance, so
+please evaluate carefully in advance. Or make your own per-(sub)file
+compression strategies according to file access patterns if needed.
+
+How to generate legacy EROFS images (Linux 4.19+)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Decompression inplace and compacted indexes have been introduced in
 linux-5.3, which are not forward-compatible with older kernels.
 
-In order to generate _legacy_ erofs images for old kernels,
+In order to generate _legacy_ EROFS images for old kernels,
 consider adding "-E legacy-compress" to the command line, e.g.
 
  $ mkfs.erofs -E legacy-compress -zlz4hc foo.erofs.img foo/
 
+For Linux kernel >= 5.3, legacy EROFS images are _NOT recommended_
+due to runtime performance loss compared with non-legacy images.
+
 Obsoleted erofs.mkfs
 ~~~~~~~~~~~~~~~~~~~~
 
@@ -94,7 +119,7 @@
 erofsfuse (experimental, unstable)
 ----------------------------------
 
-erofsfuse is introduced to support erofs format for various platforms
+erofsfuse is introduced to support EROFS format for various platforms
 (including older linux kernels) and new on-disk features iteration.
 It can also be used as an unpacking tool for unprivileged users.
 
@@ -120,7 +145,7 @@
 
 erofsfuse binary will be generated under fuse folder.
 
-How to mount an erofs image with erofsfuse
+How to mount an EROFS image with erofsfuse
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 As the other FUSE implementations, it's quite simple to mount with
@@ -139,7 +164,7 @@
 Contribution
 ------------
 
-erofs-utils is under GPLv2+ as a part of erofs project,
+erofs-utils is under GPLv2+ as a part of EROFS filesystem project,
 feel free to send patches or feedback to us.
 
 To:
@@ -155,7 +180,7 @@
 Comments
 --------
 
-[1] According to the erofs on-disk format, the tail block of files
+[1] According to the EROFS on-disk format, the tail block of files
     could be inlined aggressively with its metadata in order to reduce
     the I/O overhead and save the storage space (called tail-packing).
 
diff --git a/VERSION b/VERSION
index 203a5b8..ebe8d0d 100644
--- a/VERSION
+++ b/VERSION
@@ -1,2 +1,2 @@
-1.2.1
-2021-01-10
+1.3
+2021-06-01
diff --git a/configure.ac b/configure.ac
index 28926c3..f626064 100644
--- a/configure.ac
+++ b/configure.ac
@@ -59,6 +59,12 @@
  fi
 ])
 
+AC_ARG_ENABLE([debug],
+    [AS_HELP_STRING([--enable-debug],
+                    [enable debugging mode @<:@default=no@:>@])],
+    [enable_debug="$enableval"],
+    [enable_debug="no"])
+
 AC_ARG_ENABLE(lz4,
    [AS_HELP_STRING([--disable-lz4], [disable LZ4 compression support @<:@default=enabled@:>@])],
    [enable_lz4="$enableval"], [enable_lz4="yes"])
@@ -150,6 +156,12 @@
 # Checks for library functions.
 AC_CHECK_FUNCS([backtrace fallocate gettimeofday memset realpath strdup strerror strrchr strtoull])
 
+# Configure debug mode
+AS_IF([test "x$enable_debug" != "xno"], [], [
+  dnl Turn off all assert checking.
+  CPPFLAGS="$CPPFLAGS -DNDEBUG"
+])
+
 # Configure libuuid
 AS_IF([test "x$with_uuid" != "xno"], [
   PKG_CHECK_MODULES([libuuid], [uuid])
diff --git a/fuse/main.c b/fuse/main.c
index c162912..37119ea 100644
--- a/fuse/main.c
+++ b/fuse/main.c
@@ -74,6 +74,10 @@
 	ret = erofs_pread(&vi, buffer, size, offset);
 	if (ret)
 		return ret;
+	if (offset >= vi.i_size)	/* EOF check must come first */
+		return 0;
+	if (offset + size > vi.i_size)
+		return vi.i_size - offset;	/* short read up to EOF */
 	return size;
 }
 
@@ -83,6 +87,10 @@
 
 	if (ret < 0)
 		return ret;
+	DBG_BUGON(ret > size);
+	if (ret == size)
+		buffer[size - 1] = '\0';
+	erofs_dbg("readlink(%s): %s", path, buffer);
 	return 0;
 }
 
diff --git a/include/erofs/cache.h b/include/erofs/cache.h
index 8c171f5..611ca5b 100644
--- a/include/erofs/cache.h
+++ b/include/erofs/cache.h
@@ -39,6 +39,7 @@
 
 struct erofs_buffer_block {
 	struct list_head list;
+	struct list_head mapped_list;
 
 	erofs_blk_t blkaddr;
 	int type;
@@ -95,7 +96,7 @@
 struct erofs_buffer_head *erofs_battach(struct erofs_buffer_head *bh,
 					int type, unsigned int size);
 
-erofs_blk_t erofs_mapbh(struct erofs_buffer_block *bb, bool end);
+erofs_blk_t erofs_mapbh(struct erofs_buffer_block *bb);
 bool erofs_bflush(struct erofs_buffer_block *bb);
 
 void erofs_bdrop(struct erofs_buffer_head *bh, bool tryrevoke);
diff --git a/include/erofs/compress.h b/include/erofs/compress.h
index 952f287..d234e8b 100644
--- a/include/erofs/compress.h
+++ b/include/erofs/compress.h
@@ -18,7 +18,7 @@
 
 int erofs_write_compressed_file(struct erofs_inode *inode);
 
-int z_erofs_compress_init(void);
+int z_erofs_compress_init(struct erofs_buffer_head *bh);
 int z_erofs_compress_exit(void);
 
 const char *z_erofs_list_available_compressors(unsigned int i);
diff --git a/include/erofs/config.h b/include/erofs/config.h
index 02ddf59..d140a73 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -40,6 +40,9 @@
 	int c_dbg_lvl;
 	bool c_dry_run;
 	bool c_legacy_compress;
+#ifndef NDEBUG
+	bool c_random_pclusterblks;
+#endif
 	char c_timeinherit;
 
 #ifdef HAVE_LIBSELINUX
@@ -53,7 +56,11 @@
 	int c_force_inodeversion;
 	/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
 	int c_inline_xattr_tolerance;
+
+	u32 c_physical_clusterblks;
+	u32 c_max_decompressed_extent_bytes;
 	u64 c_unix_timestamp;
+	u32 c_uid, c_gid;
 #ifdef WITH_ANDROID
 	char *mount_point;
 	char *target_out_path;
diff --git a/include/erofs/defs.h b/include/erofs/defs.h
index b54cd9d..2e40944 100644
--- a/include/erofs/defs.h
+++ b/include/erofs/defs.h
@@ -87,7 +87,7 @@
 #ifndef __OPTIMIZE__
 #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2 * !!(condition)]))
 #else
-#define BUILD_BUG_ON(condition) assert(condition)
+#define BUILD_BUG_ON(condition) assert(!(condition))
 #endif
 
 #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index ac5b270..da7be56 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -79,6 +79,9 @@
 	u64 inos;
 
 	u8 uuid[16];
+
+	u16 available_compr_algs;
+	u16 lz4_max_distance;
 };
 
 /* global sbi */
@@ -104,6 +107,8 @@
 }
 
 EROFS_FEATURE_FUNCS(lz4_0padding, incompat, INCOMPAT_LZ4_0PADDING)
+EROFS_FEATURE_FUNCS(compr_cfgs, incompat, INCOMPAT_COMPR_CFGS)
+EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER)
 EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
 
 #define EROFS_I_EA_INITED	(1 << 0)
@@ -159,7 +164,6 @@
 			uint16_t z_advise;
 			uint8_t  z_algorithmtype[2];
 			uint8_t  z_logical_clusterbits;
-			uint8_t  z_physical_clusterbits[2];
 		};
 	};
 #ifdef WITH_ANDROID
diff --git a/include/erofs_fs.h b/include/erofs_fs.h
index a69f179..18fc182 100644
--- a/include/erofs_fs.h
+++ b/include/erofs_fs.h
@@ -20,15 +20,22 @@
  * be incompatible with this kernel version.
  */
 #define EROFS_FEATURE_INCOMPAT_LZ4_0PADDING	0x00000001
-#define EROFS_ALL_FEATURE_INCOMPAT		EROFS_FEATURE_INCOMPAT_LZ4_0PADDING
+#define EROFS_FEATURE_INCOMPAT_COMPR_CFGS	0x00000002
+#define EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER	0x00000002
+#define EROFS_ALL_FEATURE_INCOMPAT		\
+	(EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \
+	 EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
+	 EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER)
 
-/* 128-byte erofs on-disk super block */
+#define EROFS_SB_EXTSLOT_SIZE	16
+
+/* erofs on-disk super block (currently 128 bytes) */
 struct erofs_super_block {
 	__le32 magic;           /* file system magic number */
 	__le32 checksum;        /* crc32c(super_block) */
 	__le32 feature_compat;
 	__u8 blkszbits;         /* support block_size == PAGE_SIZE only */
-	__u8 reserved;
+	__u8 sb_extslots;	/* superblock size = 128 + sb_extslots * 16 */
 
 	__le16 root_nid;	/* nid of root directory */
 	__le64 inos;            /* total valid ino # (== f_files - f_favail) */
@@ -41,7 +48,13 @@
 	__u8 uuid[16];          /* 128-bit uuid for volume */
 	__u8 volume_name[16];   /* volume name */
 	__le32 feature_incompat;
-	__u8 reserved2[44];
+	union {
+		/* bitmap for available compression algorithms */
+		__le16 available_compr_algs;
+		/* customized sliding window size instead of 64k by default */
+		__le16 lz4_max_distance;
+	} __packed u1;
+	__u8 reserved2[42];
 };
 
 /*
@@ -77,6 +90,9 @@
 #define EROFS_I_VERSION_BIT             0
 #define EROFS_I_DATALAYOUT_BIT          1
 
+#define EROFS_I_ALL	\
+	((1 << (EROFS_I_DATALAYOUT_BIT + EROFS_I_DATALAYOUT_BITS)) - 1)
+
 /* 32-byte reduced form of an ondisk inode */
 struct erofs_inode_compact {
 	__le16 i_format;	/* inode format hints */
@@ -191,20 +207,33 @@
 				 e->e_name_len + le16_to_cpu(e->e_value_size));
 }
 
+/* maximum supported size of a physical compression cluster */
+#define Z_EROFS_PCLUSTER_MAX_SIZE	(1024 * 1024)
+
 /* available compression algorithm types (for h_algorithmtype) */
 enum {
 	Z_EROFS_COMPRESSION_LZ4	= 0,
 	Z_EROFS_COMPRESSION_MAX
 };
+#define Z_EROFS_ALL_COMPR_ALGS		(1 << (Z_EROFS_COMPRESSION_MAX - 1))
+
+/* 14 bytes (+ length field = 16 bytes) */
+struct z_erofs_lz4_cfgs {
+	__le16 max_distance;
+	__le16 max_pclusterblks;
+	u8 reserved[10];
+} __packed;
 
 /*
  * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
  *  e.g. for 4k logical cluster size,      4B        if compacted 2B is off;
  *                                  (4B) + 2B + (4B) if compacted 2B is on.
+ * bit 1 : HEAD1 big pcluster (0 - off; 1 - on)
+ * bit 2 : HEAD2 big pcluster (0 - off; 1 - on)
  */
-#define Z_EROFS_ADVISE_COMPACTED_2B_BIT         0
-
-#define Z_EROFS_ADVISE_COMPACTED_2B     (1 << Z_EROFS_ADVISE_COMPACTED_2B_BIT)
+#define Z_EROFS_ADVISE_COMPACTED_2B		0x0001
+#define Z_EROFS_ADVISE_BIG_PCLUSTER_1		0x0002
+#define Z_EROFS_ADVISE_BIG_PCLUSTER_2		0x0004
 
 struct z_erofs_map_header {
 	__le32	h_reserved1;
@@ -216,9 +245,7 @@
 	__u8	h_algorithmtype;
 	/*
 	 * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096;
-	 * bit 3-4 : (physical - logical) cluster bits of head 1:
-	 *       For example, if logical clustersize = 4096, 1 for 8192.
-	 * bit 5-7 : (physical - logical) cluster bits of head 2.
+	 * bit 3-7 : reserved.
 	 */
 	__u8	h_clusterbits;
 };
@@ -261,6 +288,13 @@
 #define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS        2
 #define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT         0
 
+/*
+ * D0_CBLKCNT will be marked _only_ at the 1st non-head lcluster to store the
+ * compressed block count of a compressed extent (in logical clusters, aka.
+ * block count of a pcluster).
+ */
+#define Z_EROFS_VLE_DI_D0_CBLKCNT		(1 << 11)
+
 struct z_erofs_vle_decompressed_index {
 	__le16 di_advise;
 	/* where to decompress in the head cluster */
diff --git a/lib/Makefile.am b/lib/Makefile.am
index f21dc35..b12e2c1 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -2,6 +2,24 @@
 # Makefile.am
 
 noinst_LTLIBRARIES = liberofs.la
+noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
+      $(top_srcdir)/include/erofs/cache.h \
+      $(top_srcdir)/include/erofs/compress.h \
+      $(top_srcdir)/include/erofs/config.h \
+      $(top_srcdir)/include/erofs/decompress.h \
+      $(top_srcdir)/include/erofs/defs.h \
+      $(top_srcdir)/include/erofs/err.h \
+      $(top_srcdir)/include/erofs/exclude.h \
+      $(top_srcdir)/include/erofs/hashtable.h \
+      $(top_srcdir)/include/erofs/inode.h \
+      $(top_srcdir)/include/erofs/internal.h \
+      $(top_srcdir)/include/erofs/io.h \
+      $(top_srcdir)/include/erofs/list.h \
+      $(top_srcdir)/include/erofs/print.h \
+      $(top_srcdir)/include/erofs/trace.h \
+      $(top_srcdir)/include/erofs/xattr.h
+
+noinst_HEADERS += compressor.h
 liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
 		      namei.c data.c compress.c compressor.c zmap.c decompress.c
 liberofs_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
diff --git a/lib/cache.c b/lib/cache.c
index 0d5c4a5..340dcdd 100644
--- a/lib/cache.c
+++ b/lib/cache.c
@@ -18,6 +18,11 @@
 };
 static erofs_blk_t tail_blkaddr;
 
+/* buckets for all mapped buffer blocks to boost up allocation */
+static struct list_head mapped_buckets[META + 1][EROFS_BLKSIZ];
+/* last mapped buffer block to accelerate erofs_mapbh() */
+static struct erofs_buffer_block *last_mapped_block = &blkh;
+
 static bool erofs_bh_flush_drop_directly(struct erofs_buffer_head *bh)
 {
 	return erofs_bh_flush_generic_end(bh);
@@ -62,15 +67,32 @@
 /* return buffer_head of erofs super block (with size 0) */
 struct erofs_buffer_head *erofs_buffer_init(void)
 {
+	int i, j;
 	struct erofs_buffer_head *bh = erofs_balloc(META, 0, 0, 0);
 
 	if (IS_ERR(bh))
 		return bh;
 
 	bh->op = &erofs_skip_write_bhops;
+
+	for (i = 0; i < ARRAY_SIZE(mapped_buckets); i++)
+		for (j = 0; j < ARRAY_SIZE(mapped_buckets[0]); j++)
+			init_list_head(&mapped_buckets[i][j]);
 	return bh;
 }
 
+static void erofs_bupdate_mapped(struct erofs_buffer_block *bb)
+{
+	struct list_head *bkt;	/* bucket for (type, off % EROFS_BLKSIZ) */
+
+	if (bb->blkaddr == NULL_ADDR)	/* no block address assigned yet */
+		return;
+
+	bkt = mapped_buckets[bb->type] + bb->buffers.off % EROFS_BLKSIZ;
+	list_del(&bb->mapped_list);	/* unlink from its current position */
+	list_add_tail(&bb->mapped_list, bkt);	/* requeue at bucket tail */
+}
+
 /* return occupied bytes in specific buffer block if succeed */
 static int __erofs_battach(struct erofs_buffer_block *bb,
 			   struct erofs_buffer_head *bh,
@@ -80,7 +102,7 @@
 			   bool dryrun)
 {
 	const erofs_off_t alignedoffset = roundup(bb->buffers.off, alignsize);
-	const int oob = cmpsgn(roundup(bb->buffers.off % EROFS_BLKSIZ,
+	const int oob = cmpsgn(roundup((bb->buffers.off - 1) % EROFS_BLKSIZ + 1,
 				       alignsize) + incr + extrasize,
 			       EROFS_BLKSIZ);
 	bool tailupdate = false;
@@ -110,8 +132,9 @@
 		/* need to update the tail_blkaddr */
 		if (tailupdate)
 			tail_blkaddr = blkaddr + BLK_ROUND_UP(bb->buffers.off);
+		erofs_bupdate_mapped(bb);
 	}
-	return (alignedoffset + incr) % EROFS_BLKSIZ;
+	return (alignedoffset + incr - 1) % EROFS_BLKSIZ + 1;
 }
 
 int erofs_bh_balloon(struct erofs_buffer_head *bh, erofs_off_t incr)
@@ -125,27 +148,75 @@
 	return __erofs_battach(bb, NULL, incr, 1, 0, false);
 }
 
-struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
-				       unsigned int required_ext,
-				       unsigned int inline_ext)
+static int erofs_bfind_for_attach(int type, erofs_off_t size,
+				  unsigned int required_ext,
+				  unsigned int inline_ext,
+				  unsigned int alignsize,
+				  struct erofs_buffer_block **bbp)
 {
 	struct erofs_buffer_block *cur, *bb;
-	struct erofs_buffer_head *bh;
-	unsigned int alignsize, used0, usedmax;
-
-	int ret = get_alignsize(type, &type);
-
-	if (ret < 0)
-		return ERR_PTR(ret);
-	alignsize = ret;
+	unsigned int used0, used_before, usedmax, used;
+	int ret;
 
 	used0 = (size + required_ext) % EROFS_BLKSIZ + inline_ext;
+	/* inline data should be in the same fs block */
+	if (used0 > EROFS_BLKSIZ)
+		return -ENOSPC;
+
+	if (!used0 || alignsize == EROFS_BLKSIZ) {
+		*bbp = NULL;
+		return 0;
+	}
+
 	usedmax = 0;
 	bb = NULL;
 
-	list_for_each_entry(cur, &blkh.list, list) {
-		unsigned int used_before, used;
+	/* try to find a most-fit mapped buffer block first */
+	if (size + required_ext + inline_ext >= EROFS_BLKSIZ)
+		goto skip_mapped;
 
+	used_before = rounddown(EROFS_BLKSIZ -
+				(size + required_ext + inline_ext), alignsize);
+	for (; used_before; --used_before) {
+		struct list_head *bt = mapped_buckets[type] + used_before;
+
+		if (list_empty(bt))
+			continue;
+		cur = list_first_entry(bt, struct erofs_buffer_block,
+				       mapped_list);
+
+		/* last mapped block can be expanded, don't handle it here */
+		if (list_next_entry(cur, list)->blkaddr == NULL_ADDR) {
+			DBG_BUGON(cur != last_mapped_block);
+			continue;
+		}
+
+		DBG_BUGON(cur->type != type);
+		DBG_BUGON(cur->blkaddr == NULL_ADDR);
+		DBG_BUGON(used_before != cur->buffers.off % EROFS_BLKSIZ);
+
+		ret = __erofs_battach(cur, NULL, size, alignsize,
+				      required_ext + inline_ext, true);
+		if (ret < 0) {
+			DBG_BUGON(1);
+			continue;
+		}
+
+		/* should contain all data in the current block */
+		used = ret + required_ext + inline_ext;
+		DBG_BUGON(used > EROFS_BLKSIZ);
+
+		bb = cur;
+		usedmax = used;
+		break;
+	}
+
+skip_mapped:
+	/* try to start from the last mapped one, which can be expanded */
+	cur = last_mapped_block;
+	if (cur == &blkh)
+		cur = list_next_entry(cur, list);
+	for (; cur != &blkh; cur = list_next_entry(cur, list)) {
 		used_before = cur->buffers.off % EROFS_BLKSIZ;
 
 		/* skip if buffer block is just full */
@@ -179,34 +250,56 @@
 			usedmax = used;
 		}
 	}
+	*bbp = bb;
+	return 0;
+}
+
+struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
+				       unsigned int required_ext,
+				       unsigned int inline_ext)
+{
+	struct erofs_buffer_block *bb;
+	struct erofs_buffer_head *bh;
+	unsigned int alignsize;
+
+	int ret = get_alignsize(type, &type);
+
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	DBG_BUGON(type < 0 || type > META);
+	alignsize = ret;
+
+	/* try to find if we could reuse an allocated buffer block */
+	ret = erofs_bfind_for_attach(type, size, required_ext, inline_ext,
+				     alignsize, &bb);
+	if (ret)
+		return ERR_PTR(ret);
 
 	if (bb) {
 		bh = malloc(sizeof(struct erofs_buffer_head));
 		if (!bh)
 			return ERR_PTR(-ENOMEM);
-		goto found;
+	} else {
+		/* get a new buffer block instead */
+		bb = malloc(sizeof(struct erofs_buffer_block));
+		if (!bb)
+			return ERR_PTR(-ENOMEM);
+
+		bb->type = type;
+		bb->blkaddr = NULL_ADDR;
+		bb->buffers.off = 0;
+		init_list_head(&bb->buffers.list);
+		init_list_head(&bb->mapped_list);
+
+		bh = malloc(sizeof(struct erofs_buffer_head));
+		if (!bh) {
+			free(bb);	/* safe: bb is not linked anywhere yet */
+			return ERR_PTR(-ENOMEM);
+		}
+		list_add_tail(&bb->list, &blkh.list);	/* link once bh exists */
 	}
 
-	/* allocate a new buffer block */
-	if (used0 > EROFS_BLKSIZ)
-		return ERR_PTR(-ENOSPC);
-
-	bb = malloc(sizeof(struct erofs_buffer_block));
-	if (!bb)
-		return ERR_PTR(-ENOMEM);
-
-	bb->type = type;
-	bb->blkaddr = NULL_ADDR;
-	bb->buffers.off = 0;
-	init_list_head(&bb->buffers.list);
-	list_add_tail(&bb->list, &blkh.list);
-
-	bh = malloc(sizeof(struct erofs_buffer_head));
-	if (!bh) {
-		free(bb);
-		return ERR_PTR(-ENOMEM);
-	}
-found:
 	ret = __erofs_battach(bb, bh, size, alignsize,
 			      required_ext + inline_ext, false);
 	if (ret < 0)
@@ -247,8 +340,11 @@
 {
 	erofs_blk_t blkaddr;
 
-	if (bb->blkaddr == NULL_ADDR)
+	if (bb->blkaddr == NULL_ADDR) {
 		bb->blkaddr = tail_blkaddr;
+		last_mapped_block = bb;
+		erofs_bupdate_mapped(bb);
+	}
 
 	blkaddr = bb->blkaddr + BLK_ROUND_UP(bb->buffers.off);
 	if (blkaddr > tail_blkaddr)
@@ -257,19 +353,20 @@
 	return blkaddr;
 }
 
-erofs_blk_t erofs_mapbh(struct erofs_buffer_block *bb, bool end)
+erofs_blk_t erofs_mapbh(struct erofs_buffer_block *bb)
 {
-	struct erofs_buffer_block *t, *nt;
+	struct erofs_buffer_block *t = last_mapped_block;
 
-	if (!bb || bb->blkaddr == NULL_ADDR) {
-		list_for_each_entry_safe(t, nt, &blkh.list, list) {
-			if (!end && (t == bb || nt == &blkh))
-				break;
-			(void)__erofs_mapbh(t);
-			if (end && t == bb)
-				break;
-		}
-	}
+	if (bb && bb->blkaddr != NULL_ADDR)
+		return bb->blkaddr;
+	do {
+		t = list_next_entry(t, list);
+		if (t == &blkh)
+			break;
+
+		DBG_BUGON(t->blkaddr != NULL_ADDR);
+		(void)__erofs_mapbh(t);
+	} while (t != bb);
 	return tail_blkaddr;
 }
 
@@ -311,6 +408,7 @@
 
 		erofs_dbg("block %u to %u flushed", p->blkaddr, blkaddr - 1);
 
+		list_del(&p->mapped_list);
 		list_del(&p->list);
 		free(p);
 	}
@@ -334,6 +432,10 @@
 	if (!list_empty(&bb->buffers.list))
 		return;
 
+	if (bb == last_mapped_block)
+		last_mapped_block = list_prev_entry(bb, list);
+
+	list_del(&bb->mapped_list);
 	list_del(&bb->list);
 	free(bb);
 
diff --git a/lib/compress.c b/lib/compress.c
index 86db940..2093bfd 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -22,15 +22,15 @@
 static struct erofs_compress compresshandle;
 static int compressionlevel;
 
-static struct z_erofs_map_header mapheader;
+static unsigned int algorithmtype[2];
 
 struct z_erofs_vle_compress_ctx {
 	u8 *metacur;
 
 	u8 queue[EROFS_CONFIG_COMPR_MAX_SZ * 2];
 	unsigned int head, tail;
-
-	erofs_blk_t blkaddr;	/* pointing to the next blkaddr */
+	unsigned int compressedblks;
+	erofs_blk_t blkaddr;		/* pointing to the next blkaddr */
 	u16 clusterofs;
 };
 
@@ -89,7 +89,13 @@
 	}
 
 	do {
-		if (d0) {
+		/* XXX: big pcluster feature should be per-inode */
+		if (d0 == 1 && cfg.c_physical_clusterblks > 1) {
+			type = Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD;
+			di.di_u.delta[0] = cpu_to_le16(ctx->compressedblks |
+					Z_EROFS_VLE_DI_D0_CBLKCNT);
+			di.di_u.delta[1] = cpu_to_le16(d1);
+		} else if (d0) {
 			type = Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD;
 
 			di.di_u.delta[0] = cpu_to_le16(d0);
@@ -115,9 +121,8 @@
 	ctx->clusterofs = clusterofs + count;
 }
 
-static int write_uncompressed_block(struct z_erofs_vle_compress_ctx *ctx,
-				    unsigned int *len,
-				    char *dst)
+static int write_uncompressed_extent(struct z_erofs_vle_compress_ctx *ctx,
+				     unsigned int *len, char *dst)
 {
 	int ret;
 	unsigned int count;
@@ -144,6 +149,16 @@
 	return count;
 }
 
+/* pcluster limit in blocks; TODO: apply per-(sub)file strategies here */
+static unsigned int z_erofs_get_max_pclusterblks(struct erofs_inode *inode)
+{
+#ifndef NDEBUG
+	if (cfg.c_random_pclusterblks)
+		return 1 + rand() % cfg.c_physical_clusterblks;	/* 1..max */
+#endif
+	return cfg.c_physical_clusterblks;
+}
+
 static int vle_compress_one(struct erofs_inode *inode,
 			    struct z_erofs_vle_compress_ctx *ctx,
 			    bool final)
@@ -152,22 +167,27 @@
 	unsigned int len = ctx->tail - ctx->head;
 	unsigned int count;
 	int ret;
-	static char dstbuf[EROFS_BLKSIZ * 2];
+	static char dstbuf[EROFS_CONFIG_COMPR_MAX_SZ + EROFS_BLKSIZ];
 	char *const dst = dstbuf + EROFS_BLKSIZ;
 
 	while (len) {
+		const unsigned int pclustersize =
+			z_erofs_get_max_pclusterblks(inode) * EROFS_BLKSIZ;
 		bool raw;
 
-		if (len <= EROFS_BLKSIZ) {
-			if (final)
-				goto nocompression;
-			break;
+		if (len <= pclustersize) {
+			if (final) {
+				if (len <= EROFS_BLKSIZ)
+					goto nocompression;
+			} else {
+				break;
+			}
 		}
 
-		count = len;
+		count = min(len, cfg.c_max_decompressed_extent_bytes);
 		ret = erofs_compress_destsize(h, compressionlevel,
 					      ctx->queue + ctx->head,
-					      &count, dst, EROFS_BLKSIZ);
+					      &count, dst, pclustersize);
 		if (ret <= 0) {
 			if (ret != -EAGAIN) {
 				erofs_err("failed to compress %s: %s",
@@ -175,32 +195,42 @@
 					  erofs_strerror(ret));
 			}
 nocompression:
-			ret = write_uncompressed_block(ctx, &len, dst);
+			ret = write_uncompressed_extent(ctx, &len, dst);
 			if (ret < 0)
 				return ret;
 			count = ret;
+			ctx->compressedblks = 1;
 			raw = true;
 		} else {
+			const unsigned int tailused = ret & (EROFS_BLKSIZ - 1);
+			const unsigned int padding =
+				erofs_sb_has_lz4_0padding() && tailused ?
+					EROFS_BLKSIZ - tailused : 0;
+
+			ctx->compressedblks = DIV_ROUND_UP(ret, EROFS_BLKSIZ);
+			DBG_BUGON(ctx->compressedblks * EROFS_BLKSIZ >= count);
+
+			/* zero out garbage trailing data for non-0padding */
+			if (!erofs_sb_has_lz4_0padding())
+				memset(dst + ret, 0,
+				       roundup(ret, EROFS_BLKSIZ) - ret);
+
 			/* write compressed data */
-			erofs_dbg("Writing %u compressed data to block %u",
-				  count, ctx->blkaddr);
+			erofs_dbg("Writing %u compressed data to %u of %u blocks",
+				  count, ctx->blkaddr, ctx->compressedblks);
 
-			if (erofs_sb_has_lz4_0padding())
-				ret = blk_write(dst - (EROFS_BLKSIZ - ret),
-						ctx->blkaddr, 1);
-			else
-				ret = blk_write(dst, ctx->blkaddr, 1);
-
+			ret = blk_write(dst - padding, ctx->blkaddr,
+					ctx->compressedblks);
 			if (ret)
 				return ret;
 			raw = false;
 		}
 
 		ctx->head += count;
-		/* write compression indexes for this blkaddr */
+		/* write compression indexes for this pcluster */
 		vle_write_indexes(ctx, count, raw);
 
-		++ctx->blkaddr;
+		ctx->blkaddr += ctx->compressedblks;
 		len -= count;
 
 		if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) {
@@ -256,9 +286,10 @@
 				     erofs_blk_t *blkaddr_ret,
 				     unsigned int destsize,
 				     unsigned int logical_clusterbits,
-				     bool final)
+				     bool final, bool *dummy_head)
 {
-	unsigned int vcnt, encodebits, pos, i;
+	unsigned int vcnt, encodebits, pos, i, cblks;
+	bool update_blkaddr;
 	erofs_blk_t blkaddr;
 
 	if (destsize == 4) {
@@ -270,6 +301,7 @@
 	}
 	encodebits = (vcnt * destsize * 8 - 32) / vcnt;
 	blkaddr = *blkaddr_ret;
+	update_blkaddr = erofs_sb_has_big_pcluster();
 
 	pos = 0;
 	for (i = 0; i < vcnt; ++i) {
@@ -277,13 +309,26 @@
 		u8 ch, rem;
 
 		if (cv[i].clustertype == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
-			if (i + 1 == vcnt)
-				offset = cv[i].u.delta[1];
-			else
+			if (cv[i].u.delta[0] & Z_EROFS_VLE_DI_D0_CBLKCNT) {
+				cblks = cv[i].u.delta[0] & ~Z_EROFS_VLE_DI_D0_CBLKCNT;
 				offset = cv[i].u.delta[0];
+				blkaddr += cblks;
+				*dummy_head = false;
+			} else if (i + 1 == vcnt) {
+				offset = cv[i].u.delta[1];
+			} else {
+				offset = cv[i].u.delta[0];
+			}
 		} else {
 			offset = cv[i].clusterofs;
-			++blkaddr;
+			if (*dummy_head) {
+				++blkaddr;
+				if (update_blkaddr)
+					*blkaddr_ret = blkaddr;
+			}
+			*dummy_head = true;
+			update_blkaddr = false;
+
 			if (cv[i].u.blkaddr != blkaddr) {
 				if (i + 1 != vcnt)
 					DBG_BUGON(!final);
@@ -307,18 +352,20 @@
 int z_erofs_convert_to_compacted_format(struct erofs_inode *inode,
 					erofs_blk_t blkaddr,
 					unsigned int legacymetasize,
-					unsigned int logical_clusterbits)
+					void *compressmeta)
 {
 	const unsigned int mpos = Z_EROFS_VLE_EXTENT_ALIGN(inode->inode_isize +
 							   inode->xattr_isize) +
 				  sizeof(struct z_erofs_map_header);
 	const unsigned int totalidx = (legacymetasize -
 				       Z_EROFS_LEGACY_MAP_HEADER_SIZE) / 8;
+	const unsigned int logical_clusterbits = inode->z_logical_clusterbits;
 	u8 *out, *in;
 	struct z_erofs_compressindex_vec cv[16];
 	/* # of 8-byte units so that it can be aligned with 32 bytes */
 	unsigned int compacted_4b_initial, compacted_4b_end;
 	unsigned int compacted_2b;
+	bool dummy_head;
 
 	if (logical_clusterbits < LOG_BLOCK_SIZE || LOG_BLOCK_SIZE < 12)
 		return -EINVAL;
@@ -343,18 +390,24 @@
 		compacted_4b_end = totalidx;
 	}
 
-	out = in = inode->compressmeta;
+	out = in = compressmeta;
 
-	/* write out compacted header */
-	memcpy(out, &mapheader, sizeof(mapheader));
-	out += sizeof(mapheader);
+	out += sizeof(struct z_erofs_map_header);
 	in += Z_EROFS_LEGACY_MAP_HEADER_SIZE;
 
+	dummy_head = false;
+	/* prior to bigpcluster, blkaddr was bumped up once coming into HEAD */
+	if (!erofs_sb_has_big_pcluster()) {
+		--blkaddr;
+		dummy_head = true;
+	}
+
 	/* generate compacted_4b_initial */
 	while (compacted_4b_initial) {
 		in = parse_legacy_indexes(cv, 2, in);
 		out = write_compacted_indexes(out, cv, &blkaddr,
-					      4, logical_clusterbits, false);
+					      4, logical_clusterbits, false,
+					      &dummy_head);
 		compacted_4b_initial -= 2;
 	}
 	DBG_BUGON(compacted_4b_initial);
@@ -363,7 +416,8 @@
 	while (compacted_2b) {
 		in = parse_legacy_indexes(cv, 16, in);
 		out = write_compacted_indexes(out, cv, &blkaddr,
-					      2, logical_clusterbits, false);
+					      2, logical_clusterbits, false,
+					      &dummy_head);
 		compacted_2b -= 16;
 	}
 	DBG_BUGON(compacted_2b);
@@ -372,7 +426,8 @@
 	while (compacted_4b_end > 1) {
 		in = parse_legacy_indexes(cv, 2, in);
 		out = write_compacted_indexes(out, cv, &blkaddr,
-					      4, logical_clusterbits, false);
+					      4, logical_clusterbits, false,
+					      &dummy_head);
 		compacted_4b_end -= 2;
 	}
 
@@ -381,13 +436,29 @@
 		memset(cv, 0, sizeof(cv));
 		in = parse_legacy_indexes(cv, 1, in);
 		out = write_compacted_indexes(out, cv, &blkaddr,
-					      4, logical_clusterbits, true);
+					      4, logical_clusterbits, true,
+					      &dummy_head);
 	}
-	inode->extent_isize = out - (u8 *)inode->compressmeta;
-	inode->datalayout = EROFS_INODE_FLAT_COMPRESSION;
+	inode->extent_isize = out - (u8 *)compressmeta;
 	return 0;
 }
 
+static void z_erofs_write_mapheader(struct erofs_inode *inode,
+				    void *compressmeta)
+{
+	struct z_erofs_map_header h = {
+		.h_advise = cpu_to_le16(inode->z_advise),
+		.h_algorithmtype = inode->z_algorithmtype[1] << 4 |
+				   inode->z_algorithmtype[0],
+		/* bit 0-2: logical cluster bits - 12, e.g. 0 for 4096 */
+		.h_clusterbits = inode->z_logical_clusterbits - 12,
+	};
+
+	memset(compressmeta, 0, Z_EROFS_LEGACY_MAP_HEADER_SIZE);
+	/* copy the map header to the start of the zeroed legacy-sized area */
+	memcpy(compressmeta, &h, sizeof(struct z_erofs_map_header));
+}
+
 int erofs_write_compressed_file(struct erofs_inode *inode)
 {
 	struct erofs_buffer_head *bh;
@@ -414,9 +485,27 @@
 		goto err_close;
 	}
 
-	memset(compressmeta, 0, Z_EROFS_LEGACY_MAP_HEADER_SIZE);
+	/* initialize per-file compression setting */
+	inode->z_advise = 0;
+	if (!cfg.c_legacy_compress) {
+		inode->z_advise |= Z_EROFS_ADVISE_COMPACTED_2B;
+		inode->datalayout = EROFS_INODE_FLAT_COMPRESSION;
+	} else {
+		inode->datalayout = EROFS_INODE_FLAT_COMPRESSION_LEGACY;
+	}
 
-	blkaddr = erofs_mapbh(bh->block, true);	/* start_blkaddr */
+	if (cfg.c_physical_clusterblks > 1) {
+		inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1;
+		if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION)
+			inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
+	}
+	inode->z_algorithmtype[0] = algorithmtype[0];
+	inode->z_algorithmtype[1] = algorithmtype[1];
+	inode->z_logical_clusterbits = LOG_BLOCK_SIZE;
+
+	z_erofs_write_mapheader(inode, compressmeta);
+
+	blkaddr = erofs_mapbh(bh->block);	/* start_blkaddr */
 	ctx.blkaddr = blkaddr;
 	ctx.metacur = compressmeta + Z_EROFS_LEGACY_MAP_HEADER_SIZE;
 	ctx.head = ctx.tail = 0;
@@ -456,8 +545,9 @@
 	vle_write_indexes_final(&ctx);
 
 	close(fd);
+	DBG_BUGON(!compressed_blocks);
 	ret = erofs_bh_balloon(bh, blknr_to_addr(compressed_blocks));
-	DBG_BUGON(ret);
+	DBG_BUGON(ret != EROFS_BLKSIZ);
 
 	erofs_info("compressed %s (%llu bytes) into %u blocks",
 		   inode->i_srcpath, (unsigned long long)inode->i_size,
@@ -468,19 +558,19 @@
 	 *       when both mkfs & kernel support compression inline.
 	 */
 	erofs_bdrop(bh, false);
-	inode->compressmeta = compressmeta;
 	inode->idata_size = 0;
 	inode->u.i_blocks = compressed_blocks;
 
 	legacymetasize = ctx.metacur - compressmeta;
-	if (cfg.c_legacy_compress) {
+	if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
 		inode->extent_isize = legacymetasize;
-		inode->datalayout = EROFS_INODE_FLAT_COMPRESSION_LEGACY;
 	} else {
-		ret = z_erofs_convert_to_compacted_format(inode, blkaddr - 1,
-							  legacymetasize, 12);
+		ret = z_erofs_convert_to_compacted_format(inode, blkaddr,
+							  legacymetasize,
+							  compressmeta);
 		DBG_BUGON(ret);
 	}
+	inode->compressmeta = compressmeta;
 	return 0;
 
 err_bdrop:
@@ -499,9 +589,39 @@
 	return -ENOTSUP;
 }
 
-int z_erofs_compress_init(void)
+int z_erofs_build_compr_cfgs(struct erofs_buffer_head *sb_bh)
 {
-	unsigned int algorithmtype[2];
+	struct erofs_buffer_head *bh = sb_bh;
+	int ret = 0;
+
+	if (sbi.available_compr_algs & (1 << Z_EROFS_COMPRESSION_LZ4)) {
+		struct {
+			__le16 size;
+			struct z_erofs_lz4_cfgs lz4;
+		} __packed lz4alg = {
+			.size = cpu_to_le16(sizeof(struct z_erofs_lz4_cfgs)),
+			.lz4 = {
+				.max_distance =
+					cpu_to_le16(sbi.lz4_max_distance),
+				.max_pclusterblks = cfg.c_physical_clusterblks,
+			}
+		};
+
+		bh = erofs_battach(bh, META, sizeof(lz4alg));
+		if (IS_ERR(bh)) {
+			DBG_BUGON(1);
+			return PTR_ERR(bh);
+		}
+		erofs_mapbh(bh->block);
+		ret = dev_write(&lz4alg, erofs_btell(bh, false),
+				sizeof(lz4alg));
+		bh->op = &erofs_drop_directly_bhops;
+	}
+	return ret;
+}
+
+int z_erofs_compress_init(struct erofs_buffer_head *sb_bh)
+{
 	/* initialize for primary compression algorithm */
 	int ret = erofs_compressor_init(&compresshandle,
 					cfg.c_compr_alg_master);
@@ -524,17 +644,32 @@
 		compresshandle.alg->default_level :
 		cfg.c_compr_level_master;
 
-	/* figure out mapheader */
+	/* figure out primary algorithm */
 	ret = erofs_get_compress_algorithm_id(cfg.c_compr_alg_master);
 	if (ret < 0)
 		return ret;
 
 	algorithmtype[0] = ret;	/* primary algorithm (head 0) */
 	algorithmtype[1] = 0;	/* secondary algorithm (head 1) */
-	mapheader.h_advise |= Z_EROFS_ADVISE_COMPACTED_2B;
-	mapheader.h_algorithmtype = algorithmtype[1] << 4 |
-					  algorithmtype[0];
-	mapheader.h_clusterbits = LOG_BLOCK_SIZE - 12;
+	/*
+	 * if big pcluster is enabled, an extra CBLKCNT lcluster index needs
+	 * to be loaded in order to get those compressed block counts.
+	 */
+	if (cfg.c_physical_clusterblks > 1) {
+		if (cfg.c_physical_clusterblks >
+		    Z_EROFS_PCLUSTER_MAX_SIZE / EROFS_BLKSIZ) {
+			erofs_err("unsupported clusterblks %u (too large)",
+				  cfg.c_physical_clusterblks);
+			return -EINVAL;
+		}
+		erofs_sb_set_big_pcluster();
+		erofs_warn("EXPERIMENTAL big pcluster feature in use. Use at your own risk!");
+	}
+
+	if (erofs_sb_has_compr_cfgs()) {
+		sbi.available_compr_algs |= 1 << ret;
+		return z_erofs_build_compr_cfgs(sb_bh);
+	}
 	return 0;
 }
 
diff --git a/lib/compressor.c b/lib/compressor.c
index b2434e0..8836e0c 100644
--- a/lib/compressor.c
+++ b/lib/compressor.c
@@ -28,6 +28,7 @@
 			    void *dst,
 			    unsigned int dstsize)
 {
+	unsigned uncompressed_size;
 	int ret;
 
 	DBG_BUGON(!c->alg);
@@ -40,7 +41,9 @@
 		return ret;
 
 	/* check if there is enough gains to compress */
-	if (*srcsize <= dstsize * c->compress_threshold / 100)
+	uncompressed_size = *srcsize;
+	if (roundup(ret, EROFS_BLKSIZ) >= uncompressed_size *
+	    c->compress_threshold / 100)
 		return -EAGAIN;
 	return ret;
 }
diff --git a/lib/compressor_lz4.c b/lib/compressor_lz4.c
index 8540a0d..292d0f2 100644
--- a/lib/compressor_lz4.c
+++ b/lib/compressor_lz4.c
@@ -10,6 +10,10 @@
 #include "erofs/internal.h"
 #include "compressor.h"
 
+#ifndef LZ4_DISTANCE_MAX	/* history window size */
+#define LZ4_DISTANCE_MAX 65535	/* set to maximum value by default */
+#endif
+
 static int lz4_compress_destsize(struct erofs_compress *c,
 				 int compression_level,
 				 void *src, unsigned int *srcsize,
@@ -32,6 +36,7 @@
 static int compressor_lz4_init(struct erofs_compress *c)
 {
 	c->alg = &erofs_compressor_lz4;
+	sbi.lz4_max_distance = LZ4_DISTANCE_MAX;
 	return 0;
 }
 
diff --git a/lib/compressor_lz4hc.c b/lib/compressor_lz4hc.c
index 6680563..14c3a71 100644
--- a/lib/compressor_lz4hc.c
+++ b/lib/compressor_lz4hc.c
@@ -11,6 +11,10 @@
 #include "erofs/internal.h"
 #include "compressor.h"
 
+#ifndef LZ4_DISTANCE_MAX	/* history window size */
+#define LZ4_DISTANCE_MAX 65535	/* set to maximum value by default */
+#endif
+
 static int lz4hc_compress_destsize(struct erofs_compress *c,
 				   int compression_level,
 				   void *src,
@@ -44,6 +48,8 @@
 	c->private_data = LZ4_createStreamHC();
 	if (!c->private_data)
 		return -ENOMEM;
+
+	sbi.lz4_max_distance = LZ4_DISTANCE_MAX;
 	return 0;
 }
 
diff --git a/lib/config.c b/lib/config.c
index 3ecd481..99fcf49 100644
--- a/lib/config.c
+++ b/lib/config.c
@@ -17,13 +17,17 @@
 {
 	memset(&cfg, 0, sizeof(cfg));
 
-	cfg.c_dbg_lvl  = 0;
+	cfg.c_dbg_lvl  = 2;
 	cfg.c_version  = PACKAGE_VERSION;
 	cfg.c_dry_run  = false;
 	cfg.c_compr_level_master = -1;
 	cfg.c_force_inodeversion = 0;
 	cfg.c_inline_xattr_tolerance = 2;
 	cfg.c_unix_timestamp = -1;
+	cfg.c_uid = -1;
+	cfg.c_gid = -1;
+	cfg.c_physical_clusterblks = 1;
+	cfg.c_max_decompressed_extent_bytes = -1;
 }
 
 void erofs_show_config(void)
diff --git a/lib/data.c b/lib/data.c
index 3781846..31d81f3 100644
--- a/lib/data.c
+++ b/lib/data.c
@@ -29,6 +29,7 @@
 	if (offset >= inode->i_size) {
 		/* leave out-of-bound access unmapped */
 		map->m_flags = 0;
+		map->m_plen = 0;
 		goto out;
 	}
 
@@ -80,6 +81,7 @@
 	erofs_off_t ptr = offset;
 
 	while (ptr < offset + size) {
+		char *const estart = buffer + ptr - offset;
 		erofs_off_t eend;
 
 		map.m_la = ptr;
@@ -89,29 +91,30 @@
 
 		DBG_BUGON(map.m_plen != map.m_llen);
 
-		if (!(map.m_flags & EROFS_MAP_MAPPED)) {
-			if (!map.m_llen) {
-				ptr = offset + size;
-				continue;
-			}
-			ptr = map.m_la + map.m_llen;
-			continue;
-		}
-
 		/* trim extent */
 		eend = min(offset + size, map.m_la + map.m_llen);
 		DBG_BUGON(ptr < map.m_la);
 
+		if (!(map.m_flags & EROFS_MAP_MAPPED)) {
+			if (!map.m_llen) {
+				/* reached EOF */
+				memset(estart, 0, offset + size - ptr);
+				ptr = offset + size;
+				continue;
+			}
+			memset(estart, 0, eend - ptr);
+			ptr = eend;
+			continue;
+		}
+
 		if (ptr > map.m_la) {
 			map.m_pa += ptr - map.m_la;
 			map.m_la = ptr;
 		}
 
-		ret = dev_read(buffer + ptr - offset,
-			       map.m_pa, eend - map.m_la);
+		ret = dev_read(estart, map.m_pa, eend - map.m_la);
 		if (ret < 0)
 			return -EIO;
-
 		ptr = eend;
 	}
 	return 0;
@@ -127,7 +130,7 @@
 	};
 	bool partial;
 	unsigned int algorithmformat;
-	char raw[EROFS_BLKSIZ];
+	char raw[Z_EROFS_PCLUSTER_MAX_SIZE];
 
 	end = offset + size;
 	while (end > offset) {
@@ -137,19 +140,6 @@
 		if (ret)
 			return ret;
 
-		if (!(map.m_flags & EROFS_MAP_MAPPED)) {
-			end = map.m_la;
-			continue;
-		}
-
-		ret = dev_read(raw, map.m_pa, EROFS_BLKSIZ);
-		if (ret < 0)
-			return -EIO;
-
-		algorithmformat = map.m_flags & EROFS_MAP_ZIPPED ?
-						Z_EROFS_COMPRESSION_LZ4 :
-						Z_EROFS_COMPRESSION_SHIFTED;
-
 		/*
 		 * trim to the needed size if the returned extent is quite
 		 * larger than requested, and set up partial flag as well.
@@ -171,6 +161,20 @@
 			end = map.m_la;
 		}
 
+		if (!(map.m_flags & EROFS_MAP_MAPPED)) {
+			memset(buffer + end - offset, 0, length);
+			end = map.m_la;
+			continue;
+		}
+
+		ret = dev_read(raw, map.m_pa, map.m_plen);
+		if (ret < 0)
+			return -EIO;
+
+		algorithmformat = map.m_flags & EROFS_MAP_ZIPPED ?
+						Z_EROFS_COMPRESSION_LZ4 :
+						Z_EROFS_COMPRESSION_SHIFTED;
+
 		ret = z_erofs_decompress(&(struct z_erofs_decompress_req) {
 					.in = raw,
 					.out = buffer + end - offset,
diff --git a/lib/inode.c b/lib/inode.c
index d0b4d51..787e5b4 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -96,21 +96,6 @@
 	return 0;
 }
 
-static int dentry_add_sorted(struct erofs_dentry *d, struct list_head *head)
-{
-	struct list_head *pos;
-
-	list_for_each(pos, head) {
-		struct erofs_dentry *d2 =
-			container_of(pos, struct erofs_dentry, d_child);
-
-		if (strcmp(d->name, d2->name) < 0)
-			break;
-	}
-	list_add_tail(&d->d_child, pos);
-	return 0;
-}
-
 struct erofs_dentry *erofs_d_alloc(struct erofs_inode *parent,
 				   const char *name)
 {
@@ -122,7 +107,7 @@
 	strncpy(d->name, name, EROFS_NAME_LEN - 1);
 	d->name[EROFS_NAME_LEN - 1] = '\0';
 
-	dentry_add_sorted(d, &parent->i_subdirs);
+	list_add_tail(&d->d_child, &parent->i_subdirs);
 	return d;
 }
 
@@ -148,7 +133,7 @@
 	inode->bh_data = bh;
 
 	/* get blkaddr of the bh */
-	ret = erofs_mapbh(bh->block, true);
+	ret = erofs_mapbh(bh->block);
 	DBG_BUGON(ret < 0);
 
 	/* write blocks except for the tail-end block */
@@ -156,10 +141,19 @@
 	return 0;
 }
 
-int erofs_prepare_dir_file(struct erofs_inode *dir)
+static int comp_subdir(const void *a, const void *b)
 {
-	struct erofs_dentry *d;
-	unsigned int d_size, i_nlink;
+	const struct erofs_dentry *da, *db;
+
+	da = *((const struct erofs_dentry **)a);
+	db = *((const struct erofs_dentry **)b);
+	return strcmp(da->name, db->name);
+}
+
+int erofs_prepare_dir_file(struct erofs_inode *dir, unsigned int nr_subdirs)
+{
+	struct erofs_dentry *d, *n, **sorted_d;
+	unsigned int d_size, i_nlink, i;
 	int ret;
 
 	/* dot is pointed to the current dir inode */
@@ -172,6 +166,22 @@
 	d->inode = erofs_igrab(dir->i_parent);
 	d->type = EROFS_FT_DIR;
 
+	/* sort subdirs */
+	nr_subdirs += 2;
+	sorted_d = malloc(nr_subdirs * sizeof(d));
+	if (!sorted_d)
+		return -ENOMEM;
+	i = 0;
+	list_for_each_entry_safe(d, n, &dir->i_subdirs, d_child) {
+		list_del(&d->d_child);
+		sorted_d[i++] = d;
+	}
+	DBG_BUGON(i != nr_subdirs);
+	qsort(sorted_d, nr_subdirs, sizeof(d), comp_subdir);
+	for (i = 0; i < nr_subdirs; i++)
+		list_add_tail(&sorted_d[i]->d_child, &dir->i_subdirs);
+	free(sorted_d);
+
 	/* let's calculate dir size and update i_nlink */
 	d_size = 0;
 	i_nlink = 0;
@@ -412,7 +422,7 @@
 		u.dic.i_uid = cpu_to_le16((u16)inode->i_uid);
 		u.dic.i_gid = cpu_to_le16((u16)inode->i_gid);
 
-		switch ((inode->i_mode) >> S_SHIFT) {
+		switch (inode->i_mode & S_IFMT) {
 		case S_IFCHR:
 		case S_IFBLK:
 		case S_IFIFO:
@@ -445,7 +455,7 @@
 		u.die.i_ctime = cpu_to_le64(inode->i_ctime);
 		u.die.i_ctime_nsec = cpu_to_le32(inode->i_ctime_nsec);
 
-		switch ((inode->i_mode) >> S_SHIFT) {
+		switch (inode->i_mode & S_IFMT) {
 		case S_IFCHR:
 		case S_IFBLK:
 		case S_IFIFO:
@@ -522,7 +532,7 @@
 		bh->op = &erofs_skip_write_bhops;
 
 		/* get blkaddr of bh */
-		ret = erofs_mapbh(bh->block, true);
+		ret = erofs_mapbh(bh->block);
 		DBG_BUGON(ret < 0);
 		inode->u.i_blkaddr = bh->block->blkaddr;
 
@@ -531,7 +541,7 @@
 	}
 	/* expend a block as the tail block (should be successful) */
 	ret = erofs_bh_balloon(bh, EROFS_BLKSIZ);
-	DBG_BUGON(ret);
+	DBG_BUGON(ret != EROFS_BLKSIZ);
 	return 0;
 }
 
@@ -632,7 +642,7 @@
 		int ret;
 		erofs_off_t pos;
 
-		erofs_mapbh(bh->block, true);
+		erofs_mapbh(bh->block);
 		pos = erofs_btell(bh, true) - EROFS_BLKSIZ;
 		ret = dev_write(inode->idata, pos, inode->idata_size);
 		if (ret)
@@ -752,8 +762,8 @@
 	if (err)
 		return err;
 	inode->i_mode = st->st_mode;
-	inode->i_uid = st->st_uid;
-	inode->i_gid = st->st_gid;
+	inode->i_uid = cfg.c_uid == -1 ? st->st_uid : cfg.c_uid;
+	inode->i_gid = cfg.c_gid == -1 ? st->st_gid : cfg.c_gid;
 	inode->i_ctime = st->st_ctime;
 	inode->i_ctime_nsec = st->st_ctim.tv_nsec;
 
@@ -867,8 +877,10 @@
 		return inode;
 
 	ret = erofs_fill_inode(inode, &st, path);
-	if (ret)
+	if (ret) {
+		free(inode);
 		return ERR_PTR(ret);
+	}
 
 	return inode;
 }
@@ -879,7 +891,7 @@
 	struct erofs_buffer_head *const bh = rootdir->bh;
 	erofs_off_t off, meta_offset;
 
-	erofs_mapbh(bh->block, true);
+	erofs_mapbh(bh->block);
 	off = erofs_btell(bh, false);
 
 	if (off > rootnid_maxoffset)
@@ -898,7 +910,7 @@
 	if (!bh)
 		return inode->nid;
 
-	erofs_mapbh(bh->block, true);
+	erofs_mapbh(bh->block);
 	off = erofs_btell(bh, false);
 
 	meta_offset = blknr_to_addr(sbi.meta_blkaddr);
@@ -920,6 +932,7 @@
 	DIR *_dir;
 	struct dirent *dp;
 	struct erofs_dentry *d;
+	unsigned int nr_subdirs;
 
 	ret = erofs_prepare_xattr_ibody(dir);
 	if (ret < 0)
@@ -954,11 +967,12 @@
 
 	_dir = opendir(dir->i_srcpath);
 	if (!_dir) {
-		erofs_err("%s, failed to opendir at %s: %s",
-			  __func__, dir->i_srcpath, erofs_strerror(errno));
+		erofs_err("failed to opendir at %s: %s",
+			  dir->i_srcpath, erofs_strerror(errno));
 		return ERR_PTR(-errno);
 	}
 
+	nr_subdirs = 0;
 	while (1) {
 		/*
 		 * set errno to 0 before calling readdir() in order to
@@ -982,6 +996,7 @@
 			ret = PTR_ERR(d);
 			goto err_closedir;
 		}
+		nr_subdirs++;
 
 		/* to count i_nlink for directories */
 		d->type = (dp->d_type == DT_DIR ?
@@ -994,7 +1009,7 @@
 	}
 	closedir(_dir);
 
-	ret = erofs_prepare_dir_file(dir);
+	ret = erofs_prepare_dir_file(dir, nr_subdirs);
 	if (ret)
 		goto err;
 
diff --git a/lib/xattr.c b/lib/xattr.c
index 49ebb9c..a7677b9 100644
--- a/lib/xattr.c
+++ b/lib/xattr.c
@@ -446,8 +446,8 @@
 
 	_dir = opendir(path);
 	if (!_dir) {
-		erofs_err("%s, failed to opendir at %s: %s",
-			  __func__, path, erofs_strerror(errno));
+		erofs_err("failed to opendir at %s: %s",
+			  path, erofs_strerror(errno));
 		return -errno;
 	}
 
@@ -575,7 +575,7 @@
 	}
 	bh->op = &erofs_skip_write_bhops;
 
-	erofs_mapbh(bh->block, true);
+	erofs_mapbh(bh->block);
 	off = erofs_btell(bh, false);
 
 	sbi.xattr_blkaddr = off / EROFS_BLKSIZ;
diff --git a/lib/zmap.c b/lib/zmap.c
index ee63de7..1084faa 100644
--- a/lib/zmap.c
+++ b/lib/zmap.c
@@ -14,13 +14,12 @@
 
 int z_erofs_fill_inode(struct erofs_inode *vi)
 {
-	if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
+	if (!erofs_sb_has_big_pcluster() &&
+	    vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
 		vi->z_advise = 0;
 		vi->z_algorithmtype[0] = 0;
 		vi->z_algorithmtype[1] = 0;
 		vi->z_logical_clusterbits = LOG_BLOCK_SIZE;
-		vi->z_physical_clusterbits[0] = vi->z_logical_clusterbits;
-		vi->z_physical_clusterbits[1] = vi->z_logical_clusterbits;
 
 		vi->flags |= EROFS_I_Z_INITED;
 	}
@@ -37,7 +36,8 @@
 	if (vi->flags & EROFS_I_Z_INITED)
 		return 0;
 
-	DBG_BUGON(vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY);
+	DBG_BUGON(!erofs_sb_has_big_pcluster() &&
+		  vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY);
 	pos = round_up(iloc(vi->nid) + vi->inode_isize + vi->xattr_isize, 8);
 
 	ret = dev_read(buf, pos, sizeof(buf));
@@ -56,17 +56,14 @@
 	}
 
 	vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7);
-	vi->z_physical_clusterbits[0] = vi->z_logical_clusterbits +
-					((h->h_clusterbits >> 3) & 3);
-
-	if (vi->z_physical_clusterbits[0] != LOG_BLOCK_SIZE) {
-		erofs_err("unsupported physical clusterbits %u for nid %llu",
-			  vi->z_physical_clusterbits[0], (unsigned long long)vi->nid);
-		return -EOPNOTSUPP;
+	if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION &&
+	    !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^
+	    !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
+		erofs_err(
+"big pcluster head1/2 of compact indexes should be consistent for nid %llu",
+			  vi->nid * 1ULL);
+		return -EFSCORRUPTED;
 	}
-
-	vi->z_physical_clusterbits[1] = vi->z_logical_clusterbits +
-					((h->h_clusterbits >> 5) & 7);
 	vi->flags |= EROFS_I_Z_INITED;
 	return 0;
 }
@@ -81,7 +78,7 @@
 	u8  type;
 	u16 clusterofs;
 	u16 delta[2];
-	erofs_blk_t pblk;
+	erofs_blk_t pblk, compressedlcs;
 };
 
 static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m,
@@ -130,6 +127,15 @@
 	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
 		m->clusterofs = 1 << vi->z_logical_clusterbits;
 		m->delta[0] = le16_to_cpu(di->di_u.delta[0]);
+		if (m->delta[0] & Z_EROFS_VLE_DI_D0_CBLKCNT) {
+			if (!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) {
+				DBG_BUGON(1);
+				return -EFSCORRUPTED;
+			}
+			m->compressedlcs = m->delta[0] &
+				~Z_EROFS_VLE_DI_D0_CBLKCNT;
+			m->delta[0] = 1;
+		}
 		m->delta[1] = le16_to_cpu(di->di_u.delta[1]);
 		break;
 	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
@@ -166,6 +172,7 @@
 	unsigned int vcnt, base, lo, encodebits, nblk;
 	int i;
 	u8 *in, type;
+	bool big_pcluster;
 
 	if (1 << amortizedshift == 4)
 		vcnt = 2;
@@ -174,6 +181,7 @@
 	else
 		return -EOPNOTSUPP;
 
+	big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
 	encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
 	base = round_down(eofs, vcnt << amortizedshift);
 	in = m->kaddr + base;
@@ -185,7 +193,15 @@
 	m->type = type;
 	if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
 		m->clusterofs = 1 << lclusterbits;
-		if (i + 1 != (int)vcnt) {
+		if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) {
+			if (!big_pcluster) {
+				DBG_BUGON(1);
+				return -EFSCORRUPTED;
+			}
+			m->compressedlcs = lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT;
+			m->delta[0] = 1;
+			return 0;
+		} else if (i + 1 != (int)vcnt) {
 			m->delta[0] = lo;
 			return 0;
 		}
@@ -198,22 +214,48 @@
 					  in, encodebits * (i - 1), &type);
 		if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
 			lo = 0;
+		else if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT)
+			lo = 1;
 		m->delta[0] = lo + 1;
 		return 0;
 	}
 	m->clusterofs = lo;
 	m->delta[0] = 0;
 	/* figout out blkaddr (pblk) for HEAD lclusters */
-	nblk = 1;
-	while (i > 0) {
-		--i;
-		lo = decode_compactedbits(lclusterbits, lomask,
-					  in, encodebits * i, &type);
-		if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
-			i -= lo;
+	if (!big_pcluster) {
+		nblk = 1;
+		while (i > 0) {
+			--i;
+			lo = decode_compactedbits(lclusterbits, lomask,
+						  in, encodebits * i, &type);
+			if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
+				i -= lo;
 
-		if (i >= 0)
+			if (i >= 0)
+				++nblk;
+		}
+	} else {
+		nblk = 0;
+		while (i > 0) {
+			--i;
+			lo = decode_compactedbits(lclusterbits, lomask,
+						  in, encodebits * i, &type);
+			if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
+				if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) {
+					--i;
+					nblk += lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT;
+					continue;
+				}
+				if (lo == 1) {
+					DBG_BUGON(1);
+					/* --i; ++nblk;	continue; */
+					return -EFSCORRUPTED;
+				}
+				i -= lo - 2;
+				continue;
+			}
 			++nblk;
+		}
 	}
 	in += (vcnt << amortizedshift) - sizeof(__le32);
 	m->pblk = le32_to_cpu(*(__le32 *)in) + nblk;
@@ -333,6 +375,73 @@
 	return 0;
 }
 
+static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
+					    unsigned int initial_lcn)
+{
+	struct erofs_inode *const vi = m->inode;
+	struct erofs_map_blocks *const map = m->map;
+	const unsigned int lclusterbits = vi->z_logical_clusterbits;
+	unsigned long lcn;
+	int err;
+
+	DBG_BUGON(m->type != Z_EROFS_VLE_CLUSTER_TYPE_PLAIN &&
+		  m->type != Z_EROFS_VLE_CLUSTER_TYPE_HEAD);
+	if (!(map->m_flags & EROFS_MAP_ZIPPED) ||
+	    !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) {
+		map->m_plen = 1 << lclusterbits;
+		return 0;
+	}
+
+	lcn = m->lcn + 1;
+	if (m->compressedlcs)
+		goto out;
+
+	err = z_erofs_load_cluster_from_disk(m, lcn);
+	if (err)
+		return err;
+
+	/*
+	 * If the 1st NONHEAD lcluster has already been handled initially w/o
+	 * valid compressedlcs, it at least mustn't be CBLKCNT; otherwise an
+	 * internal implementation error is detected.
+	 *
+	 * The following code can also handle it properly anyway, but let's
+	 * BUG_ON in the debugging mode only for developers to notice that.
+	 */
+	DBG_BUGON(lcn == initial_lcn &&
+		  m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD);
+
+	switch (m->type) {
+	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+		/*
+		 * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
+		 * rather than CBLKCNT, it's a 1 lcluster-sized pcluster.
+		 */
+		m->compressedlcs = 1;
+		break;
+	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+		if (m->delta[0] != 1)
+			goto err_bonus_cblkcnt;
+		if (m->compressedlcs)
+			break;
+		/* fallthrough */
+	default:
+		erofs_err("cannot found CBLKCNT @ lcn %lu of nid %llu",
+			  lcn, vi->nid | 0ULL);
+		DBG_BUGON(1);
+		return -EFSCORRUPTED;
+	}
+out:
+	map->m_plen = m->compressedlcs << lclusterbits;
+	return 0;
+err_bonus_cblkcnt:
+	erofs_err("bogus CBLKCNT @ lcn %lu of nid %llu",
+		  lcn, vi->nid | 0ULL);
+	DBG_BUGON(1);
+	return -EFSCORRUPTED;
+}
+
 int z_erofs_map_blocks_iter(struct erofs_inode *vi,
 			    struct erofs_map_blocks *map)
 {
@@ -343,6 +452,7 @@
 	};
 	int err = 0;
 	unsigned int lclusterbits, endoff;
+	unsigned long initial_lcn;
 	unsigned long long ofs, end;
 
 	/* when trying to read beyond EOF, leave it unmapped */
@@ -359,10 +469,10 @@
 
 	lclusterbits = vi->z_logical_clusterbits;
 	ofs = map->m_la;
-	m.lcn = ofs >> lclusterbits;
+	initial_lcn = ofs >> lclusterbits;
 	endoff = ofs & ((1 << lclusterbits) - 1);
 
-	err = z_erofs_load_cluster_from_disk(&m, m.lcn);
+	err = z_erofs_load_cluster_from_disk(&m, initial_lcn);
 	if (err)
 		goto out;
 
@@ -401,8 +511,11 @@
 	}
 
 	map->m_llen = end - map->m_la;
-	map->m_plen = 1 << lclusterbits;
 	map->m_pa = blknr_to_addr(m.pblk);
+
+	err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
+	if (err)
+		goto out;
 	map->m_flags |= EROFS_MAP_MAPPED;
 
 out:
diff --git a/man/Makefile.am b/man/Makefile.am
index dcdbb35..ffcf6f8 100644
--- a/man/Makefile.am
+++ b/man/Makefile.am
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0+
 # Makefile.am
 
-dist_man_MANS = mkfs.erofs.1
+dist_man_MANS = mkfs.erofs.1 erofsfuse.1
 
diff --git a/man/erofsfuse.1 b/man/erofsfuse.1
new file mode 100644
index 0000000..6bd48b0
--- /dev/null
+++ b/man/erofsfuse.1
@@ -0,0 +1,44 @@
+.\" Copyright (c) 2021 Gao Xiang <xiang@kernel.org>
+.\"
+.TH EROFSFUSE 1
+.SH NAME
+erofsfuse \- FUSE file system client for erofs file system
+.SH SYNOPSIS
+\fBerofsfuse\fR [\fIOPTIONS\fR] \fIDEVICE\fR \fIMOUNTPOINT\fR
+.SH DESCRIPTION
+.B erofsfuse
+is a FUSE file system client that supports reading from devices or image files
+containing an EROFS file system.
+.SH OPTIONS
+.SS "general options:"
+.TP
+\fB\-o\fR opt,[opt...]
+mount options
+.TP
+\fB\-h\fR   \fB\-\-help\fR
+display help and exit
+.SS "erofsfuse options:"
+.TP
+.BI "\-\-dbglevel=" #
+Specify the level of debugging messages. The default is 2, which shows basic
+warning messages.
+.SS "FUSE options:"
+.TP
+\fB-d -o\fR debug
+enable debug output (implies -f)
+.TP
+\fB-f\fR
+foreground operation
+.TP
+\fB-s\fR
+disable multi-threaded operation
+.P
+For other FUSE options please see
+.BR mount.fuse (8)
+or see the output of
+.I erofsfuse \-\-help
+.SH AVAILABILITY
+\fBerofsfuse\fR is part of the erofs-utils package and is available from
+git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git.
+.SH SEE ALSO
+.BR mount.fuse (8)
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index dcaf9d7..d164fa5 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -24,8 +24,13 @@
 Set an algorithm for file compression, which can be set with an optional
 compression level separated by a comma.
 .TP
+.BI "\-C " max-pcluster-size
+Specify the maximum size of a compressed physical cluster in bytes. This may
+enable the big pcluster feature if needed (Linux v5.13+).
+.TP
 .BI "\-d " #
-Specify the level of debugging messages. The default is 0.
+Specify the level of debugging messages. The default is 2, which shows basic
+warning messages.
 .TP
 .BI "\-x " #
 Specify the upper limit of an xattr which is still inlined. The default is 2.
@@ -69,8 +74,20 @@
 .BI "\-\-file-contexts=" file
 Specify a \fIfile_contexts\fR file to setup / override selinux labels.
 .TP
+.BI "\-\-force-uid=" UID
+Set all file uids to \fIUID\fR.
+.TP
+.BI "\-\-force-gid=" GID
+Set all file gids to \fIGID\fR.
+.TP
+.B \-\-all-root
+Make all files owned by root.
+.TP
 .B \-\-help
 Display this help and exit.
+.TP
+.BI "\-\-max-extent-bytes=" #
+Specify the maximum decompressed extent size in bytes.
 .SH AUTHOR
 This version of \fBmkfs.erofs\fR is written by Li Guifu <blucerlee@gmail.com>,
 Miao Xie <miaoxie@huawei.com> and Gao Xiang <xiang@kernel.org> with
diff --git a/mkfs/main.c b/mkfs/main.c
index abd48be..e476189 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -36,6 +36,13 @@
 #ifdef HAVE_LIBSELINUX
 	{"file-contexts", required_argument, NULL, 4},
 #endif
+	{"force-uid", required_argument, NULL, 5},
+	{"force-gid", required_argument, NULL, 6},
+	{"all-root", no_argument, NULL, 7},
+#ifndef NDEBUG
+	{"random-pclusterblks", no_argument, NULL, 8},
+#endif
+	{"max-extent-bytes", required_argument, NULL, 9},
 #ifdef WITH_ANDROID
 	{"mount-point", required_argument, NULL, 10},
 	{"product-out", required_argument, NULL, 11},
@@ -61,25 +68,33 @@
 {
 	fputs("usage: [options] FILE DIRECTORY\n\n"
 	      "Generate erofs image from DIRECTORY to FILE, and [options] are:\n"
-	      " -zX[,Y]            X=compressor (Y=compression level, optional)\n"
-	      " -d#                set output message level to # (maximum 9)\n"
-	      " -x#                set xattr tolerance to # (< 0, disable xattrs; default 2)\n"
-	      " -EX[,...]          X=extended options\n"
-	      " -T#                set a fixed UNIX timestamp # to all files\n"
+	      " -zX[,Y]               X=compressor (Y=compression level, optional)\n"
+	      " -C#                   specify the size of compress physical cluster in bytes\n"
+	      " -d#                   set output message level to # (maximum 9)\n"
+	      " -x#                   set xattr tolerance to # (< 0, disable xattrs; default 2)\n"
+	      " -EX[,...]             X=extended options\n"
+	      " -T#                   set a fixed UNIX timestamp # to all files\n"
 #ifdef HAVE_LIBUUID
-	      " -UX                use a given filesystem UUID\n"
+	      " -UX                   use a given filesystem UUID\n"
 #endif
-	      " --exclude-path=X   avoid including file X (X = exact literal path)\n"
-	      " --exclude-regex=X  avoid including files that match X (X = regular expression)\n"
+	      " --exclude-path=X      avoid including file X (X = exact literal path)\n"
+	      " --exclude-regex=X     avoid including files that match X (X = regular expression)\n"
 #ifdef HAVE_LIBSELINUX
-	      " --file-contexts=X  specify a file contexts file to setup selinux labels\n"
+	      " --file-contexts=X     specify a file contexts file to setup selinux labels\n"
 #endif
-	      " --help             display this help and exit\n"
+	      " --force-uid=#         set all file uids to # (# = UID)\n"
+	      " --force-gid=#         set all file gids to # (# = GID)\n"
+	      " --all-root            make all files owned by root\n"
+	      " --help                display this help and exit\n"
+	      " --max-extent-bytes=#  set maximum decompressed extent size # in bytes\n"
+#ifndef NDEBUG
+	      " --random-pclusterblks randomize pclusterblks for big pcluster (debugging only)\n"
+#endif
 #ifdef WITH_ANDROID
 	      "\nwith following android-specific options:\n"
-	      " --mount-point=X    X=prefix of target fs path (default: /)\n"
-	      " --product-out=X    X=product_out directory\n"
-	      " --fs-config-file=X X=fs_config file\n"
+	      " --mount-point=X       X=prefix of target fs path (default: /)\n"
+	      " --product-out=X       X=product_out directory\n"
+	      " --fs-config-file=X    X=fs_config file\n"
 #endif
 	      "\nAvailable compressors are: ", stderr);
 	print_available_compressors(stderr, ", ");
@@ -152,7 +167,7 @@
 	char *endptr;
 	int opt, i;
 
-	while((opt = getopt_long(argc, argv, "d:x:z:E:T:U:",
+	while((opt = getopt_long(argc, argv, "d:x:z:E:T:U:C:",
 				 long_options, NULL)) != -1) {
 		switch (opt) {
 		case 'z':
@@ -233,6 +248,37 @@
 			if (opt && opt != -EBUSY)
 				return opt;
 			break;
+		case 5:
+			cfg.c_uid = strtoul(optarg, &endptr, 0);
+			if (cfg.c_uid == -1 || *endptr != '\0') {
+				erofs_err("invalid uid %s", optarg);
+				return -EINVAL;
+			}
+			break;
+		case 6:
+			cfg.c_gid = strtoul(optarg, &endptr, 0);
+			if (cfg.c_gid == -1 || *endptr != '\0') {
+				erofs_err("invalid gid %s", optarg);
+				return -EINVAL;
+			}
+			break;
+		case 7:
+			cfg.c_uid = cfg.c_gid = 0;
+			break;
+#ifndef NDEBUG
+		case 8:
+			cfg.c_random_pclusterblks = true;
+			break;
+#endif
+		case 9:
+			cfg.c_max_decompressed_extent_bytes =
+				strtoul(optarg, &endptr, 0);
+			if (*endptr != '\0') {
+				erofs_err("invalid maximum uncompressed extent size %s",
+					  optarg);
+				return -EINVAL;
+			}
+			break;
 #ifdef WITH_ANDROID
 		case 10:
 			cfg.mount_point = optarg;
@@ -248,6 +294,17 @@
 			cfg.fs_config_file = optarg;
 			break;
 #endif
+		case 'C':
+			i = strtoull(optarg, &endptr, 0);
+			if (*endptr != '\0' ||
+			    i < EROFS_BLKSIZ || i % EROFS_BLKSIZ) {
+				erofs_err("invalid physical clustersize %s",
+					  optarg);
+				return -EINVAL;
+			}
+			cfg.c_physical_clusterblks = i / EROFS_BLKSIZ;
+			break;
+
 		case 1:
 			usage();
 			exit(0);
@@ -304,11 +361,16 @@
 		round_up(EROFS_SUPER_END, EROFS_BLKSIZ);
 	char *buf;
 
-	*blocks         = erofs_mapbh(NULL, true);
+	*blocks         = erofs_mapbh(NULL);
 	sb.blocks       = cpu_to_le32(*blocks);
 	sb.root_nid     = cpu_to_le16(root_nid);
 	memcpy(sb.uuid, sbi.uuid, sizeof(sb.uuid));
 
+	if (erofs_sb_has_compr_cfgs())
+		sb.u1.available_compr_algs = sbi.available_compr_algs;
+	else
+		sb.u1.lz4_max_distance = cpu_to_le16(sbi.lz4_max_distance);
+
 	buf = calloc(sb_blksize, 1);
 	if (!buf) {
 		erofs_err("Failed to allocate memory for sb: %s",
@@ -483,7 +545,10 @@
 
 	erofs_show_config();
 	erofs_set_fs_root(cfg.c_src_path);
-
+#ifndef NDEBUG
+	if (cfg.c_random_pclusterblks)
+		srand(time(NULL));
+#endif
 	sb_bh = erofs_buffer_init();
 	if (IS_ERR(sb_bh)) {
 		err = PTR_ERR(sb_bh);
@@ -498,7 +563,7 @@
 		goto exit;
 	}
 
-	err = z_erofs_compress_init();
+	err = z_erofs_compress_init(sb_bh);
 	if (err) {
 		erofs_err("Failed to initialize compressor: %s",
 			  erofs_strerror(err));