Merge "Sync with upstream 41655e1c:" am: 78d19b2743 am: f0a696483b am: c34c4fe0ef

Change-Id: I66ae9464f1a59748677ed7dc53fbe98ee8b4a934
diff --git a/Android.bp b/Android.bp
index 67858c3..3d57578 100644
--- a/Android.bp
+++ b/Android.bp
@@ -21,5 +21,6 @@
         "-Wno-unused-const-variable",
         "-Wno-format",
         "-Wno-sign-compare",
+        "-include freebsd-compat.h",
     ],
 }
diff --git a/boot.c b/boot.c
index 33577ca..a3ba7ab 100644
--- a/boot.c
+++ b/boot.c
@@ -152,9 +152,6 @@
 		boot->NumSectors = boot->bpbHugeSectors;
 	}
 
-
-
-
 	if (boot->flags & FAT32) {
 		/* If the OEM Name field is EXFAT, it's not FAT32, so bail */
 		if (!memcmp(&block[3], "EXFAT   ", 8)) {
@@ -272,13 +269,30 @@
 	boot->NumClusters = (boot->NumSectors - boot->FirstCluster) / boot->bpbSecPerClust +
 	    CLUST_FIRST;
 
-	if (boot->flags&FAT32)
+	if (boot->flags & FAT32) {
+		if (boot->NumClusters > (CLUST_RSRVD & CLUST32_MASK)) {
+			pfatal("Filesystem too big (%u clusters) for FAT32 partition",
+			    boot->NumClusters);
+			return FSFATAL;
+		}
+		if (boot->NumClusters < (CLUST_RSRVD & CLUST16_MASK)) {
+			pfatal("Filesystem too small (%u clusters) for FAT32 partition",
+			    boot->NumClusters);
+			return FSFATAL;
+		}
 		boot->ClustMask = CLUST32_MASK;
-	else if (boot->NumClusters < (CLUST_RSRVD&CLUST12_MASK))
+
+		if (boot->bpbRootClust < CLUST_FIRST ||
+		    boot->bpbRootClust >= boot->NumClusters) {
+			pfatal("Root directory starts with cluster out of range(%u)",
+			       boot->bpbRootClust);
+			return FSFATAL;
+		}
+	} else if (boot->NumClusters < (CLUST_RSRVD&CLUST12_MASK)) {
 		boot->ClustMask = CLUST12_MASK;
-	else if (boot->NumClusters < (CLUST_RSRVD&CLUST16_MASK))
+	} else if (boot->NumClusters < (CLUST_RSRVD&CLUST16_MASK)) {
 		boot->ClustMask = CLUST16_MASK;
-	else {
+	} else {
 		pfatal("Filesystem too big (%u clusters) for non-FAT32 partition",
 		       boot->NumClusters);
 		return FSFATAL;
diff --git a/check.c b/check.c
index 2431bd3..2c38662 100644
--- a/check.c
+++ b/check.c
@@ -47,9 +47,8 @@
 {
 	int dosfs;
 	struct bootblock boot;
-	struct fatEntry *fat = NULL;
+	struct fat_descriptor *fat = NULL;
 	int finish_dosdirsection=0;
-	u_int i;
 	int mod = 0;
 	int ret = 8;
 
@@ -88,65 +87,39 @@
 	}
 
 	if (!preen)  {
-		if (boot.ValidFat < 0)
-			printf("** Phase 1 - Read and Compare FATs\n");
-		else
-			printf("** Phase 1 - Read FAT\n");
+		printf("** Phase 1 - Read FAT and checking connectivity\n");
 	}
 
-	mod |= readfat(dosfs, &boot, boot.ValidFat >= 0 ? boot.ValidFat : 0, &fat);
+	mod |= readfat(dosfs, &boot, &fat);
 	if (mod & FSFATAL) {
 		close(dosfs);
 		return 8;
 	}
 
-	if (boot.ValidFat < 0)
-		for (i = 1; i < boot.bpbFATs; i++) {
-			struct fatEntry *currentFat;
-
-			mod |= readfat(dosfs, &boot, i, &currentFat);
-
-			if (mod & FSFATAL)
-				goto out;
-
-			mod |= comparefat(&boot, fat, currentFat, i);
-			free(currentFat);
-			if (mod & FSFATAL)
-				goto out;
-		}
-
 	if (!preen)
-		printf("** Phase 2 - Check Cluster Chains\n");
+		printf("** Phase 2 - Checking Directories\n");
 
-	mod |= checkfat(&boot, fat);
-	if (mod & FSFATAL)
-		goto out;
-	/* delay writing FATs */
-
-	if (!preen)
-		printf("** Phase 3 - Checking Directories\n");
-
-	mod |= resetDosDirSection(&boot, fat);
+	mod |= resetDosDirSection(fat);
 	finish_dosdirsection = 1;
 	if (mod & FSFATAL)
 		goto out;
 	/* delay writing FATs */
 
-	mod |= handleDirTree(dosfs, &boot, fat);
+	mod |= handleDirTree(fat);
 	if (mod & FSFATAL)
 		goto out;
 
 	if (!preen)
-		printf("** Phase 4 - Checking for Lost Files\n");
+		printf("** Phase 3 - Checking for Lost Files\n");
 
-	mod |= checklost(dosfs, &boot, fat);
+	mod |= checklost(fat);
 	if (mod & FSFATAL)
 		goto out;
 
 	/* now write the FATs */
-	if (mod & (FSFATMOD|FSFIXFAT)) {
+	if (mod & FSFATMOD) {
 		if (ask(1, "Update FATs")) {
-			mod |= writefat(dosfs, &boot, fat, mod & FSFIXFAT);
+			mod |= writefat(fat);
 			if (mod & FSFATAL)
 				goto out;
 		} else
@@ -170,7 +143,7 @@
 
 			if (mod & FSDIRTY) {
 				pwarn("MARKING FILE SYSTEM CLEAN\n");
-				mod |= writefat(dosfs, &boot, fat, 1);
+				mod |= writefat(fat);
 			} else {
 				pwarn("\n***** FILE SYSTEM IS LEFT MARKED AS DIRTY *****\n");
 				mod |= FSERROR; /* file system not clean */
diff --git a/dir.c b/dir.c
index 7e459c7..6bdc3b4 100644
--- a/dir.c
+++ b/dir.c
@@ -1,6 +1,7 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
+ * Copyright (c) 2019 Google LLC
  * Copyright (C) 1995, 1996, 1997 Wolfgang Solfrank
  * Copyright (c) 1995 Martin Husemann
  * Some structure declaration borrowed from Paul Popelka
@@ -95,14 +96,11 @@
 static void freeDirTodo(struct dirTodoNode *);
 static char *fullpath(struct dosDirEntry *);
 static u_char calcShortSum(u_char *);
-static int delete(int, struct bootblock *, struct fatEntry *, cl_t, int,
-    cl_t, int, int);
-static int removede(int, struct bootblock *, struct fatEntry *, u_char *,
-    u_char *, cl_t, cl_t, cl_t, char *, int);
-static int checksize(struct bootblock *, struct fatEntry *, u_char *,
-    struct dosDirEntry *);
-static int readDosDirSection(int, struct bootblock *, struct fatEntry *,
-    struct dosDirEntry *);
+static int delete(struct fat_descriptor *, cl_t, int, cl_t, int, int);
+static int removede(struct fat_descriptor *, u_char *, u_char *,
+    cl_t, cl_t, cl_t, char *, int);
+static int checksize(struct fat_descriptor *, u_char *, struct dosDirEntry *);
+static int readDosDirSection(struct fat_descriptor *, struct dosDirEntry *);
 
 /*
  * Manage free dosDirEntry structures.
@@ -116,7 +114,7 @@
 
 	if (!(de = freede)) {
 		if (!(de = malloc(sizeof *de)))
-			return 0;
+			return (NULL);
 	} else
 		freede = de->next;
 	return de;
@@ -193,7 +191,7 @@
 /*
  * Calculate a checksum over an 8.3 alias name
  */
-static u_char
+static inline u_char
 calcShortSum(u_char *p)
 {
 	u_char sum = 0;
@@ -221,21 +219,24 @@
  * Init internal state for a new directory scan.
  */
 int
-resetDosDirSection(struct bootblock *boot, struct fatEntry *fat)
+resetDosDirSection(struct fat_descriptor *fat)
 {
-	int b1, b2;
+	int rootdir_size, cluster_size;
 	int ret = FSOK;
 	size_t len;
+	struct bootblock *boot;
 
-	b1 = boot->bpbRootDirEnts * 32;
-	b2 = boot->bpbSecPerClust * boot->bpbBytesPerSec;
+	boot = fat_get_boot(fat);
 
-	if ((buffer = malloc(len = MAX(b1, b2))) == NULL) {
+	rootdir_size = boot->bpbRootDirEnts * 32;
+	cluster_size = boot->bpbSecPerClust * boot->bpbBytesPerSec;
+
+	if ((buffer = malloc(len = MAX(rootdir_size, cluster_size))) == NULL) {
 		perr("No space for directory buffer (%zu)", len);
 		return FSFATAL;
 	}
 
-	if ((delbuf = malloc(len = b2)) == NULL) {
+	if ((delbuf = malloc(len = cluster_size)) == NULL) {
 		free(buffer);
 		perr("No space for directory delbuf (%zu)", len);
 		return FSFATAL;
@@ -250,18 +251,10 @@
 
 	memset(rootDir, 0, sizeof *rootDir);
 	if (boot->flags & FAT32) {
-		if (boot->bpbRootClust < CLUST_FIRST ||
-		    boot->bpbRootClust >= boot->NumClusters) {
-			pfatal("Root directory starts with cluster out of range(%u)",
-			       boot->bpbRootClust);
-			return FSFATAL;
-		}
-		if (fat[boot->bpbRootClust].head != boot->bpbRootClust) {
+		if (!fat_is_cl_head(fat, boot->bpbRootClust)) {
 			pfatal("Root directory doesn't start a cluster chain");
 			return FSFATAL;
 		}
-
-		fat[boot->bpbRootClust].flags |= FAT_USED;
 		rootDir->head = boot->bpbRootClust;
 	}
 
@@ -302,16 +295,21 @@
  * Delete directory entries between startcl, startoff and endcl, endoff.
  */
 static int
-delete(int f, struct bootblock *boot, struct fatEntry *fat, cl_t startcl,
+delete(struct fat_descriptor *fat, cl_t startcl,
     int startoff, cl_t endcl, int endoff, int notlast)
 {
 	u_char *s, *e;
 	off_t off;
-	int clsz = boot->bpbSecPerClust * boot->bpbBytesPerSec;
+	int clsz, fd;
+	struct bootblock *boot;
+
+	boot = fat_get_boot(fat);
+	fd = fat_get_fd(fat);
+	clsz = boot->bpbSecPerClust * boot->bpbBytesPerSec;
 
 	s = delbuf + startoff;
 	e = delbuf + clsz;
-	while (startcl >= CLUST_FIRST && startcl < boot->NumClusters) {
+	while (fat_is_valid_cl(fat, startcl)) {
 		if (startcl == endcl) {
 			if (notlast)
 				break;
@@ -320,11 +318,11 @@
 		off = (startcl - CLUST_FIRST) * boot->bpbSecPerClust + boot->FirstCluster;
 
 		off *= boot->bpbBytesPerSec;
-		if (lseek(f, off, SEEK_SET) != off) {
+		if (lseek(fd, off, SEEK_SET) != off) {
 			perr("Unable to lseek to %" PRId64, off);
 			return FSFATAL;
 		}
-		if (read(f, delbuf, clsz) != clsz) {
+		if (read(fd, delbuf, clsz) != clsz) {
 			perr("Unable to read directory");
 			return FSFATAL;
 		}
@@ -332,25 +330,26 @@
 			*s = SLOT_DELETED;
 			s += 32;
 		}
-		if (lseek(f, off, SEEK_SET) != off) {
+		if (lseek(fd, off, SEEK_SET) != off) {
 			perr("Unable to lseek to %" PRId64, off);
 			return FSFATAL;
 		}
-		if (write(f, delbuf, clsz) != clsz) {
+		if (write(fd, delbuf, clsz) != clsz) {
 			perr("Unable to write directory");
 			return FSFATAL;
 		}
 		if (startcl == endcl)
 			break;
-		startcl = fat[startcl].next;
+		startcl = fat_get_cl_next(fat, startcl);
 		s = delbuf;
 	}
 	return FSOK;
 }
 
 static int
-removede(int f, struct bootblock *boot, struct fatEntry *fat, u_char *start,
-    u_char *end, cl_t startcl, cl_t endcl, cl_t curcl, char *path, int type)
+removede(struct fat_descriptor *fat, u_char *start,
+    u_char *end, cl_t startcl, cl_t endcl, cl_t curcl,
+    char *path, int type)
 {
 	switch (type) {
 	case 0:
@@ -366,14 +365,14 @@
 	}
 	if (ask(0, "Remove")) {
 		if (startcl != curcl) {
-			if (delete(f, boot, fat,
+			if (delete(fat,
 				   startcl, start - buffer,
 				   endcl, end - buffer,
 				   endcl == curcl) == FSFATAL)
 				return FSFATAL;
 			start = buffer;
 		}
-		/* startcl is < CLUST_FIRST for !fat32 root */
+		/* startcl is < CLUST_FIRST for !FAT32 root */
 		if ((endcl == curcl) || (startcl < CLUST_FIRST))
 			for (; start < end; start += 32)
 				*start = SLOT_DELETED;
@@ -386,23 +385,37 @@
  * Check an in-memory file entry
  */
 static int
-checksize(struct bootblock *boot, struct fatEntry *fat, u_char *p,
-    struct dosDirEntry *dir)
+checksize(struct fat_descriptor *fat, u_char *p, struct dosDirEntry *dir)
 {
+	int ret = FSOK;
+	size_t physicalSize;
+	struct bootblock *boot;
+
+	boot = fat_get_boot(fat);
+
 	/*
 	 * Check size on ordinary files
 	 */
-	u_int32_t physicalSize;
-
-	if (dir->head == CLUST_FREE)
+	if (dir->head == CLUST_FREE) {
 		physicalSize = 0;
-	else {
-		if (dir->head < CLUST_FIRST || dir->head >= boot->NumClusters)
+	} else {
+		if (!fat_is_valid_cl(fat, dir->head))
 			return FSERROR;
-		physicalSize = fat[dir->head].length * boot->ClusterSize;
+		ret = checkchain(fat, dir->head, &physicalSize);
+		/*
+		 * Upon return, physicalSize would hold the chain length
+		 * that checkchain() was able to validate, but if the user
+		 * refused the proposed repair, it would be unsafe to
+		 * proceed with directory entry fix, so bail out in that
+		 * case.
+		 */
+		if (ret == FSERROR) {
+			return (FSERROR);
+		}
+		physicalSize *= boot->ClusterSize;
 	}
 	if (physicalSize < dir->size) {
-		pwarn("size of %s is %u, should at most be %u\n",
+		pwarn("size of %s is %u, should at most be %zu\n",
 		      fullpath(dir), dir->size, physicalSize);
 		if (ask(1, "Truncate")) {
 			dir->size = physicalSize;
@@ -422,11 +435,10 @@
 
 			for (cl = dir->head, len = sz = 0;
 			    (sz += boot->ClusterSize) < dir->size; len++)
-				cl = fat[cl].next;
-			clearchain(boot, fat, fat[cl].next);
-			fat[cl].next = CLUST_EOF;
-			fat[dir->head].length = len;
-			return FSFATMOD;
+				cl = fat_get_cl_next(fat, cl);
+			clearchain(fat, fat_get_cl_next(fat, cl));
+			ret = fat_set_cl_next(fat, cl, CLUST_EOF);
+			return (FSFATMOD | ret);
 		} else
 			return FSERROR;
 	}
@@ -442,15 +454,20 @@
  * when we traverse into it.
  */
 static int
-check_subdirectory(int f, struct bootblock *boot, struct dosDirEntry *dir)
+check_subdirectory(struct fat_descriptor *fat, struct dosDirEntry *dir)
 {
 	u_char *buf, *cp;
 	off_t off;
 	cl_t cl;
 	int retval = FSOK;
+	int fd;
+	struct bootblock *boot;
+
+	boot = fat_get_boot(fat);
+	fd = fat_get_fd(fat);
 
 	cl = dir->head;
-	if (dir->parent && (cl < CLUST_FIRST || cl >= boot->NumClusters)) {
+	if (dir->parent && !fat_is_valid_cl(fat, cl)) {
 		return FSERROR;
 	}
 
@@ -474,8 +491,8 @@
 	}
 
 	off *= boot->bpbBytesPerSec;
-	if (lseek(f, off, SEEK_SET) != off ||
-	    read(f, buf, boot->bpbBytesPerSec) != (ssize_t)boot->bpbBytesPerSec) {
+	if (lseek(fd, off, SEEK_SET) != off ||
+	    read(fd, buf, boot->bpbBytesPerSec) != (ssize_t)boot->bpbBytesPerSec) {
 		perr("Unable to read directory");
 		free(buf);
 		return FSFATAL;
@@ -509,22 +526,27 @@
  *   - push directories onto the todo-stack
  */
 static int
-readDosDirSection(int f, struct bootblock *boot, struct fatEntry *fat,
-    struct dosDirEntry *dir)
+readDosDirSection(struct fat_descriptor *fat, struct dosDirEntry *dir)
 {
+	struct bootblock *boot;
 	struct dosDirEntry dirent, *d;
 	u_char *p, *vallfn, *invlfn, *empty;
 	off_t off;
-	int i, j, k, last;
+	int fd, i, j, k, iosize, entries;
+	bool is_legacyroot;
 	cl_t cl, valcl = ~0, invcl = ~0, empcl = ~0;
 	char *t;
 	u_int lidx = 0;
 	int shortSum;
 	int mod = FSOK;
+	size_t dirclusters;
 #define	THISMOD	0x8000			/* Only used within this routine */
 
+	boot = fat_get_boot(fat);
+	fd = fat_get_fd(fat);
+
 	cl = dir->head;
-	if (dir->parent && (cl < CLUST_FIRST || cl >= boot->NumClusters)) {
+	if (dir->parent && (!fat_is_valid_cl(fat, cl))) {
 		/*
 		 * Already handled somewhere else.
 		 */
@@ -532,24 +554,50 @@
 	}
 	shortSum = -1;
 	vallfn = invlfn = empty = NULL;
+
+	/*
+	 * If we are checking the legacy root (for FAT12/FAT16),
+	 * we will operate on the whole directory; otherwise, we
+	 * will operate on one cluster at a time, and also take
+	 * this opportunity to examine the chain.
+	 *
+	 * Derive how many entries we are going to encounter from
+	 * the I/O size.
+	 */
+	is_legacyroot = (dir->parent == NULL && !(boot->flags & FAT32));
+	if (is_legacyroot) {
+		iosize = boot->bpbRootDirEnts * 32;
+		entries = boot->bpbRootDirEnts;
+	} else {
+		iosize = boot->bpbSecPerClust * boot->bpbBytesPerSec;
+		entries = iosize / 32;
+		mod |= checkchain(fat, dir->head, &dirclusters);
+	}
+
 	do {
-		if (!(boot->flags & FAT32) && !dir->parent) {
-			last = boot->bpbRootDirEnts * 32;
+		if (is_legacyroot) {
+			/*
+			 * Special case for FAT12/FAT16 root -- read
+			 * in the whole root directory.
+			 */
 			off = boot->bpbResSectors + boot->bpbFATs *
 			    boot->FATsecs;
 		} else {
-			last = boot->bpbSecPerClust * boot->bpbBytesPerSec;
+			/*
+			 * Otherwise, read in a cluster of the
+			 * directory.
+			 */
 			off = (cl - CLUST_FIRST) * boot->bpbSecPerClust + boot->FirstCluster;
 		}
 
 		off *= boot->bpbBytesPerSec;
-		if (lseek(f, off, SEEK_SET) != off
-		    || read(f, buffer, last) != last) {
+		if (lseek(fd, off, SEEK_SET) != off ||
+		    read(fd, buffer, iosize) != iosize) {
 			perr("Unable to read directory");
 			return FSFATAL;
 		}
-		last /= 32;
-		for (p = buffer, i = 0; i < last; i++, p += 32) {
+
+		for (p = buffer, i = 0; i < entries; i++, p += 32) {
 			if (dir->fsckflags & DIREMPWARN) {
 				*p = SLOT_EMPTY;
 				continue;
@@ -572,7 +620,7 @@
 						u_char *q;
 
 						dir->fsckflags &= ~DIREMPTY;
-						if (delete(f, boot, fat,
+						if (delete(fat,
 							   empcl, empty - buffer,
 							   cl, p - buffer, 1) == FSFATAL)
 							return FSFATAL;
@@ -701,7 +749,7 @@
 
 			if (dirent.flags & ATTR_VOLUME) {
 				if (vallfn || invlfn) {
-					mod |= removede(f, boot, fat,
+					mod |= removede(fat,
 							invlfn ? invlfn : vallfn, p,
 							invlfn ? invcl : valcl, -1, 0,
 							fullpath(dir), 2);
@@ -741,7 +789,7 @@
 			dirent.next = dir->child;
 
 			if (invlfn) {
-				mod |= k = removede(f, boot, fat,
+				mod |= k = removede(fat,
 						    invlfn, vallfn ? vallfn : p,
 						    invcl, vallfn ? valcl : cl, cl,
 						    fullpath(&dirent), 0);
@@ -757,74 +805,61 @@
 			vallfn = NULL; /* not used any longer */
 			invlfn = NULL;
 
-			if (dirent.size == 0 && !(dirent.flags & ATTR_DIRECTORY)) {
-				if (dirent.head != 0) {
-					pwarn("%s has clusters, but size 0\n",
-					      fullpath(&dirent));
-					if (ask(1, "Drop allocated clusters")) {
-						p[26] = p[27] = 0;
-						if (boot->ClustMask == CLUST32_MASK)
-							p[20] = p[21] = 0;
-						clearchain(boot, fat, dirent.head);
-						dirent.head = 0;
-						mod |= THISMOD|FSDIRMOD|FSFATMOD;
-					} else
-						mod |= FSERROR;
-				}
-			} else if (dirent.head == 0
-				   && !strcmp(dirent.name, "..")
-				   && dir->parent			/* XXX */
-				   && !dir->parent->parent) {
-				/*
-				 *  Do nothing, the parent is the root
-				 */
-			} else if (dirent.head < CLUST_FIRST
-				   || dirent.head >= boot->NumClusters
-				   || fat[dirent.head].next == CLUST_FREE
-				   || (fat[dirent.head].next >= CLUST_RSRVD
-				       && fat[dirent.head].next < CLUST_EOFS)
-				   || fat[dirent.head].head != dirent.head) {
-				if (dirent.head == 0)
-					pwarn("%s has no clusters\n",
-					      fullpath(&dirent));
-				else if (dirent.head < CLUST_FIRST
-					 || dirent.head >= boot->NumClusters)
-					pwarn("%s starts with cluster out of range(%u)\n",
-					      fullpath(&dirent),
-					      dirent.head);
-				else if (fat[dirent.head].next == CLUST_FREE)
-					pwarn("%s starts with free cluster\n",
-					      fullpath(&dirent));
-				else if (fat[dirent.head].next >= CLUST_RSRVD)
-					pwarn("%s starts with cluster marked %s\n",
-					      fullpath(&dirent),
-					      rsrvdcltype(fat[dirent.head].next));
-				else
-					pwarn("%s doesn't start a cluster chain\n",
-					      fullpath(&dirent));
-				if (dirent.flags & ATTR_DIRECTORY) {
-					if (ask(0, "Remove")) {
-						*p = SLOT_DELETED;
-						mod |= THISMOD|FSDIRMOD;
-					} else
-						mod |= FSERROR;
-					continue;
-				} else {
-					if (ask(1, "Truncate")) {
-						p[28] = p[29] = p[30] = p[31] = 0;
-						p[26] = p[27] = 0;
-						if (boot->ClustMask == CLUST32_MASK)
-							p[20] = p[21] = 0;
-						dirent.size = 0;
-						mod |= THISMOD|FSDIRMOD;
-					} else
-						mod |= FSERROR;
+			/*
+			 * Check if the directory entry is sane.
+			 *
+			 * '.' and '..' are skipped, their sanity is
+			 * checked somewhere else.
+			 *
+			 * For everything else, check if we have a new,
+			 * valid cluster chain (beginning of a file or
+			 * directory that was never previously claimed
+			 * by another file) when it's a non-empty file
+			 * or a directory. The sanity of the cluster
+			 * chain is checked at a later time when we
+			 * traverse into the directory, or examine the
+			 * file's directory entry.
+			 *
+			 * The only possible fix is to delete the entry
+			 * if it's a directory; for file, we have to
+			 * truncate the size to 0.
+			 */
+			if (!(dirent.flags & ATTR_DIRECTORY) ||
+			    (strcmp(dirent.name, ".") != 0 &&
+			    strcmp(dirent.name, "..") != 0)) {
+				if ((dirent.size != 0 || (dirent.flags & ATTR_DIRECTORY)) &&
+				    ((!fat_is_valid_cl(fat, dirent.head) ||
+				    !fat_is_cl_head(fat, dirent.head)))) {
+					if (!fat_is_valid_cl(fat, dirent.head)) {
+						pwarn("%s starts with cluster out of range(%u)\n",
+						    fullpath(&dirent),
+						    dirent.head);
+					} else {
+						pwarn("%s doesn't start a new cluster chain\n",
+						    fullpath(&dirent));
+					}
+
+					if (dirent.flags & ATTR_DIRECTORY) {
+						if (ask(0, "Remove")) {
+							*p = SLOT_DELETED;
+							mod |= THISMOD|FSDIRMOD;
+						} else
+							mod |= FSERROR;
+						continue;
+					} else {
+						if (ask(1, "Truncate")) {
+							p[28] = p[29] = p[30] = p[31] = 0;
+							p[26] = p[27] = 0;
+							if (boot->ClustMask == CLUST32_MASK)
+								p[20] = p[21] = 0;
+							dirent.size = 0;
+							dirent.head = 0;
+							mod |= THISMOD|FSDIRMOD;
+						} else
+							mod |= FSERROR;
+					}
 				}
 			}
-
-			if (dirent.head >= CLUST_FIRST && dirent.head < boot->NumClusters)
-				fat[dirent.head].flags |= FAT_USED;
-
 			if (dirent.flags & ATTR_DIRECTORY) {
 				/*
 				 * gather more info for directories
@@ -861,8 +896,7 @@
 							mod |= FSERROR;
 					}
 					continue;
-				}
-				if (strcmp(dirent.name, "..") == 0) {
+				} else if (strcmp(dirent.name, "..") == 0) {
 					if (dir->parent) {		/* XXX */
 						if (!dir->parent->parent) {
 							if (dirent.head) {
@@ -908,7 +942,7 @@
 						} else
 							mod |= FSERROR;
 						continue;
-					} else if ((check_subdirectory(f, boot,
+					} else if ((check_subdirectory(fat,
 					    &dirent) & FSERROR) == FSERROR) {
 						/*
 						 * A subdirectory should have
@@ -944,39 +978,43 @@
 				n->dir = d;
 				pendingDirectories = n;
 			} else {
-				mod |= k = checksize(boot, fat, p, &dirent);
+				mod |= k = checksize(fat, p, &dirent);
 				if (k & FSDIRMOD)
 					mod |= THISMOD;
 			}
 			boot->NumFiles++;
 		}
 
-		if (!(boot->flags & FAT32) && !dir->parent)
+		if (is_legacyroot) {
+			/*
+			 * Don't bother to write back right now because
+			 * we may continue to make modification to the
+			 * non-FAT32 root directory below.
+			 */
 			break;
-
-		if (mod & THISMOD) {
-			last *= 32;
-			if (lseek(f, off, SEEK_SET) != off
-			    || write(f, buffer, last) != last) {
+		} else if (mod & THISMOD) {
+			if (lseek(fd, off, SEEK_SET) != off
+			    || write(fd, buffer, iosize) != iosize) {
 				perr("Unable to write directory");
 				return FSFATAL;
 			}
 			mod &= ~THISMOD;
 		}
-	} while ((cl = fat[cl].next) >= CLUST_FIRST && cl < boot->NumClusters);
+	} while (fat_is_valid_cl(fat, (cl = fat_get_cl_next(fat, cl))));
 	if (invlfn || vallfn)
-		mod |= removede(f, boot, fat,
+		mod |= removede(fat,
 				invlfn ? invlfn : vallfn, p,
 				invlfn ? invcl : valcl, -1, 0,
 				fullpath(dir), 1);
 
-	/* The root directory of non fat32 filesystems is in a special
-	 * area and may have been modified above without being written out.
+	/*
+	 * The root directory of non-FAT32 filesystems is in a special
+	 * area and may have been modified above removede() without
+	 * being written out.
 	 */
-	if ((mod & FSDIRMOD) && !(boot->flags & FAT32) && !dir->parent) {
-		last *= 32;
-		if (lseek(f, off, SEEK_SET) != off
-		    || write(f, buffer, last) != last) {
+	if ((mod & FSDIRMOD) && is_legacyroot) {
+		if (lseek(fd, off, SEEK_SET) != off
+		    || write(fd, buffer, iosize) != iosize) {
 			perr("Unable to write directory");
 			return FSFATAL;
 		}
@@ -986,11 +1024,11 @@
 }
 
 int
-handleDirTree(int dosfs, struct bootblock *boot, struct fatEntry *fat)
+handleDirTree(struct fat_descriptor *fat)
 {
 	int mod;
 
-	mod = readDosDirSection(dosfs, boot, fat, rootDir);
+	mod = readDosDirSection(fat, rootDir);
 	if (mod & FSFATAL)
 		return FSFATAL;
 
@@ -1011,7 +1049,7 @@
 		/*
 		 * handle subdirectory
 		 */
-		mod |= readDosDirSection(dosfs, boot, fat, dir);
+		mod |= readDosDirSection(fat, dir);
 		if (mod & FSFATAL)
 			return FSFATAL;
 	}
@@ -1027,12 +1065,15 @@
 static off_t lfoff;
 
 int
-reconnect(int dosfs, struct bootblock *boot, struct fatEntry *fat, cl_t head)
+reconnect(struct fat_descriptor *fat, cl_t head, size_t length)
 {
+	struct bootblock *boot = fat_get_boot(fat);
 	struct dosDirEntry d;
-	int len;
+	int len, dosfs;
 	u_char *p;
 
+	dosfs = fat_get_fd(fat);
+
 	if (!ask(1, "Reconnect"))
 		return FSERROR;
 
@@ -1063,7 +1104,7 @@
 					break;
 		if (p && p < lfbuf + boot->ClusterSize)
 			break;
-		lfcl = p ? fat[lfcl].next : lostDir->head;
+		lfcl = p ? fat_get_cl_next(fat, lfcl) : lostDir->head;
 		if (lfcl < CLUST_FIRST || lfcl >= boot->NumClusters) {
 			/* Extend LOSTDIR?				XXX */
 			pwarn("No space in %s\n", LOSTDIR);
@@ -1088,7 +1129,7 @@
 	len = snprintf(d.name, sizeof(d.name), "%u", head);
 	d.flags = 0;
 	d.head = head;
-	d.size = fat[head].length * boot->ClusterSize;
+	d.size = length * boot->ClusterSize;
 
 	memcpy(p, d.name, len);
 	memset(p + len, ' ', 11 - len);
@@ -1103,7 +1144,6 @@
 	p[29] = (u_char)(d.size >> 8);
 	p[30] = (u_char)(d.size >> 16);
 	p[31] = (u_char)(d.size >> 24);
-	fat[head].flags |= FAT_USED;
 	if (lseek(dosfs, lfoff, SEEK_SET) != lfoff
 	    || (size_t)write(dosfs, lfbuf, boot->ClusterSize) != boot->ClusterSize) {
 		perr("could not write LOST.DIR");
diff --git a/dosfs.h b/dosfs.h
index 9f1480f..d89a086 100644
--- a/dosfs.h
+++ b/dosfs.h
@@ -83,19 +83,13 @@
 	u_int	NumBad;			/* # of bad clusters */
 };
 
-struct fatEntry {
-	cl_t	next;			/* pointer to next cluster */
-	cl_t	head;			/* pointer to start of chain */
-	u_int32_t length;		/* number of clusters on chain */
-	int	flags;			/* see below */
-};
-
 #define	CLUST_FREE	0		/* 0 means cluster is free */
 #define	CLUST_FIRST	2		/* 2 is the minimum valid cluster number */
 #define	CLUST_RSRVD	0xfffffff6	/* start of reserved clusters */
 #define	CLUST_BAD	0xfffffff7	/* a cluster with a defect */
 #define	CLUST_EOFS	0xfffffff8	/* start of EOF indicators */
 #define	CLUST_EOF	0xffffffff	/* standard value for last cluster */
+#define	CLUST_DEAD	0xfdeadc0d	/* error encountered */
 
 /*
  * Masks for cluster values
@@ -104,8 +98,6 @@
 #define	CLUST16_MASK	0xffff
 #define	CLUST32_MASK	0xfffffff
 
-#define	FAT_USED	1		/* This fat chain is used in a file */
-
 #define	DOSLONGNAMELEN	256		/* long name maximal length */
 #define LRFIRST		0x40		/* first long name record */
 #define	LRNOMASK	0x1f		/* mask to extract long record
diff --git a/ext.h b/ext.h
index ebc9467..6e9ba7f 100644
--- a/ext.h
+++ b/ext.h
@@ -32,6 +32,8 @@
 
 #include <sys/types.h>
 
+#include <stdbool.h>
+
 #include "dosfs.h"
 
 #define	LOSTDIR	"LOST.DIR"
@@ -44,6 +46,7 @@
 extern int preen;	/* we are preening */
 extern int rdonly;	/* device is opened read only (supersedes above) */
 extern int skipclean;	/* skip clean file systems if preening */
+extern int allow_mmap;  /* allow the use of mmap() */
 
 /*
  * function declarations
@@ -72,7 +75,6 @@
 #define	FSERROR		8		/* Some unrecovered error remains */
 #define	FSFATAL		16		/* Some unrecoverable error occurred */
 #define	FSDIRTY		32		/* File system is dirty */
-#define	FSFIXFAT	64		/* Fix file system FAT */
 
 /*
  * read a boot block in a machine independent fashion and translate
@@ -85,46 +87,53 @@
  */
 int writefsinfo(int, struct bootblock *);
 
-/*
- * Read one of the FAT copies and return a pointer to the new
- * allocated array holding our description of it.
- */
-int readfat(int, struct bootblock *, u_int, struct fatEntry **);
+/* Opaque type */
+struct fat_descriptor;
+
+void fat_clear_cl_head(struct fat_descriptor *, cl_t);
+bool fat_is_cl_head(struct fat_descriptor *, cl_t);
+
+cl_t fat_get_cl_next(struct fat_descriptor *, cl_t);
+
+int fat_set_cl_next(struct fat_descriptor *, cl_t, cl_t);
+
+cl_t fat_allocate_cluster(struct fat_descriptor *fat);
+
+struct bootblock* fat_get_boot(struct fat_descriptor *);
+int fat_get_fd(struct fat_descriptor *);
+bool fat_is_valid_cl(struct fat_descriptor *, cl_t);
 
 /*
- * Check two FAT copies for consistency and merge changes into the
- * first if necessary.
+ * Read the FAT 0 and return a pointer to the newly allocated
+ * descriptor of it.
  */
-int comparefat(struct bootblock *, struct fatEntry *, struct fatEntry *, u_int);
-
-/*
- * Check a FAT
- */
-int checkfat(struct bootblock *, struct fatEntry *);
+int readfat(int, struct bootblock *, struct fat_descriptor **);
 
 /*
  * Write back FAT entries
  */
-int writefat(int, struct bootblock *, struct fatEntry *, int);
+int writefat(struct fat_descriptor *);
 
 /*
  * Read a directory
  */
-int resetDosDirSection(struct bootblock *, struct fatEntry *);
+int resetDosDirSection(struct fat_descriptor *);
 void finishDosDirSection(void);
-int handleDirTree(int, struct bootblock *, struct fatEntry *);
+int handleDirTree(struct fat_descriptor *);
 
 /*
  * Cross-check routines run after everything is completely in memory
  */
+int checkchain(struct fat_descriptor *, cl_t, size_t *);
+
 /*
  * Check for lost cluster chains
  */
-int checklost(int, struct bootblock *, struct fatEntry *);
+int checklost(struct fat_descriptor *);
 /*
  * Try to reconnect a lost cluster chain
  */
-int reconnect(int, struct bootblock *, struct fatEntry *, cl_t);
+int reconnect(struct fat_descriptor *, cl_t, size_t);
 void finishlf(void);
 
 /*
@@ -138,6 +147,6 @@
 /*
  * Clear a cluster chain in a FAT
  */
-void clearchain(struct bootblock *, struct fatEntry *, cl_t);
+void clearchain(struct fat_descriptor *, cl_t);
 
 #endif
diff --git a/fat.c b/fat.c
index 7ca81ab..12050c2 100644
--- a/fat.c
+++ b/fat.c
@@ -1,6 +1,7 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause
  *
+ * Copyright (c) 2019 Google LLC
  * Copyright (C) 1995, 1996, 1997 Wolfgang Solfrank
  * Copyright (c) 1995 Martin Husemann
  *
@@ -33,6 +34,14 @@
   "$FreeBSD$";
 #endif /* not lint */
 
+#include <sys/endian.h>
+#include <sys/queue.h>
+#include <sys/limits.h>
+#include <sys/mman.h>
+#include <sys/param.h>
+
+#include <assert.h>
+#include <stdbool.h>
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
@@ -42,12 +51,517 @@
 #include "ext.h"
 #include "fsutil.h"
 
-static int checkclnum(struct bootblock *, u_int, cl_t, cl_t *);
-static int clustdiffer(cl_t, cl_t *, cl_t *, u_int);
-static int tryclear(struct bootblock *, struct fatEntry *, cl_t, cl_t *);
-static int _readfat(int, struct bootblock *, u_int, u_char **);
+static int _readfat(struct fat_descriptor *);
+static inline struct bootblock* boot_of_(struct fat_descriptor *);
+static inline int fd_of_(struct fat_descriptor *);
+static inline bool valid_cl(struct fat_descriptor *, cl_t);
 
-/*-
+
+/*
+ * Head bitmap for FAT scanning.
+ *
+ * FAT32 have up to 2^28 = 256M entries, and FAT16/12 have much less.
+ * For each cluster, we use 1 bit to represent if it's a head cluster
+ * (the first cluster of a cluster chain).
+ *
+ * Head bitmap
+ * ===========
+ * Initially, we set all bits to 1.  In readfat(), we traverse the
+ * whole FAT and mark each cluster identified as "next" cluster as
+ * 0.  After the scan, we have a bitmap with 1's to indicate the
+ * corresponding cluster was a "head" cluster.
+ *
+ * We use head bitmap to identify lost chains: a head cluster that was
+ * not being claimed by any file or directories is the head cluster of
+ * a lost chain.
+ *
+ * Handle of lost chains
+ * =====================
+ * At the end of scanning, we can easily find all lost chain's heads
+ * by finding out the 1's in the head bitmap.
+ */
+
+typedef struct long_bitmap {
+	unsigned long	*map;
+	size_t		 count;		/* Total set bits in the map */
+} long_bitmap_t;
+
+static inline void
+bitmap_clear(long_bitmap_t *lbp, cl_t cl)
+{
+	cl_t i = cl / LONG_BIT;
+	unsigned long clearmask = ~(1UL << (cl % LONG_BIT));
+
+	assert((lbp->map[i] & ~clearmask) != 0);
+	lbp->map[i] &= clearmask;
+	lbp->count--;
+}
+
+static inline bool
+bitmap_get(long_bitmap_t *lbp, cl_t cl)
+{
+	cl_t i = cl / LONG_BIT;
+	unsigned long usedbit = 1UL << (cl % LONG_BIT);
+
+	return ((lbp->map[i] & usedbit) == usedbit);
+}
+
+static inline bool
+bitmap_none_in_range(long_bitmap_t *lbp, cl_t cl)
+{
+	cl_t i = cl / LONG_BIT;
+
+	return (lbp->map[i] == 0);
+}
+
+static inline size_t
+bitmap_count(long_bitmap_t *lbp)
+{
+	return (lbp->count);
+}
+
+static int
+bitmap_ctor(long_bitmap_t *lbp, size_t bits, bool allone)
+{
+	size_t bitmap_size = roundup2(bits, LONG_BIT) / (LONG_BIT / 8);
+
+	free(lbp->map);
+	lbp->map = calloc(1, bitmap_size);
+	if (lbp->map == NULL)
+		return FSFATAL;
+
+	if (allone) {
+		memset(lbp->map, 0xff, bitmap_size);
+		lbp->count = bits;
+	} else {
+		lbp->count = 0;
+	}
+	return FSOK;
+}
+
+static void
+bitmap_dtor(long_bitmap_t *lbp)
+{
+	free(lbp->map);
+	lbp->map = NULL;
+}
+
+/*
+ * FAT32 can be as big as 256MiB (2^26 entries * 4 bytes), when we
+ * can not ask the kernel to manage the access, use a simple LRU
+ * cache with chunk size of 128 KiB to manage it.
+ */
+struct fat32_cache_entry {
+	TAILQ_ENTRY(fat32_cache_entry)	entries;
+	uint8_t			*chunk;		/* pointer to chunk */
+	off_t			 addr;		/* offset */
+	bool			 dirty;		/* dirty bit */
+};
+
+static const size_t	fat32_cache_chunk_size = 131072; /* MAXPHYS */
+static const size_t	fat32_cache_size = 4194304;
+static const size_t	fat32_cache_entries = 32; /* XXXgcc: cache_size / cache_chunk_size */
+
+/*
+ * FAT table descriptor, represents a FAT table that is already loaded
+ * into memory.
+ */
+struct fat_descriptor {
+	struct bootblock	*boot;
+	uint8_t			*fatbuf;
+	cl_t			(*get)(struct fat_descriptor *, cl_t);
+	int			(*set)(struct fat_descriptor *, cl_t, cl_t);
+	long_bitmap_t		 headbitmap;
+	int			 fd;
+	bool			 is_mmapped;
+	bool			 use_cache;
+	size_t		  	 fatsize;
+
+	size_t			 fat32_cached_chunks;
+	TAILQ_HEAD(cachehead, fat32_cache_entry)	fat32_cache_head;
+	struct fat32_cache_entry	*fat32_cache_allentries;
+	off_t			 fat32_offset;
+	off_t			 fat32_lastaddr;
+};
+
+void
+fat_clear_cl_head(struct fat_descriptor *fat, cl_t cl)
+{
+	bitmap_clear(&fat->headbitmap, cl);
+}
+
+bool
+fat_is_cl_head(struct fat_descriptor *fat, cl_t cl)
+{
+	return (bitmap_get(&fat->headbitmap, cl));
+}
+
+static inline bool
+fat_is_cl_head_in_range(struct fat_descriptor *fat, cl_t cl)
+{
+	return (!(bitmap_none_in_range(&fat->headbitmap, cl)));
+}
+
+static size_t
+fat_get_head_count(struct fat_descriptor *fat)
+{
+	return (bitmap_count(&fat->headbitmap));
+}
+
+/*
+ * FAT12 accessors.
+ *
+ * FAT12s are sufficiently small, expect it to always fit in the RAM.
+ */
+static inline uint8_t *
+fat_get_fat12_ptr(struct fat_descriptor *fat, cl_t cl)
+{
+	return (fat->fatbuf + ((cl + (cl >> 1))));
+}
+
+static cl_t
+fat_get_fat12_next(struct fat_descriptor *fat, cl_t cl)
+{
+	const uint8_t	*p;
+	cl_t	retval;
+
+	p = fat_get_fat12_ptr(fat, cl);
+	retval = le16dec(p);
+	/* Odd cluster: lower 4 bits belongs to the subsequent cluster */
+	if ((cl & 1) == 1)
+		retval >>= 4;
+	retval &= CLUST12_MASK;
+
+	if (retval >= (CLUST_BAD & CLUST12_MASK))
+		retval |= ~CLUST12_MASK;
+
+	return (retval);
+}
+
+static int
+fat_set_fat12_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
+{
+	uint8_t	*p;
+
+	/* Truncate 'nextcl' value, if needed */
+	nextcl &= CLUST12_MASK;
+
+	p = fat_get_fat12_ptr(fat, cl);
+
+	/*
+	 * Read in the 4 bits from the subsequent (for even clusters)
+	 * or the preceding (for odd clusters) cluster and combine
+	 * it to the nextcl value for encoding
+	 */
+	if ((cl & 1) == 0) {
+		nextcl |= ((p[1] & 0xf0) << 8);
+	} else {
+		nextcl <<= 4;
+		nextcl |= (p[0] & 0x0f);
+	}
+
+	le16enc(p, (uint16_t)nextcl);
+
+	return (0);
+}
+
+/*
+ * FAT16 accessors.
+ *
+ * FAT16s are sufficiently small, expect it to always fit in the RAM.
+ */
+static inline uint8_t *
+fat_get_fat16_ptr(struct fat_descriptor *fat, cl_t cl)
+{
+	return (fat->fatbuf + (cl << 1));
+}
+
+static cl_t
+fat_get_fat16_next(struct fat_descriptor *fat, cl_t cl)
+{
+	const uint8_t	*p;
+	cl_t	retval;
+
+	p = fat_get_fat16_ptr(fat, cl);
+	retval = le16dec(p) & CLUST16_MASK;
+
+	if (retval >= (CLUST_BAD & CLUST16_MASK))
+		retval |= ~CLUST16_MASK;
+
+	return (retval);
+}
+
+static int
+fat_set_fat16_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
+{
+	uint8_t	*p;
+
+	/* Truncate 'nextcl' value, if needed */
+	nextcl &= CLUST16_MASK;
+
+	p = fat_get_fat16_ptr(fat, cl);
+
+	le16enc(p, (uint16_t)nextcl);
+
+	return (0);
+}
+
+/*
+ * FAT32 accessors.
+ */
+static inline uint8_t *
+fat_get_fat32_ptr(struct fat_descriptor *fat, cl_t cl)
+{
+	return (fat->fatbuf + (cl << 2));
+}
+
+static cl_t
+fat_get_fat32_next(struct fat_descriptor *fat, cl_t cl)
+{
+	const uint8_t	*p;
+	cl_t	retval;
+
+	p = fat_get_fat32_ptr(fat, cl);
+	retval = le32dec(p) & CLUST32_MASK;
+
+	if (retval >= (CLUST_BAD & CLUST32_MASK))
+		retval |= ~CLUST32_MASK;
+
+	return (retval);
+}
+
+static int
+fat_set_fat32_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
+{
+	uint8_t	*p;
+
+	/* Truncate 'nextcl' value, if needed */
+	nextcl &= CLUST32_MASK;
+
+	p = fat_get_fat32_ptr(fat, cl);
+
+	le32enc(p, (uint32_t)nextcl);
+
+	return (0);
+}
+
+static inline size_t
+fat_get_iosize(struct fat_descriptor *fat, off_t address)
+{
+
+	if (address == fat->fat32_lastaddr) {
+		return (fat->fatsize & ((off_t)fat32_cache_chunk_size - 1));
+	} else {
+		return (fat32_cache_chunk_size);
+	}
+}
+
+static int
+fat_flush_fat32_cache_entry(struct fat_descriptor *fat,
+    struct fat32_cache_entry *entry)
+{
+	int fd;
+	off_t fat_addr;
+	size_t writesize;
+
+	fd = fd_of_(fat);
+
+	if (!entry->dirty)
+		return (FSOK);
+
+	writesize = fat_get_iosize(fat, entry->addr);
+
+	fat_addr = fat->fat32_offset + entry->addr;
+	if (lseek(fd, fat_addr, SEEK_SET) != fat_addr ||
+	    (size_t)write(fd, entry->chunk, writesize) != writesize) {
+			pfatal("Unable to write FAT");
+			return (FSFATAL);
+	}
+
+	entry->dirty = false;
+	return (FSOK);
+}
+
+static struct fat32_cache_entry *
+fat_get_fat32_cache_entry(struct fat_descriptor *fat, off_t addr,
+    bool writing)
+{
+	int fd;
+	struct fat32_cache_entry *entry, *first;
+	off_t	fat_addr;
+	size_t	rwsize;
+
+	addr &= ~(fat32_cache_chunk_size - 1);
+
+	first = TAILQ_FIRST(&fat->fat32_cache_head);
+
+	/*
+	 * Cache hit: if we already have the chunk, move it to list head
+	 */
+	TAILQ_FOREACH(entry, &fat->fat32_cache_head, entries) {
+		if (entry->addr == addr) {
+			if (writing) {
+				entry->dirty = true;
+			}
+			if (entry != first) {
+
+				TAILQ_REMOVE(&fat->fat32_cache_head, entry, entries);
+				TAILQ_INSERT_HEAD(&fat->fat32_cache_head, entry, entries);
+			}
+			return (entry);
+		}
+	}
+
+	/*
+	 * Cache miss: detach the chunk at tail of list, overwrite with
+	 * the located chunk, and populate with data from disk.
+	 */
+	entry = TAILQ_LAST(&fat->fat32_cache_head, cachehead);
+	TAILQ_REMOVE(&fat->fat32_cache_head, entry, entries);
+	if (fat_flush_fat32_cache_entry(fat, entry) != FSOK) {
+		return (NULL);
+	}
+
+	rwsize = fat_get_iosize(fat, addr);
+	fat_addr = fat->fat32_offset + addr;
+	entry->addr = addr;
+	fd = fd_of_(fat);
+	if (lseek(fd, fat_addr, SEEK_SET) != fat_addr ||
+		(size_t)read(fd, entry->chunk, rwsize) != rwsize) {
+		pfatal("Unable to read FAT");
+		return (NULL);
+	}
+	if (writing) {
+		entry->dirty = true;
+	}
+	TAILQ_INSERT_HEAD(&fat->fat32_cache_head, entry, entries);
+
+	return (entry);
+}
+
+static inline uint8_t *
+fat_get_fat32_cached_ptr(struct fat_descriptor *fat, cl_t cl, bool writing)
+{
+	off_t addr, off;
+	struct fat32_cache_entry *entry;
+
+	addr = cl << 2;
+	entry = fat_get_fat32_cache_entry(fat, addr, writing);
+
+	if (entry != NULL) {
+		off = addr & (fat32_cache_chunk_size - 1);
+		return (entry->chunk + off);
+	} else {
+		return (NULL);
+	}
+}
+
+
+static cl_t
+fat_get_fat32_cached_next(struct fat_descriptor *fat, cl_t cl)
+{
+	const uint8_t	*p;
+	cl_t	retval;
+
+	p = fat_get_fat32_cached_ptr(fat, cl, false);
+	if (p != NULL) {
+		retval = le32dec(p) & CLUST32_MASK;
+		if (retval >= (CLUST_BAD & CLUST32_MASK))
+			retval |= ~CLUST32_MASK;
+	} else {
+		retval = CLUST_DEAD;
+	}
+
+	return (retval);
+}
+
+static int
+fat_set_fat32_cached_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
+{
+	uint8_t	*p;
+
+	/* Truncate 'nextcl' value, if needed */
+	nextcl &= CLUST32_MASK;
+
+	p = fat_get_fat32_cached_ptr(fat, cl, true);
+	if (p != NULL) {
+		le32enc(p, (uint32_t)nextcl);
+		return FSOK;
+	} else {
+		return FSFATAL;
+	}
+}
+
+cl_t fat_get_cl_next(struct fat_descriptor *fat, cl_t cl)
+{
+
+	if (!valid_cl(fat, cl)) {
+		pfatal("Invalid cluster: %ud", cl);
+		return CLUST_DEAD;
+	}
+
+	return (fat->get(fat, cl));
+}
+
+int fat_set_cl_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
+{
+
+	if (rdonly) {
+		pwarn(" (NO WRITE)\n");
+		return FSFATAL;
+	}
+
+	if (!valid_cl(fat, cl)) {
+		pfatal("Invalid cluster: %ud", cl);
+		return FSFATAL;
+	}
+
+	return (fat->set(fat, cl, nextcl));
+}
+
+static inline struct bootblock*
+boot_of_(struct fat_descriptor *fat) {
+
+	return (fat->boot);
+}
+
+struct bootblock*
+fat_get_boot(struct fat_descriptor *fat) {
+
+	return (boot_of_(fat));
+}
+
+static inline int
+fd_of_(struct fat_descriptor *fat)
+{
+	return (fat->fd);
+}
+
+int
+fat_get_fd(struct fat_descriptor * fat)
+{
+	return (fd_of_(fat));
+}
+
+/*
+ * Whether a cl is in valid data range.
+ */
+bool
+fat_is_valid_cl(struct fat_descriptor *fat, cl_t cl)
+{
+
+	return (valid_cl(fat, cl));
+}
+
+static inline bool
+valid_cl(struct fat_descriptor *fat, cl_t cl)
+{
+	const struct bootblock *boot = boot_of_(fat);
+
+	return (cl >= CLUST_FIRST && cl < boot->NumClusters);
+}
+
+/*
  * The first 2 FAT entries contain pseudo-cluster numbers with the following
  * layout:
  *
@@ -129,93 +643,178 @@
 }
 
 /*
- * Check a cluster number for valid value
- */
-static int
-checkclnum(struct bootblock *boot, u_int fat, cl_t cl, cl_t *next)
-{
-	if (*next >= (CLUST_RSRVD&boot->ClustMask))
-		*next |= ~boot->ClustMask;
-	if (*next == CLUST_FREE) {
-		boot->NumFree++;
-		return FSOK;
-	}
-	if (*next == CLUST_BAD) {
-		boot->NumBad++;
-		return FSOK;
-	}
-	if (*next < CLUST_FIRST
-	    || (*next >= boot->NumClusters && *next < CLUST_EOFS)) {
-		pwarn("Cluster %u in FAT %d continues with %s cluster number %u\n",
-		      cl, fat,
-		      *next < CLUST_RSRVD ? "out of range" : "reserved",
-		      *next&boot->ClustMask);
-		if (ask(0, "Truncate")) {
-			*next = CLUST_EOF;
-			return FSFATMOD;
-		}
-		return FSERROR;
-	}
-	return FSOK;
-}
-
-/*
  * Read a FAT from disk. Returns 1 if successful, 0 otherwise.
  */
 static int
-_readfat(int fs, struct bootblock *boot, u_int no, u_char **buffer)
+_readfat(struct fat_descriptor *fat)
 {
+	int fd;
+	size_t i;
 	off_t off;
+	size_t readsize;
+	struct bootblock *boot;
+	struct fat32_cache_entry *entry;
 
-	*buffer = calloc(boot->FATsecs, boot->bpbBytesPerSec);
-	if (*buffer == NULL) {
-		perr("No space for FAT sectors (%zu)",
-		    (size_t)boot->FATsecs);
+	boot = boot_of_(fat);
+	fd = fd_of_(fat);
+	fat->fatsize = boot->FATsecs * boot->bpbBytesPerSec;
+
+	off = boot->bpbResSectors;
+	off *= boot->bpbBytesPerSec;
+
+	fat->is_mmapped = false;
+	fat->use_cache = false;
+
+	/* Attempt to mmap() first */
+	if (allow_mmap) {
+		fat->fatbuf = mmap(NULL, fat->fatsize,
+				PROT_READ | (rdonly ? 0 : PROT_WRITE),
+				MAP_SHARED, fd_of_(fat), off);
+		if (fat->fatbuf != MAP_FAILED) {
+			fat->is_mmapped = true;
+			return 1;
+		}
+	}
+
+	/*
+	 * Unfortunately, we were unable to mmap().
+	 *
+	 * Only use the cache manager when it's necessary, that is,
+	 * when the FAT is sufficiently large; in that case, only
+	 * read in the first 4 MiB of FAT into memory, and split the
+	 * buffer into chunks and insert to the LRU queue to populate
+	 * the cache with data.
+	 */
+	if (boot->ClustMask == CLUST32_MASK &&
+	    fat->fatsize >= fat32_cache_size) {
+		readsize = fat32_cache_size;
+		fat->use_cache = true;
+
+		fat->fat32_offset = boot->bpbResSectors * boot->bpbBytesPerSec;
+		fat->fat32_lastaddr = fat->fatsize & ~(fat32_cache_chunk_size);
+	} else {
+		readsize = fat->fatsize;
+	}
+	fat->fatbuf = malloc(readsize);
+	if (fat->fatbuf == NULL) {
+		perr("No space for FAT (%zu)", readsize);
 		return 0;
 	}
 
-	off = boot->bpbResSectors + no * boot->FATsecs;
-	off *= boot->bpbBytesPerSec;
-
-	if (lseek(fs, off, SEEK_SET) != off) {
+	if (lseek(fd, off, SEEK_SET) != off) {
+		perr("Unable to read FAT");
+		goto err;
+	}
+	if ((size_t)read(fd, fat->fatbuf, readsize) != readsize) {
 		perr("Unable to read FAT");
 		goto err;
 	}
 
-	if ((size_t)read(fs, *buffer, boot->FATsecs * boot->bpbBytesPerSec)
-	    != boot->FATsecs * boot->bpbBytesPerSec) {
-		perr("Unable to read FAT");
-		goto err;
+	/*
+	 * When cache is used, split the buffer into chunks, and
+	 * connect the buffer into the cache.
+	 */
+	if (fat->use_cache) {
+		TAILQ_INIT(&fat->fat32_cache_head);
+		entry = calloc(fat32_cache_entries, sizeof(*entry));
+		if (entry == NULL) {
+			perr("No space for FAT cache (%zu of %zu)",
+			    fat32_cache_entries, sizeof(entry));
+			goto err;
+		}
+		for (i = 0; i < fat32_cache_entries; i++) {
+			entry[i].addr = fat32_cache_chunk_size * i;
+			entry[i].chunk = &fat->fatbuf[entry[i].addr];
+			TAILQ_INSERT_TAIL(&fat->fat32_cache_head,
+			    &entry[i], entries);
+		}
+		fat->fat32_cache_allentries = entry;
 	}
 
 	return 1;
 
-    err:
-	free(*buffer);
+err:
+	free(fat->fatbuf);
+	fat->fatbuf = NULL;
 	return 0;
 }
 
+static void
+releasefat(struct fat_descriptor *fat)
+{
+	if (fat->is_mmapped) {
+		munmap(fat->fatbuf, fat->fatsize);
+	} else {
+		if (fat->use_cache) {
+			free(fat->fat32_cache_allentries);
+			fat->fat32_cache_allentries = NULL;
+		}
+		free(fat->fatbuf);
+	}
+	fat->fatbuf = NULL;
+	bitmap_dtor(&fat->headbitmap);
+}
+
 /*
- * Read a FAT and decode it into internal format
+ * Read or map a FAT and populate head bitmap
  */
 int
-readfat(int fs, struct bootblock *boot, u_int no, struct fatEntry **fp)
+readfat(int fs, struct bootblock *boot, struct fat_descriptor **fp)
 {
-	struct fatEntry *fat;
+	struct fat_descriptor *fat;
 	u_char *buffer, *p;
-	cl_t cl;
+	cl_t cl, nextcl;
 	int ret = FSOK;
 
 	boot->NumFree = boot->NumBad = 0;
 
-	if (!_readfat(fs, boot, no, &buffer))
-		return FSFATAL;
-
-	fat = calloc(boot->NumClusters, sizeof(struct fatEntry));
+	fat = calloc(1, sizeof(struct fat_descriptor));
 	if (fat == NULL) {
-		perr("No space for FAT clusters (%zu)",
+		perr("No space for FAT descriptor");
+		return FSFATAL;
+	}
+
+	fat->fd = fs;
+	fat->boot = boot;
+
+	if (!_readfat(fat)) {
+		free(fat);
+		return FSFATAL;
+	}
+	buffer = fat->fatbuf;
+
+	/* Populate accessors */
+	switch(boot->ClustMask) {
+	case CLUST12_MASK:
+		fat->get = fat_get_fat12_next;
+		fat->set = fat_set_fat12_next;
+		break;
+	case CLUST16_MASK:
+		fat->get = fat_get_fat16_next;
+		fat->set = fat_set_fat16_next;
+		break;
+	case CLUST32_MASK:
+		if (fat->is_mmapped || !fat->use_cache) {
+			fat->get = fat_get_fat32_next;
+			fat->set = fat_set_fat32_next;
+		} else {
+			fat->get = fat_get_fat32_cached_next;
+			fat->set = fat_set_fat32_cached_next;
+		}
+		break;
+	default:
+		pfatal("Invalid ClustMask: %d", boot->ClustMask);
+		releasefat(fat);
+		free(fat);
+		return FSFATAL;
+	}
+
+	if (bitmap_ctor(&fat->headbitmap, boot->NumClusters,
+	    true) != FSOK) {
+		perr("No space for head bitmap for FAT clusters (%zu)",
 		    (size_t)boot->NumClusters);
-		free(buffer);
+		releasefat(fat);
+		free(fat);
 		return FSFATAL;
 	}
 
@@ -263,54 +862,106 @@
 				break;
 			}
 
+			if (ask(1, "Correct")) {
+				ret |= FSFATMOD;
+				p = buffer;
 
-			if (ask(1, "Correct"))
-				ret |= FSFIXFAT;
-		}
-	}
-	switch (boot->ClustMask) {
-	case CLUST32_MASK:
-		p = buffer + 8;
-		break;
-	case CLUST16_MASK:
-		p = buffer + 4;
-		break;
-	default:
-		p = buffer + 3;
-		break;
-	}
-	for (cl = CLUST_FIRST; cl < boot->NumClusters;) {
-		switch (boot->ClustMask) {
-		case CLUST32_MASK:
-			fat[cl].next = p[0] + (p[1] << 8)
-				       + (p[2] << 16) + (p[3] << 24);
-			fat[cl].next &= boot->ClustMask;
-			ret |= checkclnum(boot, no, cl, &fat[cl].next);
-			cl++;
-			p += 4;
-			break;
-		case CLUST16_MASK:
-			fat[cl].next = p[0] + (p[1] << 8);
-			ret |= checkclnum(boot, no, cl, &fat[cl].next);
-			cl++;
-			p += 2;
-			break;
-		default:
-			fat[cl].next = (p[0] + (p[1] << 8)) & 0x0fff;
-			ret |= checkclnum(boot, no, cl, &fat[cl].next);
-			cl++;
-			if (cl >= boot->NumClusters)
-				break;
-			fat[cl].next = ((p[1] >> 4) + (p[2] << 4)) & 0x0fff;
-			ret |= checkclnum(boot, no, cl, &fat[cl].next);
-			cl++;
-			p += 3;
-			break;
+				*p++ = (u_char)boot->bpbMedia;
+				*p++ = 0xff;
+				*p++ = 0xff;
+				switch (boot->ClustMask) {
+				case CLUST16_MASK:
+					*p++ = 0xff;
+					break;
+				case CLUST32_MASK:
+					*p++ = 0x0f;
+					*p++ = 0xff;
+					*p++ = 0xff;
+					*p++ = 0xff;
+					*p++ = 0x0f;
+					break;
+				default:
+					break;
+				}
+			}
 		}
 	}
 
-	free(buffer);
+	/*
+	 * Traverse the FAT table and populate head map.  Initially, we
+	 * consider all clusters as possible head cluster (beginning of
+	 * a file or directory), and traverse the whole allocation table
+	 * by marking every non-head nodes as such (detailed below) and
+	 * fix obvious issues while we walk.
+	 *
+	 * For each "next" cluster, the possible values are:
+	 *
+	 * a) CLUST_FREE or CLUST_BAD.  The *current* cluster can't be a
+	 *    head node.
+	 * b) An out-of-range value. The only fix would be to truncate at
+	 *    the cluster.
+	 * c) A valid cluster.  It means that cluster (nextcl) is not a
+	 *    head cluster.  Note that during the scan, every cluster is
+	 *    expected to be seen for at most once, and when we saw them
+	 *    twice, it means a cross-linked chain which should be
+	 *    truncated at the current cluster.
+	 *
+	 * After scan, the remaining set bits indicates all possible
+	 * head nodes, because they were never claimed by any other
+	 * node as the next node, but we do not know if these chains
+	 * would end with a valid EOF marker.  We will check that in
+	 * checkchain() at a later time when checking directories,
+	 * where these head nodes would be marked as non-head.
+	 *
+	 * In the final pass, all head nodes should be cleared, and if
+	 * there is still head nodes, these would be leaders of lost
+	 * chain.
+	 */
+	for (cl = CLUST_FIRST; cl < boot->NumClusters; cl++) {
+		nextcl = fat_get_cl_next(fat, cl);
+
+		/* Check if the next cluster number is valid */
+		if (nextcl == CLUST_FREE) {
+			/* Save a hint for next free cluster */
+			if (boot->FSNext == 0) {
+				boot->FSNext = cl;
+			}
+			if (fat_is_cl_head(fat, cl)) {
+				fat_clear_cl_head(fat, cl);
+			}
+			boot->NumFree++;
+		} else if (nextcl == CLUST_BAD) {
+			if (fat_is_cl_head(fat, cl)) {
+				fat_clear_cl_head(fat, cl);
+			}
+			boot->NumBad++;
+		} else if (!valid_cl(fat, nextcl) && nextcl < CLUST_EOFS) {
+			pwarn("Cluster %u continues with %s "
+			    "cluster number %u\n",
+			    cl, (nextcl < CLUST_RSRVD) ?
+				"out of range" : "reserved",
+			    nextcl & boot->ClustMask);
+			if (ask(0, "Truncate")) {
+				ret |= fat_set_cl_next(fat, cl, CLUST_EOF);
+				ret |= FSFATMOD;
+			}
+		} else if (nextcl < boot->NumClusters) {
+			if (fat_is_cl_head(fat, nextcl)) {
+				fat_clear_cl_head(fat, nextcl);
+			} else {
+				pwarn("Cluster %u crossed another chain at %u\n",
+				    cl, nextcl);
+				if (ask(0, "Truncate")) {
+					ret |= fat_set_cl_next(fat, cl, CLUST_EOF);
+					ret |= FSFATMOD;
+				}
+			}
+		}
+
+	}
+
 	if (ret & FSFATAL) {
+		releasefat(fat);
 		free(fat);
 		*fp = NULL;
 	} else
@@ -333,332 +984,202 @@
 	return "bad";
 }
 
-static int
-clustdiffer(cl_t cl, cl_t *cp1, cl_t *cp2, u_int fatnum)
-{
-	if (*cp1 == CLUST_FREE || *cp1 >= CLUST_RSRVD) {
-		if (*cp2 == CLUST_FREE || *cp2 >= CLUST_RSRVD) {
-			if ((*cp1 != CLUST_FREE && *cp1 < CLUST_BAD
-			     && *cp2 != CLUST_FREE && *cp2 < CLUST_BAD)
-			    || (*cp1 > CLUST_BAD && *cp2 > CLUST_BAD)) {
-				pwarn("Cluster %u is marked %s with different indicators\n",
-				      cl, rsrvdcltype(*cp1));
-				if (ask(1, "Fix")) {
-					*cp2 = *cp1;
-					return FSFATMOD;
-				}
-				return FSFATAL;
-			}
-			pwarn("Cluster %u is marked %s in FAT 0, %s in FAT %u\n",
-			      cl, rsrvdcltype(*cp1), rsrvdcltype(*cp2), fatnum);
-			if (ask(0, "Use FAT 0's entry")) {
-				*cp2 = *cp1;
-				return FSFATMOD;
-			}
-			if (ask(0, "Use FAT %u's entry", fatnum)) {
-				*cp1 = *cp2;
-				return FSFATMOD;
-			}
-			return FSFATAL;
-		}
-		pwarn("Cluster %u is marked %s in FAT 0, but continues with cluster %u in FAT %d\n",
-		      cl, rsrvdcltype(*cp1), *cp2, fatnum);
-		if (ask(0, "Use continuation from FAT %u", fatnum)) {
-			*cp1 = *cp2;
-			return FSFATMOD;
-		}
-		if (ask(0, "Use mark from FAT 0")) {
-			*cp2 = *cp1;
-			return FSFATMOD;
-		}
-		return FSFATAL;
-	}
-	if (*cp2 == CLUST_FREE || *cp2 >= CLUST_RSRVD) {
-		pwarn("Cluster %u continues with cluster %u in FAT 0, but is marked %s in FAT %u\n",
-		      cl, *cp1, rsrvdcltype(*cp2), fatnum);
-		if (ask(0, "Use continuation from FAT 0")) {
-			*cp2 = *cp1;
-			return FSFATMOD;
-		}
-		if (ask(0, "Use mark from FAT %d", fatnum)) {
-			*cp1 = *cp2;
-			return FSFATMOD;
-		}
-		return FSERROR;
-	}
-	pwarn("Cluster %u continues with cluster %u in FAT 0, but with cluster %u in FAT %u\n",
-	      cl, *cp1, *cp2, fatnum);
-	if (ask(0, "Use continuation from FAT 0")) {
-		*cp2 = *cp1;
-		return FSFATMOD;
-	}
-	if (ask(0, "Use continuation from FAT %u", fatnum)) {
-		*cp1 = *cp2;
-		return FSFATMOD;
-	}
-	return FSERROR;
-}
-
 /*
- * Compare two FAT copies in memory. Resolve any conflicts and merge them
- * into the first one.
+ * Offer to truncate a chain at the specified CL, called by checkchain().
  */
-int
-comparefat(struct bootblock *boot, struct fatEntry *first,
-    struct fatEntry *second, u_int fatnum)
+static inline int
+truncate_at(struct fat_descriptor *fat, cl_t current_cl, size_t *chainsize)
 {
-	cl_t cl;
-	int ret = FSOK;
-
-	for (cl = CLUST_FIRST; cl < boot->NumClusters; cl++)
-		if (first[cl].next != second[cl].next)
-			ret |= clustdiffer(cl, &first[cl].next, &second[cl].next, fatnum);
-	return ret;
-}
-
-void
-clearchain(struct bootblock *boot, struct fatEntry *fat, cl_t head)
-{
-	cl_t p, q;
-
-	for (p = head; p >= CLUST_FIRST && p < boot->NumClusters; p = q) {
-		if (fat[p].head != head)
-			break;
-		q = fat[p].next;
-		fat[p].next = fat[p].head = CLUST_FREE;
-		fat[p].length = 0;
-	}
-}
-
-int
-tryclear(struct bootblock *boot, struct fatEntry *fat, cl_t head, cl_t *truncp)
-{
-	if (ask(0, "Clear chain starting at %u", head)) {
-		clearchain(boot, fat, head);
-		return FSFATMOD;
-	} else if (ask(0, "Truncate")) {
-		uint32_t len;
-		cl_t p;
-
-		for (p = head, len = 0;
-		    p >= CLUST_FIRST && p < boot->NumClusters;
-		    p = fat[p].next, len++)
-			continue;
-		*truncp = CLUST_EOF;
-		fat[head].length = len;
-		return FSFATMOD;
-	} else
-		return FSERROR;
-}
-
-/*
- * Check a complete FAT in-memory for crosslinks
- */
-int
-checkfat(struct bootblock *boot, struct fatEntry *fat)
-{
-	cl_t head, p, h, n;
-	u_int len;
 	int ret = 0;
-	int conf;
 
-	/*
-	 * pass 1: figure out the cluster chains.
-	 */
-	for (head = CLUST_FIRST; head < boot->NumClusters; head++) {
-		/* find next untravelled chain */
-		if (fat[head].head != 0		/* cluster already belongs to some chain */
-		    || fat[head].next == CLUST_FREE
-		    || fat[head].next == CLUST_BAD)
-			continue;		/* skip it. */
-
-		/* follow the chain and mark all clusters on the way */
-		for (len = 0, p = head;
-		     p >= CLUST_FIRST && p < boot->NumClusters &&
-		     fat[p].head != head;
-		     p = fat[p].next) {
-			fat[p].head = head;
-			len++;
-		}
-
-		/* the head record gets the length */
-		fat[head].length = fat[head].next == CLUST_FREE ? 0 : len;
+	if (ask(0, "Truncate")) {
+		ret = fat_set_cl_next(fat, current_cl, CLUST_EOF);
+		(*chainsize)++;
+		return (ret | FSFATMOD);
+	} else {
+		return FSERROR;
 	}
-
-	/*
-	 * pass 2: check for crosslinked chains (we couldn't do this in pass 1 because
-	 * we didn't know the real start of the chain then - would have treated partial
-	 * chains as interlinked with their main chain)
-	 */
-	for (head = CLUST_FIRST; head < boot->NumClusters; head++) {
-		/* find next untravelled chain */
-		if (fat[head].head != head)
-			continue;
-
-		/* follow the chain to its end (hopefully) */
-		for (len = fat[head].length, p = head;
-		     (n = fat[p].next) >= CLUST_FIRST && n < boot->NumClusters;
-		     p = n)
-			if (fat[n].head != head || len-- < 2)
-				break;
-		if (n >= CLUST_EOFS)
-			continue;
-
-		if (n == CLUST_FREE || n >= CLUST_RSRVD) {
-			pwarn("Cluster chain starting at %u ends with cluster marked %s\n",
-			      head, rsrvdcltype(n));
-clear:
-			ret |= tryclear(boot, fat, head, &fat[p].next);
-			continue;
-		}
-		if (n < CLUST_FIRST || n >= boot->NumClusters) {
-			pwarn("Cluster chain starting at %u ends with cluster out of range (%u)\n",
-			    head, n);
-			goto clear;
-		}
-		if (head == fat[n].head) {
-			pwarn("Cluster chain starting at %u loops at cluster %u\n",
-			    head, p);
-			goto clear;
-		}
-		pwarn("Cluster chains starting at %u and %u are linked at cluster %u\n",
-		      head, fat[n].head, n);
-		conf = tryclear(boot, fat, head, &fat[p].next);
-		if (ask(0, "Clear chain starting at %u", h = fat[n].head)) {
-			if (conf == FSERROR) {
-				/*
-				 * Transfer the common chain to the one not cleared above.
-				 */
-				for (p = n;
-				     p >= CLUST_FIRST && p < boot->NumClusters;
-				     p = fat[p].next) {
-					if (h != fat[p].head) {
-						/*
-						 * Have to reexamine this chain.
-						 */
-						head--;
-						break;
-					}
-					fat[p].head = head;
-				}
-			}
-			clearchain(boot, fat, h);
-			conf |= FSFATMOD;
-		}
-		ret |= conf;
-	}
-
-	return ret;
 }
 
 /*
- * Write out FATs encoding them from the internal format
+ * Examine a cluster chain for errors and count its size.
  */
 int
-writefat(int fs, struct bootblock *boot, struct fatEntry *fat, int correct_fat)
+checkchain(struct fat_descriptor *fat, cl_t head, size_t *chainsize)
 {
-	u_char *buffer, *p;
-	cl_t cl;
-	u_int i;
-	size_t fatsz;
-	off_t off;
-	int ret = FSOK;
+	cl_t current_cl, next_cl;
 
-	fatsz = boot->FATsecs * boot->bpbBytesPerSec;
-	buffer = calloc(boot->FATsecs, boot->bpbBytesPerSec);
-	if (buffer == NULL) {
-		perr("No space for FAT sectors (%zu)",
-		    (size_t)boot->FATsecs);
-		return FSFATAL;
+	/*
+	 * We expect that the caller to give us a real, unvisited 'head'
+	 * cluster, and it must be a valid cluster.  While scanning the
+	 * FAT table, we already excluded all clusters that was claimed
+	 * as a "next" cluster.  Assert all the three conditions.
+	 */
+	assert(valid_cl(fat, head));
+	assert(fat_is_cl_head(fat, head));
+
+	/*
+	 * Immediately mark the 'head' cluster that we are about to visit.
+	 */
+	fat_clear_cl_head(fat, head);
+
+	/*
+	 * The allocation of a non-zero sized file or directory is
+	 * represented as a singly linked list, and the tail node
+	 * would be the EOF marker (>=CLUST_EOFS).
+	 *
+	 * With a valid head node at hand, we expect all subsequent
+	 * cluster to be either a not yet seen and valid cluster (we
+	 * would continue counting), or the EOF marker (we conclude
+	 * the scan of this chain).
+	 *
+	 * For all other cases, the chain is invalid, and the only
+	 * viable fix would be to truncate at the current node (mark
+	 * it as EOF) when the next node violates that.
+	 */
+	*chainsize = 0;
+	current_cl = head;
+	for (next_cl = fat_get_cl_next(fat, current_cl);
+	    valid_cl(fat, next_cl);
+	    current_cl = next_cl, next_cl = fat_get_cl_next(fat, current_cl))
+		(*chainsize)++;
+
+	/* A natural end */
+	if (next_cl >= CLUST_EOFS) {
+		(*chainsize)++;
+		return FSOK;
 	}
-	boot->NumFree = 0;
-	p = buffer;
-	if (correct_fat) {
-		*p++ = (u_char)boot->bpbMedia;
-		*p++ = 0xff;
-		*p++ = 0xff;
-		switch (boot->ClustMask) {
-		case CLUST16_MASK:
-			*p++ = 0xff;
-			break;
-		case CLUST32_MASK:
-			*p++ = 0x0f;
-			*p++ = 0xff;
-			*p++ = 0xff;
-			*p++ = 0xff;
-			*p++ = 0x0f;
-			break;
+
+	/* The chain ended with an out-of-range cluster number. */
+	pwarn("Cluster %u continues with %s cluster number %u\n",
+	    current_cl,
+	    next_cl < CLUST_RSRVD ? "out of range" : "reserved",
+	    next_cl & boot_of_(fat)->ClustMask);
+	return (truncate_at(fat, current_cl, chainsize));
+}
+
+/*
+ * Clear cluster chain from head.
+ */
+void
+clearchain(struct fat_descriptor *fat, cl_t head)
+{
+	cl_t current_cl, next_cl;
+	struct bootblock *boot = boot_of_(fat);
+
+	current_cl = head;
+
+	while (valid_cl(fat, current_cl)) {
+		next_cl = fat_get_cl_next(fat, head);
+		(void)fat_set_cl_next(fat, current_cl, CLUST_FREE);
+		boot->NumFree++;
+		current_cl = next_cl;
+	}
+
+}
+
+/*
+ * Overwrite the n-th FAT with FAT0
+ */
+static int
+copyfat(struct fat_descriptor *fat, int n)
+{
+	size_t	rwsize, tailsize, blobs, i;
+	off_t	dst_off, src_off;
+	struct bootblock *boot;
+	int ret, fd;
+
+	ret = FSOK;
+	fd = fd_of_(fat);
+	boot = boot_of_(fat);
+
+	blobs = howmany(fat->fatsize, fat32_cache_size);
+	tailsize = fat->fatsize % fat32_cache_size;
+	if (tailsize == 0) {
+		tailsize = fat32_cache_size;
+	}
+	rwsize = fat32_cache_size;
+
+	src_off = fat->fat32_offset;
+	dst_off = boot->bpbResSectors + n * boot->FATsecs;
+	dst_off *= boot->bpbBytesPerSec;
+
+	for (i = 0; i < blobs;
+	    i++, src_off += fat32_cache_size, dst_off += fat32_cache_size) {
+		if (i == blobs - 1) {
+			rwsize = tailsize;
+		}
+		if ((lseek(fd, src_off, SEEK_SET) != src_off ||
+		    (size_t)read(fd, fat->fatbuf, rwsize) != rwsize) &&
+		    ret == FSOK) {
+			perr("Unable to read FAT0");
+			ret = FSFATAL;
+			continue;
+		}
+		if ((lseek(fd, dst_off, SEEK_SET) != dst_off ||
+			(size_t)write(fd, fat->fatbuf, rwsize) != rwsize) &&
+			ret == FSOK) {
+			perr("Unable to write FAT %d", n);
+			ret = FSERROR;
+		}
+	}
+	return (ret);
+}
+
+/*
+ * Write out FAT
+ */
+int
+writefat(struct fat_descriptor *fat)
+{
+	u_int i;
+	size_t writesz;
+	off_t dst_base;
+	int ret = FSOK, fd;
+	struct bootblock *boot;
+	struct fat32_cache_entry *entry;
+
+	boot = boot_of_(fat);
+	fd = fd_of_(fat);
+
+	if (fat->use_cache) {
+		/*
+		 * Attempt to flush all in-flight cache, and bail out
+		 * if we encountered an error (but only emit error
+		 * message once).  Stop proceeding with copyfat()
+		 * if any flush failed.
+		 */
+		TAILQ_FOREACH(entry, &fat->fat32_cache_head, entries) {
+			if (fat_flush_fat32_cache_entry(fat, entry) != FSOK) {
+				if (ret == FSOK) {
+					perr("Unable to write FAT");
+					ret = FSFATAL;
+				}
+			}
+		}
+		if (ret != FSOK)
+			return (ret);
+
+		/* Update backup copies of FAT, error is not fatal */
+		for (i = 1; i < boot->bpbFATs; i++) {
+			if (copyfat(fat, i) != FSOK)
+				ret = FSERROR;
 		}
 	} else {
-		/* use same FAT signature as the old FAT has */
-		int count;
-		u_char *old_fat;
+		writesz = fat->fatsize;
 
-		switch (boot->ClustMask) {
-		case CLUST32_MASK:
-			count = 8;
-			break;
-		case CLUST16_MASK:
-			count = 4;
-			break;
-		default:
-			count = 3;
-			break;
-		}
-
-		if (!_readfat(fs, boot, boot->ValidFat >= 0 ? boot->ValidFat :0,
-					 &old_fat)) {
-			free(buffer);
-			return FSFATAL;
-		}
-
-		memcpy(p, old_fat, count);
-		free(old_fat);
-		p += count;
-	}
-
-	for (cl = CLUST_FIRST; cl < boot->NumClusters; cl++) {
-		switch (boot->ClustMask) {
-		case CLUST32_MASK:
-			if (fat[cl].next == CLUST_FREE)
-				boot->NumFree++;
-			*p++ = (u_char)fat[cl].next;
-			*p++ = (u_char)(fat[cl].next >> 8);
-			*p++ = (u_char)(fat[cl].next >> 16);
-			*p &= 0xf0;
-			*p++ |= (fat[cl].next >> 24)&0x0f;
-			break;
-		case CLUST16_MASK:
-			if (fat[cl].next == CLUST_FREE)
-				boot->NumFree++;
-			*p++ = (u_char)fat[cl].next;
-			*p++ = (u_char)(fat[cl].next >> 8);
-			break;
-		default:
-			if (fat[cl].next == CLUST_FREE)
-				boot->NumFree++;
-			*p++ = (u_char)fat[cl].next;
-			*p = (u_char)((fat[cl].next >> 8) & 0xf);
-			cl++;
-			if (cl >= boot->NumClusters)
-				break;
-			if (fat[cl].next == CLUST_FREE)
-				boot->NumFree++;
-			*p++ |= (u_char)(fat[cl].next << 4);
-			*p++ = (u_char)(fat[cl].next >> 4);
-			break;
+		for (i = fat->is_mmapped ? 1 : 0; i < boot->bpbFATs; i++) {
+			dst_base = boot->bpbResSectors + i * boot->FATsecs;
+			dst_base *= boot->bpbBytesPerSec;
+			if ((lseek(fd, dst_base, SEEK_SET) != dst_base ||
+			    (size_t)write(fd, fat->fatbuf, writesz) != writesz) &&
+			    ret == FSOK) {
+				perr("Unable to write FAT %d", i);
+				ret = ((i == 0) ? FSFATAL : FSERROR);
+			}
 		}
 	}
-	for (i = 0; i < boot->bpbFATs; i++) {
-		off = boot->bpbResSectors + i * boot->FATsecs;
-		off *= boot->bpbBytesPerSec;
-		if (lseek(fs, off, SEEK_SET) != off
-		    || (size_t)write(fs, buffer, fatsz) != fatsz) {
-			perr("Unable to write FAT");
-			ret = FSFATAL; /* Return immediately?		XXX */
-		}
-	}
-	free(buffer);
+
 	return ret;
 }
 
@@ -666,31 +1187,55 @@
  * Check a complete in-memory FAT for lost cluster chains
  */
 int
-checklost(int dosfs, struct bootblock *boot, struct fatEntry *fat)
+checklost(struct fat_descriptor *fat)
 {
 	cl_t head;
 	int mod = FSOK;
-	int ret;
+	int dosfs, ret;
+	size_t chains, chainlength;
+	struct bootblock *boot;
 
-	for (head = CLUST_FIRST; head < boot->NumClusters; head++) {
-		/* find next untravelled chain */
-		if (fat[head].head != head
-		    || fat[head].next == CLUST_FREE
-		    || (fat[head].next >= CLUST_RSRVD
-			&& fat[head].next < CLUST_EOFS)
-		    || (fat[head].flags & FAT_USED))
+	dosfs = fd_of_(fat);
+	boot = boot_of_(fat);
+
+	/*
+	 * At this point, we have already traversed all directories.
+	 * All remaining chain heads in the bitmap are heads of lost
+	 * chains.
+	 */
+	chains = fat_get_head_count(fat);
+	for (head = CLUST_FIRST;
+	    chains > 0 && head < boot->NumClusters;
+	    ) {
+		/*
+		 * We expect the bitmap to be very sparse, so skip if
+		 * the range is full of 0's
+		 */
+		if (head % LONG_BIT == 0 &&
+		    !fat_is_cl_head_in_range(fat, head)) {
+			head += LONG_BIT;
 			continue;
-
-		pwarn("Lost cluster chain at cluster %u\n%d Cluster(s) lost\n",
-		      head, fat[head].length);
-		mod |= ret = reconnect(dosfs, boot, fat, head);
-		if (mod & FSFATAL)
-			break;
-		if (ret == FSERROR && ask(0, "Clear")) {
-			clearchain(boot, fat, head);
-			mod |= FSFATMOD;
 		}
+		if (fat_is_cl_head(fat, head)) {
+			ret = checkchain(fat, head, &chainlength);
+			if (ret != FSERROR) {
+				pwarn("Lost cluster chain at cluster %u\n"
+				    "%zd Cluster(s) lost\n",
+				    head, chainlength);
+				mod |= ret = reconnect(fat, head,
+				    chainlength);
+			}
+			if (mod & FSFATAL)
+				break;
+			if (ret == FSERROR && ask(0, "Clear")) {
+				clearchain(fat, head);
+				mod |= FSFATMOD;
+			}
+			chains--;
+		}
+		head++;
 	}
+
 	finishlf();
 
 	if (boot->bpbFSInfo) {
@@ -706,13 +1251,13 @@
 		}
 		if (boot->FSNext != 0xffffffffU &&
 		    (boot->FSNext >= boot->NumClusters ||
-		    (boot->NumFree && fat[boot->FSNext].next != CLUST_FREE))) {
+		    (boot->NumFree && fat_get_cl_next(fat, boot->FSNext) != CLUST_FREE))) {
 			pwarn("Next free cluster in FSInfo block (%u) %s\n",
 			      boot->FSNext,
 			      (boot->FSNext >= boot->NumClusters) ? "invalid" : "not free");
-			if (ask(1, "fix"))
+			if (ask(1, "Fix"))
 				for (head = CLUST_FIRST; head < boot->NumClusters; head++)
-					if (fat[head].next == CLUST_FREE) {
+					if (fat_get_cl_next(fat, head) == CLUST_FREE) {
 						boot->FSNext = head;
 						ret = 1;
 						break;
diff --git a/freebsd-compat.h b/freebsd-compat.h
new file mode 100644
index 0000000..ec6b218
--- /dev/null
+++ b/freebsd-compat.h
@@ -0,0 +1,109 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *      The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* From FreeBSD sys/sys/param.h */
+
+#ifndef roundup2
+#define roundup2(x, y)  (((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */
+#endif
+
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2002 Thomas Moestl <tmm@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+
+/* From FreeBSD sys/sys/endian.h */
+
+static __inline void
+le16enc(void *pp, uint16_t u)
+{
+	uint8_t *p = (uint8_t *)pp;
+
+	p[0] = u & 0xff;
+	p[1] = (u >> 8) & 0xff;
+}
+
+static __inline void
+le32enc(void *pp, uint32_t u)
+{
+	uint8_t *p = (uint8_t *)pp;
+
+	p[0] = u & 0xff;
+	p[1] = (u >> 8) & 0xff;
+	p[2] = (u >> 16) & 0xff;
+	p[3] = (u >> 24) & 0xff;
+}
+
+static __inline uint16_t
+le16dec(const void *pp)
+{
+	uint8_t const *p = (uint8_t const *)pp;
+
+	return ((p[1] << 8) | p[0]);
+}
+
+static __inline uint32_t
+le32dec(const void *pp)
+{
+	uint8_t const *p = (uint8_t const *)pp;
+
+	return (((unsigned)p[3] << 24) | (p[2] << 16) | (p[1] << 8) | p[0]);
+}
diff --git a/main.c b/main.c
index 6802afc..de54cd1 100644
--- a/main.c
+++ b/main.c
@@ -48,6 +48,7 @@
 int preen;		/* set when preening */
 int rdonly;		/* device is opened read only (supersedes above) */
 int skipclean;		/* skip clean file systems if preening */
+int allow_mmap;		/* Allow the use of mmap(), if possible */
 
 static void usage(void) __dead2;
 
@@ -68,7 +69,8 @@
 	int ch;
 
 	skipclean = 1;
-	while ((ch = getopt(argc, argv, "CfFnpy")) != -1) {
+	allow_mmap = 1;
+	while ((ch = getopt(argc, argv, "CfFnpyM")) != -1) {
 		switch (ch) {
 		case 'C': /* for fsck_ffs compatibility */
 			break;
@@ -98,6 +100,10 @@
 			preen = 1;
 			break;
 
+		case 'M':
+			allow_mmap = 0;
+			break;
+
 		default:
 			usage();
 			break;