procrank: add proportional swap accounting

Pages shared in swap are counted in full in the swapped pages of each
process sharing them. Compute a proportional swap usage to get a meaningful
value of what each process has in swap. Also report the swapped-out pages
that are unique to each process. When ZRAM is used, compute the compression
ratio and report the actual RAM use of the swapped pages.

Bug: 25392275
Change-Id: I3a28c7812a09a02e1a604593615f5c6ad0340f9f
diff --git a/libpagemap/include/pagemap/pagemap.h b/libpagemap/include/pagemap/pagemap.h
index 9063b1e..4de2b4b 100644
--- a/libpagemap/include/pagemap/pagemap.h
+++ b/libpagemap/include/pagemap/pagemap.h
@@ -21,9 +21,19 @@
 #include <stdio.h>
 #include <sys/cdefs.h>
 #include <sys/types.h>
+#include <sys/queue.h>
 
 __BEGIN_DECLS
 
+typedef struct pm_proportional_swap pm_proportional_swap_t;
+
+typedef struct pm_swap_offset pm_swap_offset_t;
+
+struct pm_swap_offset {    /* one swapped-out page, queued on a pm_memusage swap_offset_list */
+    unsigned int offset;    /* swap offset of the page, as extracted from its pagemap entry */
+    SIMPLEQ_ENTRY(pm_swap_offset) simpleqe;    /* link to the next queued offset */
+};
+
 typedef struct pm_memusage pm_memusage_t;
 
 /* Holds the various metrics for memory usage of a process or a mapping. */
@@ -33,12 +43,32 @@
     size_t pss;
     size_t uss;
     size_t swap;
+    /* if non NULL then use swap_offset_list to compute proportional swap */
+    pm_proportional_swap_t *p_swap;
+    SIMPLEQ_HEAD(simpleqhead, pm_swap_offset) swap_offset_list;
+};
+
+typedef struct pm_swapusage pm_swapusage_t;
+struct pm_swapusage {    /* result of pm_memusage_pswap_get_usage(), in bytes */
+    size_t proportional;    /* each swapped page counts for pagesize / (references to its offset) */
+    size_t unique;          /* swapped pages whose swap offset is referenced exactly once */
 };
 
 /* Clears a memusage. */
 void pm_memusage_zero(pm_memusage_t *mu);
 /* Adds one memusage (a) to another (b). */
 void pm_memusage_add(pm_memusage_t *a, pm_memusage_t *b);
+/* Records the swap offset of a swapped page in mu; does nothing if mu has no p_swap handle. */
+void pm_memusage_pswap_add_offset(pm_memusage_t *mu, unsigned int offset);
+/* Enables proportional swap computing for mu by attaching the handle p_swap. */
+void pm_memusage_pswap_init_handle(pm_memusage_t *mu, pm_proportional_swap_t *p_swap);
+/* Computes the proportional and unique swap usage of mu, in bytes, and stores them in su. */
+void pm_memusage_pswap_get_usage(pm_memusage_t *mu, pm_swapusage_t *su);
+void pm_memusage_pswap_free(pm_memusage_t *mu); /* Frees the swap offsets recorded in mu. */
+/* Creates a proportional swap computing handle. Assumes a single swap device; swap_size is
+   the total size of that device in bytes. Returns NULL if an allocation fails. */
+pm_proportional_swap_t * pm_memusage_pswap_create(int swap_size);
+void pm_memusage_pswap_destroy(pm_proportional_swap_t *p_swap); /* Frees a handle; NULL is ok. */
 
 typedef struct pm_kernel   pm_kernel_t;
 typedef struct pm_process  pm_process_t;
diff --git a/libpagemap/pm_map.c b/libpagemap/pm_map.c
index c6a1798..301a1cc 100644
--- a/libpagemap/pm_map.c
+++ b/libpagemap/pm_map.c
@@ -42,12 +42,13 @@
     if (error) return error;
 
     pm_memusage_zero(&usage);
+    pm_memusage_pswap_init_handle(&usage, usage_out->p_swap);
 
     for (i = 0; i < len; i++) {
         usage.vss += map->proc->ker->pagesize;
 
         if (!PM_PAGEMAP_PRESENT(pagemap[i]) &&
-	    !PM_PAGEMAP_SWAPPED(pagemap[i]))
+                !PM_PAGEMAP_SWAPPED(pagemap[i]))
             continue;
 
         if (!PM_PAGEMAP_SWAPPED(pagemap[i])) {
@@ -70,6 +71,7 @@
             usage.uss += (count == 1) ? (map->proc->ker->pagesize) : (0);
         } else {
             usage.swap += map->proc->ker->pagesize;
+            pm_memusage_pswap_add_offset(&usage, PM_PAGEMAP_SWAP_OFFSET(pagemap[i]));
         }
     }
 
@@ -77,7 +79,7 @@
 
     error = 0;
 
-out:    
+out:
     free(pagemap);
 
     return error;
@@ -101,13 +103,13 @@
     if (error) return error;
 
     pm_memusage_zero(&ws);
-    
+
     for (i = 0; i < len; i++) {
         error = pm_kernel_flags(map->proc->ker, PM_PAGEMAP_PFN(pagemap[i]),
                                 &flags);
         if (error) goto out;
 
-        if (!(flags & PM_PAGE_REFERENCED)) 
+        if (!(flags & PM_PAGE_REFERENCED))
             continue;
 
         error = pm_kernel_count(map->proc->ker, PM_PAGEMAP_PFN(pagemap[i]),
diff --git a/libpagemap/pm_memusage.c b/libpagemap/pm_memusage.c
index ea2a003..70cfede 100644
--- a/libpagemap/pm_memusage.c
+++ b/libpagemap/pm_memusage.c
@@ -14,10 +14,37 @@
  * limitations under the License.
  */
 
+#include <stdlib.h>
+#include <unistd.h>
+
 #include <pagemap/pagemap.h>
 
+#define SIMPLEQ_INSERT_SIMPLEQ_TAIL(head_a, head_b)             \
+    do {                                                        \
+        if (!SIMPLEQ_EMPTY(head_b)) {                           \
+            if ((head_a)->sqh_first == NULL)                    \
+                (head_a)->sqh_first = (head_b)->sqh_first;      \
+            *(head_a)->sqh_last = (head_b)->sqh_first;          \
+            (head_a)->sqh_last = (head_b)->sqh_last;            \
+        }                                                       \
+    } while (/*CONSTCOND*/0)
+
+/* Reference counts of the swap offsets are kept in an array of unsigned short indexed by
+   offset: 1 GiB of swap means a 512 KiB array (assuming 4 KiB pages and 2-byte entries). */
+typedef unsigned short pm_pswap_refcount_t;
+struct pm_proportional_swap {
+    unsigned int array_size;    /* number of entries in offset_array */
+    pm_pswap_refcount_t *offset_array;    /* offset_array[offset] = references seen so far */
+};
+
 void pm_memusage_zero(pm_memusage_t *mu) {
     mu->vss = mu->rss = mu->pss = mu->uss = mu->swap = 0;
+    mu->p_swap = NULL;    /* proportional swap accounting stays off until a handle is attached */
+    SIMPLEQ_INIT(&mu->swap_offset_list);    /* empties the list without freeing queued offsets */
+}
+
+void pm_memusage_pswap_init_handle(pm_memusage_t *mu, pm_proportional_swap_t *p_swap) {
+    mu->p_swap = p_swap;    /* only stores the pointer; a NULL p_swap leaves accounting disabled */
 }
 
 void pm_memusage_add(pm_memusage_t *a, pm_memusage_t *b) {
@@ -26,4 +53,80 @@
     a->pss += b->pss;
     a->uss += b->uss;
     a->swap += b->swap;
+    SIMPLEQ_INSERT_SIMPLEQ_TAIL(&a->swap_offset_list, &b->swap_offset_list);
+}
+
+/* Allocates a handle for swap_size bytes of swap; prints to stderr and returns NULL on failure. */
+pm_proportional_swap_t * pm_memusage_pswap_create(int swap_size)
+{
+    pm_proportional_swap_t *handle = malloc(sizeof(pm_proportional_swap_t));
+
+    if (handle == NULL) {
+        fprintf(stderr, "Error allocating proportional swap.\n");
+        return NULL;
+    }
+    handle->array_size = swap_size / getpagesize();    /* one zeroed counter per swap page */
+    handle->offset_array = calloc(handle->array_size, sizeof(pm_pswap_refcount_t));
+    if (handle->offset_array == NULL) {
+        fprintf(stderr, "Error allocating proportional swap offset array.\n");
+        free(handle);
+        return NULL;
+    }
+
+    return handle;
+}
+
+void pm_memusage_pswap_destroy(pm_proportional_swap_t *p_swap) {
+    if (p_swap == NULL)    /* nothing to release for a NULL handle */
+        return;
+    free(p_swap->offset_array);    /* the counters first, then the handle that owns them */
+    free(p_swap);
+}
+
+/* Counts one more reference to swap slot `offset` and queues the offset on mu's list. */
+void pm_memusage_pswap_add_offset(pm_memusage_t *mu, unsigned int offset) {
+    pm_swap_offset_t *soff;
+
+    if (mu->p_swap == NULL)    /* proportional swap accounting is disabled for mu */
+        return;
+    /* Valid indexes are 0 .. array_size - 1, so offset == array_size is out of bounds too. */
+    if (offset >= mu->p_swap->array_size) {
+        fprintf(stderr, "SWAP offset %u is out of swap bounds.\n", offset);
+        return;
+    }
+    if (mu->p_swap->offset_array[offset] == USHRT_MAX) {    /* saturate instead of wrapping */
+        fprintf(stderr, "SWAP offset %u ref. count if overflowing ushort type.\n", offset);
+    } else {
+        mu->p_swap->offset_array[offset]++;
+    }
+    /* Best effort: when malloc fails the reference stays counted but the offset is not queued. */
+    soff = malloc(sizeof(pm_swap_offset_t));
+    if (soff) {
+        soff->offset = offset;
+        SIMPLEQ_INSERT_TAIL(&mu->swap_offset_list, soff, simpleqe);
+    }
+}
+
+/* Fills su from mu's queued offsets: pagesize / refcount per page, plus the pages seen once. */
+void pm_memusage_pswap_get_usage(pm_memusage_t *mu, pm_swapusage_t *su) {
+    int pagesize = getpagesize();
+    pm_swap_offset_t *entry;
+
+    if (su == NULL)
+        return;
+    su->proportional = su->unique = 0;
+    SIMPLEQ_FOREACH(entry, &mu->swap_offset_list, simpleqe) {
+        pm_pswap_refcount_t refs = mu->p_swap->offset_array[entry->offset];
+        su->proportional += pagesize / refs;
+        su->unique += (refs == 1) ? pagesize : 0;
+    }
+}
+
+/* Frees every swap offset entry queued on mu. */
+void pm_memusage_pswap_free(pm_memusage_t *mu) {
+    pm_swap_offset_t *head;
+    while ((head = SIMPLEQ_FIRST(&mu->swap_offset_list)) != NULL) {
+        SIMPLEQ_REMOVE_HEAD(&mu->swap_offset_list, simpleqe);
+        free(head);
+    }
 }
diff --git a/libpagemap/pm_process.c b/libpagemap/pm_process.c
index 4d56428..eee3464 100644
--- a/libpagemap/pm_process.c
+++ b/libpagemap/pm_process.c
@@ -81,6 +81,10 @@
         return -1;
 
     pm_memusage_zero(&usage);
+    pm_memusage_pswap_init_handle(&usage, usage_out->p_swap);
+
+    pm_memusage_zero(&map_usage);
+    pm_memusage_pswap_init_handle(&map_usage, usage_out->p_swap);
 
     for (i = 0; i < proc->num_maps; i++) {
         error = pm_map_usage_flags(proc->maps[i], &map_usage, flags_mask,
@@ -185,6 +189,11 @@
 
     if (ws_out) {
         pm_memusage_zero(&ws);
+        pm_memusage_pswap_init_handle(&ws, ws_out->p_swap);
+
+        pm_memusage_zero(&map_ws);
+        pm_memusage_pswap_init_handle(&map_ws, ws_out->p_swap);
+
         for (i = 0; i < proc->num_maps; i++) {
             error = pm_map_workingset(proc->maps[i], &map_ws);
             if (error) return error;
diff --git a/procrank/procrank.c b/procrank/procrank.c
index ab50b64..881f110 100644
--- a/procrank/procrank.c
+++ b/procrank/procrank.c
@@ -48,9 +48,26 @@
 int (*compfn)(const void *a, const void *b);
 static int order;
 
-void print_mem_info() {
+enum {    /* indexes into the mem[] array passed to get_mem_info() */
+    MEMINFO_TOTAL,    /* NOTE(review): presumably in the same order as tags[] in get_mem_info() - confirm */
+    MEMINFO_FREE,
+    MEMINFO_BUFFERS,
+    MEMINFO_CACHED,
+    MEMINFO_SHMEM,
+    MEMINFO_SLAB,
+    MEMINFO_SWAP_TOTAL,
+    MEMINFO_SWAP_FREE,
+    MEMINFO_ZRAM_TOTAL,    /* read from /sys/block/zram0/mem_used_total, not from /proc/meminfo */
+    MEMINFO_MAPPED,
+    MEMINFO_VMALLOC_USED,
+    MEMINFO_PAGE_TABLES,
+    MEMINFO_KERNEL_STACK,
+    MEMINFO_COUNT    /* number of entries; sizes the mem[] array in main() */
+};
+
+void get_mem_info(uint64_t mem[]) {
     char buffer[1024];
-    int numFound = 0;
+    unsigned int numFound = 0;
 
     int fd = open("/proc/meminfo", O_RDONLY);
 
@@ -75,6 +92,13 @@
             "Cached:",
             "Shmem:",
             "Slab:",
+            "SwapTotal:",
+            "SwapFree:",
+            "ZRam:",            /* not read from meminfo but from /sys/block/zram0 */
+            "Mapped:",
+            "VmallocUsed:",
+            "PageTables:",
+            "KernelStack:",
             NULL
     };
     static const int tagsLen[] = {
@@ -84,12 +108,18 @@
             7,
             6,
             5,
+            10,
+            9,
+            5,
+            7,
+            12,
+            11,
+            12,
             0
     };
-    uint64_t mem[] = { 0, 0, 0, 0, 0, 0 };
 
     char* p = buffer;
-    while (*p && numFound < 6) {
+    while (*p && (numFound < (sizeof(tagsLen) / sizeof(tagsLen[0])))) {
         int i = 0;
         while (tags[i]) {
             if (strncmp(p, tags[i], tagsLen[i]) == 0) {
@@ -112,10 +142,6 @@
         }
         if (*p) p++;
     }
-
-    printf("RAM: %" PRIu64 "K total, %" PRIu64 "K free, %" PRIu64 "K buffers, "
-            "%" PRIu64 "K cached, %" PRIu64 "K shmem, %" PRIu64 "K slab\n",
-            mem[0], mem[1], mem[2], mem[3], mem[4], mem[5]);
 }
 
 int main(int argc, char *argv[]) {
@@ -127,9 +153,12 @@
     uint64_t total_pss;
     uint64_t total_uss;
     uint64_t total_swap;
+    uint64_t total_pswap;
+    uint64_t total_uswap;
+    uint64_t total_zswap;
     char cmdline[256]; // this must be within the range of int
     int error;
-    bool has_swap = false;
+    bool has_swap = false, has_zram = false;
     uint64_t required_flags = 0;
     uint64_t flags_mask = 0;
 
@@ -141,6 +170,12 @@
     int arg;
     size_t i, j;
 
+    uint64_t mem[MEMINFO_COUNT] = { };
+    pm_proportional_swap_t *p_swap;
+    int fd, len;
+    char buffer[1024];
+    float zram_cr = 0.0;
+
     signal(SIGPIPE, SIG_IGN);
     compfn = &sort_by_pss;
     order = -1;
@@ -164,6 +199,9 @@
         exit(EXIT_FAILURE);
     }
 
+    get_mem_info(mem);
+    p_swap = pm_memusage_pswap_create(mem[MEMINFO_SWAP_TOTAL] * 1024);
+
     error = pm_kernel_create(&ker);
     if (error) {
         fprintf(stderr, "Error creating kernel interface -- "
@@ -191,6 +229,7 @@
         }
         procs[i]->pid = pids[i];
         pm_memusage_zero(&procs[i]->usage);
+        pm_memusage_pswap_init_handle(&procs[i]->usage, p_swap);
         error = pm_process_create(ker, pids[i], &proc);
         if (error) {
             fprintf(stderr, "warning: could not create process interface for %d\n", pids[i]);
@@ -237,16 +276,37 @@
 
     qsort(procs, num_procs, sizeof(procs[0]), compfn);
 
+    if (has_swap) {
+        fd = open("/sys/block/zram0/mem_used_total", O_RDONLY);
+        if (fd >= 0) {
+            len = read(fd, buffer, sizeof(buffer)-1);
+            close(fd);
+            if (len > 0) {
+                buffer[len] = 0;
+                mem[MEMINFO_ZRAM_TOTAL] = atoll(buffer)/1024;
+                zram_cr = (float) mem[MEMINFO_ZRAM_TOTAL] /
+                        (mem[MEMINFO_SWAP_TOTAL] - mem[MEMINFO_SWAP_FREE]);
+                has_zram = true;
+            }
+        }
+    }
+
     printf("%5s  ", "PID");
     if (ws) {
         printf("%7s  %7s  %7s  ", "WRss", "WPss", "WUss");
         if (has_swap) {
-            printf("%7s  ", "WSwap");
+            printf("%7s  %7s  %7s  ", "WSwap", "WPSwap", "WUSwap");
+            if (has_zram) {
+                printf("%7s  ", "WZSwap");
+            }
         }
     } else {
         printf("%8s  %7s  %7s  %7s  ", "Vss", "Rss", "Pss", "Uss");
         if (has_swap) {
-            printf("%7s  ", "Swap");
+            printf("%7s  %7s  %7s  ", "Swap", "PSwap", "USwap");
+            if (has_zram) {
+                printf("%7s  ", "ZSwap");
+            }
         }
     }
 
@@ -255,6 +315,9 @@
     total_pss = 0;
     total_uss = 0;
     total_swap = 0;
+    total_pswap = 0;
+    total_uswap = 0;
+    total_zswap = 0;
 
     for (i = 0; i < num_procs; i++) {
         if (getprocname(procs[i]->pid, cmdline, (int)sizeof(cmdline)) < 0) {
@@ -288,7 +351,20 @@
         }
 
         if (has_swap) {
+            pm_swapusage_t su;
+
+            pm_memusage_pswap_get_usage(&procs[i]->usage, &su);
             printf("%6zuK  ", procs[i]->usage.swap / 1024);
+            printf("%6zuK  ", su.proportional / 1024);
+            printf("%6zuK  ", su.unique / 1024);
+            total_pswap += su.proportional;
+            total_uswap += su.unique;
+            pm_memusage_pswap_free(&procs[i]->usage);
+            if (has_zram) {
+                size_t zpswap = su.proportional * zram_cr;
+                printf("%6zuK  ", zpswap / 1024);
+                total_zswap += zpswap;
+            }
         }
 
         printf("%s\n", cmdline);
@@ -297,6 +373,7 @@
     }
 
     free(procs);
+    pm_memusage_pswap_destroy(p_swap);
 
     /* Print the separator line */
     printf("%5s  ", "");
@@ -308,7 +385,10 @@
     }
 
     if (has_swap) {
-        printf("%7s  ", "------");
+        printf("%7s  %7s  %7s  ", "------", "------", "------");
+        if (has_zram) {
+            printf("%7s  ", "------");
+        }
     }
 
     printf("%s\n", "------");
@@ -325,12 +405,27 @@
 
     if (has_swap) {
         printf("%6" PRIu64 "K  ", total_swap / 1024);
+        printf("%6" PRIu64 "K  ", total_pswap / 1024);
+        printf("%6" PRIu64 "K  ", total_uswap / 1024);
+        if (has_zram) {
+            printf("%6" PRIu64 "K  ", total_zswap / 1024);
+        }
     }
 
     printf("TOTAL\n");
 
     printf("\n");
-    print_mem_info();
+
+    if (has_swap) {
+        printf("ZRAM: %" PRIu64 "K physical used for %" PRIu64 "K in swap "
+                "(%" PRIu64 "K total swap)\n",
+                mem[MEMINFO_ZRAM_TOTAL], (mem[MEMINFO_SWAP_TOTAL] - mem[MEMINFO_SWAP_FREE]),
+                mem[MEMINFO_SWAP_TOTAL]);
+    }
+    printf(" RAM: %" PRIu64 "K total, %" PRIu64 "K free, %" PRIu64 "K buffers, "
+            "%" PRIu64 "K cached, %" PRIu64 "K shmem, %" PRIu64 "K slab\n",
+            mem[MEMINFO_TOTAL], mem[MEMINFO_FREE], mem[MEMINFO_BUFFERS],
+            mem[MEMINFO_CACHED], mem[MEMINFO_SHMEM], mem[MEMINFO_SLAB]);
 
     return 0;
 }