blob: 6dc14a9c688966addc6ef949209fa70a70d00024 [file] [log] [blame]
/*
* Kernel iptables module to track stats for packets based on user tags.
*
* (C) 2011 Google, Inc
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#ifndef __XT_QTAGUID_INTERNAL_H__
#define __XT_QTAGUID_INTERNAL_H__
#include <linux/types.h>
#include <linux/rbtree.h>
#include <linux/spinlock_types.h>
#include <linux/workqueue.h>
/* Iface handling */
#define IDEBUG_MASK (1<<0)
/* Iptable Matching. Per packet. */
#define MDEBUG_MASK (1<<1)
/* Red-black tree handling. Per packet. */
#define RDEBUG_MASK (1<<2)
/* procfs ctrl/stats handling */
#define CDEBUG_MASK (1<<3)
/* dev and resource tracking */
#define DDEBUG_MASK (1<<4)
/* E.g (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) */
#define DEFAULT_DEBUG_MASK 0
/*
* (Un)Define these *DEBUG to compile out/in the pr_debug calls.
* All undef: text size ~ 0x3030; all def: ~ 0x4404.
*/
#define IDEBUG
#define MDEBUG
#define RDEBUG
#define CDEBUG
#define DDEBUG
#define MSK_DEBUG(mask, ...) do { \
if (unlikely(qtaguid_debug_mask & (mask))) \
pr_debug(__VA_ARGS__); \
} while (0)
#ifdef IDEBUG
#define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__)
#else
#define IF_DEBUG(...) no_printk(__VA_ARGS__)
#endif
#ifdef MDEBUG
#define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__)
#else
#define MT_DEBUG(...) no_printk(__VA_ARGS__)
#endif
#ifdef RDEBUG
#define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__)
#else
#define RB_DEBUG(...) no_printk(__VA_ARGS__)
#endif
#ifdef CDEBUG
#define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__)
#else
#define CT_DEBUG(...) no_printk(__VA_ARGS__)
#endif
#ifdef DDEBUG
#define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__)
#else
#define DR_DEBUG(...) no_printk(__VA_ARGS__)
#endif
extern uint qtaguid_debug_mask;
/*---------------------------------------------------------------------------*/
/*
* Tags:
*
* They represent what the data usage counters will be tracked against.
* By default a tag is just based on the UID.
* The UID is used as the base for policing, and can not be ignored.
* So a tag will always at least represent a UID (uid_tag).
*
* A tag can be augmented with an "accounting tag" which is associated
* with a UID.
* User space can set the acct_tag portion of the tag which is then used
* with sockets: all data belonging to that socket will be counted against the
* tag. The policing is then based on the tag's uid_tag portion,
* and stats are collected for the acct_tag portion separately.
*
* There could be
* a: {acct_tag=1, uid_tag=10003}
* b: {acct_tag=2, uid_tag=10003}
* c: {acct_tag=3, uid_tag=10003}
* d: {acct_tag=0, uid_tag=10003}
* a, b, and c represent tags associated with specific sockets.
* d is for the totals for that uid, including all untagged traffic.
* Typically d is used with policing/quota rules.
*
* We want tag_t big enough to distinguish uid_t and acct_tag.
* It might become a struct if needed.
* Nothing should be using it as an int.
*/
typedef uint64_t tag_t; /* Only used via accessors */
#define TAG_UID_MASK 0xFFFFFFFFULL
#define TAG_ACCT_MASK (~0xFFFFFFFFULL)
static inline int tag_compare(tag_t t1, tag_t t2)
{
return t1 < t2 ? -1 : t1 == t2 ? 0 : 1;
}
static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
{
return acct_tag | uid;
}
static inline tag_t make_tag_from_uid(uid_t uid)
{
return uid;
}
static inline uid_t get_uid_from_tag(tag_t tag)
{
return tag & TAG_UID_MASK;
}
static inline tag_t get_utag_from_tag(tag_t tag)
{
return tag & TAG_UID_MASK;
}
static inline tag_t get_atag_from_tag(tag_t tag)
{
return tag & TAG_ACCT_MASK;
}
static inline bool valid_atag(tag_t tag)
{
return !(tag & TAG_UID_MASK);
}
static inline tag_t make_atag_from_value(uint32_t value)
{
return (uint64_t)value << 32;
}
/*---------------------------------------------------------------------------*/
/*
* Maximum number of socket tags that a UID is allowed to have active.
* Multiple processes belonging to the same UID contribute towards this limit.
* Special UIDs that can impersonate a UID also contribute (e.g. download
* manager, ...)
*/
#define DEFAULT_MAX_SOCK_TAGS 1024
/*
* For now we only track 2 sets of counters.
* The default set is 0.
* Userspace can activate another set for a given uid being tracked.
*/
#define IFS_MAX_COUNTER_SETS 2
enum ifs_tx_rx {
IFS_TX,
IFS_RX,
IFS_MAX_DIRECTIONS
};
/* For now, TCP, UDP, the rest */
enum ifs_proto {
IFS_TCP,
IFS_UDP,
IFS_PROTO_OTHER,
IFS_MAX_PROTOS
};
struct byte_packet_counters {
uint64_t bytes;
uint64_t packets;
};
struct data_counters {
struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];
};
static inline uint64_t dc_sum_bytes(struct data_counters *counters,
int set,
enum ifs_tx_rx direction)
{
return counters->bpc[set][direction][IFS_TCP].bytes
+ counters->bpc[set][direction][IFS_UDP].bytes
+ counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;
}
static inline uint64_t dc_sum_packets(struct data_counters *counters,
int set,
enum ifs_tx_rx direction)
{
return counters->bpc[set][direction][IFS_TCP].packets
+ counters->bpc[set][direction][IFS_UDP].packets
+ counters->bpc[set][direction][IFS_PROTO_OTHER].packets;
}
/* Generic X based nodes used as a base for rb_tree ops */
struct tag_node {
struct rb_node node;
tag_t tag;
};
struct tag_stat {
struct tag_node tn;
struct data_counters counters;
/*
* If this tag is acct_tag based, we need to count against the
* matching parent uid_tag.
*/
struct data_counters *parent_counters;
};
struct iface_stat {
struct list_head list; /* in iface_stat_list */
char *ifname;
bool active;
/* net_dev is only valid for active iface_stat */
struct net_device *net_dev;
struct byte_packet_counters totals_via_dev[IFS_MAX_DIRECTIONS];
struct data_counters totals_via_skb;
/*
* We keep the last_known, because some devices reset their counters
* just before NETDEV_UP, while some will reset just before
* NETDEV_REGISTER (which is more normal).
* So now, if the device didn't do a NETDEV_UNREGISTER and we see
* its current dev stats smaller that what was previously known, we
* assume an UNREGISTER and just use the last_known.
*/
struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS];
/* last_known is usable when last_known_valid is true */
bool last_known_valid;
struct proc_dir_entry *proc_ptr;
struct rb_root tag_stat_tree;
spinlock_t tag_stat_list_lock;
};
/* This is needed to create proc_dir_entries from atomic context. */
struct iface_stat_work {
struct work_struct iface_work;
struct iface_stat *iface_entry;
};
/*
* Track tag that this socket is transferring data for, and not necessarily
* the uid that owns the socket.
* This is the tag against which tag_stat.counters will be billed.
* These structs need to be looked up by sock and pid.
*/
struct sock_tag {
struct rb_node sock_node;
struct sock *sk; /* Only used as a number, never dereferenced */
/* The socket is needed for sockfd_put() */
struct socket *socket;
/* Used to associate with a given pid */
struct list_head list; /* in proc_qtu_data.sock_tag_list */
pid_t pid;
tag_t tag;
};
struct qtaguid_event_counts {
/* Various successful events */
atomic64_t sockets_tagged;
atomic64_t sockets_untagged;
atomic64_t counter_set_changes;
atomic64_t delete_cmds;
atomic64_t iface_events; /* Number of NETDEV_* events handled */
atomic64_t match_calls; /* Number of times iptables called mt */
/* Number of times iptables called mt from pre or post routing hooks */
atomic64_t match_calls_prepost;
/*
* match_found_sk_*: numbers related to the netfilter matching
* function finding a sock for the sk_buff.
* Total skbs processed is sum(match_found*).
*/
atomic64_t match_found_sk; /* An sk was already in the sk_buff. */
/* The connection tracker had or didn't have the sk. */
atomic64_t match_found_sk_in_ct;
atomic64_t match_found_no_sk_in_ct;
/*
* No sk could be found. No apparent owner. Could happen with
* unsolicited traffic.
*/
atomic64_t match_no_sk;
/*
* The file ptr in the sk_socket wasn't there.
* This might happen for traffic while the socket is being closed.
*/
atomic64_t match_no_sk_file;
};
/* Track the set active_set for the given tag. */
struct tag_counter_set {
struct tag_node tn;
int active_set;
};
/*----------------------------------------------*/
/*
* The qtu uid data is used to track resources that are created directly or
* indirectly by processes (uid tracked).
* It is shared by the processes with the same uid.
* Some of the resource will be counted to prevent further rogue allocations,
* some will need freeing once the owner process (uid) exits.
*/
struct uid_tag_data {
struct rb_node node;
uid_t uid;
/*
* For the uid, how many accounting tags have been set.
*/
int num_active_tags;
/* Track the number of proc_qtu_data that reference it */
int num_pqd;
struct rb_root tag_ref_tree;
/* No tag_node_tree_lock; use uid_tag_data_tree_lock */
};
struct tag_ref {
struct tag_node tn;
/*
* This tracks the number of active sockets that have a tag on them
* which matches this tag_ref.tn.tag.
* A tag ref can live on after the sockets are untagged.
* A tag ref can only be removed during a tag delete command.
*/
int num_sock_tags;
};
struct proc_qtu_data {
struct rb_node node;
pid_t pid;
struct uid_tag_data *parent_tag_data;
/* Tracks the sock_tags that need freeing upon this proc's death */
struct list_head sock_tag_list;
/* No spinlock_t sock_tag_list_lock; use the global one. */
};
/*----------------------------------------------*/
#endif /* ifndef __XT_QTAGUID_INTERNAL_H__ */