Refresh OProfile code running on the target side to 0.9.5 for ARMV7.
diff --git a/daemon/Android.mk b/daemon/Android.mk
index abee74c..322c76e 100644
--- a/daemon/Android.mk
+++ b/daemon/Android.mk
@@ -6,6 +6,9 @@
 	opd_anon.c \
 	opd_cookie.c \
 	opd_events.c \
+	opd_extended.c \
+	opd_ibs.c \
+	opd_ibs_trans.c \
 	opd_kernel.c \
 	opd_mangling.c \
 	opd_perfmon.c \
diff --git a/daemon/init.c b/daemon/init.c
index be0b9da..b4a63cc 100644
--- a/daemon/init.c
+++ b/daemon/init.c
@@ -174,14 +174,14 @@
 			sprintf(end_time_str, "%llu", end_time);
 			sprintf(opjitconv_path, "%s/%s", OP_BINDIR, "opjitconv");
 			arg_num = 0;
-			exec_args[arg_num++] = opjitconv_path;
+			exec_args[arg_num++] = "opjitconv";
 			if (vmisc)
 				exec_args[arg_num++] = "-d";
 			exec_args[arg_num++] = session_dir;
 			exec_args[arg_num++] = start_time_str;
 			exec_args[arg_num++] = end_time_str;
 			exec_args[arg_num] = (char *) NULL;
-			execvp("opjitconv", exec_args);
+			execvp(opjitconv_path, exec_args);
 			fprintf(stderr, "Failed to exec %s: %s\n",
 			        exec_args[0], strerror(errno));
 			/* We don't want any cleanup in the child */
diff --git a/daemon/opd_events.c b/daemon/opd_events.c
index 81a87d2..b544fb3 100644
--- a/daemon/opd_events.c
+++ b/daemon/opd_events.c
@@ -13,6 +13,7 @@
  
 #include "opd_events.h"
 #include "opd_printf.h"
+#include "opd_extended.h"
 #include "oprofiled.h"
 
 #include "op_string.h"
@@ -35,7 +36,7 @@
 static void malformed_events(void)
 {
 	fprintf(stderr, "oprofiled: malformed events passed "
-	        "on the command line\n");
+		"on the command line\n");
 	exit(EXIT_FAILURE);
 }
 
@@ -128,6 +129,12 @@
 struct opd_event * find_counter_event(unsigned long counter)
 {
 	size_t i;
+	struct opd_event * ret = NULL;
+
+	if (counter >= OP_MAX_COUNTERS) {
+		if((ret = opd_ext_find_counter_event(counter)) != NULL)
+			return ret;
+	}
 
 	for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
 		if (counter == opd_events[i].counter)
@@ -141,9 +148,9 @@
 
 
 void fill_header(struct opd_header * header, unsigned long counter,
-                 vma_t anon_start, vma_t cg_to_anon_start,
-                 int is_kernel, int cg_to_is_kernel,
-                 int spu_samples, uint64_t embed_offset, time_t mtime)
+		 vma_t anon_start, vma_t cg_to_anon_start,
+		 int is_kernel, int cg_to_is_kernel,
+		 int spu_samples, uint64_t embed_offset, time_t mtime)
 {
 	struct opd_event * event = find_counter_event(counter);
 
diff --git a/daemon/opd_events.h b/daemon/opd_events.h
index 3bd0106..1e8b801 100644
--- a/daemon/opd_events.h
+++ b/daemon/opd_events.h
@@ -40,8 +40,8 @@
 
 /** fill the sample file header with event info etc. */
 void fill_header(struct opd_header * header, unsigned long counter,
-                 vma_t anon_start, vma_t anon_end,
-                 int is_kernel, int cg_to_is_kernel,
+		 vma_t anon_start, vma_t anon_end,
+		 int is_kernel, int cg_to_is_kernel,
                  int spu_samples, uint64_t embed_offset, time_t mtime);
 
 #endif /* OPD_EVENTS_H */
diff --git a/daemon/opd_extended.c b/daemon/opd_extended.c
new file mode 100644
index 0000000..d88c285
--- /dev/null
+++ b/daemon/opd_extended.c
@@ -0,0 +1,181 @@
+/**
+ * @file opd_extended.c
+ * OProfile Extended Feature
+ *
+ * @remark Copyright 2007-2009 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+ * Copyright (c) 2009 Advanced Micro Devices, Inc.
+ */
+
+#include "opd_extended.h"
+#include "op_string.h"
+
+#include <string.h>
+#include <stdio.h>
+
+/* This global variable is >= 0
+ * if extended feature is enabled */
+static int opd_ext_feat_index;
+
+extern struct opd_ext_handlers ibs_handlers;
+
+/**
+ * OProfile Extended Feature Table
+ *
+ * This table contains a list of extended features.
+ */
+static struct opd_ext_feature ext_feature_table[] = {
+	{"ibs", &ibs_handlers },
+	{ NULL, NULL }
+};
+
+
+static int get_index_for_feature(char const * name)
+{
+	int ret = -1;
+	unsigned int i;
+
+	if(!name)
+		return ret;
+
+	for (i = 0 ; ext_feature_table[i].feature != NULL ; i++ ) {
+		if(!strncmp(name, ext_feature_table[i].feature,
+			strlen(ext_feature_table[i].feature))) {
+			ret = i;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+
+static inline int is_ext_enabled()
+{
+	if (opd_ext_feat_index >= 0
+	&& ext_feature_table[opd_ext_feat_index].handlers != NULL)
+		return 1;
+	else
+		return 0;
+}
+
+
+static inline int is_ext_sfile_enabled()
+{
+	if (opd_ext_feat_index >= 0
+	&& ext_feature_table[opd_ext_feat_index].handlers != NULL
+	&& ext_feature_table[opd_ext_feat_index].handlers->ext_sfile != NULL)
+		return 1;
+	else
+		return 0;
+}
+
+
+/**
+ * Param "value" is the input from CML option with the format:
+ *
+ * <feature name>:<param1>:<param2>:<param3>:.....
+ *
+ * where param1, param2, ... paramN are optional.
+ */
+int opd_ext_initialize(char const * value)
+{
+	int ret = EXIT_FAILURE;
+	char * tmp = NULL, * name = NULL, * args = NULL;
+
+	if(!value) {
+		opd_ext_feat_index = -1;
+		return 0;
+	}
+
+	tmp = op_xstrndup(value, strlen(value));
+
+	/* Parse feature name*/
+	if((name = strtok_r(tmp, ":", &args)) == NULL)
+		goto err_out;
+
+	if((opd_ext_feat_index = get_index_for_feature(name)) < 0)
+		goto err_out;
+
+	ret = ext_feature_table[opd_ext_feat_index].handlers->ext_init(args);
+
+	return ret;
+
+err_out:
+	fprintf(stderr,"opd_ext_initialize: Invalid extended feature option: %s\n", value);
+	return ret;
+}
+
+
+void opd_ext_print_stats()
+{
+	if (is_ext_enabled()
+	&& ext_feature_table[opd_ext_feat_index].handlers->ext_print_stats != NULL) {
+		printf("\n-- OProfile Extended-Feature Statistics --\n");
+		ext_feature_table[opd_ext_feat_index].handlers->ext_print_stats();
+	}
+}
+
+
+/**
+ * opd_sfile extended APIs
+ */
+void opd_ext_sfile_create(struct sfile * sf)
+{
+	/* Creating ext sfile only if extended feature is enabled */
+	if (is_ext_sfile_enabled()
+	&& ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->create != NULL)
+		ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->create(sf);
+}
+
+
+void opd_ext_sfile_dup (struct sfile * to, struct sfile * from)
+{
+	/* Duplicate ext sfile only if extended feature is enabled */
+	if (is_ext_sfile_enabled()
+	&& ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->dup != NULL)
+		ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->dup(to, from);
+}
+
+
+void opd_ext_sfile_close (struct sfile * sf)
+{
+	/* Close ext sfile only if extended feature is enabled */
+	if (is_ext_sfile_enabled()
+	&& ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->close != NULL)
+		ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->close(sf);
+}
+
+
+void opd_ext_sfile_sync(struct sfile * sf)
+{
+	/* Sync ext sfile only if extended feature is enabled */
+	if (is_ext_sfile_enabled()
+	&& ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->sync != NULL)
+		ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->sync(sf);
+}
+
+
+odb_t * opd_ext_sfile_get(struct transient const * trans, int is_cg)
+{
+	/* Get ext sfile only if extended feature is enabled */
+	if (is_ext_sfile_enabled()
+	&& ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->get != NULL)
+		return	ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->get(trans, is_cg);
+
+	return NULL;
+}
+
+
+struct opd_event * opd_ext_find_counter_event(unsigned long counter)
+{
+	/* Only if extended feature is enabled */
+	if (is_ext_sfile_enabled()
+	&& ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->find_counter_event != NULL)
+		return	ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->find_counter_event(counter);
+
+	return NULL;
+}
+
diff --git a/daemon/opd_extended.h b/daemon/opd_extended.h
new file mode 100644
index 0000000..715041d
--- /dev/null
+++ b/daemon/opd_extended.h
@@ -0,0 +1,85 @@
+/**
+ * @file opd_extended.h
+ * OProfile Extended Feature
+ *
+ * @remark Copyright 2007-2009 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+ * Copyright (c) 2009 Advanced Micro Devices, Inc.
+ */
+
+#ifndef OPD_EXTENDED_H
+#define OPD_EXTENDED_H
+
+#include "opd_trans.h"
+#include "odb.h"
+
+#include <stdlib.h>
+#include <stdint.h>
+
+
+/**
+ * OProfile Extended Feature Table Entry
+ */
+struct opd_ext_feature {
+	// Feature name
+	const char* feature;
+	// Feature handlers
+	struct opd_ext_handlers * handlers;
+};
+
+/**
+ * OProfile Extended handlers
+ */
+struct opd_ext_handlers {
+	// Extended init
+	int (*ext_init)(char const *);
+	// Extended statistics
+	int (*ext_print_stats)();
+	// Extended sfile handlers
+	struct opd_ext_sfile_handlers * ext_sfile;
+};
+
+/**
+ * OProfile Extended sub-handlers (sfile)
+ */
+struct opd_ext_sfile_handlers {
+	int (*create)(struct sfile *);
+	int (*dup)(struct sfile *, struct sfile *);
+	int (*close)(struct sfile *);
+	int (*sync)(struct sfile *);
+	odb_t * (*get)(struct transient const *, int);
+	struct opd_event * (*find_counter_event)(unsigned long);
+};
+
+/**
+ * @param value: commandline input option string
+ *
+ * Parse the specified extended feature
+ */
+extern int opd_ext_initialize(char const * value);
+
+/**
+ * Print out extended feature statistics in oprofiled.log file
+ */
+extern void opd_ext_print_stats();
+
+/**
+ * opd_sfile extended sfile handling functions
+ */
+extern void opd_ext_sfile_create(struct sfile * sf);
+extern void opd_ext_sfile_dup (struct sfile * to, struct sfile * from);
+extern void opd_ext_sfile_close(struct sfile * sf);
+extern void opd_ext_sfile_sync(struct sfile * sf);
+extern odb_t * opd_ext_sfile_get(struct transient const * trans, int is_cg);
+
+/**
+ * @param counter: counter index
+ *
+ * Get event struct opd_event from the counter index value.
+ */
+extern struct opd_event * opd_ext_find_counter_event(unsigned long counter);
+
+
+#endif
diff --git a/daemon/opd_ibs.c b/daemon/opd_ibs.c
new file mode 100644
index 0000000..c57554a
--- /dev/null
+++ b/daemon/opd_ibs.c
@@ -0,0 +1,692 @@
+/**
+ * @file daemon/opd_ibs.c
+ * AMD Family10h Instruction Based Sampling (IBS) handling.
+ *
+ * @remark Copyright 2007 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Jason Yeh <jason.yeh@amd.com>
+ * @author Paul Drongowski <paul.drongowski@amd.com>
+ * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+ * Copyright (c) 2008 Advanced Micro Devices, Inc.
+ */
+
+#include "op_hw_config.h"
+#include "op_events.h"
+#include "op_string.h"
+#include "op_libiberty.h"
+#include "opd_printf.h"
+#include "opd_trans.h"
+#include "opd_events.h"
+#include "opd_kernel.h"
+#include "opd_anon.h"
+#include "opd_sfile.h"
+#include "opd_interface.h"
+#include "opd_mangling.h"
+#include "opd_extended.h"
+#include "opd_ibs.h"
+#include "opd_ibs_trans.h"
+#include "opd_ibs_macro.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+extern op_cpu cpu_type;
+extern int no_event_ok;
+extern int sfile_equal(struct sfile const * sf, struct sfile const * sf2);
+extern void sfile_dup(struct sfile * to, struct sfile * from);
+
+/* IBS Select Arrays/Counters */
+static unsigned int ibs_selected_size;
+static unsigned int ibs_fetch_selected_flag;
+static unsigned int ibs_fetch_selected_size;
+static unsigned int ibs_op_selected_flag;
+static unsigned int ibs_op_selected_size;
+static unsigned int ibs_op_ls_selected_flag;
+static unsigned int ibs_op_ls_selected_size;
+static unsigned int ibs_op_nb_selected_flag;
+static unsigned int ibs_op_nb_selected_size;
+
+/* IBS Statistics */
+static unsigned long ibs_fetch_sample_stats;
+static unsigned long ibs_fetch_incomplete_stats;
+static unsigned long ibs_op_sample_stats;
+static unsigned long ibs_op_incomplete_stats;
+static unsigned long ibs_derived_event_stats;
+
+/*
+ * IBS Virtual Counter
+ */
+struct opd_event ibs_vc[OP_MAX_IBS_COUNTERS];
+
+/* IBS Virtual Counter Index(VCI) Map*/
+unsigned int ibs_vci_map[OP_MAX_IBS_COUNTERS];
+
+/**
+ * This function converts IBS fetch event flags and values into
+ * derived events. If the tagged (sampled) fetched caused a derived
+ * event, the derived event is tallied.
+ */
+static void opd_log_ibs_fetch(struct transient * trans)
+{
+	struct ibs_fetch_sample * trans_fetch = ((struct ibs_sample*)(trans->ext))->fetch;
+	if (!trans_fetch)
+		return;
+
+	trans_ibs_fetch(trans, ibs_fetch_selected_flag, ibs_fetch_selected_size);
+}
+
+
+/**
+ * This function translates the IBS op event flags and values into
+ * IBS op derived events. If an op derived event occurred, it's tallied.
+ */
+static void opd_log_ibs_op(struct transient * trans)
+{
+	struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
+	if (!trans_op)
+		return;
+
+	trans_ibs_op(trans, ibs_op_selected_flag, ibs_op_selected_size);
+	trans_ibs_op_ls(trans, ibs_op_ls_selected_flag, ibs_op_ls_selected_size);
+	trans_ibs_op_nb(trans, ibs_op_nb_selected_flag, ibs_op_nb_selected_size);
+}
+
+
+static void opd_put_ibs_sample(struct transient * trans)
+{
+	unsigned long long event = 0;
+	struct kernel_image * k_image = NULL;
+	struct ibs_fetch_sample * trans_fetch = ((struct ibs_sample*)(trans->ext))->fetch;
+
+	if (!enough_remaining(trans, 1)) {
+		trans->remaining = 0;
+		return;
+	}
+
+	/* IBS can generate samples with invalid dcookie and
+	 * in kernel address range. Map such samples to vmlinux
+	 * only if the user either specifies a range, or vmlinux.
+	 */
+	if (trans->cookie == INVALID_COOKIE
+	    && (k_image = find_kernel_image(trans)) != NULL
+	    && (k_image->start != 0 && k_image->end != 0)
+	    && trans->in_kernel == 0)
+		trans->in_kernel = 1;
+
+	if (trans->tracing != TRACING_ON)
+		trans->event = event;
+
+	/* sfile can change at each sample for kernel */
+	if (trans->in_kernel != 0)
+		clear_trans_current(trans);
+
+	if (!trans->in_kernel && trans->cookie == NO_COOKIE)
+		trans->anon = find_anon_mapping(trans);
+
+	/* get the current sfile if needed */
+	if (!trans->current)
+		trans->current = sfile_find(trans);
+
+	/*
+	 * can happen if kernel sample falls through the cracks, or if
+	 * it's a sample from an anon region we couldn't find
+	 */
+	if (!trans->current)
+		goto out;
+
+	if (trans_fetch)
+		opd_log_ibs_fetch(trans);
+	else
+		opd_log_ibs_op(trans);
+out:
+	/* switch to trace mode */
+	if (trans->tracing == TRACING_START)
+		trans->tracing = TRACING_ON;
+
+	update_trans_last(trans);
+}
+
+
+void code_ibs_fetch_sample(struct transient * trans)
+{
+	struct ibs_fetch_sample * trans_fetch = NULL;
+
+	if (!enough_remaining(trans, 7)) {
+		verbprintf(vext, "not enough remaining\n");
+		trans->remaining = 0;
+		ibs_fetch_incomplete_stats++;
+		return;
+	}
+
+	ibs_fetch_sample_stats++;
+
+	trans->ext = xmalloc(sizeof(struct ibs_sample));
+	((struct ibs_sample*)(trans->ext))->fetch = xmalloc(sizeof(struct ibs_fetch_sample));
+	trans_fetch = ((struct ibs_sample*)(trans->ext))->fetch;
+
+	trans_fetch->rip = pop_buffer_value(trans);
+
+	trans_fetch->ibs_fetch_lin_addr_low = pop_buffer_value(trans);
+	trans_fetch->ibs_fetch_lin_addr_high = pop_buffer_value(trans);
+
+	trans_fetch->ibs_fetch_ctl_low = pop_buffer_value(trans);
+	trans_fetch->ibs_fetch_ctl_high = pop_buffer_value(trans);
+	trans_fetch->ibs_fetch_phys_addr_low = pop_buffer_value(trans);
+	trans_fetch->ibs_fetch_phys_addr_high = pop_buffer_value(trans);
+
+	verbprintf(vsamples,
+		"FETCH_X CPU:%ld PID:%ld RIP:%lx CTL_H:%x LAT:%d P_HI:%x P_LO:%x L_HI:%x L_LO:%x\n",
+		trans->cpu,
+		(long)trans->tgid,
+		trans_fetch->rip,
+		(trans_fetch->ibs_fetch_ctl_high >> 16) & 0x3ff,
+		(trans_fetch->ibs_fetch_ctl_high) & 0xffff,
+		trans_fetch->ibs_fetch_phys_addr_high,
+		trans_fetch->ibs_fetch_phys_addr_low,
+		trans_fetch->ibs_fetch_lin_addr_high,
+		trans_fetch->ibs_fetch_lin_addr_low) ;
+
+	/* Overwrite the trans->pc with the more accurate trans_fetch->rip */
+	trans->pc = trans_fetch->rip;
+
+	opd_put_ibs_sample(trans);
+
+	free(trans_fetch);
+	free(trans->ext);
+	trans->ext = NULL;
+}
+
+
+void code_ibs_op_sample(struct transient * trans)
+{
+	struct ibs_op_sample * trans_op= NULL;
+
+	if (!enough_remaining(trans, 13)) {
+		verbprintf(vext, "not enough remaining\n");
+		trans->remaining = 0;
+		ibs_op_incomplete_stats++;
+		return;
+	}
+
+	ibs_op_sample_stats++;
+
+	trans->ext = xmalloc(sizeof(struct ibs_sample));
+	((struct ibs_sample*)(trans->ext))->op = xmalloc(sizeof(struct ibs_op_sample));
+	trans_op = ((struct ibs_sample*)(trans->ext))->op;
+
+	trans_op->rip = pop_buffer_value(trans);
+
+	trans_op->ibs_op_lin_addr_low = pop_buffer_value(trans);
+	trans_op->ibs_op_lin_addr_high = pop_buffer_value(trans);
+
+	trans_op->ibs_op_data1_low         = pop_buffer_value(trans);
+	trans_op->ibs_op_data1_high        = pop_buffer_value(trans);
+	trans_op->ibs_op_data2_low         = pop_buffer_value(trans);
+	trans_op->ibs_op_data2_high        = pop_buffer_value(trans);
+	trans_op->ibs_op_data3_low         = pop_buffer_value(trans);
+	trans_op->ibs_op_data3_high        = pop_buffer_value(trans);
+	trans_op->ibs_op_ldst_linaddr_low  = pop_buffer_value(trans);
+	trans_op->ibs_op_ldst_linaddr_high = pop_buffer_value(trans);
+	trans_op->ibs_op_phys_addr_low     = pop_buffer_value(trans);
+	trans_op->ibs_op_phys_addr_high    = pop_buffer_value(trans);
+
+	verbprintf(vsamples,
+		   "IBS_OP_X CPU:%ld PID:%d RIP:%lx D1HI:%x D1LO:%x D2LO:%x D3HI:%x D3LO:%x L_LO:%x P_LO:%x\n",
+		   trans->cpu,
+		   trans->tgid,
+		   trans_op->rip,
+		   trans_op->ibs_op_data1_high,
+		   trans_op->ibs_op_data1_low,
+		   trans_op->ibs_op_data2_low,
+		   trans_op->ibs_op_data3_high,
+		   trans_op->ibs_op_data3_low,
+		   trans_op->ibs_op_ldst_linaddr_low,
+		   trans_op->ibs_op_phys_addr_low);
+
+	/* Overwrite the trans->pc with the more accurate trans_op->rip */
+	trans->pc = trans_op->rip;
+
+	opd_put_ibs_sample(trans);
+
+	free(trans_op);
+	free(trans->ext);
+	trans->ext = NULL;
+}
+
+
+/** Convert IBS event to value used for data structure indexing */
+static unsigned long ibs_event_to_counter(unsigned long x)
+{
+	unsigned long ret = ~0UL;
+
+	if (IS_IBS_FETCH(x))
+		ret = (x - IBS_FETCH_BASE);
+	else if (IS_IBS_OP(x))
+		ret = (x - IBS_OP_BASE + IBS_FETCH_MAX);
+	else if (IS_IBS_OP_LS(x))
+		ret = (x - IBS_OP_LS_BASE + IBS_OP_MAX + IBS_FETCH_MAX);
+	else if (IS_IBS_OP_NB(x))
+		ret = (x - IBS_OP_NB_BASE + IBS_OP_LS_MAX + IBS_OP_MAX + IBS_FETCH_MAX);
+
+	return (ret != ~0UL) ? ret + OP_MAX_COUNTERS : ret;
+}
+
+
+void opd_log_ibs_event(unsigned int event,
+	struct transient * trans)
+{
+	ibs_derived_event_stats++;
+	trans->event = event;
+	sfile_log_sample_count(trans, 1);
+}
+
+
+void opd_log_ibs_count(unsigned int event,
+			struct transient * trans,
+			unsigned int count)
+{
+	ibs_derived_event_stats++;
+	trans->event = event;
+	sfile_log_sample_count(trans, count);
+}
+
+
+static unsigned long get_ibs_vci_key(unsigned int event)
+{
+	unsigned long key = ibs_event_to_counter(event);
+	if (key == ~0UL || key < OP_MAX_COUNTERS)
+		return ~0UL;
+
+	key = key - OP_MAX_COUNTERS;
+
+	return key;
+}
+
+
+static int ibs_parse_and_set_events(char * str)
+{
+	char * tmp, * ptr, * tok1, * tok2 = NULL;
+	int is_done = 0;
+	struct op_event * event = NULL;
+	op_cpu cpu_type = CPU_NO_GOOD;
+	unsigned long key;
+
+	if (!str)
+		return -1;
+
+	cpu_type = op_get_cpu_type();
+	op_events(cpu_type);
+
+	tmp = op_xstrndup(str, strlen(str));
+	ptr = tmp;
+
+	while (is_done != 1
+		&& (tok1 = strtok_r(ptr, ",", &tok2)) != NULL) {
+
+		if ((ptr = strstr(tok1, ":")) != NULL) {
+			*ptr = '\0';
+			is_done = 1;
+		}
+
+		// Resolve event number
+		event = find_event_by_name(tok1, 0, 0);
+		if (!event)
+			return -1;
+
+		// Grouping
+		if (IS_IBS_FETCH(event->val)) {
+			ibs_fetch_selected_flag |= 1 << IBS_FETCH_OFFSET(event->val);
+			ibs_fetch_selected_size++;
+		} else if (IS_IBS_OP(event->val)) {
+			ibs_op_selected_flag |= 1 << IBS_OP_OFFSET(event->val);
+			ibs_op_selected_size++;
+		} else if (IS_IBS_OP_LS(event->val)) {
+			ibs_op_ls_selected_flag |= 1 << IBS_OP_LS_OFFSET(event->val);
+			ibs_op_ls_selected_size++;
+		} else if (IS_IBS_OP_NB(event->val)) {
+			ibs_op_nb_selected_flag |= 1 << IBS_OP_NB_OFFSET(event->val);
+			ibs_op_nb_selected_size++;
+		} else {
+			return -1;
+		}
+
+		key = get_ibs_vci_key(event->val);
+		if (key == ~0UL)
+			return -1;
+
+		ibs_vci_map[key] = ibs_selected_size;
+
+		/* Initialize part of ibs_vc */
+		ibs_vc[ibs_selected_size].name    = tok1;
+		ibs_vc[ibs_selected_size].value   = event->val;
+		ibs_vc[ibs_selected_size].counter = ibs_selected_size + OP_MAX_COUNTERS;
+		ibs_vc[ibs_selected_size].kernel  = 1;
+		ibs_vc[ibs_selected_size].user    = 1;
+
+		ibs_selected_size++;
+
+		ptr = NULL;
+	}
+
+	return 0;
+}
+
+
+static int ibs_parse_counts(char * str, unsigned long int * count)
+{
+	char * tmp, * tok1, * tok2 = NULL, *end = NULL;
+	if (!str)
+		return -1;
+
+	tmp = op_xstrndup(str, strlen(str));
+	tok1 = strtok_r(tmp, ":", &tok2);
+	*count = strtoul(tok1, &end, 10);
+	if ((end && *end) || *count == 0
+	    || errno == EINVAL || errno == ERANGE) {
+		fprintf(stderr,"Invalid count (%s)\n", str);
+		return -1;
+	}
+
+	return 0;
+}
+
+
+static int ibs_parse_and_set_um_fetch(char const * str)
+{
+	if (!str)
+		return -1;
+	return 0;
+}
+
+
+
+static int ibs_parse_and_set_um_op(char const * str, unsigned long int * ibs_op_um)
+{
+	char * end = NULL;
+	if (!str)
+		return -1;
+
+	*ibs_op_um = strtoul(str, &end, 16);
+	if ((end && *end) || errno == EINVAL || errno == ERANGE) {
+		fprintf(stderr,"Invalid unitmask (%s)\n", str);
+		return -1;
+	}
+	return 0;
+}
+
+
+static int ibs_init(char const * argv)
+{
+	char * tmp, * ptr, * tok1, * tok2 = NULL;
+	unsigned int i = 0;
+	unsigned long int ibs_fetch_count = 0;
+	unsigned long int ibs_op_count = 0;
+	unsigned long int ibs_op_um = 0;
+
+	if (!argv)
+		return -1;
+
+	if (empty_line(argv) != 0)
+		return -1;
+
+	tmp = op_xstrndup(argv, strlen(argv));
+	ptr = (char *) skip_ws(tmp);
+
+	// "fetch:event1,event2,....:count:um|op:event1,event2,.....:count:um"
+	tok1 = strtok_r(ptr, "|", &tok2);
+
+	while (tok1 != NULL) {
+
+		if (!strncmp("fetch:", tok1, strlen("fetch:"))) {
+			// Get to event section
+			tok1 = tok1 + strlen("fetch:");
+			if (ibs_parse_and_set_events(tok1) == -1)
+				return -1;
+
+			// Get to count section
+			while (tok1) {
+				if (*tok1 == '\0')
+					return -1;
+				if (*tok1 != ':') {
+					tok1++;
+				} else {
+					tok1++;
+					break;
+				}
+			}
+
+			if (ibs_parse_counts(tok1, &ibs_fetch_count) == -1)
+				return -1;
+
+			// Get to um section
+			while (tok1) {
+				if (*tok1 == '\0')
+					return -1;
+				if (*tok1 != ':') {
+					tok1++;
+				} else {
+					tok1++;
+					break;
+				}
+			}
+
+			if (ibs_parse_and_set_um_fetch(tok1) == -1)
+				return -1;
+
+		} else if (!strncmp("op:", tok1, strlen("op:"))) {
+			// Get to event section
+			tok1 = tok1 + strlen("op:");
+			if (ibs_parse_and_set_events(tok1) == -1)
+				return -1;
+
+			// Get to count section
+			while (tok1) {
+				if (*tok1 == '\0')
+					return -1;
+				if (*tok1 != ':') {
+					tok1++;
+				} else {
+					tok1++;
+					break;
+				}
+			}
+
+			if (ibs_parse_counts(tok1, &ibs_op_count) == -1)
+				return -1;
+
+			// Get to um section
+			while (tok1) {
+				if (*tok1 == '\0')
+					return -1;
+				if (*tok1 != ':') {
+					tok1++;
+				} else {
+					tok1++;
+					break;
+				}
+			}
+
+			if (ibs_parse_and_set_um_op(tok1, &ibs_op_um))
+				return -1;
+
+		} else
+			return -1;
+
+		tok1 = strtok_r(NULL, "|", &tok2);
+	}
+
+	/* Initialize ibs_vc */
+	for (i = 0 ; i < ibs_selected_size ; i++)
+	{
+		if (IS_IBS_FETCH(ibs_vc[i].value)) {
+			ibs_vc[i].count   = ibs_fetch_count;
+			ibs_vc[i].um      = 0;
+		} else {
+			ibs_vc[i].count   = ibs_op_count;
+			ibs_vc[i].um      = ibs_op_um;
+		}
+	}
+
+	// Allow no event
+	no_event_ok = 1;
+	return 0;
+}
+
+
+static int ibs_print_stats()
+{
+	printf("Nr. IBS Fetch samples     : %lu (%lu entries)\n", ibs_fetch_sample_stats, (ibs_fetch_sample_stats * 7));
+	printf("Nr. IBS Fetch incompletes : %lu\n", ibs_fetch_incomplete_stats);
+	printf("Nr. IBS Op samples        : %lu (%lu entries)\n", ibs_op_sample_stats, (ibs_op_sample_stats * 13));
+	printf("Nr. IBS Op incompletes    : %lu\n", ibs_op_incomplete_stats);
+	printf("Nr. IBS derived events    : %lu\n", ibs_derived_event_stats);
+	return 0;
+}
+
+
+static int ibs_sfile_create(struct sfile * sf)
+{
+	unsigned int i;
+	sf->ext_files = xmalloc(ibs_selected_size * sizeof(odb_t));
+	for (i = 0 ; i < ibs_selected_size ; ++i)
+		odb_init(&sf->ext_files[i]);
+
+	return 0;
+}
+
+
+static int ibs_sfile_dup (struct sfile * to, struct sfile * from)
+{
+	unsigned int i;
+	if (from->ext_files != NULL) {
+		to->ext_files = xmalloc(ibs_selected_size * sizeof(odb_t));
+		for (i = 0 ; i < ibs_selected_size ; ++i)
+			odb_init(&to->ext_files[i]);
+	} else {
+		to->ext_files = NULL;
+	}
+	return 0;
+}
+
+static int ibs_sfile_close(struct sfile * sf)
+{
+	unsigned int i;
+	if (sf->ext_files != NULL) {
+		for (i = 0; i < ibs_selected_size ; ++i)
+			odb_close(&sf->ext_files[i]);
+
+		free(sf->ext_files);
+		sf->ext_files= NULL;
+	}
+	return 0;
+}
+
+static int ibs_sfile_sync(struct sfile * sf)
+{
+	unsigned int i;
+	if (sf->ext_files != NULL) {
+		for (i = 0; i < ibs_selected_size ; ++i)
+			odb_sync(&sf->ext_files[i]);
+	}
+	return 0;
+}
+
+static odb_t * ibs_sfile_get(struct transient const * trans, int is_cg)
+{
+	struct sfile * sf = trans->current;
+	struct sfile * last = trans->last;
+	struct cg_entry * cg;
+	struct list_head * pos;
+	unsigned long hash;
+	odb_t * file;
+	unsigned long counter, ibs_vci, key;
+
+	/* Note: "trans->event" for IBS is not the same as traditional
+ 	 * events.  Here, it has the actual event (0xfxxx), while the
+ 	 * traditional event has the event index.
+ 	 */
+	key = get_ibs_vci_key(trans->event);
+	if (key == ~0UL) {
+		fprintf(stderr, "%s: Invalid IBS event %lu\n", __func__, trans->event);
+		abort();
+	}
+	ibs_vci = ibs_vci_map[key];
+	counter = ibs_vci + OP_MAX_COUNTERS;
+
+	/* Create the IBS sfile if it does not already exist */
+	if (sf->ext_files == NULL)
+		ibs_sfile_create(sf);
+
+	file = &(sf->ext_files[ibs_vci]);
+	if (!is_cg)
+		goto open;
+
+	hash = last->hashval & (CG_HASH_SIZE - 1);
+
+	/* Need to look for the right 'to'. Since we're looking for
+	 * 'last', we use its hash.
+	 */
+	list_for_each(pos, &sf->cg_hash[hash]) {
+		cg = list_entry(pos, struct cg_entry, hash);
+		if (sfile_equal(last, &cg->to)) {
+			file = &(cg->to.ext_files[ibs_vci]);
+			goto open;
+		}
+	}
+
+	cg = xmalloc(sizeof(struct cg_entry));
+	sfile_dup(&cg->to, last);
+	list_add(&cg->hash, &sf->cg_hash[hash]);
+	file = &(cg->to.ext_files[ibs_vci]);
+
+open:
+	if (!odb_open_count(file))
+		opd_open_sample_file(file, last, sf, counter, is_cg);
+
+	/* Error is logged by opd_open_sample_file */
+	if (!odb_open_count(file))
+		return NULL;
+
+	return file;
+}
+
+
+/** Filled opd_event structure with IBS derived event information
+ *  from the given counter value.
+ */
+static struct opd_event * ibs_sfile_find_counter_event(unsigned long counter)
+{
+	unsigned long ibs_vci;
+
+	if (counter >= OP_MAX_COUNTERS + OP_MAX_IBS_COUNTERS
+	    || counter < OP_MAX_COUNTERS) {
+		fprintf(stderr,"Error: find_ibs_counter_event : "
+				"invalid counter value %lu.\n", counter);
+		abort();
+	}
+
+	ibs_vci = counter - OP_MAX_COUNTERS;
+	return &ibs_vc[ibs_vci];
+}
+
+
+struct opd_ext_sfile_handlers ibs_sfile_handlers =
+{
+	.create = &ibs_sfile_create,
+	.dup    = &ibs_sfile_dup,
+	.close  = &ibs_sfile_close,
+	.sync   = &ibs_sfile_sync,
+	.get    = &ibs_sfile_get,
+	.find_counter_event = &ibs_sfile_find_counter_event
+};
+
+
+struct opd_ext_handlers ibs_handlers =
+{
+	.ext_init  = &ibs_init,
+	.ext_print_stats = &ibs_print_stats,
+	.ext_sfile = &ibs_sfile_handlers
+};
diff --git a/daemon/opd_ibs.h b/daemon/opd_ibs.h
new file mode 100644
index 0000000..9ccc482
--- /dev/null
+++ b/daemon/opd_ibs.h
@@ -0,0 +1,137 @@
+/**
+ * @file daemon/opd_ibs.h
+ * AMD Family10h Instruction Based Sampling (IBS) handling.
+ *
+ * @remark Copyright 2008 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Jason Yeh <jason.yeh@amd.com>
+ * @author Paul Drongowski <paul.drongowski@amd.com>
+ * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+ * Copyright (c) 2008 Advanced Micro Devices, Inc.
+ */
+
+#ifndef OPD_IBS_H
+#define OPD_IBS_H
+
+#include <stdint.h>
+
+#include "opd_ibs_macro.h"
+
+struct transient;
+struct opd_event;
+
+/**
+ * IBS information is processed in two steps. The first step decodes
+ * hardware-level IBS information and saves it in decoded form. The
+ * second step translates the decoded IBS information into IBS derived
+ * events. IBS information is tallied and is reported as derived events.
+ */
+
+struct ibs_sample {
+	struct ibs_fetch_sample * fetch;
+	struct ibs_op_sample * op;
+};
+
+/**
+ * This struct represents the hardware-level IBS fetch information.
+ * Each field corresponds to a model-specific register (MSR.) See the
+ * BIOS and Kernel Developer's Guide for AMD Model Family 10h Processors
+ * for further details.
+ */
+struct ibs_fetch_sample {
+	unsigned long int rip;
+	/* MSRC001_1030 IBS Fetch Control Register */
+	unsigned int ibs_fetch_ctl_low;
+	unsigned int ibs_fetch_ctl_high;
+	/* MSRC001_1031 IBS Fetch Linear Address Register */
+	unsigned int ibs_fetch_lin_addr_low;
+	unsigned int ibs_fetch_lin_addr_high;
+	/* MSRC001_1032 IBS Fetch Physical Address Register */
+	unsigned int ibs_fetch_phys_addr_low;
+	unsigned int ibs_fetch_phys_addr_high;
+	unsigned int dummy_event;
+};
+
+
+
+/** This struct represents the hardware-level IBS op information. */
+struct ibs_op_sample {
+	unsigned long int rip;
+	/* MSRC001_1034 IBS Op Logical Address Register */
+	unsigned int ibs_op_lin_addr_low;
+	unsigned int ibs_op_lin_addr_high;
+	/* MSRC001_1035 IBS Op Data Register */
+	unsigned int ibs_op_data1_low;
+	unsigned int ibs_op_data1_high;
+	/* MSRC001_1036 IBS Op Data 2 Register */
+	unsigned int ibs_op_data2_low;
+	unsigned int ibs_op_data2_high;
+	/* MSRC001_1037 IBS Op Data 3 Register */
+	unsigned int ibs_op_data3_low;
+	unsigned int ibs_op_data3_high;
+	unsigned int ibs_op_ldst_linaddr_low;
+	unsigned int ibs_op_ldst_linaddr_high;
+	unsigned int ibs_op_phys_addr_low;
+	unsigned int ibs_op_phys_addr_high;
+};
+
+
+enum IBSL1PAGESIZE {
+	L1TLB4K = 0,
+	L1TLB2M,
+	L1TLB1G,
+	L1TLB_INVALID
+};
+
+
+/**
+ * Handle an IBS fetch sample escape code sequence. An IBS fetch sample
+ * is represented as an escape code sequence. (See the comment for the
+ * function code_ibs_op_sample() for the sequence of entries in the event
+ * buffer.) When this function is called, the ESCAPE_CODE and IBS_FETCH_CODE
+ * have already been removed from the event buffer. Thus, 7 more event buffer
+ * entries are needed in order to process a complete IBS fetch sample.
+ */
+extern void code_ibs_fetch_sample(struct transient * trans);
+
+/**
+ * Handle an IBS op sample escape code sequence. An IBS op sample
+ * is represented as an escape code sequence:
+ *
+ *    IBS fetch              IBS op
+ *    ---------------        ----------------
+ *    ESCAPE_CODE            ESCAPE_CODE
+ *    IBS_FETCH_CODE         IBS_OP_CODE
+ *    Offset                 Offset
+ *    IbsFetchLinAd low      IbsOpRip low        <-- Logical (virtual) RIP
+ *    IbsFetchLinAd high     IbsOpRip high       <-- Logical (virtual) RIP
+ *    IbsFetchCtl low        IbsOpData low
+ *    IbsFetchCtl high       IbsOpData high
+ *    IbsFetchPhysAd low     IbsOpData2 low
+ *    IbsFetchPhysAd high    IbsOpData2 high
+ *                           IbsOpData3 low
+ *                           IbsOpData3 high
+ *                           IbsDcLinAd low
+ *                           IbsDcLinAd high
+ *                           IbsDcPhysAd low
+ *                           IbsDcPhysAd high
+ *
+ * When this function is called, the ESCAPE_CODE and IBS_OP_CODE have
+ * already been removed from the event buffer. Thus, 13 more event buffer
+ * entries are needed to process a complete IBS op sample.
+ *
+ * The IbsFetchLinAd and IbsOpRip are the linear (virtual) addresses
+ * that were generated by the IBS hardware. These addresses are mapped
+ * into the offset.
+ */
+extern void code_ibs_op_sample(struct transient * trans);
+
+/** Log the specified IBS derived event. */
+extern void opd_log_ibs_event(unsigned int event, struct transient * trans);
+
+/** Log the specified IBS cycle count. */
+extern void opd_log_ibs_count(unsigned int event, struct transient * trans, unsigned int count);
+
+
+#endif /*OPD_IBS_H*/
diff --git a/daemon/opd_ibs_macro.h b/daemon/opd_ibs_macro.h
new file mode 100644
index 0000000..565d22f
--- /dev/null
+++ b/daemon/opd_ibs_macro.h
@@ -0,0 +1,366 @@
+/**
+ * @file daemon/opd_ibs_macro.h
+ * AMD Family10h Instruction Based Sampling (IBS) related macro.
+ *
+ * @remark Copyright 2008 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Jason Yeh <jason.yeh@amd.com>
+ * @author Paul Drongowski <paul.drongowski@amd.com>
+ * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+ * Copyright (c) 2008 Advanced Micro Devices, Inc.
+ */
+
+#ifndef OPD_IBS_MACRO_H
+#define OPD_IBS_MACRO_H
+
+/**
+ * The following defines are bit masks that are used to select
+ * IBS fetch event flags and values at the MSR level.
+ */
+#define FETCH_MASK_LATENCY  0x0000ffff
+#define FETCH_MASK_COMPLETE 0x00040000
+#define FETCH_MASK_IC_MISS  0x00080000
+#define FETCH_MASK_PHY_ADDR 0x00100000
+#define FETCH_MASK_PG_SIZE  0x00600000
+#define FETCH_MASK_L1_MISS  0x00800000
+#define FETCH_MASK_L2_MISS  0x01000000
+#define FETCH_MASK_KILLED   \
+		(FETCH_MASK_L1_MISS|FETCH_MASK_L2_MISS|FETCH_MASK_PHY_ADDR|\
+		FETCH_MASK_COMPLETE|FETCH_MASK_IC_MISS)
+
+
+/**
+ * The following defines are bit masks that are used to select
+ * IBS op event flags and values at the MSR level.
+ */
+#define BR_MASK_RETIRE           0x0000ffff
+#define BR_MASK_BRN_RET          0x00000020
+#define BR_MASK_BRN_MISP         0x00000010
+#define BR_MASK_BRN_TAKEN        0x00000008
+#define BR_MASK_RETURN           0x00000004
+#define BR_MASK_MISP_RETURN      0x00000002
+#define BR_MASK_BRN_RESYNC       0x00000001
+
+#define NB_MASK_L3_STATE         0x00000020
+#define NB_MASK_REQ_DST_PROC     0x00000010
+#define NB_MASK_REQ_DATA_SRC     0x00000007
+
+#define DC_MASK_L2_HIT_1G        0x00080000
+#define DC_MASK_PHY_ADDR_VALID   0x00040000
+#define DC_MASK_LIN_ADDR_VALID   0x00020000
+#define DC_MASK_MAB_HIT          0x00010000
+#define DC_MASK_LOCKED_OP        0x00008000
+#define DC_MASK_WC_MEM_ACCESS    0x00004000
+#define DC_MASK_UC_MEM_ACCESS    0x00002000
+#define DC_MASK_ST_TO_LD_CANCEL  0x00001000
+#define DC_MASK_ST_TO_LD_FOR     0x00000800
+#define DC_MASK_ST_BANK_CONFLICT 0x00000400
+#define DC_MASK_LD_BANK_CONFLICT 0x00000200
+#define DC_MASK_MISALIGN_ACCESS  0x00000100
+#define DC_MASK_DC_MISS          0x00000080
+#define DC_MASK_L2_HIT_2M        0x00000040
+#define DC_MASK_L1_HIT_1G        0x00000020
+#define DC_MASK_L1_HIT_2M        0x00000010
+#define DC_MASK_L2_TLB_MISS      0x00000008
+#define DC_MASK_L1_TLB_MISS      0x00000004
+#define DC_MASK_STORE_OP         0x00000002
+#define DC_MASK_LOAD_OP          0x00000001
+
+
+/**
+ * IBS derived events:
+ *
+ * IBS derived events are identified by event select values which are
+ * similar to the event select values that identify performance monitoring
+ * counter (PMC) events. Event select values for IBS derived events begin
+ * at 0xf000.
+ *
+ * The definitions in this file *must* match definitions
+ * of IBS derived events in gh-events.xml and in the
+ * oprofile AMD Family 10h events file. More information
+ * about IBS derived events is given in the Software Optimization
+ * Guide for AMD Family 10h Processors.
+ */
+
+/**
+ * The following defines associate a 16-bit select value with an IBS
+ * derived fetch event.
+ */
+#define DE_IBS_FETCH_ALL         0xf000
+#define DE_IBS_FETCH_KILLED      0xf001
+#define DE_IBS_FETCH_ATTEMPTED   0xf002
+#define DE_IBS_FETCH_COMPLETED   0xf003
+#define DE_IBS_FETCH_ABORTED     0xf004
+#define DE_IBS_L1_ITLB_HIT       0xf005
+#define DE_IBS_ITLB_L1M_L2H      0xf006
+#define DE_IBS_ITLB_L1M_L2M      0xf007
+#define DE_IBS_IC_MISS           0xf008
+#define DE_IBS_IC_HIT            0xf009
+#define DE_IBS_FETCH_4K_PAGE     0xf00a
+#define DE_IBS_FETCH_2M_PAGE     0xf00b
+#define DE_IBS_FETCH_1G_PAGE     0xf00c
+#define DE_IBS_FETCH_XX_PAGE     0xf00d
+#define DE_IBS_FETCH_LATENCY     0xf00e
+
+#define IBS_FETCH_BASE           0xf000
+#define IBS_FETCH_END            0xf00e
+#define IBS_FETCH_MAX            (IBS_FETCH_END - IBS_FETCH_BASE + 1)
+#define IS_IBS_FETCH(x)          (IBS_FETCH_BASE <= x && x <= IBS_FETCH_END)
+#define IBS_FETCH_OFFSET(x)      (x - IBS_FETCH_BASE)
+
+/**
+ * The following defines associate a 16-bit select value with an IBS
+ * derived branch/return macro-op event.
+ */
+#define DE_IBS_OP_ALL             0xf100
+#define DE_IBS_OP_TAG_TO_RETIRE   0xf101
+#define DE_IBS_OP_COMP_TO_RETIRE  0xf102
+#define DE_IBS_BRANCH_RETIRED     0xf103
+#define DE_IBS_BRANCH_MISP        0xf104
+#define DE_IBS_BRANCH_TAKEN       0xf105
+#define DE_IBS_BRANCH_MISP_TAKEN  0xf106
+#define DE_IBS_RETURN             0xf107
+#define DE_IBS_RETURN_MISP        0xf108
+#define DE_IBS_RESYNC             0xf109
+
+#define IBS_OP_BASE               0xf100
+#define IBS_OP_END                0xf109
+#define IBS_OP_MAX                (IBS_OP_END - IBS_OP_BASE + 1)
+#define IS_IBS_OP(x)              (IBS_OP_BASE <= x && x <= IBS_OP_END)
+#define IBS_OP_OFFSET(x)          (x - IBS_OP_BASE)
+
+/**
+ * The following defines associate a 16-bit select value with an IBS
+ * derived load/store event.
+ */
+#define DE_IBS_LS_ALL_OP         0xf200
+#define DE_IBS_LS_LOAD_OP        0xf201
+#define DE_IBS_LS_STORE_OP       0xf202
+#define DE_IBS_LS_DTLB_L1H       0xf203
+#define DE_IBS_LS_DTLB_L1M_L2H   0xf204
+#define DE_IBS_LS_DTLB_L1M_L2M   0xf205
+#define DE_IBS_LS_DC_MISS        0xf206
+#define DE_IBS_LS_DC_HIT         0xf207
+#define DE_IBS_LS_MISALIGNED     0xf208
+#define DE_IBS_LS_BNK_CONF_LOAD  0xf209
+#define DE_IBS_LS_BNK_CONF_STORE 0xf20a
+#define DE_IBS_LS_STL_FORWARDED  0xf20b
+#define DE_IBS_LS_STL_CANCELLED  0xf20c
+#define DE_IBS_LS_UC_MEM_ACCESS  0xf20d
+#define DE_IBS_LS_WC_MEM_ACCESS  0xf20e
+#define DE_IBS_LS_LOCKED_OP      0xf20f
+#define DE_IBS_LS_MAB_HIT        0xf210
+#define DE_IBS_LS_L1_DTLB_4K     0xf211
+#define DE_IBS_LS_L1_DTLB_2M     0xf212
+#define DE_IBS_LS_L1_DTLB_1G     0xf213
+#define DE_IBS_LS_L1_DTLB_RES    0xf214
+#define DE_IBS_LS_L2_DTLB_4K     0xf215
+#define DE_IBS_LS_L2_DTLB_2M     0xf216
+#define DE_IBS_LS_L2_DTLB_1G     0xf217
+#define DE_IBS_LS_L2_DTLB_RES2   0xf218
+#define DE_IBS_LS_DC_LOAD_LAT    0xf219
+
+#define IBS_OP_LS_BASE           0xf200
+#define IBS_OP_LS_END            0xf219
+#define IBS_OP_LS_MAX            (IBS_OP_LS_END - IBS_OP_LS_BASE + 1)
+#define IS_IBS_OP_LS(x)          (IBS_OP_LS_BASE <= x && x <= IBS_OP_LS_END)
+#define IBS_OP_LS_OFFSET(x)      (x - IBS_OP_LS_BASE)
+
+
+/**
+ * The following defines associate a 16-bit select value with an IBS
+ * derived Northbridge (NB) event.
+ */
+#define DE_IBS_NB_LOCAL          0xf240
+#define DE_IBS_NB_REMOTE         0xf241
+#define DE_IBS_NB_LOCAL_L3       0xf242
+#define DE_IBS_NB_LOCAL_CACHE    0xf243
+#define DE_IBS_NB_REMOTE_CACHE   0xf244
+#define DE_IBS_NB_LOCAL_DRAM     0xf245
+#define DE_IBS_NB_REMOTE_DRAM    0xf246
+#define DE_IBS_NB_LOCAL_OTHER    0xf247
+#define DE_IBS_NB_REMOTE_OTHER   0xf248
+#define DE_IBS_NB_CACHE_STATE_M  0xf249
+#define DE_IBS_NB_CACHE_STATE_O  0xf24a
+#define DE_IBS_NB_LOCAL_LATENCY  0xf24b
+#define DE_IBS_NB_REMOTE_LATENCY 0xf24c
+
+#define IBS_OP_NB_BASE           0xf240
+#define IBS_OP_NB_END            0xf24c
+#define IBS_OP_NB_MAX            (IBS_OP_NB_END - IBS_OP_NB_BASE + 1)
+#define IS_IBS_OP_NB(x)          (IBS_OP_NB_BASE <= x && x <= IBS_OP_NB_END)
+#define IBS_OP_NB_OFFSET(x)      (x - IBS_OP_NB_BASE)
+
+
+#define OP_MAX_IBS_COUNTERS      (IBS_FETCH_MAX + IBS_OP_MAX + IBS_OP_LS_MAX + IBS_OP_NB_MAX)
+
+
+/**
+ * These macros decode IBS hardware-level event flags and fields.
+ * Translation results are either zero (false) or non-zero (true), except
+ * the fetch latency, which is a 16-bit cycle count, and the fetch page size
+ * field, which is a 2-bit unsigned integer.
+ */
+
+/** Bits 47:32 IbsFetchLat: instruction fetch latency */
+#define IBS_FETCH_FETCH_LATENCY(x)              ((unsigned short)(x->ibs_fetch_ctl_high & FETCH_MASK_LATENCY))
+
+/** Bit 50 IbsFetchComp: instruction fetch complete. */
+#define IBS_FETCH_FETCH_COMPLETION(x)           ((x->ibs_fetch_ctl_high & FETCH_MASK_COMPLETE) != 0)
+
+/** Bit 51 IbsIcMiss: instruction cache miss. */
+#define IBS_FETCH_INST_CACHE_MISS(x)            ((x->ibs_fetch_ctl_high & FETCH_MASK_IC_MISS) != 0)
+
+/** Bit 52 IbsPhyAddrValid: instruction fetch physical address valid. */
+#define IBS_FETCH_PHYS_ADDR_VALID(x)            ((x->ibs_fetch_ctl_high & FETCH_MASK_PHY_ADDR) != 0)
+
+/** Bits 54:53 IbsL1TlbPgSz: instruction cache L1TLB page size. */
+#define IBS_FETCH_TLB_PAGE_SIZE(x)              ((unsigned short)((x->ibs_fetch_ctl_high >> 21) & 0x3))
+
+/** Bit 55 IbsL1TlbMiss: instruction cache L1TLB miss. */
+#define IBS_FETCH_M_L1_TLB_MISS(x)              ((x->ibs_fetch_ctl_high & FETCH_MASK_L1_MISS) != 0)
+
+/** Bit 56 IbsL2TlbMiss: instruction cache L2TLB miss. */
+#define IBS_FETCH_L2_TLB_MISS(x)                ((x->ibs_fetch_ctl_high & FETCH_MASK_L2_MISS) != 0)
+
+/** A fetch is a killed fetch if all the masked bits are clear */
+#define IBS_FETCH_KILLED(x)                     ((x->ibs_fetch_ctl_high & FETCH_MASK_KILLED) == 0)
+
+#define IBS_FETCH_INST_CACHE_HIT(x)             (IBS_FETCH_FETCH_COMPLETION(x) && !IBS_FETCH_INST_CACHE_MISS(x))
+
+#define IBS_FETCH_L1_TLB_HIT(x)                 (!IBS_FETCH_M_L1_TLB_MISS(x) && IBS_FETCH_PHYS_ADDR_VALID(x))
+
+#define IBS_FETCH_ITLB_L1M_L2H(x)               (IBS_FETCH_M_L1_TLB_MISS(x) && !IBS_FETCH_L2_TLB_MISS(x))
+
+#define IBS_FETCH_ITLB_L1M_L2M(x)               (IBS_FETCH_M_L1_TLB_MISS(x) && IBS_FETCH_L2_TLB_MISS(x))
+
+
+/**
+ * These macros translate IBS op event data from its hardware-level
+ * representation. It hides the MSR layout of IBS op data.
+ */
+
+/**
+ * MSRC001_1035 IBS OP Data Register (IbsOpData)
+ *
+ * 15:0 IbsCompToRetCtr: macro-op completion to retire count
+ */
+#define IBS_OP_COM_TO_RETIRE_CYCLES(x)          ((unsigned short)(x->ibs_op_data1_low & BR_MASK_RETIRE))
+
+/** 31:16 tag_to_retire_cycles : macro-op tag to retire count. */
+#define IBS_OP_TAG_TO_RETIRE_CYCLES(x)          ((unsigned short)((x->ibs_op_data1_low >> 16) & BR_MASK_RETIRE))
+
+/** 32 op_branch_resync : resync macro-op. */
+#define IBS_OP_OP_BRANCH_RESYNC(x)              ((x->ibs_op_data1_high & BR_MASK_BRN_RESYNC) != 0)
+
+/** 33 op_mispredict_return : mispredicted return macro-op. */
+#define IBS_OP_OP_MISPREDICT_RETURN(x)          ((x->ibs_op_data1_high & BR_MASK_MISP_RETURN) != 0)
+
+/** 34 IbsOpReturn: return macro-op. */
+#define IBS_OP_OP_RETURN(x)                     ((x->ibs_op_data1_high & BR_MASK_RETURN) != 0)
+
+/** 35 IbsOpBrnTaken: taken branch macro-op. */
+#define IBS_OP_OP_BRANCH_TAKEN(x)               ((x->ibs_op_data1_high & BR_MASK_BRN_TAKEN) != 0)
+
+/** 36 IbsOpBrnMisp: mispredicted branch macro-op.  */
+#define IBS_OP_OP_BRANCH_MISPREDICT(x)          ((x->ibs_op_data1_high & BR_MASK_BRN_MISP) != 0)
+
+/** 37 IbsOpBrnRet: branch macro-op retired. */
+#define IBS_OP_OP_BRANCH_RETIRED(x)             ((x->ibs_op_data1_high & BR_MASK_BRN_RET) != 0)
+
+/**
+ * MSRC001_1036 IBS Op Data 2 Register (IbsOpData2)
+ *
+ * 5 NbIbsReqCacheHitSt: IBS L3 cache state
+ */
+#define IBS_OP_NB_IBS_CACHE_HIT_ST(x)           ((x->ibs_op_data2_low & NB_MASK_L3_STATE) != 0)
+
+/** 4 NbIbsReqDstProc: IBS request destination processor */
+#define IBS_OP_NB_IBS_REQ_DST_PROC(x)           ((x->ibs_op_data2_low & NB_MASK_REQ_DST_PROC) != 0)
+
+/** 2:0 NbIbsReqSrc: Northbridge IBS request data source */
+#define IBS_OP_NB_IBS_REQ_SRC(x)                ((unsigned char)(x->ibs_op_data2_low & NB_MASK_REQ_DATA_SRC))
+
+/**
+ * MSRC001_1037 IBS Op Data3 Register
+ *
+ * Bits 48:32   IbsDcMissLat
+ */
+#define IBS_OP_DC_MISS_LATENCY(x)               ((unsigned short)(x->ibs_op_data3_high & 0xffff))
+
+/** 0 IbsLdOp: Load op */
+#define IBS_OP_IBS_LD_OP(x)                     ((x->ibs_op_data3_low & DC_MASK_LOAD_OP) != 0)
+
+/** 1 IbsStOp: Store op */
+#define IBS_OP_IBS_ST_OP(x)                     ((x->ibs_op_data3_low & DC_MASK_STORE_OP) != 0)
+
+/** 2 ibs_dc_l1_tlb_miss: Data cache L1TLB miss */
+#define IBS_OP_IBS_DC_L1_TLB_MISS(x)            ((x->ibs_op_data3_low & DC_MASK_L1_TLB_MISS) != 0)
+
+/** 3 ibs_dc_l2_tlb_miss: Data cache L2TLB miss */
+#define IBS_OP_IBS_DC_L2_TLB_MISS(x)            ((x->ibs_op_data3_low & DC_MASK_L2_TLB_MISS) != 0)
+
+/** 4 IbsDcL1tlbHit2M: Data cache L1TLB hit in 2M page */
+#define IBS_OP_IBS_DC_L1_TLB_HIT_2MB(x)         ((x->ibs_op_data3_low & DC_MASK_L1_HIT_2M) != 0)
+
+/** 5 ibs_dc_l1_tlb_hit_1gb: Data cache L1TLB hit in 1G page */
+#define IBS_OP_IBS_DC_L1_TLB_HIT_1GB(x)         ((x->ibs_op_data3_low & DC_MASK_L1_HIT_1G) != 0)
+
+/** 6 ibs_dc_l2_tlb_hit_2mb: Data cache L2TLB hit in 2M page */
+#define IBS_OP_IBS_DC_L2_TLB_HIT_2MB(x)         ((x->ibs_op_data3_low & DC_MASK_L2_HIT_2M) != 0)
+
+/** 7 ibs_dc_miss: Data cache miss */
+#define IBS_OP_IBS_DC_MISS(x)                   ((x->ibs_op_data3_low & DC_MASK_DC_MISS) != 0)
+
+/** 8 ibs_dc_miss_acc: Misaligned access */
+#define IBS_OP_IBS_DC_MISS_ACC(x)               ((x->ibs_op_data3_low & DC_MASK_MISALIGN_ACCESS) != 0)
+
+/** 9 ibs_dc_ld_bnk_con: Bank conflict on load operation */
+#define IBS_OP_IBS_DC_LD_BNK_CON(x)             ((x->ibs_op_data3_low & DC_MASK_LD_BANK_CONFLICT) != 0)
+
+/** 10 ibs_dc_st_bnk_con: Bank conflict on store operation */
+#define IBS_OP_IBS_DC_ST_BNK_CON(x)             ((x->ibs_op_data3_low & DC_MASK_ST_BANK_CONFLICT) != 0)
+
+/** 11 ibs_dc_st_to_ld_fwd : Data forwarded from store to load operation */
+#define IBS_OP_IBS_DC_ST_TO_LD_FWD(x)           ((x->ibs_op_data3_low & DC_MASK_ST_TO_LD_FOR) != 0)
+
+/** 12 ibs_dc_st_to_ld_can: Data forwarding from store to load operation cancelled */
+#define IBS_OP_IBS_DC_ST_TO_LD_CAN(x)           ((x->ibs_op_data3_low & DC_MASK_ST_TO_LD_CANCEL) != 0)
+
+/** 13 ibs_dc_uc_mem_acc: UC memory access */
+#define IBS_OP_IBS_DC_UC_MEM_ACC(x)             ((x->ibs_op_data3_low & DC_MASK_UC_MEM_ACCESS) != 0)
+
+/** 14 ibs_dc_wc_mem_acc : WC memory access */
+#define IBS_OP_IBS_DC_WC_MEM_ACC(x)             ((x->ibs_op_data3_low & DC_MASK_WC_MEM_ACCESS) != 0)
+
+/** 15 ibs_locked_op: Locked operation */
+#define IBS_OP_IBS_LOCKED_OP(x)                 ((x->ibs_op_data3_low & DC_MASK_LOCKED_OP) != 0)
+
+/** 16 ibs_dc_mab_hit : MAB hit */
+#define IBS_OP_IBS_DC_MAB_HIT(x)                ((x->ibs_op_data3_low & DC_MASK_MAB_HIT) != 0)
+
+/** 17 IbsDcLinAddrValid: Data cache linear address valid */
+#define IBS_OP_IBS_DC_LIN_ADDR_VALID(x)         ((x->ibs_op_data3_low & DC_MASK_LIN_ADDR_VALID) != 0)
+
+/** 18 ibs_dc_phy_addr_valid: Data cache physical address valid */
+#define IBS_OP_IBS_DC_PHY_ADDR_VALID(x)         ((x->ibs_op_data3_low & DC_MASK_PHY_ADDR_VALID) != 0)
+
+/** 19 ibs_dc_l2_tlb_hit_1gb: Data cache L2TLB hit in 1G page */
+#define IBS_OP_IBS_DC_L2_TLB_HIT_1GB(x)         ((x->ibs_op_data3_low & DC_MASK_L2_HIT_1G) != 0)
+
+
+/**
+ * Aggregate the IBS derived event. Increase the
+ * derived event count by one.
+ */
+#define AGG_IBS_EVENT(EV)               opd_log_ibs_event(EV, trans)
+
+/**
+ * Aggregate the IBS latency/cycle counts. Increase the
+ * derived event count by the specified count value.
+ */
+#define AGG_IBS_COUNT(EV, COUNT)        opd_log_ibs_count(EV, trans, COUNT)
+
+
+#endif /*OPD_IBS_MACRO_H*/
diff --git a/daemon/opd_ibs_trans.c b/daemon/opd_ibs_trans.c
new file mode 100644
index 0000000..3b2c2f8
--- /dev/null
+++ b/daemon/opd_ibs_trans.c
@@ -0,0 +1,554 @@
+/**
+ * @file daemon/opd_ibs_trans.c
+ * AMD Family10h Instruction Based Sampling (IBS) translation.
+ *
+ * @remark Copyright 2008 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Jason Yeh <jason.yeh@amd.com>
+ * @author Paul Drongowski <paul.drongowski@amd.com>
+ * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+ * Copyright (c) 2008 Advanced Micro Devices, Inc.
+ */
+
+#include "opd_ibs.h"
+#include "opd_ibs_macro.h"
+#include "opd_ibs_trans.h"
+#include "opd_trans.h"
+#include "opd_printf.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#define MAX_EVENTS_PER_GROUP	32
+
+/*
+ * --------------------- OP DERIVED FUNCTION
+ */
+void trans_ibs_fetch (struct transient * trans, unsigned int selected_flag, unsigned int size)
+{
+	struct ibs_fetch_sample * trans_fetch = ((struct ibs_sample*)(trans->ext))->fetch;
+	unsigned int i, j, mask = 1;
+
+	for (i = IBS_FETCH_BASE, j =0 ; i <= IBS_FETCH_END && j < size ; i++, mask = mask << 1) {
+
+		if ((selected_flag & mask) == 0)
+			continue;
+
+		j++;
+
+		switch (i) {
+
+		case DE_IBS_FETCH_ALL:
+			/* IBS all fetch samples (kills + attempts) */
+			AGG_IBS_EVENT(DE_IBS_FETCH_ALL);
+			break;
+
+		case DE_IBS_FETCH_KILLED:
+			/* IBS killed fetches ("case 0") -- All interesting event
+			 * flags are clear */
+			if (IBS_FETCH_KILLED(trans_fetch))
+				AGG_IBS_EVENT(DE_IBS_FETCH_KILLED);
+			break;
+
+		case DE_IBS_FETCH_ATTEMPTED:
+			/* Any non-killed fetch is an attempted fetch */
+			AGG_IBS_EVENT(DE_IBS_FETCH_ATTEMPTED);
+			break;
+
+		case DE_IBS_FETCH_COMPLETED:
+			if (IBS_FETCH_FETCH_COMPLETION(trans_fetch))
+				/* IBS Fetch Completed */
+				AGG_IBS_EVENT(DE_IBS_FETCH_COMPLETED);
+			break;
+
+		case DE_IBS_FETCH_ABORTED:
+			if (!IBS_FETCH_FETCH_COMPLETION(trans_fetch))
+				/* IBS Fetch Aborted */
+				AGG_IBS_EVENT(DE_IBS_FETCH_ABORTED);
+			break;
+
+		case DE_IBS_L1_ITLB_HIT:
+			/* IBS L1 ITLB hit */
+			if (IBS_FETCH_L1_TLB_HIT(trans_fetch))
+				AGG_IBS_EVENT(DE_IBS_L1_ITLB_HIT);
+			break;
+
+		case DE_IBS_ITLB_L1M_L2H:
+			/* IBS L1 ITLB miss and L2 ITLB hit */
+			if (IBS_FETCH_ITLB_L1M_L2H(trans_fetch))
+				AGG_IBS_EVENT(DE_IBS_ITLB_L1M_L2H);
+			break;
+
+		case DE_IBS_ITLB_L1M_L2M:
+			/* IBS L1 & L2 ITLB miss; complete ITLB miss */
+			if (IBS_FETCH_ITLB_L1M_L2M(trans_fetch))
+				AGG_IBS_EVENT(DE_IBS_ITLB_L1M_L2M);
+			break;
+
+		case DE_IBS_IC_MISS:
+			/* IBS instruction cache miss */
+			if (IBS_FETCH_INST_CACHE_MISS(trans_fetch))
+				AGG_IBS_EVENT(DE_IBS_IC_MISS);
+			break;
+
+		case DE_IBS_IC_HIT:
+			/* IBS instruction cache hit */
+			if (IBS_FETCH_INST_CACHE_HIT(trans_fetch))
+				AGG_IBS_EVENT(DE_IBS_IC_HIT);
+			break;
+
+		case DE_IBS_FETCH_4K_PAGE:
+			if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch)
+			    && IBS_FETCH_TLB_PAGE_SIZE(trans_fetch) ==  L1TLB4K)
+				AGG_IBS_EVENT(DE_IBS_FETCH_4K_PAGE);
+			break;
+
+		case DE_IBS_FETCH_2M_PAGE:
+			if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch)
+			    && IBS_FETCH_TLB_PAGE_SIZE(trans_fetch) ==  L1TLB2M)
+				AGG_IBS_EVENT(DE_IBS_FETCH_2M_PAGE);
+			break;
+
+		case DE_IBS_FETCH_1G_PAGE:
+			if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch)
+			    && IBS_FETCH_TLB_PAGE_SIZE(trans_fetch) ==  L1TLB1G)
+				AGG_IBS_EVENT(DE_IBS_FETCH_1G_PAGE);
+			break;
+
+		case DE_IBS_FETCH_XX_PAGE:
+			break;
+
+		case DE_IBS_FETCH_LATENCY:
+			if (IBS_FETCH_FETCH_LATENCY(trans_fetch))
+				AGG_IBS_COUNT(DE_IBS_FETCH_LATENCY,
+					      IBS_FETCH_FETCH_LATENCY(trans_fetch));
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+/*
+ * --------------------- OP DERIVED FUNCTION
+ */
+void trans_ibs_op (struct transient * trans, unsigned int selected_flag, unsigned int size)
+{
+	struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
+	unsigned int i, j, mask = 1;
+
+	for (i = IBS_OP_BASE, j =0 ; i <= IBS_OP_END && j < size ; i++, mask = mask << 1) {
+
+		if ((selected_flag & mask) == 0)
+			continue;
+
+		j++;
+
+		switch (i) {
+
+		case DE_IBS_OP_ALL:
+			/* All IBS op samples */
+			AGG_IBS_EVENT(DE_IBS_OP_ALL);
+			break;
+
+		case DE_IBS_OP_TAG_TO_RETIRE:
+			/* Tally retire cycle counts for all sampled macro-ops
+			 * IBS tag to retire cycles */
+			if (IBS_OP_TAG_TO_RETIRE_CYCLES(trans_op))
+				AGG_IBS_COUNT(DE_IBS_OP_TAG_TO_RETIRE,
+					IBS_OP_TAG_TO_RETIRE_CYCLES(trans_op));
+			break;
+
+		case DE_IBS_OP_COMP_TO_RETIRE:
+			/* IBS completion to retire cycles */
+			if (IBS_OP_COM_TO_RETIRE_CYCLES(trans_op))
+				AGG_IBS_COUNT(DE_IBS_OP_COMP_TO_RETIRE,
+					IBS_OP_COM_TO_RETIRE_CYCLES(trans_op));
+			break;
+
+		case DE_IBS_BRANCH_RETIRED:
+			if (IBS_OP_OP_BRANCH_RETIRED(trans_op))
+				/* IBS Branch retired op */
+				AGG_IBS_EVENT(DE_IBS_BRANCH_RETIRED) ;
+			break;
+
+		case DE_IBS_BRANCH_MISP:
+			if (IBS_OP_OP_BRANCH_RETIRED(trans_op)
+			    /* Test branch-specific event flags */
+			    /* IBS mispredicted Branch op */
+			    && IBS_OP_OP_BRANCH_MISPREDICT(trans_op))
+				AGG_IBS_EVENT(DE_IBS_BRANCH_MISP) ;
+			break;
+
+		case DE_IBS_BRANCH_TAKEN:
+			if (IBS_OP_OP_BRANCH_RETIRED(trans_op)
+			    /* IBS taken Branch op */
+			    && IBS_OP_OP_BRANCH_TAKEN(trans_op))
+				AGG_IBS_EVENT(DE_IBS_BRANCH_TAKEN);
+			break;
+
+		case DE_IBS_BRANCH_MISP_TAKEN:
+			if (IBS_OP_OP_BRANCH_RETIRED(trans_op)
+			    /* IBS mispredicted taken branch op */
+			    && IBS_OP_OP_BRANCH_TAKEN(trans_op)
+			    && IBS_OP_OP_BRANCH_MISPREDICT(trans_op))
+				AGG_IBS_EVENT(DE_IBS_BRANCH_MISP_TAKEN);
+			break;
+
+		case DE_IBS_RETURN:
+			if (IBS_OP_OP_BRANCH_RETIRED(trans_op)
+			    /* IBS return op */
+			    && IBS_OP_OP_RETURN(trans_op))
+				AGG_IBS_EVENT(DE_IBS_RETURN);
+			break;
+
+		case DE_IBS_RETURN_MISP:
+			if (IBS_OP_OP_BRANCH_RETIRED(trans_op)
+			    /* IBS mispredicted return op */
+			    && IBS_OP_OP_RETURN(trans_op)
+			    && IBS_OP_OP_BRANCH_MISPREDICT(trans_op))
+				AGG_IBS_EVENT(DE_IBS_RETURN_MISP);
+			break;
+
+		case DE_IBS_RESYNC:
+			/* Test for a resync macro-op */
+			if (IBS_OP_OP_BRANCH_RESYNC(trans_op))
+				AGG_IBS_EVENT(DE_IBS_RESYNC);
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+
+/*
+ * --------------------- OP LS DERIVED FUNCTION
+ */
+void trans_ibs_op_ls (struct transient * trans, unsigned int selected_flag, unsigned int size)
+{
+	struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
+	unsigned int i, j, mask = 1;
+
+	/* Preliminary check */
+	if (!IBS_OP_IBS_LD_OP(trans_op) && !IBS_OP_IBS_ST_OP(trans_op))
+		return;
+
+
+	for (i = IBS_OP_LS_BASE, j =0 ; i <= IBS_OP_LS_END && j < size ; i++, mask = mask << 1) {
+
+		if ((selected_flag & mask) == 0)
+			continue;
+
+		j++;
+
+		switch (i) {
+
+		case DE_IBS_LS_ALL_OP:
+			/* Count the number of LS op samples */
+			AGG_IBS_EVENT(DE_IBS_LS_ALL_OP) ;
+			break;
+
+		case DE_IBS_LS_LOAD_OP:
+			if (IBS_OP_IBS_LD_OP(trans_op))
+				/* Tally an IBS load derived event */
+				AGG_IBS_EVENT(DE_IBS_LS_LOAD_OP) ;
+			break;
+
+		case DE_IBS_LS_STORE_OP:
+			if (IBS_OP_IBS_ST_OP(trans_op))
+				/* Count and handle store operations */
+				AGG_IBS_EVENT(DE_IBS_LS_STORE_OP);
+			break;
+
+		case DE_IBS_LS_DTLB_L1H:
+			if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+			    && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op))
+				/* L1 DTLB hit -- This is the most frequent case */
+				AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1H);
+			break;
+
+		case DE_IBS_LS_DTLB_L1M_L2H:
+			/* l2_translation_size = 1 */
+			if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+			    && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+			    && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op))
+				/* L1 DTLB miss, L2 DTLB hit */
+				AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1M_L2H);
+			break;
+
+		case DE_IBS_LS_DTLB_L1M_L2M:
+			if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+			    && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+			    && IBS_OP_IBS_DC_L2_TLB_MISS(trans_op))
+				/* L1 DTLB miss, L2 DTLB miss */
+				AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1M_L2M);
+			break;
+
+		case DE_IBS_LS_DC_MISS:
+			if (IBS_OP_IBS_DC_MISS(trans_op))
+				AGG_IBS_EVENT(DE_IBS_LS_DC_MISS);
+			break;
+
+		case DE_IBS_LS_DC_HIT:
+			if (!IBS_OP_IBS_DC_MISS(trans_op))
+				AGG_IBS_EVENT(DE_IBS_LS_DC_HIT);
+			break;
+
+		case DE_IBS_LS_MISALIGNED:
+			if (IBS_OP_IBS_DC_MISS_ACC(trans_op))
+				AGG_IBS_EVENT(DE_IBS_LS_MISALIGNED);
+			break;
+
+		case DE_IBS_LS_BNK_CONF_LOAD:
+			if (IBS_OP_IBS_DC_LD_BNK_CON(trans_op))
+				AGG_IBS_EVENT(DE_IBS_LS_BNK_CONF_LOAD);
+			break;
+
+		case DE_IBS_LS_BNK_CONF_STORE:
+			if (IBS_OP_IBS_DC_ST_BNK_CON(trans_op))
+				AGG_IBS_EVENT(DE_IBS_LS_BNK_CONF_STORE);
+			break;
+
+		case DE_IBS_LS_STL_FORWARDED:
+			if (IBS_OP_IBS_LD_OP(trans_op)
+			    /* Data forwarding info are valid only for load ops */
+			    && IBS_OP_IBS_DC_ST_TO_LD_FWD(trans_op))
+				AGG_IBS_EVENT(DE_IBS_LS_STL_FORWARDED) ;
+			break;
+
+		case DE_IBS_LS_STL_CANCELLED:
+			if (IBS_OP_IBS_LD_OP(trans_op))
+			if (IBS_OP_IBS_DC_ST_TO_LD_CAN(trans_op))
+				AGG_IBS_EVENT(DE_IBS_LS_STL_CANCELLED) ;
+			break;
+
+		case DE_IBS_LS_UC_MEM_ACCESS:
+			if (IBS_OP_IBS_DC_UC_MEM_ACC(trans_op))
+				AGG_IBS_EVENT(DE_IBS_LS_UC_MEM_ACCESS);
+			break;
+
+		case DE_IBS_LS_WC_MEM_ACCESS:
+			if (IBS_OP_IBS_DC_WC_MEM_ACC(trans_op))
+				AGG_IBS_EVENT(DE_IBS_LS_WC_MEM_ACCESS);
+			break;
+
+		case DE_IBS_LS_LOCKED_OP:
+			if (IBS_OP_IBS_LOCKED_OP(trans_op))
+				AGG_IBS_EVENT(DE_IBS_LS_LOCKED_OP);
+			break;
+
+		case DE_IBS_LS_MAB_HIT:
+			if (IBS_OP_IBS_DC_MAB_HIT(trans_op))
+				AGG_IBS_EVENT(DE_IBS_LS_MAB_HIT);
+			break;
+
+		case DE_IBS_LS_L1_DTLB_4K:
+			/* l1_translation */
+			if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+			    && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+
+			    && !IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op)
+			    && !IBS_OP_IBS_DC_L1_TLB_HIT_1GB(trans_op))
+				/* This is the most common case, unfortunately */
+				AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_4K) ;
+			break;
+
+		case DE_IBS_LS_L1_DTLB_2M:
+			/* l1_translation */
+			if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+			    && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+
+			    && IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op))
+				/* 2M L1 DTLB page translation */
+				AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_2M);
+			break;
+
+		case DE_IBS_LS_L1_DTLB_1G:
+			/* l1_translation */
+			if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+			    && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+
+			    && !IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op)
+			    && IBS_OP_IBS_DC_L1_TLB_HIT_1GB(trans_op))
+				/* 1G L1 DTLB page translation */
+				AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_1G);
+			break;
+
+		case DE_IBS_LS_L1_DTLB_RES:
+			break;
+
+		case DE_IBS_LS_L2_DTLB_4K:
+			/* l2_translation_size = 1 */
+			if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+			    && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+			    && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)
+
+			    /* L2 DTLB page translation */
+			    && !IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op)
+			    && !IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op))
+				/* 4K L2 DTLB page translation */
+				AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_4K);
+			break;
+
+		case DE_IBS_LS_L2_DTLB_2M:
+			/* l2_translation_size = 1 */
+			if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+			    && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+			    && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)
+
+			    /* L2 DTLB page translation */
+			    && IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op)
+			    && !IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op))
+				/* 2M L2 DTLB page translation */
+				AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_2M);
+			break;
+
+		case DE_IBS_LS_L2_DTLB_1G:
+			/* l2_translation_size = 1 */
+			if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+			    && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+			    && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)
+
+			    /* L2 DTLB page translation */
+			    && !IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op)
+			    && IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op))
+				/* 1G L2 DTLB page translation */
+				AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_1G);
+			break;
+
+		case DE_IBS_LS_L2_DTLB_RES2:
+			break;
+
+		case DE_IBS_LS_DC_LOAD_LAT:
+			if (IBS_OP_IBS_LD_OP(trans_op)
+			    /* If the load missed in DC, tally the DC load miss latency */
+			    && IBS_OP_IBS_DC_MISS(trans_op))
+				/* DC load miss latency is only reliable for load ops */
+				AGG_IBS_COUNT(DE_IBS_LS_DC_LOAD_LAT,
+					      IBS_OP_DC_MISS_LATENCY(trans_op)) ;
+			break;
+
+		default:
+			break;
+		}
+	}
+}
+
+/*
+ * --------------------- OP NB DERIVED FUNCTION
+ *
+ * NB data is only guaranteed reliable for load operations
+ * that miss in L1 and L2 cache. NB data arrives too late
+ * to be reliable for store operations
+ */
+void trans_ibs_op_nb (struct transient * trans, unsigned int selected_flag, unsigned int size)
+{
+	struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
+	unsigned int i, j, mask = 1;
+
+	/* Preliminary check */
+	if (!IBS_OP_IBS_LD_OP(trans_op))
+		return;
+
+	if (!IBS_OP_IBS_DC_MISS(trans_op))
+		return;
+
+	if (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0)
+		return;
+
+	for (i = IBS_OP_NB_BASE, j =0 ; i <= IBS_OP_NB_END && j < size ; i++, mask = mask << 1) {
+
+		if ((selected_flag & mask) == 0)
+			continue;
+
+		j++;
+
+		switch (i) {
+
+		case DE_IBS_NB_LOCAL:
+			if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
+				/* Request was serviced by local processor */
+				AGG_IBS_EVENT(DE_IBS_NB_LOCAL) ;
+			break;
+
+		case DE_IBS_NB_REMOTE:
+			if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
+				/* Request was serviced by remote processor */
+				AGG_IBS_EVENT(DE_IBS_NB_REMOTE) ;
+			break;
+
+		case DE_IBS_NB_LOCAL_L3:
+			if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
+			    && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x1))
+				AGG_IBS_EVENT(DE_IBS_NB_LOCAL_L3);
+			break;
+
+		case DE_IBS_NB_LOCAL_CACHE:
+			if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
+			    && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2))
+				AGG_IBS_EVENT(DE_IBS_NB_LOCAL_CACHE);
+			break;
+
+		case DE_IBS_NB_REMOTE_CACHE:
+			if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
+			    && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2))
+				AGG_IBS_EVENT(DE_IBS_NB_REMOTE_CACHE) ;
+			break;
+
+		case DE_IBS_NB_LOCAL_DRAM:
+			if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
+			    && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x3))
+				AGG_IBS_EVENT(DE_IBS_NB_LOCAL_DRAM);
+			break;
+
+		case DE_IBS_NB_REMOTE_DRAM:
+			if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
+			    && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x3))
+				AGG_IBS_EVENT(DE_IBS_NB_REMOTE_DRAM) ;
+			break;
+
+		case DE_IBS_NB_LOCAL_OTHER:
+			if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
+			    && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x7))
+				AGG_IBS_EVENT(DE_IBS_NB_LOCAL_OTHER);
+			break;
+
+		case DE_IBS_NB_REMOTE_OTHER:
+			if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
+			    && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x7))
+				AGG_IBS_EVENT(DE_IBS_NB_REMOTE_OTHER) ;
+			break;
+
+		case DE_IBS_NB_CACHE_STATE_M:
+			if ((IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2)
+			    && !IBS_OP_NB_IBS_CACHE_HIT_ST(trans_op))
+				AGG_IBS_EVENT(DE_IBS_NB_CACHE_STATE_M) ;
+			break;
+
+		case DE_IBS_NB_CACHE_STATE_O:
+			if ((IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2)
+			    && IBS_OP_NB_IBS_CACHE_HIT_ST(trans_op))
+				AGG_IBS_EVENT(DE_IBS_NB_CACHE_STATE_O) ;
+			break;
+
+		case DE_IBS_NB_LOCAL_LATENCY:
+			if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
+				/* Request was serviced by local processor */
+				AGG_IBS_COUNT(DE_IBS_NB_LOCAL_LATENCY,
+					      IBS_OP_DC_MISS_LATENCY(trans_op));
+			break;
+
+		case DE_IBS_NB_REMOTE_LATENCY:
+			if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
+				/* Request was serviced by remote processor */
+				AGG_IBS_COUNT(DE_IBS_NB_REMOTE_LATENCY,
+					      IBS_OP_DC_MISS_LATENCY(trans_op));
+			break;
+
+		default:
+			break;
+		}
+	}
+}
diff --git a/daemon/opd_ibs_trans.h b/daemon/opd_ibs_trans.h
new file mode 100644
index 0000000..d01e3d9
--- /dev/null
+++ b/daemon/opd_ibs_trans.h
@@ -0,0 +1,31 @@
+/**
+ * @file daemon/opd_ibs_trans.h
+ * AMD Family10h Instruction Based Sampling (IBS) translation.
+ *
+ * @remark Copyright 2008 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Jason Yeh <jason.yeh@amd.com>
+ * @author Paul Drongowski <paul.drongowski@amd.com>
+ * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+ * Copyright (c) 2008 Advanced Micro Devices, Inc.
+ */
+
+#ifndef OPD_IBS_TRANS_H
+#define OPD_IBS_TRANS_H
+
+struct ibs_fetch_sample;
+struct ibs_op_sample;
+struct transient;
+
+struct ibs_translation_table {
+	unsigned int event;
+	void (*translator)(struct transient *);
+};
+
+
+extern void trans_ibs_fetch (struct transient * trans, unsigned int selected_flag, unsigned int size);
+extern void trans_ibs_op (struct transient * trans, unsigned int selected_flag, unsigned int size);
+extern void trans_ibs_op_ls (struct transient * trans, unsigned int selected_flag, unsigned int size);
+extern void trans_ibs_op_nb (struct transient * trans, unsigned int selected_flag, unsigned int size);
+#endif // OPD_IBS_TRANS_H
diff --git a/daemon/opd_interface.h b/daemon/opd_interface.h
index c876830..ef3b02c 100644
--- a/daemon/opd_interface.h
+++ b/daemon/opd_interface.h
@@ -35,11 +35,14 @@
 #if defined(__powerpc__)
 #define SPU_PROFILING_CODE		11
 #define SPU_CTX_SWITCH_CODE		12
-#define DOMAIN_SWITCH_CODE		13
-#define LAST_CODE			14
 #else
 #define DOMAIN_SWITCH_CODE		11
-#define LAST_CODE			12
+/* Code 12 is now considered an unknown escape code */
 #endif
+
+/* AMD's Instruction-Based Sampling (IBS) escape code */
+#define IBS_FETCH_SAMPLE		13
+#define IBS_OP_SAMPLE			14
+#define LAST_CODE			15
  
 #endif /* OPD_INTERFACE_H */
diff --git a/daemon/opd_mangling.c b/daemon/opd_mangling.c
index 08a6079..b4768a6 100644
--- a/daemon/opd_mangling.c
+++ b/daemon/opd_mangling.c
@@ -138,7 +138,7 @@
 }
 
 
-int opd_open_sample_file(odb_t * file, struct sfile * last,
+int opd_open_sample_file(odb_t *file, struct sfile *last,
                          struct sfile * sf, int counter, int cg)
 {
 	char * mangled;
diff --git a/daemon/opd_mangling.h b/daemon/opd_mangling.h
index 0e46ec4..d1b2a78 100644
--- a/daemon/opd_mangling.h
+++ b/daemon/opd_mangling.h
@@ -27,7 +27,7 @@
  *
  * Returns 0 on success.
  */
-int opd_open_sample_file(odb_t * file, struct sfile * last,
+int opd_open_sample_file(odb_t *file, struct sfile *last,
                          struct sfile * sf, int counter, int cg);
 
 #endif /* OPD_MANGLING_H */
diff --git a/daemon/opd_printf.h b/daemon/opd_printf.h
index e1f8476..09df07f 100644
--- a/daemon/opd_printf.h
+++ b/daemon/opd_printf.h
@@ -22,6 +22,8 @@
 extern int varcs;
 /// kernel module handling
 extern int vmodule;
+/// extended feature
+extern int vext;
 /// all others not fitting in above category, not voluminous.
 extern int vmisc;
 
diff --git a/daemon/opd_sfile.c b/daemon/opd_sfile.c
index 03ebf55..c2dea20 100644
--- a/daemon/opd_sfile.c
+++ b/daemon/opd_sfile.c
@@ -17,6 +17,7 @@
 #include "opd_anon.h"
 #include "opd_printf.h"
 #include "opd_stats.h"
+#include "opd_extended.h"
 #include "oprofiled.h"
 
 #include "op_libiberty.h"
@@ -126,7 +127,7 @@
 }
 
 
-static int
+int
 sfile_equal(struct sfile const * sf, struct sfile const * sf2)
 {
 	return do_match(sf, sf2->cookie, sf2->app_cookie, sf2->kernel,
@@ -183,6 +184,11 @@
 	for (i = 0 ; i < op_nr_counters ; ++i)
 		odb_init(&sf->files[i]);
 
+	if (trans->ext)
+		opd_ext_sfile_create(sf);
+	else
+		sf->ext_files = NULL;
+
 	for (i = 0; i < CG_HASH_SIZE; ++i)
 		list_init(&sf->cg_hash[i]);
 
@@ -269,7 +275,7 @@
 }
 
 
-static void sfile_dup(struct sfile * to, struct sfile * from)
+void sfile_dup(struct sfile * to, struct sfile * from)
 {
 	size_t i;
 
@@ -278,6 +284,8 @@
 	for (i = 0 ; i < op_nr_counters ; ++i)
 		odb_init(&to->files[i]);
 
+	opd_ext_sfile_dup(to, from);
+
 	for (i = 0; i < CG_HASH_SIZE; ++i)
 		list_init(&to->cg_hash[i]);
 
@@ -295,6 +303,9 @@
 	unsigned long hash;
 	odb_t * file;
 
+	if ((trans->ext) != NULL)
+		return opd_ext_sfile_get(trans, is_cg);
+
 	if (trans->event >= op_nr_counters) {
 		fprintf(stderr, "%s: Invalid counter %lu\n", __FUNCTION__,
 			trans->event);
@@ -417,6 +428,13 @@
 
 void sfile_log_sample(struct transient const * trans)
 {
+	sfile_log_sample_count(trans, 1);
+}
+
+
+void sfile_log_sample_count(struct transient const * trans,
+                            unsigned long int count)
+{
 	int err;
 	vma_t pc = trans->pc;
 	odb_t * file;
@@ -437,7 +455,7 @@
 
 	if (trans->current->anon)
 		pc -= trans->current->anon->start;
- 
+
 	if (vsamples)
 		verbose_sample(trans, pc);
 
@@ -446,7 +464,9 @@
 		return;
 	}
 
-	err = odb_update_node(file, (uint64_t)pc);
+	err = odb_update_node_with_offset(file,
+					  (odb_key_t)pc,
+					  count);
 	if (err) {
 		fprintf(stderr, "%s: %s\n", __FUNCTION__, strerror(err));
 		abort();
@@ -462,6 +482,8 @@
 	for (i = 0; i < op_nr_counters; ++i)
 		odb_close(&sf->files[i]);
 
+	opd_ext_sfile_close(sf);
+
 	return 0;
 }
 
@@ -481,6 +503,8 @@
 	for (i = 0; i < op_nr_counters; ++i)
 		odb_sync(&sf->files[i]);
 
+	opd_ext_sfile_sync(sf);
+
 	return 0;
 }
 
diff --git a/daemon/opd_sfile.h b/daemon/opd_sfile.h
index 86d5025..76e5e63 100644
--- a/daemon/opd_sfile.h
+++ b/daemon/opd_sfile.h
@@ -62,6 +62,8 @@
 	int ignored;
 	/** opened sample files */
 	odb_t files[OP_MAX_COUNTERS];
+	/** extended sample files */
+	odb_t * ext_files;
 	/** hash table of opened cg sample files */
 	struct list_head cg_hash[CG_HASH_SIZE];
 };
@@ -107,6 +109,10 @@
 /** Log the sample in a previously located sfile. */
 void sfile_log_sample(struct transient const * trans);
 
+/** Log the event/cycle count in a previously located sfile */
+void sfile_log_sample_count(struct transient const * trans,
+                            unsigned long int count);
+
 /** initialise hashes */
 void sfile_init(void);
 
diff --git a/daemon/opd_stats.c b/daemon/opd_stats.c
index ddb1940..e7af72b 100644
--- a/daemon/opd_stats.c
+++ b/daemon/opd_stats.c
@@ -10,6 +10,7 @@
  */
 
 #include "opd_stats.h"
+#include "opd_extended.h"
 #include "oprofiled.h"
 
 #include "op_get_time.h"
@@ -40,6 +41,7 @@
 	struct dirent * dirent;
 
 	printf("\n%s\n", op_get_time());
+	printf("\n-- OProfile Statistics --\n");
 	printf("Nr. sample dumps: %lu\n", opd_stats[OPD_DUMP_COUNT]);
 	printf("Nr. non-backtrace samples: %lu\n", opd_stats[OPD_SAMPLES]);
 	printf("Nr. kernel samples: %lu\n", opd_stats[OPD_KERNEL]);
@@ -59,6 +61,8 @@
 	print_if("Nr. samples lost due to no mm: %u\n",
 	       "/dev/oprofile/stats", "sample_lost_no_mm", 1);
 
+	opd_ext_print_stats();
+
 	if (!(dir = opendir("/dev/oprofile/stats/")))
 		goto out;
 	while ((dirent = readdir(dir))) {
@@ -68,6 +72,7 @@
 			continue;
 		snprintf(path, 256, "/dev/oprofile/stats/%s", dirent->d_name);
 
+		printf("\n---- Statistics for cpu : %d\n", cpu_nr);
 		print_if("Nr. samples lost cpu buffer overflow: %u\n",
 		     path, "sample_lost_overflow", 1);
 		print_if("Nr. samples lost task exit: %u\n",
diff --git a/daemon/opd_trans.c b/daemon/opd_trans.c
index 871e6e6..76296a0 100644
--- a/daemon/opd_trans.c
+++ b/daemon/opd_trans.c
@@ -194,7 +194,7 @@
 	if (vmisc) {
 		char const * name = verbose_cookie(trans->cookie);
 		verbprintf(vmisc, "COOKIE_SWITCH to cookie %s(%llx)\n",
-		           name, trans->cookie);
+			   name, trans->cookie);
 	}
 }
 
@@ -246,11 +246,11 @@
 	verbprintf(vmisc, "XEN_ENTER_SWITCH to xen\n");
 	trans->in_kernel = 1;
 	trans->current = NULL;
-	/* subtlety: we must keep trans->cookie cached, even though it's 
-	 * meaningless for Xen - we won't necessarily get a cookie switch 
-	 * on Xen exit. See comments in opd_sfile.c. It seems that we can 
-	 * get away with in_kernel = 1 as long as we supply the correct 
-	 * Xen image, and its address range in startup find_kernel_image 
+	/* subtlety: we must keep trans->cookie cached, even though it's
+	 * meaningless for Xen - we won't necessarily get a cookie switch
+	 * on Xen exit. See comments in opd_sfile.c. It seems that we can
+	 * get away with in_kernel = 1 as long as we supply the correct
+	 * Xen image, and its address range in startup find_kernel_image
 	 * is modified to look in the Xen image also
 	 */
 }
@@ -258,24 +258,31 @@
 extern void code_spu_profiling(struct transient * trans);
 extern void code_spu_ctx_switch(struct transient * trans);
 
+extern void code_ibs_fetch_sample(struct transient * trans);
+extern void code_ibs_op_sample(struct transient * trans);
+
 handler_t handlers[LAST_CODE + 1] = {
 	&code_unknown,
 	&code_ctx_switch,
 	&code_cpu_switch,
 	&code_cookie_switch,
 	&code_kernel_enter,
- 	&code_user_enter,
+	&code_user_enter,
 	&code_module_loaded,
 	/* tgid handled differently */
 	&code_unknown,
 	&code_trace_begin,
 	&code_unknown,
- 	&code_xen_enter,
+	&code_xen_enter,
 #if defined(__powerpc__)
 	&code_spu_profiling,
 	&code_spu_ctx_switch,
-#endif
+#else
 	&code_unknown,
+	&code_unknown,
+#endif
+	&code_ibs_fetch_sample,
+	&code_ibs_op_sample,
 };
 
 extern void (*special_processor)(struct transient *);
@@ -299,7 +306,8 @@
 		.cpu = -1,
 		.tid = -1,
 		.embedded_offset = UNUSED_EMBEDDED_OFFSET,
-		.tgid = -1
+		.tgid = -1,
+		.ext = NULL
 	};
 
 	/* FIXME: was uint64_t but it can't compile on alpha where uint64_t
@@ -313,17 +321,9 @@
 		return;
 	}
 
-    int i;
-
-    for (i = 0; i < count && i < 200; i++) {
-        verbprintf(vmisc, "buffer[%d] is %x\n", i, buffer[i]);
-    }
-
 	while (trans.remaining) {
 		code = pop_buffer_value(&trans);
 
-        verbprintf(vmisc, "In opd_process_samples (code is %lld)\n", code);
-
 		if (!is_escape_code(code)) {
 			opd_put_sample(&trans, code);
 			continue;
@@ -338,7 +338,6 @@
 		// started with ESCAPE_CODE, next is type
 		code = pop_buffer_value(&trans);
 	
-        verbprintf(vmisc, "next code is %lld\n", code);
 		if (code >= LAST_CODE) {
 			fprintf(stderr, "Unknown code %llu\n", code);
 			abort();
diff --git a/daemon/opd_trans.h b/daemon/opd_trans.h
index ab4e816..c0a868b 100644
--- a/daemon/opd_trans.h
+++ b/daemon/opd_trans.h
@@ -54,6 +54,7 @@
 	pid_t tid;
 	pid_t tgid;
 	uint64_t embedded_offset;
+	void * ext;
 };
 
 typedef void (*handler_t)(struct transient *);
diff --git a/daemon/oprofiled.c b/daemon/oprofiled.c
index ec2ea1b..173d972 100644
--- a/daemon/oprofiled.c
+++ b/daemon/oprofiled.c
@@ -17,6 +17,7 @@
 #include "oprofiled.h"
 #include "opd_printf.h"
 #include "opd_events.h"
+#include "opd_extended.h"
 
 #include "op_config.h"
 #include "op_version.h"
@@ -52,11 +53,13 @@
 
 uint op_nr_counters;
 op_cpu cpu_type;
+int no_event_ok;
 int vsfile;
 int vsamples;
 int varcs;
 int vmodule;
 int vmisc;
+int vext;
 int separate_lib;
 int separate_kernel;
 int separate_thread;
@@ -71,6 +74,7 @@
 static char * verbose;
 static char * binary_name_filter;
 static char * events;
+static char * ext_feature;
 static int showvers;
 static struct oprofiled_ops * opd_ops;
 extern struct oprofiled_ops opd_24_ops;
@@ -94,6 +98,7 @@
 	{ "events", 'e', POPT_ARG_STRING, &events, 0, "events list", "[events]" },
 	{ "version", 'v', POPT_ARG_NONE, &showvers, 0, "show version", NULL, },
 	{ "verbose", 'V', POPT_ARG_STRING, &verbose, 0, "be verbose in log file", "all,sfile,arcs,samples,module,misc", },
+	{ "ext-feature", 'x', POPT_ARG_STRING, &ext_feature, 1, "enable extended feature", "<extended-feature-name>:[args]", },
 	POPT_AUTOHELP
 	{ NULL, 0, 0, NULL, 0, NULL, NULL, },
 };
@@ -353,6 +358,7 @@
 		varcs = 1;
 		vmodule = 1;
 		vmisc = 1;
+		vext= 1;
 	} else if (!strcmp(name, "sfile")) {
 		vsfile = 1;
 	} else if (!strcmp(name, "arcs")) {
@@ -363,6 +369,8 @@
 		vmodule = 1;
 	} else if (!strcmp(name, "misc")) {
 		vmisc = 1;
+	} else if (!strcmp(name, "ext")) {
+		vext= 1;
 	} else {
 		fprintf(stderr, "unknown verbose options\n");
 		exit(EXIT_FAILURE);
@@ -426,7 +434,10 @@
 		}
 	}
 
-	if (events == NULL) {
+	if(opd_ext_initialize(ext_feature) != EXIT_SUCCESS)
+		exit(EXIT_FAILURE);
+
+	if (events == NULL && no_event_ok == 0) {
 		fprintf(stderr, "oprofiled: no events specified.\n");
 		poptPrintHelp(optcon, stderr, 0);
 		exit(EXIT_FAILURE);
@@ -451,7 +462,8 @@
 		}
 	}
 
-	opd_parse_events(events);
+	if (events != NULL)
+		opd_parse_events(events);
 
 	opd_parse_image_filter();
 
diff --git a/libdb/db_insert.c b/libdb/db_insert.c
index 018c294..6bbd71f 100644
--- a/libdb/db_insert.c
+++ b/libdb/db_insert.c
@@ -51,6 +51,13 @@
 
 int odb_update_node(odb_t * odb, odb_key_t key)
 {
+	return odb_update_node_with_offset(odb, key, 1);
+}
+
+int odb_update_node_with_offset(odb_t * odb, 
+				odb_key_t key, 
+				unsigned long int offset)
+{
 	odb_index_t index;
 	odb_node_t * node;
 	odb_data_t * data;
@@ -60,8 +67,8 @@
 	while (index) {
 		node = &data->node_base[index];
 		if (node->key == key) {
-			if (node->value + 1 != 0) {
-				node->value += 1;
+			if (node->value + offset != 0) {
+				node->value += offset;
 			} else {
 				/* post profile tools must handle overflow */
 				/* FIXME: the tricky way will be just to add
@@ -92,7 +99,7 @@
 		index = node->next;
 	}
 
-	return add_node(data, key, 1);
+	return add_node(data, key, offset);
 }
 
 
diff --git a/libdb/db_manage.c b/libdb/db_manage.c
index d8a6fcb..17a0be5 100644
--- a/libdb/db_manage.c
+++ b/libdb/db_manage.c
@@ -11,10 +11,10 @@
 #define _GNU_SOURCE
 
 #include <stdlib.h>
-#ifndef ANDROID
-#include <sys/fcntl.h>
-#else
+#ifdef ANDROID
 #include <fcntl.h>
+#else
+#include <sys/fcntl.h>
 #endif
 #include <sys/mman.h>
 #include <sys/types.h>
diff --git a/libdb/odb.h b/libdb/odb.h
index c190b57..9ad1da2 100644
--- a/libdb/odb.h
+++ b/libdb/odb.h
@@ -180,6 +180,22 @@
  */
 int odb_update_node(odb_t * odb, odb_key_t key);
 
+/**
+ * odb_update_node_with_offset
+ * @param odb the data base object to setup
+ * @param key the hash key
+ * @param offset the offset to be added
+ *
+ * update info at key by adding the specified offset to its associated value,
+ * if the key does not exist a new node is created and the value associated
+ * is set to offset.
+ *
+ * returns EXIT_SUCCESS on success, EXIT_FAILURE on failure
+ */
+int odb_update_node_with_offset(odb_t * odb, 
+				odb_key_t key, 
+				unsigned long int offset);
+
 /** Add a new node w/o regarding if a node with the same key already exists
  *
  * returns EXIT_SUCCESS on success, EXIT_FAILURE on failure
diff --git a/libop/Android.mk b/libop/Android.mk
index 8fbd1e6..e935a45 100644
--- a/libop/Android.mk
+++ b/libop/Android.mk
@@ -8,7 +8,9 @@
 	op_events.c \
 	op_get_interface.c \
 	op_mangle.c \
-	op_parse_event.c
+	op_parse_event.c \
+	op_xml_events.c \
+	op_xml_out.c
 
 LOCAL_C_INCLUDES := \
 	$(LOCAL_PATH)/.. \
diff --git a/libop/op_alloc_counter.c b/libop/op_alloc_counter.c
index 353100a..bb2bd6e 100644
--- a/libop/op_alloc_counter.c
+++ b/libop/op_alloc_counter.c
@@ -113,6 +113,9 @@
  * a bitmask of already allocated counter. Walking through node is done in
  * preorder left to right.
  *
+ * In case of extended events (requiring no physical counters), the associated
+ * counter_map entry will be -1.
+ *
  * Possible improvment if neccessary: partition counters in class of counter,
  * two counter belong to the same class if they allow exactly the same set of
  * event. Now using a variant of the backtrack algo can works on class of
@@ -128,18 +131,27 @@
 	if (depth == max_depth)
 		return 1;
 
-	list_for_each(pos, &ctr_arc[depth].next) {
-		counter_arc const * arc = list_entry(pos, counter_arc, next);
-
-		if (allocated_mask & (1 << arc->counter))
-			continue;
-
-		counter_map[depth] = arc->counter;
-
+	/* If ctr_arc is not available, counter_map is -1 */
+	if((&ctr_arc[depth].next)->next == &ctr_arc[depth].next) {
+		counter_map[depth] = -1;
 		if (allocate_counter(ctr_arc, max_depth, depth + 1,
-		                     allocated_mask | (1 << arc->counter),
+		                     allocated_mask,
 		                     counter_map))
 			return 1;
+	} else {
+		list_for_each(pos, &ctr_arc[depth].next) {
+			counter_arc const * arc = list_entry(pos, counter_arc, next);
+
+			if (allocated_mask & (1 << arc->counter))
+				continue;
+
+			counter_map[depth] = arc->counter;
+
+			if (allocate_counter(ctr_arc, max_depth, depth + 1,
+					     allocated_mask | (1 << arc->counter),
+					     counter_map))
+				return 1;
+		}
 	}
 
 	return 0;
@@ -167,7 +179,8 @@
 	/* assume nothing is available */
 	u32 available=0;
 
-	count = scandir("/dev/oprofile", &counterlist, perfcounterdir, alphasort);
+	count = scandir("/dev/oprofile", &counterlist, perfcounterdir,
+			alphasort);
 	if (count < 0)
 		/* unable to determine bit mask */
 		return -1;
@@ -186,21 +199,36 @@
 {
 	counter_arc_head * ctr_arc;
 	size_t * counter_map;
-	int nr_counters;
+	int i, nr_counters, nr_pmc_events;
+	op_cpu curr_cpu_type;
 	u32 unavailable_counters = 0;
 
-	nr_counters = op_get_counter_mask(&unavailable_counters);
+	/* Either ophelp or one of the libop tests may invoke this
+	 * function with a non-native cpu_type.  If so, we should not
+	 * call op_get_counter_mask because that will look for real counter
+	 * information in oprofilefs.
+	 */
+	curr_cpu_type = op_get_cpu_type();
+	if (cpu_type != curr_cpu_type)
+		nr_counters = op_get_nr_counters(cpu_type);
+	else
+		nr_counters = op_get_counter_mask(&unavailable_counters);
+
 	/* no counters then probably perfmon managing perfmon hw */
 	if (nr_counters <= 0) {
 		nr_counters = op_get_nr_counters(cpu_type);
 		unavailable_counters = (~0) << nr_counters;
 	}
-	if (nr_counters < nr_events)
-		return 0;
+
+	/* Check to see if we have enough physical counters to map events */
+	for (i = 0, nr_pmc_events = 0; i < nr_events; i++)
+		if(pev[i]->ext == NULL)
+			if (++nr_pmc_events > nr_counters)
+				return 0;
 
 	ctr_arc = build_counter_arc(pev, nr_events);
 
-	counter_map = xmalloc(nr_counters * sizeof(size_t));
+	counter_map = xmalloc(nr_events * sizeof(size_t));
 
 	if (!allocate_counter(ctr_arc, nr_events, 0, unavailable_counters,
 			      counter_map)) {
diff --git a/libop/op_config.h b/libop/op_config.h
index b384497..12e4b96 100644
--- a/libop/op_config.h
+++ b/libop/op_config.h
@@ -25,6 +25,10 @@
  */
 void init_op_config_dirs(char const * session_dir);
 
+#ifndef ANDROID
+#define OP_SESSION_DIR_DEFAULT "/var/lib/oprofile/"
+#endif
+
 /* 
  * various paths, corresponding to opcontrol, that should be
  * initialized by init_op_config_dirs() above. 
@@ -37,8 +41,10 @@
 extern char op_pipe_file[];
 extern char op_dump_status[];
 
+#if ANDROID
 #define OP_DRIVER_BASE  "/dev/oprofile"
 #define OP_DATA_DIR     "/data/oprofile"
+#endif
 
 /* Global directory that stores debug files */
 #ifndef DEBUGDIR
diff --git a/libop/op_cpu_type.c b/libop/op_cpu_type.c
index b9d13de..e168b43 100644
--- a/libop/op_cpu_type.c
+++ b/libop/op_cpu_type.c
@@ -14,6 +14,7 @@
 #include <string.h>
 
 #include "op_cpu_type.h"
+#include "op_hw_specific.h"
 
 struct cpu_descr {
 	char const * pretty;
@@ -74,6 +75,13 @@
 	{ "ppc64 POWER5++", "ppc64/power5++", CPU_PPC64_POWER5pp, 6 },
 	{ "e300", "ppc/e300", CPU_PPC_E300, 4 },
 	{ "AVR32", "avr32", CPU_AVR32, 3 },
+	{ "ARM V7 PMNC", "arm/armv7", CPU_ARM_V7, 5 },
+ 	{ "Intel Architectural Perfmon", "i386/arch_perfmon", CPU_ARCH_PERFMON, 0},
+	{ "AMD64 family11h", "x86-64/family11h", CPU_FAMILY11H, 4 },
+	{ "ppc64 POWER7", "ppc64/power7", CPU_PPC64_POWER7, 6 },
+	{ "ppc64 compat version 1", "ppc64/ibm-compat-v1", CPU_PPC64_IBM_COMPAT_V1, 4 },
+   	{ "Intel Core/i7", "i386/core_i7", CPU_CORE_I7, 4 },
+   	{ "Intel Atom", "i386/atom", CPU_ATOM, 2 },
 };
  
 static size_t const nr_cpu_descrs = sizeof(cpu_descrs) / sizeof(struct cpu_descr);
@@ -151,8 +159,14 @@
 
 int op_get_nr_counters(op_cpu cpu_type)
 {
+	int cnt;
+
 	if (cpu_type <= CPU_NO_GOOD || cpu_type >= MAX_CPU_TYPE)
 		return 0;
 
+	cnt = arch_num_counters(cpu_type);
+	if (cnt >= 0)
+		return cnt;
+
 	return cpu_descrs[cpu_type].nr_counters;
 }
diff --git a/libop/op_cpu_type.h b/libop/op_cpu_type.h
index be95ae2..133a4f8 100644
--- a/libop/op_cpu_type.h
+++ b/libop/op_cpu_type.h
@@ -72,6 +72,13 @@
 	CPU_PPC64_POWER5pp,  /**< ppc64 Power5++ family */
 	CPU_PPC_E300, /**< e300 */
 	CPU_AVR32, /**< AVR32 */
+	CPU_ARM_V7, /**< ARM V7 */
+ 	CPU_ARCH_PERFMON, /**< Intel architectural perfmon */
+	CPU_FAMILY11H, /**< AMD family 11h */
+	CPU_PPC64_POWER7, /**< ppc64 POWER7 family */
+	CPU_PPC64_IBM_COMPAT_V1, /**< IBM PPC64 processor compat mode version 1 */
+   	CPU_CORE_I7, /* Intel Core i7, Nehalem */
+   	CPU_ATOM, /* First generation Intel Atom */
 	MAX_CPU_TYPE
 } op_cpu;
 
diff --git a/libop/op_events.c b/libop/op_events.c
index b4a10e7..ad95d86 100644
--- a/libop/op_events.c
+++ b/libop/op_events.c
@@ -16,6 +16,7 @@
 #include "op_fileio.h"
 #include "op_string.h"
 #include "op_cpufreq.h"
+#include "op_hw_specific.h"
 
 #include <string.h>
 #include <stdlib.h>
@@ -27,6 +28,24 @@
 static char const * filename;
 static unsigned int line_nr;
 
+static void delete_event(struct op_event * event);
+static void read_events(char const * file);
+static void read_unit_masks(char const * file);
+static void free_unit_mask(struct op_unit_mask * um);
+
+static char *build_fn(const char *cpu_name, const char *fn)
+{
+	char *s;
+	static const char *dir;
+	if (dir == NULL)
+		dir = getenv("OPROFILE_EVENTS_DIR");
+	if (dir == NULL)
+		dir = OP_DATADIR;
+	s = xmalloc(strlen(dir) + strlen(cpu_name) + strlen(fn) + 5);
+	sprintf(s, "%s/%s/%s", dir, cpu_name, fn);
+	return s;
+}
+
 static void parse_error(char const * context)
 {
 	fprintf(stderr, "oprofile: parse error in %s, line %u\n",
@@ -69,6 +88,23 @@
 	return value;
 }
 
+static void include_um(const char *start, const char *end)
+{
+	char *s;
+	char cpu[end - start + 1];
+	int old_line_nr;
+	const char *old_filename;
+
+	strncpy(cpu, start, end - start);
+	cpu[end - start] = 0;
+	s = build_fn(cpu, "unit_masks");
+	old_line_nr = line_nr;
+	old_filename = filename;
+	read_unit_masks(s);
+	line_nr = old_line_nr;
+	filename = old_filename;
+	free(s);
+}
 
 /* name:MESI type:bitmask default:0x0f */
 static void parse_um(struct op_unit_mask * um, char const * line)
@@ -94,6 +130,14 @@
 
 		++tagend;
 
+		if (strisprefix(start, "include")) {
+			if (seen_name + seen_type + seen_default > 0)
+				parse_error("include must be on its own");
+			free_unit_mask(um);
+			include_um(tagend, valueend);
+			return;
+		}
+
 		if (strisprefix(start, "name")) {
 			if (seen_name)
 				parse_error("duplicate name: tag");
@@ -125,6 +169,11 @@
 		tagend = valueend;
 		start = valueend;
 	}
+
+	if (!um->name)
+		parse_error("Missing name for unit mask");
+	if (!seen_type)
+		parse_error("Missing type for unit mask");
 }
 
 
@@ -158,6 +207,11 @@
 	return um;
 }
 
+static void free_unit_mask(struct op_unit_mask * um)
+{
+	list_del(&um->um_next);
+	free(um);
+}
 
 /*
  * name:zero type:mandatory default:0x0
@@ -227,21 +281,68 @@
 	return mask;
 }
 
-
-static struct op_unit_mask * find_um(char const * value)
+static struct op_unit_mask * try_find_um(char const * value)
 {
 	struct list_head * pos;
 
 	list_for_each(pos, &um_list) {
 		struct op_unit_mask * um = list_entry(pos, struct op_unit_mask, um_next);
-		if (strcmp(value, um->name) == 0)
+		if (strcmp(value, um->name) == 0) {
+			um->used = 1;
 			return um;
+		}
 	}
+	return NULL;
+}
 
+static struct op_unit_mask * find_um(char const * value)
+{
+	struct op_unit_mask * um = try_find_um(value);
+	if (um)
+		return um;
 	fprintf(stderr, "oprofile: could not find unit mask %s\n", value);
 	exit(EXIT_FAILURE);
 }
 
+/* um:a,b,c,d merge multiple unit masks */
+static struct op_unit_mask * merge_um(char * value)
+{
+	int num;
+	char *s;
+	struct op_unit_mask *new, *um;
+	enum unit_mask_type type = -1U;
+
+	um = try_find_um(value);
+	if (um)
+		return um;
+
+	new = new_unit_mask();
+	new->name = xstrdup(value);
+	new->used = 1;
+	num = 0;
+	while ((s = strsep(&value, ",")) != NULL) {
+		unsigned c;
+		um = find_um(s);
+		if (type == -1U)
+			type = um->unit_type_mask;
+		if (um->unit_type_mask != type)
+			parse_error("combined unit mask must be all the same types");
+		if (type != utm_bitmask && type != utm_exclusive)
+			parse_error("combined unit mask must be all bitmasks or exclusive");
+		new->default_mask |= um->default_mask;
+		new->num += um->num;
+		if (new->num > MAX_UNIT_MASK)
+			parse_error("too many members in combined unit mask");
+		for (c = 0; c < um->num; c++, num++) {
+			new->um[num] = um->um[c];
+			new->um[num].desc = xstrdup(new->um[num].desc);
+		}
+	}
+	if (type == -1U)
+		parse_error("Empty unit mask");
+	new->unit_type_mask = type;
+	return new;		
+}
 
 /* parse either a "tag:value" or a ": trailing description string" */
 static int next_token(char const ** cp, char ** name, char ** value)
@@ -287,6 +388,20 @@
 	return 1;
 }
 
+static void include_events (char *value)
+{
+	char * event_file;
+	const char *old_filename;
+	int old_line_nr;
+
+	event_file = build_fn(value, "events");
+	old_line_nr = line_nr;
+	old_filename = filename;
+	read_events(event_file);
+	line_nr = old_line_nr;
+	filename = old_filename;
+	free(event_file);
+}
 
 static struct op_event * new_event(void)
 {
@@ -297,8 +412,14 @@
 	return event;
 }
 
+static void free_event(struct op_event * event)
+{
+	list_del(&event->event_next);
+	free(event);
+}
 
 /* event:0x00 counters:0 um:zero minimum:4096 name:ISSUES : Total issues */
+/* event:0x00 ext:xxxxxx um:zero minimum:4096 name:ISSUES : Total issues */
 static void read_events(char const * file)
 {
 	struct op_event * event = NULL;
@@ -306,8 +427,9 @@
 	char * name;
 	char * value;
 	char const * c;
-	int seen_event, seen_counters, seen_um, seen_minimum, seen_name;
+	int seen_event, seen_counters, seen_um, seen_minimum, seen_name, seen_ext;
 	FILE * fp = fopen(file, "r");
+	int tags;
 
 	if (!fp) {
 		fprintf(stderr, "oprofile: could not open event description file %s\n", file);
@@ -323,13 +445,17 @@
 		if (empty_line(line) || comment_line(line))
 			goto next;
 
+		tags = 0;
 		seen_name = 0;
 		seen_event = 0;
 		seen_counters = 0;
+		seen_ext = 0;
 		seen_um = 0;
 		seen_minimum = 0;
 		event = new_event();
-
+		event->filter = -1;
+		event->ext = NULL;
+		
 		c = line;
 		while (next_token(&c, &name, &value)) {
 			if (strcmp(name, "name") == 0) {
@@ -351,14 +477,24 @@
 				if (seen_counters)
 					parse_error("duplicate counters: tag");
 				seen_counters = 1;
-				event->counter_mask = parse_counter_mask(value);
+				if (!strcmp(value, "cpuid"))
+					event->counter_mask = arch_get_counter_mask();
+				else
+					event->counter_mask = parse_counter_mask(value);
 				free(value);
+			} else if (strcmp(name, "ext") == 0) {
+				if (seen_ext)
+					parse_error("duplicate ext: tag");
+				seen_ext = 1;
+				event->ext = value;
 			} else if (strcmp(name, "um") == 0) {
 				if (seen_um)
 					parse_error("duplicate um: tag");
 				seen_um = 1;
-				event->unit = find_um(value);
-				event->unit->used = 1;
+				if (strchr(value, ','))
+					event->unit = merge_um(value);
+				else
+					event->unit = find_um(value);
 				free(value);
 			} else if (strcmp(name, "minimum") == 0) {
 				if (seen_minimum)
@@ -368,9 +504,22 @@
 				free(value);
 			} else if (strcmp(name, "desc") == 0) {
 				event->desc = value;
+			} else if (strcmp(name, "filter") == 0) {
+				event->filter = parse_int(value);
+				free(value);
+			} else if (strcmp(name, "include") == 0) {
+				if (tags > 0)
+					parse_error("tags before include:");
+				free_event(event);
+				include_events(value);
+				free(value);
+				c = skip_ws(c);
+				if (*c != '\0' && *c != '#')
+					parse_error("non whitespace after include:");
 			} else {
 				parse_error("unknown tag");
 			}
+			tags++;
 
 			free(name);
 		}
@@ -385,20 +534,21 @@
 
 
 /* usefull for make check */
-static void check_unit_mask(struct op_unit_mask const * um,
+static int check_unit_mask(struct op_unit_mask const * um,
 	char const * cpu_name)
 {
 	u32 i;
+	int err = 0;
 
 	if (!um->used) {
 		fprintf(stderr, "um %s is not used\n", um->name);
-		exit(EXIT_FAILURE);
+		err = EXIT_FAILURE;
 	}
 
 	if (um->unit_type_mask == utm_mandatory && um->num != 1) {
 		fprintf(stderr, "mandatory um %s doesn't contain exactly one "
 			"entry (%s)\n", um->name, cpu_name);
-		exit(EXIT_FAILURE);
+		err = EXIT_FAILURE;
 	} else if (um->unit_type_mask == utm_bitmask) {
 		u32 default_mask = um->default_mask;
 		for (i = 0; i < um->num; ++i)
@@ -407,7 +557,7 @@
 		if (default_mask) {
 			fprintf(stderr, "um %s default mask is not valid "
 				"(%s)\n", um->name, cpu_name);
-			exit(EXIT_FAILURE);
+			err = EXIT_FAILURE;
 		}
 	} else {
 		for (i = 0; i < um->num; ++i) {
@@ -418,63 +568,66 @@
 		if (i == um->num) {
 			fprintf(stderr, "exclusive um %s default value is not "
 				"valid (%s)\n", um->name, cpu_name);
-			exit(EXIT_FAILURE);
+			err = EXIT_FAILURE;
 		}
 	}
+	return err;
+}
+
+static void arch_filter_events(op_cpu cpu_type)
+{
+	struct list_head * pos, * pos2;
+	unsigned filter = arch_get_filter(cpu_type);
+	if (!filter)
+		return;
+	list_for_each_safe (pos, pos2, &events_list) {
+		struct op_event * event = list_entry(pos, struct op_event, event_next);
+		if (event->filter >= 0 && ((1U << event->filter) & filter))
+			delete_event(event);
+	}
 }
 
+static void load_events_name(const char *cpu_name)
+{
+	char * event_file;
+	char * um_file;
+
+	event_file = build_fn(cpu_name, "events");
+	um_file = build_fn(cpu_name, "unit_masks");
+
+	read_unit_masks(um_file);
+	read_events(event_file);
+	
+	free(um_file);
+	free(event_file);
+}
 
 static void load_events(op_cpu cpu_type)
 {
-	char const * cpu_name = op_get_cpu_name(cpu_type);
-	char * event_dir;
-	char * event_file;
-	char * um_file;
-	char * dir;
+	const char * cpu_name = op_get_cpu_name(cpu_type);
 	struct list_head * pos;
+	int err = 0;
 
 	if (!list_empty(&events_list))
 		return;
 
-	dir = getenv("OPROFILE_EVENTS_DIR");
-	if (dir == NULL)
-		dir = OP_DATADIR;
+	load_events_name(cpu_name);
 
-	event_dir = xmalloc(strlen(dir) + strlen("/") + strlen(cpu_name) +
-                            strlen("/") + 1);
-	strcpy(event_dir, dir);
-	strcat(event_dir, "/"); 
-
-	strcat(event_dir, cpu_name);
-	strcat(event_dir, "/");
-
-	event_file = xmalloc(strlen(event_dir) + strlen("events") + 1);
-	strcpy(event_file, event_dir);
-	strcat(event_file, "events");
-
-	um_file = xmalloc(strlen(event_dir) + strlen("unit_masks") + 1);
-	strcpy(um_file, event_dir);
-	strcat(um_file, "unit_masks");
-
-	read_unit_masks(um_file);
-	read_events(event_file);
+	arch_filter_events(cpu_type);
 
 	/* sanity check: all unit mask must be used */
 	list_for_each(pos, &um_list) {
 		struct op_unit_mask * um = list_entry(pos, struct op_unit_mask, um_next);
-
-		check_unit_mask(um, cpu_name);
+		err |= check_unit_mask(um, cpu_name);
 	}
-	
-	free(um_file);
-	free(event_file);
-	free(event_dir);
+	if (err)
+		exit(err);
 }
 
-
 struct list_head * op_events(op_cpu cpu_type)
 {
 	load_events(cpu_type);
+	arch_filter_events(cpu_type);
 	return &events_list;
 }
 
@@ -521,8 +674,8 @@
 	}
 }
 
-
-static struct op_event * find_event(u32 nr)
+/* There can actually be multiple events here, so this is not quite correct */
+static struct op_event * find_event_any(u32 nr)
 {
 	struct list_head * pos;
 
@@ -535,8 +688,25 @@
 	return NULL;
 }
 
+static struct op_event * find_event_um(u32 nr, u32 um)
+{
+	struct list_head * pos;
+	unsigned int i;
 
-static FILE * open_event_mapping_file(char const * cpu_name) 
+	list_for_each(pos, &events_list) {
+		struct op_event * event = list_entry(pos, struct op_event, event_next);
+		if (event->val == nr) {
+			for (i = 0; i < event->unit->num; i++) {
+				if (event->unit->um[i].value == um)
+					return event;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+static FILE * open_event_mapping_file(char const * cpu_name)
 {
 	char * ev_map_file;
 	char * dir;
@@ -560,7 +730,7 @@
 /**
  *  This function is PPC64-specific.
  */
-static char const * get_mapping(u32 nr, FILE * fp) 
+static char const * get_mapping(u32 nr, FILE * fp)
 {
 	char * line;
 	char * name;
@@ -655,6 +825,8 @@
 		case CPU_PPC64_POWER5p:
 		case CPU_PPC64_POWER5pp:
 		case CPU_PPC64_POWER6:
+		case CPU_PPC64_POWER7:
+		case CPU_PPC64_IBM_COMPAT_V1:
 			if (!fp) {
 				fprintf(stderr, "oprofile: could not open event mapping file %s\n", filename);
 				exit(EXIT_FAILURE);
@@ -672,67 +844,102 @@
 	return map;
 }
 
+static int match_event(int i, struct op_event *event, unsigned um)
+{
+	unsigned v = event->unit->um[i].value;
 
-struct op_event * find_event_by_name(char const * name)
+	switch (event->unit->unit_type_mask) {
+	case utm_exclusive:
+	case utm_mandatory:
+		return v == um;
+
+	case utm_bitmask:
+		return (v & um) || (um == 0 && v == 0);
+	}
+
+	abort();
+}
+
+struct op_event * find_event_by_name(char const * name, unsigned um, int um_valid)
 {
 	struct list_head * pos;
 
 	list_for_each(pos, &events_list) {
 		struct op_event * event = list_entry(pos, struct op_event, event_next);
-		if (strcmp(event->name, name) == 0)
+		if (strcmp(event->name, name) == 0) {
+			if (um_valid) {
+				unsigned i;
+
+				for (i = 0; i < event->unit->num; i++)
+					if (match_event(i, event, um))
+						return event;
+				continue;
+			}
 			return event;
+		}
 	}
 
 	return NULL;
 }
 
 
-struct op_event * op_find_event(op_cpu cpu_type, u32 nr)
+struct op_event * op_find_event(op_cpu cpu_type, u32 nr, u32 um)
 {
 	struct op_event * event;
 
 	load_events(cpu_type);
 
-	event = find_event(nr);
+	event = find_event_um(nr, um);
 
 	return event;
 }
 
+struct op_event * op_find_event_any(op_cpu cpu_type, u32 nr)
+{
+	load_events(cpu_type);
+
+	return find_event_any(nr);
+}
 
 int op_check_events(int ctr, u32 nr, u32 um, op_cpu cpu_type)
 {
-	int ret = OP_OK_EVENT;
-	struct op_event * event;
+	int ret = OP_INVALID_EVENT;
 	size_t i;
 	u32 ctr_mask = 1 << ctr;
+	struct list_head * pos;
 
 	load_events(cpu_type);
 
-	event = find_event(nr);
+	list_for_each(pos, &events_list) {
+		struct op_event * event = list_entry(pos, struct op_event, event_next);
+		if (event->val != nr)
+			continue;
 
-	if (!event) {
-		ret |= OP_INVALID_EVENT;
-		return ret;
-	}
+		ret = OP_OK_EVENT;
 
-	if ((event->counter_mask & ctr_mask) == 0)
-		ret |= OP_INVALID_COUNTER;
+		if ((event->counter_mask & ctr_mask) == 0)
+			ret |= OP_INVALID_COUNTER;
 
-	if (event->unit->unit_type_mask == utm_bitmask) {
-		for (i = 0; i < event->unit->num; ++i)
-			um &= ~(event->unit->um[i].value);			
-		
-		if (um)
-			ret |= OP_INVALID_UM;
+		if (event->unit->unit_type_mask == utm_bitmask) {
+			for (i = 0; i < event->unit->num; ++i)
+				um &= ~(event->unit->um[i].value);			
+			
+			if (um)
+				ret |= OP_INVALID_UM;
+			
+		} else {
+			for (i = 0; i < event->unit->num; ++i) {
+				if (event->unit->um[i].value == um)
+					break;
+			}
+			
+			if (i == event->unit->num)
+				ret |= OP_INVALID_UM;
 
-	} else {
-		for (i = 0; i < event->unit->num; ++i) {
-			if (event->unit->um[i].value == um)
-				break;
 		}
 
-		if (i == event->unit->num)
-			ret |= OP_INVALID_UM;
+		if (ret == OP_OK_EVENT)
+			return ret;
 	}
 
 	return ret;
@@ -759,6 +966,10 @@
 		case CPU_ATHLON:
 		case CPU_HAMMER:
 		case CPU_FAMILY10:
+		case CPU_ARCH_PERFMON:
+		case CPU_FAMILY11H:
+		case CPU_ATOM:
+		case CPU_CORE_I7:
 			descr->name = "CPU_CLK_UNHALTED";
 			break;
 
@@ -793,6 +1004,7 @@
 		case CPU_ARM_XSCALE2:
 		case CPU_ARM_MPCORE:
 		case CPU_ARM_V6:
+		case CPU_ARM_V7:
 		case CPU_AVR32:
 			descr->name = "CPU_CYCLES";
 			break;
@@ -807,6 +1019,8 @@
 		case CPU_PPC64_POWER5p:
 		case CPU_PPC64_POWER5pp:
 		case CPU_PPC64_CELL:
+		case CPU_PPC64_POWER7:
+		case CPU_PPC64_IBM_COMPAT_V1:
 			descr->name = "CYCLES";
 			break;
 
diff --git a/libop/op_events.h b/libop/op_events.h
index f6462fc..9ffdc49 100644
--- a/libop/op_events.h
+++ b/libop/op_events.h
@@ -56,6 +56,8 @@
 	char * name;		/**< the event name */
 	char * desc;      	/**< the event description */
 	int min_count;		/**< minimum counter value allowed */
+	int filter;		/**< architecture specific filter or -1 */
+	char * ext;		/**< extended events */
 	struct list_head event_next;   /**< next event in list */
 };
 
@@ -63,10 +65,12 @@
 struct list_head * op_events(op_cpu cpu_type);
 
 /** Find a given event, returns NULL on error */
-struct op_event * op_find_event(op_cpu cpu_type, u32 nr);
+struct op_event * op_find_event(op_cpu cpu_type, u32 nr, u32 um);
+struct op_event * op_find_event_any(op_cpu cpu_type, u32 nr);
 
 /** Find a given event by name */
-struct op_event * find_event_by_name(char const * name);
+struct op_event * find_event_by_name(char const * name, unsigned um,
+                                     int um_valid);
 
 /**
  * Find a mapping for a given event ID for architectures requiring additional information
diff --git a/libop/op_hw_specific.h b/libop/op_hw_specific.h
new file mode 100644
index 0000000..35080ad
--- /dev/null
+++ b/libop/op_hw_specific.h
@@ -0,0 +1,107 @@
+/* 
+ * @file architecture specific interfaces
+ * @remark Copyright 2008 Intel Corporation
+ * @remark Read the file COPYING
+ * @author Andi Kleen
+ */
+
+#if defined(__i386__) || defined(__x86_64__) 
+
+/* Assume we run on the same host as the profilee */
+
+#define num_to_mask(x) ((1U << (x)) - 1)
+
+static inline int cpuid_vendor(char *vnd)
+{
+	union {
+		struct {
+			unsigned b,d,c;
+		};
+		char v[12];
+	} v;
+	unsigned eax;
+	asm("cpuid" : "=a" (eax), "=b" (v.b), "=c" (v.c), "=d" (v.d) : "0" (0));
+	return !strncmp(v.v, vnd, 12);
+}
+
+/* Work around Nehalem spec update AAJ79: CPUID incorrectly indicates
+   unhalted reference cycle architectural event is supported. We assume
+   steppings after C0 report correct data in CPUID. */
+static inline void workaround_nehalem_aaj79(unsigned *ebx)
+{
+	union {
+		unsigned eax;
+		struct {
+			unsigned stepping : 4;
+			unsigned model : 4;
+			unsigned family : 4;
+			unsigned type : 2;
+			unsigned res : 2;
+			unsigned ext_model : 4;
+			unsigned ext_family : 8;
+			unsigned res2 : 4;
+		};
+	} v;
+	unsigned model;
+
+	if (!cpuid_vendor("GenuineIntel"))
+		return;
+	asm("cpuid" : "=a" (v.eax) : "0" (1) : "ecx","ebx","edx");
+	model = (v.ext_model << 4) + v.model;
+	if (v.family != 6 || model != 26 || v.stepping > 4)
+		return;
+	*ebx |= (1 << 2);	/* disable unsupported event */
+}
+
+static inline unsigned arch_get_filter(op_cpu cpu_type)
+{
+	if (cpu_type == CPU_ARCH_PERFMON) { 
+		unsigned ebx, eax;
+		asm("cpuid" : "=a" (eax), "=b" (ebx) : "0" (0xa) : "ecx","edx");
+		workaround_nehalem_aaj79(&ebx);
+		return ebx & num_to_mask(eax >> 24);
+	}
+	return -1U;
+}
+
+static inline int arch_num_counters(op_cpu cpu_type) 
+{
+	if (cpu_type == CPU_ARCH_PERFMON) {
+		unsigned v;
+		asm("cpuid" : "=a" (v) : "0" (0xa) : "ebx","ecx","edx");
+		return (v >> 8) & 0xff;
+	} 
+	return -1;
+}
+
+static inline unsigned arch_get_counter_mask(void)
+{
+	unsigned v;
+	asm("cpuid" : "=a" (v) : "0" (0xa) : "ebx","ecx","edx");
+	return num_to_mask((v >> 8) & 0xff);	
+}
+
+#else
+
+static inline unsigned arch_get_filter(op_cpu cpu_type)
+{
+	/* Do something with passed arg to shut up the compiler warning */
+	if (cpu_type != CPU_NO_GOOD)
+		return 0;
+	return 0;
+}
+
+static inline int arch_num_counters(op_cpu cpu_type) 
+{
+	/* Do something with passed arg to shut up the compiler warning */
+	if (cpu_type != CPU_NO_GOOD)
+		return -1;
+	return -1;
+}
+
+static inline unsigned arch_get_counter_mask(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/libop/op_parse_event.c b/libop/op_parse_event.c
index 920d617..eb99a20 100644
--- a/libop/op_parse_event.c
+++ b/libop/op_parse_event.c
@@ -93,6 +93,7 @@
 		part = next_part(&cp);
 
 		if (part) {
+			parsed_events[i].unit_mask_valid = 1;
 			parsed_events[i].unit_mask = parse_ulong(part);
 			free(part);
 		}
diff --git a/libop/op_parse_event.h b/libop/op_parse_event.h
index 247a355..c8d4144 100644
--- a/libop/op_parse_event.h
+++ b/libop/op_parse_event.h
@@ -22,6 +22,7 @@
 	int unit_mask;
 	int kernel;
 	int user;
+	int unit_mask_valid;
 };
 
 /**
diff --git a/libop/op_xml_events.c b/libop/op_xml_events.c
new file mode 100644
index 0000000..5b9ac7d
--- /dev/null
+++ b/libop/op_xml_events.c
@@ -0,0 +1,93 @@
+/**
+ * @file op_xml_events.c
+ * routines for generating event files in XML
+ *
+ * @remark Copyright 2008 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Dave Nomura
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include "op_events.h"
+#include "op_list.h"
+#include "op_cpu_type.h"
+#include "op_xml_out.h"
+
+static op_cpu cpu_type;
+#define MAX_BUFFER 4096
+void open_xml_events(char const * title, char const * doc, op_cpu the_cpu_type)
+{
+	char const * schema_version = "1.0";
+	char buffer[MAX_BUFFER];
+
+	buffer[0] = '\0';
+	cpu_type = the_cpu_type;
+	open_xml_element(HELP_EVENTS, 0, buffer);
+	open_xml_element(HELP_HEADER, 1, buffer);
+	init_xml_str_attr(HELP_TITLE, title, buffer);
+	init_xml_str_attr(SCHEMA_VERSION, schema_version, buffer);
+	init_xml_str_attr(HELP_DOC, doc, buffer);
+	close_xml_element(NONE, 0, buffer);
+	printf("%s", buffer);
+}
+
+void close_xml_events(void)
+{
+	char buffer[MAX_BUFFER];
+
+	buffer[0] = '\0';
+	close_xml_element(HELP_EVENTS, 0, buffer);
+	printf("%s", buffer);
+}
+
+static void xml_do_arch_specific_event_help(struct op_event const * event,
+					    char * buffer)
+{
+	switch (cpu_type) {
+	case CPU_PPC64_CELL:
+		init_xml_int_attr(HELP_EVENT_GROUP, event->val / 100, buffer);
+		break;
+	default:
+		break;
+	}
+}
+
+
+void xml_help_for_event(struct op_event const * event)
+{
+	uint i;
+	int nr_counters;
+	int has_nested = strcmp(event->unit->name, "zero");
+	char buffer[MAX_BUFFER];
+
+	buffer[0] = '\0';
+	open_xml_element(HELP_EVENT, 1, buffer);
+	init_xml_str_attr(HELP_EVENT_NAME, event->name, buffer);
+	xml_do_arch_specific_event_help(event, buffer);
+	init_xml_str_attr(HELP_EVENT_DESC, event->desc, buffer);
+
+	nr_counters = op_get_nr_counters(cpu_type);
+	init_xml_int_attr(HELP_COUNTER_MASK, event->counter_mask, buffer);
+	init_xml_int_attr(HELP_MIN_COUNT, event->min_count, buffer);
+
+	if (has_nested) {
+		close_xml_element(NONE, 1, buffer);
+		open_xml_element(HELP_UNIT_MASKS, 1, buffer);
+		init_xml_int_attr(HELP_DEFAULT_MASK, event->unit->default_mask, buffer);
+		close_xml_element(NONE, 1, buffer);
+		for (i = 0; i < event->unit->num; i++) {
+			open_xml_element(HELP_UNIT_MASK, 1, buffer);
+			init_xml_int_attr(HELP_UNIT_MASK_VALUE,
+					  event->unit->um[i].value, buffer);
+			init_xml_str_attr(HELP_UNIT_MASK_DESC,
+					  event->unit->um[i].desc, buffer);
+			close_xml_element(NONE, 0, buffer);
+		}
+		close_xml_element(HELP_UNIT_MASKS, 0, buffer);
+	}
+	close_xml_element(has_nested ? HELP_EVENT : NONE, has_nested, buffer);
+	printf("%s", buffer);
+}
+
diff --git a/libop/op_xml_events.h b/libop/op_xml_events.h
new file mode 100644
index 0000000..e1e092e
--- /dev/null
+++ b/libop/op_xml_events.h
@@ -0,0 +1,20 @@
+/**
+ * @file op_xml_events.h
+ * routines for generating event files in XML
+ *
+ * @remark Copyright 2008 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Dave Nomura
+ */
+
+#ifndef OP_XML_EVENTS_H
+#define OP_XML_EVENTS_H
+
+#include "op_events.h"
+
+void xml_help_for_event(struct op_event const * event);
+void open_xml_events(char const * title, char const * doc, op_cpu cpu_type);
+void close_xml_events(void);
+
+#endif /* OP_XML_EVENTS_H */
diff --git a/libop/op_xml_out.c b/libop/op_xml_out.c
new file mode 100644
index 0000000..d779c45
--- /dev/null
+++ b/libop/op_xml_out.c
@@ -0,0 +1,233 @@
+/**
+ * @file op_xml_out.c
+ * C utility routines for writing XML
+ *
+ * @remark Copyright 2008 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Dave Nomura
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "op_xml_out.h"
+
+char const * xml_tag_map[] = {
+	"NONE",
+	"id",
+	"profile",
+		"processor",
+		"cputype",
+		"title",
+		"schemaversion",
+		"mhz",
+	"setup",
+	"timersetup",
+		"rtcinterrupts",
+	"eventsetup",
+		"eventname",
+		"unitmask",
+		"setupcount",
+		"separatedcpus",
+	"options",
+		"session", "debuginfo", "details", "excludedependent",
+		"excludesymbols", "imagepath", "includesymbols", "merge",
+	"classes",
+	"class",
+		"cpu",
+		"event",
+		"mask",
+	"process",
+		"pid",
+	"thread",
+		"tid",
+	"binary",
+	"module",
+		"name",
+	"callers",
+	"callees",
+	"symbol",
+		"idref",
+		"self",
+		"detaillo",
+		"detailhi",
+	"symboltable",
+	"symboldata",
+		"startingaddr",
+		"file",
+		"line",
+		"codelength",
+	"summarydata",
+	"sampledata",
+	"count",
+	"detailtable",
+	"symboldetails",
+	"detaildata",
+		"vmaoffset",
+	"bytestable",
+	"bytes",
+	"help_events",
+	"header",
+		"title",
+		"doc",
+	"event",
+		"event_name",
+		"group",
+		"desc",
+		"counter_mask",
+		"min_count",
+	"unit_masks",
+		"default",
+	"unit_mask",
+		"mask",
+		"desc"
+};
+
+#define MAX_BUF_LEN 2048
+char const * xml_tag_name(tag_t tag)
+{
+	return xml_tag_map[tag];
+}
+
+
+void open_xml_element(tag_t tag, int with_attrs, char * buffer)
+{
+	char const * tag_name = xml_tag_name(tag);
+	unsigned int const max_len = strlen(tag_name) + 3;
+	char tmp_buf[MAX_BUF_LEN];
+
+	if (max_len >= sizeof(tmp_buf))
+		fprintf(stderr,"Warning: open_xml_element: buffer overflow %d\n", max_len);
+
+	if (snprintf(tmp_buf, sizeof(tmp_buf), "<%s%s", tag_name,
+		(with_attrs ? " " : ">\n")) < 0) {
+		fprintf(stderr,"open_xml_element: snprintf failed\n");
+		exit(EXIT_FAILURE);
+	}
+	strncat(buffer, tmp_buf, sizeof(tmp_buf)); /* FIXME: size bounds the source, not space left in dest; can overflow buffer */
+}
+
+
+void close_xml_element(tag_t tag, int has_nested, char * buffer)
+{
+	char const * tag_name = xml_tag_name(tag);
+	unsigned int const max_len = strlen(tag_name) + 3;
+	char tmp_buf[MAX_BUF_LEN];
+
+	if (max_len >= sizeof(tmp_buf))
+		fprintf(stderr,"Warning: close_xml_element: buffer overflow %d\n", max_len);
+
+	if (tag == NONE) {
+		if (snprintf(tmp_buf, sizeof(tmp_buf), "%s\n", (has_nested ? ">" : "/>")) < 0) {
+			fprintf(stderr, "close_xml_element: snprintf failed\n");
+			exit(EXIT_FAILURE);
+		}
+	} else {
+		if (snprintf(tmp_buf, sizeof(tmp_buf), "</%s>\n", tag_name) < 0) {
+			fprintf(stderr, "close_xml_element: snprintf failed\n");
+			exit(EXIT_FAILURE);
+		}
+	}
+	strncat(buffer, tmp_buf, sizeof(tmp_buf)); /* FIXME: size bounds the source, not space left in dest; can overflow buffer */
+}
+
+
+void init_xml_int_attr(tag_t attr, int value, char * buffer)
+{
+	char const * attr_name = xml_tag_name(attr);
+	char tmp_buf[MAX_BUF_LEN];
+	unsigned int const max_len = strlen(attr_name) + 50;
+
+	if (max_len >= sizeof(tmp_buf)) {
+		fprintf(stderr,
+			"Warning: init_xml_int_attr: buffer overflow %d\n", max_len);
+	}
+
+
+	if (snprintf(tmp_buf, sizeof(tmp_buf), " %s=\"%d\"", attr_name, value) < 0) {
+		fprintf(stderr,"init_xml_int_attr: snprintf failed\n");
+		exit(EXIT_FAILURE);
+	}
+	strncat(buffer, tmp_buf, sizeof(tmp_buf)); /* FIXME: size bounds the source, not space left in dest; can overflow buffer */
+}
+
+
+void init_xml_dbl_attr(tag_t attr, double value, char * buffer)
+{
+	char const * attr_name = xml_tag_name(attr);
+	unsigned int const max_len = strlen(attr_name) + 50;
+	char tmp_buf[MAX_BUF_LEN];
+
+	if (max_len >= sizeof(tmp_buf))
+		fprintf(stderr, "Warning: init_xml_dbl_attr: buffer overflow %d\n", max_len);
+
+	if (snprintf(tmp_buf, sizeof(tmp_buf), " %s=\"%.2f\"", attr_name, value) < 0) {
+		fprintf(stderr, "init_xml_dbl_attr: snprintf failed\n");
+		exit(EXIT_FAILURE);
+	}
+	strncat(buffer, tmp_buf, sizeof(tmp_buf)); /* FIXME: size bounds the source, not space left in dest; can overflow buffer */
+}
+
+
+static char * xml_quote(char const * str, char * quote_buf)
+{
+	int i;
+	int pos = 0;
+	int len = strlen(str);
+
+	
+	quote_buf[pos++] = '"';
+
+	for (i = 0; i < len; i++) {
+		if (pos >= MAX_BUF_LEN - 10) {
+			fprintf(stderr,"quote_str: buffer overflow %d\n", pos);
+			exit(EXIT_FAILURE);
+		}
+
+		switch(str[i]) {
+		case '&':
+			strncpy(quote_buf + pos, "&amp;", 5);
+			pos += 5;
+			break;
+		case '<':
+			strncpy(quote_buf + pos, "&lt;", 4);
+			pos += 4;
+			break;
+		case '>':
+			strncpy(quote_buf + pos, "&gt;", 4);
+			pos += 4;
+			break;
+		case '"':
+			strncpy(quote_buf + pos, "&quot;", 6);
+			pos += 6;
+			break;
+		default:
+			quote_buf[pos++] = str[i];
+			break;
+		}
+	}
+
+	quote_buf[pos++] = '"';
+	quote_buf[pos++] = '\0';
+	return quote_buf;
+}
+
+
+void init_xml_str_attr(tag_t attr, char const * str, char * buffer)
+{
+	char tmp_buf[MAX_BUF_LEN];
+	char quote_buf[MAX_BUF_LEN];
+	char const * attr_name = xml_tag_name(attr);
+	char const * quote_str = xml_quote(str, quote_buf);
+	const unsigned int max_len = strlen(attr_name) + strlen(quote_str) + 10;
+
+	if (max_len >= sizeof(tmp_buf))
+		fprintf(stderr, "Warning: init_xml_str_attr: buffer overflow %d\n", max_len);
+
+	if (snprintf(tmp_buf, sizeof(tmp_buf), " %s=%s", attr_name, quote_str) < 0) {
+		fprintf(stderr,"init_xml_str_attr: snprintf failed\n");
+		exit(EXIT_FAILURE);
+	}
+	strncat(buffer, tmp_buf, sizeof(tmp_buf)); /* FIXME: size bounds the source, not space left in dest; can overflow buffer */
+}
diff --git a/libop/op_xml_out.h b/libop/op_xml_out.h
new file mode 100644
index 0000000..52e8d8f
--- /dev/null
+++ b/libop/op_xml_out.h
@@ -0,0 +1,72 @@
+/**
+ * @file op_xml_out.h
+ * utility routines for writing XML
+ *
+ * @remark Copyright 2008 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Dave Nomura
+ */
+
+#ifndef OP_XML_OUT_H
+#define OP_XML_OUT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum {
+	NONE=0, TABLE_ID, PROFILE,
+	PROCESSOR, CPU_NAME, TITLE, SCHEMA_VERSION, MHZ,
+	SETUP, 
+	TIMER_SETUP, RTC_INTERRUPTS,
+	EVENT_SETUP, EVENT_NAME, UNIT_MASK, SETUP_COUNT, SEPARATED_CPUS,
+	OPTIONS, SESSION, DEBUG_INFO, DETAILS, EXCLUDE_DEPENDENT, EXCLUDE_SYMBOLS,
+		IMAGE_PATH, INCLUDE_SYMBOLS, MERGE,
+	CLASSES,
+	CLASS,
+		CPU_NUM,
+		EVENT_NUM,
+		EVENT_MASK,
+	PROCESS, PROC_ID,
+	THREAD, THREAD_ID,
+	BINARY,
+	MODULE, NAME,
+	CALLERS, CALLEES,
+	SYMBOL, ID_REF, SELFREF, DETAIL_LO, DETAIL_HI,
+	SYMBOL_TABLE,
+	SYMBOL_DATA, STARTING_ADDR,
+		SOURCE_FILE, SOURCE_LINE, CODE_LENGTH,
+	SUMMARY, SAMPLE,
+	COUNT,
+	DETAIL_TABLE, SYMBOL_DETAILS, DETAIL_DATA, VMA,
+	BYTES_TABLE, BYTES,
+	HELP_EVENTS,
+	HELP_HEADER,
+	HELP_TITLE,
+	HELP_DOC,
+	HELP_EVENT,
+	HELP_EVENT_NAME,
+	HELP_EVENT_GROUP,
+	HELP_EVENT_DESC,
+	HELP_COUNTER_MASK,
+	HELP_MIN_COUNT,
+	HELP_UNIT_MASKS,
+	HELP_DEFAULT_MASK,
+	HELP_UNIT_MASK,
+	HELP_UNIT_MASK_VALUE,
+	HELP_UNIT_MASK_DESC
+	} tag_t;
+
+char const * xml_tag_name(tag_t tag);
+void open_xml_element(tag_t tag, int with_attrs, char * result);
+void close_xml_element(tag_t tag, int has_nested, char * result);
+void init_xml_int_attr(tag_t attr, int value, char * result);
+void init_xml_dbl_attr(tag_t attr, double value, char * result);
+void init_xml_str_attr(tag_t attr, char const * str, char * result);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OP_XML_OUT_H */
diff --git a/libutil/Android.mk b/libutil/Android.mk
index 29f3bfb..6a7cf5d 100644
--- a/libutil/Android.mk
+++ b/libutil/Android.mk
@@ -7,6 +7,7 @@
 	op_file.c \
 	op_fileio.c \
 	op_get_time.c \
+	op_growable_buffer.c \
 	op_libiberty.c \
 	op_lockfile.c \
 	op_popt.c \
diff --git a/libutil/op_file.c b/libutil/op_file.c
index e3e6cb6..fd5995a 100644
--- a/libutil/op_file.c
+++ b/libutil/op_file.c
@@ -94,7 +94,17 @@
 	name_len = strlen(basedir) + strlen("/") + strlen(ent->d_name) + 1;
 	name = xmalloc(name_len);
 	sprintf(name, "%s/%s", basedir,	ent->d_name);
-	if (stat(name, st_buf) != 0) {
+	if (stat(name, st_buf) != 0)
+	{
+		struct stat lstat_buf;
+		int err = errno;
+		if (lstat(name, &lstat_buf) == 0 &&
+			    S_ISLNK(lstat_buf.st_mode)) {
+			// dangling symlink -- silently ignore
+		} else {
+			fprintf(stderr, "stat failed for %s (%s)\n",
+			                name, strerror(err));
+		}
 		free(name);
 		name = NULL;
 	}
@@ -147,13 +157,14 @@
 		case MATCH_ANY_ENTRY_RECURSION + MATCH:
 			name = make_pathname_from_dirent(base_dir, ent,
 						       &stat_buffer);
-			if (name && S_ISDIR(stat_buffer.st_mode) &&
-			    !S_ISLNK(stat_buffer.st_mode)) {
-				get_matching_pathnames(
-					name_list, getpathname,
-					name, filter, recursion);
-			} else {
-				getpathname(name, name_list);
+			if (name) {
+				if (S_ISDIR(stat_buffer.st_mode)) {
+					get_matching_pathnames(
+						name_list, getpathname,
+						name, filter, recursion);
+				} else {
+					getpathname(name, name_list);
+				}
 			}
 			free(name);
 			break;
@@ -161,8 +172,7 @@
 		case MATCH_DIR_ONLY_RECURSION + MATCH:
 			name = make_pathname_from_dirent(base_dir, ent,
 						       &stat_buffer);
-			if (name && S_ISDIR(stat_buffer.st_mode) &&
-			    !S_ISLNK(stat_buffer.st_mode)) {
+			if (name && S_ISDIR(stat_buffer.st_mode)) {
 				/* Check if full directory name contains
 				 * match to the filter; if so, add it to
 				 * name_list and quit; else, recurse.
diff --git a/libutil/op_libiberty.h b/libutil/op_libiberty.h
index ea02a50..ef2f386 100644
--- a/libutil/op_libiberty.h
+++ b/libutil/op_libiberty.h
@@ -34,7 +34,6 @@
 /* some system have a libiberty.a but no libiberty.h so we must provide
  * ourself the missing proto */
 #ifndef HAVE_LIBIBERTY_H
-
 /* Set the program name used by xmalloc.  */
 void xmalloc_set_program_name(char const *);
 
@@ -71,7 +70,6 @@
 #define xmalloc_set_program_name(n)
 #endif
 
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/opcontrol/Android.mk b/opcontrol/Android.mk
index 56211ad..8f04f3a 100644
--- a/opcontrol/Android.mk
+++ b/opcontrol/Android.mk
@@ -1,6 +1,10 @@
 LOCAL_PATH:= $(call my-dir)
 include $(CLEAR_VARS)
 
+ifeq ($(TARGET_ARCH_VARIANT), armv7-a)
+    LOCAL_CFLAGS += -DWITH_ARM_V7_A
+endif
+
 LOCAL_SRC_FILES:= \
 	opcontrol.cpp
 
diff --git a/opcontrol/opcontrol.cpp b/opcontrol/opcontrol.cpp
index 2d9cb2f..8b79c7e 100644
--- a/opcontrol/opcontrol.cpp
+++ b/opcontrol/opcontrol.cpp
@@ -39,7 +39,15 @@
 /* Experiments found that using a small interval may hang the device, and the
  * more events tracked simultaneously, the longer the interval has to be.
  */
+
+#if !defined(WITH_ARM_V7_A)
+#define MAX_EVENTS 3
 int min_count[3] = {150000, 200000, 250000};
+#else
+#define MAX_EVENTS 4
+int min_count[4] = {150000, 200000, 250000, 300000};
+#endif
+
 int list_events; 
 int show_usage;
 int setup;
@@ -49,8 +57,8 @@
 int stop;
 int reset;
 
-int selected_events[3];
-int selected_counts[3];
+int selected_events[MAX_EVENTS];
+int selected_counts[MAX_EVENTS];
 
 char kernel_range[512];
 char vmlinux[512];
@@ -76,6 +84,8 @@
     const char *name;
     const char *explanation;
 } event_info[] = {
+#if !defined(WITH_ARM_V7_A)
+    /* ARM V6 events */
     {0x00, "IFU_IFETCH_MISS", 
      "number of instruction fetch misses"},
     {0x01, "CYCLES_IFU_MEM_STALL", 
@@ -112,6 +122,115 @@
      "Times write buffer was drained"},
     {0xff, "CPU_CYCLES", 
      "clock cycles counter"}, 
+#else
+    /* ARM V7 events */
+    {0x00, "PMNC_SW_INCR",
+     "Software increment of PMNC registers"},
+    {0x01, "IFETCH_MISS",
+     "Instruction fetch misses from cache or normal cacheable memory"},
+    {0x02, "ITLB_MISS",
+     "Instruction fetch misses from TLB"},
+    {0x03, "DCACHE_REFILL",
+     "Data R/W operation that causes a refill from cache or normal cacheable "
+     "memory"},
+    {0x04, "DCACHE_ACCESS",
+     "Data R/W from cache"},
+    {0x05, "DTLB_REFILL",
+     "Data R/W that causes a TLB refill"},
+    {0x06, "DREAD",
+     "Data read architecturally executed (note: architecturally executed = for "
+     "instructions that are unconditional or that pass the condition code)"},
+    {0x07, "DWRITE",
+     "Data write architecturally executed"},
+    {0x08, "INSTR_EXECUTED",
+     "All executed instructions"},
+    {0x09, "EXC_TAKEN",
+     "Exception taken"},
+    {0x0A, "EXC_EXECUTED",
+     "Exception return architecturally executed"},
+    {0x0B, "CID_WRITE",
+     "Instruction that writes to the Context ID Register architecturally "
+     "executed"},
+    {0x0C, "PC_WRITE",
+     "SW change of PC, architecturally executed (not by exceptions)"},
+    {0x0D, "PC_IMM_BRANCH",
+     "Immediate branch instruction executed (taken or not)"},
+    {0x0E, "PC_PROC_RETURN",
+     "Procedure return architecturally executed (not by exceptions)"},
+    {0x0F, "UNALIGNED_ACCESS",
+     "Unaligned access architecturally executed"},
+    {0x10, "PC_BRANCH_MIS_PRED",
+     "Branch mispredicted or not predicted. Counts pipeline flushes because of "
+     "misprediction"},
+    {0x12, "PC_BRANCH_MIS_USED",
+     "Branch or change in program flow that could have been predicted"},
+    {0x40, "WRITE_BUFFER_FULL",
+     "Any write buffer full cycle"},
+    {0x41, "L2_STORE_MERGED",
+     "Any store that is merged in L2 cache"},
+    {0x42, "L2_STORE_BUFF",
+     "Any bufferable store from load/store to L2 cache"},
+    {0x43, "L2_ACCESS",
+     "Any access to L2 cache"},
+    {0x44, "L2_CACH_MISS",
+     "Any cacheable miss in L2 cache"},
+    {0x45, "AXI_READ_CYCLES",
+     "Number of cycles for an active AXI read"},
+    {0x46, "AXI_WRITE_CYCLES",
+     "Number of cycles for an active AXI write"},
+    {0x47, "MEMORY_REPLAY",
+     "Any replay event in the memory subsystem"},
+    {0x48, "UNALIGNED_ACCESS_REPLAY",
+     "Unaligned access that causes a replay"},
+    {0x49, "L1_DATA_MISS",
+     "L1 data cache miss as a result of the hashing algorithm"},
+    {0x4A, "L1_INST_MISS",
+     "L1 instruction cache miss as a result of the hashing algorithm"},
+    {0x4B, "L1_DATA_COLORING",
+     "L1 data access in which a page coloring alias occurs"},
+    {0x4C, "L1_NEON_DATA",
+     "NEON data access that hits L1 cache"},
+    {0x4D, "L1_NEON_CACH_DATA",
+     "NEON cacheable data access that hits L1 cache"},
+    {0x4E, "L2_NEON",
+     "L2 access as a result of NEON memory access"},
+    {0x4F, "L2_NEON_HIT",
+     "Any NEON hit in L2 cache"},
+    {0x50, "L1_INST",
+     "Any L1 instruction cache access, excluding CP15 cache accesses"},
+    {0x51, "PC_RETURN_MIS_PRED",
+     "Return stack misprediction at return stack pop "
+     "(incorrect target address)"},
+    {0x52, "PC_BRANCH_FAILED",
+     "Branch prediction misprediction"},
+    {0x53, "PC_BRANCH_TAKEN",
+     "Any predicted branch that is taken"},
+    {0x54, "PC_BRANCH_EXECUTED",
+     "Any taken branch that is executed"},
+    {0x55, "OP_EXECUTED",
+     "Number of operations executed "
+     "(in instruction or multi-cycle instruction)"},
+    {0x56, "CYCLES_INST_STALL",
+     "Cycles where no instruction available"},
+    {0x57, "CYCLES_INST",
+     "Number of instructions issued in a cycle"},
+    {0x58, "CYCLES_NEON_DATA_STALL",
+     "Number of cycles the processor waits on MRC data from NEON"},
+    {0x59, "CYCLES_NEON_INST_STALL",
+     "Number of cycles the processor waits on NEON instruction queue or "
+     "NEON load queue"},
+    {0x5A, "NEON_CYCLES",
+     "Number of cycles NEON and integer processors are not idle"},
+    {0x70, "PMU0_EVENTS",
+     "Number of events from external input source PMUEXTIN[0]"},
+    {0x71, "PMU1_EVENTS",
+     "Number of events from external input source PMUEXTIN[1]"},
+    {0x72, "PMU_EVENTS",
+     "Number of events from both external input sources PMUEXTIN[0] "
+     "and PMUEXTIN[1]"},
+    {0xFF, "CPU_CYCLES",
+     "Number of CPU cycles"},
+#endif
 };
 
 void usage() {
@@ -293,7 +412,7 @@
 
     printf("Driver directory: %s\n", OP_DRIVER_BASE);
     printf("Session directory: %s\n", OP_DATA_DIR);
-    for (i = 0; i < 3; i++) {
+    for (i = 0; i < MAX_EVENTS; i++) {
         sprintf(fullname, OP_DRIVER_BASE"/%d/enabled", i);
         num = read_num(fullname);
         if (num > 0) {
@@ -379,8 +498,9 @@
                 break;
             /* --event */
             case 'e':   
-                if (num_events == 3) {
-                    fprintf(stderr, "More than 3 events specified\n");
+                if (num_events == MAX_EVENTS) {
+                    fprintf(stderr, "More than %d events specified\n",
+                            MAX_EVENTS);
                     exit(1);
                 }
                 if (process_event(optarg)) {
@@ -445,6 +565,7 @@
 
         strcpy(command, "oprofiled --session-dir="OP_DATA_DIR);
 
+#if !defined(WITH_ARM_V7_A)
         /* Since counter #3 can only handle CPU_CYCLES, check and shuffle the 
          * order a bit so that the maximal number of events can be profiled
          * simultaneously
@@ -477,6 +598,7 @@
                 selected_counts[i] = temp;
             }
         }
+#endif
 
 
         /* Configure the counters and enable them */
@@ -518,7 +640,7 @@
         }
 
         /* Disable the unused counters */
-        for (i = num_events; i < 3; i++) {
+        for (i = num_events; i < MAX_EVENTS; i++) {
             echo_dev("0", 0, "enabled", i);
         }