zte's code,first commit

Change-Id: I9a04da59e459a9bc0d67f101f700d9d7dc8d681b
diff --git a/ap/os/linux/linux-3.4.x/tools/perf/bench/bench.h b/ap/os/linux/linux-3.4.x/tools/perf/bench/bench.h
new file mode 100644
index 0000000..a09bece
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/tools/perf/bench/bench.h
@@ -0,0 +1,18 @@
+#ifndef BENCH_H
+#define BENCH_H
+
+extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
+extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
+extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used);
+extern int bench_mem_memset(int argc, const char **argv, const char *prefix);
+
+#define BENCH_FORMAT_DEFAULT_STR	"default"
+#define BENCH_FORMAT_DEFAULT		0
+#define BENCH_FORMAT_SIMPLE_STR		"simple"
+#define BENCH_FORMAT_SIMPLE		1
+
+#define BENCH_FORMAT_UNKNOWN		-1
+
+extern int bench_format;
+
+#endif
diff --git a/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memcpy-arch.h b/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memcpy-arch.h
new file mode 100644
index 0000000..a72e36c
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memcpy-arch.h
@@ -0,0 +1,12 @@
+
+#ifdef ARCH_X86_64
+
+#define MEMCPY_FN(fn, name, desc)		\
+	extern void *fn(void *, const void *, size_t);
+
+#include "mem-memcpy-x86-64-asm-def.h"
+
+#undef MEMCPY_FN
+
+#endif
+
diff --git a/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
new file mode 100644
index 0000000..d66ab79
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
@@ -0,0 +1,12 @@
+
+MEMCPY_FN(__memcpy,
+	"x86-64-unrolled",
+	"unrolled memcpy() in arch/x86/lib/memcpy_64.S")
+
+MEMCPY_FN(memcpy_c,
+	"x86-64-movsq",
+	"movsq-based memcpy() in arch/x86/lib/memcpy_64.S")
+
+MEMCPY_FN(memcpy_c_e,
+	"x86-64-movsb",
+	"movsb-based memcpy() in arch/x86/lib/memcpy_64.S")
diff --git a/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memcpy-x86-64-asm.S b/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memcpy-x86-64-asm.S
new file mode 100644
index 0000000..fcd9cf0
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memcpy-x86-64-asm.S
@@ -0,0 +1,12 @@
+#define memcpy MEMCPY /* don't hide glibc's memcpy() */
+#define altinstr_replacement text
+#define globl p2align 4; .globl
+#define Lmemcpy_c globl memcpy_c; memcpy_c
+#define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e
+#include "../../../arch/x86/lib/memcpy_64.S"
+/*
+ * We need to provide note.GNU-stack section, saying that we want
+ * NOT executable stack. Otherwise the final linking will assume that
+ * the ELF stack should not be restricted at all and set it RWX.
+ */
+.section .note.GNU-stack,"",@progbits
diff --git a/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memcpy.c b/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memcpy.c
new file mode 100644
index 0000000..7155722
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memcpy.c
@@ -0,0 +1,303 @@
+/*
+ * mem-memcpy.c
+ *
+ * memcpy: Simple memory copy in various ways
+ *
+ * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../util/header.h"
+#include "bench.h"
+#include "mem-memcpy-arch.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <errno.h>
+
+#define K 1024
+
+static const char	*length_str	= "1MB";
+static const char	*routine	= "default";
+static int		iterations	= 1;
+static bool		use_clock;
+static int		clock_fd;
+static bool		only_prefault;
+static bool		no_prefault;
+
+static const struct option options[] = {
+	OPT_STRING('l', "length", &length_str, "1MB",
+		    "Specify length of memory to copy. "
+		    "available unit: B, MB, GB (upper and lower)"),
+	OPT_STRING('r', "routine", &routine, "default",
+		    "Specify routine to copy"),
+	OPT_INTEGER('i', "iterations", &iterations,
+		    "repeat memcpy() invocation this number of times"),
+	OPT_BOOLEAN('c', "clock", &use_clock,
+		    "Use CPU clock for measuring"),
+	OPT_BOOLEAN('o', "only-prefault", &only_prefault,
+		    "Show only the result with page faults before memcpy()"),
+	OPT_BOOLEAN('n', "no-prefault", &no_prefault,
+		    "Show only the result without page faults before memcpy()"),
+	OPT_END()
+};
+
+typedef void *(*memcpy_t)(void *, const void *, size_t);
+
+struct routine {
+	const char *name;
+	const char *desc;
+	memcpy_t fn;
+};
+
+struct routine routines[] = {
+	{ "default",
+	  "Default memcpy() provided by glibc",
+	  memcpy },
+#ifdef ARCH_X86_64
+
+#define MEMCPY_FN(fn, name, desc) { name, desc, fn },
+#include "mem-memcpy-x86-64-asm-def.h"
+#undef MEMCPY_FN
+
+#endif
+
+	{ NULL,
+	  NULL,
+	  NULL   }
+};
+
+static const char * const bench_mem_memcpy_usage[] = {
+	"perf bench mem memcpy <options>",
+	NULL
+};
+
+static struct perf_event_attr clock_attr = {
+	.type		= PERF_TYPE_HARDWARE,
+	.config		= PERF_COUNT_HW_CPU_CYCLES
+};
+
+static void init_clock(void)
+{
+	clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
+
+	if (clock_fd < 0 && errno == ENOSYS)
+		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
+	else
+		BUG_ON(clock_fd < 0);
+}
+
+static u64 get_clock(void)
+{
+	int ret;
+	u64 clk;
+
+	ret = read(clock_fd, &clk, sizeof(u64));
+	BUG_ON(ret != sizeof(u64));
+
+	return clk;
+}
+
+static double timeval2double(struct timeval *ts)
+{
+	return (double)ts->tv_sec +
+		(double)ts->tv_usec / (double)1000000;
+}
+
+static void alloc_mem(void **dst, void **src, size_t length)
+{
+	*dst = zalloc(length);
+	if (!dst)
+		die("memory allocation failed - maybe length is too large?\n");
+
+	*src = zalloc(length);
+	if (!src)
+		die("memory allocation failed - maybe length is too large?\n");
+}
+
+static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
+{
+	u64 clock_start = 0ULL, clock_end = 0ULL;
+	void *src = NULL, *dst = NULL;
+	int i;
+
+	alloc_mem(&src, &dst, len);
+
+	if (prefault)
+		fn(dst, src, len);
+
+	clock_start = get_clock();
+	for (i = 0; i < iterations; ++i)
+		fn(dst, src, len);
+	clock_end = get_clock();
+
+	free(src);
+	free(dst);
+	return clock_end - clock_start;
+}
+
+static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
+{
+	struct timeval tv_start, tv_end, tv_diff;
+	void *src = NULL, *dst = NULL;
+	int i;
+
+	alloc_mem(&src, &dst, len);
+
+	if (prefault)
+		fn(dst, src, len);
+
+	BUG_ON(gettimeofday(&tv_start, NULL));
+	for (i = 0; i < iterations; ++i)
+		fn(dst, src, len);
+	BUG_ON(gettimeofday(&tv_end, NULL));
+
+	timersub(&tv_end, &tv_start, &tv_diff);
+
+	free(src);
+	free(dst);
+	return (double)((double)len / timeval2double(&tv_diff));
+}
+
+#define pf (no_prefault ? 0 : 1)
+
+#define print_bps(x) do {					\
+		if (x < K)					\
+			printf(" %14lf B/Sec", x);		\
+		else if (x < K * K)				\
+			printf(" %14lfd KB/Sec", x / K);	\
+		else if (x < K * K * K)				\
+			printf(" %14lf MB/Sec", x / K / K);	\
+		else						\
+			printf(" %14lf GB/Sec", x / K / K / K); \
+	} while (0)
+
+int bench_mem_memcpy(int argc, const char **argv,
+		     const char *prefix __used)
+{
+	int i;
+	size_t len;
+	double result_bps[2];
+	u64 result_clock[2];
+
+	argc = parse_options(argc, argv, options,
+			     bench_mem_memcpy_usage, 0);
+
+	if (use_clock)
+		init_clock();
+
+	len = (size_t)perf_atoll((char *)length_str);
+
+	result_clock[0] = result_clock[1] = 0ULL;
+	result_bps[0] = result_bps[1] = 0.0;
+
+	if ((s64)len <= 0) {
+		fprintf(stderr, "Invalid length:%s\n", length_str);
+		return 1;
+	}
+
+	/* same to without specifying either of prefault and no-prefault */
+	if (only_prefault && no_prefault)
+		only_prefault = no_prefault = false;
+
+	for (i = 0; routines[i].name; i++) {
+		if (!strcmp(routines[i].name, routine))
+			break;
+	}
+	if (!routines[i].name) {
+		printf("Unknown routine:%s\n", routine);
+		printf("Available routines...\n");
+		for (i = 0; routines[i].name; i++) {
+			printf("\t%s ... %s\n",
+			       routines[i].name, routines[i].desc);
+		}
+		return 1;
+	}
+
+	if (bench_format == BENCH_FORMAT_DEFAULT)
+		printf("# Copying %s Bytes ...\n\n", length_str);
+
+	if (!only_prefault && !no_prefault) {
+		/* show both of results */
+		if (use_clock) {
+			result_clock[0] =
+				do_memcpy_clock(routines[i].fn, len, false);
+			result_clock[1] =
+				do_memcpy_clock(routines[i].fn, len, true);
+		} else {
+			result_bps[0] =
+				do_memcpy_gettimeofday(routines[i].fn,
+						len, false);
+			result_bps[1] =
+				do_memcpy_gettimeofday(routines[i].fn,
+						len, true);
+		}
+	} else {
+		if (use_clock) {
+			result_clock[pf] =
+				do_memcpy_clock(routines[i].fn,
+						len, only_prefault);
+		} else {
+			result_bps[pf] =
+				do_memcpy_gettimeofday(routines[i].fn,
+						len, only_prefault);
+		}
+	}
+
+	switch (bench_format) {
+	case BENCH_FORMAT_DEFAULT:
+		if (!only_prefault && !no_prefault) {
+			if (use_clock) {
+				printf(" %14lf Clock/Byte\n",
+					(double)result_clock[0]
+					/ (double)len);
+				printf(" %14lf Clock/Byte (with prefault)\n",
+					(double)result_clock[1]
+					/ (double)len);
+			} else {
+				print_bps(result_bps[0]);
+				printf("\n");
+				print_bps(result_bps[1]);
+				printf(" (with prefault)\n");
+			}
+		} else {
+			if (use_clock) {
+				printf(" %14lf Clock/Byte",
+					(double)result_clock[pf]
+					/ (double)len);
+			} else
+				print_bps(result_bps[pf]);
+
+			printf("%s\n", only_prefault ? " (with prefault)" : "");
+		}
+		break;
+	case BENCH_FORMAT_SIMPLE:
+		if (!only_prefault && !no_prefault) {
+			if (use_clock) {
+				printf("%lf %lf\n",
+					(double)result_clock[0] / (double)len,
+					(double)result_clock[1] / (double)len);
+			} else {
+				printf("%lf %lf\n",
+					result_bps[0], result_bps[1]);
+			}
+		} else {
+			if (use_clock) {
+				printf("%lf\n", (double)result_clock[pf]
+					/ (double)len);
+			} else
+				printf("%lf\n", result_bps[pf]);
+		}
+		break;
+	default:
+		/* reaching this means there's some disaster: */
+		die("unknown format: %d\n", bench_format);
+		break;
+	}
+
+	return 0;
+}
diff --git a/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memset-arch.h b/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memset-arch.h
new file mode 100644
index 0000000..a040fa7
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memset-arch.h
@@ -0,0 +1,12 @@
+
+#ifdef ARCH_X86_64
+
+#define MEMSET_FN(fn, name, desc)		\
+	extern void *fn(void *, int, size_t);
+
+#include "mem-memset-x86-64-asm-def.h"
+
+#undef MEMSET_FN
+
+#endif
+
diff --git a/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memset-x86-64-asm-def.h b/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memset-x86-64-asm-def.h
new file mode 100644
index 0000000..a71dff9
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memset-x86-64-asm-def.h
@@ -0,0 +1,12 @@
+
+MEMSET_FN(__memset,
+	"x86-64-unrolled",
+	"unrolled memset() in arch/x86/lib/memset_64.S")
+
+MEMSET_FN(memset_c,
+	"x86-64-stosq",
+	"movsq-based memset() in arch/x86/lib/memset_64.S")
+
+MEMSET_FN(memset_c_e,
+	"x86-64-stosb",
+	"movsb-based memset() in arch/x86/lib/memset_64.S")
diff --git a/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memset-x86-64-asm.S b/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memset-x86-64-asm.S
new file mode 100644
index 0000000..9e5af89
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memset-x86-64-asm.S
@@ -0,0 +1,13 @@
+#define memset MEMSET /* don't hide glibc's memset() */
+#define altinstr_replacement text
+#define globl p2align 4; .globl
+#define Lmemset_c globl memset_c; memset_c
+#define Lmemset_c_e globl memset_c_e; memset_c_e
+#include "../../../arch/x86/lib/memset_64.S"
+
+/*
+ * We need to provide note.GNU-stack section, saying that we want
+ * NOT executable stack. Otherwise the final linking will assume that
+ * the ELF stack should not be restricted at all and set it RWX.
+ */
+.section .note.GNU-stack,"",@progbits
diff --git a/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memset.c b/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memset.c
new file mode 100644
index 0000000..e907918
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/tools/perf/bench/mem-memset.c
@@ -0,0 +1,297 @@
+/*
+ * mem-memset.c
+ *
+ * memset: Simple memory set in various ways
+ *
+ * Trivial clone of mem-memcpy.c.
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../util/header.h"
+#include "bench.h"
+#include "mem-memset-arch.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <errno.h>
+
+#define K 1024
+
+static const char	*length_str	= "1MB";
+static const char	*routine	= "default";
+static int		iterations	= 1;
+static bool		use_clock;
+static int		clock_fd;
+static bool		only_prefault;
+static bool		no_prefault;
+
+static const struct option options[] = {
+	OPT_STRING('l', "length", &length_str, "1MB",
+		    "Specify length of memory to copy. "
+		    "available unit: B, MB, GB (upper and lower)"),
+	OPT_STRING('r', "routine", &routine, "default",
+		    "Specify routine to copy"),
+	OPT_INTEGER('i', "iterations", &iterations,
+		    "repeat memset() invocation this number of times"),
+	OPT_BOOLEAN('c', "clock", &use_clock,
+		    "Use CPU clock for measuring"),
+	OPT_BOOLEAN('o', "only-prefault", &only_prefault,
+		    "Show only the result with page faults before memset()"),
+	OPT_BOOLEAN('n', "no-prefault", &no_prefault,
+		    "Show only the result without page faults before memset()"),
+	OPT_END()
+};
+
+typedef void *(*memset_t)(void *, int, size_t);
+
+struct routine {
+	const char *name;
+	const char *desc;
+	memset_t fn;
+};
+
+static const struct routine routines[] = {
+	{ "default",
+	  "Default memset() provided by glibc",
+	  memset },
+#ifdef ARCH_X86_64
+
+#define MEMSET_FN(fn, name, desc) { name, desc, fn },
+#include "mem-memset-x86-64-asm-def.h"
+#undef MEMSET_FN
+
+#endif
+
+	{ NULL,
+	  NULL,
+	  NULL   }
+};
+
+static const char * const bench_mem_memset_usage[] = {
+	"perf bench mem memset <options>",
+	NULL
+};
+
+static struct perf_event_attr clock_attr = {
+	.type		= PERF_TYPE_HARDWARE,
+	.config		= PERF_COUNT_HW_CPU_CYCLES
+};
+
+static void init_clock(void)
+{
+	clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
+
+	if (clock_fd < 0 && errno == ENOSYS)
+		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
+	else
+		BUG_ON(clock_fd < 0);
+}
+
+static u64 get_clock(void)
+{
+	int ret;
+	u64 clk;
+
+	ret = read(clock_fd, &clk, sizeof(u64));
+	BUG_ON(ret != sizeof(u64));
+
+	return clk;
+}
+
+static double timeval2double(struct timeval *ts)
+{
+	return (double)ts->tv_sec +
+		(double)ts->tv_usec / (double)1000000;
+}
+
+static void alloc_mem(void **dst, size_t length)
+{
+	*dst = zalloc(length);
+	if (!dst)
+		die("memory allocation failed - maybe length is too large?\n");
+}
+
+static u64 do_memset_clock(memset_t fn, size_t len, bool prefault)
+{
+	u64 clock_start = 0ULL, clock_end = 0ULL;
+	void *dst = NULL;
+	int i;
+
+	alloc_mem(&dst, len);
+
+	if (prefault)
+		fn(dst, -1, len);
+
+	clock_start = get_clock();
+	for (i = 0; i < iterations; ++i)
+		fn(dst, i, len);
+	clock_end = get_clock();
+
+	free(dst);
+	return clock_end - clock_start;
+}
+
+static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault)
+{
+	struct timeval tv_start, tv_end, tv_diff;
+	void *dst = NULL;
+	int i;
+
+	alloc_mem(&dst, len);
+
+	if (prefault)
+		fn(dst, -1, len);
+
+	BUG_ON(gettimeofday(&tv_start, NULL));
+	for (i = 0; i < iterations; ++i)
+		fn(dst, i, len);
+	BUG_ON(gettimeofday(&tv_end, NULL));
+
+	timersub(&tv_end, &tv_start, &tv_diff);
+
+	free(dst);
+	return (double)((double)len / timeval2double(&tv_diff));
+}
+
+#define pf (no_prefault ? 0 : 1)
+
+#define print_bps(x) do {					\
+		if (x < K)					\
+			printf(" %14lf B/Sec", x);		\
+		else if (x < K * K)				\
+			printf(" %14lfd KB/Sec", x / K);	\
+		else if (x < K * K * K)				\
+			printf(" %14lf MB/Sec", x / K / K);	\
+		else						\
+			printf(" %14lf GB/Sec", x / K / K / K); \
+	} while (0)
+
+int bench_mem_memset(int argc, const char **argv,
+		     const char *prefix __used)
+{
+	int i;
+	size_t len;
+	double result_bps[2];
+	u64 result_clock[2];
+
+	argc = parse_options(argc, argv, options,
+			     bench_mem_memset_usage, 0);
+
+	if (use_clock)
+		init_clock();
+
+	len = (size_t)perf_atoll((char *)length_str);
+
+	result_clock[0] = result_clock[1] = 0ULL;
+	result_bps[0] = result_bps[1] = 0.0;
+
+	if ((s64)len <= 0) {
+		fprintf(stderr, "Invalid length:%s\n", length_str);
+		return 1;
+	}
+
+	/* same to without specifying either of prefault and no-prefault */
+	if (only_prefault && no_prefault)
+		only_prefault = no_prefault = false;
+
+	for (i = 0; routines[i].name; i++) {
+		if (!strcmp(routines[i].name, routine))
+			break;
+	}
+	if (!routines[i].name) {
+		printf("Unknown routine:%s\n", routine);
+		printf("Available routines...\n");
+		for (i = 0; routines[i].name; i++) {
+			printf("\t%s ... %s\n",
+			       routines[i].name, routines[i].desc);
+		}
+		return 1;
+	}
+
+	if (bench_format == BENCH_FORMAT_DEFAULT)
+		printf("# Copying %s Bytes ...\n\n", length_str);
+
+	if (!only_prefault && !no_prefault) {
+		/* show both of results */
+		if (use_clock) {
+			result_clock[0] =
+				do_memset_clock(routines[i].fn, len, false);
+			result_clock[1] =
+				do_memset_clock(routines[i].fn, len, true);
+		} else {
+			result_bps[0] =
+				do_memset_gettimeofday(routines[i].fn,
+						len, false);
+			result_bps[1] =
+				do_memset_gettimeofday(routines[i].fn,
+						len, true);
+		}
+	} else {
+		if (use_clock) {
+			result_clock[pf] =
+				do_memset_clock(routines[i].fn,
+						len, only_prefault);
+		} else {
+			result_bps[pf] =
+				do_memset_gettimeofday(routines[i].fn,
+						len, only_prefault);
+		}
+	}
+
+	switch (bench_format) {
+	case BENCH_FORMAT_DEFAULT:
+		if (!only_prefault && !no_prefault) {
+			if (use_clock) {
+				printf(" %14lf Clock/Byte\n",
+					(double)result_clock[0]
+					/ (double)len);
+				printf(" %14lf Clock/Byte (with prefault)\n ",
+					(double)result_clock[1]
+					/ (double)len);
+			} else {
+				print_bps(result_bps[0]);
+				printf("\n");
+				print_bps(result_bps[1]);
+				printf(" (with prefault)\n");
+			}
+		} else {
+			if (use_clock) {
+				printf(" %14lf Clock/Byte",
+					(double)result_clock[pf]
+					/ (double)len);
+			} else
+				print_bps(result_bps[pf]);
+
+			printf("%s\n", only_prefault ? " (with prefault)" : "");
+		}
+		break;
+	case BENCH_FORMAT_SIMPLE:
+		if (!only_prefault && !no_prefault) {
+			if (use_clock) {
+				printf("%lf %lf\n",
+					(double)result_clock[0] / (double)len,
+					(double)result_clock[1] / (double)len);
+			} else {
+				printf("%lf %lf\n",
+					result_bps[0], result_bps[1]);
+			}
+		} else {
+			if (use_clock) {
+				printf("%lf\n", (double)result_clock[pf]
+					/ (double)len);
+			} else
+				printf("%lf\n", result_bps[pf]);
+		}
+		break;
+	default:
+		/* reaching this means there's some disaster: */
+		die("unknown format: %d\n", bench_format);
+		break;
+	}
+
+	return 0;
+}
diff --git a/ap/os/linux/linux-3.4.x/tools/perf/bench/sched-messaging.c b/ap/os/linux/linux-3.4.x/tools/perf/bench/sched-messaging.c
new file mode 100644
index 0000000..d1d1b30
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/tools/perf/bench/sched-messaging.c
@@ -0,0 +1,336 @@
+/*
+ *
+ * sched-messaging.c
+ *
+ * messaging: Benchmark for scheduler and IPC mechanisms
+ *
+ * Based on hackbench by Rusty Russell <rusty@rustcorp.com.au>
+ * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
+ *
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../builtin.h"
+#include "bench.h"
+
+/* Test groups of 20 processes spraying to 20 receivers */
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <sys/poll.h>
+#include <limits.h>
+
+#define DATASIZE 100
+
+static bool use_pipes = false;
+static unsigned int loops = 100;
+static bool thread_mode = false;
+static unsigned int num_groups = 10;
+
+struct sender_context {
+	unsigned int num_fds;
+	int ready_out;
+	int wakefd;
+	int out_fds[0];
+};
+
+struct receiver_context {
+	unsigned int num_packets;
+	int in_fds[2];
+	int ready_out;
+	int wakefd;
+};
+
+static void barf(const char *msg)
+{
+	fprintf(stderr, "%s (error: %s)\n", msg, strerror(errno));
+	exit(1);
+}
+
+static void fdpair(int fds[2])
+{
+	if (use_pipes) {
+		if (pipe(fds) == 0)
+			return;
+	} else {
+		if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == 0)
+			return;
+	}
+
+	barf(use_pipes ? "pipe()" : "socketpair()");
+}
+
+/* Block until we're ready to go */
+static void ready(int ready_out, int wakefd)
+{
+	char dummy;
+	struct pollfd pollfd = { .fd = wakefd, .events = POLLIN };
+
+	/* Tell them we're ready. */
+	if (write(ready_out, &dummy, 1) != 1)
+		barf("CLIENT: ready write");
+
+	/* Wait for "GO" signal */
+	if (poll(&pollfd, 1, -1) != 1)
+		barf("poll");
+}
+
+/* Sender sprays loops messages down each file descriptor */
+static void *sender(struct sender_context *ctx)
+{
+	char data[DATASIZE];
+	unsigned int i, j;
+
+	ready(ctx->ready_out, ctx->wakefd);
+
+	/* Now pump to every receiver. */
+	for (i = 0; i < loops; i++) {
+		for (j = 0; j < ctx->num_fds; j++) {
+			int ret, done = 0;
+
+again:
+			ret = write(ctx->out_fds[j], data + done,
+				    sizeof(data)-done);
+			if (ret < 0)
+				barf("SENDER: write");
+			done += ret;
+			if (done < DATASIZE)
+				goto again;
+		}
+	}
+
+	return NULL;
+}
+
+
+/* One receiver per fd */
+static void *receiver(struct receiver_context* ctx)
+{
+	unsigned int i;
+
+	if (!thread_mode)
+		close(ctx->in_fds[1]);
+
+	/* Wait for start... */
+	ready(ctx->ready_out, ctx->wakefd);
+
+	/* Receive them all */
+	for (i = 0; i < ctx->num_packets; i++) {
+		char data[DATASIZE];
+		int ret, done = 0;
+
+again:
+		ret = read(ctx->in_fds[0], data + done, DATASIZE - done);
+		if (ret < 0)
+			barf("SERVER: read");
+		done += ret;
+		if (done < DATASIZE)
+			goto again;
+	}
+
+	return NULL;
+}
+
+static pthread_t create_worker(void *ctx, void *(*func)(void *))
+{
+	pthread_attr_t attr;
+	pthread_t childid;
+	int err;
+
+	if (!thread_mode) {
+		/* process mode */
+		/* Fork the receiver. */
+		switch (fork()) {
+		case -1:
+			barf("fork()");
+			break;
+		case 0:
+			(*func) (ctx);
+			exit(0);
+			break;
+		default:
+			break;
+		}
+
+		return (pthread_t)0;
+	}
+
+	if (pthread_attr_init(&attr) != 0)
+		barf("pthread_attr_init:");
+
+#ifndef __ia64__
+	if (pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0)
+		barf("pthread_attr_setstacksize");
+#endif
+
+	err = pthread_create(&childid, &attr, func, ctx);
+	if (err != 0) {
+		fprintf(stderr, "pthread_create failed: %s (%d)\n",
+			strerror(err), err);
+		exit(-1);
+	}
+	return childid;
+}
+
+static void reap_worker(pthread_t id)
+{
+	int proc_status;
+	void *thread_status;
+
+	if (!thread_mode) {
+		/* process mode */
+		wait(&proc_status);
+		if (!WIFEXITED(proc_status))
+			exit(1);
+	} else {
+		pthread_join(id, &thread_status);
+	}
+}
+
+/* One group of senders and receivers */
+static unsigned int group(pthread_t *pth,
+		unsigned int num_fds,
+		int ready_out,
+		int wakefd)
+{
+	unsigned int i;
+	struct sender_context *snd_ctx = malloc(sizeof(struct sender_context)
+			+ num_fds * sizeof(int));
+
+	if (!snd_ctx)
+		barf("malloc()");
+
+	for (i = 0; i < num_fds; i++) {
+		int fds[2];
+		struct receiver_context *ctx = malloc(sizeof(*ctx));
+
+		if (!ctx)
+			barf("malloc()");
+
+
+		/* Create the pipe between client and server */
+		fdpair(fds);
+
+		ctx->num_packets = num_fds * loops;
+		ctx->in_fds[0] = fds[0];
+		ctx->in_fds[1] = fds[1];
+		ctx->ready_out = ready_out;
+		ctx->wakefd = wakefd;
+
+		pth[i] = create_worker(ctx, (void *)receiver);
+
+		snd_ctx->out_fds[i] = fds[1];
+		if (!thread_mode)
+			close(fds[0]);
+	}
+
+	/* Now we have all the fds, fork the senders */
+	for (i = 0; i < num_fds; i++) {
+		snd_ctx->ready_out = ready_out;
+		snd_ctx->wakefd = wakefd;
+		snd_ctx->num_fds = num_fds;
+
+		pth[num_fds+i] = create_worker(snd_ctx, (void *)sender);
+	}
+
+	/* Close the fds we have left */
+	if (!thread_mode)
+		for (i = 0; i < num_fds; i++)
+			close(snd_ctx->out_fds[i]);
+
+	/* Return number of children to reap */
+	return num_fds * 2;
+}
+
+static const struct option options[] = {
+	OPT_BOOLEAN('p', "pipe", &use_pipes,
+		    "Use pipe() instead of socketpair()"),
+	OPT_BOOLEAN('t', "thread", &thread_mode,
+		    "Be multi thread instead of multi process"),
+	OPT_UINTEGER('g', "group", &num_groups, "Specify number of groups"),
+	OPT_UINTEGER('l', "loop", &loops, "Specify number of loops"),
+	OPT_END()
+};
+
+static const char * const bench_sched_message_usage[] = {
+	"perf bench sched messaging <options>",
+	NULL
+};
+
+int bench_sched_messaging(int argc, const char **argv,
+		    const char *prefix __used)
+{
+	unsigned int i, total_children;
+	struct timeval start, stop, diff;
+	unsigned int num_fds = 20;
+	int readyfds[2], wakefds[2];
+	char dummy;
+	pthread_t *pth_tab;
+
+	argc = parse_options(argc, argv, options,
+			     bench_sched_message_usage, 0);
+
+	pth_tab = malloc(num_fds * 2 * num_groups * sizeof(pthread_t));
+	if (!pth_tab)
+		barf("main:malloc()");
+
+	fdpair(readyfds);
+	fdpair(wakefds);
+
+	total_children = 0;
+	for (i = 0; i < num_groups; i++)
+		total_children += group(pth_tab+total_children, num_fds,
+					readyfds[1], wakefds[0]);
+
+	/* Wait for everyone to be ready */
+	for (i = 0; i < total_children; i++)
+		if (read(readyfds[0], &dummy, 1) != 1)
+			barf("Reading for readyfds");
+
+	gettimeofday(&start, NULL);
+
+	/* Kick them off */
+	if (write(wakefds[1], &dummy, 1) != 1)
+		barf("Writing to start them");
+
+	/* Reap them all */
+	for (i = 0; i < total_children; i++)
+		reap_worker(pth_tab[i]);
+
+	gettimeofday(&stop, NULL);
+
+	timersub(&stop, &start, &diff);
+
+	switch (bench_format) {
+	case BENCH_FORMAT_DEFAULT:
+		printf("# %d sender and receiver %s per group\n",
+		       num_fds, thread_mode ? "threads" : "processes");
+		printf("# %d groups == %d %s run\n\n",
+		       num_groups, num_groups * 2 * num_fds,
+		       thread_mode ? "threads" : "processes");
+		printf(" %14s: %lu.%03lu [sec]\n", "Total time",
+		       diff.tv_sec,
+		       (unsigned long) (diff.tv_usec/1000));
+		break;
+	case BENCH_FORMAT_SIMPLE:
+		printf("%lu.%03lu\n", diff.tv_sec,
+		       (unsigned long) (diff.tv_usec/1000));
+		break;
+	default:
+		/* reaching here is something disaster */
+		fprintf(stderr, "Unknown format:%d\n", bench_format);
+		exit(1);
+		break;
+	}
+
+	return 0;
+}
diff --git a/ap/os/linux/linux-3.4.x/tools/perf/bench/sched-pipe.c b/ap/os/linux/linux-3.4.x/tools/perf/bench/sched-pipe.c
new file mode 100644
index 0000000..0c7454f
--- /dev/null
+++ b/ap/os/linux/linux-3.4.x/tools/perf/bench/sched-pipe.c
@@ -0,0 +1,127 @@
+/*
+ *
+ * sched-pipe.c
+ *
+ * pipe: Benchmark for pipe()
+ *
+ * Based on pipe-test-1m.c by Ingo Molnar <mingo@redhat.com>
+ *  http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c
+ * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
+ *
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../builtin.h"
+#include "bench.h"
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <linux/unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#define LOOPS_DEFAULT 1000000
+static int loops = LOOPS_DEFAULT;
+
+static const struct option options[] = {
+	OPT_INTEGER('l', "loop", &loops,
+		    "Specify number of loops"),
+	OPT_END()
+};
+
+static const char * const bench_sched_pipe_usage[] = {
+	"perf bench sched pipe <options>",
+	NULL
+};
+
+int bench_sched_pipe(int argc, const char **argv,
+		     const char *prefix __used)
+{
+	int pipe_1[2], pipe_2[2];
+	int m = 0, i;
+	struct timeval start, stop, diff;
+	unsigned long long result_usec = 0;
+
+	/*
+	 * why does "ret" exist?
+	 * discarding returned value of read(), write()
+	 * causes error in building environment for perf
+	 */
+	int __used ret, wait_stat;
+	pid_t pid, retpid;
+
+	argc = parse_options(argc, argv, options,
+			     bench_sched_pipe_usage, 0);
+
+	assert(!pipe(pipe_1));
+	assert(!pipe(pipe_2));
+
+	pid = fork();
+	assert(pid >= 0);
+
+	gettimeofday(&start, NULL);
+
+	if (!pid) {
+		for (i = 0; i < loops; i++) {
+			ret = read(pipe_1[0], &m, sizeof(int));
+			ret = write(pipe_2[1], &m, sizeof(int));
+		}
+	} else {
+		for (i = 0; i < loops; i++) {
+			ret = write(pipe_1[1], &m, sizeof(int));
+			ret = read(pipe_2[0], &m, sizeof(int));
+		}
+	}
+
+	gettimeofday(&stop, NULL);
+	timersub(&stop, &start, &diff);
+
+	if (pid) {
+		retpid = waitpid(pid, &wait_stat, 0);
+		assert((retpid == pid) && WIFEXITED(wait_stat));
+	} else {
+		exit(0);
+	}
+
+	switch (bench_format) {
+	case BENCH_FORMAT_DEFAULT:
+		printf("# Executed %d pipe operations between two tasks\n\n",
+			loops);
+
+		result_usec = diff.tv_sec * 1000000;
+		result_usec += diff.tv_usec;
+
+		printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
+		       diff.tv_sec,
+		       (unsigned long) (diff.tv_usec/1000));
+
+		printf(" %14lf usecs/op\n",
+		       (double)result_usec / (double)loops);
+		printf(" %14d ops/sec\n",
+		       (int)((double)loops /
+			     ((double)result_usec / (double)1000000)));
+		break;
+
+	case BENCH_FORMAT_SIMPLE:
+		printf("%lu.%03lu\n",
+		       diff.tv_sec,
+		       (unsigned long) (diff.tv_usec / 1000));
+		break;
+
+	default:
+		/* reaching here is something disaster */
+		fprintf(stderr, "Unknown format:%d\n", bench_format);
+		exit(1);
+		break;
+	}
+
+	return 0;
+}