Commit 60136667 authored by Namhyung Kim's avatar Namhyung Kim Committed by Arnaldo Carvalho de Melo
Browse files

perf data: Allow to use stdio functions for pipe mode



When perf data is in a pipe, it reads each event separately using
read(2) syscall.  This is a huge performance bottleneck when
processing large data like in perf inject.  Also perf inject needs to
use write(2) syscall for the output.

So convert it to use buffer I/O functions in stdio library for pipe
data.  This makes inject-build-id bench time drops from 20ms to 8ms.

  $ perf bench internals inject-build-id
  # Running 'internals/inject-build-id' benchmark:
    Average build-id injection took: 8.074 msec (+- 0.013 msec)
    Average time per event: 0.792 usec (+- 0.001 usec)
    Average memory usage: 8328 KB (+- 0 KB)
    Average build-id-all injection took: 5.490 msec (+- 0.008 msec)
    Average time per event: 0.538 usec (+- 0.001 usec)
    Average memory usage: 7563 KB (+- 0 KB)

This patch enables it just for perf inject when used with pipe (it's a
default behavior).  Maybe we could do it for perf record and/or report
later..

Committer testing:

Before:

  $ perf stat -r 5 perf bench internals inject-build-id
  # Running 'internals/inject-build-id' benchmark:
    Average build-id injection took: 13.605 msec (+- 0.064 msec)
    Average time per event: 1.334 usec (+- 0.006 usec)
    Average memory usage: 12220 KB (+- 7 KB)
    Average build-id-all injection took: 11.458 msec (+- 0.058 msec)
    Average time per event: 1.123 usec (+- 0.006 usec)
    Average memory usage: 11546 KB (+- 8 KB)
  # Running 'internals/inject-build-id' benchmark:
    Average build-id injection took: 13.673 msec (+- 0.057 msec)
    Average time per event: 1.341 usec (+- 0.006 usec)
    Average memory usage: 12508 KB (+- 8 KB)
    Average build-id-all injection took: 11.437 msec (+- 0.046 msec)
    Average time per event: 1.121 usec (+- 0.004 usec)
    Average memory usage: 11812 KB (+- 7 KB)
  # Running 'internals/inject-build-id' benchmark:
    Average build-id injection took: 13.641 msec (+- 0.069 msec)
    Average time per event: 1.337 usec (+- 0.007 usec)
    Average memory usage: 12302 KB (+- 8 KB)
    Average build-id-all injection took: 10.820 msec (+- 0.106 msec)
    Average time per event: 1.061 usec (+- 0.010 usec)
    Average memory usage: 11616 KB (+- 7 KB)
  # Running 'internals/inject-build-id' benchmark:
    Average build-id injection took: 13.379 msec (+- 0.074 msec)
    Average time per event: 1.312 usec (+- 0.007 usec)
    Average memory usage: 12334 KB (+- 8 KB)
    Average build-id-all injection took: 11.288 msec (+- 0.071 msec)
    Average time per event: 1.107 usec (+- 0.007 usec)
    Average memory usage: 11657 KB (+- 8 KB)
  # Running 'internals/inject-build-id' benchmark:
    Average build-id injection took: 13.534 msec (+- 0.058 msec)
    Average time per event: 1.327 usec (+- 0.006 usec)
    Average memory usage: 12264 KB (+- 8 KB)
    Average build-id-all injection took: 11.557 msec (+- 0.076 msec)
    Average time per event: 1.133 usec (+- 0.007 usec)
    Average memory usage: 11593 KB (+- 8 KB)

   Performance counter stats for 'perf bench internals inject-build-id' (5 runs):

            4,060.05 msec task-clock:u              #    1.566 CPUs utilized            ( +-  0.65% )
                   0      context-switches:u        #    0.000 K/sec
                   0      cpu-migrations:u          #    0.000 K/sec
             101,888      page-faults:u             #    0.025 M/sec                    ( +-  0.12% )
       3,745,833,163      cycles:u                  #    0.923 GHz                      ( +-  0.10% )  (83.22%)
         194,346,613      stalled-cycles-frontend:u #    5.19% frontend cycles idle     ( +-  0.57% )  (83.30%)
         708,495,034      stalled-cycles-backend:u  #   18.91% backend cycles idle      ( +-  0.48% )  (83.48%)
       5,629,328,628      instructions:u            #    1.50  insn per cycle
                                                    #    0.13  stalled cycles per insn  ( +-  0.21% )  (83.57%)
       1,236,697,927      branches:u                #  304.602 M/sec                    ( +-  0.16% )  (83.44%)
          17,564,877      branch-misses:u           #    1.42% of all branches          ( +-  0.23% )  (82.99%)

              2.5934 +- 0.0128 seconds time elapsed  ( +-  0.49% )

  $

After:

  $ perf stat -r 5 perf bench internals inject-build-id
  # Running 'internals/inject-build-id' benchmark:
    Average build-id injection took: 8.560 msec (+- 0.125 msec)
    Average time per event: 0.839 usec (+- 0.012 usec)
    Average memory usage: 12520 KB (+- 8 KB)
    Average build-id-all injection took: 5.789 msec (+- 0.054 msec)
    Average time per event: 0.568 usec (+- 0.005 usec)
    Average memory usage: 11919 KB (+- 9 KB)
  # Running 'internals/inject-build-id' benchmark:
    Average build-id injection took: 8.639 msec (+- 0.111 msec)
    Average time per event: 0.847 usec (+- 0.011 usec)
    Average memory usage: 12732 KB (+- 8 KB)
    Average build-id-all injection took: 5.647 msec (+- 0.069 msec)
    Average time per event: 0.554 usec (+- 0.007 usec)
    Average memory usage: 12093 KB (+- 7 KB)
  # Running 'internals/inject-build-id' benchmark:
    Average build-id injection took: 8.551 msec (+- 0.096 msec)
    Average time per event: 0.838 usec (+- 0.009 usec)
    Average memory usage: 12739 KB (+- 8 KB)
    Average build-id-all injection took: 5.617 msec (+- 0.061 msec)
    Average time per event: 0.551 usec (+- 0.006 usec)
    Average memory usage: 12105 KB (+- 7 KB)
  # Running 'internals/inject-build-id' benchmark:
    Average build-id injection took: 8.403 msec (+- 0.097 msec)
    Average time per event: 0.824 usec (+- 0.010 usec)
    Average memory usage: 12770 KB (+- 8 KB)
    Average build-id-all injection took: 5.611 msec (+- 0.085 msec)
    Average time per event: 0.550 usec (+- 0.008 usec)
    Average memory usage: 12134 KB (+- 8 KB)
  # Running 'internals/inject-build-id' benchmark:
    Average build-id injection took: 8.518 msec (+- 0.102 msec)
    Average time per event: 0.835 usec (+- 0.010 usec)
    Average memory usage: 12518 KB (+- 10 KB)
    Average build-id-all injection took: 5.503 msec (+- 0.073 msec)
    Average time per event: 0.540 usec (+- 0.007 usec)
    Average memory usage: 11882 KB (+- 8 KB)

   Performance counter stats for 'perf bench internals inject-build-id' (5 runs):

            2,394.88 msec task-clock:u              #    1.577 CPUs utilized            ( +-  0.83% )
                   0      context-switches:u        #    0.000 K/sec
                   0      cpu-migrations:u          #    0.000 K/sec
             103,181      page-faults:u             #    0.043 M/sec                    ( +-  0.11% )
       3,548,172,030      cycles:u                  #    1.482 GHz                      ( +-  0.30% )  (83.26%)
          81,537,700      stalled-cycles-frontend:u #    2.30% frontend cycles idle     ( +-  1.54% )  (83.24%)
         876,631,544      stalled-cycles-backend:u  #   24.71% backend cycles idle      ( +-  1.14% )  (83.45%)
       5,960,361,707      instructions:u            #    1.68  insn per cycle
                                                    #    0.15  stalled cycles per insn  ( +-  0.27% )  (83.26%)
       1,269,413,491      branches:u                #  530.054 M/sec                    ( +-  0.10% )  (83.48%)
          11,372,453      branch-misses:u           #    0.90% of all branches          ( +-  0.52% )  (83.31%)

             1.51874 +- 0.00642 seconds time elapsed  ( +-  0.42% )

  $

Signed-off-by: default avatarNamhyung Kim <namhyung@kernel.org>
Acked-by: default avatarJiri Olsa <jolsa@redhat.com>
Tested-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201030054742.87740-1-namhyung@kernel.org


Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
parent 0a04244c
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -853,10 +853,12 @@ int cmd_inject(int argc, const char **argv)
		.output = {
			.path = "-",
			.mode = PERF_DATA_MODE_WRITE,
			.use_stdio = true,
		},
	};
	struct perf_data data = {
		.mode = PERF_DATA_MODE_READ,
		.use_stdio = true,
	};
	int ret;

+38 −3
Original line number Diff line number Diff line
@@ -174,8 +174,21 @@ static bool check_pipe(struct perf_data *data)
			is_pipe = true;
	}

	if (is_pipe)
	if (is_pipe) {
		if (data->use_stdio) {
			const char *mode;

			mode = perf_data__is_read(data) ? "r" : "w";
			data->file.fptr = fdopen(fd, mode);

			if (data->file.fptr == NULL) {
				data->file.fd = fd;
				data->use_stdio = false;
			}
		} else {
			data->file.fd = fd;
		}
	}

	return data->is_pipe = is_pipe;
}
@@ -334,6 +347,9 @@ int perf_data__open(struct perf_data *data)
	if (check_pipe(data))
		return 0;

	/* currently it allows stdio for pipe only */
	data->use_stdio = false;

	if (!data->path)
		data->path = "perf.data";

@@ -353,9 +369,23 @@ void perf_data__close(struct perf_data *data)
		perf_data__close_dir(data);

	zfree(&data->file.path);

	if (data->use_stdio)
		fclose(data->file.fptr);
	else
		close(data->file.fd);
}

ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size)
{
	if (data->use_stdio) {
		if (fread(buf, size, 1, data->file.fptr) == 1)
			return size;
		return feof(data->file.fptr) ? 0 : -1;
	}
	return readn(data->file.fd, buf, size);
}

ssize_t perf_data_file__write(struct perf_data_file *file,
			      void *buf, size_t size)
{
@@ -365,6 +395,11 @@ ssize_t perf_data_file__write(struct perf_data_file *file,
ssize_t perf_data__write(struct perf_data *data,
			      void *buf, size_t size)
{
	if (data->use_stdio) {
		if (fwrite(buf, size, 1, data->file.fptr) == 1)
			return size;
		return -1;
	}
	return perf_data_file__write(&data->file, buf, size);
}

+10 −1
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@
#ifndef __PERF_DATA_H
#define __PERF_DATA_H

#include <stdio.h>
#include <stdbool.h>

enum perf_data_mode {
@@ -16,7 +17,10 @@ enum perf_dir_version {

struct perf_data_file {
	char		*path;
	union {
		int	 fd;
		FILE	*fptr;
	};
	unsigned long	 size;
};

@@ -26,6 +30,7 @@ struct perf_data {
	bool			 is_pipe;
	bool			 is_dir;
	bool			 force;
	bool			 use_stdio;
	enum perf_data_mode	 mode;

	struct {
@@ -62,11 +67,15 @@ static inline bool perf_data__is_single_file(struct perf_data *data)

static inline int perf_data__fd(struct perf_data *data)
{
	if (data->use_stdio)
		return fileno(data->file.fptr);

	return data->file.fd;
}

int perf_data__open(struct perf_data *data);
void perf_data__close(struct perf_data *data);
ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size);
ssize_t perf_data__write(struct perf_data *data,
			      void *buf, size_t size);
ssize_t perf_data_file__write(struct perf_data_file *file,
+4 −4
Original line number Diff line number Diff line
@@ -3647,7 +3647,8 @@ static int perf_file_section__process(struct perf_file_section *section,
}

static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
				       struct perf_header *ph, int fd,
				       struct perf_header *ph,
				       struct perf_data* data,
				       bool repipe)
{
	struct feat_fd ff = {
@@ -3656,7 +3657,7 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
	};
	ssize_t ret;

	ret = readn(fd, header, sizeof(*header));
	ret = perf_data__read(data, header, sizeof(*header));
	if (ret <= 0)
		return -1;

@@ -3679,8 +3680,7 @@ static int perf_header__read_pipe(struct perf_session *session)
	struct perf_header *header = &session->header;
	struct perf_pipe_file_header f_header;

	if (perf_file_header__read_pipe(&f_header, header,
					perf_data__fd(session->data),
	if (perf_file_header__read_pipe(&f_header, header, session->data,
					session->repipe) < 0) {
		pr_debug("incompatible file format\n");
		return -EINVAL;
+4 −3
Original line number Diff line number Diff line
@@ -1937,7 +1937,6 @@ static int __perf_session__process_pipe_events(struct perf_session *session)
{
	struct ordered_events *oe = &session->ordered_events;
	struct perf_tool *tool = session->tool;
	int fd = perf_data__fd(session->data);
	union perf_event *event;
	uint32_t size, cur_size = 0;
	void *buf = NULL;
@@ -1957,7 +1956,8 @@ static int __perf_session__process_pipe_events(struct perf_session *session)
	ordered_events__set_copy_on_queue(oe, true);
more:
	event = buf;
	err = readn(fd, event, sizeof(struct perf_event_header));
	err = perf_data__read(session->data, event,
			      sizeof(struct perf_event_header));
	if (err <= 0) {
		if (err == 0)
			goto done;
@@ -1989,7 +1989,8 @@ static int __perf_session__process_pipe_events(struct perf_session *session)
	p += sizeof(struct perf_event_header);

	if (size - sizeof(struct perf_event_header)) {
		err = readn(fd, p, size - sizeof(struct perf_event_header));
		err = perf_data__read(session->data, p,
				      size - sizeof(struct perf_event_header));
		if (err <= 0) {
			if (err == 0) {
				pr_err("unexpected end of event stream\n");