Commit 1ddf95f6 authored by Ian Rogers's avatar Ian Rogers Committed by Arnaldo Carvalho de Melo
Browse files

perf intel-tpebs: Don't close record on read



Factor sending record control fd code into its own function.

Rather than killing the record process send it a ping when reading.

Timeouts were witnessed if done too frequently, so only ping for the
first tpebs events.

Don't kill the record command send it a stop command.

As close isn't reliably called also close on evsel__exit.

Add extra checks on the record being terminated to avoid warnings.

Adjust the locking as needed and incorporate extra -Wthread-safety
checks.

Check to do six 500ms poll timeouts when sending commands, rather than
the larger 3000ms, to allow the record process terminating to be better
witnessed.

Reviewed-by: default avatarKan Liang <kan.liang@linux.intel.com>
Signed-off-by: default avatarIan Rogers <irogers@google.com>
Tested-by: default avatarWeilin Wang <weilin.wang@intel.com>
Acked-by: default avatarNamhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Cc: Andreas Färber <afaerber@suse.de>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Falcon <thomas.falcon@intel.com>
Link: https://lore.kernel.org/r/20250414174134.3095492-13-irogers@google.com


Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
parent 81743920
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -1656,6 +1656,8 @@ void evsel__exit(struct evsel *evsel)
{
	assert(list_empty(&evsel->core.node));
	assert(evsel->evlist == NULL);
	if (evsel__is_retire_lat(evsel))
		evsel__tpebs_close(evsel);
	bpf_counter__destroy(evsel);
	perf_bpf_filter__destroy(evsel);
	evsel__free_counts(evsel);
+127 −73
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@ bool tpebs_recording;
static LIST_HEAD(tpebs_results);
static pthread_t tpebs_reader_thread;
static struct child_process tpebs_cmd;
static int control_fd[2], ack_fd[2];
static struct mutex tpebs_mtx;

struct tpebs_retire_lat {
@@ -51,8 +52,6 @@ struct tpebs_retire_lat {
	bool started;
};

static struct tpebs_retire_lat *tpebs_retire_lat__find(struct evsel *evsel);

static void tpebs_mtx_init(void)
{
	mutex_init(&tpebs_mtx);
@@ -66,7 +65,10 @@ static struct mutex *tpebs_mtx_get(void)
	return &tpebs_mtx;
}

static int evsel__tpebs_start_perf_record(struct evsel *evsel, int control_fd[], int ack_fd[])
static struct tpebs_retire_lat *tpebs_retire_lat__find(struct evsel *evsel)
	EXCLUSIVE_LOCKS_REQUIRED(tpebs_mtx_get());

static int evsel__tpebs_start_perf_record(struct evsel *evsel)
{
	const char **record_argv;
	int tpebs_event_size = 0, i = 0, ret;
@@ -74,15 +76,13 @@ static int evsel__tpebs_start_perf_record(struct evsel *evsel, int control_fd[],
	char cpumap_buf[50];
	struct tpebs_retire_lat *t;

	mutex_lock(tpebs_mtx_get());
	list_for_each_entry(t, &tpebs_results, nd)
		tpebs_event_size++;

	record_argv = malloc((10 + 2 * tpebs_event_size) * sizeof(*record_argv));
	if (!record_argv) {
		mutex_unlock(tpebs_mtx_get());
	if (!record_argv)
		return -ENOMEM;
	}

	record_argv[i++] = "perf";
	record_argv[i++] = "record";
	record_argv[i++] = "-W";
@@ -118,7 +118,6 @@ static int evsel__tpebs_start_perf_record(struct evsel *evsel, int control_fd[],
	list_for_each_entry(t, &tpebs_results, nd)
		t->started = true;

	mutex_unlock(tpebs_mtx_get());
	return ret;
}

@@ -131,6 +130,11 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused,
	struct tpebs_retire_lat *t;

	mutex_lock(tpebs_mtx_get());
	if (tpebs_cmd.pid == 0) {
		/* Record has terminated. */
		mutex_unlock(tpebs_mtx_get());
		return 0;
	}
	t = tpebs_retire_lat__find(evsel);
	if (!t) {
		mutex_unlock(tpebs_mtx_get());
@@ -180,17 +184,98 @@ static void *__sample_reader(void *arg __maybe_unused)
	return NULL;
}

static int tpebs_send_record_cmd(const char *msg) EXCLUSIVE_LOCKS_REQUIRED(tpebs_mtx_get())
{
	struct pollfd pollfd = { .events = POLLIN, };
	int ret, len, retries = 0;
	char ack_buf[8];

	/* Check if the command exited before the send, done with the lock held. */
	if (tpebs_cmd.pid == 0)
		return 0;

	/*
	 * Let go of the lock while sending/receiving as blocking can starve the
	 * sample reading thread.
	 */
	mutex_unlock(tpebs_mtx_get());

	/* Send perf record command.*/
	len = strlen(msg);
	ret = write(control_fd[1], msg, len);
	if (ret != len) {
		pr_err("perf record control write control message '%s' failed\n", msg);
		ret = -EPIPE;
		goto out;
	}

	if (!strcmp(msg, EVLIST_CTL_CMD_STOP_TAG)) {
		ret = 0;
		goto out;
	}

	/* Wait for an ack. */
	pollfd.fd = ack_fd[0];

	/*
	 * We need this poll to ensure the ack_fd PIPE will not hang
	 * when perf record failed for any reason. The timeout value
	 * 3000ms is an empirical selection.
	 */
again:
	if (!poll(&pollfd, 1, 500)) {
		if (check_if_command_finished(&tpebs_cmd)) {
			ret = 0;
			goto out;
		}

		if (retries++ < 6)
			goto again;
		pr_err("tpebs failed: perf record ack timeout for '%s'\n", msg);
		ret = -ETIMEDOUT;
		goto out;
	}

	if (!(pollfd.revents & POLLIN)) {
		if (check_if_command_finished(&tpebs_cmd)) {
			ret = 0;
			goto out;
		}

		pr_err("tpebs failed: did not received an ack for '%s'\n", msg);
		ret = -EPIPE;
		goto out;
	}

	ret = read(ack_fd[0], ack_buf, sizeof(ack_buf));
	if (ret > 0)
		ret = strcmp(ack_buf, EVLIST_CTL_CMD_ACK_TAG);
	else
		pr_err("tpebs: perf record control ack failed\n");
out:
	/* Re-take lock as expected by caller. */
	mutex_lock(tpebs_mtx_get());
	return ret;
}

/*
 * tpebs_stop - stop the sample data read thread and the perf record process.
 */
static int tpebs_stop(void)
static int tpebs_stop(void) EXCLUSIVE_LOCKS_REQUIRED(tpebs_mtx_get())
{
	int ret = 0;

	/* Like tpebs_start, we should only run tpebs_end once. */
	if (tpebs_cmd.pid != 0) {
		kill(tpebs_cmd.pid, SIGTERM);
		tpebs_send_record_cmd(EVLIST_CTL_CMD_STOP_TAG);
		tpebs_cmd.pid = 0;
		mutex_unlock(tpebs_mtx_get());
		pthread_join(tpebs_reader_thread, NULL);
		mutex_lock(tpebs_mtx_get());
		close(control_fd[0]);
		close(control_fd[1]);
		close(ack_fd[0]);
		close(ack_fd[1]);
		close(tpebs_cmd.out);
		ret = finish_command(&tpebs_cmd);
		tpebs_cmd.pid = 0;
@@ -352,13 +437,15 @@ int evsel__tpebs_open(struct evsel *evsel)
		return 0;
	/* Only start the events once. */
	if (tpebs_cmd.pid != 0) {
		struct tpebs_retire_lat *t = tpebs_retire_lat__find(evsel);
		struct tpebs_retire_lat *t;
		bool valid;

		if (!t || !t->started) {
			/* Fail, as the event wasn't started. */
			return -EBUSY;
		}
		return 0;
		mutex_lock(tpebs_mtx_get());
		t = tpebs_retire_lat__find(evsel);
		valid = t && t->started;
		mutex_unlock(tpebs_mtx_get());
		/* May fail as the event wasn't started. */
		return valid ? 0 : -EBUSY;
	}

	ret = evsel__tpebs_prepare(evsel);
@@ -367,12 +454,7 @@ int evsel__tpebs_open(struct evsel *evsel)

	mutex_lock(tpebs_mtx_get());
	tpebs_empty = list_empty(&tpebs_results);
	mutex_unlock(tpebs_mtx_get());
	if (!tpebs_empty) {
		struct pollfd pollfd = { .events = POLLIN, };
		int control_fd[2], ack_fd[2], len;
		char ack_buf[8];

		/*Create control and ack fd for --control*/
		if (pipe(control_fd) < 0) {
			pr_err("tpebs: Failed to create control fifo");
@@ -385,7 +467,7 @@ int evsel__tpebs_open(struct evsel *evsel)
			goto out;
		}

		ret = evsel__tpebs_start_perf_record(evsel, control_fd, ack_fd);
		ret = evsel__tpebs_start_perf_record(evsel);
		if (ret)
			goto out;

@@ -397,53 +479,16 @@ int evsel__tpebs_open(struct evsel *evsel)
			ret = -1;
			goto out;
		}
		/* Wait for perf record initialization.*/
		len = strlen(EVLIST_CTL_CMD_ENABLE_TAG);
		ret = write(control_fd[1], EVLIST_CTL_CMD_ENABLE_TAG, len);
		if (ret != len) {
			pr_err("perf record control write control message failed\n");
			goto out;
		}

		/* wait for an ack */
		pollfd.fd = ack_fd[0];

		/*
		 * We need this poll to ensure the ack_fd PIPE will not hang
		 * when perf record failed for any reason. The timeout value
		 * 3000ms is an empirical selection.
		 */
		if (!poll(&pollfd, 1, 3000)) {
			pr_err("tpebs failed: perf record ack timeout\n");
			ret = -1;
			goto out;
		}

		if (!(pollfd.revents & POLLIN)) {
			pr_err("tpebs failed: did not received an ack\n");
			ret = -1;
			goto out;
		}

		ret = read(ack_fd[0], ack_buf, sizeof(ack_buf));
		if (ret > 0)
			ret = strcmp(ack_buf, EVLIST_CTL_CMD_ACK_TAG);
		else {
			pr_err("tpebs: perf record control ack failed\n");
			goto out;
		ret = tpebs_send_record_cmd(EVLIST_CTL_CMD_ENABLE_TAG);
	}
out:
		close(control_fd[0]);
		close(control_fd[1]);
		close(ack_fd[0]);
		close(ack_fd[1]);
	}
	if (ret) {
		struct tpebs_retire_lat *t = tpebs_retire_lat__find(evsel);

		list_del_init(&t->nd);
		tpebs_retire_lat__delete(t);
	}
	mutex_unlock(tpebs_mtx_get());
	return ret;
}

@@ -452,6 +497,7 @@ int evsel__tpebs_read(struct evsel *evsel, int cpu_map_idx, int thread)
	struct perf_counts_values *count, *old_count = NULL;
	struct tpebs_retire_lat *t;
	uint64_t val;
	int ret;

	/* Only set retire_latency value to the first CPU and thread. */
	if (cpu_map_idx != 0 || thread != 0)
@@ -462,14 +508,20 @@ int evsel__tpebs_read(struct evsel *evsel, int cpu_map_idx, int thread)

	count = perf_counts(evsel->counts, cpu_map_idx, thread);

	mutex_lock(tpebs_mtx_get());
	t = tpebs_retire_lat__find(evsel);
	/*
	 * Need to stop the forked record to ensure get sampled data from the
	 * PIPE to process and get non-zero retire_lat value for hybrid.
	 * If reading the first tpebs result, send a ping to the record
	 * process. Allow the sample reader a chance to read by releasing and
	 * reacquiring the lock.
	 */
	tpebs_stop();

	if (&t->nd == tpebs_results.next) {
		ret = tpebs_send_record_cmd(EVLIST_CTL_CMD_PING_TAG);
		mutex_unlock(tpebs_mtx_get());
		if (ret)
			return ret;
		mutex_lock(tpebs_mtx_get());
	t = tpebs_retire_lat__find(evsel);
	}
	val = rint(t->val);
	mutex_unlock(tpebs_mtx_get());

@@ -498,10 +550,12 @@ void evsel__tpebs_close(struct evsel *evsel)

	mutex_lock(tpebs_mtx_get());
	t = tpebs_retire_lat__find(evsel);
	if (t) {
		list_del_init(&t->nd);
		tpebs_retire_lat__delete(t);
	mutex_unlock(tpebs_mtx_get());

		if (list_empty(&tpebs_results))
			tpebs_stop();
	}
	mutex_unlock(tpebs_mtx_get());
}