/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
 * signalling, unconditionally.
 */
#define _GNU_SOURCE
#include "main.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* Next  - where the next entry will be written.
 * Prev  - the value of "Next" when the event was last triggered.
 * Event - the peer requested an event after writing this entry.
 */
static inline bool need_event(unsigned short event,
			      unsigned short next,
			      unsigned short prev)
{
	return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
}
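
/*
 * Worked example of the arithmetic above (illustrative values only): with
 * prev = 10 and next = 12, entries 10 and 11 have been written since the
 * last notification, so
 *   event = 10 or 11: (next - event - 1) is 1 or 0, both < (next - prev) = 2 -> signal
 *   event = 12:       (next - event - 1) wraps to 0xffff, not < 2            -> no signal
 *   event = 9:        (next - event - 1) = 2, not < 2                        -> already signalled earlier
 * The unsigned short subtractions keep the test correct across index
 * wrap-around; this is the same rule as vring_need_event() in the virtio
 * headers.
 */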

/* Design:
 * Guest adds descriptors with unique index values and DESC_HW in flags.
 * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
 * Flags are always set last.
 */
#define DESC_HW 0x1

struct desc {
	unsigned short flags;
	unsigned short index;
	unsigned len;
	unsigned long long addr;
};
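
/*
 * Note on the layout above: flags doubles as the ownership marker. A
 * descriptor with DESC_HW set is owned by the host; once the host clears
 * DESC_HW the guest owns it again and may refill it. Writing flags last,
 * under the smp_release()/smp_acquire() pairs below, is what publishes the
 * other fields to the opposite side.
 */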

/* how much padding is needed to avoid false cache sharing */
#define HOST_GUEST_PADDING 0x80

/* Mostly read */
struct event {
	unsigned short kick_index;
	unsigned char reserved0[HOST_GUEST_PADDING - 2];
	unsigned short call_index;
	unsigned char reserved1[HOST_GUEST_PADDING - 2];
};
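
/*
 * kick_index is written by the host (enable_kick) and read by the guest
 * (kick_available); call_index is written by the guest (enable_call) and
 * read by the host (call_used). Padding each index onto its own cache line
 * keeps the two sides from false-sharing a line they both write.
 */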

struct data {
	void *buf; /* the descriptor is writeable, so buf can't be recovered from it */
	void *data;
} *data;

struct desc *ring;
struct event *event;

struct guest {
	unsigned avail_idx;
	unsigned last_used_idx;
	unsigned num_free;
	unsigned kicked_avail_idx;
	unsigned char reserved[HOST_GUEST_PADDING - 12];
} guest;

struct host {
	/* we do not need to track the last avail index
	 * unless we have more than one request in flight.
	 */
	unsigned used_idx;
	unsigned called_used_idx;
	unsigned char reserved[HOST_GUEST_PADDING - 4];
} host;

/* implemented by ring */
void alloc_ring(void)
{
	int ret;
	int i;

	ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
	if (ret) {
		perror("Unable to allocate ring buffer");
		exit(3);
	}
	event = malloc(sizeof *event);
	if (!event) {
		perror("Unable to allocate event buffer");
		exit(3);
	}
	memset(event, 0, sizeof *event);
	guest.avail_idx = 0;
	guest.kicked_avail_idx = -1;
	guest.last_used_idx = 0;
	host.used_idx = 0;
	host.called_used_idx = -1;
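	/*
	 * Note: kicked_avail_idx and called_used_idx start at -1 (0xffff once
	 * truncated to unsigned short in need_event()), so the very first
	 * kick_available()/call_used() after this point is never suppressed.
	 */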
	for (i = 0; i < ring_size; ++i) {
		struct desc desc = {
			.index = i,
		};
		ring[i] = desc;
	}
	guest.num_free = ring_size;
	data = malloc(ring_size * sizeof *data);
	if (!data) {
		perror("Unable to allocate data buffer");
		exit(3);
	}
	memset(data, 0, ring_size * sizeof *data);
}

/* guest side */
int add_inbuf(unsigned len, void *buf, void *datap)
{
	unsigned head, index;

	if (!guest.num_free)
		return -1;

	guest.num_free--;
	head = (ring_size - 1) & (guest.avail_idx++);

	/* Start with a write. On MESI architectures this helps
	 * avoid a shared state with the consumer that is polling this descriptor.
	 */
	ring[head].addr = (unsigned long)(void *)buf;
	ring[head].len = len;
	/* The read below might bypass the write above. That is OK because it is
	 * just an optimization: if it happens we get the cache line in a shared
	 * state, which is unfortunate but probably not worth an explicit full
	 * barrier to avoid.
	 */
	barrier();
	index = ring[head].index;
	data[index].buf = buf;
	data[index].data = datap;
	/* Barrier A (for pairing) */
	smp_release();
	ring[head].flags = DESC_HW;

	return 0;
}

/* guest side: reap one completed buffer, or return NULL if none is ready */
void *get_buf(unsigned *lenp, void **bufp)
{
	unsigned head = (ring_size - 1) & guest.last_used_idx;
	unsigned index;
	void *datap;

	if (ring[head].flags & DESC_HW)
		return NULL;
	/* Barrier B (for pairing) */
	smp_acquire();
	*lenp = ring[head].len;
	index = ring[head].index & (ring_size - 1);
	datap = data[index].data;
	*bufp = data[index].buf;
	data[index].buf = NULL;
	data[index].data = NULL;
	guest.num_free++;
	guest.last_used_idx++;
	return datap;
}

/* guest side: true if the host has not completed the next descriptor yet */
bool used_empty(void)
{
	unsigned head = (ring_size - 1) & guest.last_used_idx;

	return (ring[head].flags & DESC_HW);
}

void disable_call(void)
{
	/* Doing nothing to disable calls might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}

bool enable_call(void)
{
	event->call_index = guest.last_used_idx;
	/* Make sure the call index write is visible before re-checking the ring. */
	/* Barrier D (for pairing) */
	smp_mb();
	return used_empty();
}

void kick_available(void)
{
	/* Make sure the flags write in add_inbuf() is visible before we read
	 * the event index.
	 */
	/* Barrier C (for pairing) */
	smp_mb();
	if (!need_event(event->kick_index,
			guest.avail_idx,
			guest.kicked_avail_idx))
		return;

	guest.kicked_avail_idx = guest.avail_idx;
	kick();
}

/* host side */
void disable_kick(void)
{
	/* Doing nothing to disable kicks might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}

bool enable_kick(void)
{
	event->kick_index = host.used_idx;
	/* Make sure the kick index write is visible before re-checking the ring. */
	/* Barrier C (for pairing) */
	smp_mb();
	return avail_empty();
}

/* host side: true if the guest has not made the next descriptor available yet */
bool avail_empty(void)
{
	unsigned head = (ring_size - 1) & host.used_idx;

	return !(ring[head].flags & DESC_HW);
}

/* host side: consume one available buffer, or return false if none is ready */
bool use_buf(unsigned *lenp, void **bufp)
{
	unsigned head = (ring_size - 1) & host.used_idx;

	if (!(ring[head].flags & DESC_HW))
		return false;

	/* Make sure the length read below is not speculated before the flags
	 * check above.
	 */
	/* Barrier A (for pairing) */
	smp_acquire();

	/* Simple in-order completion: we don't need
	 * to touch the index at all. This also means we
	 * can just modify the descriptor in-place.
	 */
	ring[head].len--; /* stand-in for the host writing back a used len */
	/* Make sure len is valid before flags.
	 * Note: an alternative is to write len and flags in one access -
	 * possible on 64 bit architectures, but wmb is free on Intel anyway,
	 * so I have no way to test whether it's a gain.
	 */
	/* Barrier B (for pairing) */
	smp_release();
	ring[head].flags = 0;
	host.used_idx++;
	return true;
}

void call_used(void)
{
	/* Make sure the flags write above is visible before we read the event
	 * index.
	 */
	/* Barrier D (for pairing) */
	smp_mb();
	if (!need_event(event->call_index,
			host.used_idx,
			host.called_used_idx))
		return;

	host.called_used_idx = host.used_idx;
	call();
}
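
/*
 * Build/run sketch (an assumption, not something stated in this file):
 * ring.c is one of the ringtest benchmarks and links against the shared
 * harness in main.c/main.h, which supplies ring_size, barrier(), the
 * smp_*() primitives, kick(), call() and the guest/host threads that drive
 * the functions above. Something along the lines of
 *
 *	cc -Wall -O2 -pthread main.c ring.c -o ring
 *	./ring
 *
 * should build and run it; the harness's own Makefile remains the
 * authoritative reference.
 */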