diff --git a/src/include/86box/vid_voodoo_common.h b/src/include/86box/vid_voodoo_common.h index 2870f09ea..a6174e1cd 100644 --- a/src/include/86box/vid_voodoo_common.h +++ b/src/include/86box/vid_voodoo_common.h @@ -74,6 +74,13 @@ typedef union rgba_u { #define FIFO_FULL ((voodoo->fifo_write_idx - voodoo->fifo_read_idx) >= FIFO_SIZE - 4) #define FIFO_EMPTY (voodoo->fifo_read_idx == voodoo->fifo_write_idx) +#define VOODOO_BUF_FRONT 0 +#define VOODOO_BUF_BACK 1 +#define VOODOO_BUF_AUX 2 +#define VOODOO_BUF_UNKNOWN 3 +#define VOODOO_BUF_COUNT 4 +#define VOODOO_BUF_NONE 0xff + #define FIFO_TYPE 0xff000000 #define FIFO_ADDR 0x00ffffff @@ -98,6 +105,8 @@ typedef struct { uint32_t addr_type; uint32_t val; + uint8_t target_buf; + uint8_t pad[3]; } fifo_entry_t; typedef struct voodoo_params_t { @@ -356,6 +365,8 @@ typedef struct voodoo_t { event_t *wake_fifo_thread; event_t *wake_main_thread; event_t *fifo_not_full_event; + event_t *fifo_empty_event; + ATOMIC_INT fifo_empty_signaled; event_t *render_not_full_event[4]; event_t *wake_render_thread[4]; @@ -399,6 +410,8 @@ typedef struct voodoo_t { ATOMIC_INT cmd_written; ATOMIC_INT cmd_written_fifo; ATOMIC_INT cmd_written_fifo_2; + ATOMIC_INT pending_fb_writes_buf[VOODOO_BUF_COUNT]; + ATOMIC_INT pending_draw_cmds_buf[VOODOO_BUF_COUNT]; voodoo_params_t params_buffer[PARAM_SIZE]; ATOMIC_INT params_read_idx[4]; @@ -627,6 +640,12 @@ typedef struct voodoo_t { int fb_write_buffer; int fb_draw_buffer; int buffer_cutoff; + int queued_disp_buffer; + int queued_draw_buffer; + int queued_fb_write_buffer; + int queued_fb_draw_buffer; + uint32_t queued_lfbMode; + uint32_t queued_fbzMode; uint32_t tile_base; uint32_t tile_stride; @@ -657,6 +676,32 @@ typedef struct voodoo_t { uint64_t time; int render_time[4]; + uint64_t fifo_full_waits; + uint64_t fifo_full_wait_ticks; + uint64_t fifo_full_spin_checks; + uint64_t fifo_empty_waits; + uint64_t fifo_empty_wait_ticks; + uint64_t fifo_empty_spin_checks; + uint64_t render_waits; + uint64_t 
render_wait_ticks; + uint64_t render_wait_spin_checks; + uint64_t readl_fb_count; + uint64_t readl_fb_sync_count; + uint64_t readl_fb_nosync_count; + uint64_t readl_fb_relaxed_count; + uint64_t readl_fb_sync_buf[3]; + uint64_t readl_fb_nosync_buf[3]; + uint64_t readl_fb_relaxed_buf[3]; + uint64_t readl_reg_count; + uint64_t readl_tex_count; + int wait_stats_enabled; + int wait_stats_explicit; + int lfb_relax_enabled; + int lfb_relax_full; + int lfb_relax_ignore_cmdfifo; + int lfb_relax_ignore_draw; + int lfb_relax_ignore_fb_writes; + int lfb_relax_front_sync; int force_blit_count; int can_blit; diff --git a/src/video/vid_voodoo.c b/src/video/vid_voodoo.c index 745347cd4..72a57e66c 100644 --- a/src/video/vid_voodoo.c +++ b/src/video/vid_voodoo.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -73,6 +74,66 @@ voodoo_log(const char *fmt, ...) # define voodoo_log(fmt, ...) #endif +static int +voodoo_env_is_disabled(const char *value) +{ + /* Accept common "off" values for env overrides. */ + return !strcmp(value, "0") || !strcmp(value, "off") || !strcmp(value, "false") || !strcmp(value, "disabled"); +} + +static void +voodoo_init_relax_settings(voodoo_t *voodoo) +{ + const char *relax_env = getenv("VOODOO_LFB_RELAX"); + const char *wait_env = getenv("VOODOO_WAIT_STATS"); + int relax_enabled = 1; + + /* Default to front-sync relax mode; wait stats are opt-in. 
*/ + if (!relax_env || !*relax_env) { + relax_env = "4"; + } else if (voodoo_env_is_disabled(relax_env)) { + relax_enabled = 0; + } + + voodoo->wait_stats_explicit = (wait_env && *wait_env); + voodoo->wait_stats_enabled = voodoo->wait_stats_explicit && !voodoo_env_is_disabled(wait_env); + + voodoo->lfb_relax_enabled = relax_enabled; + voodoo->lfb_relax_full = relax_enabled && (strcmp(relax_env, "full") == 0); + voodoo->lfb_relax_ignore_cmdfifo = relax_enabled && (!strcmp(relax_env, "nocmdfifo") || !strcmp(relax_env, "2") || !strcmp(relax_env, "3") || !strcmp(relax_env, "4") || !strcmp(relax_env, "frontsync")); + voodoo->lfb_relax_ignore_draw = relax_enabled && (!strcmp(relax_env, "nodraw") || !strcmp(relax_env, "2") || !strcmp(relax_env, "3") || !strcmp(relax_env, "4") || !strcmp(relax_env, "frontsync")); + voodoo->lfb_relax_ignore_fb_writes = relax_enabled && (!strcmp(relax_env, "nowrites") || !strcmp(relax_env, "3") || !strcmp(relax_env, "4") || !strcmp(relax_env, "frontsync")); + voodoo->lfb_relax_front_sync = relax_enabled && (!strcmp(relax_env, "4") || !strcmp(relax_env, "frontsync")); +} + +static void +voodoo_update_queued_buffers(voodoo_t *voodoo) +{ + switch (voodoo->queued_lfbMode & LFB_WRITE_MASK) { + case LFB_WRITE_FRONT: + voodoo->queued_fb_write_buffer = voodoo->queued_disp_buffer; + break; + case LFB_WRITE_BACK: + voodoo->queued_fb_write_buffer = voodoo->queued_draw_buffer; + break; + default: + voodoo->queued_fb_write_buffer = voodoo->queued_disp_buffer; + break; + } + + switch (voodoo->queued_fbzMode & FBZ_DRAW_MASK) { + case FBZ_DRAW_FRONT: + voodoo->queued_fb_draw_buffer = voodoo->queued_disp_buffer; + break; + case FBZ_DRAW_BACK: + voodoo->queued_fb_draw_buffer = voodoo->queued_draw_buffer; + break; + default: + voodoo->queued_fb_draw_buffer = voodoo->queued_draw_buffer; + break; + } +} + void voodoo_recalc(voodoo_t *voodoo) { @@ -167,11 +228,69 @@ voodoo_readw(uint32_t addr, void *priv) voodoo = set->voodoos[0]; } - voodoo->flush = 1; - while 
(!FIFO_EMPTY)
-        voodoo_wake_fifo_thread_now(voodoo);
-    voodoo_wait_for_render_thread_idle(voodoo);
-    voodoo->flush = 0;
+    /* Reads from aux/draw/write regions must see completed rendering; with relax mode disabled every read is synced, matching the unconditional flush this replaces. */
+    int need_sync = (voodoo->fb_read_offset == voodoo->params.aux_offset) ||
+                    (voodoo->fb_read_offset == voodoo->params.draw_offset) ||
+                    (voodoo->fb_read_offset == voodoo->fb_write_offset);
+    int do_sync = 0;
+    int read_buf = -1;
+
+    if (voodoo->fb_read_offset == voodoo->params.front_offset)
+        read_buf = VOODOO_BUF_FRONT;
+    else if (voodoo->fb_read_offset == voodoo->back_offset)
+        read_buf = VOODOO_BUF_BACK;
+    else if (voodoo->fb_read_offset == voodoo->params.aux_offset)
+        read_buf = VOODOO_BUF_AUX;
+
+    if (!voodoo->lfb_relax_enabled || (voodoo->lfb_relax_front_sync && read_buf >= 0 && read_buf != VOODOO_BUF_BACK))
+        need_sync = 1;
+
+    if (need_sync) {
+        if (!voodoo->lfb_relax_enabled)
+            do_sync = 1;
+        else if (voodoo->lfb_relax_full)
+            do_sync = 0;
+        else {
+            /* In relax mode, only back-buffer reads can skip the full FIFO flush. 
*/ + int pending_buf = 0; + int pending_unknown = 0; + + if (read_buf >= 0 && read_buf < VOODOO_BUF_COUNT) { + if (!voodoo->lfb_relax_ignore_fb_writes) + pending_buf += voodoo->pending_fb_writes_buf[read_buf]; + if (!voodoo->lfb_relax_ignore_draw) + pending_buf += voodoo->pending_draw_cmds_buf[read_buf]; + } + + if (!voodoo->lfb_relax_ignore_fb_writes) + pending_unknown += voodoo->pending_fb_writes_buf[VOODOO_BUF_UNKNOWN]; + if (!voodoo->lfb_relax_ignore_draw) + pending_unknown += voodoo->pending_draw_cmds_buf[VOODOO_BUF_UNKNOWN]; + if (!voodoo->lfb_relax_ignore_cmdfifo) { + if ((voodoo->cmdfifo_depth_rd != voodoo->cmdfifo_depth_wr) || voodoo->cmdfifo_in_sub) + pending_unknown++; + if ((voodoo->cmdfifo_depth_rd_2 != voodoo->cmdfifo_depth_wr_2) || voodoo->cmdfifo_in_sub_2) + pending_unknown++; + } + + if (read_buf != VOODOO_BUF_BACK) + do_sync = 1; + else + do_sync = (pending_buf || pending_unknown); + } + + if (do_sync) { + voodoo->flush = 1; + while (!FIFO_EMPTY) { + voodoo_wake_fifo_thread_now(voodoo); + thread_wait_event(voodoo->fifo_empty_event, -1); + } + voodoo_wait_for_render_thread_idle(voodoo); + voodoo->flush = 0; + } else if (voodoo->lfb_relax_enabled && !voodoo->lfb_relax_full) { + voodoo_wait_for_render_thread_idle(voodoo); + } + } return voodoo_fb_readw(addr, voodoo); } @@ -191,8 +310,17 @@ voodoo_readl(uint32_t addr, void *priv) cycles -= voodoo->read_time; if (addr & 0x800000) { /*Texture*/ + if (voodoo->wait_stats_enabled) + voodoo->readl_tex_count++; } else if (addr & 0x400000) /*Framebuffer*/ { + uint64_t fifo_wait_start = 0; + uint64_t fifo_wait_spins = 0; + int fifo_wait_active = 0; + int need_sync = 0; + int do_sync = 0; + int read_buf = -1; + if (SLI_ENABLED) { const voodoo_set_t *set = voodoo->set; int y = (addr >> 11) & 0x3ff; @@ -203,23 +331,116 @@ voodoo_readl(uint32_t addr, void *priv) voodoo = set->voodoos[0]; } - voodoo->flush = 1; - while (!FIFO_EMPTY) { - voodoo_wake_fifo_thread_now(voodoo); - 
thread_wait_event(voodoo->fifo_not_full_event, 1);
+        if (voodoo->wait_stats_enabled)
+            voodoo->readl_fb_count++;
+
+        if (voodoo->fb_read_offset == voodoo->params.front_offset)
+            read_buf = VOODOO_BUF_FRONT;
+        else if (voodoo->fb_read_offset == voodoo->back_offset)
+            read_buf = VOODOO_BUF_BACK;
+        else if (voodoo->fb_read_offset == voodoo->params.aux_offset)
+            read_buf = VOODOO_BUF_AUX;
+
+        /* Reads from aux/draw/write regions must see completed rendering; with relax mode disabled every read is synced, matching the unconditional flush this replaces. */
+        need_sync = (voodoo->fb_read_offset == voodoo->params.aux_offset) ||
+                    (voodoo->fb_read_offset == voodoo->params.draw_offset) ||
+                    (voodoo->fb_read_offset == voodoo->fb_write_offset);
+        if (!voodoo->lfb_relax_enabled || (voodoo->lfb_relax_front_sync && read_buf >= 0 && read_buf != VOODOO_BUF_BACK))
+            need_sync = 1;
+        if (need_sync) {
+            if (!voodoo->lfb_relax_enabled)
+                do_sync = 1;
+            else if (voodoo->lfb_relax_full)
+                do_sync = 0;
+            else {
+                /* In relax mode, only back-buffer reads can skip the full FIFO flush. */
+                int pending_buf = 0;
+                int pending_unknown = 0;
+
+                if (read_buf >= 0 && read_buf < VOODOO_BUF_COUNT) {
+                    if (!voodoo->lfb_relax_ignore_fb_writes)
+                        pending_buf += voodoo->pending_fb_writes_buf[read_buf];
+                    if (!voodoo->lfb_relax_ignore_draw)
+                        pending_buf += voodoo->pending_draw_cmds_buf[read_buf];
+                }
+
+                if (!voodoo->lfb_relax_ignore_fb_writes)
+                    pending_unknown += voodoo->pending_fb_writes_buf[VOODOO_BUF_UNKNOWN];
+                if (!voodoo->lfb_relax_ignore_draw)
+                    pending_unknown += voodoo->pending_draw_cmds_buf[VOODOO_BUF_UNKNOWN];
+                if (!voodoo->lfb_relax_ignore_cmdfifo) {
+                    if ((voodoo->cmdfifo_depth_rd != voodoo->cmdfifo_depth_wr) || voodoo->cmdfifo_in_sub)
+                        pending_unknown++;
+                    if ((voodoo->cmdfifo_depth_rd_2 != voodoo->cmdfifo_depth_wr_2) || voodoo->cmdfifo_in_sub_2)
+                        pending_unknown++;
+                }
+
+                if (read_buf != VOODOO_BUF_BACK)
+                    do_sync = 1;
+                else
+                    do_sync = (pending_buf || pending_unknown);
+            }
+        }
+
+        if (voodoo->wait_stats_enabled) {
+            if (do_sync)
+                voodoo->readl_fb_sync_count++;
+            else
+ 
voodoo->readl_fb_nosync_count++; + if (read_buf >= 0) { + if (do_sync) + voodoo->readl_fb_sync_buf[read_buf]++; + else + voodoo->readl_fb_nosync_buf[read_buf]++; + } + if (need_sync && voodoo->lfb_relax_enabled && !do_sync) { + voodoo->readl_fb_relaxed_count++; + if (read_buf >= 0) + voodoo->readl_fb_relaxed_buf[read_buf]++; + } + } + + if (do_sync) { + voodoo->flush = 1; + while (!FIFO_EMPTY) { + if (voodoo->wait_stats_enabled) { + if (!fifo_wait_active) { + fifo_wait_active = 1; + fifo_wait_start = plat_timer_read(); + voodoo->fifo_empty_waits++; + } + fifo_wait_spins++; + } + voodoo_wake_fifo_thread_now(voodoo); + thread_wait_event(voodoo->fifo_empty_event, -1); + } + if (fifo_wait_active) { + voodoo->fifo_empty_wait_ticks += plat_timer_read() - fifo_wait_start; + voodoo->fifo_empty_spin_checks += fifo_wait_spins; + } + voodoo_wait_for_render_thread_idle(voodoo); + voodoo->flush = 0; + } else if (need_sync && voodoo->lfb_relax_enabled && !voodoo->lfb_relax_full) { + voodoo_wait_for_render_thread_idle(voodoo); } - voodoo_wait_for_render_thread_idle(voodoo); - voodoo->flush = 0; temp = voodoo_fb_readl(addr, voodoo); - } else + } else { + if (voodoo->wait_stats_enabled) + voodoo->readl_reg_count++; + switch (addr & 0x3fc) { case SST_status: { int fifo_entries = FIFO_ENTRIES; int swap_count = voodoo->swap_count; int written = voodoo->cmd_written + voodoo->cmd_written_fifo + voodoo->cmd_written_fifo_2; - int busy = (written - voodoo->cmd_read) || (voodoo->cmdfifo_depth_rd != voodoo->cmdfifo_depth_wr); + int busy = (written - voodoo->cmd_read) || + (voodoo->cmdfifo_depth_rd != voodoo->cmdfifo_depth_wr) || + voodoo->voodoo_busy || + voodoo->render_voodoo_busy[0] || + (voodoo->render_threads >= 2 && voodoo->render_voodoo_busy[1]) || + (voodoo->render_threads == 4 && (voodoo->render_voodoo_busy[2] || voodoo->render_voodoo_busy[3])); if (SLI_ENABLED && voodoo->type != VOODOO_2) { voodoo_t *voodoo_other = (voodoo == voodoo->set->voodoos[0]) ? 
voodoo->set->voodoos[1] : voodoo->set->voodoos[0]; @@ -229,7 +450,12 @@ voodoo_readl(uint32_t addr, void *priv) swap_count = voodoo_other->swap_count; if ((voodoo_other->fifo_write_idx - voodoo_other->fifo_read_idx) > fifo_entries) fifo_entries = voodoo_other->fifo_write_idx - voodoo_other->fifo_read_idx; - if ((other_written - voodoo_other->cmd_read) || (voodoo_other->cmdfifo_depth_rd != voodoo_other->cmdfifo_depth_wr)) + if ((other_written - voodoo_other->cmd_read) || + (voodoo_other->cmdfifo_depth_rd != voodoo_other->cmdfifo_depth_wr) || + voodoo_other->voodoo_busy || + voodoo_other->render_voodoo_busy[0] || + (voodoo_other->render_threads >= 2 && voodoo_other->render_voodoo_busy[1]) || + (voodoo_other->render_threads == 4 && (voodoo_other->render_voodoo_busy[2] || voodoo_other->render_voodoo_busy[3]))) busy = 1; if (!voodoo_other->voodoo_busy) voodoo_wake_fifo_thread(voodoo_other); @@ -384,6 +610,7 @@ voodoo_readl(uint32_t addr, void *priv) voodoo_log("voodoo_readl : bad addr %08X\n", addr); temp = 0xffffffff; } + } return temp; } @@ -537,6 +764,11 @@ voodoo_writel(uint32_t addr, uint32_t val, void *priv) happen here on a real Voodoo*/ voodoo->disp_buffer = 0; voodoo->draw_buffer = 1; + voodoo->queued_disp_buffer = voodoo->disp_buffer; + voodoo->queued_draw_buffer = voodoo->draw_buffer; + voodoo->queued_lfbMode = voodoo->lfbMode; + voodoo->queued_fbzMode = voodoo->params.fbzMode; + voodoo_update_queued_buffers(voodoo); voodoo_recalc(voodoo); voodoo->front_offset = voodoo->params.front_offset; } @@ -925,6 +1157,7 @@ voodoo_card_init(void) voodoo_t *voodoo = malloc(sizeof(voodoo_t)); memset(voodoo, 0, sizeof(voodoo_t)); + voodoo_init_relax_settings(voodoo); voodoo->bilinear_enabled = device_get_config_int("bilinear"); voodoo->dithersub_enabled = device_get_config_int("dithersub"); voodoo->scrfilter = device_get_config_int("dacfilter"); @@ -992,6 +1225,9 @@ voodoo_card_init(void) voodoo->wake_render_thread[3] = thread_create_event(); voodoo->wake_main_thread = 
thread_create_event(); voodoo->fifo_not_full_event = thread_create_event(); + voodoo->fifo_empty_event = thread_create_event(); + thread_set_event(voodoo->fifo_empty_event); + ATOMIC_STORE(voodoo->fifo_empty_signaled, 1); voodoo->render_not_full_event[0] = thread_create_event(); voodoo->render_not_full_event[1] = thread_create_event(); voodoo->render_not_full_event[2] = thread_create_event(); @@ -1065,6 +1301,11 @@ voodoo_card_init(void) voodoo->disp_buffer = 0; voodoo->draw_buffer = 1; + voodoo->queued_disp_buffer = voodoo->disp_buffer; + voodoo->queued_draw_buffer = voodoo->draw_buffer; + voodoo->queued_lfbMode = voodoo->lfbMode; + voodoo->queued_fbzMode = voodoo->params.fbzMode; + voodoo_update_queued_buffers(voodoo); voodoo->force_blit_count = 0; voodoo->can_blit = 0; @@ -1080,6 +1321,7 @@ voodoo_2d3d_card_init(int type) voodoo_t *voodoo = malloc(sizeof(voodoo_t)); memset(voodoo, 0, sizeof(voodoo_t)); + voodoo_init_relax_settings(voodoo); voodoo->bilinear_enabled = device_get_config_int("bilinear"); voodoo->dithersub_enabled = device_get_config_int("dithersub"); voodoo->scrfilter = device_get_config_int("dacfilter"); @@ -1116,6 +1358,9 @@ voodoo_2d3d_card_init(int type) voodoo->wake_render_thread[3] = thread_create_event(); voodoo->wake_main_thread = thread_create_event(); voodoo->fifo_not_full_event = thread_create_event(); + voodoo->fifo_empty_event = thread_create_event(); + thread_set_event(voodoo->fifo_empty_event); + ATOMIC_STORE(voodoo->fifo_empty_signaled, 1); voodoo->render_not_full_event[0] = thread_create_event(); voodoo->render_not_full_event[1] = thread_create_event(); voodoo->render_not_full_event[2] = thread_create_event(); @@ -1189,6 +1434,11 @@ voodoo_2d3d_card_init(int type) voodoo->disp_buffer = 0; voodoo->draw_buffer = 1; + voodoo->queued_disp_buffer = voodoo->disp_buffer; + voodoo->queued_draw_buffer = voodoo->draw_buffer; + voodoo->queued_lfbMode = voodoo->lfbMode; + voodoo->queued_fbzMode = voodoo->params.fbzMode; + 
voodoo_update_queued_buffers(voodoo); voodoo->force_blit_count = 0; voodoo->can_blit = 0; @@ -1277,6 +1527,7 @@ voodoo_card_close(voodoo_t *voodoo) thread_wait(voodoo->render_thread[3]); } thread_destroy_event(voodoo->fifo_not_full_event); + thread_destroy_event(voodoo->fifo_empty_event); thread_destroy_event(voodoo->wake_main_thread); thread_destroy_event(voodoo->wake_fifo_thread); thread_destroy_event(voodoo->wake_render_thread[0]); @@ -1284,6 +1535,48 @@ voodoo_card_close(voodoo_t *voodoo) thread_destroy_event(voodoo->render_not_full_event[0]); thread_destroy_event(voodoo->render_not_full_event[1]); + if (voodoo->wait_stats_enabled && voodoo->wait_stats_explicit) { + pclog("Voodoo wait stats (type=%d): fifo_full waits=%" PRIu64 " ticks=%" PRIu64 " spins=%" PRIu64 + ", fifo_empty waits=%" PRIu64 " ticks=%" PRIu64 " spins=%" PRIu64 + ", render_wait waits=%" PRIu64 " ticks=%" PRIu64 " spins=%" PRIu64 + ", readl fb=%" PRIu64 " sync=%" PRIu64 " nosync=%" PRIu64 " relaxed=%" PRIu64 " relax=%d full=%d nocmdfifo=%d nodraw=%d nowrites=%d frontsync=%d" + " sync_buf f=%" PRIu64 " b=%" PRIu64 " a=%" PRIu64 + " nosync_buf f=%" PRIu64 " b=%" PRIu64 " a=%" PRIu64 + " relaxed_buf f=%" PRIu64 " b=%" PRIu64 " a=%" PRIu64 + " reg=%" PRIu64 " tex=%" PRIu64 "\n", + voodoo->type, + voodoo->fifo_full_waits, + voodoo->fifo_full_wait_ticks, + voodoo->fifo_full_spin_checks, + voodoo->fifo_empty_waits, + voodoo->fifo_empty_wait_ticks, + voodoo->fifo_empty_spin_checks, + voodoo->render_waits, + voodoo->render_wait_ticks, + voodoo->render_wait_spin_checks, + voodoo->readl_fb_count, + voodoo->readl_fb_sync_count, + voodoo->readl_fb_nosync_count, + voodoo->readl_fb_relaxed_count, + voodoo->lfb_relax_enabled, + voodoo->lfb_relax_full, + voodoo->lfb_relax_ignore_cmdfifo, + voodoo->lfb_relax_ignore_draw, + voodoo->lfb_relax_ignore_fb_writes, + voodoo->lfb_relax_front_sync, + voodoo->readl_fb_sync_buf[0], + voodoo->readl_fb_sync_buf[1], + voodoo->readl_fb_sync_buf[2], + 
voodoo->readl_fb_nosync_buf[0], + voodoo->readl_fb_nosync_buf[1], + voodoo->readl_fb_nosync_buf[2], + voodoo->readl_fb_relaxed_buf[0], + voodoo->readl_fb_relaxed_buf[1], + voodoo->readl_fb_relaxed_buf[2], + voodoo->readl_reg_count, + voodoo->readl_tex_count); + } + for (uint8_t c = 0; c < TEX_CACHE_MAX; c++) { if (voodoo->dual_tmus) free(voodoo->texture_cache[1][c].data); diff --git a/src/video/vid_voodoo_fifo.c b/src/video/vid_voodoo_fifo.c index a7ce0850e..c0fcfe349 100644 --- a/src/video/vid_voodoo_fifo.c +++ b/src/video/vid_voodoo_fifo.c @@ -62,8 +62,97 @@ voodoo_fifo_log(const char *fmt, ...) #define WAKE_DELAY_DEFAULT (TIMER_USEC * 100) -/* Per-card wake delay: Voodoo1 uses a larger delay to reduce FIFO wake frequency */ -#define WAKE_DELAY_OF(v) ((v)->type == VOODOO_1 ? (TIMER_USEC * 500) : WAKE_DELAY_DEFAULT) +/* Per-card wake delay: keep all Voodoo cards at the default */ +#define WAKE_DELAY_OF(v) (WAKE_DELAY_DEFAULT) + +static __inline uint8_t +voodoo_queue_color_buf_tag(const voodoo_t *voodoo, int buf) +{ + if (buf == voodoo->queued_disp_buffer) + return VOODOO_BUF_FRONT; + if (buf == voodoo->queued_draw_buffer) + return VOODOO_BUF_BACK; + return VOODOO_BUF_UNKNOWN; +} + +static __inline void +voodoo_queue_recalc_buffers(voodoo_t *voodoo) +{ + switch (voodoo->queued_lfbMode & LFB_WRITE_MASK) { + case LFB_WRITE_FRONT: + voodoo->queued_fb_write_buffer = voodoo->queued_disp_buffer; + break; + case LFB_WRITE_BACK: + voodoo->queued_fb_write_buffer = voodoo->queued_draw_buffer; + break; + default: + voodoo->queued_fb_write_buffer = voodoo->queued_disp_buffer; + break; + } + + switch (voodoo->queued_fbzMode & FBZ_DRAW_MASK) { + case FBZ_DRAW_FRONT: + voodoo->queued_fb_draw_buffer = voodoo->queued_disp_buffer; + break; + case FBZ_DRAW_BACK: + voodoo->queued_fb_draw_buffer = voodoo->queued_draw_buffer; + break; + default: + voodoo->queued_fb_draw_buffer = voodoo->queued_draw_buffer; + break; + } +} + +static __inline void +voodoo_queue_apply_reg(voodoo_t *voodoo, 
uint32_t addr, uint32_t val) +{ + switch (addr & 0x3fc) { + case SST_lfbMode: + voodoo->queued_lfbMode = val; + voodoo_queue_recalc_buffers(voodoo); + break; + case SST_fbzMode: + voodoo->queued_fbzMode = val; + voodoo_queue_recalc_buffers(voodoo); + break; + case SST_swapbufferCMD: + if (TRIPLE_BUFFER) { + voodoo->queued_disp_buffer = (voodoo->queued_disp_buffer + 1) % 3; + voodoo->queued_draw_buffer = (voodoo->queued_draw_buffer + 1) % 3; + } else { + voodoo->queued_disp_buffer = !voodoo->queued_disp_buffer; + voodoo->queued_draw_buffer = !voodoo->queued_draw_buffer; + } + voodoo_queue_recalc_buffers(voodoo); + break; + default: + break; + } +} + +static __inline uint8_t +voodoo_queue_reg_target_buf(voodoo_t *voodoo, uint32_t addr) +{ + switch (addr & 0x3fc) { + case SST_triangleCMD: + case SST_ftriangleCMD: + case SST_fastfillCMD: + return voodoo_queue_color_buf_tag(voodoo, voodoo->queued_fb_draw_buffer); + case SST_lfbMode: + case SST_fbzMode: + case SST_swapbufferCMD: + return VOODOO_BUF_UNKNOWN; + default: + return VOODOO_BUF_NONE; + } +} + +static __inline void +voodoo_cmdfifo_reg_writel(voodoo_t *voodoo, uint32_t addr, uint32_t val) +{ + voodoo_reg_writel(addr, val, voodoo); + voodoo_queue_apply_reg(voodoo, addr, val); +} void voodoo_wake_fifo_thread(voodoo_t *voodoo) { @@ -94,8 +183,19 @@ void voodoo_queue_command(voodoo_t *voodoo, uint32_t addr_type, uint32_t val) { fifo_entry_t *fifo = &voodoo->fifo[voodoo->fifo_write_idx & FIFO_MASK]; + uint64_t fifo_wait_start = 0; + uint64_t fifo_wait_spins = 0; + int fifo_wait_active = 0; while (FIFO_FULL) { + if (voodoo->wait_stats_enabled) { + if (!fifo_wait_active) { + fifo_wait_active = 1; + fifo_wait_start = plat_timer_read(); + voodoo->fifo_full_waits++; + } + fifo_wait_spins++; + } thread_reset_event(voodoo->fifo_not_full_event); if (FIFO_FULL) { thread_wait_event(voodoo->fifo_not_full_event, 1); /*Wait for room in ringbuffer*/ @@ -104,8 +204,38 @@ voodoo_queue_command(voodoo_t *voodoo, uint32_t addr_type, 
uint32_t val) } } - fifo->val = val; - fifo->addr_type = addr_type; + if (fifo_wait_active) { + voodoo->fifo_full_wait_ticks += plat_timer_read() - fifo_wait_start; + voodoo->fifo_full_spin_checks += fifo_wait_spins; + } + +#ifdef _WIN32 + /* Reset only after an empty signal to avoid heavy ResetEvent churn on Windows. */ + if (ATOMIC_LOAD(voodoo->fifo_empty_signaled)) { + ATOMIC_STORE(voodoo->fifo_empty_signaled, 0); + thread_reset_event(voodoo->fifo_empty_event); + } +#else + thread_reset_event(voodoo->fifo_empty_event); +#endif + + fifo->val = val; + fifo->addr_type = addr_type; + fifo->target_buf = VOODOO_BUF_NONE; + + if (((addr_type & FIFO_TYPE) == FIFO_WRITEW_FB) || + ((addr_type & FIFO_TYPE) == FIFO_WRITEL_FB)) { + fifo->target_buf = voodoo_queue_color_buf_tag(voodoo, voodoo->queued_fb_write_buffer); + ATOMIC_INC(voodoo->pending_fb_writes_buf[fifo->target_buf]); + } else if ((addr_type & FIFO_TYPE) == FIFO_WRITEL_REG) { + uint8_t reg_buf = voodoo_queue_reg_target_buf(voodoo, addr_type & FIFO_ADDR); + + if (reg_buf != VOODOO_BUF_NONE) { + fifo->target_buf = reg_buf; + ATOMIC_INC(voodoo->pending_draw_cmds_buf[fifo->target_buf]); + } + voodoo_queue_apply_reg(voodoo, addr_type & FIFO_ADDR, val); + } voodoo->fifo_write_idx++; voodoo->cmd_status &= ~(1 << 24); @@ -120,7 +250,7 @@ voodoo_flush(voodoo_t *voodoo) voodoo->flush = 1; while (!FIFO_EMPTY) { voodoo_wake_fifo_thread_now(voodoo); - thread_wait_event(voodoo->fifo_not_full_event, 1); + thread_wait_event(voodoo->fifo_empty_event, -1); } voodoo_wait_for_render_thread_idle(voodoo); voodoo->flush = 0; @@ -138,9 +268,7 @@ void voodoo_wait_for_swap_complete(voodoo_t *voodoo) { while (voodoo->swap_pending) { - thread_wait_event(voodoo->wake_fifo_thread, -1); - thread_reset_event(voodoo->wake_fifo_thread); - + /* Avoid waiting on wake_fifo_thread here; main thread may be draining the FIFO. 
*/ thread_wait_mutex(voodoo->swap_mutex); if ((voodoo->swap_pending && voodoo->flush) || FIFO_FULL) { /*Main thread is waiting for FIFO to empty, so skip vsync wait and just swap*/ @@ -153,6 +281,13 @@ voodoo_wait_for_swap_complete(voodoo_t *voodoo) break; } else thread_release_mutex(voodoo->swap_mutex); + /* Yield briefly while waiting for the swap to complete. */ +#ifdef _WIN32 + /* Sleep(1) can add measurable stalls on Windows. */ + plat_delay_ms(0); +#else + plat_delay_ms(1); +#endif } } @@ -261,9 +396,15 @@ voodoo_fifo_thread(void *param) switch (fifo->addr_type & FIFO_TYPE) { case FIFO_WRITEL_REG: while ((fifo->addr_type & FIFO_TYPE) == FIFO_WRITEL_REG) { - voodoo_reg_writel(fifo->addr_type & FIFO_ADDR, fifo->val, voodoo); + uint32_t reg_addr = fifo->addr_type & FIFO_ADDR; + uint8_t target_buf = fifo->target_buf; + + voodoo_reg_writel(reg_addr, fifo->val, voodoo); fifo->addr_type = FIFO_INVALID; voodoo->fifo_read_idx++; + if (target_buf < VOODOO_BUF_COUNT) { + ATOMIC_DEC(voodoo->pending_draw_cmds_buf[target_buf]); + } if (FIFO_EMPTY) break; fifo = &voodoo->fifo[voodoo->fifo_read_idx & FIFO_MASK]; @@ -272,9 +413,14 @@ voodoo_fifo_thread(void *param) case FIFO_WRITEW_FB: voodoo_wait_for_render_thread_idle(voodoo); while ((fifo->addr_type & FIFO_TYPE) == FIFO_WRITEW_FB) { + uint8_t target_buf = fifo->target_buf; + voodoo_fb_writew(fifo->addr_type & FIFO_ADDR, fifo->val, voodoo); fifo->addr_type = FIFO_INVALID; voodoo->fifo_read_idx++; + if (target_buf < VOODOO_BUF_COUNT) { + ATOMIC_DEC(voodoo->pending_fb_writes_buf[target_buf]); + } if (FIFO_EMPTY) break; fifo = &voodoo->fifo[voodoo->fifo_read_idx & FIFO_MASK]; @@ -283,9 +429,14 @@ voodoo_fifo_thread(void *param) case FIFO_WRITEL_FB: voodoo_wait_for_render_thread_idle(voodoo); while ((fifo->addr_type & FIFO_TYPE) == FIFO_WRITEL_FB) { + uint8_t target_buf = fifo->target_buf; + voodoo_fb_writel(fifo->addr_type & FIFO_ADDR, fifo->val, voodoo); fifo->addr_type = FIFO_INVALID; voodoo->fifo_read_idx++; + if 
(target_buf < VOODOO_BUF_COUNT) { + ATOMIC_DEC(voodoo->pending_fb_writes_buf[target_buf]); + } if (FIFO_EMPTY) break; fifo = &voodoo->fifo[voodoo->fifo_read_idx & FIFO_MASK]; @@ -326,6 +477,8 @@ voodoo_fifo_thread(void *param) voodoo->cmd_status |= (1 << 24); voodoo->cmd_status_2 |= (1 << 24); + thread_set_event(voodoo->fifo_empty_event); + ATOMIC_STORE(voodoo->fifo_empty_signaled, 1); while (voodoo->cmdfifo_enabled && (voodoo->cmdfifo_depth_rd != voodoo->cmdfifo_depth_wr || voodoo->cmdfifo_in_sub)) { uint64_t start_time = plat_timer_read(); @@ -418,7 +571,7 @@ voodoo_fifo_thread(void *param) if (voodoo->type >= VOODOO_BANSHEE && (addr & 0x3ff) == SST_swapbufferCMD) voodoo->cmd_written_fifo++; - voodoo_reg_writel(addr, val, voodoo); + voodoo_cmdfifo_reg_writel(voodoo, addr, val); } if (header & (1 << 15)) @@ -447,7 +600,7 @@ voodoo_fifo_thread(void *param) num = (header >> 29) & 7; mask = header; //(header >> 10) & 0xff; smode = (header >> 22) & 0xf; - voodoo_reg_writel(SST_sSetupMode, ((header >> 10) & 0xff) | (smode << 16), voodoo); + voodoo_cmdfifo_reg_writel(voodoo, SST_sSetupMode, ((header >> 10) & 0xff) | (smode << 16)); num_verticies = (header >> 6) & 0xf; v_num = 0; if (((header >> 3) & 7) == 2) @@ -492,9 +645,9 @@ voodoo_fifo_thread(void *param) voodoo->verts[3].sT1 = cmdfifo_get_f(voodoo); } if (v_num) - voodoo_reg_writel(SST_sDrawTriCMD, 0, voodoo); + voodoo_cmdfifo_reg_writel(voodoo, SST_sDrawTriCMD, 0); else - voodoo_reg_writel(SST_sBeginTriCMD, 0, voodoo); + voodoo_cmdfifo_reg_writel(voodoo, SST_sBeginTriCMD, 0); v_num++; if (v_num == 3 && ((header >> 3) & 7) == 0) v_num = 0; @@ -528,7 +681,7 @@ voodoo_fifo_thread(void *param) if (voodoo->type >= VOODOO_BANSHEE && (addr & 0x3ff) == SST_swapbufferCMD) voodoo->cmd_written_fifo++; - voodoo_reg_writel(addr, val, voodoo); + voodoo_cmdfifo_reg_writel(voodoo, addr, val); } } @@ -709,7 +862,7 @@ voodoo_fifo_thread(void *param) if (voodoo->type >= VOODOO_BANSHEE && (addr & 0x3ff) == SST_swapbufferCMD) 
voodoo->cmd_written_fifo_2++; - voodoo_reg_writel(addr, val, voodoo); + voodoo_cmdfifo_reg_writel(voodoo, addr, val); } if (header & (1 << 15)) @@ -738,7 +891,7 @@ voodoo_fifo_thread(void *param) num = (header >> 29) & 7; mask = header; //(header >> 10) & 0xff; smode = (header >> 22) & 0xf; - voodoo_reg_writel(SST_sSetupMode, ((header >> 10) & 0xff) | (smode << 16), voodoo); + voodoo_cmdfifo_reg_writel(voodoo, SST_sSetupMode, ((header >> 10) & 0xff) | (smode << 16)); num_verticies = (header >> 6) & 0xf; v_num = 0; if (((header >> 3) & 7) == 2) @@ -783,9 +936,9 @@ voodoo_fifo_thread(void *param) voodoo->verts[3].sT1 = cmdfifo_get_f_2(voodoo); } if (v_num) - voodoo_reg_writel(SST_sDrawTriCMD, 0, voodoo); + voodoo_cmdfifo_reg_writel(voodoo, SST_sDrawTriCMD, 0); else - voodoo_reg_writel(SST_sBeginTriCMD, 0, voodoo); + voodoo_cmdfifo_reg_writel(voodoo, SST_sBeginTriCMD, 0); v_num++; if (v_num == 3 && ((header >> 3) & 7) == 0) v_num = 0; @@ -819,7 +972,7 @@ voodoo_fifo_thread(void *param) if (voodoo->type >= VOODOO_BANSHEE && (addr & 0x3ff) == SST_swapbufferCMD) voodoo->cmd_written_fifo_2++; - voodoo_reg_writel(addr, val, voodoo); + voodoo_cmdfifo_reg_writel(voodoo, addr, val); } }