Big improvement to S2SB, but it doesn't fully work yet. Sometimes it copies in the other direction, what

?
This commit is contained in:
starfrost013
2025-04-12 03:02:55 +01:00
parent b606129b78
commit 15f49638e0
7 changed files with 151 additions and 55 deletions

View File

@@ -77,6 +77,10 @@ typedef enum nv3_pgraph_class_e
#define NV3_SET_NOTIFY_CONTEXT_FOR_DMA 0x0100 // Set object ctx for dma...see nv3_dma_context_t structure
#define NV3_SET_NOTIFY 0x0104
// Crap e.g. "OS name", that sometimes gets submitted, for some reason. So we just suppress the warning messages for them
#define NV3_NVCLASS_CRAP_START 0x0310
#define NV3_NVCLASS_CRAP_END 0x0324
// Render OPeration
#define NV3_ROP_SET_ROP 0x0300 // Set GDI standard rop

View File

@@ -18,7 +18,7 @@
#pragma once
/* Core */
void nv3_render_current_bpp(svga_t *svga, nv3_position_16_t position, nv3_size_16_t size, nv3_grobj_t grobj);
void nv3_render_current_bpp(svga_t *svga, nv3_position_16_t position, nv3_size_16_t size, nv3_grobj_t grobj, bool run_render_check);
void nv3_render_current_bpp_dfb_8(uint32_t address);
void nv3_render_current_bpp_dfb_16(uint32_t address);
void nv3_render_current_bpp_dfb_32(uint32_t address);
@@ -30,6 +30,7 @@ uint32_t nv3_render_read_pixel_32(nv3_position_16_t position, nv3_grobj_t grobj)
uint32_t nv3_render_get_vram_address(nv3_position_16_t position, nv3_grobj_t grobj);
uint32_t nv3_render_get_vram_address_for_buffer(nv3_position_16_t position, nv3_grobj_t grobj, uint32_t buffer);
uint32_t nv3_render_to_chroma(nv3_color_expanded_t expanded);
nv3_color_expanded_t nv3_render_expand_color(uint32_t color, nv3_grobj_t grobj); // Convert a colour to full RGB10 format from the current working format.

View File

@@ -1473,8 +1473,8 @@ void nv3_dfb_write8(uint32_t addr, uint8_t val, void* priv);
void nv3_dfb_write16(uint32_t addr, uint16_t val, void* priv); // Write 16-bit DFB
void nv3_dfb_write32(uint32_t addr, uint32_t val, void* priv); // Write 32-bit DFB
uint8_t nv3_svga_in(uint16_t addr, void* priv); // Read SVGA compatibility registers
void nv3_svga_out(uint16_t addr, uint8_t val, void* priv); // Write SVGA registers
uint8_t nv3_svga_read(uint16_t addr, void* priv); // Read SVGA compatibility registers
void nv3_svga_write(uint16_t addr, uint8_t val, void* priv); // Write SVGA registers
uint8_t nv3_pci_read(int32_t func, int32_t addr, void* priv); // Read PCI configuration registers
void nv3_pci_write(int32_t func, int32_t addr, uint8_t val, void* priv); // Write PCI configuration registers

View File

@@ -86,6 +86,10 @@ void nv3_class_01c_method(uint32_t param, uint32_t method_id, nv3_ramin_context_
nv_log("Method Execution: Image in Memory BUF%d TOP_LEFT_OFFSET=0x%08x\n", src_buffer_id, nv3->pgraph.boffset[src_buffer_id]);
break;
case NV3_NVCLASS_CRAP_START ... NV3_NVCLASS_CRAP_END:
/* Suppress but don't do anything */
nv3_pgraph_interrupt_invalid(NV3_PGRAPH_INTR_1_SOFTWARE_METHOD_PENDING);
break;
default:
warning("%s: Invalid or unimplemented method 0x%04x\n", nv3_class_names[context.class_id & 0x1F], method_id);
nv3_pgraph_interrupt_invalid(NV3_PGRAPH_INTR_1_SOFTWARE_METHOD_PENDING);

View File

@@ -42,8 +42,8 @@ void nv3_init_mappings_mmio(void);
void nv3_init_mappings_svga(void);
bool nv3_is_svga_redirect_address(uint32_t addr);
uint8_t nv3_svga_in(uint16_t addr, void* priv);
void nv3_svga_out(uint16_t addr, uint8_t val, void* priv);
uint8_t nv3_svga_read(uint16_t addr, void* priv);
void nv3_svga_write(uint16_t addr, uint8_t val, void* priv);
// Determine if this address needs to be redirected to the SVGA subsystem.
@@ -70,7 +70,7 @@ uint8_t nv3_mmio_read8(uint32_t addr, void* priv)
// svga writes are not logged anyway rn
uint32_t real_address = addr & 0x3FF;
ret = nv3_svga_in(real_address, nv3);
ret = nv3_svga_read(real_address, nv3);
nv_log_verbose_only("Redirected MMIO read8 to SVGA: addr=0x%04x returned 0x%04x\n", addr, ret);
@@ -95,8 +95,8 @@ uint16_t nv3_mmio_read16(uint32_t addr, void* priv)
// svga writes are not logged anyway rn
uint32_t real_address = addr & 0x3FF;
ret = nv3_svga_in(real_address, nv3)
| (nv3_svga_in(real_address + 1, nv3) << 8);
ret = nv3_svga_read(real_address, nv3)
| (nv3_svga_read(real_address + 1, nv3) << 8);
nv_log_verbose_only("Redirected MMIO read16 to SVGA: addr=0x%04x returned 0x%04x\n", addr, ret);
@@ -120,10 +120,10 @@ uint32_t nv3_mmio_read32(uint32_t addr, void* priv)
// svga writes are not logged anyway rn
uint32_t real_address = addr & 0x3FF;
ret = nv3_svga_in(real_address, nv3)
| (nv3_svga_in(real_address + 1, nv3) << 8)
| (nv3_svga_in(real_address + 2, nv3) << 16)
| (nv3_svga_in(real_address + 3, nv3) << 24);
ret = nv3_svga_read(real_address, nv3)
| (nv3_svga_read(real_address + 1, nv3) << 8)
| (nv3_svga_read(real_address + 2, nv3) << 16)
| (nv3_svga_read(real_address + 3, nv3) << 24);
nv_log_verbose_only("Redirected MMIO read32 to SVGA: addr=0x%04x returned 0x%04x\n", addr, ret);
@@ -152,7 +152,7 @@ void nv3_mmio_write8(uint32_t addr, uint8_t val, void* priv)
nv_log_verbose_only("Redirected MMIO write8 to SVGA: addr=0x%04x val=0x%02x\n", addr, val);
nv3_svga_out(real_address, val & 0xFF, nv3);
nv3_svga_write(real_address, val & 0xFF, nv3);
return;
}
@@ -180,8 +180,8 @@ void nv3_mmio_write16(uint32_t addr, uint16_t val, void* priv)
nv_log_verbose_only("Redirected MMIO write16 to SVGA: addr=0x%04x val=0x%02x\n", addr, val);
nv3_svga_out(real_address, val & 0xFF, nv3);
nv3_svga_out(real_address + 1, (val >> 8) & 0xFF, nv3);
nv3_svga_write(real_address, val & 0xFF, nv3);
nv3_svga_write(real_address + 1, (val >> 8) & 0xFF, nv3);
return;
}
@@ -208,10 +208,10 @@ void nv3_mmio_write32(uint32_t addr, uint32_t val, void* priv)
nv_log_verbose_only("Redirected MMIO write32 to SVGA: addr=0x%04x val=0x%02x\n", addr, val);
nv3_svga_out(real_address, val & 0xFF, nv3);
nv3_svga_out(real_address + 1, (val >> 8) & 0xFF, nv3);
nv3_svga_out(real_address + 2, (val >> 16) & 0xFF, nv3);
nv3_svga_out(real_address + 3, (val >> 24) & 0xFF, nv3);
nv3_svga_write(real_address, val & 0xFF, nv3);
nv3_svga_write(real_address + 1, (val >> 8) & 0xFF, nv3);
nv3_svga_write(real_address + 2, (val >> 16) & 0xFF, nv3);
nv3_svga_write(real_address + 3, (val >> 24) & 0xFF, nv3);
return;
}
@@ -597,7 +597,7 @@ void nv3_force_redraw(void* priv)
}
// Read from SVGA core memory
uint8_t nv3_svga_in(uint16_t addr, void* priv)
uint8_t nv3_svga_read(uint16_t addr, void* priv)
{
nv3_t* nv3 = (nv3_t*)priv;
@@ -662,7 +662,7 @@ uint8_t nv3_svga_in(uint16_t addr, void* priv)
}
// Write to SVGA core memory
void nv3_svga_out(uint16_t addr, uint8_t val, void* priv)
void nv3_svga_write(uint16_t addr, uint8_t val, void* priv)
{
// sanity check
if (!nv3)
@@ -939,8 +939,8 @@ void nv3_init_mappings_svga(void)
nv3->nvbase.svga.vram, 0, &nv3->nvbase.svga);
io_sethandler(0x03c0, 0x0020,
nv3_svga_in, NULL, NULL,
nv3_svga_out, NULL, NULL,
nv3_svga_read, NULL, NULL,
nv3_svga_write, NULL, NULL,
nv3);
}
@@ -964,14 +964,14 @@ void nv3_update_mappings(void)
(nv3->pci_config.pci_regs[PCI_REG_COMMAND] & PCI_COMMAND_IO) ? nv_log("Enable I/O\n") : nv_log("Disable I/O\n");
io_removehandler(0x03c0, 0x0020,
nv3_svga_in, NULL, NULL,
nv3_svga_out, NULL, NULL,
nv3_svga_read, NULL, NULL,
nv3_svga_write, NULL, NULL,
nv3);
if (nv3->pci_config.pci_regs[PCI_REG_COMMAND] & PCI_COMMAND_IO)
io_sethandler(0x03c0, 0x0020,
nv3_svga_in, NULL, NULL,
nv3_svga_out, NULL, NULL,
nv3_svga_read, NULL, NULL,
nv3_svga_write, NULL, NULL,
nv3);
if (!(nv3->pci_config.pci_regs[PCI_REG_COMMAND] & PCI_COMMAND_MEM))
@@ -1117,7 +1117,7 @@ void* nv3_init(const device_t *info)
pci_add_card(PCI_ADD_NORMAL, nv3_pci_read, nv3_pci_write, NULL, &nv3->nvbase.pci_slot);
svga_init(&nv3_device_pci, &nv3->nvbase.svga, nv3, nv3->nvbase.vram_amount,
nv3_recalc_timings, nv3_svga_in, nv3_svga_out, nv3_draw_cursor, NULL);
nv3_recalc_timings, nv3_svga_read, nv3_svga_write, nv3_draw_cursor, NULL);
if (nv3->nvbase.gpu_revision == NV3_PCI_CFG_REVISION_C00)
video_inform(VIDEO_FLAG_TYPE_SPECIAL, &timing_nv3t_pci);
@@ -1132,7 +1132,7 @@ void* nv3_init(const device_t *info)
pci_add_card(PCI_ADD_AGP, nv3_pci_read, nv3_pci_write, NULL, &nv3->nvbase.pci_slot);
svga_init(&nv3_device_agp, &nv3->nvbase.svga, nv3, nv3->nvbase.vram_amount,
nv3_recalc_timings, nv3_svga_in, nv3_svga_out, nv3_draw_cursor, NULL);
nv3_recalc_timings, nv3_svga_read, nv3_svga_write, nv3_draw_cursor, NULL);
if (nv3->nvbase.gpu_revision == NV3_PCI_CFG_REVISION_C00)
video_inform(VIDEO_FLAG_TYPE_SPECIAL, &timing_nv3t_agp);

View File

@@ -130,14 +130,10 @@ void nv3_render_blit_screen2screen(nv3_grobj_t grobj)
&& nv3->pgraph.blit.size.h < NV3_MAX_VERTICAL_SIZE)
memset(&nv3_s2sb_line_buffer, 0x00, (sizeof(uint32_t) * nv3->pgraph.blit.size.h) * (sizeof(uint32_t) * nv3->pgraph.blit.size.w));
nv3_position_16_t old_position = nv3->pgraph.blit.point_in;
nv3_position_16_t new_position = nv3->pgraph.blit.point_out;
/* First calculate our source and destination buffer */
uint32_t src_buffer = (grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_SRC_BUFFER) & 0x03;
/* test DST_BUFFER code
I assume for 2d at least only one is allowed at a time
*/
bool wtf_nvidia = false;
uint32_t dst_buffer = 0; // 5 = just use the source buffer
@@ -145,13 +141,23 @@ void nv3_render_blit_screen2screen(nv3_grobj_t grobj)
if ((grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_DST_BUFFER1_ENABLED) & 0x01) dst_buffer = 1;
if ((grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_DST_BUFFER2_ENABLED) & 0x01) dst_buffer = 2;
if ((grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_DST_BUFFER3_ENABLED) & 0x01) dst_buffer = 3;
nv3_position_16_t old_position;
nv3_position_16_t new_position;
uint16_t end_x_in = (nv3->pgraph.blit.point_in.x + nv3->pgraph.blit.size.w); /* needed for bounds checking */
uint16_t end_x_out = (nv3->pgraph.blit.point_out.x + nv3->pgraph.blit.size.w);
uint16_t end_y = (nv3->pgraph.blit.point_out.y + nv3->pgraph.blit.size.h);
uint32_t pixel_to_copy = 0x00;
/* If src_buffer != dst_buffer, the positions and src/dst buffer seem to be swapped.
Some kind of hardware errata (?), otherwise, I have no explanation for this behaviour. */
if (nv3->pgraph.boffset[src_buffer] == nv3->pgraph.boffset[dst_buffer])
{
old_position = nv3->pgraph.blit.point_in;
new_position = nv3->pgraph.blit.point_out;
}
else
{
old_position = nv3->pgraph.blit.point_out;
new_position = nv3->pgraph.blit.point_in;
wtf_nvidia = true;
}
/* Coordinates for copying an entire line at a time */
uint32_t buf_position = 0, vram_position = 0, size_x = nv3->pgraph.blit.size.w;
@@ -170,23 +176,35 @@ void nv3_render_blit_screen2screen(nv3_grobj_t grobj)
else if (nv3->nvbase.svga.bpp == 32)
size_x <<= 2;
for (int32_t y = 0; y < nv3->pgraph.blit.size.h; y++)
{
old_position.y = nv3->pgraph.blit.point_in.y + y;
/* 32bit buffer */
buf_position = (nv3->pgraph.blit.size.w * y);
vram_position = nv3_render_get_vram_address(old_position, grobj);
/* shouldn't matter in non-wtf mode */
vram_position = nv3_render_get_vram_address_for_buffer(old_position, grobj, dst_buffer);
memcpy(&nv3_s2sb_line_buffer[buf_position], &nv3->nvbase.svga.vram[vram_position], size_x);
old_position.y++;
/* 32bit buffer */
}
/* simply write it all back to vram */
for (int32_t y = 0; y < nv3->pgraph.blit.size.h; y++)
{
buf_position = (nv3->pgraph.blit.size.w * y);
new_position.y = nv3->pgraph.blit.point_out.y + y;
vram_position = nv3_render_get_vram_address(new_position, grobj);
/* Trying to avoid making the above function more complex. It seems, src is used most of the time...But this is bad... */
if (wtf_nvidia)
{
/* Use the parameters of our dst buffer with the position of our source buffer, seriously, who was thinking of this */
vram_position = nv3_render_get_vram_address_for_buffer(new_position, grobj, src_buffer);
//vram_position = vram_position - nv3->pgraph.boffset[dst_buffer] + nv3->pgraph.boffset[src_buffer];
}
else
vram_position = nv3_render_get_vram_address(new_position, grobj);
memcpy(&nv3->nvbase.svga.vram[vram_position], &nv3_s2sb_line_buffer[buf_position], size_x);
new_position.y++;
}
/*
@@ -222,8 +240,26 @@ void nv3_render_blit_screen2screen(nv3_grobj_t grobj)
else if (nv3->pgraph.blit.point_out.y <= nv3->pgraph.blit.point_in.y) // equals case, just use out
blit_position.y = nv3->pgraph.blit.point_out.y;
uint32_t buf_end = ((nv3->nvbase.svga.bpp + 1) >> 3) * xsize * ysize;
//if (nv3->pgraph.boffset[dst_buffer] <= buf_end)
//nv3_render_current_bpp(&nv3->nvbase.svga, blit_position, blit_size, grobj);
/* Figure out the Display Buffer Address from the CRTCs */
uint32_t dba = ((nv3->nvbase.svga.crtc[NV3_CRTC_REGISTER_RPC0] & 0x1F) << 16)
+ (nv3->nvbase.svga.crtc[NV3_CRTC_REGISTER_STARTADDR_HIGH] << 8)
+ nv3->nvbase.svga.crtc[NV3_CRTC_REGISTER_STARTADDR_LOW];
/* If the BUFFER_ADDRESS of the last buffer is not the DBA, we don't *actually* want to draw this, so let's not
Apply stupid hack */
if (wtf_nvidia)
{
if (nv3->pgraph.boffset[src_buffer] != dba)
return;
}
else
{
if (nv3->pgraph.boffset[dst_buffer] != dba)
return;
}
nv3_render_current_bpp(&nv3->nvbase.svga, blit_position, blit_size, grobj, false);
}

View File

@@ -225,6 +225,7 @@ uint32_t nv3_render_get_vram_address(nv3_position_16_t position, nv3_grobj_t gro
uint32_t vram_y = position.y;
uint32_t current_buffer = (grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_SRC_BUFFER) & 0x03;
/*
uint32_t destination_buffer = 5; // 5 = just use the source buffer
// src is hardcoded to 1, dst to 0. Hmm...
@@ -236,7 +237,7 @@ uint32_t nv3_render_get_vram_address(nv3_position_16_t position, nv3_grobj_t gro
if (destination_buffer != current_buffer
&& destination_buffer != 5)
current_buffer = destination_buffer;
*/
uint32_t framebuffer_bpp = nv3->nvbase.svga.bpp;
// we have to multiply the x position by the number of bytes per pixel
@@ -260,6 +261,36 @@ uint32_t nv3_render_get_vram_address(nv3_position_16_t position, nv3_grobj_t gro
return pixel_addr_vram;
}
/*
 * Compute the video RAM byte address of a pixel position inside an explicitly selected framebuffer.
 *
 * position: pixel coordinates; x is scaled to bytes according to the current SVGA bpp.
 * grobj:    graphics object; not read by this function (the buffer is chosen by the caller instead).
 * buffer:   index used to look up the pitch and offset in pgraph.bpitch[] / pgraph.boffset[].
 *
 * Returns x_in_bytes + pitch * y + buffer offset, masked with the SVGA vram_mask.
 */
uint32_t nv3_render_get_vram_address_for_buffer(nv3_position_16_t position, nv3_grobj_t grobj, uint32_t buffer)
{
    uint32_t bits_per_pixel = nv3->nvbase.svga.bpp;

    /* Horizontal offset in bytes; left as the raw x coordinate for 8bpp and any other depth. */
    uint32_t x_bytes = position.x;
    uint32_t row     = position.y;

    if (bits_per_pixel == 15 || bits_per_pixel == 16)
        x_bytes = position.x << 1; /* two bytes per pixel */
    else if (bits_per_pixel == 32)
        x_bytes = position.x << 2; /* four bytes per pixel */

    /* Row start within the chosen buffer, plus that buffer's base offset. */
    uint32_t address = x_bytes + (nv3->pgraph.bpitch[buffer] * row) + nv3->pgraph.boffset[buffer];

    address &= nv3->nvbase.svga.vram_mask;

    return address;
}
/* Convert a dumb framebuffer address to a position. No buffer setup or anything, but just start at 0,0 for address 0. */
nv3_position_16_t nv3_render_get_dfb_position(uint32_t vram_address)
{
@@ -470,7 +501,7 @@ void nv3_render_write_pixel(nv3_position_16_t position, uint32_t color, nv3_grob
/* Go write the pixel */
nv3_size_16_t size = {0};
size.w = size.h = 1;
nv3_render_current_bpp(&nv3->nvbase.svga, position, size, grobj);
nv3_render_current_bpp(&nv3->nvbase.svga, position, size, grobj, true);
}
/* Ensure the correct monitor size */
@@ -560,14 +591,34 @@ void nv3_render_current_bpp_dfb_32(uint32_t address)
/* Blit to the monitor from GPU, current bpp */
void nv3_render_current_bpp(svga_t *svga, nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grobj)
void nv3_render_current_bpp(svga_t *svga, nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grobj, bool run_render_check)
{
/* Ensure that we are in the correct mode. Modified SVGA core code */
nv3_render_ensure_screen_size();
/* Don't try and draw stuff that is past the buffer, but, leave it in Video RAM */
//if (nv3->nvbase.last_buffer_address > (((nv3->nvbase.svga.bpp + 1) >> 3) * xsize * ysize))
//return;
/* Don't try and draw stuff that is past the buffer, but, leave it in Video RAM, so it can be used for s2sb's etc */
/* Not needed for s2sb*/
if (run_render_check)
{
/* Figure out the Display Buffer Address from the CRTCs */
uint32_t dba = ((nv3->nvbase.svga.crtc[NV3_CRTC_REGISTER_RPC0] & 0x1F) << 16)
+ (nv3->nvbase.svga.crtc[NV3_CRTC_REGISTER_STARTADDR_HIGH] << 8)
+ nv3->nvbase.svga.crtc[NV3_CRTC_REGISTER_STARTADDR_LOW];
/* Check our destination(?) buffer */
uint32_t dst_buffer = 0; // 5 = just use the source buffer
if ((grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_DST_BUFFER0_ENABLED) & 0x01) dst_buffer = 0;
if ((grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_DST_BUFFER1_ENABLED) & 0x01) dst_buffer = 1;
if ((grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_DST_BUFFER2_ENABLED) & 0x01) dst_buffer = 2;
if ((grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_DST_BUFFER3_ENABLED) & 0x01) dst_buffer = 3;
/* If the BUFFER_ADDRESS of the last buffer is not the DBA, we don't *actually* want to draw this, so let's not */
if (nv3->pgraph.boffset[dst_buffer] != dba)
return;
}
switch (nv3->nvbase.svga.bpp)
{