diff --git a/src/include/86box/nv/render/vid_nv3_render.h b/src/include/86box/nv/render/vid_nv3_render.h index 369230347..4b4f966ea 100644 --- a/src/include/86box/nv/render/vid_nv3_render.h +++ b/src/include/86box/nv/render/vid_nv3_render.h @@ -18,7 +18,7 @@ #pragma once /* Core */ -void nv3_render_current_bpp(svga_t *svga, nv3_position_16_t position, nv3_size_16_t size, nv3_grobj_t grobj, bool run_render_check); +void nv3_render_current_bpp(svga_t *svga, nv3_position_16_t position, nv3_size_16_t size, nv3_grobj_t grobj, bool run_render_check, bool use_destination_buffer); void nv3_render_current_bpp_dfb_8(uint32_t address); void nv3_render_current_bpp_dfb_16(uint32_t address); void nv3_render_current_bpp_dfb_32(uint32_t address); diff --git a/src/include/86box/nv/vid_nv3.h b/src/include/86box/nv/vid_nv3.h index e7bab44ac..fcfd2de72 100644 --- a/src/include/86box/nv/vid_nv3.h +++ b/src/include/86box/nv/vid_nv3.h @@ -79,9 +79,9 @@ extern const device_config_t nv3t_config[]; // Confi #define NV3_VBIOS_DEFAULT NV3_VBIOS_ERAZOR_V15403 // Temporary, will be loaded from settings -#define NV3_VRAM_SIZE_2MB 0x200000 // 2MB -#define NV3_VRAM_SIZE_4MB 0x400000 // 4MB -#define NV3_VRAM_SIZE_8MB 0x800000 // NV3T only +#define NV3_VRAM_SIZE_2MB 0x200000 // 2MB +#define NV3_VRAM_SIZE_4MB 0x400000 // 4MB +#define NV3_VRAM_SIZE_8MB 0x800000 // NV3T only // There is also 1mb supported by the card but it was never used // PCI config @@ -676,10 +676,7 @@ extern const device_config_t nv3t_config[]; // Confi #define NV3_PRMCIO_START 0x601000 -#define NV3_PRMCIO_CRTC_REGISTER_CUR_INDEX_MONO 0x6013B4 // Current CRTC Register Index - Monochrome -#define NV3_PRMCIO_CRTC_REGISTER_CUR_MONO 0x6013B5 // Currently Selected CRTC Register - Monochrome -#define NV3_PRMCIO_CRTC_REGISTER_CUR_INDEX_COLOR 0x6013D4 // Current CRTC Register Index - Colour -#define NV3_PRMCIO_CRTC_REGISTER_CUR_COLOR 0x6013D5 + #define NV3_PRMCIO_END 0x601FFF #define NV3_PDAC_START 0x680000 // OPTIONAL external DAC @@ -787,6 +784,9 @@ extern const device_config_t nv3t_config[]; // Confi // CRTC/CIO (0x3b0-0x3df) +#define NV3_CRTC_REGISTER_INDEX_MONO 0x3B4 +#define NV3_CRTC_REGISTER_MONO 0x3B5 // Currently Selected CRTC Register - Monochrome + #define NV3_CRTC_DATA_OUT 0x3C0 #define NV3_CRTC_MISCOUT 0x3C2 @@ -796,6 +796,8 @@ extern const device_config_t nv3t_config[]; // Confi #define NV3_CRTC_REGISTER_INDEX 0x3D4 #define NV3_CRTC_REGISTER_CURRENT 0x3D5 +#define NV3_CRTC_REGISTER_WTF 0x3D8 + // These are standard (0-18h) #define NV3_CRTC_REGISTER_HTOTAL 0x00 #define NV3_CRTC_REGISTER_HDISPEND 0x01 @@ -1076,8 +1078,8 @@ typedef struct nv3_pramdac_s uint32_t hserr_width; // horizontal sync error width uint8_t user_pixel_mask; // pixel mask for DAC lookup - uint32_t user_read_mode_address; // user read mode address - uint32_t user_write_mode_address; // user write mode address + uint32_t user_read_mode_address; // user read mode address + uint32_t user_write_mode_address; // user write mode address uint8_t palette[NV3_USER_DAC_PALETTE_SIZE]; // Palette Info/CLUT - 256 entriesxr,g,b = 768 bytes } nv3_pramdac_t; diff --git a/src/video/nv/nv3/nv3_core.c b/src/video/nv/nv3/nv3_core.c index deef4f093..2875f3cbc 100644 --- a/src/video/nv/nv3/nv3_core.c +++ b/src/video/nv/nv3/nv3_core.c @@ -196,7 +196,6 @@ void nv3_mmio_write16(uint32_t addr, uint16_t val, void* priv) nv_log_verbose_only("Redirected MMIO write16 to SVGA: addr=0x%04x val=0x%02x\n", addr, val); - nv3_svga_write(real_address, val & 0xFF, nv3); nv3_svga_write(real_address + 1, (val >> 8) & 0xFF, nv3); @@ -632,6 +631,9 @@ uint8_t nv3_svga_read(uint16_t addr, void* priv) case NV3_CRTC_REGISTER_INDEX: ret = nv3->nvbase.svga.crtcreg; break; + case NV3_CRTC_REGISTER_WTF: + ret = 0x08; // Required to not freeze in certain situations on v3.xx drivers + break; case NV3_CRTC_REGISTER_CURRENT: // Support the extended NVIDIA CRTC register range switch (nv3->nvbase.svga.crtcreg) diff --git a/src/video/nv/nv3/render/nv3_render_blit.c b/src/video/nv/nv3/render/nv3_render_blit.c index ed8858ee3..8b7d9cbf8 100644 --- a/src/video/nv/nv3/render/nv3_render_blit.c +++ b/src/video/nv/nv3/render/nv3_render_blit.c @@ -131,10 +131,7 @@ void nv3_render_blit_screen2screen(nv3_grobj_t grobj) memset(&nv3_s2sb_line_buffer, 0x00, (sizeof(uint32_t) * nv3->pgraph.blit.size.h) * (sizeof(uint32_t) * nv3->pgraph.blit.size.w)); /* First calculate our source and destination buffer */ - uint32_t src_buffer = (grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_SRC_BUFFER) & 0x03; - - bool wtf_nvidia = false; - + uint32_t src_buffer = (grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_SRC_BUFFER) & 0x03; uint32_t dst_buffer = 0; // 5 = just use the source buffer if ((grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_DST_BUFFER0_ENABLED) & 0x01) dst_buffer = 0; @@ -142,22 +139,10 @@ void nv3_render_blit_screen2screen(nv3_grobj_t grobj) if ((grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_DST_BUFFER2_ENABLED) & 0x01) dst_buffer = 2; if ((grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_DST_BUFFER3_ENABLED) & 0x01) dst_buffer = 3; - nv3_position_16_t old_position = {0}; - nv3_position_16_t new_position = {0}; + bool cross_buffer_blit = (nv3->pgraph.boffset[src_buffer] != nv3->pgraph.boffset[dst_buffer]); - /* If src_buffer != dst_buffer, the positions and src/dst buffer seem to be swapped. - Some kind of hardware errata (?), otherwise, I have no explanation for this behaviour. */ - if (nv3->pgraph.boffset[src_buffer] == nv3->pgraph.boffset[dst_buffer]) - { - old_position = nv3->pgraph.blit.point_in; - new_position = nv3->pgraph.blit.point_out; - } - else - { - old_position = nv3->pgraph.blit.point_out; - new_position = nv3->pgraph.blit.point_in; - wtf_nvidia = true; - } + nv3_position_16_t old_position = nv3->pgraph.blit.point_in; + nv3_position_16_t new_position = nv3->pgraph.blit.point_out; /* Coordinates for copying an entire line at a time */ uint32_t buf_position = 0, vram_position = 0, size_x = nv3->pgraph.blit.size.w; @@ -175,13 +160,12 @@ void nv3_render_blit_screen2screen(nv3_grobj_t grobj) size_x <<= 1; else if (nv3->nvbase.svga.bpp == 32) size_x <<= 2; - for (int32_t y = 0; y < nv3->pgraph.blit.size.h; y++) { buf_position = (nv3->pgraph.blit.size.w * y); /* shouldn't matter in non-wtf mode */ - vram_position = nv3_render_get_vram_address_for_buffer(old_position, grobj, dst_buffer); + vram_position = nv3_render_get_vram_address_for_buffer(old_position, grobj, src_buffer); memcpy(&nv3_s2sb_line_buffer[buf_position], &nv3->nvbase.svga.vram[vram_position], size_x); old_position.y++; @@ -192,16 +176,7 @@ void nv3_render_blit_screen2screen(nv3_grobj_t grobj) for (int32_t y = 0; y < nv3->pgraph.blit.size.h; y++) { buf_position = (nv3->pgraph.blit.size.w * y); - - /* Trying to avoid making the above function more complex. It seems, src is used most of th etime...But this is bad... */ - if (wtf_nvidia) - { - /* Use the parameters of our dst buffer with the position of our source buffer, seriously, who was thinking of this */ - vram_position = nv3_render_get_vram_address_for_buffer(new_position, grobj, src_buffer); - //vram_position = vram_position - nv3->pgraph.boffset[dst_buffer] + nv3->pgraph.boffset[src_buffer]; - } - else - vram_position = nv3_render_get_vram_address(new_position, grobj); + vram_position = nv3_render_get_vram_address_for_buffer(new_position, grobj, dst_buffer); memcpy(&nv3->nvbase.svga.vram[vram_position], &nv3_s2sb_line_buffer[buf_position], size_x); new_position.y++; @@ -216,50 +191,43 @@ void nv3_render_blit_screen2screen(nv3_grobj_t grobj) nv3_size_16_t blit_size = {0}; /* Change the smallest area of the screen that moved */ - if (nv3->pgraph.blit.point_out.x > nv3->pgraph.blit.point_in.x) - blit_size.w = (nv3->pgraph.blit.point_out.x - nv3->pgraph.blit.point_in.x) + nv3->pgraph.blit.size.w; - else if (nv3->pgraph.blit.point_out.x < nv3->pgraph.blit.point_in.x) - blit_size.w = (nv3->pgraph.blit.point_in.x - nv3->pgraph.blit.point_out.x) + nv3->pgraph.blit.size.w; + + if (cross_buffer_blit) + { + blit_position = nv3->pgraph.blit.point_out; + blit_size = nv3->pgraph.blit.size; + } else - blit_size.w = nv3->pgraph.blit.size.w; + { + if (nv3->pgraph.blit.point_out.x > nv3->pgraph.blit.point_in.x) + blit_size.w = (nv3->pgraph.blit.point_out.x - nv3->pgraph.blit.point_in.x) + nv3->pgraph.blit.size.w; + else if (nv3->pgraph.blit.point_out.x < nv3->pgraph.blit.point_in.x) + blit_size.w = (nv3->pgraph.blit.point_in.x - nv3->pgraph.blit.point_out.x) + nv3->pgraph.blit.size.w; + else + blit_size.w = nv3->pgraph.blit.size.w; - if (nv3->pgraph.blit.point_out.y > nv3->pgraph.blit.point_in.y) - blit_size.h = (nv3->pgraph.blit.point_out.y - nv3->pgraph.blit.point_in.y) + nv3->pgraph.blit.size.h; - else if (nv3->pgraph.blit.point_out.y < nv3->pgraph.blit.point_in.y) - blit_size.h = (nv3->pgraph.blit.point_in.y - nv3->pgraph.blit.point_out.y) + nv3->pgraph.blit.size.h; - else - blit_size.h = nv3->pgraph.blit.size.h; + if (nv3->pgraph.blit.point_out.y > nv3->pgraph.blit.point_in.y) + blit_size.h = (nv3->pgraph.blit.point_out.y - nv3->pgraph.blit.point_in.y) + nv3->pgraph.blit.size.h; + else if (nv3->pgraph.blit.point_out.y < nv3->pgraph.blit.point_in.y) + blit_size.h = (nv3->pgraph.blit.point_in.y - nv3->pgraph.blit.point_out.y) + nv3->pgraph.blit.size.h; + else + blit_size.h = nv3->pgraph.blit.size.h; - if (nv3->pgraph.blit.point_out.x > nv3->pgraph.blit.point_in.x) - blit_position.x = nv3->pgraph.blit.point_in.x; - else if (nv3->pgraph.blit.point_out.x <= nv3->pgraph.blit.point_in.x) // equals case, just use out - blit_position.x = nv3->pgraph.blit.point_out.x; + if (nv3->pgraph.blit.point_out.x > nv3->pgraph.blit.point_in.x) + blit_position.x = nv3->pgraph.blit.point_in.x; + else if (nv3->pgraph.blit.point_out.x <= nv3->pgraph.blit.point_in.x) // equals case, just use out + blit_position.x = nv3->pgraph.blit.point_out.x; - if (nv3->pgraph.blit.point_out.y > nv3->pgraph.blit.point_in.y) - blit_position.y = nv3->pgraph.blit.point_in.y; - else if (nv3->pgraph.blit.point_out.y <= nv3->pgraph.blit.point_in.y) // equals case, just use out - blit_position.y = nv3->pgraph.blit.point_out.y; + if (nv3->pgraph.blit.point_out.y > nv3->pgraph.blit.point_in.y) + blit_position.y = nv3->pgraph.blit.point_in.y; + else if (nv3->pgraph.blit.point_out.y <= nv3->pgraph.blit.point_in.y) // equals case, just use out + blit_position.y = nv3->pgraph.blit.point_out.y; - - /* Figure out the Display Buffer Address from the CRTCs */ - uint32_t dba = ((nv3->nvbase.svga.crtc[NV3_CRTC_REGISTER_RPC0] & 0x1F) << 16) - + (nv3->nvbase.svga.crtc[NV3_CRTC_REGISTER_STARTADDR_HIGH] << 8) - + nv3->nvbase.svga.crtc[NV3_CRTC_REGISTER_STARTADDR_LOW]; + } /* If the BUFFER_ADDRESS of the last buffer is not the DBA, we don't *actually* want to draw this, so let's not Apply stupid hack */ + - if (wtf_nvidia) - { - if (nv3->pgraph.boffset[src_buffer] != dba) - return; - } - else - { - if (nv3->pgraph.boffset[dst_buffer] != dba) - return; - } - - - nv3_render_current_bpp(&nv3->nvbase.svga, blit_position, blit_size, grobj, false); + nv3_render_current_bpp(&nv3->nvbase.svga, blit_position, blit_size, grobj, false, true); } \ No newline at end of file diff --git a/src/video/nv/nv3/render/nv3_render_core.c b/src/video/nv/nv3/render/nv3_render_core.c index 6eb1f50c1..916717984 100644 --- a/src/video/nv/nv3/render/nv3_render_core.c +++ b/src/video/nv/nv3/render/nv3_render_core.c @@ -31,10 +31,10 @@ #include <86box/utils/video_stdlib.h> /* Functions only used in this translation unit */ -void nv3_render_8bpp(nv3_position_16_t position, nv3_size_16_t size, nv3_grobj_t grobj); -void nv3_render_15bpp(nv3_position_16_t position, nv3_size_16_t size, nv3_grobj_t grobj); -void nv3_render_16bpp(nv3_position_16_t position, nv3_size_16_t size, nv3_grobj_t grobj); -void nv3_render_32bpp(nv3_position_16_t position, nv3_size_16_t size, nv3_grobj_t grobj); +void nv3_render_8bpp(nv3_position_16_t position, nv3_size_16_t size, nv3_grobj_t grobj, bool use_destination_buffer); +void nv3_render_15bpp(nv3_position_16_t position, nv3_size_16_t size, nv3_grobj_t grobj, bool use_destination_buffer); +void nv3_render_16bpp(nv3_position_16_t position, nv3_size_16_t size, nv3_grobj_t grobj, bool use_destination_buffer); +void nv3_render_32bpp(nv3_position_16_t position, nv3_size_16_t size, nv3_grobj_t grobj, bool use_destination_buffer); /* Expand a colour. NOTE: THE GPU INTERNALLY OPERATES ON RGB10!!!!!!!!!!! @@ -235,19 +235,6 @@ uint32_t nv3_render_get_vram_address(nv3_position_16_t position, nv3_grobj_t gro uint32_t vram_y = position.y; uint32_t current_buffer = (grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_SRC_BUFFER) & 0x03; - /* - uint32_t destination_buffer = 5; // 5 = just use the source buffer - - // src is hardcoded to 1, dst to 0. Hmm... - if ((grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_DST_BUFFER0_ENABLED) & 0x01) destination_buffer = 0; - if ((grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_DST_BUFFER1_ENABLED) & 0x01) destination_buffer = 1; - if ((grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_DST_BUFFER2_ENABLED) & 0x01) destination_buffer = 2; - if ((grobj.grobj_0 >> NV3_PGRAPH_CONTEXT_SWITCH_DST_BUFFER3_ENABLED) & 0x01) destination_buffer = 3; - - if (destination_buffer != current_buffer - && destination_buffer != 5) - current_buffer = destination_buffer; -*/ uint32_t framebuffer_bpp = nv3->nvbase.svga.bpp; // we have to multiply the x position by the number of bytes per pixel @@ -504,7 +491,7 @@ void nv3_render_write_pixel(nv3_position_16_t position, uint32_t color, nv3_grob /* Go write the pixel */ nv3_size_16_t size = {0}; size.w = size.h = 1; - nv3_render_current_bpp(&nv3->nvbase.svga, position, size, grobj, true); + nv3_render_current_bpp(&nv3->nvbase.svga, position, size, grobj, true, false); } /* Ensure the correct monitor size */ @@ -605,7 +592,7 @@ void nv3_render_current_bpp_dfb_32(uint32_t address) /* Blit to the monitor from GPU, current bpp */ -void nv3_render_current_bpp(svga_t *svga, nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grobj, bool run_render_check) +void nv3_render_current_bpp(svga_t *svga, nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grobj, bool run_render_check, bool use_destination_buffer) { /* Ensure that we are in the correct mode. Modified SVGA core code */ nv3_render_ensure_screen_size(); @@ -641,16 +628,16 @@ void nv3_render_current_bpp(svga_t *svga, nv3_position_16_t pos, nv3_size_16_t s fatal("NV3 - 4bpp not implemented (not even sure if it's SVGA only)"); break; case 8: - nv3_render_8bpp(pos, size, grobj); + nv3_render_8bpp(pos, size, grobj, use_destination_buffer); break; case 15: - nv3_render_15bpp(pos, size, grobj); + nv3_render_15bpp(pos, size, grobj, use_destination_buffer); break; case 16: - nv3_render_16bpp(pos, size, grobj); + nv3_render_16bpp(pos, size, grobj, use_destination_buffer); break; case 32: - nv3_render_32bpp(pos, size, grobj); + nv3_render_32bpp(pos, size, grobj, use_destination_buffer); break; } @@ -660,7 +647,7 @@ void nv3_render_current_bpp(svga_t *svga, nv3_position_16_t pos, nv3_size_16_t s Blit a certain region from the (destination buffer base + (position in vram)) to the 86Box monitor, indexed 8 bits per pixel format */ -void nv3_render_8bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grobj) +void nv3_render_8bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grobj, bool use_destination_buffer) { if (!nv3) return; @@ -675,7 +662,10 @@ void nv3_render_8bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grob for (uint32_t y = 0; y < size.h; y++) { /* re-set the vram address because we are basically "jumping" halfway across a line here */ - vram_base = nv3_render_get_vram_address(pos, grobj) & nv3->nvbase.svga.vram_display_mask; + if (use_destination_buffer) + vram_base = nv3_render_get_vram_address_for_buffer(pos, grobj, 0); // hardcode to zero for now + else + vram_base = nv3_render_get_vram_address(pos, grobj) & nv3->nvbase.svga.vram_display_mask; for (uint32_t x = 0; x < size.w; x++) { @@ -698,7 +688,7 @@ void nv3_render_8bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grob Blit a certain region from the (destination buffer base + (position in vram)) to the 86Box monitor, 15 bits per pixel format */ -void nv3_render_15bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grobj) +void nv3_render_15bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grobj, bool use_destination_buffer) { if (!nv3) return; @@ -713,7 +703,10 @@ void nv3_render_15bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t gro for (uint32_t y = 0; y < size.h; y++) { /* re-set the vram address because we are basically "jumping" halfway across a line here */ - vram_base = nv3_render_get_vram_address(pos, grobj) & nv3->nvbase.svga.vram_display_mask; + if (use_destination_buffer) + vram_base = nv3_render_get_vram_address_for_buffer(pos, grobj, 0); // hardcode to zero for now + else + vram_base = nv3_render_get_vram_address(pos, grobj) & nv3->nvbase.svga.vram_display_mask; for (uint32_t x = 0; x < size.w; x++) { @@ -736,7 +729,7 @@ void nv3_render_15bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t gro Blit a certain region from the (destination buffer base + (position in vram)) to the 86Box monitor, 16 bits per pixel format */ -void nv3_render_16bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grobj) +void nv3_render_16bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grobj, bool use_destination_buffer) { if (!nv3) return; @@ -750,8 +743,11 @@ void nv3_render_16bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t gro for (uint32_t y = 0; y < size.h; y++) { - /* re-get the vram address because we are basically "jumping" halfway across a line here */ - vram_base = nv3_render_get_vram_address(pos, grobj) & nv3->nvbase.svga.vram_display_mask; + /* re-set the vram address because we are basically "jumping" halfway across a line here */ + if (use_destination_buffer) + vram_base = nv3_render_get_vram_address_for_buffer(pos, grobj, 0); // hardcode to zero for now + else + vram_base = nv3_render_get_vram_address(pos, grobj) & nv3->nvbase.svga.vram_display_mask; for (uint32_t x = 0; x < size.w; x++) { @@ -774,7 +770,7 @@ void nv3_render_16bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t gro Blit a certain region from the (destination buffer base + (position in vram)) to the 86Box monitor, 32 bits per pixel format */ -void nv3_render_32bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grobj) +void nv3_render_32bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t grobj, bool use_destination_buffer) { if (!nv3) return; @@ -788,9 +784,12 @@ void nv3_render_32bpp(nv3_position_16_t pos, nv3_size_16_t size, nv3_grobj_t gro for (uint32_t y = 0; y < size.h; y++) { - /* re-get the vram address because we are basically "jumping" halfway across a line here */ - vram_base = nv3_render_get_vram_address(pos, grobj) & nv3->nvbase.svga.vram_display_mask; - + /* re-set the vram address because we are basically "jumping" halfway across a line here */ + if (use_destination_buffer) + vram_base = nv3_render_get_vram_address_for_buffer(pos, grobj, 0); // hardcode to zero for now + else + vram_base = nv3_render_get_vram_address(pos, grobj) & nv3->nvbase.svga.vram_display_mask; + for (uint32_t x = 0; x < size.w; x++) { p = &nv3->nvbase.svga.monitor->target_buffer->line[pos.y][pos.x];