diff --git a/src/include/86box/nv/vid_nv3.h b/src/include/86box/nv/vid_nv3.h index 58ca3b623..334c8f615 100644 --- a/src/include/86box/nv/vid_nv3.h +++ b/src/include/86box/nv/vid_nv3.h @@ -14,7 +14,7 @@ * Also check the doc folder for some more notres * * vid_nv3.h: NV3 Architecture Hardware Reference (open-source) - * Last updated: 13 February 2025 (STILL WORKING ON IT!!!) + * Last updated: 26 February 2025 (STILL WORKING ON IT!!!) * * Authors: Connor Hyde * @@ -41,6 +41,8 @@ extern const device_config_t nv3_config[]; #define NV3_DMA_CHANNELS 8 #define NV3_DMA_SUBCHANNELS_PER_CHANNEL 8 +#define NV3_DMA_CHANNELS_TOTAL 0x7F // This is also used somewhere despite there only being 8*8 = 64 channels + #define NV3_86BOX_TIMER_SYSTEM_FIX_QUOTIENT 1 // The amount by which we have to ration out the memory clock because it's not fast enough... // Multiply by this value to get the real clock speed. #define NV3_LAST_VALID_GRAPHICS_OBJECT_ID 0x1F diff --git a/src/video/nv/nv3/subsystems/nv3_pbus.c b/src/video/nv/nv3/subsystems/nv3_pbus.c index c9fd0f3aa..2594e0739 100644 --- a/src/video/nv/nv3/subsystems/nv3_pbus.c +++ b/src/video/nv/nv3/subsystems/nv3_pbus.c @@ -127,8 +127,8 @@ void nv3_pbus_write(uint32_t address, uint32_t value) uint8_t nv3_pbus_rma_read(uint16_t addr) { addr &= 0xFF; - uint32_t real_final_address; - uint8_t ret; + uint32_t real_final_address = 0x0; + uint8_t ret = 0x0; switch (addr) { diff --git a/src/video/nv/nv3/subsystems/nv3_pfifo.c b/src/video/nv/nv3/subsystems/nv3_pfifo.c index ca5694af3..55b961c96 100644 --- a/src/video/nv/nv3/subsystems/nv3_pfifo.c +++ b/src/video/nv/nv3/subsystems/nv3_pfifo.c @@ -290,6 +290,11 @@ uint32_t nv3_pfifo_read(uint32_t address) return ret; } +void nv3_pfifo_trigger_dma_if_required() +{ + +} + void nv3_pfifo_write(uint32_t address, uint32_t value) { // before doing anything, check the subsystem enablement @@ -481,6 +486,9 @@ void nv3_pfifo_write(uint32_t address, uint32_t value) nv_log("PFIFO Cache1 CTX Write Entry=%d 
value=0x%04x", ctx_entry_id, value); } + + /* Trigger DMA for notifications if we need to */ + nv3_pfifo_trigger_dma_if_required(); } /* @@ -690,7 +698,7 @@ void nv3_pfifo_cache1_pull() // NV_ROOT if (!current_method) { - if (!nv3_ramin_find_object(current_name, 0, current_channel, current_subchannel)) + if (!nv3_ramin_find_object(current_name, 1, current_channel, current_subchannel)) return; // interrupt was fired, and we went to ramro } diff --git a/src/video/nv/nv3/subsystems/nv3_pramin.c b/src/video/nv/nv3/subsystems/nv3_pramin.c index e8452d870..25f6b36e1 100644 --- a/src/video/nv/nv3/subsystems/nv3_pramin.c +++ b/src/video/nv/nv3/subsystems/nv3_pramin.c @@ -114,7 +114,7 @@ uint32_t nv3_ramin_read32(uint32_t addr, void* priv) { val = vram_32bit[addr]; - nv_log("Read dword from PRAMIN addr=0x%08x (raw address=0x%08x)\n", addr, raw_addr); + nv_log("Read dword from PRAMIN 0x%08x <- 0x%08x (raw address=0x%08x)\n", val, addr, raw_addr); } return val; @@ -134,7 +134,7 @@ void nv3_ramin_write8(uint32_t addr, uint8_t val, void* priv) // reversal unit size in this case is 16 bytes, vram size is 2-8mb (but 8mb is zx/nv3t only and 2mb...i haven't found a 22mb card) addr ^= (nv3->nvbase.svga.vram_max - 0x10); - uint32_t val32 = 0x00; + uint32_t val32 = (uint32_t)val; if (!nv3_ramin_arbitrate_write(addr, val32)) { @@ -160,7 +160,7 @@ void nv3_ramin_write16(uint32_t addr, uint16_t val, void* priv) addr ^= (nv3->nvbase.svga.vram_max - 0x10); addr >>= 1; // what - uint32_t val32 = 0x00; + uint32_t val32 = (uint32_t)val; if (!nv3_ramin_arbitrate_write(addr, val32)) { @@ -186,9 +186,7 @@ void nv3_ramin_write32(uint32_t addr, uint32_t val, void* priv) addr ^= (nv3->nvbase.svga.vram_max - 0x10); addr >>= 2; // what - uint32_t val32 = 0x00; - - if (!nv3_ramin_arbitrate_write(addr, val32)) + if (!nv3_ramin_arbitrate_write(addr, val)) { vram_32bit[addr] = val; nv_log("Write dword to PRAMIN addr=0x%08x val=0x%08x (raw address=0x%08x)\n", addr, val, raw_addr); @@ -354,8 +352,9 @@ bool 
nv3_ramin_find_object(uint32_t name, uint32_t cache_num, uint8_t channel, u // Not a switch statement in case newer gpus have larger ramins uint32_t bucket_entries = 2; + uint8_t ramht_size = (nv3->pfifo.ramht_config >> NV3_PFIFO_CONFIG_RAMHT_SIZE) & 0x03; - switch (nv3->pfifo.ramht_config) + switch (ramht_size) { case NV3_PFIFO_CONFIG_RAMHT_SIZE_4K: // stays as is @@ -374,7 +373,10 @@ bool nv3_ramin_find_object(uint32_t name, uint32_t cache_num, uint8_t channel, u // Calculate the address in the hashtable uint32_t ramht_base = ((nv3->pfifo.ramht_config >> NV3_PFIFO_CONFIG_RAMHT_BASE_ADDRESS) & 0x0F) << NV3_PFIFO_CONFIG_RAMHT_BASE_ADDRESS; - uint32_t ramht_cur_address = ramht_base + (nv3_ramht_hash(name, channel)) * bucket_entries * 8; + + // This is probably wrong, but the objects seem to be written to 4600? The hash is scaled by bucket_entries * 8 below (not by 80, as this note used to say). + // TODO: work out why this works. + uint32_t ramht_cur_address = ramht_base + (nv3_ramht_hash(name, channel) * bucket_entries * 8); nv_log("Beginning search for graphics object at RAMHT base=0x%04x, name=0x%08x, Cache%d, channel=%d.%d)\n", ramht_cur_address, name, cache_num, channel, subchannel); @@ -382,8 +384,8 @@ bool nv3_ramin_find_object(uint32_t name, uint32_t cache_num, uint8_t channel, u bool found_object = false; // set up some variables - uint32_t found_obj_name; - nv3_ramin_context_t obj_context_struct; + uint32_t found_obj_name = 0x00; + nv3_ramin_context_t obj_context_struct = {0}; for (uint32_t bucket_entry = 0; bucket_entry < bucket_entries; bucket_entry++) { diff --git a/src/video/nv/nv3/subsystems/nv3_pramin_ramht.c b/src/video/nv/nv3/subsystems/nv3_pramin_ramht.c index 00ef8e046..e09f80e83 100644 --- a/src/video/nv/nv3/subsystems/nv3_pramin_ramht.c +++ b/src/video/nv/nv3/subsystems/nv3_pramin_ramht.c @@ -32,13 +32,14 @@ It is used to get the offset within RAMHT of a graphics object. 
*/ + uint32_t nv3_ramht_hash(uint32_t name, uint32_t channel) { - // convert the name to an array of bytes - uint8_t* hash_bytes = (uint8_t*)&name; + // the official nvidia hash algorithm, tweaked for readability + uint32_t hash = ((name ^ (name >> 8) ^ (name >> 16) ^ (name >> 24)) & 0xFF) ^ (channel & NV3_DMA_CHANNELS_TOTAL); + // is this the right endianness? - uint32_t hash = (hash_bytes[0] ^ hash_bytes[1] ^ hash_bytes[2] ^ hash_bytes[3] ^ (uint8_t)channel); nv_log("Generated RAMHT hash 0x%04x (RAMHT slot=0x%04x (from name 0x%08x for DMA channel 0x%04x)\n)\n", hash, (hash/8), name, channel); return hash; }