even more fixes to hash lookup

This commit is contained in:
starfrost013
2025-02-26 23:55:49 +00:00
parent 8cf57fdc14
commit 71bdf8d081
5 changed files with 30 additions and 17 deletions

View File

@@ -14,7 +14,7 @@
* Also check the doc folder for some more notes
*
* vid_nv3.h: NV3 Architecture Hardware Reference (open-source)
* Last updated: 13 February 2025 (STILL WORKING ON IT!!!)
* Last updated: 26 February 2025 (STILL WORKING ON IT!!!)
*
* Authors: Connor Hyde <mario64crashed@gmail.com>
*
@@ -41,6 +41,8 @@ extern const device_config_t nv3_config[];
#define NV3_DMA_CHANNELS 8
#define NV3_DMA_SUBCHANNELS_PER_CHANNEL 8
#define NV3_DMA_CHANNELS_TOTAL 0x7F // This is also used somewhere despite there only being 8*8 = 64 channels
#define NV3_86BOX_TIMER_SYSTEM_FIX_QUOTIENT 1 // The amount by which we have to ration out the memory clock because it's not fast enough...
// Multiply by this value to get the real clock speed.
#define NV3_LAST_VALID_GRAPHICS_OBJECT_ID 0x1F

View File

@@ -127,8 +127,8 @@ void nv3_pbus_write(uint32_t address, uint32_t value)
uint8_t nv3_pbus_rma_read(uint16_t addr)
{
addr &= 0xFF;
uint32_t real_final_address;
uint8_t ret;
uint32_t real_final_address = 0x0;
uint8_t ret = 0x0;
switch (addr)
{

View File

@@ -290,6 +290,11 @@ uint32_t nv3_pfifo_read(uint32_t address)
return ret;
}
// Stub: currently does nothing.
// Declared with (void) so the compiler rejects calls that pass arguments.
void nv3_pfifo_trigger_dma_if_required(void)
{
    // Intentionally empty for now.
}
void nv3_pfifo_write(uint32_t address, uint32_t value)
{
// before doing anything, check the subsystem enablement
@@ -481,6 +486,9 @@ void nv3_pfifo_write(uint32_t address, uint32_t value)
nv_log("PFIFO Cache1 CTX Write Entry=%d value=0x%04x", ctx_entry_id, value);
}
/* Trigger DMA for notifications if we need to */
nv3_pfifo_trigger_dma_if_required();
}
/*
@@ -690,7 +698,7 @@ void nv3_pfifo_cache1_pull()
// NV_ROOT
if (!current_method)
{
if (!nv3_ramin_find_object(current_name, 0, current_channel, current_subchannel))
if (!nv3_ramin_find_object(current_name, 1, current_channel, current_subchannel))
return; // interrupt was fired, and we went to ramro
}

View File

@@ -114,7 +114,7 @@ uint32_t nv3_ramin_read32(uint32_t addr, void* priv)
{
val = vram_32bit[addr];
nv_log("Read dword from PRAMIN addr=0x%08x (raw address=0x%08x)\n", addr, raw_addr);
nv_log("Read dword from PRAMIN 0x%08x <- 0x%08x (raw address=0x%08x)\n", val, addr, raw_addr);
}
return val;
@@ -134,7 +134,7 @@ void nv3_ramin_write8(uint32_t addr, uint8_t val, void* priv)
// reversal unit size in this case is 16 bytes, vram size is 2-8mb (but 8mb is zx/nv3t only and 2mb...i haven't found a 22mb card)
addr ^= (nv3->nvbase.svga.vram_max - 0x10);
uint32_t val32 = 0x00;
uint32_t val32 = (uint32_t)val;
if (!nv3_ramin_arbitrate_write(addr, val32))
{
@@ -160,7 +160,7 @@ void nv3_ramin_write16(uint32_t addr, uint16_t val, void* priv)
addr ^= (nv3->nvbase.svga.vram_max - 0x10);
addr >>= 1; // what
uint32_t val32 = 0x00;
uint32_t val32 = (uint32_t)val;
if (!nv3_ramin_arbitrate_write(addr, val32))
{
@@ -186,9 +186,7 @@ void nv3_ramin_write32(uint32_t addr, uint32_t val, void* priv)
addr ^= (nv3->nvbase.svga.vram_max - 0x10);
addr >>= 2; // what
uint32_t val32 = 0x00;
if (!nv3_ramin_arbitrate_write(addr, val32))
if (!nv3_ramin_arbitrate_write(addr, val))
{
vram_32bit[addr] = val;
nv_log("Write dword to PRAMIN addr=0x%08x val=0x%08x (raw address=0x%08x)\n", addr, val, raw_addr);
@@ -354,8 +352,9 @@ bool nv3_ramin_find_object(uint32_t name, uint32_t cache_num, uint8_t channel, u
// Not a switch statement in case newer gpus have larger ramins
uint32_t bucket_entries = 2;
uint8_t ramht_size = (nv3->pfifo.ramht_config >> NV3_PFIFO_CONFIG_RAMHT_SIZE) & 0x03;
switch (nv3->pfifo.ramht_config)
switch (ramht_size)
{
case NV3_PFIFO_CONFIG_RAMHT_SIZE_4K:
// stays as is
@@ -374,7 +373,10 @@ bool nv3_ramin_find_object(uint32_t name, uint32_t cache_num, uint8_t channel, u
// Calculate the address in the hashtable
uint32_t ramht_base = ((nv3->pfifo.ramht_config >> NV3_PFIFO_CONFIG_RAMHT_BASE_ADDRESS) & 0x0F) << NV3_PFIFO_CONFIG_RAMHT_BASE_ADDRESS;
uint32_t ramht_cur_address = ramht_base + (nv3_ramht_hash(name, channel)) * bucket_entries * 8;
// This is certainly wrong. But the objects seem to be written to 4600? So I just multiply it by 80 to multiply the final address by 10.
// Why does this work?
uint32_t ramht_cur_address = ramht_base + (nv3_ramht_hash(name, channel) * bucket_entries * 8);
nv_log("Beginning search for graphics object at RAMHT base=0x%04x, name=0x%08x, Cache%d, channel=%d.%d)\n",
ramht_cur_address, name, cache_num, channel, subchannel);
@@ -382,8 +384,8 @@ bool nv3_ramin_find_object(uint32_t name, uint32_t cache_num, uint8_t channel, u
bool found_object = false;
// set up some variables
uint32_t found_obj_name;
nv3_ramin_context_t obj_context_struct;
uint32_t found_obj_name = 0x00;
nv3_ramin_context_t obj_context_struct = {0};
for (uint32_t bucket_entry = 0; bucket_entry < bucket_entries; bucket_entry++)
{

View File

@@ -32,13 +32,14 @@
It is used to get the offset within RAMHT of a graphics object.
*/
// Computes the RAMHT hash of an object name for a DMA channel.
//   name:    32-bit object name
//   channel: DMA channel id; only its low 8 bits take part in the hash
// Returns the four bytes of name XORed together, XORed with the low byte of channel,
// so the result is always in the range 0x00-0xFF.
uint32_t nv3_ramht_hash(uint32_t name, uint32_t channel)
{
    // Fold the four bytes of the name into one. XOR is commutative, so the result
    // does not depend on host endianness and no byte-pointer cast is needed.
    uint32_t folded_name = (name ^ (name >> 8) ^ (name >> 16) ^ (name >> 24)) & 0xFF;

    // Single definition of the hash (equivalent to XORing with (uint8_t)channel).
    uint32_t hash = folded_name ^ (channel & 0xFF);

    nv_log("Generated RAMHT hash 0x%04x (RAMHT slot=0x%04x, from name 0x%08x for DMA channel 0x%04x)\n", hash, (hash / 8), name, channel);

    return hash;
}