diff --git a/src/acpi.c b/src/acpi.c index 26855a743..d8555cb95 100644 --- a/src/acpi.c +++ b/src/acpi.c @@ -32,6 +32,7 @@ #include <86box/keyboard.h> #include <86box/nvr.h> #include <86box/pit.h> +#include <86box/apm.h> #include <86box/acpi.h> @@ -77,8 +78,28 @@ acpi_update_irq(void *priv) } +static void +acpi_raise_smi(void *priv) +{ + acpi_t *dev = (acpi_t *) priv; + + if (dev->vendor == VEN_VIA) { + if ((dev->regs.glbctl & 0x01) && (!dev->regs.smi_lock || !dev->regs.smi_active)) { + smi_line = 1; + dev->regs.smi_active = 1; + } + } else { + if (dev->regs.glbctl & 0x01) { + smi_line = 1; + /* Clear bit 16 of GLBCTL. */ + dev->regs.glbctl &= ~0x00010000; + } + } +} + + static uint32_t -acpi_reg_read_common(int size, uint16_t addr, void *p) +acpi_reg_read_intel(int size, uint16_t addr, void *p) { acpi_t *dev = (acpi_t *) p; uint32_t ret = 0x00000000; @@ -117,16 +138,6 @@ acpi_reg_read_common(int size, uint16_t addr, void *p) /* PCNTRL - Processor Control Register (IO) */ ret = (dev->regs.pcntrl >> shift32) & 0xff; break; - case 0x14: - /* PLVL2 - Processor Level 2 Register (IO) */ - if (size == 1) - ret = dev->regs.plvl2; - break; - case 0x15: - /* PLVL3 - Processor Level 3 Register (IO) */ - if (size == 1) - ret = dev->regs.plvl3; - break; case 0x18: case 0x19: /* GLBSTS - Global Status Register (IO) */ ret = (dev->regs.glbsts >> shift16) & 0xff; @@ -173,8 +184,118 @@ acpi_reg_read_common(int size, uint16_t addr, void *p) } +static uint32_t +acpi_reg_read_via(int size, uint16_t addr, void *p) +{ + acpi_t *dev = (acpi_t *) p; + uint32_t ret = 0x00000000; + int shift16, shift32; + + addr &= 0xff; + shift16 = (addr & 1) << 3; + shift32 = (addr & 3) << 3; + + switch (addr) { + case 0x00: case 0x01: + /* PMSTS - Power Management Status Register (IO) */ + ret = (dev->regs.pmsts >> shift16) & 0xff; + break; + case 0x02: case 0x03: + /* PMEN - Power Management Resume Enable Register (IO) */ + ret = (dev->regs.pmen >> shift16) & 0xff; + break; + case 0x04: case 0x05: + /* PMCNTRL - Power Management Control Register (IO) */ + ret = (dev->regs.pmcntrl >> shift16) & 0xff; + break; + case 0x08: case 0x09: case 0x0a: case 0x0b: + /* PMTMR - Power Management Timer Register (IO) */ + ret = (dev->regs.timer_val >> shift32) & 0xff; + break; + case 0x10: case 0x11: case 0x12: case 0x13: + /* PCNTRL - Processor Control Register (IO) */ + ret = (dev->regs.pcntrl >> shift32) & 0xff; + break; + case 0x20: case 0x21: + /* GPSTS - General Purpose Status Register (IO) */ + ret = (dev->regs.gpsts >> shift16) & 0xff; + break; + case 0x22: case 0x23: + /* General Purpose SCI Enable */ + ret = (dev->regs.gpscien >> shift16) & 0xff; + break; + case 0x24: case 0x25: + /* General Purpose SMI Enable */ + ret = (dev->regs.gpsmien >> shift16) & 0xff; + break; + case 0x26: case 0x27: + /* Power Supply Control */ + ret = (dev->regs.pscntrl >> shift16) & 0xff; + break; + case 0x28: case 0x29: + /* GLBSTS - Global Status Register (IO) */ + ret = (dev->regs.glbsts >> shift16) & 0xff; + break; + case 0x2a: case 0x2b: + /* GLBEN - Global Enable Register (IO) */ + ret = (dev->regs.glben >> shift16) & 0xff; + break; + case 0x2c: case 0x2d: + /* GLBCTL - Global Control Register (IO) */ + ret = (dev->regs.glbctl >> shift16) & 0xff; + ret &= ~0x0110; + ret |= (dev->regs.smi_lock ? 0x10 : 0x00); + ret |= (dev->regs.smi_active ? 0x01 : 0x00); + break; + case 0x2f: + /* SMI Command */ + if (size == 1) + ret = dev->regs.smicmd & 0xff; + break; + case 0x30: case 0x31: case 0x32: case 0x33: + /* Primary Activity Detect Status */ + ret = (dev->regs.padsts >> shift32) & 0xff; + break; + case 0x34: case 0x35: case 0x36: case 0x37: + /* Primary Activity Detect Enable */ + ret = (dev->regs.paden >> shift32) & 0xff; + break; + case 0x38: case 0x39: case 0x3a: case 0x3b: + /* GP Timer Reload Enable */ + ret = (dev->regs.gptren >> shift32) & 0xff; + break; + case 0x40: + /* GPIO Direction Control */ + if (size == 1) + ret = dev->regs.gpio_dir & 0xff; + break; + case 0x42: + /* GPIO port Output Value */ + if (size == 1) + ret = dev->regs.gpio_val & 0xff; + break; + case 0x44: + /* GPIO port Output Value */ + if (size == 1) + ret = dev->regs.extsmi_val & 0xff; + break; + case 0x46: case 0x47: + /* GPO Port Output Value */ + ret = (dev->regs.gpo_val >> shift16) & 0xff; + break; + case 0x48: case 0x49: + /* GPO Port Input Value */ + ret = (dev->regs.gpi_val >> shift16) & 0xff; + break; + } + + acpi_log("(%i) ACPI Read (%i) %02X: %02X\n", in_smm, size, addr, ret); + return ret; +} + + static void -acpi_reg_write_common(int size, uint16_t addr, uint8_t val, void *p) +acpi_reg_write_intel(int size, uint16_t addr, uint8_t val, void *p) { acpi_t *dev = (acpi_t *) p; int shift16, shift32; @@ -199,6 +320,12 @@ acpi_reg_write_common(int size, uint16_t addr, uint8_t val, void *p) case 0x04: case 0x05: /* PMCNTRL - Power Management Control Register (IO) */ dev->regs.pmcntrl = ((dev->regs.pmcntrl & ~(0xff << shift16)) | (val << shift16)) & 0x3c07; + /* Setting GBL_RLS also sets BIOS_STS and generates SMI. */ + if ((addr == 0x04) && (dev->regs.pmcntrl & 0x0004)) { + dev->regs.glbsts |= 0x01; + if (dev->regs.glben & 0x02) + acpi_raise_smi(dev); + } if (dev->regs.pmcntrl & 0x2000) { sus_typ = (dev->regs.pmcntrl >> 10) & 7; switch (sus_typ) { @@ -240,16 +367,6 @@ acpi_reg_write_common(int size, uint16_t addr, uint8_t val, void *p) /* PCNTRL - Processor Control Register (IO) */ dev->regs.pcntrl = ((dev->regs.pcntrl & ~(0xff << shift32)) | (val << shift32)) & 0x00023e1e; break; - case 0x14: - /* PLVL2 - Processor Level 2 Register (IO) */ - if (size == 1) - dev->regs.plvl2 = val; - break; - case 0x15: - /* PLVL3 - Processor Level 3 Register (IO) */ - if (size == 1) - dev->regs.plvl3 = val; - break; case 0x18: case 0x19: /* GLBSTS - Global Status Register (IO) */ dev->regs.glbsts &= ~((val << shift16) & 0x0df7); @@ -264,8 +381,13 @@ acpi_reg_write_common(int size, uint16_t addr, uint8_t val, void *p) break; case 0x28: case 0x29: case 0x2a: case 0x2b: /* GLBCTL - Global Control Register (IO) */ - // dev->regs.glbctl = ((dev->regs.glbctl & ~(0xff << shift32)) | (val << shift32)) & 0x0701ff07; - dev->regs.glbctl = ((dev->regs.glbctl & ~(0xff << shift32)) | (val << shift32)) & 0x0700ff07; + dev->regs.glbctl = ((dev->regs.glbctl & ~(0xff << shift32)) | (val << shift32)) & 0x0701ff07; + /* Setting BIOS_RLS also sets GBL_STS and generates SMI. */ + if (dev->regs.glbctl & 0x00000002) { + dev->regs.pmsts |= 0x20; + if (dev->regs.pmen & 0x20) + acpi_update_irq(dev); + } break; case 0x2c: case 0x2d: case 0x2e: case 0x2f: /* DEVCTL - Device Control Register (IO) */ @@ -280,6 +402,180 @@ acpi_reg_write_common(int size, uint16_t addr, uint8_t val, void *p) } +static void +acpi_reg_write_via(int size, uint16_t addr, uint8_t val, void *p) +{ + acpi_t *dev = (acpi_t *) p; + int shift16, shift32; + int sus_typ; + + addr &= 0xff; + acpi_log("(%i) ACPI Write (%i) %02X: %02X\n", in_smm, size, addr, val); + shift16 = (addr & 1) << 3; + shift32 = (addr & 3) << 3; + + switch (addr) { + case 0x00: case 0x01: + /* PMSTS - Power Management Status Register (IO) */ + dev->regs.pmsts &= ~((val << shift16) & 0x8d31); + acpi_update_irq(dev); + if ((addr == 0x00) && !(dev->regs.pmsts & 0x20)) + dev->regs.glbctl &= ~0x0002; + break; + case 0x02: case 0x03: + /* PMEN - Power Management Resume Enable Register (IO) */ + dev->regs.pmen = ((dev->regs.pmen & ~(0xff << shift16)) | (val << shift16)) & 0x0521; + acpi_update_irq(dev); + break; + case 0x04: case 0x05: + /* PMCNTRL - Power Management Control Register (IO) */ + dev->regs.pmcntrl = ((dev->regs.pmcntrl & ~(0xff << shift16)) | (val << shift16)) & 0x3c07; + /* Setting GBL_RLS also sets BIOS_STS and generates SMI. */ + if ((addr == 0x04) && (dev->regs.pmcntrl & 0x0004)) { + dev->regs.glbsts |= 0x20; + if (dev->regs.glben & 0x20) + acpi_raise_smi(dev); + } + if (dev->regs.pmcntrl & 0x2000) { + sus_typ = (dev->regs.pmcntrl >> 10) & 7; + switch (sus_typ) { + case 0: + /* Soft power off. */ + exit(-1); + break; + case 1: + /* Suspend to RAM. */ + nvr_reg_write(0x000f, 0xff, dev->nvr); + + /* Do a hard reset. */ + device_reset_all_pci(); + + cpu_alt_reset = 0; + + pci_reset(); + keyboard_at_reset(); + + mem_a20_alt = 0; + mem_a20_recalc(); + + flushmmucache(); + + resetx86(); + break; + } + } + break; + case 0x10: case 0x11: case 0x12: case 0x13: + /* PCNTRL - Processor Control Register (IO) */ + dev->regs.pcntrl = ((dev->regs.pcntrl & ~(0xff << shift32)) | (val << shift32)) & 0x0000001e; + break; + case 0x20: case 0x21: + /* GPSTS - General Purpose Status Register (IO) */ + dev->regs.gpsts &= ~((val << shift16) & 0x03ff); + break; + case 0x22: case 0x23: + /* General Purpose SCI Enable */ + dev->regs.gpscien = ((dev->regs.gpscien & ~(0xff << shift16)) | (val << shift16)) & 0x03ff; + break; + case 0x24: case 0x25: + /* General Purpose SMI Enable */ + dev->regs.gpsmien = ((dev->regs.gpsmien & ~(0xff << shift16)) | (val << shift16)) & 0x03ff; + break; + case 0x26: case 0x27: + /* Power Supply Control */ + dev->regs.pscntrl = ((dev->regs.pscntrl & ~(0xff << shift16)) | (val << shift16)) & 0x0701; + break; + case 0x28: case 0x29: + /* GLBSTS - Global Status Register (IO) */ + dev->regs.glbsts &= ~((val << shift16) & 0x007f); + break; + case 0x2a: case 0x2b: + /* GLBEN - Global Enable Register (IO) */ + dev->regs.glben = ((dev->regs.glben & ~(0xff << shift16)) | (val << shift16)) & 0x007f; + break; + case 0x2c: + /* GLBCTL - Global Control Register (IO) */ + dev->regs.glbctl = (dev->regs.glbctl & ~0xff) | (val & 0xff); + dev->regs.smi_lock = !!(dev->regs.glbctl & 0x0010); + /* Setting BIOS_RLS also sets GBL_STS and generates SMI. */ + if (dev->regs.glbctl & 0x0002) { + dev->regs.pmsts |= 0x20; + if (dev->regs.pmen & 0x20) + acpi_update_irq(dev); + } + break; + case 0x2d: + /* GLBCTL - Global Control Register (IO) */ + dev->regs.glbctl &= ~((val << 8) & 0x0100); + if (val & 0x01) + dev->regs.smi_active = 0; + break; + case 0x2f: + /* SMI Command */ + if (size == 1) { + dev->regs.smicmd = val & 0xff; + dev->regs.glbsts |= 0x40; + if (dev->regs.glben & 0x40) + acpi_raise_smi(dev); + } + break; + case 0x30: case 0x31: case 0x32: case 0x33: + /* Primary Activity Detect Status */ + dev->regs.padsts &= ~((val << shift32) & 0x000000fd); + break; + case 0x34: case 0x35: case 0x36: case 0x37: + /* Primary Activity Detect Enable */ + dev->regs.paden = ((dev->regs.paden & ~(0xff << shift32)) | (val << shift32)) & 0x000000fd; + break; + case 0x38: case 0x39: case 0x3a: case 0x3b: + /* GP Timer Reload Enable */ + dev->regs.gptren = ((dev->regs.gptren & ~(0xff << shift32)) | (val << shift32)) & 0x000000d9; + break; + case 0x40: + /* GPIO Direction Control */ + if (size == 1) + dev->regs.gpio_dir = val & 0xff; + break; + case 0x42: + /* GPIO port Output Value */ + if (size == 1) + dev->regs.gpio_val = val & 0xff; + break; + case 0x46: case 0x47: + /* GPO Port Output Value */ + dev->regs.gpo_val = ((dev->regs.gpo_val & ~(0xff << shift16)) | (val << shift16)) & 0xffff; + break; + } +} + + +static uint32_t +acpi_reg_read_common(int size, uint16_t addr, void *p) +{ + acpi_t *dev = (acpi_t *) p; + uint8_t ret = 0xff; + + if (dev->vendor == VEN_VIA) + ret = acpi_reg_read_via(size, addr, p); + else + ret = acpi_reg_read_intel(size, addr, p); + + return ret; +} + + +static void +acpi_reg_write_common(int size, uint16_t addr, uint8_t val, void *p) +{ + acpi_t *dev = (acpi_t *) p; + + if (dev->vendor == VEN_VIA) + acpi_reg_write_via(size, addr, val, p); + else + acpi_reg_write_intel(size, addr, val, p); +} + + static uint32_t acpi_reg_readl(uint16_t addr, void *p) { @@ -440,6 +736,46 @@ acpi_set_nvr(acpi_t *dev, nvr_t *nvr) } +static void +acpi_apm_out(uint16_t port, uint8_t val, void *p) +{ + acpi_t *dev = (acpi_t *) p; + + acpi_log("[%04X:%08X] APM write: %04X = %02X (BX = %04X, CX = %04X)\n", CS, cpu_state.pc, port, val, BX, CX); + + port &= 0x0001; + + if (port == 0x0000) { + dev->apm->cmd = val; + if (dev->apm->do_smi) { + if (dev->vendor == VEN_INTEL) + dev->regs.glbsts |= 0x20; + acpi_raise_smi(dev); + } + } else + dev->apm->stat = val; +} + + +static uint8_t +acpi_apm_in(uint16_t port, void *p) +{ + acpi_t *dev = (acpi_t *) p; + uint8_t ret = 0xff; + + port &= 0x0001; + + if (port == 0x0000) + ret = dev->apm->cmd; + else + ret = dev->apm->stat; + + acpi_log("[%04X:%08X] APM read: %04X = %02X\n", CS, cpu_state.pc, port, ret); + + return ret; +} + + static void acpi_reset(void *priv) { @@ -482,6 +818,13 @@ acpi_init(const device_t *info) if (dev == NULL) return(NULL); memset(dev, 0x00, sizeof(acpi_t)); + dev->vendor = info->local; + + if (dev->vendor == VEN_INTEL) { + dev->apm = device_add(&apm_pci_acpi_device); + io_sethandler(0x00b2, 0x0002, acpi_apm_in, NULL, NULL, acpi_apm_out, NULL, NULL, dev); + } + timer_add(&dev->timer, acpi_timer_count, dev, 0); timer_set_delay_u64(&dev->timer, ACPICONST); @@ -489,11 +832,26 @@ acpi_init(const device_t *info) } -const device_t acpi_device = +const device_t acpi_intel_device = { - "ACPI", + "ACPI v1.0", DEVICE_PCI, - 0, + VEN_INTEL, + acpi_init, + acpi_close, + acpi_reset, + NULL, + acpi_speed_changed, + NULL, + NULL +}; + + +const device_t acpi_via_device = +{ + "ACPI v1.2", + DEVICE_PCI, + VEN_VIA, acpi_init, acpi_close, acpi_reset, diff --git a/src/apm.c b/src/apm.c index b197c0964..38d223e88 100644 --- a/src/apm.c +++ b/src/apm.c @@ -116,7 +116,8 @@ static void apm_t *dev = (apm_t *) malloc(sizeof(apm_t)); memset(dev, 0, sizeof(apm_t)); - io_sethandler(0x00b2, 0x0002, apm_in, NULL, NULL, apm_out, NULL, NULL, dev); + if (info->local == 0) + io_sethandler(0x00b2, 0x0002, apm_in, NULL, NULL, apm_out, NULL, NULL, dev); return dev; } @@ -150,3 +151,18 @@ const device_t apm_pci_device = NULL, NULL }; + + +const device_t apm_pci_acpi_device = +{ + "Advanced Power Management (PCI)", + DEVICE_PCI, + 1, + apm_init, + apm_close, + apm_reset, + NULL, + NULL, + NULL, + NULL +}; diff --git a/src/config.c b/src/config.c index 0cc5d35a1..0d3b13a29 100644 --- a/src/config.c +++ b/src/config.c @@ -957,7 +957,12 @@ load_hard_disks(void) wcsncpy(hdd[c].fn, &wp[wcslen(usr_path)], sizeof_w(hdd[c].fn)); } else #endif - wcsncpy(hdd[c].fn, wp, sizeof_w(hdd[c].fn)); + if (plat_path_abs(wp)) { + wcsncpy(hdd[c].fn, wp, sizeof_w(hdd[c].fn)); + } else { + wcsncpy(hdd[c].fn, usr_path, sizeof_w(hdd[c].fn)); + wcsncat(hdd[c].fn, wp, sizeof_w(hdd[c].fn)-wcslen(usr_path)); + } /* If disk is empty or invalid, mark it for deletion. */ if (! hdd_is_valid(c)) { @@ -1832,7 +1837,10 @@ save_hard_disks(void) sprintf(temp, "hdd_%02i_fn", c+1); if (hdd_is_valid(c) && (wcslen(hdd[c].fn) != 0)) - config_set_wstring(cat, temp, hdd[c].fn); + if (!wcsnicmp(hdd[c].fn, usr_path, wcslen(usr_path))) + config_set_wstring(cat, temp, &hdd[c].fn[wcslen(usr_path)]); + else + config_set_wstring(cat, temp, hdd[c].fn); else config_delete_var(cat, temp); } diff --git a/src/cpu/codegen_timing_p6.c b/src/cpu/codegen_timing_p6.c index 0cddde494..ec5c54640 100644 --- a/src/cpu/codegen_timing_p6.c +++ b/src/cpu/codegen_timing_p6.c @@ -1,4 +1,5 @@ -/*Hacky P6 timings based on K6 timings*/ +/*Basic P6 timing model by plant/nerd73. Based on the K6 timing model*/ +/*Some cycle timings come from https://www.agner.org/optimize/instruction_tables.pdf*/ #include #include #include @@ -21,24 +22,26 @@ typedef enum uop_type_t UOP_ALU = 0, /*Executes in Integer X or Y units*/ UOP_ALUX, /*Executes in Integer X unit*/ UOP_LOAD, /*Executes in Load unit*/ - UOP_STORE, /*Executes in Store unit*/ + UOP_STORED, /*Executes in Data Store unit*/ + UOP_STOREA, /*Executes in Address Store unit*/ UOP_FLOAD, /*Executes in Load unit*/ - UOP_FSTORE, /*Executes in Store unit*/ + UOP_FSTORED, /*Executes in Data Store unit*/ + UOP_FSTOREA, /*Executes in Address Store unit*/ UOP_MLOAD, /*Executes in Load unit*/ - UOP_MSTORE, /*Executes in Store unit*/ + UOP_MSTORED, /*Executes in Data Store unit*/ + UOP_MSTOREA, /*Executes in Address Store unit*/ UOP_FLOAT, /*Executes in Floating Point unit*/ - UOP_MEU, /*Executes in Multimedia unit*/ - UOP_MEU_SHIFT, /*Executes in Multimedia unit or ALU X/Y. Uses MMX shifter*/ - UOP_MEU_MUL, /*Executes in Multimedia unit or ALU X/Y. Uses MMX multiplier*/ + UOP_MMX, /*Executes in Integer X or Y units as MMX*/ + UOP_MMX_SHIFT, /*Executes in Integer Y unit. Uses MMX shifter*/ + UOP_MMX_MUL, /*Executes in Integer X unit. Uses MMX multiplier*/ UOP_BRANCH, /*Executes in Branch unit*/ UOP_LIMM /*Does not require an execution unit*/ } uop_type_t; typedef enum decode_type_t { - DECODE_SHORT, - DECODE_LONG, - DECODE_VECTOR + DECODE_SIMPLE, + DECODE_COMPLEX, } decode_type_t; #define MAX_UOPS 10 @@ -46,7 +49,6 @@ typedef enum decode_type_t typedef struct risc86_uop_t { uop_type_t type; - double throughput; double latency; } risc86_uop_t; @@ -60,804 +62,885 @@ typedef struct risc86_instruction_t static const risc86_instruction_t alu_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t alux_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1} }; static const risc86_instruction_t load_alu_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t load_alux_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1} }; static const risc86_instruction_t alu_store_op = { - .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} + }; static const risc86_instruction_t alux_store_op = { - .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} }; static const risc86_instruction_t branch_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_BRANCH, .latency = 1} }; static const risc86_instruction_t limm_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LIMM, .throughput = 1, .latency = 1} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_LIMM, .latency = 1} }; static const risc86_instruction_t load_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1} }; static const risc86_instruction_t store_op = { - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1} + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = {.type = UOP_STOREA, .latency = 1} }; static const risc86_instruction_t bswap_op = { - .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, }; static const risc86_instruction_t leave_op = { .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t lods_op = { .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t loop_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_BRANCH, .latency = 1} }; static const risc86_instruction_t mov_reg_seg_op = { .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, }; static const risc86_instruction_t movs_op = { .nr_uops = 4, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t pop_reg_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t pop_mem_op = { - .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t push_imm_op = { - .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2}, + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = {.type = UOP_STOREA, .latency = 1}, }; static const risc86_instruction_t push_mem_op = { - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1} }; static const risc86_instruction_t push_seg_op = { - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t stos_op = { - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t test_reg_op = { .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t test_reg_b_op = { .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1} }; static const risc86_instruction_t test_mem_imm_op = { .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t test_mem_imm_b_op = { .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1} }; static const risc86_instruction_t xchg_op = { .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t mmx_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX, .latency = 1.5} }; static const risc86_instruction_t mmx_mul_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU_MUL, .throughput = 1.5, .latency = 3} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX_MUL, .latency = 1.5} }; static const risc86_instruction_t mmx_shift_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU_SHIFT, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX_SHIFT, .latency = 1.5} }; static const risc86_instruction_t load_mmx_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU, .throughput = 1.5, .latency = 3} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_MMX, .latency = 1.5} }; static const risc86_instruction_t load_mmx_mul_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU_MUL, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 3}, + .uop[1] = {.type = UOP_MMX_MUL, .latency = 1.5} }; static const risc86_instruction_t load_mmx_shift_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU_SHIFT, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 3}, + .uop[1] = {.type = UOP_MMX_SHIFT, .latency = 1.5} }; static const risc86_instruction_t mload_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MLOAD, .throughput = 1, .latency = 2} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_MLOAD, .latency = 3}, }; static const risc86_instruction_t mstore_op = { - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MSTORE, .throughput = 1, .latency = 1} + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_MSTORED, .latency = 1}, + .uop[1] = {.type = UOP_MSTOREA, .latency = 1} }; static const risc86_instruction_t pmul_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX_MUL, .latency = 1.5} }; static const risc86_instruction_t pmul_mem_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 3}, + .uop[1] = {.type = UOP_MMX_MUL, .latency = 1.5} }; - static const risc86_instruction_t float_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 1} +}; +static const risc86_instruction_t fadd_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 3} +}; +static const risc86_instruction_t fmul_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_ALU, .latency = 5} +}; +static const risc86_instruction_t float2_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 1} +}; +static const risc86_instruction_t fchs_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 2}, + .uop[1] = {.type = UOP_FLOAT, .latency = 2}, + .uop[2] = {.type = UOP_FLOAT, .latency = 2} }; static const risc86_instruction_t load_float_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_FLOAT, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 1} +}; +static const risc86_instruction_t load_fadd_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 3} +}; +static const risc86_instruction_t load_fmul_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 5} }; static const risc86_instruction_t fstore_op = { - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FSTORED, .latency = 1}, + .uop[1] = {.type = UOP_FSTOREA, .latency = 1}, +}; +static const risc86_instruction_t load_fiadd_op = +{ + .nr_uops = 7, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 1}, + .uop[2] = {.type = UOP_FLOAT, .latency = 1}, + .uop[3] = {.type = UOP_FLOAT, .latency = 1}, + .uop[4] = {.type = UOP_FLOAT, .latency = 1}, + .uop[5] = {.type = UOP_FLOAT, .latency = 1}, + .uop[6] = {.type = UOP_FLOAT, .latency = 1} }; - static const risc86_instruction_t fdiv_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 38, .latency = 38} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 32} }; static const risc86_instruction_t fdiv_mem_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_FLOAT, .throughput = 38, .latency = 38} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 38} }; static const risc86_instruction_t fsin_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 62, .latency = 62} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 60} }; static const risc86_instruction_t fsqrt_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 49, .latency = 49} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 69} }; -static const risc86_instruction_t vector_fldcw_op = +static const risc86_instruction_t complex_fldcw_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 8, .latency = 8} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 10} }; -static const risc86_instruction_t vector_float_op = +static const risc86_instruction_t complex_float_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 1} }; -static const risc86_instruction_t vector_float_l_op = +static const risc86_instruction_t complex_float_l_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 50, .latency = 50} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 50} }; -static const risc86_instruction_t vector_flde_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} -}; -static const risc86_instruction_t vector_fste_op = +static const risc86_instruction_t complex_flde_op = { .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2}, - .uop[1] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = {.type = UOP_FLOAD, .latency = 1}, + .uop[2] = {.type = UOP_FLOAT, .latency = 2} +}; +static const risc86_instruction_t complex_fste_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 2}, + .uop[1] = {.type = UOP_FSTORED, .latency = 1}, + .uop[2] = {.type = UOP_FSTOREA, .latency = 1} }; -static const risc86_instruction_t vector_alu1_op = +static const risc86_instruction_t complex_alu1_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1} }; -static const risc86_instruction_t vector_alu2_op = +static const risc86_instruction_t complex_alu2_op = { .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} }; -static const risc86_instruction_t vector_alu3_op = +static const risc86_instruction_t complex_alu3_op = { .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} }; -static const risc86_instruction_t vector_alu6_op = +static const risc86_instruction_t complex_alu6_op = { .nr_uops = 6, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1}, + .uop[4] = {.type = UOP_ALU, .latency = 1}, + .uop[5] = {.type = UOP_ALU, .latency = 1} }; -static const risc86_instruction_t vector_alux1_op = +static const risc86_instruction_t complex_alux1_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1} }; -static const risc86_instruction_t vector_alux3_op = +static const risc86_instruction_t complex_alux3_op = { .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .latency = 1} }; -static const risc86_instruction_t vector_alux6_op = +static const risc86_instruction_t complex_alux6_op = { - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .nr_uops = 6, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .latency = 1}, + .uop[3] = {.type = UOP_ALUX, .latency = 1}, + .uop[4] = {.type = UOP_ALUX, .latency = 1}, + .uop[5] = {.type = UOP_ALUX, .latency = 1} }; -static const risc86_instruction_t vector_alu_store_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_alux_store_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_arpl_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, - .uop[1] = {.type = UOP_ALU, .throughput = 3, .latency = 3} -}; -static const risc86_instruction_t vector_bound_op = +static const risc86_instruction_t complex_alu_store_op = { .nr_uops = 4, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1}, }; -static const risc86_instruction_t vector_bsx_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 10, .latency = 10} -}; -static const risc86_instruction_t vector_call_far_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_cli_sti_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 7, .latency = 7} -}; -static const risc86_instruction_t vector_cmps_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_cmpsb_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_cmpxchg_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, -}; -static const risc86_instruction_t vector_cmpxchg_b_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, -}; -static const risc86_instruction_t vector_cpuid_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 22, .latency = 22} -}; -static const risc86_instruction_t vector_div16_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 10, .latency = 10} -}; -static const risc86_instruction_t vector_div16_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 10, .latency = 10} -}; -static const risc86_instruction_t vector_div32_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 18, .latency = 18} -}; -static const risc86_instruction_t vector_div32_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 18, .latency = 18} -}; -static const risc86_instruction_t vector_emms_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 25, .latency = 25} -}; -static const risc86_instruction_t vector_enter_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 10, .latency = 10} -}; -static const risc86_instruction_t vector_femms_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 6, .latency = 6} -}; -static const risc86_instruction_t vector_in_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11} -}; -static const risc86_instruction_t vector_ins_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_int_op = -{ - .nr_uops = 5, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 20, .latency = 20}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_iret_op = -{ - .nr_uops = 5, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[3] = {.type = UOP_ALU, .throughput = 20, .latency = 20}, - .uop[4] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_invd_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1000, .latency = 1000} -}; -static const risc86_instruction_t vector_jmp_far_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_load_alu_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_load_alux_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_loop_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_lss_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_ALU, .throughput = 3, .latency = 3} -}; -static const risc86_instruction_t vector_mov_mem_seg_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_mov_seg_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 3, .latency = 3} -}; -static const risc86_instruction_t vector_mov_seg_reg_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3} -}; -static const risc86_instruction_t vector_mul_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_mul_mem_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_mul64_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_mul64_mem_op = +static const risc86_instruction_t complex_alux_store_op = { .nr_uops = 4, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} }; -static const risc86_instruction_t vector_out_op = +static const risc86_instruction_t complex_arpl_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3}, + .uop[1] = {.type = UOP_ALU, .latency = 3} +}; +static const risc86_instruction_t complex_bound_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_LOAD, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_bsx_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 10, .latency = 10} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 10} }; -static const risc86_instruction_t vector_outs_op = +static const risc86_instruction_t complex_call_far_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_cli_sti_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 7} +}; +static const risc86_instruction_t complex_cmps_op = { .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 10, .latency = 10}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} }; -static const risc86_instruction_t vector_pusha_op = +static const risc86_instruction_t complex_cmpsb_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_cmpxchg_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} +}; +static const risc86_instruction_t complex_cmpxchg_b_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} +}; +static const risc86_instruction_t complex_cpuid_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 23} +}; +static const risc86_instruction_t complex_div16_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 10} +}; +static const risc86_instruction_t complex_div16_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 10} +}; +static const risc86_instruction_t complex_div32_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 18} +}; +static const risc86_instruction_t complex_div32_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 18} +}; +static const risc86_instruction_t complex_emms_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 25} +}; +static const risc86_instruction_t complex_enter_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = {.type = UOP_STOREA, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 10} +}; +static const risc86_instruction_t complex_femms_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 6} +}; +static const risc86_instruction_t complex_in_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 10} +}; +static const risc86_instruction_t complex_ins_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 10}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_int_op = { .nr_uops = 8, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[6] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[7] = {.type = UOP_STORE, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 20}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_STORED, .latency = 1}, + .uop[4] = {.type = UOP_STOREA, .latency = 1}, + .uop[5] = {.type = UOP_STORED, .latency = 1}, + .uop[6] = {.type = UOP_STOREA, .latency = 1}, + .uop[7] = {.type = UOP_BRANCH, .latency = 1} }; -static const risc86_instruction_t vector_popa_op = +static const risc86_instruction_t complex_iret_op = +{ + .nr_uops = 5, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 3}, + .uop[1] = {.type = UOP_LOAD, .latency = 3}, + .uop[2] = {.type = UOP_LOAD, .latency = 3}, + .uop[3] = {.type = UOP_ALU, .latency = 20}, + .uop[4] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_invd_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 500} +}; +static const risc86_instruction_t complex_jmp_far_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3}, + .uop[1] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_load_alu_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_load_alux_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t complex_loop_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_lss_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_LOAD, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 3} +}; +static const risc86_instruction_t complex_mov_mem_seg_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, +}; +static const risc86_instruction_t complex_mov_seg_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 3} +}; +static const risc86_instruction_t complex_mov_seg_reg_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3} +}; +static const risc86_instruction_t complex_mul_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t complex_mul_mem_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t complex_mul64_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t complex_mul64_mem_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .latency = 1}, + .uop[3] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t complex_out_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 10}, + .uop[1] = {.type = UOP_STOREA, .latency = 10}, +}; +static const risc86_instruction_t complex_outs_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 10}, + .uop[1] = {.type = UOP_STOREA, .latency = 10}, + .uop[2] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_pusha_op = { .nr_uops = 8, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[6] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[7] = {.type = UOP_LOAD, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 2}, + .uop[1] = {.type = UOP_STOREA, .latency = 2}, + .uop[2] = {.type = UOP_STORED, .latency = 2}, + .uop[3] = {.type = UOP_STOREA, .latency = 2}, + .uop[4] = {.type = UOP_STORED, .latency = 2}, + .uop[5] = {.type = UOP_STOREA, .latency = 2}, + .uop[6] = {.type = UOP_STORED, .latency = 2}, + .uop[7] = {.type = UOP_STOREA, .latency = 2} }; -static const risc86_instruction_t vector_popf_op = +static const risc86_instruction_t complex_popa_op = +{ + .nr_uops = 8, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_LOAD, .latency = 1}, + .uop[2] = {.type = UOP_LOAD, .latency = 1}, + .uop[3] = {.type = UOP_LOAD, .latency = 1}, + .uop[4] = {.type = UOP_LOAD, .latency = 1}, + .uop[5] = {.type = UOP_LOAD, .latency = 1}, + .uop[6] = {.type = UOP_LOAD, .latency = 1}, + .uop[7] = {.type = UOP_LOAD, .latency = 1} +}; +static const risc86_instruction_t complex_popf_op = { .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 17, .latency = 17} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 3}, + .uop[1] = {.type = UOP_ALUX, .latency = 17} }; -static const risc86_instruction_t vector_push_mem_op = +static const risc86_instruction_t complex_push_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = {.type = UOP_STOREA, .latency = 1} +}; +static const risc86_instruction_t complex_pushf_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1} +}; +static const risc86_instruction_t complex_ret_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_retf_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 3}, + .uop[2] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_scas_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_scasb_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_setcc_mem_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_FSTORED, .latency = 1}, + .uop[3] = {.type = UOP_FSTOREA, .latency = 1} +}; +static const risc86_instruction_t complex_setcc_reg_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_test_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_test_mem_b_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t complex_xchg_mem_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_xlat_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_LOAD, .latency = 1} +}; +static const risc86_instruction_t complex_wbinvd_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_pushf_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_ret_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_retf_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, - .uop[2] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_scas_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_scasb_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_setcc_mem_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_setcc_reg_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_test_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_test_mem_b_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_xchg_mem_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_xlat_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2} -}; -static const risc86_instruction_t vector_wbinvd_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 10000, .latency = 10000} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 10000} }; + #define INVALID NULL static const risc86_instruction_t *opcode_timings[256] = @@ -865,38 +948,38 @@ static const risc86_instruction_t *opcode_timings[256] = /* ADD ADD ADD ADD*/ /*00*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, /* ADD ADD PUSH ES POP ES*/ - &alux_op, &alu_op, &push_seg_op, &vector_mov_seg_mem_op, + &alux_op, &alu_op, &push_seg_op, &complex_mov_seg_mem_op, /* OR OR OR OR*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, /* OR OR PUSH CS */ &alux_op, &alu_op, &push_seg_op, INVALID, /* ADC ADC ADC ADC*/ -/*10*/ &vector_alux_store_op, &vector_alu_store_op, &vector_load_alux_op, &vector_load_alu_op, +/*10*/ &complex_alux_store_op,&complex_alu_store_op, &complex_load_alux_op,&complex_load_alu_op, /* ADC ADC PUSH SS POP SS*/ - &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, + &complex_alux1_op, &complex_alu1_op, &push_seg_op, &complex_mov_seg_mem_op, /* SBB SBB SBB SBB*/ -/*10*/ &vector_alux_store_op, &vector_alu_store_op, &vector_load_alux_op, &vector_load_alu_op, +/*10*/ &complex_alux_store_op,&complex_alu_store_op, &complex_load_alux_op,&complex_load_alu_op, /* SBB SBB PUSH DS POP DS*/ - &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, + &complex_alux1_op, &complex_alu1_op, &push_seg_op, &complex_mov_seg_mem_op, /* AND AND AND AND*/ /*20*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, /* AND AND DAA*/ - &alux_op, &alu_op, INVALID, &vector_alux1_op, + &alux_op, &alu_op, INVALID, &complex_alux1_op, /* SUB SUB SUB SUB*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, /* SUB SUB DAS*/ - &alux_op, &alu_op, INVALID, &vector_alux1_op, + &alux_op, &alu_op, INVALID, &complex_alux1_op, /* XOR XOR XOR XOR*/ /*30*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, /* XOR XOR AAA*/ - &alux_op, &alu_op, INVALID, &vector_alux6_op, + &alux_op, &alu_op, INVALID, &complex_alux6_op, /* CMP CMP CMP CMP*/ &load_alux_op, &load_alu_op, &load_alux_op, &load_alu_op, /* CMP CMP AAS*/ - &alux_op, &alu_op, INVALID, &vector_alux6_op, + &alux_op, &alu_op, INVALID, &complex_alux6_op, /* INC EAX INC ECX INC EDX INC EBX*/ /*40*/ &alu_op, &alu_op, &alu_op, &alu_op, @@ -917,12 +1000,12 @@ static const risc86_instruction_t *opcode_timings[256] = &pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, /* PUSHA POPA BOUND ARPL*/ -/*60*/ &vector_pusha_op, &vector_popa_op, &vector_bound_op, &vector_arpl_op, +/*60*/ &complex_pusha_op, &complex_popa_op, &complex_bound_op, &complex_arpl_op, INVALID, INVALID, INVALID, INVALID, /* PUSH imm IMUL PUSH imm IMUL*/ - &push_imm_op, &vector_mul_op, &push_imm_op, &vector_mul_op, + &push_imm_op, &complex_mul_op, &push_imm_op, &complex_mul_op, /* INSB INSW OUTSB OUTSW*/ - &vector_ins_op, &vector_ins_op, &vector_outs_op, &vector_outs_op, + &complex_ins_op, &complex_ins_op, &complex_outs_op, &complex_outs_op, /* Jxx*/ /*70*/ &branch_op, &branch_op, &branch_op, &branch_op, @@ -932,29 +1015,29 @@ static const risc86_instruction_t *opcode_timings[256] = /*80*/ INVALID, INVALID, INVALID, INVALID, /* TEST TEST XCHG XCHG*/ - &vector_test_mem_b_op, &vector_test_mem_op, &vector_xchg_mem_op, &vector_xchg_mem_op, + &complex_test_mem_b_op, &complex_test_mem_op, &complex_xchg_mem_op, &complex_xchg_mem_op, /* MOV MOV MOV MOV*/ &store_op, &store_op, &load_op, &load_op, /* MOV from seg LEA MOV to seg POP*/ - &vector_mov_mem_seg_op, &store_op, &vector_mov_seg_mem_op, &pop_mem_op, + &complex_mov_mem_seg_op, &store_op, &complex_mov_seg_mem_op, &pop_mem_op, /* NOP XCHG XCHG XCHG*/ /*90*/ &limm_op, &xchg_op, &xchg_op, &xchg_op, /* XCHG XCHG XCHG XCHG*/ &xchg_op, &xchg_op, &xchg_op, &xchg_op, /* CBW CWD CALL far WAIT*/ - &vector_alu1_op, &vector_alu1_op, &vector_call_far_op, &limm_op, + &complex_alu1_op, &complex_alu1_op, &complex_call_far_op, &limm_op, /* PUSHF POPF SAHF LAHF*/ - &vector_pushf_op, &vector_popf_op, &vector_alux1_op, &vector_alux1_op, + &complex_pushf_op, &complex_popf_op, &complex_alux1_op, &complex_alux1_op, /* MOV MOV MOV MOV*/ /*a0*/ &load_op, &load_op, &store_op, &store_op, /* MOVSB MOVSW CMPSB CMPSW*/ - &movs_op, &movs_op, &vector_cmpsb_op, &vector_cmps_op, + &movs_op, &movs_op, &complex_cmpsb_op, &complex_cmps_op, /* TEST TEST STOSB STOSW*/ &test_reg_b_op, &test_reg_op, &stos_op, &stos_op, /* LODSB LODSW SCASB SCASW*/ - &lods_op, &lods_op, &vector_scasb_op, &vector_scas_op, + &lods_op, &lods_op, &complex_scasb_op, &complex_scas_op, /* MOV*/ /*b0*/ &limm_op, &limm_op, &limm_op, &limm_op, @@ -963,37 +1046,37 @@ static const risc86_instruction_t *opcode_timings[256] = &limm_op, &limm_op, &limm_op, &limm_op, /* RET imm RET*/ -/*c0*/ INVALID, INVALID, &vector_ret_op, &vector_ret_op, +/*c0*/ INVALID, INVALID, &complex_ret_op, &complex_ret_op, /* LES LDS MOV MOV*/ - &vector_lss_op, &vector_lss_op, &store_op, &store_op, + &complex_lss_op, &complex_lss_op, &store_op, &store_op, /* ENTER LEAVE RETF RETF*/ - &vector_enter_op, &leave_op, &vector_retf_op, &vector_retf_op, + &complex_enter_op, &leave_op, &complex_retf_op, &complex_retf_op, /* INT3 INT INTO IRET*/ - &vector_int_op, &vector_int_op, &vector_int_op, &vector_iret_op, + &complex_int_op, &complex_int_op, &complex_int_op, &complex_iret_op, /*d0*/ INVALID, INVALID, INVALID, INVALID, /* AAM AAD SETALC XLAT*/ - &vector_alux6_op, &vector_alux3_op, &vector_alux1_op, &vector_xlat_op, + &complex_alux6_op, &complex_alux3_op, &complex_alux1_op, &complex_xlat_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /* LOOPNE LOOPE LOOP JCXZ*/ -/*e0*/ &vector_loop_op, &vector_loop_op, &loop_op, &vector_loop_op, +/*e0*/ &complex_loop_op, &complex_loop_op, &loop_op, &complex_loop_op, /* IN AL IN AX OUT_AL OUT_AX*/ - &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, + &complex_in_op, &complex_in_op, &complex_out_op, &complex_out_op, /* CALL JMP JMP JMP*/ - &store_op, &branch_op, &vector_jmp_far_op, &branch_op, + &store_op, &branch_op, &complex_jmp_far_op, &branch_op, /* IN AL IN AX OUT_AL OUT_AX*/ - &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, + &complex_in_op, &complex_in_op, &complex_out_op, &complex_out_op, /* REPNE REPE*/ /*f0*/ INVALID, INVALID, INVALID, INVALID, /* HLT CMC*/ - &vector_alux1_op, &vector_alu2_op, INVALID, INVALID, + &complex_alux1_op, &complex_alu2_op, INVALID, INVALID, /* CLC STC CLI STI*/ - &vector_alu1_op, &vector_alu1_op, &vector_cli_sti_op, &vector_cli_sti_op, + &complex_alu1_op, &complex_alu1_op, &complex_cli_sti_op, &complex_cli_sti_op, /* CLD STD INCDEC*/ - &vector_alu1_op, &vector_alu1_op, &alux_store_op, INVALID + &complex_alu1_op, &complex_alu1_op, &alux_store_op, INVALID }; static const risc86_instruction_t *opcode_timings_mod3[256] = @@ -1001,38 +1084,38 @@ static const risc86_instruction_t *opcode_timings_mod3[256] = /* ADD ADD ADD ADD*/ /*00*/ &alux_op, &alu_op, &alux_op, &alu_op, /* ADD ADD PUSH ES POP ES*/ - &alux_op, &alu_op, &push_seg_op, &vector_mov_seg_mem_op, + &alux_op, &alu_op, &push_seg_op, &complex_mov_seg_mem_op, /* OR OR OR OR*/ &alux_op, &alu_op, &alux_op, &alu_op, /* OR OR PUSH CS */ &alux_op, &alu_op, &push_seg_op, INVALID, /* ADC ADC ADC ADC*/ -/*10*/ &vector_alux1_op, &vector_alu1_op, &vector_alux1_op, &vector_alu1_op, +/*10*/ &complex_alux1_op, &complex_alu1_op, &complex_alux1_op, &complex_alu1_op, /* ADC ADC PUSH SS POP SS*/ - &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, + &complex_alux1_op, &complex_alu1_op, &push_seg_op, &complex_mov_seg_mem_op, /* SBB SBB SBB SBB*/ - &vector_alux1_op, &vector_alu1_op, &vector_alux1_op, &vector_alu1_op, + &complex_alux1_op, &complex_alu1_op, &complex_alux1_op, &complex_alu1_op, /* SBB SBB PUSH DS POP DS*/ - &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, + &complex_alux1_op, &complex_alu1_op, &push_seg_op, &complex_mov_seg_mem_op, /* AND AND AND AND*/ /*20*/ &alux_op, &alu_op, &alux_op, &alu_op, /* AND AND DAA*/ - &alux_op, &alu_op, INVALID, &vector_alux1_op, + &alux_op, &alu_op, INVALID, &complex_alux1_op, /* SUB SUB SUB SUB*/ &alux_op, &alu_op, &alux_op, &alu_op, /* SUB SUB DAS*/ - &alux_op, &alu_op, INVALID, &vector_alux1_op, + &alux_op, &alu_op, INVALID, &complex_alux1_op, /* XOR XOR XOR XOR*/ /*30*/ &alux_op, &alu_op, &alux_op, &alu_op, /* XOR XOR AAA*/ - &alux_op, &alu_op, INVALID, &vector_alux6_op, + &alux_op, &alu_op, INVALID, &complex_alux6_op, /* CMP CMP CMP CMP*/ &alux_op, &alu_op, &alux_op, &alu_op, /* CMP CMP AAS*/ - &alux_op, &alu_op, INVALID, &vector_alux6_op, + &alux_op, &alu_op, INVALID, &complex_alux6_op, /* INC EAX INC ECX INC EDX INC EBX*/ /*40*/ &alu_op, &alu_op, &alu_op, &alu_op, @@ -1053,12 +1136,12 @@ static const risc86_instruction_t *opcode_timings_mod3[256] = &pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, /* PUSHA POPA BOUND ARPL*/ -/*60*/ &vector_pusha_op, &vector_popa_op, &vector_bound_op, &vector_arpl_op, +/*60*/ &complex_pusha_op, &complex_popa_op, &complex_bound_op, &complex_arpl_op, INVALID, INVALID, INVALID, INVALID, /* PUSH imm IMUL PUSH imm IMUL*/ - &push_imm_op, &vector_mul_op, &push_imm_op, &vector_mul_op, + &push_imm_op, &complex_mul_op, &push_imm_op, &complex_mul_op, /* INSB INSW OUTSB OUTSW*/ - &vector_ins_op, &vector_ins_op, &vector_outs_op, &vector_outs_op, + &complex_ins_op, &complex_ins_op, &complex_outs_op, &complex_outs_op, /* Jxx*/ /*70*/ &branch_op, &branch_op, &branch_op, &branch_op, @@ -1068,29 +1151,29 @@ static const risc86_instruction_t *opcode_timings_mod3[256] = /*80*/ INVALID, INVALID, INVALID, INVALID, /* TEST TEST XCHG XCHG*/ - &vector_alu1_op, &vector_alu1_op, &vector_alu3_op, &vector_alu3_op, + &complex_alu1_op, &complex_alu1_op, &complex_alu3_op, &complex_alu3_op, /* MOV MOV MOV MOV*/ &store_op, &store_op, &load_op, &load_op, /* MOV from seg LEA MOV to seg POP*/ - &mov_reg_seg_op, &store_op, &vector_mov_seg_reg_op, &pop_reg_op, + &mov_reg_seg_op, &store_op, &complex_mov_seg_reg_op, &pop_reg_op, /* NOP XCHG XCHG XCHG*/ /*90*/ &limm_op, &xchg_op, &xchg_op, &xchg_op, /* XCHG XCHG XCHG XCHG*/ &xchg_op, &xchg_op, &xchg_op, &xchg_op, /* CBW CWD CALL far WAIT*/ - &vector_alu1_op, &vector_alu1_op, &vector_call_far_op, &limm_op, + &complex_alu1_op, &complex_alu1_op, &complex_call_far_op, &limm_op, /* PUSHF POPF SAHF LAHF*/ - &vector_pushf_op, &vector_popf_op, &vector_alux1_op, &vector_alux1_op, + &complex_pushf_op, &complex_popf_op, &complex_alux1_op, &complex_alux1_op, /* MOV MOV MOV MOV*/ /*a0*/ &load_op, &load_op, &store_op, &store_op, /* MOVSB MOVSW CMPSB CMPSW*/ - &movs_op, &movs_op, &vector_cmpsb_op, &vector_cmps_op, + &movs_op, &movs_op, &complex_cmpsb_op, &complex_cmps_op, /* TEST TEST STOSB STOSW*/ &test_reg_b_op, &test_reg_op, &stos_op, &stos_op, /* LODSB LODSW SCASB SCASW*/ - &lods_op, &lods_op, &vector_scasb_op, &vector_scas_op, + &lods_op, &lods_op, &complex_scasb_op, &complex_scas_op, /* MOV*/ /*b0*/ &limm_op, &limm_op, &limm_op, &limm_op, @@ -1099,57 +1182,57 @@ static const risc86_instruction_t *opcode_timings_mod3[256] = &limm_op, &limm_op, &limm_op, &limm_op, /* RET imm RET*/ -/*c0*/ INVALID, INVALID, &vector_ret_op, &vector_ret_op, +/*c0*/ INVALID, INVALID, &complex_ret_op, &complex_ret_op, /* LES LDS MOV MOV*/ - &vector_lss_op, &vector_lss_op, &store_op, &store_op, + &complex_lss_op, &complex_lss_op, &store_op, &store_op, /* ENTER LEAVE RETF RETF*/ - &vector_enter_op, &leave_op, &vector_retf_op, &vector_retf_op, + &complex_enter_op, &leave_op, &complex_retf_op, &complex_retf_op, /* INT3 INT INTO IRET*/ - &vector_int_op, &vector_int_op, &vector_int_op, &vector_iret_op, + &complex_int_op, &complex_int_op, &complex_int_op, &complex_iret_op, /*d0*/ INVALID, INVALID, INVALID, INVALID, /* AAM AAD SETALC XLAT*/ - &vector_alux6_op, &vector_alux3_op, &vector_alux1_op, &vector_xlat_op, + &complex_alux6_op, &complex_alux3_op, &complex_alux1_op, &complex_xlat_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /* LOOPNE LOOPE LOOP JCXZ*/ -/*e0*/ &vector_loop_op, &vector_loop_op, &loop_op, &vector_loop_op, +/*e0*/ &complex_loop_op, &complex_loop_op, &loop_op, &complex_loop_op, /* IN AL IN AX OUT_AL OUT_AX*/ - &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, + &complex_in_op, &complex_in_op, &complex_out_op, &complex_out_op, /* CALL JMP JMP JMP*/ - &store_op, &branch_op, &vector_jmp_far_op, &branch_op, + &store_op, &branch_op, &complex_jmp_far_op, &branch_op, /* IN AL IN AX OUT_AL OUT_AX*/ - &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, + &complex_in_op, &complex_in_op, &complex_out_op, &complex_out_op, /* REPNE REPE*/ /*f0*/ INVALID, INVALID, INVALID, INVALID, /* HLT CMC*/ - &vector_alux1_op, &vector_alu2_op, INVALID, INVALID, + &complex_alux1_op, &complex_alu2_op, INVALID, INVALID, /* CLC STC CLI STI*/ - &vector_alu1_op, &vector_alu1_op, &vector_cli_sti_op, &vector_cli_sti_op, + &complex_alu1_op, &complex_alu1_op, &complex_cli_sti_op, &complex_cli_sti_op, /* CLD STD INCDEC*/ - &vector_alu1_op, &vector_alu1_op, &vector_alux1_op, INVALID + &complex_alu1_op, &complex_alu1_op, &complex_alux1_op, INVALID }; static const risc86_instruction_t *opcode_timings_0f[256] = { -/*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, - INVALID, &vector_alu6_op, &vector_alu6_op, INVALID, - &vector_invd_op, &vector_wbinvd_op, INVALID, INVALID, - INVALID, &load_op, &vector_femms_op, INVALID, +/*00*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, + INVALID, &complex_alu6_op, &complex_alu6_op, INVALID, + &complex_invd_op, &complex_wbinvd_op, INVALID, INVALID, + INVALID, &load_op, &complex_femms_op, INVALID, /*10*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, -/*20*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, - &vector_alu6_op, &vector_alu6_op, INVALID, INVALID, +/*20*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, + &complex_alu6_op, &complex_alu6_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, -/*30*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID, +/*30*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -1170,7 +1253,7 @@ static const risc86_instruction_t *opcode_timings_0f[256] = INVALID, INVALID, &mload_op, &mload_op, /*70*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, - &load_mmx_op, &load_mmx_op, &load_mmx_op, &vector_emms_op, + &load_mmx_op, &load_mmx_op, &load_mmx_op, &complex_emms_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, &mstore_op, &mstore_op, @@ -1179,387 +1262,220 @@ static const risc86_instruction_t *opcode_timings_0f[256] = &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, -/*90*/ &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, - &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, - &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, - &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, +/*90*/ &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, + &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, + &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, + &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, -/*a0*/ &push_seg_op, &vector_mov_seg_mem_op, &vector_cpuid_op, &vector_load_alu_op, - &vector_alu_store_op, &vector_alu_store_op, INVALID, INVALID, - &push_seg_op, &vector_mov_seg_mem_op, INVALID, &vector_load_alu_op, - &vector_alu_store_op, &vector_alu_store_op, INVALID, &vector_mul_op, +/*a0*/ &push_seg_op, &complex_mov_seg_mem_op,&complex_cpuid_op, &complex_load_alu_op, + &complex_alu_store_op, &complex_alu_store_op, INVALID, INVALID, + &push_seg_op, &complex_mov_seg_mem_op,INVALID, &complex_load_alu_op, + &complex_alu_store_op, &complex_alu_store_op, INVALID, &complex_mul_op, -/*b0*/ &vector_cmpxchg_b_op, &vector_cmpxchg_op, &vector_lss_op, &vector_load_alu_op, - &vector_lss_op, &vector_lss_op, &load_alux_op, &load_alu_op, - INVALID, INVALID, &vector_load_alu_op, &vector_load_alu_op, - &vector_bsx_op, &vector_bsx_op, &load_alux_op, &load_alu_op, +/*b0*/ &complex_cmpxchg_b_op, &complex_cmpxchg_op, &complex_lss_op, &complex_load_alu_op, + &complex_lss_op, &complex_lss_op, &load_alux_op, &load_alu_op, + INVALID, INVALID, &complex_load_alu_op, &complex_load_alu_op, + &complex_bsx_op, &complex_bsx_op, &load_alux_op, &load_alu_op, -/*c0*/ &vector_alux_store_op, &vector_alu_store_op, INVALID, INVALID, - INVALID, INVALID, INVALID, &vector_cmpxchg_op, - &bswap_op, &bswap_op, &bswap_op, &bswap_op, - &bswap_op, &bswap_op, &bswap_op, &bswap_op, +/*c0*/ &complex_alux_store_op, &complex_alu_store_op, INVALID, INVALID, + INVALID, INVALID, INVALID, &complex_cmpxchg_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, -/*d0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, - INVALID, &load_mmx_mul_op, INVALID, INVALID, - &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, - &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, +/*d0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, + INVALID, &load_mmx_mul_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, -/*e0*/ &load_mmx_op, &load_mmx_shift_op, &load_mmx_shift_op, INVALID, - INVALID, &pmul_mem_op, INVALID, INVALID, - &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, - &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, +/*e0*/ &load_mmx_op, &load_mmx_shift_op, &load_mmx_shift_op, INVALID, + INVALID, &pmul_mem_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, -/*f0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, - INVALID, &pmul_mem_op, INVALID, INVALID, - &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, - &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, +/*f0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, + INVALID, &pmul_mem_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, + &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, }; static const risc86_instruction_t *opcode_timings_0f_mod3[256] = { -/*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, - INVALID, &vector_alu6_op, &vector_alu6_op, INVALID, - &vector_invd_op, &vector_wbinvd_op, INVALID, INVALID, - INVALID, INVALID, &vector_femms_op, INVALID, +/*00*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, + INVALID, &complex_alu6_op, &complex_alu6_op, INVALID, + &complex_invd_op, &complex_wbinvd_op, INVALID, INVALID, + INVALID, INVALID, &complex_femms_op, INVALID, -/*10*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, +/*10*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, -/*20*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, - &vector_alu6_op, &vector_alu6_op, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, +/*20*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, + &complex_alu6_op, &complex_alu6_op, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, -/*30*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, +/*30*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, -/*40*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, +/*40*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, -/*50*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, +/*50*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, -/*60*/ &mmx_op, &mmx_op, &mmx_op, &mmx_op, - &mmx_op, &mmx_op, &mmx_op, &mmx_op, - &mmx_op, &mmx_op, &mmx_op, &mmx_op, - INVALID, INVALID, &mmx_op, &mmx_op, +/*60*/ &mmx_op, &mmx_op, &mmx_op, &mmx_op, + &mmx_op, &mmx_op, &mmx_op, &mmx_op, + &mmx_op, &mmx_op, &mmx_op, &mmx_op, + INVALID, INVALID, &mmx_op, &mmx_op, -/*70*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, - &mmx_op, &mmx_op, &mmx_op, &vector_emms_op, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, &mmx_op, &mmx_op, +/*70*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + &mmx_op, &mmx_op, &mmx_op, &complex_emms_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, &mmx_op, &mmx_op, /*80*/ &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, -/*90*/ &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, - &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, - &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, - &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, +/*90*/ &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, + &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, + &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, + &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, -/*a0*/ &push_seg_op, &vector_mov_seg_mem_op, &vector_cpuid_op, &vector_alu1_op, - &vector_alu1_op, &vector_alu1_op, INVALID, INVALID, - &push_seg_op, &vector_mov_seg_mem_op, INVALID, &vector_alu1_op, - &vector_alu1_op, &vector_alu1_op, INVALID, &vector_mul_op, +/*a0*/ &push_seg_op, &complex_mov_seg_mem_op, &complex_cpuid_op, &complex_alu1_op, + &complex_alu1_op, &complex_alu1_op, INVALID, INVALID, + &push_seg_op, &complex_mov_seg_mem_op, INVALID, &complex_alu1_op, + &complex_alu1_op, &complex_alu1_op, INVALID, &complex_mul_op, -/*b0*/ &vector_cmpxchg_b_op, &vector_cmpxchg_op, &vector_lss_op, &vector_alu1_op, - &vector_lss_op, &vector_lss_op, &alux_op, &alu_op, - INVALID, INVALID, &vector_alu1_op, &vector_alu1_op, - &vector_bsx_op, &vector_bsx_op, &alux_op, &alu_op, +/*b0*/ &complex_cmpxchg_b_op, &complex_cmpxchg_op, &complex_lss_op, &complex_alu1_op, + &complex_lss_op, &complex_lss_op, &alux_op, &alu_op, + INVALID, INVALID, &complex_alu1_op, &complex_alu1_op, + &complex_bsx_op, &complex_bsx_op, &alux_op, &alu_op, -/*c0*/ &vector_alux1_op, &vector_alu1_op, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - &bswap_op, &bswap_op, &bswap_op, &bswap_op, - &bswap_op, &bswap_op, &bswap_op, &bswap_op, +/*c0*/ &complex_alux1_op, &complex_alu1_op, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, -/*d0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, - INVALID, &mmx_mul_op, INVALID, INVALID, - &mmx_op, &mmx_op, INVALID, &mmx_op, - &mmx_op, &mmx_op, INVALID, &mmx_op, +/*d0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + INVALID, &mmx_mul_op, INVALID, INVALID, + &mmx_op, &mmx_op, INVALID, &mmx_op, + &mmx_op, &mmx_op, INVALID, &mmx_op, -/*e0*/ &mmx_op, &mmx_shift_op, &mmx_shift_op, INVALID, - INVALID, &pmul_op, INVALID, INVALID, - &mmx_op, &mmx_op, INVALID, &mmx_op, - &mmx_op, &mmx_op, INVALID, &mmx_op, - -/*f0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, - INVALID, &pmul_op, INVALID, INVALID, - &mmx_op, &mmx_op, &mmx_op, INVALID, - &mmx_op, &mmx_op, &mmx_op, INVALID, -}; - -static const risc86_instruction_t *opcode_timings_0f0f[256] = -{ -/*00*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*10*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*20*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*30*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*40*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*50*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*60*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*70*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*80*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*90*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*a0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, &load_mmx_mul_op, &load_mmx_mul_op, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*b0*/ INVALID, INVALID, INVALID, INVALID, - &load_mmx_mul_op, INVALID, &load_mmx_mul_op, &load_mmx_mul_op, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, &load_mmx_op, - -/*c0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*d0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*e0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*f0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -}; -static const risc86_instruction_t *opcode_timings_0f0f_mod3[256] = -{ -/*00*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*10*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*20*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*30*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*40*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*50*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*60*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*70*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*80*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*90*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*a0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, &mmx_mul_op, &mmx_mul_op, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*b0*/ INVALID, INVALID, INVALID, INVALID, - &mmx_mul_op, INVALID, &mmx_mul_op, &mmx_mul_op, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, &mmx_op, - -/*c0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*d0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*e0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*f0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, +/*e0*/ &mmx_op, &mmx_shift_op, &mmx_shift_op, INVALID, + INVALID, &pmul_op, INVALID, INVALID, + &mmx_op, &mmx_op, INVALID, &mmx_op, + &mmx_op, &mmx_op, INVALID, &mmx_op, +/*f0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + INVALID, &pmul_op, INVALID, INVALID, + &mmx_op, &mmx_op, &mmx_op, INVALID, + &mmx_op, &mmx_op, &mmx_op, INVALID, }; static const risc86_instruction_t *opcode_timings_shift[8] = { - &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, - &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op + &complex_alu_store_op, &complex_alu_store_op, &complex_alu_store_op, &complex_alu_store_op, + &complex_alu_store_op, &complex_alu_store_op, &complex_alu_store_op, &complex_alu_store_op }; static const risc86_instruction_t *opcode_timings_shift_b[8] = { - &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, - &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op + &complex_alux_store_op, &complex_alux_store_op, &complex_alux_store_op, &complex_alux_store_op, + &complex_alux_store_op, &complex_alux_store_op, &complex_alux_store_op, &complex_alux_store_op }; static const risc86_instruction_t *opcode_timings_shift_mod3[8] = { - &vector_alu1_op, &vector_alu1_op, &vector_alu1_op, &vector_alu1_op, - &alu_op, &alu_op, &alu_op, &alu_op + &complex_alu1_op, &complex_alu1_op, &complex_alu1_op, &complex_alu1_op, + &alu_op, &alu_op, &alu_op, &alu_op }; static const risc86_instruction_t *opcode_timings_shift_b_mod3[8] = { - &vector_alux1_op, &vector_alux1_op, &vector_alux1_op, &vector_alux1_op, - &alux_op, &alux_op, &alux_op, &alux_op + &complex_alux1_op, &complex_alux1_op, &complex_alux1_op, &complex_alux1_op, + &alux_op, &alux_op, &alux_op, &alux_op }; static const risc86_instruction_t *opcode_timings_80[8] = { - &alux_store_op, &alux_store_op, &vector_alux_store_op, &vector_alux_store_op, - &alux_store_op, &alux_store_op, &alux_store_op, &alux_store_op, + &alux_store_op, &alux_store_op, &complex_alux_store_op, &complex_alux_store_op, + &alux_store_op, &alux_store_op, &alux_store_op, &alux_store_op, }; static const risc86_instruction_t *opcode_timings_80_mod3[8] = { - &alux_op, &alux_op, &alux_store_op, &alux_store_op, - &alux_op, &alux_op, &alux_op, &alux_op, + &alux_op, &alux_op, &alux_store_op, &alux_store_op, + &alux_op, &alux_op, &alux_op, &alux_op, }; static const risc86_instruction_t *opcode_timings_8x[8] = { - &alu_store_op, &alu_store_op, &vector_alu_store_op, &vector_alu_store_op, - &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, + &alu_store_op, &alu_store_op, &complex_alu_store_op, &complex_alu_store_op, + &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, }; static const risc86_instruction_t *opcode_timings_8x_mod3[8] = { - &alu_op, &alu_op, &alu_store_op, &alu_store_op, - &alu_op, &alu_op, &alu_op, &alu_op, + &alu_op, &alu_op, &alu_store_op, &alu_store_op, + &alu_op, &alu_op, &alu_op, &alu_op, }; static const risc86_instruction_t *opcode_timings_f6[8] = { /* TST NOT NEG*/ - &test_mem_imm_b_op, INVALID, &vector_alux_store_op, &vector_alux_store_op, + &test_mem_imm_b_op, INVALID, &complex_alux_store_op, &complex_alux_store_op, /* MUL IMUL DIV IDIV*/ - &vector_mul_mem_op, &vector_mul_mem_op, &vector_div16_mem_op, &vector_div16_mem_op, + &complex_mul_mem_op, &complex_mul_mem_op, &complex_div16_mem_op, &complex_div16_mem_op, }; static const risc86_instruction_t *opcode_timings_f6_mod3[8] = { /* TST NOT NEG*/ &test_reg_b_op, INVALID, &alux_op, &alux_op, /* MUL IMUL DIV IDIV*/ - &vector_mul_op, &vector_mul_op, &vector_div16_op, &vector_div16_op, + &complex_mul_op, &complex_mul_op, &complex_div16_op, &complex_div16_op, }; static const risc86_instruction_t *opcode_timings_f7[8] = { /* TST NOT NEG*/ - &test_mem_imm_op, INVALID, &vector_alu_store_op, &vector_alu_store_op, + &test_mem_imm_op, INVALID, &complex_alu_store_op, &complex_alu_store_op, /* MUL IMUL DIV IDIV*/ - &vector_mul64_mem_op, &vector_mul64_mem_op, &vector_div32_mem_op, &vector_div32_mem_op, + &complex_mul64_mem_op, &complex_mul64_mem_op, &complex_div32_mem_op, &complex_div32_mem_op, }; static const risc86_instruction_t *opcode_timings_f7_mod3[8] = { /* TST NOT NEG*/ &test_reg_op, INVALID, &alu_op, &alu_op, /* MUL IMUL DIV IDIV*/ - &vector_mul64_op, &vector_mul64_op, &vector_div32_op, &vector_div32_op, + &complex_mul64_op, &complex_mul64_op, &complex_div32_op, &complex_div32_op, }; static const risc86_instruction_t *opcode_timings_ff[8] = { /* INC DEC CALL CALL far*/ - &alu_store_op, &alu_store_op, &store_op, &vector_call_far_op, + &alu_store_op, &alu_store_op, &store_op, &complex_call_far_op, /* JMP JMP far PUSH*/ - &branch_op, &vector_jmp_far_op, &push_mem_op, INVALID + &branch_op, &complex_jmp_far_op, &push_mem_op, INVALID }; static const risc86_instruction_t *opcode_timings_ff_mod3[8] = { /* INC DEC CALL CALL far*/ - &vector_alu1_op, &vector_alu1_op, &store_op, &vector_call_far_op, + &complex_alu1_op, &complex_alu1_op, &store_op, &complex_call_far_op, /* JMP JMP far PUSH*/ - &branch_op, &vector_jmp_far_op, &vector_push_mem_op, INVALID + &branch_op, &complex_jmp_far_op, &complex_push_mem_op, INVALID }; static const risc86_instruction_t *opcode_timings_d8[8] = { /* FADDs FMULs FCOMs FCOMPs*/ - &load_float_op, &load_float_op, &load_float_op, &load_float_op, + &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, /* FSUBs FSUBRs FDIVs FDIVRs*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, }; static const risc86_instruction_t *opcode_timings_d8_mod3[8] = { /* FADD FMUL FCOM FCOMP*/ - &float_op, &float_op, &float_op, &float_op, + &fadd_op, &fmul_op, &float_op, &float_op, /* FSUB FSUBR FDIV FDIVR*/ &float_op, &float_op, &fdiv_op, &fdiv_op, }; @@ -1569,7 +1485,7 @@ static const risc86_instruction_t *opcode_timings_d9[8] = /* FLDs FSTs FSTPs*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FLDENV FLDCW FSTENV FSTCW*/ - &vector_float_l_op, &vector_fldcw_op, &vector_float_l_op, &vector_float_op + &complex_float_l_op, &complex_fldcw_op, &complex_float_l_op, &complex_float_op }; static const risc86_instruction_t *opcode_timings_d9_mod3[64] = { @@ -1577,16 +1493,16 @@ static const risc86_instruction_t *opcode_timings_d9_mod3[64] = &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, /*FXCH*/ - &float_op, &float_op, &float_op, &float_op, - &float_op, &float_op, &float_op, &float_op, + &limm_op, &limm_op, &limm_op, &limm_op, + &limm_op, &limm_op, &limm_op, &limm_op, /*FNOP*/ &float_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*FSTP*/ - &float_op, &float_op, &float_op, &float_op, - &float_op, &float_op, &float_op, &float_op, + &float2_op, &float2_op, &float2_op, &float2_op, + &float2_op, &float2_op, &float2_op, &float2_op, /* opFCHS opFABS*/ - &float_op, &float_op, INVALID, INVALID, + &fchs_op, &float_op, INVALID, INVALID, /* opFTST opFXAM*/ &float_op, &float_op, INVALID, INVALID, /* opFLD1 opFLDL2T opFLDL2E opFLDPI*/ @@ -1606,7 +1522,7 @@ static const risc86_instruction_t *opcode_timings_d9_mod3[64] = static const risc86_instruction_t *opcode_timings_da[8] = { /* FIADDl FIMULl FICOMl FICOMPl*/ - &load_float_op, &load_float_op, &load_float_op, &load_float_op, + &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, /* FISUBl FISUBRl FIDIVl FIDIVRl*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, }; @@ -1622,7 +1538,7 @@ static const risc86_instruction_t *opcode_timings_db[8] = /* FLDil FSTil FSTPil*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FLDe FSTPe*/ - INVALID, &vector_flde_op, INVALID, &vector_fste_op + INVALID, &complex_flde_op, INVALID, &complex_fste_op }; static const risc86_instruction_t *opcode_timings_db_mod3[64] = { @@ -1656,14 +1572,14 @@ static const risc86_instruction_t *opcode_timings_db_mod3[64] = static const risc86_instruction_t *opcode_timings_dc[8] = { /* FADDd FMULd FCOMd FCOMPd*/ - &load_float_op, &load_float_op, &load_float_op, &load_float_op, + &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, /* FSUBd FSUBRd FDIVd FDIVRd*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, }; static const risc86_instruction_t *opcode_timings_dc_mod3[8] = { /* opFADDr opFMULr*/ - &float_op, &float_op, INVALID, INVALID, + &fadd_op, &fmul_op, INVALID, INVALID, /* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ &float_op, &float_op, &fdiv_op, &fdiv_op }; @@ -1673,7 +1589,7 @@ static const risc86_instruction_t *opcode_timings_dd[8] = /* FLDd FSTd FSTPd*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FRSTOR FSAVE FSTSW*/ - &vector_float_l_op, INVALID, &vector_float_l_op, &vector_float_l_op + &complex_float_l_op, INVALID, &complex_float_l_op, &complex_float_l_op }; static const risc86_instruction_t *opcode_timings_dd_mod3[8] = { @@ -1686,14 +1602,14 @@ static const risc86_instruction_t *opcode_timings_dd_mod3[8] = static const risc86_instruction_t *opcode_timings_de[8] = { /* FIADDw FIMULw FICOMw FICOMPw*/ - &load_float_op, &load_float_op, &load_float_op, &load_float_op, + &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, /* FISUBw FISUBRw FIDIVw FIDIVRw*/ - &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, }; static const risc86_instruction_t *opcode_timings_de_mod3[8] = { /* FADDP FMULP FCOMPP*/ - &float_op, &float_op, INVALID, &float_op, + &fadd_op, &fmul_op, INVALID, &float_op, /* FSUBP FSUBRP FDIVP FDIVRP*/ &float_op, &float_op, &fdiv_op, &fdiv_op, }; @@ -1703,7 +1619,7 @@ static const risc86_instruction_t *opcode_timings_df[8] = /* FILDiw FISTiw FISTPiw*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FILDiq FBSTP FISTPiq*/ - INVALID, &load_float_op, &vector_float_l_op, &fstore_op, + INVALID, &load_float_op, &complex_float_l_op, &fstore_op, }; static const risc86_instruction_t *opcode_timings_df_mod3[8] = { @@ -1731,34 +1647,27 @@ static p6_unit_t *units; /*Pentium Pro has no MMX*/ static p6_unit_t ppro_units[] = { - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX)}, /*Integer X*/ - {.uop_mask = (1 << UOP_ALU)}, /*Integer Y*/ - {.uop_mask = (1 << UOP_FLOAT)}, /*Floating point*/ - {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD)}, /*Load*/ - {.uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE)}, /*Store*/ - {.uop_mask = (1 << UOP_BRANCH)} /*Branch*/ + {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) | (1 << UOP_FLOAT)}, /*Integer X & Floating point*/ + {.uop_mask = (1 << UOP_ALU) | (1 << UOP_BRANCH)}, /*Integer Y*/ + {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD)}, /*Load*/ + {.uop_mask = (1 << UOP_STORED) | (1 << UOP_FSTORED)}, /*Data Store*/ + {.uop_mask = (1 << UOP_STOREA) | (1 << UOP_FSTOREA)}, /*Address Store*/ }; #define NR_PPRO_UNITS (sizeof(ppro_units) / sizeof(p6_unit_t)) -/*Well, it works I guess*/ +/*Pentium II/Celeron assigns the multiplier to port 0, the shifter to port 1, and shares the MMX ALU*/ static p6_unit_t p2_units[] = { - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) | (1 << UOP_MEU) | /*Integer X*/ - (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL)}, - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_MEU) | /*Integer Y*/ - (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL)}, - {.uop_mask = (1 << UOP_FLOAT)}, /*Floating point*/ + {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) | (1 << UOP_FLOAT) | /*Integer X & Floating point*/ + (1 << UOP_MMX) | (1 << UOP_MMX_MUL)}, + {.uop_mask = (1 << UOP_ALU) | (1 << UOP_BRANCH) | /*Integer Y*/ + (1 << UOP_MMX) | (1 << UOP_MMX_SHIFT)}, {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD)}, /*Load*/ - {.uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE)}, /*Store*/ - {.uop_mask = (1 << UOP_BRANCH)} /*Branch*/ + {.uop_mask = (1 << UOP_STORED) | (1 << UOP_FSTORED) | (1 << UOP_MSTORED)}, /*Data Store*/ + {.uop_mask = (1 << UOP_STOREA) | (1 << UOP_FSTOREA) | (1 << UOP_MSTOREA)}, /*Address Store*/ }; #define NR_P2_UNITS (sizeof(p2_units) / sizeof(p6_unit_t)) -/*First available cycles of shared execution units. Each of these can be submitted - to by ALU X and Y*/ -static int mul_first_available_cycle; -static int shift_first_available_cycle; - static int uop_run(const risc86_uop_t *uop, int decode_time) { int c; @@ -1767,16 +1676,8 @@ static int uop_run(const risc86_uop_t *uop, int decode_time) /*UOP_LIMM does not require execution*/ if (uop->type == UOP_LIMM) - return decode_time; + return decode_time; - if (units == p2_units) /*More hackyness*/ - { - if (uop->type == UOP_MEU_MUL && decode_time < mul_first_available_cycle) - decode_time = mul_first_available_cycle; - else if (uop->type == UOP_MEU_SHIFT && decode_time < mul_first_available_cycle) - decode_time = shift_first_available_cycle; - } - /*Find execution unit for this uOP*/ for (c = 0; c < nr_units; c++) { @@ -1790,43 +1691,36 @@ static int uop_run(const risc86_uop_t *uop, int decode_time) } } if (!best_unit) - fatal("uop_run: can not find execution unit\n"); + fatal("uop_run: can not find execution unit\n"); if (best_start_cycle < decode_time) best_start_cycle = decode_time; - best_unit->first_available_cycle = best_start_cycle + uop->throughput; + best_unit->first_available_cycle = best_start_cycle + uop->latency; - if (units == p2_units) /*More hackyness*/ - { - if (uop->type == UOP_MEU_MUL) - mul_first_available_cycle = best_start_cycle + uop->throughput; - else if (uop->type == UOP_MEU_SHIFT) - shift_first_available_cycle = best_start_cycle + uop->throughput; - } - return best_start_cycle + uop->throughput; + + return best_start_cycle + uop->latency; } -/*The K6 decoder can decode, per clock : - - 1 or 2 'short' instructions, each up to 2 uOPs and 7 bytes long - - 1 'long' instruction, up to 4 uOPs - - 1 'vector' instruction, up to 4 uOPs per cycle, plus (I think) 1 cycle startup delay) +/*The P6 decoders can decode, per clock : + - 1 to 3 'simple' instructions, each up to 1 uOP and 7 bytes long + - 1 'complex' instruction, up to 4 uOPs or 3 per cycle for instructions longer than 4 uOPs */ static struct { int nr_uops; - const risc86_uop_t *uops[4]; + const risc86_uop_t *uops[6]; /*Earliest time a uop can start. If the timestamp is -1, then the uop is part of a dependency chain and the start time is the completion time of the previous uop*/ - int earliest_start[4]; + int earliest_start[6]; } decode_buffer; -#define NR_OPQUADS 6 -/*Timestamps of when the last six opquads completed. The K6 scheduler retires - opquads in order, so this is needed to determine when the next can be scheduled*/ -static int opquad_completion_timestamp[NR_OPQUADS]; -static int next_opquad = 0; +#define NR_OPSEQS 3 +/*Timestamps of when the last three op sequences completed. Technically this is incorrect, +as the actual size of the opseq buffer is 20 bytes and not 18, but I'm restricted to multiples of 6*/ +static int opseq_completion_timestamp[NR_OPSEQS]; +static int next_opseq = 0; #define NR_REGS 8 /*Timestamp of when last operation on an integer register completed*/ @@ -1840,51 +1734,46 @@ static int last_uop_timestamp = 0; void decode_flush_p6() { int c; - int uop_timestamp = 0; - - /*Decoded opquad can not be submitted if there are no free spaces in the - opquad buffer*/ - if (decode_timestamp < opquad_completion_timestamp[next_opquad]) - decode_timestamp = opquad_completion_timestamp[next_opquad]; + int start_timestamp, uop_timestamp = 0; + + /*Decoded opseq can not be submitted if there are no free spaces in the + opseq buffer*/ + if (decode_timestamp < opseq_completion_timestamp[next_opseq]) + decode_timestamp = opseq_completion_timestamp[next_opseq]; /*Ensure that uops can not be submitted before they have been decoded*/ if (decode_timestamp > last_uop_timestamp) last_uop_timestamp = decode_timestamp; /*Submit uops to execution units, and determine the latest completion time*/ - for (c = 0; c < decode_buffer.nr_uops; c++) + for (c = 0; c < (decode_buffer.nr_uops); c++) { - int start_timestamp; - - if (decode_buffer.earliest_start[c] == -1) - start_timestamp = last_uop_timestamp; - else - start_timestamp = decode_buffer.earliest_start[c]; + start_timestamp = decode_buffer.earliest_start[c]; last_uop_timestamp = uop_run(decode_buffer.uops[c], start_timestamp); if (last_uop_timestamp > uop_timestamp) uop_timestamp = last_uop_timestamp; } - /*Calculate opquad completion time. Since opquads complete in order, it + /*Calculate opseq completion time. Since opseqs complete in order, it must be after the last completion.*/ if (uop_timestamp <= last_complete_timestamp) last_complete_timestamp = last_complete_timestamp + 1; else last_complete_timestamp = uop_timestamp; - /*Advance to next opquad in buffer*/ - opquad_completion_timestamp[next_opquad] = last_complete_timestamp; - next_opquad++; - if (next_opquad == NR_OPQUADS) - next_opquad = 0; + /*Advance to next opseq in buffer*/ + opseq_completion_timestamp[next_opseq] = last_complete_timestamp; + next_opseq++; + if (next_opseq == NR_OPSEQS) + next_opseq = 0; decode_timestamp++; decode_buffer.nr_uops = 0; } /*The instruction is only of interest here if it's longer than 7 bytes, as that's the - limit on K6 short decoding*/ + limit on P6 simple decoding*/ static int codegen_timing_instr_length(uint64_t deps, uint32_t fetchdat, int op_32) { int len = prefixes + 1; /*Opcode*/ @@ -1936,8 +1825,9 @@ static int codegen_timing_instr_length(uint64_t deps, uint32_t fetchdat, int op_ static void decode_instruction(const risc86_instruction_t *ins, uint64_t deps, uint32_t fetchdat, int op_32, int bit8) { uint32_t regmask_required; - uint32_t regmask_modified; - int c, d; + uint32_t regmask_modified; + int c; + int d = 0; /*Complex decoder uOPs*/ int earliest_start = 0; decode_type_t decode_type = ins->decode_type; int instr_length = codegen_timing_instr_length(deps, fetchdat, op_32); @@ -1967,87 +1857,68 @@ static void decode_instruction(const risc86_instruction_t *ins, uint64_t deps, u earliest_start = fpu_st_timestamp[reg]; } - /*Short decoders are limited to 7 bytes*/ - if (decode_type == DECODE_SHORT && instr_length > 7) - decode_type = DECODE_LONG; - /*Long decoder is limited to 11 bytes*/ - else if (instr_length > 11) - decode_type = DECODE_VECTOR; + /*Simple decoders are limited to 7 bytes & 1 uOP*/ + if (decode_type == DECODE_SIMPLE && instr_length > 7) + decode_type = DECODE_COMPLEX; + else if (decode_type == DECODE_SIMPLE && ins->nr_uops > 1) + decode_type = DECODE_COMPLEX; switch (decode_type) - { - case DECODE_SHORT: - if (decode_buffer.nr_uops) + { + case DECODE_SIMPLE: + if (decode_buffer.nr_uops - d == 2) { decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; - if (ins->nr_uops > 1) - { - decode_buffer.uops[decode_buffer.nr_uops+1] = &ins->uop[1]; - decode_buffer.earliest_start[decode_buffer.nr_uops+1] = -1; - } - decode_buffer.nr_uops += ins->nr_uops; - + decode_buffer.nr_uops = 3; decode_flush_p6(); + } + else if (decode_buffer.nr_uops - d == 1) + { + decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; + decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; + decode_buffer.nr_uops = 2+d; + if (d) + decode_flush_p6(); + } + else if (decode_buffer.nr_uops) + { + decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; + decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; + decode_buffer.nr_uops = 1+d; } else { - decode_buffer.nr_uops = ins->nr_uops; + decode_buffer.nr_uops = 1; decode_buffer.uops[0] = &ins->uop[0]; decode_buffer.earliest_start[0] = earliest_start; - if (ins->nr_uops > 1) - { - decode_buffer.uops[1] = &ins->uop[1]; - decode_buffer.earliest_start[1] = -1; - } - } + } break; - case DECODE_LONG: + case DECODE_COMPLEX: if (decode_buffer.nr_uops) - decode_flush_p6(); + decode_flush_p6(); /*The 4-1-1 arrangement implies that a complex ins. can't be decoded after a simple one*/ - decode_buffer.nr_uops = ins->nr_uops; - for (c = 0; c < ins->nr_uops; c++) - { - decode_buffer.uops[c] = &ins->uop[c]; - if (c == 0) - decode_buffer.earliest_start[c] = earliest_start; - else - decode_buffer.earliest_start[c] = -1; - } - decode_flush_p6(); - break; - - case DECODE_VECTOR: - if (decode_buffer.nr_uops) - decode_flush_p6(); - - decode_timestamp++; d = 0; - + for (c = 0; c < ins->nr_uops; c++) { decode_buffer.uops[d] = &ins->uop[c]; - if (c == 0) - decode_buffer.earliest_start[d] = earliest_start; - else - decode_buffer.earliest_start[d] = -1; - d++; + decode_buffer.earliest_start[d] = earliest_start; + d++; - if (d == 4) + if ((d == 3) && (ins->nr_uops > 4)) /*Ins. with >4 uOPs require the use of special units only present on 3 translate PLAs*/ { d = 0; - decode_buffer.nr_uops = 4; - decode_flush_p6(); + decode_buffer.nr_uops = 3; + decode_flush_p6(); /*The other two decoders are halted to preserve in-order issue*/ } } - if (d) - { - decode_buffer.nr_uops = d; - decode_flush_p6(); - } - break; + if (d) + { + decode_buffer.nr_uops = d; + } + break; } /*Update write timestamps for any output registers*/ @@ -2098,15 +1969,12 @@ void codegen_timing_p6_block_start() for (c = 0; c < nr_units; c++) units[c].first_available_cycle = 0; - mul_first_available_cycle = 0; - shift_first_available_cycle = 0; - decode_timestamp = 0; last_complete_timestamp = 0; - for (c = 0; c < NR_OPQUADS; c++) - opquad_completion_timestamp[c] = 0; - next_opquad = 0; + for (c = 0; c < NR_OPSEQS; c++) + opseq_completion_timestamp[c] = 0; + next_opseq = 0; for (c = 0; c < NR_REGS; c++) reg_available_timestamp[c] = 0; @@ -2150,59 +2018,8 @@ void codegen_timing_p6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint switch (last_prefix) { case 0x0f: - if (opcode == 0x0f) - { - /*3DNow has the actual opcode after ModR/M, SIB and any offset*/ - uint32_t opcode_pc = op_pc + 1; /*Byte after ModR/M*/ - uint8_t modrm = fetchdat & 0xff; - uint8_t sib = (fetchdat >> 8) & 0xff; - - if ((modrm & 0xc0) != 0xc0) - { - if (op_32 & 0x200) - { - if ((modrm & 7) == 4) - { - /* Has SIB*/ - opcode_pc++; - if ((modrm & 0xc0) == 0x40) - opcode_pc++; - else if ((modrm & 0xc0) == 0x80) - opcode_pc += 4; - else if ((sib & 0x07) == 0x05) - opcode_pc += 4; - } - else - { - if ((modrm & 0xc0) == 0x40) - opcode_pc++; - else if ((modrm & 0xc0) == 0x80) - opcode_pc += 4; - else if ((modrm & 0xc7) == 0x05) - opcode_pc += 4; - } - } - else - { - if ((modrm & 0xc0) == 0x40) - opcode_pc++; - else if ((modrm & 0xc0) == 0x80) - opcode_pc += 2; - else if ((modrm & 0xc7) == 0x06) - opcode_pc += 2; - } - } - - opcode = fastreadb(cs + opcode_pc); - - ins_table = mod3 ? opcode_timings_0f0f_mod3 : opcode_timings_0f0f; - deps = mod3 ? opcode_deps_0f0f_mod3 : opcode_deps_0f0f; - } - else - { - ins_table = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; - deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; - } + ins_table = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; break; case 0xd8: @@ -2298,7 +2115,7 @@ void codegen_timing_p6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint if (ins_table[opcode]) decode_instruction(ins_table[opcode], deps[opcode], fetchdat, op_32, bit8); else - decode_instruction(&vector_alu1_op, 0, fetchdat, op_32, bit8); + decode_instruction(&complex_alu1_op, 0, fetchdat, op_32, bit8); codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); } diff --git a/src/cpu_new/codegen_timing_p6.c b/src/cpu_new/codegen_timing_p6.c index d84a18904..a51b4df32 100644 --- a/src/cpu_new/codegen_timing_p6.c +++ b/src/cpu_new/codegen_timing_p6.c @@ -1,4 +1,5 @@ -/*Hacky P6 timings based on K6 timings*/ +/*Basic P6 timing model by plant/nerd73. Based on the K6 timing model*/ +/*Some cycle timings come from https://www.agner.org/optimize/instruction_tables.pdf*/ #include #include <86box/86box.h> #include "cpu.h" @@ -18,24 +19,26 @@ typedef enum uop_type_t UOP_ALU = 0, /*Executes in Integer X or Y units*/ UOP_ALUX, /*Executes in Integer X unit*/ UOP_LOAD, /*Executes in Load unit*/ - UOP_STORE, /*Executes in Store unit*/ + UOP_STORED, /*Executes in Data Store unit*/ + UOP_STOREA, /*Executes in Address Store unit*/ UOP_FLOAD, /*Executes in Load unit*/ - UOP_FSTORE, /*Executes in Store unit*/ + UOP_FSTORED, /*Executes in Data Store unit*/ + UOP_FSTOREA, /*Executes in Address Store unit*/ UOP_MLOAD, /*Executes in Load unit*/ - UOP_MSTORE, /*Executes in Store unit*/ + UOP_MSTORED, /*Executes in Data Store unit*/ + UOP_MSTOREA, /*Executes in Address Store unit*/ UOP_FLOAT, /*Executes in Floating Point unit*/ - UOP_MEU, /*Executes in Multimedia unit*/ - UOP_MEU_SHIFT, /*Executes in Multimedia unit or ALU X/Y. Uses MMX shifter*/ - UOP_MEU_MUL, /*Executes in Multimedia unit or ALU X/Y. Uses MMX multiplier*/ + UOP_MMX, /*Executes in Integer X or Y units as MMX*/ + UOP_MMX_SHIFT, /*Executes in Integer Y unit. Uses MMX shifter*/ + UOP_MMX_MUL, /*Executes in Integer X unit. Uses MMX multiplier*/ UOP_BRANCH, /*Executes in Branch unit*/ UOP_LIMM /*Does not require an execution unit*/ } uop_type_t; typedef enum decode_type_t { - DECODE_SHORT, - DECODE_LONG, - DECODE_VECTOR + DECODE_SIMPLE, + DECODE_COMPLEX, } decode_type_t; #define MAX_UOPS 10 @@ -43,7 +46,6 @@ typedef enum decode_type_t typedef struct risc86_uop_t { uop_type_t type; - double throughput; double latency; } risc86_uop_t; @@ -57,804 +59,885 @@ typedef struct risc86_instruction_t static const risc86_instruction_t alu_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t alux_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1} }; static const risc86_instruction_t load_alu_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t load_alux_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1} }; static const risc86_instruction_t alu_store_op = { - .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} + }; static const risc86_instruction_t alux_store_op = { - .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} }; static const risc86_instruction_t branch_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_BRANCH, .latency = 1} }; static const risc86_instruction_t limm_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LIMM, .throughput = 1, .latency = 1} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_LIMM, .latency = 1} }; static const risc86_instruction_t load_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1} }; static const risc86_instruction_t store_op = { - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1} + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = {.type = UOP_STOREA, .latency = 1} }; static const risc86_instruction_t bswap_op = { - .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, }; static const risc86_instruction_t leave_op = { .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t lods_op = { .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t loop_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_BRANCH, .latency = 1} }; static const risc86_instruction_t mov_reg_seg_op = { .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, }; static const risc86_instruction_t movs_op = { .nr_uops = 4, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t pop_reg_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t pop_mem_op = { - .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t push_imm_op = { - .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2}, + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = {.type = UOP_STOREA, .latency = 1}, }; static const risc86_instruction_t push_mem_op = { - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1} }; static const risc86_instruction_t push_seg_op = { - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t stos_op = { - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t test_reg_op = { .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t test_reg_b_op = { .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1} }; static const risc86_instruction_t test_mem_imm_op = { .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t test_mem_imm_b_op = { .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1} }; static const risc86_instruction_t xchg_op = { .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t mmx_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX, .latency = 1.5} }; static const risc86_instruction_t mmx_mul_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU_MUL, .throughput = 1.5, .latency = 3} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX_MUL, .latency = 1.5} }; static const risc86_instruction_t mmx_shift_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU_SHIFT, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX_SHIFT, .latency = 1.5} }; static const risc86_instruction_t load_mmx_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU, .throughput = 1.5, .latency = 3} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_MMX, .latency = 1.5} }; static const risc86_instruction_t load_mmx_mul_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU_MUL, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 3}, + .uop[1] = {.type = UOP_MMX_MUL, .latency = 1.5} }; static const risc86_instruction_t load_mmx_shift_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU_SHIFT, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 3}, + .uop[1] = {.type = UOP_MMX_SHIFT, .latency = 1.5} }; static const risc86_instruction_t mload_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MLOAD, .throughput = 1, .latency = 2} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_MLOAD, .latency = 3}, }; static const risc86_instruction_t mstore_op = { - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MSTORE, .throughput = 1, .latency = 1} + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_MSTORED, .latency = 1}, + .uop[1] = {.type = UOP_MSTOREA, .latency = 1} }; static const risc86_instruction_t pmul_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX_MUL, .latency = 1.5} }; static const risc86_instruction_t pmul_mem_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 3}, + .uop[1] = {.type = UOP_MMX_MUL, .latency = 1.5} }; - static const risc86_instruction_t float_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 1} +}; +static const risc86_instruction_t fadd_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 3} +}; +static const risc86_instruction_t fmul_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_ALU, .latency = 5} +}; +static const risc86_instruction_t float2_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 1} +}; +static const risc86_instruction_t fchs_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 2}, + .uop[1] = {.type = UOP_FLOAT, .latency = 2}, + .uop[2] = {.type = UOP_FLOAT, .latency = 2} }; static const risc86_instruction_t load_float_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_FLOAT, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 1} +}; +static const risc86_instruction_t load_fadd_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 3} +}; +static const risc86_instruction_t load_fmul_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 5} }; static const risc86_instruction_t fstore_op = { - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FSTORED, .latency = 1}, + .uop[1] = {.type = UOP_FSTOREA, .latency = 1}, +}; +static const risc86_instruction_t load_fiadd_op = +{ + .nr_uops = 7, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 1}, + .uop[2] = {.type = UOP_FLOAT, .latency = 1}, + .uop[3] = {.type = UOP_FLOAT, .latency = 1}, + .uop[4] = {.type = UOP_FLOAT, .latency = 1}, + .uop[5] = {.type = UOP_FLOAT, .latency = 1}, + .uop[6] = {.type = UOP_FLOAT, .latency = 1} }; - static const risc86_instruction_t fdiv_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 38, .latency = 38} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 32} }; static const risc86_instruction_t fdiv_mem_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_FLOAT, .throughput = 38, .latency = 38} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 38} }; static const risc86_instruction_t fsin_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 62, .latency = 62} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 60} }; static const risc86_instruction_t fsqrt_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 49, .latency = 49} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 69} }; -static const risc86_instruction_t vector_fldcw_op = +static const risc86_instruction_t complex_fldcw_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 8, .latency = 8} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 10} }; -static const risc86_instruction_t vector_float_op = +static const risc86_instruction_t complex_float_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 1} }; -static const risc86_instruction_t vector_float_l_op = +static const risc86_instruction_t complex_float_l_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 50, .latency = 50} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 50} }; -static const risc86_instruction_t vector_flde_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} -}; -static const risc86_instruction_t vector_fste_op = +static const risc86_instruction_t complex_flde_op = { .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2}, - .uop[1] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = {.type = UOP_FLOAD, .latency = 1}, + .uop[2] = {.type = UOP_FLOAT, .latency = 2} +}; +static const risc86_instruction_t complex_fste_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 2}, + .uop[1] = {.type = UOP_FSTORED, .latency = 1}, + .uop[2] = {.type = UOP_FSTOREA, .latency = 1} }; -static const risc86_instruction_t vector_alu1_op = +static const risc86_instruction_t complex_alu1_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1} }; -static const risc86_instruction_t vector_alu2_op = +static const risc86_instruction_t complex_alu2_op = { .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} }; -static const risc86_instruction_t vector_alu3_op = +static const risc86_instruction_t complex_alu3_op = { .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} }; -static const risc86_instruction_t vector_alu6_op = +static const risc86_instruction_t complex_alu6_op = { .nr_uops = 6, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1}, + .uop[4] = {.type = UOP_ALU, .latency = 1}, + .uop[5] = {.type = UOP_ALU, .latency = 1} }; -static const risc86_instruction_t vector_alux1_op = +static const risc86_instruction_t complex_alux1_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1} }; -static const risc86_instruction_t vector_alux3_op = +static const risc86_instruction_t complex_alux3_op = { .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .latency = 1} }; -static const risc86_instruction_t vector_alux6_op = +static const risc86_instruction_t complex_alux6_op = { - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .nr_uops = 6, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .latency = 1}, + .uop[3] = {.type = UOP_ALUX, .latency = 1}, + .uop[4] = {.type = UOP_ALUX, .latency = 1}, + .uop[5] = {.type = UOP_ALUX, .latency = 1} }; -static const risc86_instruction_t vector_alu_store_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_alux_store_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_arpl_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, - .uop[1] = {.type = UOP_ALU, .throughput = 3, .latency = 3} -}; -static const risc86_instruction_t vector_bound_op = +static const risc86_instruction_t complex_alu_store_op = { .nr_uops = 4, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1}, }; -static const risc86_instruction_t vector_bsx_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 10, .latency = 10} -}; -static const risc86_instruction_t vector_call_far_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_cli_sti_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 7, .latency = 7} -}; -static const risc86_instruction_t vector_cmps_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_cmpsb_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_cmpxchg_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, -}; -static const risc86_instruction_t vector_cmpxchg_b_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, -}; -static const risc86_instruction_t vector_cpuid_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 22, .latency = 22} -}; -static const risc86_instruction_t vector_div16_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 10, .latency = 10} -}; -static const risc86_instruction_t vector_div16_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 10, .latency = 10} -}; -static const risc86_instruction_t vector_div32_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 18, .latency = 18} -}; -static const risc86_instruction_t vector_div32_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 18, .latency = 18} -}; -static const risc86_instruction_t vector_emms_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 25, .latency = 25} -}; -static const risc86_instruction_t vector_enter_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 10, .latency = 10} -}; -static const risc86_instruction_t vector_femms_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 6, .latency = 6} -}; -static const risc86_instruction_t vector_in_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11} -}; -static const risc86_instruction_t vector_ins_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_int_op = -{ - .nr_uops = 5, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 20, .latency = 20}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_iret_op = -{ - .nr_uops = 5, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[3] = {.type = UOP_ALU, .throughput = 20, .latency = 20}, - .uop[4] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_invd_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1000, .latency = 1000} -}; -static const risc86_instruction_t vector_jmp_far_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_load_alu_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_load_alux_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_loop_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_lss_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_ALU, .throughput = 3, .latency = 3} -}; -static const risc86_instruction_t vector_mov_mem_seg_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_mov_seg_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 3, .latency = 3} -}; -static const risc86_instruction_t vector_mov_seg_reg_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3} -}; -static const risc86_instruction_t vector_mul_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_mul_mem_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_mul64_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_mul64_mem_op = +static const risc86_instruction_t complex_alux_store_op = { .nr_uops = 4, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} }; -static const risc86_instruction_t vector_out_op = +static const risc86_instruction_t complex_arpl_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3}, + .uop[1] = {.type = UOP_ALU, .latency = 3} +}; +static const risc86_instruction_t complex_bound_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_LOAD, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_bsx_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 10, .latency = 10} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 10} }; -static const risc86_instruction_t vector_outs_op = +static const risc86_instruction_t complex_call_far_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_cli_sti_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 7} +}; +static const risc86_instruction_t complex_cmps_op = { .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 10, .latency = 10}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} }; -static const risc86_instruction_t vector_pusha_op = +static const risc86_instruction_t complex_cmpsb_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_cmpxchg_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} +}; +static const risc86_instruction_t complex_cmpxchg_b_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} +}; +static const risc86_instruction_t complex_cpuid_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 23} +}; +static const risc86_instruction_t complex_div16_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 10} +}; +static const risc86_instruction_t complex_div16_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 10} +}; +static const risc86_instruction_t complex_div32_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 18} +}; +static const risc86_instruction_t complex_div32_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 18} +}; +static const risc86_instruction_t complex_emms_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 25} +}; +static const risc86_instruction_t complex_enter_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = {.type = UOP_STOREA, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 10} +}; +static const risc86_instruction_t complex_femms_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 6} +}; +static const risc86_instruction_t complex_in_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 10} +}; +static const risc86_instruction_t complex_ins_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 10}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_int_op = { .nr_uops = 8, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[6] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[7] = {.type = UOP_STORE, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 20}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_STORED, .latency = 1}, + .uop[4] = {.type = UOP_STOREA, .latency = 1}, + .uop[5] = {.type = UOP_STORED, .latency = 1}, + .uop[6] = {.type = UOP_STOREA, .latency = 1}, + .uop[7] = {.type = UOP_BRANCH, .latency = 1} }; -static const risc86_instruction_t vector_popa_op = +static const risc86_instruction_t complex_iret_op = +{ + .nr_uops = 5, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 3}, + .uop[1] = {.type = UOP_LOAD, .latency = 3}, + .uop[2] = {.type = UOP_LOAD, .latency = 3}, + .uop[3] = {.type = UOP_ALU, .latency = 20}, + .uop[4] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_invd_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 500} +}; +static const risc86_instruction_t complex_jmp_far_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3}, + .uop[1] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_load_alu_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_load_alux_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t complex_loop_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_lss_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_LOAD, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 3} +}; +static const risc86_instruction_t complex_mov_mem_seg_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, +}; +static const risc86_instruction_t complex_mov_seg_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 3} +}; +static const risc86_instruction_t complex_mov_seg_reg_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3} +}; +static const risc86_instruction_t complex_mul_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t complex_mul_mem_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t complex_mul64_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t complex_mul64_mem_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .latency = 1}, + .uop[3] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t complex_out_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 10}, + .uop[1] = {.type = UOP_STOREA, .latency = 10}, +}; +static const risc86_instruction_t complex_outs_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 10}, + .uop[1] = {.type = UOP_STOREA, .latency = 10}, + .uop[2] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_pusha_op = { .nr_uops = 8, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[6] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[7] = {.type = UOP_LOAD, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 2}, + .uop[1] = {.type = UOP_STOREA, .latency = 2}, + .uop[2] = {.type = UOP_STORED, .latency = 2}, + .uop[3] = {.type = UOP_STOREA, .latency = 2}, + .uop[4] = {.type = UOP_STORED, .latency = 2}, + .uop[5] = {.type = UOP_STOREA, .latency = 2}, + .uop[6] = {.type = UOP_STORED, .latency = 2}, + .uop[7] = {.type = UOP_STOREA, .latency = 2} }; -static const risc86_instruction_t vector_popf_op = +static const risc86_instruction_t complex_popa_op = +{ + .nr_uops = 8, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_LOAD, .latency = 1}, + .uop[2] = {.type = UOP_LOAD, .latency = 1}, + .uop[3] = {.type = UOP_LOAD, .latency = 1}, + .uop[4] = {.type = UOP_LOAD, .latency = 1}, + .uop[5] = {.type = UOP_LOAD, .latency = 1}, + .uop[6] = {.type = UOP_LOAD, .latency = 1}, + .uop[7] = {.type = UOP_LOAD, .latency = 1} +}; +static const risc86_instruction_t complex_popf_op = { .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 17, .latency = 17} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 3}, + .uop[1] = {.type = UOP_ALUX, .latency = 17} }; -static const risc86_instruction_t vector_push_mem_op = +static const risc86_instruction_t complex_push_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = {.type = UOP_STOREA, .latency = 1} +}; +static const risc86_instruction_t complex_pushf_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1} +}; +static const risc86_instruction_t complex_ret_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_retf_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 3}, + .uop[2] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_scas_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_scasb_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_setcc_mem_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_FSTORED, .latency = 1}, + .uop[3] = {.type = UOP_FSTOREA, .latency = 1} +}; +static const risc86_instruction_t complex_setcc_reg_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_test_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_test_mem_b_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t complex_xchg_mem_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_xlat_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_LOAD, .latency = 1} +}; +static const risc86_instruction_t complex_wbinvd_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_pushf_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_ret_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_retf_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, - .uop[2] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_scas_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_scasb_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_setcc_mem_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_setcc_reg_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_test_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_test_mem_b_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_xchg_mem_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_xlat_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2} -}; -static const risc86_instruction_t vector_wbinvd_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 10000, .latency = 10000} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 10000} }; + #define INVALID NULL static const risc86_instruction_t *opcode_timings[256] = @@ -862,38 +945,38 @@ static const risc86_instruction_t *opcode_timings[256] = /* ADD ADD ADD ADD*/ /*00*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, /* ADD ADD PUSH ES POP ES*/ - &alux_op, &alu_op, &push_seg_op, &vector_mov_seg_mem_op, + &alux_op, &alu_op, &push_seg_op, &complex_mov_seg_mem_op, /* OR OR OR OR*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, /* OR OR PUSH CS */ &alux_op, &alu_op, &push_seg_op, INVALID, /* ADC ADC ADC ADC*/ -/*10*/ &vector_alux_store_op, &vector_alu_store_op, &vector_load_alux_op, &vector_load_alu_op, +/*10*/ &complex_alux_store_op,&complex_alu_store_op, &complex_load_alux_op,&complex_load_alu_op, /* ADC ADC PUSH SS POP SS*/ - &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, + &complex_alux1_op, &complex_alu1_op, &push_seg_op, &complex_mov_seg_mem_op, /* SBB SBB SBB SBB*/ -/*10*/ &vector_alux_store_op, &vector_alu_store_op, &vector_load_alux_op, &vector_load_alu_op, +/*10*/ &complex_alux_store_op,&complex_alu_store_op, &complex_load_alux_op,&complex_load_alu_op, /* SBB SBB PUSH DS POP DS*/ - &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, + &complex_alux1_op, &complex_alu1_op, &push_seg_op, &complex_mov_seg_mem_op, /* AND AND AND AND*/ /*20*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, /* AND AND DAA*/ - &alux_op, &alu_op, INVALID, &vector_alux1_op, + &alux_op, &alu_op, INVALID, &complex_alux1_op, /* SUB SUB SUB SUB*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, /* SUB SUB DAS*/ - &alux_op, &alu_op, INVALID, &vector_alux1_op, + &alux_op, &alu_op, INVALID, &complex_alux1_op, /* XOR XOR XOR XOR*/ /*30*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, /* XOR XOR AAA*/ - &alux_op, &alu_op, INVALID, &vector_alux6_op, + &alux_op, &alu_op, INVALID, &complex_alux6_op, /* CMP CMP CMP CMP*/ &load_alux_op, &load_alu_op, &load_alux_op, &load_alu_op, /* CMP CMP AAS*/ - &alux_op, &alu_op, INVALID, &vector_alux6_op, + &alux_op, &alu_op, INVALID, &complex_alux6_op, /* INC EAX INC ECX INC EDX INC EBX*/ /*40*/ &alu_op, &alu_op, &alu_op, &alu_op, @@ -914,12 +997,12 @@ static const risc86_instruction_t *opcode_timings[256] = &pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, /* PUSHA POPA BOUND ARPL*/ -/*60*/ &vector_pusha_op, &vector_popa_op, &vector_bound_op, &vector_arpl_op, +/*60*/ &complex_pusha_op, &complex_popa_op, &complex_bound_op, &complex_arpl_op, INVALID, INVALID, INVALID, INVALID, /* PUSH imm IMUL PUSH imm IMUL*/ - &push_imm_op, &vector_mul_op, &push_imm_op, &vector_mul_op, + &push_imm_op, &complex_mul_op, &push_imm_op, &complex_mul_op, /* INSB INSW OUTSB OUTSW*/ - &vector_ins_op, &vector_ins_op, &vector_outs_op, &vector_outs_op, + &complex_ins_op, &complex_ins_op, &complex_outs_op, &complex_outs_op, /* Jxx*/ /*70*/ &branch_op, &branch_op, &branch_op, &branch_op, @@ -929,29 +1012,29 @@ static const risc86_instruction_t *opcode_timings[256] = /*80*/ INVALID, INVALID, INVALID, INVALID, /* TEST TEST XCHG XCHG*/ - &vector_test_mem_b_op, &vector_test_mem_op, &vector_xchg_mem_op, &vector_xchg_mem_op, + &complex_test_mem_b_op, &complex_test_mem_op, &complex_xchg_mem_op, &complex_xchg_mem_op, /* MOV MOV MOV MOV*/ &store_op, &store_op, &load_op, &load_op, /* MOV from seg LEA MOV to seg POP*/ - &vector_mov_mem_seg_op, &store_op, &vector_mov_seg_mem_op, &pop_mem_op, + &complex_mov_mem_seg_op, &store_op, &complex_mov_seg_mem_op, &pop_mem_op, /* NOP XCHG XCHG XCHG*/ /*90*/ &limm_op, &xchg_op, &xchg_op, &xchg_op, /* XCHG XCHG XCHG XCHG*/ &xchg_op, &xchg_op, &xchg_op, &xchg_op, /* CBW CWD CALL far WAIT*/ - &vector_alu1_op, &vector_alu1_op, &vector_call_far_op, &limm_op, + &complex_alu1_op, &complex_alu1_op, &complex_call_far_op, &limm_op, /* PUSHF POPF SAHF LAHF*/ - &vector_pushf_op, &vector_popf_op, &vector_alux1_op, &vector_alux1_op, + &complex_pushf_op, &complex_popf_op, &complex_alux1_op, &complex_alux1_op, /* MOV MOV MOV MOV*/ /*a0*/ &load_op, &load_op, &store_op, &store_op, /* MOVSB MOVSW CMPSB CMPSW*/ - &movs_op, &movs_op, &vector_cmpsb_op, &vector_cmps_op, + &movs_op, &movs_op, &complex_cmpsb_op, &complex_cmps_op, /* TEST TEST STOSB STOSW*/ &test_reg_b_op, &test_reg_op, &stos_op, &stos_op, /* LODSB LODSW SCASB SCASW*/ - &lods_op, &lods_op, &vector_scasb_op, &vector_scas_op, + &lods_op, &lods_op, &complex_scasb_op, &complex_scas_op, /* MOV*/ /*b0*/ &limm_op, &limm_op, &limm_op, &limm_op, @@ -960,37 +1043,37 @@ static const risc86_instruction_t *opcode_timings[256] = &limm_op, &limm_op, &limm_op, &limm_op, /* RET imm RET*/ -/*c0*/ INVALID, INVALID, &vector_ret_op, &vector_ret_op, +/*c0*/ INVALID, INVALID, &complex_ret_op, &complex_ret_op, /* LES LDS MOV MOV*/ - &vector_lss_op, &vector_lss_op, &store_op, &store_op, + &complex_lss_op, &complex_lss_op, &store_op, &store_op, /* ENTER LEAVE RETF RETF*/ - &vector_enter_op, &leave_op, &vector_retf_op, &vector_retf_op, + &complex_enter_op, &leave_op, &complex_retf_op, &complex_retf_op, /* INT3 INT INTO IRET*/ - &vector_int_op, &vector_int_op, &vector_int_op, &vector_iret_op, + &complex_int_op, &complex_int_op, &complex_int_op, &complex_iret_op, /*d0*/ INVALID, INVALID, INVALID, INVALID, /* AAM AAD SETALC XLAT*/ - &vector_alux6_op, &vector_alux3_op, &vector_alux1_op, &vector_xlat_op, + &complex_alux6_op, &complex_alux3_op, &complex_alux1_op, &complex_xlat_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /* LOOPNE LOOPE LOOP JCXZ*/ -/*e0*/ &vector_loop_op, &vector_loop_op, &loop_op, &vector_loop_op, +/*e0*/ &complex_loop_op, &complex_loop_op, &loop_op, &complex_loop_op, /* IN AL IN AX OUT_AL OUT_AX*/ - &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, + &complex_in_op, &complex_in_op, &complex_out_op, &complex_out_op, /* CALL JMP JMP JMP*/ - &store_op, &branch_op, &vector_jmp_far_op, &branch_op, + &store_op, &branch_op, &complex_jmp_far_op, &branch_op, /* IN AL IN AX OUT_AL OUT_AX*/ - &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, + &complex_in_op, &complex_in_op, &complex_out_op, &complex_out_op, /* REPNE REPE*/ /*f0*/ INVALID, INVALID, INVALID, INVALID, /* HLT CMC*/ - &vector_alux1_op, &vector_alu2_op, INVALID, INVALID, + &complex_alux1_op, &complex_alu2_op, INVALID, INVALID, /* CLC STC CLI STI*/ - &vector_alu1_op, &vector_alu1_op, &vector_cli_sti_op, &vector_cli_sti_op, + &complex_alu1_op, &complex_alu1_op, &complex_cli_sti_op, &complex_cli_sti_op, /* CLD STD INCDEC*/ - &vector_alu1_op, &vector_alu1_op, &alux_store_op, INVALID + &complex_alu1_op, &complex_alu1_op, &alux_store_op, INVALID }; static const risc86_instruction_t *opcode_timings_mod3[256] = @@ -998,38 +1081,38 @@ static const risc86_instruction_t *opcode_timings_mod3[256] = /* ADD ADD ADD ADD*/ /*00*/ &alux_op, &alu_op, &alux_op, &alu_op, /* ADD ADD PUSH ES POP ES*/ - &alux_op, &alu_op, &push_seg_op, &vector_mov_seg_mem_op, + &alux_op, &alu_op, &push_seg_op, &complex_mov_seg_mem_op, /* OR OR OR OR*/ &alux_op, &alu_op, &alux_op, &alu_op, /* OR OR PUSH CS */ &alux_op, &alu_op, &push_seg_op, INVALID, /* ADC ADC ADC ADC*/ -/*10*/ &vector_alux1_op, &vector_alu1_op, &vector_alux1_op, &vector_alu1_op, +/*10*/ &complex_alux1_op, &complex_alu1_op, &complex_alux1_op, &complex_alu1_op, /* ADC ADC PUSH SS POP SS*/ - &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, + &complex_alux1_op, &complex_alu1_op, &push_seg_op, &complex_mov_seg_mem_op, /* SBB SBB SBB SBB*/ - &vector_alux1_op, &vector_alu1_op, &vector_alux1_op, &vector_alu1_op, + &complex_alux1_op, &complex_alu1_op, &complex_alux1_op, &complex_alu1_op, /* SBB SBB PUSH DS POP DS*/ - &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, + &complex_alux1_op, &complex_alu1_op, &push_seg_op, &complex_mov_seg_mem_op, /* AND AND AND AND*/ /*20*/ &alux_op, &alu_op, &alux_op, &alu_op, /* AND AND DAA*/ - &alux_op, &alu_op, INVALID, &vector_alux1_op, + &alux_op, &alu_op, INVALID, &complex_alux1_op, /* SUB SUB SUB SUB*/ &alux_op, &alu_op, &alux_op, &alu_op, /* SUB SUB DAS*/ - &alux_op, &alu_op, INVALID, &vector_alux1_op, + &alux_op, &alu_op, INVALID, &complex_alux1_op, /* XOR XOR XOR XOR*/ /*30*/ &alux_op, &alu_op, &alux_op, &alu_op, /* XOR XOR AAA*/ - &alux_op, &alu_op, INVALID, &vector_alux6_op, + &alux_op, &alu_op, INVALID, &complex_alux6_op, /* CMP CMP CMP CMP*/ &alux_op, &alu_op, &alux_op, &alu_op, /* CMP CMP AAS*/ - &alux_op, &alu_op, INVALID, &vector_alux6_op, + &alux_op, &alu_op, INVALID, &complex_alux6_op, /* INC EAX INC ECX INC EDX INC EBX*/ /*40*/ &alu_op, &alu_op, &alu_op, &alu_op, @@ -1050,12 +1133,12 @@ static const risc86_instruction_t *opcode_timings_mod3[256] = &pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, /* PUSHA POPA BOUND ARPL*/ -/*60*/ &vector_pusha_op, &vector_popa_op, &vector_bound_op, &vector_arpl_op, +/*60*/ &complex_pusha_op, &complex_popa_op, &complex_bound_op, &complex_arpl_op, INVALID, INVALID, INVALID, INVALID, /* PUSH imm IMUL PUSH imm IMUL*/ - &push_imm_op, &vector_mul_op, &push_imm_op, &vector_mul_op, + &push_imm_op, &complex_mul_op, &push_imm_op, &complex_mul_op, /* INSB INSW OUTSB OUTSW*/ - &vector_ins_op, &vector_ins_op, &vector_outs_op, &vector_outs_op, + &complex_ins_op, &complex_ins_op, &complex_outs_op, &complex_outs_op, /* Jxx*/ /*70*/ &branch_op, &branch_op, &branch_op, &branch_op, @@ -1065,29 +1148,29 @@ static const risc86_instruction_t *opcode_timings_mod3[256] = /*80*/ INVALID, INVALID, INVALID, INVALID, /* TEST TEST XCHG XCHG*/ - &vector_alu1_op, &vector_alu1_op, &vector_alu3_op, &vector_alu3_op, + &complex_alu1_op, &complex_alu1_op, &complex_alu3_op, &complex_alu3_op, /* MOV MOV MOV MOV*/ &store_op, &store_op, &load_op, &load_op, /* MOV from seg LEA MOV to seg POP*/ - &mov_reg_seg_op, &store_op, &vector_mov_seg_reg_op, &pop_reg_op, + &mov_reg_seg_op, &store_op, &complex_mov_seg_reg_op, &pop_reg_op, /* NOP XCHG XCHG XCHG*/ /*90*/ &limm_op, &xchg_op, &xchg_op, &xchg_op, /* XCHG XCHG XCHG XCHG*/ &xchg_op, &xchg_op, &xchg_op, &xchg_op, /* CBW CWD CALL far WAIT*/ - &vector_alu1_op, &vector_alu1_op, &vector_call_far_op, &limm_op, + &complex_alu1_op, &complex_alu1_op, &complex_call_far_op, &limm_op, /* PUSHF POPF SAHF LAHF*/ - &vector_pushf_op, &vector_popf_op, &vector_alux1_op, &vector_alux1_op, + &complex_pushf_op, &complex_popf_op, &complex_alux1_op, &complex_alux1_op, /* MOV MOV MOV MOV*/ /*a0*/ &load_op, &load_op, &store_op, &store_op, /* MOVSB MOVSW CMPSB CMPSW*/ - &movs_op, &movs_op, &vector_cmpsb_op, &vector_cmps_op, + &movs_op, &movs_op, &complex_cmpsb_op, &complex_cmps_op, /* TEST TEST STOSB STOSW*/ &test_reg_b_op, &test_reg_op, &stos_op, &stos_op, /* LODSB LODSW SCASB SCASW*/ - &lods_op, &lods_op, &vector_scasb_op, &vector_scas_op, + &lods_op, &lods_op, &complex_scasb_op, &complex_scas_op, /* MOV*/ /*b0*/ &limm_op, &limm_op, &limm_op, &limm_op, @@ -1096,57 +1179,57 @@ static const risc86_instruction_t *opcode_timings_mod3[256] = &limm_op, &limm_op, &limm_op, &limm_op, /* RET imm RET*/ -/*c0*/ INVALID, INVALID, &vector_ret_op, &vector_ret_op, +/*c0*/ INVALID, INVALID, &complex_ret_op, &complex_ret_op, /* LES LDS MOV MOV*/ - &vector_lss_op, &vector_lss_op, &store_op, &store_op, + &complex_lss_op, &complex_lss_op, &store_op, &store_op, /* ENTER LEAVE RETF RETF*/ - &vector_enter_op, &leave_op, &vector_retf_op, &vector_retf_op, + &complex_enter_op, &leave_op, &complex_retf_op, &complex_retf_op, /* INT3 INT INTO IRET*/ - &vector_int_op, &vector_int_op, &vector_int_op, &vector_iret_op, + &complex_int_op, &complex_int_op, &complex_int_op, &complex_iret_op, /*d0*/ INVALID, INVALID, INVALID, INVALID, /* AAM AAD SETALC XLAT*/ - &vector_alux6_op, &vector_alux3_op, &vector_alux1_op, &vector_xlat_op, + &complex_alux6_op, &complex_alux3_op, &complex_alux1_op, &complex_xlat_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /* LOOPNE LOOPE LOOP JCXZ*/ -/*e0*/ &vector_loop_op, &vector_loop_op, &loop_op, &vector_loop_op, +/*e0*/ &complex_loop_op, &complex_loop_op, &loop_op, &complex_loop_op, /* IN AL IN AX OUT_AL OUT_AX*/ - &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, + &complex_in_op, &complex_in_op, &complex_out_op, &complex_out_op, /* CALL JMP JMP JMP*/ - &store_op, &branch_op, &vector_jmp_far_op, &branch_op, + &store_op, &branch_op, &complex_jmp_far_op, &branch_op, /* IN AL IN AX OUT_AL OUT_AX*/ - &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, + &complex_in_op, &complex_in_op, &complex_out_op, &complex_out_op, /* REPNE REPE*/ /*f0*/ INVALID, INVALID, INVALID, INVALID, /* HLT CMC*/ - &vector_alux1_op, &vector_alu2_op, INVALID, INVALID, + &complex_alux1_op, &complex_alu2_op, INVALID, INVALID, /* CLC STC CLI STI*/ - &vector_alu1_op, &vector_alu1_op, &vector_cli_sti_op, &vector_cli_sti_op, + &complex_alu1_op, &complex_alu1_op, &complex_cli_sti_op, &complex_cli_sti_op, /* CLD STD INCDEC*/ - &vector_alu1_op, &vector_alu1_op, &vector_alux1_op, INVALID + &complex_alu1_op, &complex_alu1_op, &complex_alux1_op, INVALID }; static const risc86_instruction_t *opcode_timings_0f[256] = { -/*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, - INVALID, &vector_alu6_op, &vector_alu6_op, INVALID, - &vector_invd_op, &vector_wbinvd_op, INVALID, INVALID, - INVALID, &load_op, &vector_femms_op, INVALID, +/*00*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, + INVALID, &complex_alu6_op, &complex_alu6_op, INVALID, + &complex_invd_op, &complex_wbinvd_op, INVALID, INVALID, + INVALID, &load_op, &complex_femms_op, INVALID, /*10*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, -/*20*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, - &vector_alu6_op, &vector_alu6_op, INVALID, INVALID, +/*20*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, + &complex_alu6_op, &complex_alu6_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, -/*30*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID, +/*30*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -1167,7 +1250,7 @@ static const risc86_instruction_t *opcode_timings_0f[256] = INVALID, INVALID, &mload_op, &mload_op, /*70*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, - &load_mmx_op, &load_mmx_op, &load_mmx_op, &vector_emms_op, + &load_mmx_op, &load_mmx_op, &load_mmx_op, &complex_emms_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, &mstore_op, &mstore_op, @@ -1176,387 +1259,220 @@ static const risc86_instruction_t *opcode_timings_0f[256] = &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, -/*90*/ &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, - &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, - &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, - &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, +/*90*/ &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, + &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, + &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, + &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, -/*a0*/ &push_seg_op, &vector_mov_seg_mem_op, &vector_cpuid_op, &vector_load_alu_op, - &vector_alu_store_op, &vector_alu_store_op, INVALID, INVALID, - &push_seg_op, &vector_mov_seg_mem_op, INVALID, &vector_load_alu_op, - &vector_alu_store_op, &vector_alu_store_op, INVALID, &vector_mul_op, +/*a0*/ &push_seg_op, &complex_mov_seg_mem_op,&complex_cpuid_op, &complex_load_alu_op, + &complex_alu_store_op, &complex_alu_store_op, INVALID, INVALID, + &push_seg_op, &complex_mov_seg_mem_op,INVALID, &complex_load_alu_op, + &complex_alu_store_op, &complex_alu_store_op, INVALID, &complex_mul_op, -/*b0*/ &vector_cmpxchg_b_op, &vector_cmpxchg_op, &vector_lss_op, &vector_load_alu_op, - &vector_lss_op, &vector_lss_op, &load_alux_op, &load_alu_op, - INVALID, INVALID, &vector_load_alu_op, &vector_load_alu_op, - &vector_bsx_op, &vector_bsx_op, &load_alux_op, &load_alu_op, +/*b0*/ &complex_cmpxchg_b_op, &complex_cmpxchg_op, &complex_lss_op, &complex_load_alu_op, + &complex_lss_op, &complex_lss_op, &load_alux_op, &load_alu_op, + INVALID, INVALID, &complex_load_alu_op, &complex_load_alu_op, + &complex_bsx_op, &complex_bsx_op, &load_alux_op, &load_alu_op, -/*c0*/ &vector_alux_store_op, &vector_alu_store_op, INVALID, INVALID, - INVALID, INVALID, INVALID, &vector_cmpxchg_op, - &bswap_op, &bswap_op, &bswap_op, &bswap_op, - &bswap_op, &bswap_op, &bswap_op, &bswap_op, +/*c0*/ &complex_alux_store_op, &complex_alu_store_op, INVALID, INVALID, + INVALID, INVALID, INVALID, &complex_cmpxchg_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, -/*d0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, - INVALID, &load_mmx_mul_op, INVALID, INVALID, - &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, - &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, +/*d0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, + INVALID, &load_mmx_mul_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, -/*e0*/ &load_mmx_op, &load_mmx_shift_op, &load_mmx_shift_op, INVALID, - INVALID, &pmul_mem_op, INVALID, INVALID, - &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, - &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, +/*e0*/ &load_mmx_op, &load_mmx_shift_op, &load_mmx_shift_op, INVALID, + INVALID, &pmul_mem_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, -/*f0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, - INVALID, &pmul_mem_op, INVALID, INVALID, - &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, - &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, +/*f0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, + INVALID, &pmul_mem_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, + &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, }; static const risc86_instruction_t *opcode_timings_0f_mod3[256] = { -/*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, - INVALID, &vector_alu6_op, &vector_alu6_op, INVALID, - &vector_invd_op, &vector_wbinvd_op, INVALID, INVALID, - INVALID, INVALID, &vector_femms_op, INVALID, +/*00*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, + INVALID, &complex_alu6_op, &complex_alu6_op, INVALID, + &complex_invd_op, &complex_wbinvd_op, INVALID, INVALID, + INVALID, INVALID, &complex_femms_op, INVALID, -/*10*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, +/*10*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, -/*20*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, - &vector_alu6_op, &vector_alu6_op, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, +/*20*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, + &complex_alu6_op, &complex_alu6_op, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, -/*30*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, +/*30*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, -/*40*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, +/*40*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, -/*50*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, +/*50*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, -/*60*/ &mmx_op, &mmx_op, &mmx_op, &mmx_op, - &mmx_op, &mmx_op, &mmx_op, &mmx_op, - &mmx_op, &mmx_op, &mmx_op, &mmx_op, - INVALID, INVALID, &mmx_op, &mmx_op, +/*60*/ &mmx_op, &mmx_op, &mmx_op, &mmx_op, + &mmx_op, &mmx_op, &mmx_op, &mmx_op, + &mmx_op, &mmx_op, &mmx_op, &mmx_op, + INVALID, INVALID, &mmx_op, &mmx_op, -/*70*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, - &mmx_op, &mmx_op, &mmx_op, &vector_emms_op, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, &mmx_op, &mmx_op, +/*70*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + &mmx_op, &mmx_op, &mmx_op, &complex_emms_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, &mmx_op, &mmx_op, /*80*/ &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, -/*90*/ &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, - &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, - &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, - &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, +/*90*/ &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, + &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, + &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, + &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, -/*a0*/ &push_seg_op, &vector_mov_seg_mem_op, &vector_cpuid_op, &vector_alu1_op, - &vector_alu1_op, &vector_alu1_op, INVALID, INVALID, - &push_seg_op, &vector_mov_seg_mem_op, INVALID, &vector_alu1_op, - &vector_alu1_op, &vector_alu1_op, INVALID, &vector_mul_op, +/*a0*/ &push_seg_op, &complex_mov_seg_mem_op, &complex_cpuid_op, &complex_alu1_op, + &complex_alu1_op, &complex_alu1_op, INVALID, INVALID, + &push_seg_op, &complex_mov_seg_mem_op, INVALID, &complex_alu1_op, + &complex_alu1_op, &complex_alu1_op, INVALID, &complex_mul_op, -/*b0*/ &vector_cmpxchg_b_op, &vector_cmpxchg_op, &vector_lss_op, &vector_alu1_op, - &vector_lss_op, &vector_lss_op, &alux_op, &alu_op, - INVALID, INVALID, &vector_alu1_op, &vector_alu1_op, - &vector_bsx_op, &vector_bsx_op, &alux_op, &alu_op, +/*b0*/ &complex_cmpxchg_b_op, &complex_cmpxchg_op, &complex_lss_op, &complex_alu1_op, + &complex_lss_op, &complex_lss_op, &alux_op, &alu_op, + INVALID, INVALID, &complex_alu1_op, &complex_alu1_op, + &complex_bsx_op, &complex_bsx_op, &alux_op, &alu_op, -/*c0*/ &vector_alux1_op, &vector_alu1_op, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - &bswap_op, &bswap_op, &bswap_op, &bswap_op, - &bswap_op, &bswap_op, &bswap_op, &bswap_op, +/*c0*/ &complex_alux1_op, &complex_alu1_op, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, -/*d0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, - INVALID, &mmx_mul_op, INVALID, INVALID, - &mmx_op, &mmx_op, INVALID, &mmx_op, - &mmx_op, &mmx_op, INVALID, &mmx_op, +/*d0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + INVALID, &mmx_mul_op, INVALID, INVALID, + &mmx_op, &mmx_op, INVALID, &mmx_op, + &mmx_op, &mmx_op, INVALID, &mmx_op, -/*e0*/ &mmx_op, &mmx_shift_op, &mmx_shift_op, INVALID, - INVALID, &pmul_op, INVALID, INVALID, - &mmx_op, &mmx_op, INVALID, &mmx_op, - &mmx_op, &mmx_op, INVALID, &mmx_op, - -/*f0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, - INVALID, &pmul_op, INVALID, INVALID, - &mmx_op, &mmx_op, &mmx_op, INVALID, - &mmx_op, &mmx_op, &mmx_op, INVALID, -}; - -static const risc86_instruction_t *opcode_timings_0f0f[256] = -{ -/*00*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*10*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*20*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*30*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*40*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*50*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*60*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*70*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*80*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*90*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*a0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, &load_mmx_mul_op, &load_mmx_mul_op, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*b0*/ INVALID, INVALID, INVALID, INVALID, - &load_mmx_mul_op, INVALID, &load_mmx_mul_op, &load_mmx_mul_op, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, &load_mmx_op, - -/*c0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*d0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*e0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*f0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -}; -static const risc86_instruction_t *opcode_timings_0f0f_mod3[256] = -{ -/*00*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*10*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*20*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*30*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*40*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*50*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*60*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*70*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*80*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*90*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*a0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, &mmx_mul_op, &mmx_mul_op, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*b0*/ INVALID, INVALID, INVALID, INVALID, - &mmx_mul_op, INVALID, &mmx_mul_op, &mmx_mul_op, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, &mmx_op, - -/*c0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*d0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*e0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*f0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, +/*e0*/ &mmx_op, &mmx_shift_op, &mmx_shift_op, INVALID, + INVALID, &pmul_op, INVALID, INVALID, + &mmx_op, &mmx_op, INVALID, &mmx_op, + &mmx_op, &mmx_op, INVALID, &mmx_op, +/*f0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + INVALID, &pmul_op, INVALID, INVALID, + &mmx_op, &mmx_op, &mmx_op, INVALID, + &mmx_op, &mmx_op, &mmx_op, INVALID, }; static const risc86_instruction_t *opcode_timings_shift[8] = { - &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, - &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op + &complex_alu_store_op, &complex_alu_store_op, &complex_alu_store_op, &complex_alu_store_op, + &complex_alu_store_op, &complex_alu_store_op, &complex_alu_store_op, &complex_alu_store_op }; static const risc86_instruction_t *opcode_timings_shift_b[8] = { - &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, - &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op + &complex_alux_store_op, &complex_alux_store_op, &complex_alux_store_op, &complex_alux_store_op, + &complex_alux_store_op, &complex_alux_store_op, &complex_alux_store_op, &complex_alux_store_op }; static const risc86_instruction_t *opcode_timings_shift_mod3[8] = { - &vector_alu1_op, &vector_alu1_op, &vector_alu1_op, &vector_alu1_op, - &alu_op, &alu_op, &alu_op, &alu_op + &complex_alu1_op, &complex_alu1_op, &complex_alu1_op, &complex_alu1_op, + &alu_op, &alu_op, &alu_op, &alu_op }; static const risc86_instruction_t *opcode_timings_shift_b_mod3[8] = { - &vector_alux1_op, &vector_alux1_op, &vector_alux1_op, &vector_alux1_op, - &alux_op, &alux_op, &alux_op, &alux_op + &complex_alux1_op, &complex_alux1_op, &complex_alux1_op, &complex_alux1_op, + &alux_op, &alux_op, &alux_op, &alux_op }; static const risc86_instruction_t *opcode_timings_80[8] = { - &alux_store_op, &alux_store_op, &vector_alux_store_op, &vector_alux_store_op, - &alux_store_op, &alux_store_op, &alux_store_op, &alux_store_op, + &alux_store_op, &alux_store_op, &complex_alux_store_op, &complex_alux_store_op, + &alux_store_op, &alux_store_op, &alux_store_op, &alux_store_op, }; static const risc86_instruction_t *opcode_timings_80_mod3[8] = { - &alux_op, &alux_op, &alux_store_op, &alux_store_op, - &alux_op, &alux_op, &alux_op, &alux_op, + &alux_op, &alux_op, &alux_store_op, &alux_store_op, + &alux_op, &alux_op, &alux_op, &alux_op, }; static const risc86_instruction_t *opcode_timings_8x[8] = { - &alu_store_op, &alu_store_op, &vector_alu_store_op, &vector_alu_store_op, - &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, + &alu_store_op, &alu_store_op, &complex_alu_store_op, &complex_alu_store_op, + &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, }; static const risc86_instruction_t *opcode_timings_8x_mod3[8] = { - &alu_op, &alu_op, &alu_store_op, &alu_store_op, - &alu_op, &alu_op, &alu_op, &alu_op, + &alu_op, &alu_op, &alu_store_op, &alu_store_op, + &alu_op, &alu_op, &alu_op, &alu_op, }; static const risc86_instruction_t *opcode_timings_f6[8] = { /* TST NOT NEG*/ - &test_mem_imm_b_op, INVALID, &vector_alux_store_op, &vector_alux_store_op, + &test_mem_imm_b_op, INVALID, &complex_alux_store_op, &complex_alux_store_op, /* MUL IMUL DIV IDIV*/ - &vector_mul_mem_op, &vector_mul_mem_op, &vector_div16_mem_op, &vector_div16_mem_op, + &complex_mul_mem_op, &complex_mul_mem_op, &complex_div16_mem_op, &complex_div16_mem_op, }; static const risc86_instruction_t *opcode_timings_f6_mod3[8] = { /* TST NOT NEG*/ &test_reg_b_op, INVALID, &alux_op, &alux_op, /* MUL IMUL DIV IDIV*/ - &vector_mul_op, &vector_mul_op, &vector_div16_op, &vector_div16_op, + &complex_mul_op, &complex_mul_op, &complex_div16_op, &complex_div16_op, }; static const risc86_instruction_t *opcode_timings_f7[8] = { /* TST NOT NEG*/ - &test_mem_imm_op, INVALID, &vector_alu_store_op, &vector_alu_store_op, + &test_mem_imm_op, INVALID, &complex_alu_store_op, &complex_alu_store_op, /* MUL IMUL DIV IDIV*/ - &vector_mul64_mem_op, &vector_mul64_mem_op, &vector_div32_mem_op, &vector_div32_mem_op, + &complex_mul64_mem_op, &complex_mul64_mem_op, &complex_div32_mem_op, &complex_div32_mem_op, }; static const risc86_instruction_t *opcode_timings_f7_mod3[8] = { /* TST NOT NEG*/ &test_reg_op, INVALID, &alu_op, &alu_op, /* MUL IMUL DIV IDIV*/ - &vector_mul64_op, &vector_mul64_op, &vector_div32_op, &vector_div32_op, + &complex_mul64_op, &complex_mul64_op, &complex_div32_op, &complex_div32_op, }; static const risc86_instruction_t *opcode_timings_ff[8] = { /* INC DEC CALL CALL far*/ - &alu_store_op, &alu_store_op, &store_op, &vector_call_far_op, + &alu_store_op, &alu_store_op, &store_op, &complex_call_far_op, /* JMP JMP far PUSH*/ - &branch_op, &vector_jmp_far_op, &push_mem_op, INVALID + &branch_op, &complex_jmp_far_op, &push_mem_op, INVALID }; static const risc86_instruction_t *opcode_timings_ff_mod3[8] = { /* INC DEC CALL CALL far*/ - &vector_alu1_op, &vector_alu1_op, &store_op, &vector_call_far_op, + &complex_alu1_op, &complex_alu1_op, &store_op, &complex_call_far_op, /* JMP JMP far PUSH*/ - &branch_op, &vector_jmp_far_op, &vector_push_mem_op, INVALID + &branch_op, &complex_jmp_far_op, &complex_push_mem_op, INVALID }; static const risc86_instruction_t *opcode_timings_d8[8] = { /* FADDs FMULs FCOMs FCOMPs*/ - &load_float_op, &load_float_op, &load_float_op, &load_float_op, + &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, /* FSUBs FSUBRs FDIVs FDIVRs*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, }; static const risc86_instruction_t *opcode_timings_d8_mod3[8] = { /* FADD FMUL FCOM FCOMP*/ - &float_op, &float_op, &float_op, &float_op, + &fadd_op, &fmul_op, &float_op, &float_op, /* FSUB FSUBR FDIV FDIVR*/ &float_op, &float_op, &fdiv_op, &fdiv_op, }; @@ -1566,7 +1482,7 @@ static const risc86_instruction_t *opcode_timings_d9[8] = /* FLDs FSTs FSTPs*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FLDENV FLDCW FSTENV FSTCW*/ - &vector_float_l_op, &vector_fldcw_op, &vector_float_l_op, &vector_float_op + &complex_float_l_op, &complex_fldcw_op, &complex_float_l_op, &complex_float_op }; static const risc86_instruction_t *opcode_timings_d9_mod3[64] = { @@ -1574,16 +1490,16 @@ static const risc86_instruction_t *opcode_timings_d9_mod3[64] = &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, /*FXCH*/ - &float_op, &float_op, &float_op, &float_op, - &float_op, &float_op, &float_op, &float_op, + &limm_op, &limm_op, &limm_op, &limm_op, + &limm_op, &limm_op, &limm_op, &limm_op, /*FNOP*/ &float_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*FSTP*/ - &float_op, &float_op, &float_op, &float_op, - &float_op, &float_op, &float_op, &float_op, + &float2_op, &float2_op, &float2_op, &float2_op, + &float2_op, &float2_op, &float2_op, &float2_op, /* opFCHS opFABS*/ - &float_op, &float_op, INVALID, INVALID, + &fchs_op, &float_op, INVALID, INVALID, /* opFTST opFXAM*/ &float_op, &float_op, INVALID, INVALID, /* opFLD1 opFLDL2T opFLDL2E opFLDPI*/ @@ -1603,7 +1519,7 @@ static const risc86_instruction_t *opcode_timings_d9_mod3[64] = static const risc86_instruction_t *opcode_timings_da[8] = { /* FIADDl FIMULl FICOMl FICOMPl*/ - &load_float_op, &load_float_op, &load_float_op, &load_float_op, + &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, /* FISUBl FISUBRl FIDIVl FIDIVRl*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, }; @@ -1619,7 +1535,7 @@ static const risc86_instruction_t *opcode_timings_db[8] = /* FLDil FSTil FSTPil*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FLDe FSTPe*/ - INVALID, &vector_flde_op, INVALID, &vector_fste_op + INVALID, &complex_flde_op, INVALID, &complex_fste_op }; static const risc86_instruction_t *opcode_timings_db_mod3[64] = { @@ -1653,14 +1569,14 @@ static const risc86_instruction_t *opcode_timings_db_mod3[64] = static const risc86_instruction_t *opcode_timings_dc[8] = { /* FADDd FMULd FCOMd FCOMPd*/ - &load_float_op, &load_float_op, &load_float_op, &load_float_op, + &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, /* FSUBd FSUBRd FDIVd FDIVRd*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, }; static const risc86_instruction_t *opcode_timings_dc_mod3[8] = { /* opFADDr opFMULr*/ - &float_op, &float_op, INVALID, INVALID, + &fadd_op, &fmul_op, INVALID, INVALID, /* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ &float_op, &float_op, &fdiv_op, &fdiv_op }; @@ -1670,7 +1586,7 @@ static const risc86_instruction_t *opcode_timings_dd[8] = /* FLDd FSTd FSTPd*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FRSTOR FSAVE FSTSW*/ - &vector_float_l_op, INVALID, &vector_float_l_op, &vector_float_l_op + &complex_float_l_op, INVALID, &complex_float_l_op, &complex_float_l_op }; static const risc86_instruction_t *opcode_timings_dd_mod3[8] = { @@ -1683,14 +1599,14 @@ static const risc86_instruction_t *opcode_timings_dd_mod3[8] = static const risc86_instruction_t *opcode_timings_de[8] = { /* FIADDw FIMULw FICOMw FICOMPw*/ - &load_float_op, &load_float_op, &load_float_op, &load_float_op, + &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, /* FISUBw FISUBRw FIDIVw FIDIVRw*/ - &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, }; static const risc86_instruction_t *opcode_timings_de_mod3[8] = { /* FADDP FMULP FCOMPP*/ - &float_op, &float_op, INVALID, &float_op, + &fadd_op, &fmul_op, INVALID, &float_op, /* FSUBP FSUBRP FDIVP FDIVRP*/ &float_op, &float_op, &fdiv_op, &fdiv_op, }; @@ -1700,7 +1616,7 @@ static const risc86_instruction_t *opcode_timings_df[8] = /* FILDiw FISTiw FISTPiw*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FILDiq FBSTP FISTPiq*/ - INVALID, &load_float_op, &vector_float_l_op, &fstore_op, + INVALID, &load_float_op, &complex_float_l_op, &fstore_op, }; static const risc86_instruction_t *opcode_timings_df_mod3[8] = { @@ -1728,34 +1644,27 @@ static p6_unit_t *units; /*Pentium Pro has no MMX*/ static p6_unit_t ppro_units[] = { - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX)}, /*Integer X*/ - {.uop_mask = (1 << UOP_ALU)}, /*Integer Y*/ - {.uop_mask = (1 << UOP_FLOAT)}, /*Floating point*/ - {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD)}, /*Load*/ - {.uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE)}, /*Store*/ - {.uop_mask = (1 << UOP_BRANCH)} /*Branch*/ + {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) | (1 << UOP_FLOAT)}, /*Integer X & Floating point*/ + {.uop_mask = (1 << UOP_ALU) | (1 << UOP_BRANCH)}, /*Integer Y*/ + {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD)}, /*Load*/ + {.uop_mask = (1 << UOP_STORED) | (1 << UOP_FSTORED)}, /*Data Store*/ + {.uop_mask = (1 << UOP_STOREA) | (1 << UOP_FSTOREA)}, /*Address Store*/ }; #define NR_PPRO_UNITS (sizeof(ppro_units) / sizeof(p6_unit_t)) -/*Well, it works I guess*/ +/*Pentium II/Celeron assigns the multiplier to port 0, the shifter to port 1, and shares the MMX ALU*/ static p6_unit_t p2_units[] = { - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) | (1 << UOP_MEU) | /*Integer X*/ - (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL)}, - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_MEU) | /*Integer Y*/ - (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL)}, - {.uop_mask = (1 << UOP_FLOAT)}, /*Floating point*/ + {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) | (1 << UOP_FLOAT) | /*Integer X & Floating point*/ + (1 << UOP_MMX) | (1 << UOP_MMX_MUL)}, + {.uop_mask = (1 << UOP_ALU) | (1 << UOP_BRANCH) | /*Integer Y*/ + (1 << UOP_MMX) | (1 << UOP_MMX_SHIFT)}, {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD)}, /*Load*/ - {.uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE)}, /*Store*/ - {.uop_mask = (1 << UOP_BRANCH)} /*Branch*/ + {.uop_mask = (1 << UOP_STORED) | (1 << UOP_FSTORED) | (1 << UOP_MSTORED)}, /*Data Store*/ + {.uop_mask = (1 << UOP_STOREA) | (1 << UOP_FSTOREA) | (1 << UOP_MSTOREA)}, /*Address Store*/ }; #define NR_P2_UNITS (sizeof(p2_units) / sizeof(p6_unit_t)) -/*First available cycles of shared execution units. Each of these can be submitted - to by ALU X and Y*/ -static int mul_first_available_cycle; -static int shift_first_available_cycle; - static int uop_run(const risc86_uop_t *uop, int decode_time) { int c; @@ -1764,16 +1673,8 @@ static int uop_run(const risc86_uop_t *uop, int decode_time) /*UOP_LIMM does not require execution*/ if (uop->type == UOP_LIMM) - return decode_time; + return decode_time; - if (units == p2_units) /*More hackyness*/ - { - if (uop->type == UOP_MEU_MUL && decode_time < mul_first_available_cycle) - decode_time = mul_first_available_cycle; - else if (uop->type == UOP_MEU_SHIFT && decode_time < mul_first_available_cycle) - decode_time = shift_first_available_cycle; - } - /*Find execution unit for this uOP*/ for (c = 0; c < nr_units; c++) { @@ -1787,43 +1688,36 @@ static int uop_run(const risc86_uop_t *uop, int decode_time) } } if (!best_unit) - fatal("uop_run: can not find execution unit\n"); + fatal("uop_run: can not find execution unit\n"); if (best_start_cycle < decode_time) best_start_cycle = decode_time; - best_unit->first_available_cycle = best_start_cycle + uop->throughput; + best_unit->first_available_cycle = best_start_cycle + uop->latency; - if (units == p2_units) /*More hackyness*/ - { - if (uop->type == UOP_MEU_MUL) - mul_first_available_cycle = best_start_cycle + uop->throughput; - else if (uop->type == UOP_MEU_SHIFT) - shift_first_available_cycle = best_start_cycle + uop->throughput; - } - return best_start_cycle + uop->throughput; + + return best_start_cycle + uop->latency; } -/*The K6 decoder can decode, per clock : - - 1 or 2 'short' instructions, each up to 2 uOPs and 7 bytes long - - 1 'long' instruction, up to 4 uOPs - - 1 'vector' instruction, up to 4 uOPs per cycle, plus (I think) 1 cycle startup delay) +/*The P6 decoders can decode, per clock : + - 1 to 3 'simple' instructions, each up to 1 uOP and 7 bytes long + - 1 'complex' instruction, up to 4 uOPs or 3 per cycle for instructions longer than 4 uOPs */ static struct { int nr_uops; - const risc86_uop_t *uops[4]; + const risc86_uop_t *uops[6]; /*Earliest time a uop can start. If the timestamp is -1, then the uop is part of a dependency chain and the start time is the completion time of the previous uop*/ - int earliest_start[4]; + int earliest_start[6]; } decode_buffer; -#define NR_OPQUADS 6 -/*Timestamps of when the last six opquads completed. The K6 scheduler retires - opquads in order, so this is needed to determine when the next can be scheduled*/ -static int opquad_completion_timestamp[NR_OPQUADS]; -static int next_opquad = 0; +#define NR_OPSEQS 3 +/*Timestamps of when the last three op sequences completed. Technically this is incorrect, +as the actual size of the opseq buffer is 20 bytes and not 18, but I'm restricted to multiples of 6*/ +static int opseq_completion_timestamp[NR_OPSEQS]; +static int next_opseq = 0; #define NR_REGS 8 /*Timestamp of when last operation on an integer register completed*/ @@ -1837,51 +1731,46 @@ static int last_uop_timestamp = 0; void decode_flush_p6() { int c; - int uop_timestamp = 0; - - /*Decoded opquad can not be submitted if there are no free spaces in the - opquad buffer*/ - if (decode_timestamp < opquad_completion_timestamp[next_opquad]) - decode_timestamp = opquad_completion_timestamp[next_opquad]; + int start_timestamp, uop_timestamp = 0; + + /*Decoded opseq can not be submitted if there are no free spaces in the + opseq buffer*/ + if (decode_timestamp < opseq_completion_timestamp[next_opseq]) + decode_timestamp = opseq_completion_timestamp[next_opseq]; /*Ensure that uops can not be submitted before they have been decoded*/ if (decode_timestamp > last_uop_timestamp) last_uop_timestamp = decode_timestamp; /*Submit uops to execution units, and determine the latest completion time*/ - for (c = 0; c < decode_buffer.nr_uops; c++) + for (c = 0; c < (decode_buffer.nr_uops); c++) { - int start_timestamp; - - if (decode_buffer.earliest_start[c] == -1) - start_timestamp = last_uop_timestamp; - else - start_timestamp = decode_buffer.earliest_start[c]; + start_timestamp = decode_buffer.earliest_start[c]; last_uop_timestamp = uop_run(decode_buffer.uops[c], start_timestamp); if (last_uop_timestamp > uop_timestamp) uop_timestamp = last_uop_timestamp; } - /*Calculate opquad completion time. Since opquads complete in order, it + /*Calculate opseq completion time. Since opseqs complete in order, it must be after the last completion.*/ if (uop_timestamp <= last_complete_timestamp) last_complete_timestamp = last_complete_timestamp + 1; else last_complete_timestamp = uop_timestamp; - /*Advance to next opquad in buffer*/ - opquad_completion_timestamp[next_opquad] = last_complete_timestamp; - next_opquad++; - if (next_opquad == NR_OPQUADS) - next_opquad = 0; + /*Advance to next opseq in buffer*/ + opseq_completion_timestamp[next_opseq] = last_complete_timestamp; + next_opseq++; + if (next_opseq == NR_OPSEQS) + next_opseq = 0; decode_timestamp++; decode_buffer.nr_uops = 0; } /*The instruction is only of interest here if it's longer than 7 bytes, as that's the - limit on K6 short decoding*/ + limit on P6 simple decoding*/ static int codegen_timing_instr_length(uint64_t deps, uint32_t fetchdat, int op_32) { int len = prefixes + 1; /*Opcode*/ @@ -1933,8 +1822,9 @@ static int codegen_timing_instr_length(uint64_t deps, uint32_t fetchdat, int op_ static void decode_instruction(const risc86_instruction_t *ins, uint64_t deps, uint32_t fetchdat, int op_32, int bit8) { uint32_t regmask_required; - uint32_t regmask_modified; - int c, d; + uint32_t regmask_modified; + int c; + int d = 0; /*Complex decoder uOPs*/ int earliest_start = 0; decode_type_t decode_type = ins->decode_type; int instr_length = codegen_timing_instr_length(deps, fetchdat, op_32); @@ -1964,87 +1854,68 @@ static void decode_instruction(const risc86_instruction_t *ins, uint64_t deps, u earliest_start = fpu_st_timestamp[reg]; } - /*Short decoders are limited to 7 bytes*/ - if (decode_type == DECODE_SHORT && instr_length > 7) - decode_type = DECODE_LONG; - /*Long decoder is limited to 11 bytes*/ - else if (instr_length > 11) - decode_type = DECODE_VECTOR; + /*Simple decoders are limited to 7 bytes & 1 uOP*/ + if (decode_type == DECODE_SIMPLE && instr_length > 7) + decode_type = DECODE_COMPLEX; + else if (decode_type == DECODE_SIMPLE && ins->nr_uops > 1) + decode_type = DECODE_COMPLEX; switch (decode_type) - { - case DECODE_SHORT: - if (decode_buffer.nr_uops) + { + case DECODE_SIMPLE: + if (decode_buffer.nr_uops - d == 2) { decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; - if (ins->nr_uops > 1) - { - decode_buffer.uops[decode_buffer.nr_uops+1] = &ins->uop[1]; - decode_buffer.earliest_start[decode_buffer.nr_uops+1] = -1; - } - decode_buffer.nr_uops += ins->nr_uops; - + decode_buffer.nr_uops = 3; decode_flush_p6(); + } + else if (decode_buffer.nr_uops - d == 1) + { + decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; + decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; + decode_buffer.nr_uops = 2+d; + if (d) + decode_flush_p6(); + } + else if (decode_buffer.nr_uops) + { + decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; + decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; + decode_buffer.nr_uops = 1+d; } else { - decode_buffer.nr_uops = ins->nr_uops; + decode_buffer.nr_uops = 1; decode_buffer.uops[0] = &ins->uop[0]; decode_buffer.earliest_start[0] = earliest_start; - if (ins->nr_uops > 1) - { - decode_buffer.uops[1] = &ins->uop[1]; - decode_buffer.earliest_start[1] = -1; - } - } + } break; - case DECODE_LONG: + case DECODE_COMPLEX: if (decode_buffer.nr_uops) - decode_flush_p6(); + decode_flush_p6(); /*The 4-1-1 arrangement implies that a complex ins. can't be decoded after a simple one*/ - decode_buffer.nr_uops = ins->nr_uops; - for (c = 0; c < ins->nr_uops; c++) - { - decode_buffer.uops[c] = &ins->uop[c]; - if (c == 0) - decode_buffer.earliest_start[c] = earliest_start; - else - decode_buffer.earliest_start[c] = -1; - } - decode_flush_p6(); - break; - - case DECODE_VECTOR: - if (decode_buffer.nr_uops) - decode_flush_p6(); - - decode_timestamp++; d = 0; - + for (c = 0; c < ins->nr_uops; c++) { decode_buffer.uops[d] = &ins->uop[c]; - if (c == 0) - decode_buffer.earliest_start[d] = earliest_start; - else - decode_buffer.earliest_start[d] = -1; - d++; + decode_buffer.earliest_start[d] = earliest_start; + d++; - if (d == 4) + if ((d == 3) && (ins->nr_uops > 4)) /*Ins. with >4 uOPs require the use of special units only present on 3 translate PLAs*/ { d = 0; - decode_buffer.nr_uops = 4; - decode_flush_p6(); + decode_buffer.nr_uops = 3; + decode_flush_p6(); /*The other two decoders are halted to preserve in-order issue*/ } } - if (d) - { - decode_buffer.nr_uops = d; - decode_flush_p6(); - } - break; + if (d) + { + decode_buffer.nr_uops = d; + } + break; } /*Update write timestamps for any output registers*/ @@ -2095,15 +1966,12 @@ void codegen_timing_p6_block_start() for (c = 0; c < nr_units; c++) units[c].first_available_cycle = 0; - mul_first_available_cycle = 0; - shift_first_available_cycle = 0; - decode_timestamp = 0; last_complete_timestamp = 0; - for (c = 0; c < NR_OPQUADS; c++) - opquad_completion_timestamp[c] = 0; - next_opquad = 0; + for (c = 0; c < NR_OPSEQS; c++) + opseq_completion_timestamp[c] = 0; + next_opseq = 0; for (c = 0; c < NR_REGS; c++) reg_available_timestamp[c] = 0; @@ -2147,59 +2015,8 @@ void codegen_timing_p6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint switch (last_prefix) { case 0x0f: - if (opcode == 0x0f) - { - /*3DNow has the actual opcode after ModR/M, SIB and any offset*/ - uint32_t opcode_pc = op_pc + 1; /*Byte after ModR/M*/ - uint8_t modrm = fetchdat & 0xff; - uint8_t sib = (fetchdat >> 8) & 0xff; - - if ((modrm & 0xc0) != 0xc0) - { - if (op_32 & 0x200) - { - if ((modrm & 7) == 4) - { - /* Has SIB*/ - opcode_pc++; - if ((modrm & 0xc0) == 0x40) - opcode_pc++; - else if ((modrm & 0xc0) == 0x80) - opcode_pc += 4; - else if ((sib & 0x07) == 0x05) - opcode_pc += 4; - } - else - { - if ((modrm & 0xc0) == 0x40) - opcode_pc++; - else if ((modrm & 0xc0) == 0x80) - opcode_pc += 4; - else if ((modrm & 0xc7) == 0x05) - opcode_pc += 4; - } - } - else - { - if ((modrm & 0xc0) == 0x40) - opcode_pc++; - else if ((modrm & 0xc0) == 0x80) - opcode_pc += 2; - else if ((modrm & 0xc7) == 0x06) - opcode_pc += 2; - } - } - - opcode = fastreadb(cs + opcode_pc); - - ins_table = mod3 ? opcode_timings_0f0f_mod3 : opcode_timings_0f0f; - deps = mod3 ? opcode_deps_0f0f_mod3 : opcode_deps_0f0f; - } - else - { - ins_table = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; - deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; - } + ins_table = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; break; case 0xd8: @@ -2295,7 +2112,7 @@ void codegen_timing_p6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint if (ins_table[opcode]) decode_instruction(ins_table[opcode], deps[opcode], fetchdat, op_32, bit8); else - decode_instruction(&vector_alu1_op, 0, fetchdat, op_32, bit8); + decode_instruction(&complex_alu1_op, 0, fetchdat, op_32, bit8); codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); } diff --git a/src/ddma.c b/src/ddma.c index 98e4da293..92ee1f4d3 100644 --- a/src/ddma.c +++ b/src/ddma.c @@ -107,9 +107,9 @@ ddma_reg_write(uint16_t addr, uint8_t val, void *p) break; case 0x02: if (ch >= 4) - outb(0x88 + page_regs[ch], val); + outb(0x88 + page_regs[ch & 3], val); else - outb(0x80 + page_regs[ch], val); + outb(0x80 + page_regs[ch & 3], val); break; case 0x04: dma[ch].cb = (dma[ch].cb & 0xffff00) | val; diff --git a/src/disk/hdc_ide.c b/src/disk/hdc_ide.c index 2b6104ee8..0c0453cb1 100644 --- a/src/disk/hdc_ide.c +++ b/src/disk/hdc_ide.c @@ -410,7 +410,7 @@ ide_get_max(ide_t *ide, int type) return -1; case TYPE_UDMA: /* UDMA */ if (!ide_boards[ide->board]->force_ata3 && (ide_bm[ide->board] != NULL)) - return 2; + return 4 /*2*/; return -1; default: diff --git a/src/disk/mo.c b/src/disk/mo.c index 289a3d187..1687d37f3 100644 --- a/src/disk/mo.c +++ b/src/disk/mo.c @@ -1983,7 +1983,7 @@ mo_get_max(int ide_has_dma, int type) ret = ide_has_dma ? 1 : -1; break; case TYPE_UDMA: - ret = ide_has_dma ? 2 : -1; + ret = ide_has_dma ? 4 /*2*/ : -1; break; } diff --git a/src/disk/zip.c b/src/disk/zip.c index 850468573..8a6253100 100644 --- a/src/disk/zip.c +++ b/src/disk/zip.c @@ -2243,7 +2243,7 @@ zip_get_max(int ide_has_dma, int type) ret = ide_has_dma ? 1 : -1; break; case TYPE_UDMA: - ret = ide_has_dma ? 2 : -1; + ret = ide_has_dma ? 4 /*2*/ : -1; break; } diff --git a/src/include/86box/acpi.h b/src/include/86box/acpi.h index ca0cb3b93..fa10aaa2f 100644 --- a/src/include/86box/acpi.h +++ b/src/include/86box/acpi.h @@ -6,13 +6,6 @@ * * This file is part of the 86Box distribution. * - * Implementation of the ISA Bus (de)Bugger expansion card - * sold as a DIY kit in the late 1980's in The Netherlands. - * This card was a assemble-yourself 8bit ISA addon card for - * PC and AT systems that had several tools to aid in low- - * level debugging (mostly for faulty BIOSes, bootloaders - * and system kernels...) - * * Definitions for the ACPI emulation. * * @@ -46,21 +39,31 @@ extern "C" { #define ACPI_ENABLE 0xf1 #define ACPI_DISABLE 0xf0 +#define VEN_INTEL 0x8086 +#define VEN_VIA 0x1106 + typedef struct { uint8_t plvl2, plvl3, + smicmd, gpio_dir, + gpio_val, extsmi_val, timer32, gpireg[3], gporeg[4]; uint16_t pmsts, pmen, pmcntrl, gpsts, - gpen, io_base; - int slot, - irq_mode, irq_pin; + gpen, io_base, + gpscien, gpsmien, + pscntrl, gpo_val, + gpi_val; + int slot, irq_mode, + irq_pin, smi_lock, + smi_active; uint32_t pcntrl, glbsts, devsts, glben, glbctl, devctl, - timer_val; + padsts, paden, + gptren, timer_val; uint64_t tmr_overflow_time; } acpi_regs_t; @@ -69,13 +72,16 @@ typedef struct { acpi_regs_t regs; uint8_t gporeg_default[4]; + int vendor; pc_timer_t timer; nvr_t *nvr; + apm_t *apm; } acpi_t; /* Global variables. */ -extern const device_t acpi_device; +extern const device_t acpi_intel_device; +extern const device_t acpi_via_device; /* Functions. */ diff --git a/src/include/86box/apm.h b/src/include/86box/apm.h index b7754f78a..1fd985951 100644 --- a/src/include/86box/apm.h +++ b/src/include/86box/apm.h @@ -6,13 +6,6 @@ * * This file is part of the 86Box distribution. * - * Implementation of the ISA Bus (de)Bugger expansion card - * sold as a DIY kit in the late 1980's in The Netherlands. - * This card was a assemble-yourself 8bit ISA addon card for - * PC and AT systems that had several tools to aid in low- - * level debugging (mostly for faulty BIOSes, bootloaders - * and system kernels...) - * * Definitions for the Advanced Power Management emulation. * * @@ -38,7 +31,9 @@ typedef struct /* Global variables. */ extern const device_t apm_device; + extern const device_t apm_pci_device; +extern const device_t apm_pci_acpi_device; /* Functions. */ diff --git a/src/include/86box/ddma.h b/src/include/86box/ddma.h index 2d0a9f8d9..64642f2ae 100644 --- a/src/include/86box/ddma.h +++ b/src/include/86box/ddma.h @@ -6,13 +6,6 @@ * * This file is part of the 86Box distribution. * - * Implementation of the ISA Bus (de)Bugger expansion card - * sold as a DIY kit in the late 1980's in The Netherlands. - * This card was a assemble-yourself 8bit ISA addon card for - * PC and AT systems that had several tools to aid in low- - * level debugging (mostly for faulty BIOSes, bootloaders - * and system kernels...) - * * Definitions for the Distributed DMA emulation. * * diff --git a/src/include/86box/machine.h b/src/include/86box/machine.h index 453c18132..8809a527b 100644 --- a/src/include/86box/machine.h +++ b/src/include/86box/machine.h @@ -267,7 +267,6 @@ extern int machine_at_endeavor_init(const machine_t *); extern int machine_at_zappa_init(const machine_t *); extern int machine_at_mb500n_init(const machine_t *); extern int machine_at_president_init(const machine_t *); -extern int machine_at_apollo_init(const machine_t *); #if defined(DEV_BRANCH) && defined(USE_VECTRA54) extern int machine_at_vectra54_init(const machine_t *); #endif diff --git a/src/include/86box/piix.h b/src/include/86box/piix.h index e07607606..9f6724fae 100644 --- a/src/include/86box/piix.h +++ b/src/include/86box/piix.h @@ -4,7 +4,8 @@ * PC systems and compatibles from 1981 through fairly recent * system designs based on the PCI bus. * - * Emulation of the Intel PIIX and PIIX3 Xcelerators. + * Emulation of the Intel PIIX, PIIX3, PIIX4, PIIX4E, and SMSC + * SLC90E66 (Victory66) Xcelerators. * * Emulation core dispatcher. * diff --git a/src/include/86box/usb.h b/src/include/86box/usb.h index b027796a2..aced538c8 100644 --- a/src/include/86box/usb.h +++ b/src/include/86box/usb.h @@ -6,13 +6,6 @@ * * This file is part of the 86Box distribution. * - * Implementation of the ISA Bus (de)Bugger expansion card - * sold as a DIY kit in the late 1980's in The Netherlands. - * This card was a assemble-yourself 8bit ISA addon card for - * PC and AT systems that had several tools to aid in low- - * level debugging (mostly for faulty BIOSes, bootloaders - * and system kernels...) - * * Definitions for the Distributed DMA emulation. * * diff --git a/src/intel_piix.c b/src/intel_piix.c index 257b9be9a..4a9a80f1e 100644 --- a/src/intel_piix.c +++ b/src/intel_piix.c @@ -4,7 +4,8 @@ * PC systems and compatibles from 1981 through fairly recent * system designs based on the PCI bus. * - * Emulation of the Intel PIIX and PIIX3 Xcelerators. + * Emulation of the Intel PIIX, PIIX3, PIIX4, PIIX4E, and SMSC + * SLC90E66 (Victory66) Xcelerators. * * PRD format : * word 0 - base address @@ -779,7 +780,7 @@ piix_write(int func, int addr, uint8_t val, void *priv) case 0x04: fregs[0x04] = (val & 0x01); smbus_update_io_mapping(dev); - apm_set_do_smi(dev->apm, !!(fregs[0x5b] & 0x02) && !!(val & 0x01)); + apm_set_do_smi(dev->acpi->apm, !!(fregs[0x5b] & 0x02) && !!(val & 0x01)); break; case 0x07: if (val & 0x08) @@ -844,7 +845,7 @@ piix_write(int func, int addr, uint8_t val, void *priv) break; case 0x5b: fregs[addr] = val & 0x03; - apm_set_do_smi(dev->apm, !!(val & 0x02) && !!(fregs[0x04] & 0x01)); + apm_set_do_smi(dev->acpi->apm, !!(val & 0x02) && !!(fregs[0x04] & 0x01)); break; case 0x63: fregs[addr] = val & 0xf7; @@ -1109,9 +1110,7 @@ piix_apm_out(uint16_t port, uint8_t val, void *p) piix_t *dev = (piix_t *) p; if (dev->apm->do_smi) { - if (dev->type > 3) - dev->acpi->regs.glbsts |= 0x20; - else + if (dev->type < 4) dev->regs[0][0xaa] |= 0x80; } } @@ -1189,7 +1188,7 @@ static void dev->nvr = device_add(&piix4_nvr_device); dev->smbus = device_add(&piix4_smbus_device); - dev->acpi = device_add(&acpi_device); + dev->acpi = device_add(&acpi_intel_device); acpi_set_slot(dev->acpi, dev->pci_slot); acpi_set_nvr(dev->acpi, dev->nvr); @@ -1206,9 +1205,13 @@ static void } else cpu_fast_off_val = cpu_fast_off_count = 0; - dev->apm = device_add(&apm_pci_device); - /* APM intercept handler to update PIIX/PIIX3 and PIIX4/4E/SMSC ACPI SMI status on APM SMI. */ - io_sethandler(0x00b2, 0x0001, NULL, NULL, NULL, piix_apm_out, NULL, NULL, dev); + /* On PIIX4, PIIX4E, and SMSC, APM is added by the ACPI device. */ + if (dev->type < 4) { + dev->apm = device_add(&apm_pci_device); + /* APM intercept handler to update PIIX/PIIX3 and PIIX4/4E/SMSC ACPI SMI status on APM SMI. */ + io_sethandler(0x00b2, 0x0001, NULL, NULL, NULL, piix_apm_out, NULL, NULL, dev); + } + dev->port_92 = device_add(&port_92_pci_device); dma_alias_set(); diff --git a/src/machine/m_at_socket4_5.c b/src/machine/m_at_socket4_5.c index b86c3fe4b..81dcb13ac 100644 --- a/src/machine/m_at_socket4_5.c +++ b/src/machine/m_at_socket4_5.c @@ -384,37 +384,6 @@ machine_at_president_init(const machine_t *model) } -int -machine_at_apollo_init(const machine_t *model) -{ - int ret; - - ret = bios_load_linear(L"roms/machines/apollo/S728P.ROM", - 0x000e0000, 131072, 0); - - if (bios_only || !ret) - return ret; - - machine_at_common_init_ex(model, 2); - device_add(&ls486e_nvr_device); - - pci_init(PCI_CONFIG_TYPE_1); - pci_register_slot(0x00, PCI_CARD_NORTHBRIDGE, 0, 0, 0, 0); - pci_register_slot(0x08, PCI_CARD_NORMAL, 1, 2, 3, 4); - pci_register_slot(0x09, PCI_CARD_NORMAL, 2, 3, 4, 1); - pci_register_slot(0x0A, PCI_CARD_NORMAL, 3, 4, 1, 2); - pci_register_slot(0x0B, PCI_CARD_NORMAL, 4, 1, 2, 3); - pci_register_slot(0x07, PCI_CARD_SOUTHBRIDGE, 0, 0, 0, 0); - device_add(&i430fx_device); - device_add(&piix_device); - device_add(&keyboard_ps2_pci_device); - device_add(&fdc37c932fr_device); - device_add(&intel_flash_bxt_device); - - return ret; -} - - #if defined(DEV_BRANCH) && defined(USE_VECTRA54) int machine_at_vectra54_init(const machine_t *model) diff --git a/src/machine/m_at_socket7_s7.c b/src/machine/m_at_socket7_s7.c index 702f7d9d3..4eb1f963b 100644 --- a/src/machine/m_at_socket7_s7.c +++ b/src/machine/m_at_socket7_s7.c @@ -511,7 +511,6 @@ machine_at_brio80xx_init(const machine_t *model) device_add(&piix3_device); device_add(&keyboard_ps2_ami_pci_device); device_add(&fdc37c935_device); - // device_add(&intel_flash_bxt_device); device_add(&sst_flash_29ee020_device); return ret; @@ -536,7 +535,7 @@ machine_at_pb680_init(const machine_t *model) pci_init(PCI_CONFIG_TYPE_1); pci_register_slot(0x00, PCI_CARD_NORTHBRIDGE, 0, 0, 0, 0); - pci_register_slot(0x08, PCI_CARD_NORMAL, 1, 2, 3, 4); + pci_register_slot(0x08, PCI_CARD_ONBOARD, 4, 0, 0, 0); pci_register_slot(0x11, PCI_CARD_NORMAL, 1, 2, 3, 4); pci_register_slot(0x13, PCI_CARD_NORMAL, 2, 3, 4, 1); pci_register_slot(0x0B, PCI_CARD_NORMAL, 3, 4, 1, 2); diff --git a/src/machine/machine_table.c b/src/machine/machine_table.c index 5b8abfb7e..b7f81b4c3 100644 --- a/src/machine/machine_table.c +++ b/src/machine/machine_table.c @@ -201,7 +201,7 @@ const machine_t machines[] = { { "[486 PCI] Zida Tomato 4DP", "4dps", {{"Intel", cpus_i486}, {"AMD", cpus_Am486}, {"Cyrix", cpus_Cx486}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_HDC, 1, 255, 1, 127, machine_at_4dps_init, NULL }, /* Socket 4 machines */ - //430LX + /* 430LX */ { "[Socket 4 LX] IBM Ambra DP60 PCI", "ambradp60", {{"Intel", cpus_Pentium5V}, {"", NULL}, {"", NULL}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 2, 128, 2, 127, machine_at_ambradp60_init, NULL }, #if defined(DEV_BRANCH) && defined(USE_VPP60) { "[Socket 4 LX] IBM PS/ValuePoint P60", "valuepointp60", {{"Intel", cpus_Pentium5V}, {"", NULL}, {"", NULL}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 2, 128, 2, 127, machine_at_valuepointp60_init, NULL }, @@ -210,13 +210,12 @@ const machine_t machines[] = { { "[Socket 4 LX] Micro Star 586MC1", "586mc1", {{"Intel", cpus_Pentium5V}, {"", NULL}, {"", NULL}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 2, 128, 2, 127, machine_at_586mc1_init, NULL }, /* Socket 5 machines */ - //430NX + /* 430NX */ { "[Socket 5 NX] Intel Premiere/PCI II", "plato", MACHINE_CPUS_PENTIUM_S5, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 2, 128, 2, 127, machine_at_plato_init, NULL }, { "[Socket 5 NX] IBM Ambra DP90 PCI", "ambradp90", MACHINE_CPUS_PENTIUM_S5, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 2, 128, 2, 127, machine_at_ambradp90_init, NULL }, { "[Socket 5 NX] Gigabyte GA-586IP", "430nx", MACHINE_CPUS_PENTIUM_S5, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 2, 128, 2, 127, machine_at_430nx_init, NULL }, - - //430FX - { "[Socket 5 FX] AMI Apollo", "apollo", MACHINE_CPUS_PENTIUM_S5, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_HDC, 8, 128, 8, 127, machine_at_apollo_init, NULL }, + + /* 430FX */ #if defined(DEV_BRANCH) && defined(USE_VECTRA54) { "[Socket 5 FX] HP Vectra VL 5 Series 4", "vectra54", MACHINE_CPUS_PENTIUM_S5, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_HDC, 8, 128, 8, 511, machine_at_vectra54_init, NULL }, #endif @@ -226,7 +225,7 @@ const machine_t machines[] = { { "[Socket 5 FX] President Award 430FX PCI","president", MACHINE_CPUS_PENTIUM_S5, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_HDC, 8, 128, 8, 127, machine_at_president_init, NULL }, /* Socket 7 machines */ - //430FX + /* 430FX */ { "[Socket 7-3V FX] ASUS P/I-P54TP4XE", "p54tp4xe", MACHINE_CPUS_PENTIUM_S73V, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 128, 8, 127, machine_at_p54tp4xe_init, NULL }, { "[Socket 7-3V FX] Intel Advanced/ATX", "thor", MACHINE_CPUS_PENTIUM_S73V, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 128, 8, 127, machine_at_thor_init, NULL }, { "[Socket 7-3V FX] Intel Advanced/EV", "endeavor", MACHINE_CPUS_PENTIUM_S73V, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC | MACHINE_VIDEO, 8, 128, 8, 127, machine_at_endeavor_init, at_endeavor_get_device }, @@ -235,7 +234,7 @@ const machine_t machines[] = { #endif { "[Socket 7-3V FX] Packard Bell PB640", "pb640", MACHINE_CPUS_PENTIUM_S73V, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC | MACHINE_VIDEO, 8, 128, 8, 127, machine_at_pb640_init, at_pb640_get_device }, - //430HX + /* 430HX */ { "[Socket 7-3V HX] Acer M3a", "acerm3a", MACHINE_CPUS_PENTIUM_S73V, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 192, 8, 127, machine_at_acerm3a_init, NULL }, { "[Socket 7-3V HX] AOpen AP53", "ap53", MACHINE_CPUS_PENTIUM_S73V, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 512, 8, 127, machine_at_ap53_init, NULL }, { "[Socket 7-3V HX] SuperMicro Super P55T2S","p55t2s", MACHINE_CPUS_PENTIUM_S73V, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 768, 8, 127, machine_at_p55t2s_init, NULL }, @@ -246,47 +245,46 @@ const machine_t machines[] = { { "[Socket 7 HX] Intel TC430HX", "tc430hx", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 128, 8, 255, machine_at_tc430hx_init, NULL }, { "[Socket 7 HX] Toshiba Equium 5200D", "equium5200", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 192, 8, 127, machine_at_equium5200_init, NULL }, - //430VX + /* 430VX */ { "[Socket 7 VX] ASUS P/I-P55TVP4", "p55tvp4", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 128, 8, 127, machine_at_p55tvp4_init, NULL }, { "[Socket 7 VX] Shuttle HOT-557", "430vx", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 128, 8, 127, machine_at_i430vx_init, NULL }, { "[Socket 7 VX] Epox P55-VA", "p55va", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 128, 8, 127, machine_at_p55va_init, NULL }, { "[Socket 7 VX] HP Brio 80xx", "brio80xx", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 128, 8, 127, machine_at_brio80xx_init, NULL }, { "[Socket 7 VX] Packard Bell PB680", "pb680", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 128, 8, 127, machine_at_pb680_init, NULL }, - //430TX + /* 430TX */ { "[Socket 7 TX] ASUS TX97", "tx97", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 256, 8, 255, machine_at_tx97_init, NULL }, { "[Socket 7 TX] Gigabyte GA-586T2", "586t2", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 256, 8, 255, machine_at_586t2_init, NULL }, { "[Socket 7 TX] Intel YM430TX", "ym430tx", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 256, 8, 255, machine_at_ym430tx_init, NULL }, { "[Socket 7 TX] Iwill P55XB2", "p55xb2", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 256, 8, 255, machine_at_p55xb2_init, NULL }, { "[Socket 7 TX] PC Partner TXA807DS", "807ds", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 256, 8, 255, machine_at_807ds_init, NULL }, { "[Socket 7 TX] SuperMicro P5MMS98", "p5mms98", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 256, 8, 255, machine_at_p5mms98_init, NULL }, - - //Apollo VPX + + /* Apollo VPX */ { "[Socket 7 VPX] Zida Tomato TX100", "tx100", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 512, 8, 127, machine_at_tx100_init, NULL }, - //Apollo VP3 + /* Apollo VP3 */ { "[Socket 7 VP3] QDI Advance II", "advanceii", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 128, 8, 127, machine_at_advanceii_init, NULL }, /* Super Socket 7 machines */ - //Apollo MVP3 + /* Apollo MVP3 */ { "[Super 7 MVP3] AOpen AX59 Pro", "ax59pro", MACHINE_CPUS_PENTIUM_SS7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 1024, 8, 255, machine_at_ax59pro_init, NULL }, { "[Super 7 MVP3] FIC VA-503+", "ficva503p", MACHINE_CPUS_PENTIUM_SS7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 512, 8, 255, machine_at_mvp3_init, NULL }, /* Socket 8 machines */ - //440FX + /* 440FX */ { "[Socket 8 FX] Gigabyte GA-686NX", "686nx", {{"Intel", cpus_PentiumPro}, {"", NULL}, {"", NULL}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 512, 8, 127, machine_at_686nx_init, NULL }, { "[Socket 8 FX] PC Partner MB600N", "mb600n", {{"Intel", cpus_PentiumPro}, {"", NULL}, {"", NULL}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 512, 8, 127, machine_at_mb600n_init, NULL }, { "[Socket 8 FX] Biostar MB-8500ttc", "8500ttc", {{"Intel", cpus_PentiumPro}, {"", NULL}, {"", NULL}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 512, 8, 127, machine_at_8500ttc_init, NULL }, { "[Socket 8 FX] Micronics M6MI", "m6mi", {{"Intel", cpus_PentiumPro}, {"", NULL}, {"", NULL}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 384, 8, 127, machine_at_m6mi_init, NULL }, /* Slot 1 machines */ - //440FX + /* 440FX */ { "[Slot 1 FX] ECS P6KFX-A", "p6kfx", {{"Intel", cpus_PentiumII_28v},{"", NULL}, {"", NULL}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 384, 8, 127, machine_at_p6kfx_init, NULL }, - //440LX - - - //440BX + /* 440LX */ + + /* 440BX */ { "[Slot 1 BX] Gigabyte GA-6BXC", "6bxc", {{"Intel", cpus_PentiumII}, {"Intel/PGA370", cpus_Celeron},{"VIA", cpus_Cyrix3},{"", NULL},{"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 768, 8, 255, machine_at_6bxc_init, NULL }, { "[Slot 1 BX] ASUS P2B-LS", "p2bls", {{"Intel", cpus_PentiumII}, {"Intel/PGA370", cpus_Celeron},{"VIA", cpus_Cyrix3},{"", NULL},{"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 1024, 8, 255, machine_at_p2bls_init, NULL }, { "[Slot 1 BX] ASUS P2B-LS (coreboot BIOS)","p2bls_cb", {{"Intel", cpus_PentiumII}, {"Intel/PGA370", cpus_Celeron},{"", NULL}, {"", NULL},{"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC | MACHINE_COREBOOT, 8, 1024, 8, 255, machine_at_p2bls_init, NULL }, @@ -294,18 +292,18 @@ const machine_t machines[] = { { "[Slot 1 BX] ASUS P3B-F (coreboot BIOS)", "p3bf_cb", {{"Intel", cpus_PentiumII}, {"Intel/PGA370", cpus_Celeron},{"", NULL}, {"", NULL},{"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC | MACHINE_COREBOOT, 8, 1024, 8, 255, machine_at_p3bf_init, NULL }, { "[Slot 1 BX] ABit BF6", "bf6", {{"Intel", cpus_PentiumII}, {"Intel/PGA370", cpus_Celeron},{"VIA", cpus_Cyrix3},{"", NULL},{"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 768, 8, 255, machine_at_bf6_init, NULL }, - //440ZX + /* 440ZX */ { "[Slot 1 ZX] Packard Bell Bora Pro", "borapro", {{"Intel", cpus_PentiumII}, {"Intel/PGA370", cpus_Celeron},{"VIA", cpus_Cyrix3},{"", NULL},{"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 512, 8, 255, machine_at_borapro_init, NULL }, /* PGA370 machines */ - //440BX + /* 440BX */ { "[Socket 370 BX] ASUS CUBX", "cubx", {{"Intel", cpus_Celeron}, {"VIA", cpus_Cyrix3}, {"", NULL}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 1024, 8, 255, machine_at_cubx_init, NULL }, { "[Socket 370 BX] A-Trend ATC7020BXII", "atc7020bxii", {{"Intel", cpus_Celeron}, {"VIA", cpus_Cyrix3}, {"", NULL}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 1024, 8, 255, machine_at_atc7020bxii_init, NULL }, - //440ZX + /* 440ZX */ { "[Socket 370 ZX] Soltek SL-63A1", "63a", {{"Intel", cpus_Celeron}, {"", NULL}, {"", NULL}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 512, 8, 255, machine_at_63a_init, NULL }, - //VIA Apollo Pro + /* VIA Apollo Pro */ { "[Socket 370 APRO] PC Partner APAS3", "apas3", {{"Intel", cpus_Celeron}, {"VIA", cpus_Cyrix3}, {"", NULL}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 1024, 8, 255, machine_at_apas3_init, NULL }, { NULL, NULL, {{"", 0}, {"", 0}, {"", 0}, {"", 0}, {"", 0}}, 0, 0, 0, 0, 0, NULL, NULL } diff --git a/src/mouse_bus.c b/src/mouse_bus.c index e2146a5af..c2b4afad7 100644 --- a/src/mouse_bus.c +++ b/src/mouse_bus.c @@ -832,7 +832,7 @@ static const device_config_t ms_config[] = { const device_t mouse_logibus_device = { - "Logitech Bus Mouse", + "Logitech/Microsoft Bus Mouse", DEVICE_ISA, MOUSE_TYPE_LOGIBUS, bm_init, bm_close, NULL, diff --git a/src/scsi/scsi_cdrom.c b/src/scsi/scsi_cdrom.c index 2944335ee..106e2bb87 100644 --- a/src/scsi/scsi_cdrom.c +++ b/src/scsi/scsi_cdrom.c @@ -2595,7 +2595,7 @@ scsi_cdrom_get_max(int ide_has_dma, int type) ret = ide_has_dma ? 2 : -1; break; case TYPE_UDMA: - ret = ide_has_dma ? 2 : -1; + ret = ide_has_dma ? 4 /*2*/ : -1; break; default: ret = -1; diff --git a/src/win/win_about.c b/src/win/win_about.c index b0001b177..0c57c346d 100644 --- a/src/win/win_about.c +++ b/src/win/win_about.c @@ -53,6 +53,7 @@ AboutDialogProcedure(HWND hdlg, UINT message, WPARAM wParam, LPARAM lParam) case WM_COMMAND: switch (LOWORD(wParam)) { case IDOK: + case IDCANCEL: EndDialog(hdlg, 0); plat_pause(0); return TRUE; diff --git a/src/win/win_settings.c b/src/win/win_settings.c index a6e34420b..b34afd943 100644 --- a/src/win/win_settings.c +++ b/src/win/win_settings.c @@ -2352,7 +2352,10 @@ win_settings_hard_disks_update_item(HWND hwndList, int i, int column) lvI.pszText = szText; lvI.iImage = 0; } else if (column == 1) { - lvI.pszText = temp_hdd[i].fn; + if (!wcsnicmp(temp_hdd[i].fn, usr_path, wcslen(usr_path))) + lvI.pszText = temp_hdd[i].fn + wcslen(usr_path); + else + lvI.pszText = temp_hdd[i].fn; lvI.iImage = 0; } else if (column == 2) { wsprintf(szText, plat_get_string(IDS_4098), temp_hdd[i].tracks); @@ -2421,7 +2424,10 @@ win_settings_hard_disks_recalc_list(HWND hwndList) return FALSE; lvI.iSubItem = 1; - lvI.pszText = temp_hdd[i].fn; + if (!wcsnicmp(temp_hdd[i].fn, usr_path, wcslen(usr_path))) + lvI.pszText = temp_hdd[i].fn + wcslen(usr_path); + else + lvI.pszText = temp_hdd[i].fn; lvI.iItem = j; lvI.iImage = 0;