From 746175e3976614168afa733d17704b1c4956dfc3 Mon Sep 17 00:00:00 2001 From: OBattler Date: Thu, 23 Apr 2020 23:10:13 +0200 Subject: [PATCH 01/12] Fixed the PB680 PCI slots. --- src/machine/m_at_socket7_s7.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/machine/m_at_socket7_s7.c b/src/machine/m_at_socket7_s7.c index 702f7d9d3..4eb1f963b 100644 --- a/src/machine/m_at_socket7_s7.c +++ b/src/machine/m_at_socket7_s7.c @@ -511,7 +511,6 @@ machine_at_brio80xx_init(const machine_t *model) device_add(&piix3_device); device_add(&keyboard_ps2_ami_pci_device); device_add(&fdc37c935_device); - // device_add(&intel_flash_bxt_device); device_add(&sst_flash_29ee020_device); return ret; @@ -536,7 +535,7 @@ machine_at_pb680_init(const machine_t *model) pci_init(PCI_CONFIG_TYPE_1); pci_register_slot(0x00, PCI_CARD_NORTHBRIDGE, 0, 0, 0, 0); - pci_register_slot(0x08, PCI_CARD_NORMAL, 1, 2, 3, 4); + pci_register_slot(0x08, PCI_CARD_ONBOARD, 4, 0, 0, 0); pci_register_slot(0x11, PCI_CARD_NORMAL, 1, 2, 3, 4); pci_register_slot(0x13, PCI_CARD_NORMAL, 2, 3, 4, 1); pci_register_slot(0x0B, PCI_CARD_NORMAL, 3, 4, 1, 2); From 3942f97a1a291d563cfef5ef7a8ab36c950cf646 Mon Sep 17 00:00:00 2001 From: OBattler Date: Fri, 24 Apr 2020 00:36:49 +0200 Subject: [PATCH 02/12] Removed the AMI Apollo. 
--- src/include/86box/machine.h | 1 - src/machine/m_at_socket4_5.c | 31 ------------------------- src/machine/machine_table.c | 44 +++++++++++++++++------------------- 3 files changed, 21 insertions(+), 55 deletions(-) diff --git a/src/include/86box/machine.h b/src/include/86box/machine.h index b24bbcff1..7e51ab210 100644 --- a/src/include/86box/machine.h +++ b/src/include/86box/machine.h @@ -265,7 +265,6 @@ extern int machine_at_endeavor_init(const machine_t *); extern int machine_at_zappa_init(const machine_t *); extern int machine_at_mb500n_init(const machine_t *); extern int machine_at_president_init(const machine_t *); -extern int machine_at_apollo_init(const machine_t *); #if defined(DEV_BRANCH) && defined(USE_VECTRA54) extern int machine_at_vectra54_init(const machine_t *); #endif diff --git a/src/machine/m_at_socket4_5.c b/src/machine/m_at_socket4_5.c index b86c3fe4b..81dcb13ac 100644 --- a/src/machine/m_at_socket4_5.c +++ b/src/machine/m_at_socket4_5.c @@ -384,37 +384,6 @@ machine_at_president_init(const machine_t *model) } -int -machine_at_apollo_init(const machine_t *model) -{ - int ret; - - ret = bios_load_linear(L"roms/machines/apollo/S728P.ROM", - 0x000e0000, 131072, 0); - - if (bios_only || !ret) - return ret; - - machine_at_common_init_ex(model, 2); - device_add(&ls486e_nvr_device); - - pci_init(PCI_CONFIG_TYPE_1); - pci_register_slot(0x00, PCI_CARD_NORTHBRIDGE, 0, 0, 0, 0); - pci_register_slot(0x08, PCI_CARD_NORMAL, 1, 2, 3, 4); - pci_register_slot(0x09, PCI_CARD_NORMAL, 2, 3, 4, 1); - pci_register_slot(0x0A, PCI_CARD_NORMAL, 3, 4, 1, 2); - pci_register_slot(0x0B, PCI_CARD_NORMAL, 4, 1, 2, 3); - pci_register_slot(0x07, PCI_CARD_SOUTHBRIDGE, 0, 0, 0, 0); - device_add(&i430fx_device); - device_add(&piix_device); - device_add(&keyboard_ps2_pci_device); - device_add(&fdc37c932fr_device); - device_add(&intel_flash_bxt_device); - - return ret; -} - - #if defined(DEV_BRANCH) && defined(USE_VECTRA54) int machine_at_vectra54_init(const machine_t *model) 
diff --git a/src/machine/machine_table.c b/src/machine/machine_table.c index b358fde33..8cc0dbfaf 100644 --- a/src/machine/machine_table.c +++ b/src/machine/machine_table.c @@ -201,7 +201,7 @@ const machine_t machines[] = { { "[486 PCI] Zida Tomato 4DP", "4dps", {{"Intel", cpus_i486}, {"AMD", cpus_Am486}, {"Cyrix", cpus_Cx486}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_HDC, 1, 255, 1, 127, machine_at_4dps_init, NULL }, /* Socket 4 machines */ - //430LX + /* 430LX */ { "[Socket 4 LX] IBM Ambra DP60 PCI", "ambradp60", {{"Intel", cpus_Pentium5V}, {"", NULL}, {"", NULL}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 2, 128, 2, 127, machine_at_ambradp60_init, NULL }, #if defined(DEV_BRANCH) && defined(USE_VPP60) { "[Socket 4 LX] IBM PS/ValuePoint P60", "valuepointp60", {{"Intel", cpus_Pentium5V}, {"", NULL}, {"", NULL}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 2, 128, 2, 127, machine_at_valuepointp60_init, NULL }, @@ -210,13 +210,12 @@ const machine_t machines[] = { { "[Socket 4 LX] Micro Star 586MC1", "586mc1", {{"Intel", cpus_Pentium5V}, {"", NULL}, {"", NULL}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 2, 128, 2, 127, machine_at_586mc1_init, NULL }, /* Socket 5 machines */ - //430NX + /* 430NX */ { "[Socket 5 NX] Intel Premiere/PCI II", "plato", MACHINE_CPUS_PENTIUM_S5, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 2, 128, 2, 127, machine_at_plato_init, NULL }, { "[Socket 5 NX] IBM Ambra DP90 PCI", "ambradp90", MACHINE_CPUS_PENTIUM_S5, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 2, 128, 2, 127, machine_at_ambradp90_init, NULL }, { "[Socket 5 NX] Gigabyte GA-586IP", "430nx", MACHINE_CPUS_PENTIUM_S5, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 2, 128, 2, 127, machine_at_430nx_init, NULL }, - - //430FX - { "[Socket 5 FX] 
AMI Apollo", "apollo", MACHINE_CPUS_PENTIUM_S5, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_HDC, 8, 128, 8, 127, machine_at_apollo_init, NULL }, + + /* 430FX */ #if defined(DEV_BRANCH) && defined(USE_VECTRA54) { "[Socket 5 FX] HP Vectra VL 5 Series 4", "vectra54", MACHINE_CPUS_PENTIUM_S5, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_HDC, 8, 128, 8, 511, machine_at_vectra54_init, NULL }, #endif @@ -226,7 +225,7 @@ const machine_t machines[] = { { "[Socket 5 FX] President Award 430FX PCI","president", MACHINE_CPUS_PENTIUM_S5, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_HDC, 8, 128, 8, 127, machine_at_president_init, NULL }, /* Socket 7 machines */ - //430FX + /* 430FX */ { "[Socket 7-3V FX] ASUS P/I-P54TP4XE", "p54tp4xe", MACHINE_CPUS_PENTIUM_S73V, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 128, 8, 127, machine_at_p54tp4xe_init, NULL }, { "[Socket 7-3V FX] Intel Advanced/ATX", "thor", MACHINE_CPUS_PENTIUM_S73V, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 128, 8, 127, machine_at_thor_init, NULL }, { "[Socket 7-3V FX] Intel Advanced/EV", "endeavor", MACHINE_CPUS_PENTIUM_S73V, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC | MACHINE_VIDEO, 8, 128, 8, 127, machine_at_endeavor_init, at_endeavor_get_device }, @@ -235,7 +234,7 @@ const machine_t machines[] = { #endif { "[Socket 7-3V FX] Packard Bell PB640", "pb640", MACHINE_CPUS_PENTIUM_S73V, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC | MACHINE_VIDEO, 8, 128, 8, 127, machine_at_pb640_init, at_pb640_get_device }, - //430HX + /* 430HX */ { "[Socket 7-3V HX] Acer M3a", "acerm3a", MACHINE_CPUS_PENTIUM_S73V, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 192, 8, 127, machine_at_acerm3a_init, NULL }, { "[Socket 7-3V HX] AOpen AP53", "ap53", MACHINE_CPUS_PENTIUM_S73V, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 512, 8, 127, machine_at_ap53_init, NULL }, { "[Socket 7-3V 
HX] SuperMicro Super P55T2S","p55t2s", MACHINE_CPUS_PENTIUM_S73V, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 768, 8, 127, machine_at_p55t2s_init, NULL }, @@ -246,64 +245,63 @@ const machine_t machines[] = { { "[Socket 7 HX] Intel TC430HX", "tc430hx", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 128, 8, 255, machine_at_tc430hx_init, NULL }, { "[Socket 7 HX] Toshiba Equium 5200D", "equium5200", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 192, 8, 127, machine_at_equium5200_init, NULL }, - //430VX + /* 430VX */ { "[Socket 7 VX] ASUS P/I-P55TVP4", "p55tvp4", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 128, 8, 127, machine_at_p55tvp4_init, NULL }, { "[Socket 7 VX] Shuttle HOT-557", "430vx", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 128, 8, 127, machine_at_i430vx_init, NULL }, { "[Socket 7 VX] Epox P55-VA", "p55va", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 128, 8, 127, machine_at_p55va_init, NULL }, { "[Socket 7 VX] HP Brio 80xx", "brio80xx", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 128, 8, 127, machine_at_brio80xx_init, NULL }, { "[Socket 7 VX] Packard Bell PB680", "pb680", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 128, 8, 127, machine_at_pb680_init, NULL }, - //430TX + /* 430TX */ { "[Socket 7 TX] ASUS TX97", "tx97", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 256, 8, 255, machine_at_tx97_init, NULL }, { "[Socket 7 TX] Gigabyte GA-586T2", "586t2", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 256, 8, 255, machine_at_586t2_init, NULL }, { "[Socket 7 TX] Intel YM430TX", 
"ym430tx", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 256, 8, 255, machine_at_ym430tx_init, NULL }, { "[Socket 7 TX] Iwill P55XB2", "p55xb2", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 256, 8, 255, machine_at_p55xb2_init, NULL }, { "[Socket 7 TX] PC Partner TXA807DS", "807ds", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 256, 8, 255, machine_at_807ds_init, NULL }, { "[Socket 7 TX] SuperMicro P5MMS98", "p5mms98", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 256, 8, 255, machine_at_p5mms98_init, NULL }, - - //Apollo VPX + + /* Apollo VPX */ { "[Socket 7 VPX] Zida Tomato TX100", "tx100", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 512, 8, 127, machine_at_tx100_init, NULL }, - //Apollo VP3 + /* Apollo VP3 */ { "[Socket 7 VP3] QDI Advance II", "advanceii", MACHINE_CPUS_PENTIUM_S7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 128, 8, 127, machine_at_advanceii_init, NULL }, /* Super Socket 7 machines */ - //Apollo MVP3 + /* Apollo MVP3 */ { "[Super 7 MVP3] AOpen AX59 Pro", "ax59pro", MACHINE_CPUS_PENTIUM_SS7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 1024, 8, 255, machine_at_ax59pro_init, NULL }, { "[Super 7 MVP3] FIC VA-503+", "ficva503p", MACHINE_CPUS_PENTIUM_SS7, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 512, 8, 255, machine_at_mvp3_init, NULL }, /* Socket 8 machines */ - //440FX + /* 440FX */ { "[Socket 8 FX] Gigabyte GA-686NX", "686nx", {{"Intel", cpus_PentiumPro}, {"", NULL}, {"", NULL}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 512, 8, 127, machine_at_686nx_init, NULL }, { "[Socket 8 FX] PC Partner MB600N", "mb600n", {{"Intel", cpus_PentiumPro}, {"", NULL}, {"", NULL}, {"", 
NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 512, 8, 127, machine_at_mb600n_init, NULL }, { "[Socket 8 FX] Biostar MB-8500ttc", "8500ttc", {{"Intel", cpus_PentiumPro}, {"", NULL}, {"", NULL}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 512, 8, 127, machine_at_8500ttc_init, NULL }, { "[Socket 8 FX] Micronics M6MI", "m6mi", {{"Intel", cpus_PentiumPro}, {"", NULL}, {"", NULL}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 384, 8, 127, machine_at_m6mi_init, NULL }, /* Slot 1 machines */ - //440FX + /* 440FX */ { "[Slot 1 FX] ECS P6KFX-A", "p6kfx", {{"Intel", cpus_PentiumII_28v},{"", NULL}, {"", NULL}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 384, 8, 127, machine_at_p6kfx_init, NULL }, - //440LX - - - //440BX + /* 440LX */ + + /* 440BX */ { "[Slot 1 BX] Gigabyte GA-6BXC", "6bxc", {{"Intel", cpus_PentiumII}, {"Intel/PGA370", cpus_Celeron},{"VIA", cpus_Cyrix3},{"", NULL},{"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 768, 8, 255, machine_at_6bxc_init, NULL }, { "[Slot 1 BX] ASUS P2B-LS", "p2bls", {{"Intel", cpus_PentiumII}, {"Intel/PGA370", cpus_Celeron},{"VIA", cpus_Cyrix3},{"", NULL},{"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 1024, 8, 255, machine_at_p2bls_init, NULL }, { "[Slot 1 BX] ASUS P3B-F", "p3bf", {{"Intel", cpus_PentiumII}, {"Intel/PGA370", cpus_Celeron},{"VIA", cpus_Cyrix3},{"", NULL},{"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 1024, 8, 255, machine_at_p3bf_init, NULL }, { "[Slot 1 BX] ABit BF6", "bf6", {{"Intel", cpus_PentiumII}, {"Intel/PGA370", cpus_Celeron},{"VIA", cpus_Cyrix3},{"", NULL},{"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 768, 8, 255, machine_at_bf6_init, NULL }, - //440ZX + /* 440ZX */ { 
"[Slot 1 ZX] Packard Bell Bora Pro", "borapro", {{"Intel", cpus_PentiumII}, {"Intel/PGA370", cpus_Celeron},{"VIA", cpus_Cyrix3},{"", NULL},{"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 512, 8, 255, machine_at_borapro_init, NULL }, /* PGA370 machines */ - //440BX + /* 440BX */ { "[Socket 370 BX] ASUS CUBX", "cubx", {{"Intel", cpus_Celeron}, {"VIA", cpus_Cyrix3}, {"", NULL}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 1024, 8, 255, machine_at_cubx_init, NULL }, { "[Socket 370 BX] A-Trend ATC7020BXII", "atc7020bxii", {{"Intel", cpus_Celeron}, {"VIA", cpus_Cyrix3}, {"", NULL}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 1024, 8, 255, machine_at_atc7020bxii_init, NULL }, - //440ZX + /* 440ZX */ { "[Socket 370 ZX] Soltek SL-63A1", "63a", {{"Intel", cpus_Celeron}, {"", NULL}, {"", NULL}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 512, 8, 255, machine_at_63a_init, NULL }, - //VIA Apollo Pro + /* VIA Apollo Pro */ { "[Socket 370 APRO] PC Partner APAS3", "apas3", {{"Intel", cpus_Celeron}, {"VIA", cpus_Cyrix3}, {"", NULL}, {"", NULL}, {"", NULL}}, MACHINE_PCI | MACHINE_ISA | MACHINE_AT | MACHINE_PS2 | MACHINE_HDC, 8, 1024, 8, 255, machine_at_apas3_init, NULL }, { NULL, NULL, {{"", 0}, {"", 0}, {"", 0}, {"", 0}, {"", 0}}, 0, 0, 0, 0, 0, NULL, NULL } From f9de3183aa32df1d277df894f2bf936a3417e9ed Mon Sep 17 00:00:00 2001 From: Shaojun Li Date: Fri, 24 Apr 2020 13:33:56 +0800 Subject: [PATCH 03/12] Allow relative path of disk images. 
--- src/config.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/config.c b/src/config.c index 0cc5d35a1..d63294264 100644 --- a/src/config.c +++ b/src/config.c @@ -957,7 +957,12 @@ load_hard_disks(void) wcsncpy(hdd[c].fn, &wp[wcslen(usr_path)], sizeof_w(hdd[c].fn)); } else #endif - wcsncpy(hdd[c].fn, wp, sizeof_w(hdd[c].fn)); + if (plat_path_abs(wp)) { + wcsncpy(hdd[c].fn, wp, sizeof_w(hdd[c].fn)); + } else { + wcsncpy(hdd[c].fn, usr_path, sizeof_w(hdd[c].fn)); + wcsncat(hdd[c].fn, wp, sizeof_w(hdd[c].fn)-wcslen(usr_path)-1); + } /* If disk is empty or invalid, mark it for deletion. */ if (! hdd_is_valid(c)) { @@ -1832,7 +1837,10 @@ save_hard_disks(void) sprintf(temp, "hdd_%02i_fn", c+1); if (hdd_is_valid(c) && (wcslen(hdd[c].fn) != 0)) - config_set_wstring(cat, temp, hdd[c].fn); + if (wcsnicmp(hdd[c].fn, usr_path, wcslen(usr_path))) + config_set_wstring(cat, temp, &hdd[c].fn[wcslen(usr_path)]); + else + config_set_wstring(cat, temp, hdd[c].fn); else config_delete_var(cat, temp); } From 0e7c0cca02ca2e16fbb43f68df936b9298b6e83e Mon Sep 17 00:00:00 2001 From: Shaojun Li Date: Fri, 24 Apr 2020 13:38:52 +0800 Subject: [PATCH 04/12] Fix the close button of about dialog not working.
--- src/win/win_about.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/win/win_about.c b/src/win/win_about.c index b0001b177..0c57c346d 100644 --- a/src/win/win_about.c +++ b/src/win/win_about.c @@ -53,6 +53,7 @@ AboutDialogProcedure(HWND hdlg, UINT message, WPARAM wParam, LPARAM lParam) case WM_COMMAND: switch (LOWORD(wParam)) { case IDOK: + case IDCANCEL: EndDialog(hdlg, 0); plat_pause(0); return TRUE; From 31216312f73a92f59ab2cb89508359e531c2e0a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Hrdli=C4=8Dka?= Date: Fri, 24 Apr 2020 14:42:01 +0200 Subject: [PATCH 05/12] config: fix hard disk images outside usrpath --- src/config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config.c b/src/config.c index d63294264..0d3b13a29 100644 --- a/src/config.c +++ b/src/config.c @@ -1837,7 +1837,7 @@ save_hard_disks(void) sprintf(temp, "hdd_%02i_fn", c+1); if (hdd_is_valid(c) && (wcslen(hdd[c].fn) != 0)) - if (wcsnicmp(hdd[c].fn, usr_path, wcslen(usr_path))) + if (!wcsnicmp(hdd[c].fn, usr_path, wcslen(usr_path))) config_set_wstring(cat, temp, &hdd[c].fn[wcslen(usr_path)]); else config_set_wstring(cat, temp, hdd[c].fn); From af07bd5e6c01a93f143546641de09af7e563c8b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Hrdli=C4=8Dka?= Date: Fri, 24 Apr 2020 14:56:30 +0200 Subject: [PATCH 06/12] win: Don't show full path for HD images in usrpath --- src/win/win_settings.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/win/win_settings.c b/src/win/win_settings.c index a6e34420b..b34afd943 100644 --- a/src/win/win_settings.c +++ b/src/win/win_settings.c @@ -2352,7 +2352,10 @@ win_settings_hard_disks_update_item(HWND hwndList, int i, int column) lvI.pszText = szText; lvI.iImage = 0; } else if (column == 1) { - lvI.pszText = temp_hdd[i].fn; + if (!wcsnicmp(temp_hdd[i].fn, usr_path, wcslen(usr_path))) + lvI.pszText = temp_hdd[i].fn + wcslen(usr_path); + else + lvI.pszText = temp_hdd[i].fn; lvI.iImage = 0; } else if 
(column == 2) { wsprintf(szText, plat_get_string(IDS_4098), temp_hdd[i].tracks); @@ -2421,7 +2424,10 @@ win_settings_hard_disks_recalc_list(HWND hwndList) return FALSE; lvI.iSubItem = 1; - lvI.pszText = temp_hdd[i].fn; + if (!wcsnicmp(temp_hdd[i].fn, usr_path, wcslen(usr_path))) + lvI.pszText = temp_hdd[i].fn + wcslen(usr_path); + else + lvI.pszText = temp_hdd[i].fn; lvI.iItem = j; lvI.iImage = 0; From fe6df042be95c74dfcba8c01ffcb44d33c2dac13 Mon Sep 17 00:00:00 2001 From: nerd73 Date: Sat, 25 Apr 2020 03:24:59 -0600 Subject: [PATCH 07/12] Implement a proper P6 timing model --- src/cpu/codegen_timing_p6.c | 1864 ++++++++++++++++--------------- src/cpu_new/codegen_timing_p6.c | 1864 ++++++++++++++++--------------- 2 files changed, 1902 insertions(+), 1826 deletions(-) diff --git a/src/cpu/codegen_timing_p6.c b/src/cpu/codegen_timing_p6.c index 0cddde494..a0da30304 100644 --- a/src/cpu/codegen_timing_p6.c +++ b/src/cpu/codegen_timing_p6.c @@ -1,4 +1,5 @@ -/*Hacky P6 timings based on K6 timings*/ +/*Basic P6 timing model by plant/nerd73. 
Based on the K6 timing model*/ +/*Some cycle timings come from https://www.agner.org/optimize/instruction_tables.pdf*/ #include #include #include @@ -21,24 +22,26 @@ typedef enum uop_type_t UOP_ALU = 0, /*Executes in Integer X or Y units*/ UOP_ALUX, /*Executes in Integer X unit*/ UOP_LOAD, /*Executes in Load unit*/ - UOP_STORE, /*Executes in Store unit*/ + UOP_STORED, /*Executes in Data Store unit*/ + UOP_STOREA, /*Executes in Address Store unit*/ UOP_FLOAD, /*Executes in Load unit*/ - UOP_FSTORE, /*Executes in Store unit*/ + UOP_FSTORED, /*Executes in Data Store unit*/ + UOP_FSTOREA, /*Executes in Address Store unit*/ UOP_MLOAD, /*Executes in Load unit*/ - UOP_MSTORE, /*Executes in Store unit*/ + UOP_MSTORED, /*Executes in Data Store unit*/ + UOP_MSTOREA, /*Executes in Address Store unit*/ UOP_FLOAT, /*Executes in Floating Point unit*/ - UOP_MEU, /*Executes in Multimedia unit*/ - UOP_MEU_SHIFT, /*Executes in Multimedia unit or ALU X/Y. Uses MMX shifter*/ - UOP_MEU_MUL, /*Executes in Multimedia unit or ALU X/Y. Uses MMX multiplier*/ + UOP_MMX, /*Executes in Integer X or Y units as MMX*/ + UOP_MMX_SHIFT, /*Executes in Integer Y unit. Uses MMX shifter*/ + UOP_MMX_MUL, /*Executes in Integer X unit. 
Uses MMX multiplier*/ UOP_BRANCH, /*Executes in Branch unit*/ UOP_LIMM /*Does not require an execution unit*/ } uop_type_t; typedef enum decode_type_t { - DECODE_SHORT, - DECODE_LONG, - DECODE_VECTOR + DECODE_SIMPLE, + DECODE_COMPLEX, } decode_type_t; #define MAX_UOPS 10 @@ -46,7 +49,6 @@ typedef enum decode_type_t typedef struct risc86_uop_t { uop_type_t type; - double throughput; double latency; } risc86_uop_t; @@ -60,804 +62,885 @@ typedef struct risc86_instruction_t static const risc86_instruction_t alu_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t alux_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1} }; static const risc86_instruction_t load_alu_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t load_alux_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1} }; static const risc86_instruction_t alu_store_op = { - .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = 
UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} + }; static const risc86_instruction_t alux_store_op = { - .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} }; static const risc86_instruction_t branch_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_BRANCH, .latency = 1} }; static const risc86_instruction_t limm_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LIMM, .throughput = 1, .latency = 1} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_LIMM, .latency = 1} }; static const risc86_instruction_t load_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1} }; static const risc86_instruction_t store_op = { - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1} + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = {.type = UOP_STOREA, .latency = 1} }; static const risc86_instruction_t bswap_op = { - .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, 
}; static const risc86_instruction_t leave_op = { .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t lods_op = { .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t loop_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_BRANCH, .latency = 1} }; static const risc86_instruction_t mov_reg_seg_op = { .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, }; static const risc86_instruction_t movs_op = { .nr_uops = 4, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t pop_reg_op = { .nr_uops = 
2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t pop_mem_op = { - .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t push_imm_op = { - .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2}, + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = {.type = UOP_STOREA, .latency = 1}, }; static const risc86_instruction_t push_mem_op = { - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1} }; static const risc86_instruction_t push_seg_op = { - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} }; static
const risc86_instruction_t stos_op = { - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = {.type = UOP_STOREA, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t test_reg_op = { .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t test_reg_b_op = { .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1} }; static const risc86_instruction_t test_mem_imm_op = { .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t test_mem_imm_b_op = { .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1} }; static const risc86_instruction_t xchg_op = { .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type =
UOP_ALU, .latency = 1} }; static const risc86_instruction_t mmx_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX, .latency = 1.5} }; static const risc86_instruction_t mmx_mul_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU_MUL, .throughput = 1.5, .latency = 3} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX_MUL, .latency = 1.5} }; static const risc86_instruction_t mmx_shift_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU_SHIFT, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX_SHIFT, .latency = 1.5} }; static const risc86_instruction_t load_mmx_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU, .throughput = 1.5, .latency = 3} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_MMX, .latency = 1.5} }; static const risc86_instruction_t load_mmx_mul_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU_MUL, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 3}, + .uop[1] = {.type = UOP_MMX_MUL, .latency = 1.5} }; static const risc86_instruction_t load_mmx_shift_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU_SHIFT, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 3}, + .uop[1] = {.type = UOP_MMX_SHIFT, .latency = 1.5} }; static const risc86_instruction_t mload_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MLOAD, .throughput = 1, .latency = 2} + 
.decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_MLOAD, .latency = 3}, }; static const risc86_instruction_t mstore_op = { - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MSTORE, .throughput = 1, .latency = 1} + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_MSTORED, .latency = 1}, + .uop[1] = {.type = UOP_MSTOREA, .latency = 1} }; static const risc86_instruction_t pmul_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX_MUL, .latency = 1.5} }; static const risc86_instruction_t pmul_mem_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 3}, + .uop[1] = {.type = UOP_MMX_MUL, .latency = 1.5} }; - static const risc86_instruction_t float_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 1} +}; +static const risc86_instruction_t fadd_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 3} +}; +static const risc86_instruction_t fmul_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_ALU, .latency = 5} +}; +static const risc86_instruction_t float2_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 1} +}; +static const risc86_instruction_t fchs_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 2}, + .uop[1] = {.type = UOP_FLOAT, .latency = 2}, + .uop[2] = {.type = UOP_FLOAT, .latency = 2} }; static const risc86_instruction_t 
load_float_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_FLOAT, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 1} +}; +static const risc86_instruction_t load_fadd_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 3} +}; +static const risc86_instruction_t load_fmul_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 5} }; static const risc86_instruction_t fstore_op = { - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FSTORED, .latency = 1}, + .uop[1] = {.type = UOP_FSTOREA, .latency = 1}, +}; +static const risc86_instruction_t load_fiadd_op = +{ + .nr_uops = 7, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 1}, + .uop[2] = {.type = UOP_FLOAT, .latency = 1}, + .uop[3] = {.type = UOP_FLOAT, .latency = 1}, + .uop[4] = {.type = UOP_FLOAT, .latency = 1}, + .uop[5] = {.type = UOP_FLOAT, .latency = 1}, + .uop[6] = {.type = UOP_FLOAT, .latency = 1} }; - static const risc86_instruction_t fdiv_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 38, .latency = 38} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 32} }; static const risc86_instruction_t fdiv_mem_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_FLOAT, .throughput = 38, .latency = 38} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, 
.latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 38} }; static const risc86_instruction_t fsin_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 62, .latency = 62} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 60} }; static const risc86_instruction_t fsqrt_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 49, .latency = 49} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 69} }; -static const risc86_instruction_t vector_fldcw_op = +static const risc86_instruction_t complex_fldcw_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 8, .latency = 8} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 10} }; -static const risc86_instruction_t vector_float_op = +static const risc86_instruction_t complex_float_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 1} }; -static const risc86_instruction_t vector_float_l_op = +static const risc86_instruction_t complex_float_l_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 50, .latency = 50} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 50} }; -static const risc86_instruction_t vector_flde_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} -}; -static const risc86_instruction_t vector_fste_op = +static const risc86_instruction_t complex_flde_op = { .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2}, - .uop[1] = {.type = UOP_FSTORE, 
.throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = {.type = UOP_FLOAD, .latency = 1}, + .uop[2] = {.type = UOP_FLOAT, .latency = 2} +}; +static const risc86_instruction_t complex_fste_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 2}, + .uop[1] = {.type = UOP_FSTORED, .latency = 1}, + .uop[2] = {.type = UOP_FSTOREA, .latency = 1} }; -static const risc86_instruction_t vector_alu1_op = +static const risc86_instruction_t complex_alu1_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1} }; -static const risc86_instruction_t vector_alu2_op = +static const risc86_instruction_t complex_alu2_op = { .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} }; -static const risc86_instruction_t vector_alu3_op = +static const risc86_instruction_t complex_alu3_op = { .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} }; -static const risc86_instruction_t vector_alu6_op = +static const risc86_instruction_t complex_alu6_op = { .nr_uops = 6, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency 
= 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1}, + .uop[4] = {.type = UOP_ALU, .latency = 1}, + .uop[5] = {.type = UOP_ALU, .latency = 1} }; -static const risc86_instruction_t vector_alux1_op = +static const risc86_instruction_t complex_alux1_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1} }; -static const risc86_instruction_t vector_alux3_op = +static const risc86_instruction_t complex_alux3_op = { .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .latency = 1} }; -static const risc86_instruction_t vector_alux6_op = +static const risc86_instruction_t complex_alux6_op = { - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .nr_uops = 6, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 
1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .latency = 1}, + .uop[3] = {.type = UOP_ALUX, .latency = 1}, + .uop[4] = {.type = UOP_ALUX, .latency = 1}, + .uop[5] = {.type = UOP_ALUX, .latency = 1} }; -static const risc86_instruction_t vector_alu_store_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_alux_store_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_arpl_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, - .uop[1] = {.type = UOP_ALU, .throughput = 3, .latency = 3} -}; -static const risc86_instruction_t vector_bound_op = +static const risc86_instruction_t complex_alu_store_op = { .nr_uops = 4, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1}, }; -static const risc86_instruction_t vector_bsx_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 10, .latency = 10} -}; -static const risc86_instruction_t vector_call_far_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = 
UOP_ALU, .throughput = 3, .latency = 3}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_cli_sti_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 7, .latency = 7} -}; -static const risc86_instruction_t vector_cmps_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_cmpsb_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_cmpxchg_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, -}; -static const risc86_instruction_t vector_cmpxchg_b_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, -}; -static const risc86_instruction_t vector_cpuid_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 22, .latency = 22} -}; -static const risc86_instruction_t vector_div16_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 10, .latency = 10} -}; -static const risc86_instruction_t vector_div16_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = 
{.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 10, .latency = 10} -}; -static const risc86_instruction_t vector_div32_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 18, .latency = 18} -}; -static const risc86_instruction_t vector_div32_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 18, .latency = 18} -}; -static const risc86_instruction_t vector_emms_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 25, .latency = 25} -}; -static const risc86_instruction_t vector_enter_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 10, .latency = 10} -}; -static const risc86_instruction_t vector_femms_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 6, .latency = 6} -}; -static const risc86_instruction_t vector_in_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11} -}; -static const risc86_instruction_t vector_ins_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_int_op = -{ - .nr_uops = 5, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 20, .latency = 20}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; 
-static const risc86_instruction_t vector_iret_op = -{ - .nr_uops = 5, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[3] = {.type = UOP_ALU, .throughput = 20, .latency = 20}, - .uop[4] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_invd_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1000, .latency = 1000} -}; -static const risc86_instruction_t vector_jmp_far_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_load_alu_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_load_alux_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_loop_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_lss_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_ALU, .throughput = 3, .latency = 3} -}; -static const risc86_instruction_t vector_mov_mem_seg_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency 
= 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_mov_seg_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 3, .latency = 3} -}; -static const risc86_instruction_t vector_mov_seg_reg_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3} -}; -static const risc86_instruction_t vector_mul_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_mul_mem_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_mul64_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_mul64_mem_op = +static const risc86_instruction_t complex_alux_store_op = { .nr_uops = 4, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} }; -static const risc86_instruction_t vector_out_op = 
+static const risc86_instruction_t complex_arpl_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3}, + .uop[1] = {.type = UOP_ALU, .latency = 3} +}; +static const risc86_instruction_t complex_bound_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_LOAD, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_bsx_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 10, .latency = 10} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 10} }; -static const risc86_instruction_t vector_outs_op = +static const risc86_instruction_t complex_call_far_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_cli_sti_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 7} +}; +static const risc86_instruction_t complex_cmps_op = { .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 10, .latency = 10}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} }; -static const risc86_instruction_t vector_pusha_op = +static const risc86_instruction_t complex_cmpsb_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 
1} +}; +static const risc86_instruction_t complex_cmpxchg_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} +}; +static const risc86_instruction_t complex_cmpxchg_b_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} +}; +static const risc86_instruction_t complex_cpuid_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 23} +}; +static const risc86_instruction_t complex_div16_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 10} +}; +static const risc86_instruction_t complex_div16_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 10} +}; +static const risc86_instruction_t complex_div32_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 18} +}; +static const risc86_instruction_t complex_div32_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 18} +}; +static const risc86_instruction_t complex_emms_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 25} +}; +static const risc86_instruction_t complex_enter_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = {.type = UOP_STOREA, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 10} +}; +static const risc86_instruction_t complex_femms_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + 
.uop[0] = {.type = UOP_ALU, .latency = 6} +}; +static const risc86_instruction_t complex_in_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 10} +}; +static const risc86_instruction_t complex_ins_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 10}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_int_op = { .nr_uops = 8, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[6] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[7] = {.type = UOP_STORE, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 20}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_STORED, .latency = 1}, + .uop[4] = {.type = UOP_STOREA, .latency = 1}, + .uop[5] = {.type = UOP_STORED, .latency = 1}, + .uop[6] = {.type = UOP_STOREA, .latency = 1}, + .uop[7] = {.type = UOP_BRANCH, .latency = 1} }; -static const risc86_instruction_t vector_popa_op = +static const risc86_instruction_t complex_iret_op = +{ + .nr_uops = 5, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 3}, + .uop[1] = {.type = UOP_LOAD, .latency = 3}, + .uop[2] = {.type = UOP_LOAD, .latency = 3}, + .uop[3] = {.type = UOP_ALU, .latency = 20}, + .uop[4] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_invd_op = +{ + .nr_uops = 1, + 
.decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 500} +}; +static const risc86_instruction_t complex_jmp_far_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3}, + .uop[1] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_load_alu_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_load_alux_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t complex_loop_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_lss_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_LOAD, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 3} +}; +static const risc86_instruction_t complex_mov_mem_seg_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, +}; +static const risc86_instruction_t complex_mov_seg_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 3} +}; +static const risc86_instruction_t complex_mov_seg_reg_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3} +}; +static const risc86_instruction_t complex_mul_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t 
complex_mul_mem_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t complex_mul64_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t complex_mul64_mem_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .latency = 1}, + .uop[3] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t complex_out_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 10}, + .uop[1] = {.type = UOP_STOREA, .latency = 10}, +}; +static const risc86_instruction_t complex_outs_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 10}, + .uop[2] = {.type = UOP_STOREA, .latency = 10}, + .uop[3] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_pusha_op = { .nr_uops = 8, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[6] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[7] = {.type = UOP_LOAD, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 2}, + .uop[1] = {.type = UOP_STOREA, .latency = 2}, +
.uop[2] = {.type = UOP_STORED, .latency = 2}, + .uop[3] = {.type = UOP_STOREA, .latency = 2}, + .uop[4] = {.type = UOP_STORED, .latency = 2}, + .uop[5] = {.type = UOP_STOREA, .latency = 2}, + .uop[6] = {.type = UOP_STORED, .latency = 2}, + .uop[7] = {.type = UOP_STOREA, .latency = 2} }; -static const risc86_instruction_t vector_popf_op = +static const risc86_instruction_t complex_popa_op = +{ + .nr_uops = 8, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_LOAD, .latency = 1}, + .uop[2] = {.type = UOP_LOAD, .latency = 1}, + .uop[3] = {.type = UOP_LOAD, .latency = 1}, + .uop[4] = {.type = UOP_LOAD, .latency = 1}, + .uop[5] = {.type = UOP_LOAD, .latency = 1}, + .uop[6] = {.type = UOP_LOAD, .latency = 1}, + .uop[7] = {.type = UOP_LOAD, .latency = 1} +}; +static const risc86_instruction_t complex_popf_op = { .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 17, .latency = 17} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 3}, + .uop[1] = {.type = UOP_ALUX, .latency = 17} }; -static const risc86_instruction_t vector_push_mem_op = +static const risc86_instruction_t complex_push_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = {.type = UOP_STOREA, .latency = 1} +}; +static const risc86_instruction_t complex_pushf_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1} +}; +static const risc86_instruction_t complex_ret_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_retf_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + 
.uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 3}, + .uop[2] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_scas_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_scasb_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_setcc_mem_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_FSTORED, .latency = 1}, + .uop[3] = {.type = UOP_FSTOREA, .latency = 1} +}; +static const risc86_instruction_t complex_setcc_reg_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_test_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_test_mem_b_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t complex_xchg_mem_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_xlat_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_LOAD, .latency = 1} +}; 
+static const risc86_instruction_t complex_wbinvd_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_pushf_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_ret_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_retf_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, - .uop[2] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_scas_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_scasb_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_setcc_mem_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_setcc_reg_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; 
-static const risc86_instruction_t vector_test_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_test_mem_b_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_xchg_mem_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_xlat_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2} -}; -static const risc86_instruction_t vector_wbinvd_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 10000, .latency = 10000} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 10000} }; + #define INVALID NULL static const risc86_instruction_t *opcode_timings[256] = @@ -865,38 +948,38 @@ static const risc86_instruction_t *opcode_timings[256] = /* ADD ADD ADD ADD*/ /*00*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, /* ADD ADD PUSH ES POP ES*/ - &alux_op, &alu_op, &push_seg_op, &vector_mov_seg_mem_op, + &alux_op, &alu_op, &push_seg_op, &complex_mov_seg_mem_op, /* OR OR OR OR*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, /* OR OR PUSH CS */ &alux_op, &alu_op, &push_seg_op, INVALID, /* ADC ADC ADC ADC*/ -/*10*/ &vector_alux_store_op, &vector_alu_store_op, &vector_load_alux_op, &vector_load_alu_op, +/*10*/ &complex_alux_store_op,&complex_alu_store_op, 
&complex_load_alux_op,&complex_load_alu_op, /* ADC ADC PUSH SS POP SS*/ - &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, + &complex_alux1_op, &complex_alu1_op, &push_seg_op, &complex_mov_seg_mem_op, /* SBB SBB SBB SBB*/ -/*10*/ &vector_alux_store_op, &vector_alu_store_op, &vector_load_alux_op, &vector_load_alu_op, +/*18*/ &complex_alux_store_op,&complex_alu_store_op, &complex_load_alux_op,&complex_load_alu_op, /* SBB SBB PUSH DS POP DS*/ - &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, + &complex_alux1_op, &complex_alu1_op, &push_seg_op, &complex_mov_seg_mem_op, /* AND AND AND AND*/ /*20*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, /* AND AND DAA*/ - &alux_op, &alu_op, INVALID, &vector_alux1_op, + &alux_op, &alu_op, INVALID, &complex_alux1_op, /* SUB SUB SUB SUB*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, /* SUB SUB DAS*/ - &alux_op, &alu_op, INVALID, &vector_alux1_op, + &alux_op, &alu_op, INVALID, &complex_alux1_op, /* XOR XOR XOR XOR*/ /*30*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, /* XOR XOR AAA*/ - &alux_op, &alu_op, INVALID, &vector_alux6_op, + &alux_op, &alu_op, INVALID, &complex_alux6_op, /* CMP CMP CMP CMP*/ &load_alux_op, &load_alu_op, &load_alux_op, &load_alu_op, /* CMP CMP AAS*/ - &alux_op, &alu_op, INVALID, &vector_alux6_op, + &alux_op, &alu_op, INVALID, &complex_alux6_op, /* INC EAX INC ECX INC EDX INC EBX*/ /*40*/ &alu_op, &alu_op, &alu_op, &alu_op, @@ -917,12 +1000,12 @@ static const risc86_instruction_t *opcode_timings[256] = &pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, /* PUSHA POPA BOUND ARPL*/ -/*60*/ &vector_pusha_op, &vector_popa_op, &vector_bound_op, &vector_arpl_op, +/*60*/ &complex_pusha_op, &complex_popa_op, &complex_bound_op, &complex_arpl_op, INVALID, INVALID, INVALID, INVALID, /* PUSH imm IMUL PUSH imm IMUL*/ - &push_imm_op, &vector_mul_op, &push_imm_op, &vector_mul_op, + &push_imm_op, &complex_mul_op, &push_imm_op,
&complex_mul_op, /* INSB INSW OUTSB OUTSW*/ - &vector_ins_op, &vector_ins_op, &vector_outs_op, &vector_outs_op, + &complex_ins_op, &complex_ins_op, &complex_outs_op, &complex_outs_op, /* Jxx*/ /*70*/ &branch_op, &branch_op, &branch_op, &branch_op, @@ -932,29 +1015,29 @@ static const risc86_instruction_t *opcode_timings[256] = /*80*/ INVALID, INVALID, INVALID, INVALID, /* TEST TEST XCHG XCHG*/ - &vector_test_mem_b_op, &vector_test_mem_op, &vector_xchg_mem_op, &vector_xchg_mem_op, + &complex_test_mem_b_op, &complex_test_mem_op, &complex_xchg_mem_op, &complex_xchg_mem_op, /* MOV MOV MOV MOV*/ &store_op, &store_op, &load_op, &load_op, /* MOV from seg LEA MOV to seg POP*/ - &vector_mov_mem_seg_op, &store_op, &vector_mov_seg_mem_op, &pop_mem_op, + &complex_mov_mem_seg_op, &store_op, &complex_mov_seg_mem_op, &pop_mem_op, /* NOP XCHG XCHG XCHG*/ /*90*/ &limm_op, &xchg_op, &xchg_op, &xchg_op, /* XCHG XCHG XCHG XCHG*/ &xchg_op, &xchg_op, &xchg_op, &xchg_op, /* CBW CWD CALL far WAIT*/ - &vector_alu1_op, &vector_alu1_op, &vector_call_far_op, &limm_op, + &complex_alu1_op, &complex_alu1_op, &complex_call_far_op, &limm_op, /* PUSHF POPF SAHF LAHF*/ - &vector_pushf_op, &vector_popf_op, &vector_alux1_op, &vector_alux1_op, + &complex_pushf_op, &complex_popf_op, &complex_alux1_op, &complex_alux1_op, /* MOV MOV MOV MOV*/ /*a0*/ &load_op, &load_op, &store_op, &store_op, /* MOVSB MOVSW CMPSB CMPSW*/ - &movs_op, &movs_op, &vector_cmpsb_op, &vector_cmps_op, + &movs_op, &movs_op, &complex_cmpsb_op, &complex_cmps_op, /* TEST TEST STOSB STOSW*/ &test_reg_b_op, &test_reg_op, &stos_op, &stos_op, /* LODSB LODSW SCASB SCASW*/ - &lods_op, &lods_op, &vector_scasb_op, &vector_scas_op, + &lods_op, &lods_op, &complex_scasb_op, &complex_scas_op, /* MOV*/ /*b0*/ &limm_op, &limm_op, &limm_op, &limm_op, @@ -963,37 +1046,37 @@ static const risc86_instruction_t *opcode_timings[256] = &limm_op, &limm_op, &limm_op, &limm_op, /* RET imm RET*/ -/*c0*/ INVALID, INVALID, &vector_ret_op, &vector_ret_op, +/*c0*/ 
INVALID, INVALID, &complex_ret_op, &complex_ret_op, /* LES LDS MOV MOV*/ - &vector_lss_op, &vector_lss_op, &store_op, &store_op, + &complex_lss_op, &complex_lss_op, &store_op, &store_op, /* ENTER LEAVE RETF RETF*/ - &vector_enter_op, &leave_op, &vector_retf_op, &vector_retf_op, + &complex_enter_op, &leave_op, &complex_retf_op, &complex_retf_op, /* INT3 INT INTO IRET*/ - &vector_int_op, &vector_int_op, &vector_int_op, &vector_iret_op, + &complex_int_op, &complex_int_op, &complex_int_op, &complex_iret_op, /*d0*/ INVALID, INVALID, INVALID, INVALID, /* AAM AAD SETALC XLAT*/ - &vector_alux6_op, &vector_alux3_op, &vector_alux1_op, &vector_xlat_op, + &complex_alux6_op, &complex_alux3_op, &complex_alux1_op, &complex_xlat_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /* LOOPNE LOOPE LOOP JCXZ*/ -/*e0*/ &vector_loop_op, &vector_loop_op, &loop_op, &vector_loop_op, +/*e0*/ &complex_loop_op, &complex_loop_op, &loop_op, &complex_loop_op, /* IN AL IN AX OUT_AL OUT_AX*/ - &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, + &complex_in_op, &complex_in_op, &complex_out_op, &complex_out_op, /* CALL JMP JMP JMP*/ - &store_op, &branch_op, &vector_jmp_far_op, &branch_op, + &store_op, &branch_op, &complex_jmp_far_op, &branch_op, /* IN AL IN AX OUT_AL OUT_AX*/ - &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, + &complex_in_op, &complex_in_op, &complex_out_op, &complex_out_op, /* REPNE REPE*/ /*f0*/ INVALID, INVALID, INVALID, INVALID, /* HLT CMC*/ - &vector_alux1_op, &vector_alu2_op, INVALID, INVALID, + &complex_alux1_op, &complex_alu2_op, INVALID, INVALID, /* CLC STC CLI STI*/ - &vector_alu1_op, &vector_alu1_op, &vector_cli_sti_op, &vector_cli_sti_op, + &complex_alu1_op, &complex_alu1_op, &complex_cli_sti_op, &complex_cli_sti_op, /* CLD STD INCDEC*/ - &vector_alu1_op, &vector_alu1_op, &alux_store_op, INVALID + &complex_alu1_op, &complex_alu1_op, &alux_store_op, INVALID }; static const risc86_instruction_t *opcode_timings_mod3[256] = 
@@ -1001,38 +1084,38 @@ static const risc86_instruction_t *opcode_timings_mod3[256] = /* ADD ADD ADD ADD*/ /*00*/ &alux_op, &alu_op, &alux_op, &alu_op, /* ADD ADD PUSH ES POP ES*/ - &alux_op, &alu_op, &push_seg_op, &vector_mov_seg_mem_op, + &alux_op, &alu_op, &push_seg_op, &complex_mov_seg_mem_op, /* OR OR OR OR*/ &alux_op, &alu_op, &alux_op, &alu_op, /* OR OR PUSH CS */ &alux_op, &alu_op, &push_seg_op, INVALID, /* ADC ADC ADC ADC*/ -/*10*/ &vector_alux1_op, &vector_alu1_op, &vector_alux1_op, &vector_alu1_op, +/*10*/ &complex_alux1_op, &complex_alu1_op, &complex_alux1_op, &complex_alu1_op, /* ADC ADC PUSH SS POP SS*/ - &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, + &complex_alux1_op, &complex_alu1_op, &push_seg_op, &complex_mov_seg_mem_op, /* SBB SBB SBB SBB*/ - &vector_alux1_op, &vector_alu1_op, &vector_alux1_op, &vector_alu1_op, + &complex_alux1_op, &complex_alu1_op, &complex_alux1_op, &complex_alu1_op, /* SBB SBB PUSH DS POP DS*/ - &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, + &complex_alux1_op, &complex_alu1_op, &push_seg_op, &complex_mov_seg_mem_op, /* AND AND AND AND*/ /*20*/ &alux_op, &alu_op, &alux_op, &alu_op, /* AND AND DAA*/ - &alux_op, &alu_op, INVALID, &vector_alux1_op, + &alux_op, &alu_op, INVALID, &complex_alux1_op, /* SUB SUB SUB SUB*/ &alux_op, &alu_op, &alux_op, &alu_op, /* SUB SUB DAS*/ - &alux_op, &alu_op, INVALID, &vector_alux1_op, + &alux_op, &alu_op, INVALID, &complex_alux1_op, /* XOR XOR XOR XOR*/ /*30*/ &alux_op, &alu_op, &alux_op, &alu_op, /* XOR XOR AAA*/ - &alux_op, &alu_op, INVALID, &vector_alux6_op, + &alux_op, &alu_op, INVALID, &complex_alux6_op, /* CMP CMP CMP CMP*/ &alux_op, &alu_op, &alux_op, &alu_op, /* CMP CMP AAS*/ - &alux_op, &alu_op, INVALID, &vector_alux6_op, + &alux_op, &alu_op, INVALID, &complex_alux6_op, /* INC EAX INC ECX INC EDX INC EBX*/ /*40*/ &alu_op, &alu_op, &alu_op, &alu_op, @@ -1053,12 +1136,12 @@ static const risc86_instruction_t *opcode_timings_mod3[256] = 
&pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, /* PUSHA POPA BOUND ARPL*/ -/*60*/ &vector_pusha_op, &vector_popa_op, &vector_bound_op, &vector_arpl_op, +/*60*/ &complex_pusha_op, &complex_popa_op, &complex_bound_op, &complex_arpl_op, INVALID, INVALID, INVALID, INVALID, /* PUSH imm IMUL PUSH imm IMUL*/ - &push_imm_op, &vector_mul_op, &push_imm_op, &vector_mul_op, + &push_imm_op, &complex_mul_op, &push_imm_op, &complex_mul_op, /* INSB INSW OUTSB OUTSW*/ - &vector_ins_op, &vector_ins_op, &vector_outs_op, &vector_outs_op, + &complex_ins_op, &complex_ins_op, &complex_outs_op, &complex_outs_op, /* Jxx*/ /*70*/ &branch_op, &branch_op, &branch_op, &branch_op, @@ -1068,29 +1151,29 @@ static const risc86_instruction_t *opcode_timings_mod3[256] = /*80*/ INVALID, INVALID, INVALID, INVALID, /* TEST TEST XCHG XCHG*/ - &vector_alu1_op, &vector_alu1_op, &vector_alu3_op, &vector_alu3_op, + &complex_alu1_op, &complex_alu1_op, &complex_alu3_op, &complex_alu3_op, /* MOV MOV MOV MOV*/ &store_op, &store_op, &load_op, &load_op, /* MOV from seg LEA MOV to seg POP*/ - &mov_reg_seg_op, &store_op, &vector_mov_seg_reg_op, &pop_reg_op, + &mov_reg_seg_op, &store_op, &complex_mov_seg_reg_op, &pop_reg_op, /* NOP XCHG XCHG XCHG*/ /*90*/ &limm_op, &xchg_op, &xchg_op, &xchg_op, /* XCHG XCHG XCHG XCHG*/ &xchg_op, &xchg_op, &xchg_op, &xchg_op, /* CBW CWD CALL far WAIT*/ - &vector_alu1_op, &vector_alu1_op, &vector_call_far_op, &limm_op, + &complex_alu1_op, &complex_alu1_op, &complex_call_far_op, &limm_op, /* PUSHF POPF SAHF LAHF*/ - &vector_pushf_op, &vector_popf_op, &vector_alux1_op, &vector_alux1_op, + &complex_pushf_op, &complex_popf_op, &complex_alux1_op, &complex_alux1_op, /* MOV MOV MOV MOV*/ /*a0*/ &load_op, &load_op, &store_op, &store_op, /* MOVSB MOVSW CMPSB CMPSW*/ - &movs_op, &movs_op, &vector_cmpsb_op, &vector_cmps_op, + &movs_op, &movs_op, &complex_cmpsb_op, &complex_cmps_op, /* TEST TEST STOSB STOSW*/ &test_reg_b_op, &test_reg_op, &stos_op, &stos_op, /* LODSB LODSW SCASB SCASW*/ - 
&lods_op, &lods_op, &vector_scasb_op, &vector_scas_op, + &lods_op, &lods_op, &complex_scasb_op, &complex_scas_op, /* MOV*/ /*b0*/ &limm_op, &limm_op, &limm_op, &limm_op, @@ -1099,57 +1182,57 @@ static const risc86_instruction_t *opcode_timings_mod3[256] = &limm_op, &limm_op, &limm_op, &limm_op, /* RET imm RET*/ -/*c0*/ INVALID, INVALID, &vector_ret_op, &vector_ret_op, +/*c0*/ INVALID, INVALID, &complex_ret_op, &complex_ret_op, /* LES LDS MOV MOV*/ - &vector_lss_op, &vector_lss_op, &store_op, &store_op, + &complex_lss_op, &complex_lss_op, &store_op, &store_op, /* ENTER LEAVE RETF RETF*/ - &vector_enter_op, &leave_op, &vector_retf_op, &vector_retf_op, + &complex_enter_op, &leave_op, &complex_retf_op, &complex_retf_op, /* INT3 INT INTO IRET*/ - &vector_int_op, &vector_int_op, &vector_int_op, &vector_iret_op, + &complex_int_op, &complex_int_op, &complex_int_op, &complex_iret_op, /*d0*/ INVALID, INVALID, INVALID, INVALID, /* AAM AAD SETALC XLAT*/ - &vector_alux6_op, &vector_alux3_op, &vector_alux1_op, &vector_xlat_op, + &complex_alux6_op, &complex_alux3_op, &complex_alux1_op, &complex_xlat_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /* LOOPNE LOOPE LOOP JCXZ*/ -/*e0*/ &vector_loop_op, &vector_loop_op, &loop_op, &vector_loop_op, +/*e0*/ &complex_loop_op, &complex_loop_op, &loop_op, &complex_loop_op, /* IN AL IN AX OUT_AL OUT_AX*/ - &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, + &complex_in_op, &complex_in_op, &complex_out_op, &complex_out_op, /* CALL JMP JMP JMP*/ - &store_op, &branch_op, &vector_jmp_far_op, &branch_op, + &store_op, &branch_op, &complex_jmp_far_op, &branch_op, /* IN AL IN AX OUT_AL OUT_AX*/ - &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, + &complex_in_op, &complex_in_op, &complex_out_op, &complex_out_op, /* REPNE REPE*/ /*f0*/ INVALID, INVALID, INVALID, INVALID, /* HLT CMC*/ - &vector_alux1_op, &vector_alu2_op, INVALID, INVALID, + &complex_alux1_op, &complex_alu2_op, INVALID, INVALID, /* 
CLC STC CLI STI*/ - &vector_alu1_op, &vector_alu1_op, &vector_cli_sti_op, &vector_cli_sti_op, + &complex_alu1_op, &complex_alu1_op, &complex_cli_sti_op, &complex_cli_sti_op, /* CLD STD INCDEC*/ - &vector_alu1_op, &vector_alu1_op, &vector_alux1_op, INVALID + &complex_alu1_op, &complex_alu1_op, &complex_alux1_op, INVALID }; static const risc86_instruction_t *opcode_timings_0f[256] = { -/*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, - INVALID, &vector_alu6_op, &vector_alu6_op, INVALID, - &vector_invd_op, &vector_wbinvd_op, INVALID, INVALID, - INVALID, &load_op, &vector_femms_op, INVALID, +/*00*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, + INVALID, &complex_alu6_op, &complex_alu6_op, INVALID, + &complex_invd_op, &complex_wbinvd_op, INVALID, INVALID, + INVALID, &load_op, &complex_femms_op, INVALID, /*10*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, -/*20*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, - &vector_alu6_op, &vector_alu6_op, INVALID, INVALID, +/*20*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, + &complex_alu6_op, &complex_alu6_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, -/*30*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID, +/*30*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -1170,7 +1253,7 @@ static const risc86_instruction_t *opcode_timings_0f[256] = INVALID, INVALID, &mload_op, &mload_op, /*70*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, - &load_mmx_op, &load_mmx_op, &load_mmx_op, &vector_emms_op, + &load_mmx_op, &load_mmx_op, &load_mmx_op, &complex_emms_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, &mstore_op, &mstore_op, 
@@ -1179,122 +1262,122 @@ static const risc86_instruction_t *opcode_timings_0f[256] = &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, -/*90*/ &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, - &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, - &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, - &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, +/*90*/ &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, + &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, + &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, + &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, /* SETcc in the non-mod3 (memory-operand) table: use the store form */ -/*a0*/ &push_seg_op, &vector_mov_seg_mem_op, &vector_cpuid_op, &vector_load_alu_op, - &vector_alu_store_op, &vector_alu_store_op, INVALID, INVALID, - &push_seg_op, &vector_mov_seg_mem_op, INVALID, &vector_load_alu_op, - &vector_alu_store_op, &vector_alu_store_op, INVALID, &vector_mul_op, +/*a0*/ &push_seg_op, &complex_mov_seg_mem_op,&complex_cpuid_op, &complex_load_alu_op, + &complex_alu_store_op, &complex_alu_store_op, INVALID, INVALID, + &push_seg_op, &complex_mov_seg_mem_op,INVALID, &complex_load_alu_op, + &complex_alu_store_op, &complex_alu_store_op, INVALID, &complex_mul_op, -/*b0*/ &vector_cmpxchg_b_op, &vector_cmpxchg_op, &vector_lss_op, &vector_load_alu_op, - &vector_lss_op, &vector_lss_op, &load_alux_op, &load_alu_op, - INVALID, INVALID, &vector_load_alu_op, &vector_load_alu_op, - &vector_bsx_op, &vector_bsx_op, &load_alux_op, &load_alu_op, +/*b0*/ &complex_cmpxchg_b_op, &complex_cmpxchg_op, &complex_lss_op, &complex_load_alu_op, + &complex_lss_op, &complex_lss_op, &load_alux_op, &load_alu_op, + INVALID, INVALID, &complex_load_alu_op,
&complex_load_alu_op, + &complex_bsx_op, &complex_bsx_op, &load_alux_op, &load_alu_op, -/*c0*/ &vector_alux_store_op, &vector_alu_store_op, INVALID, INVALID, - INVALID, INVALID, INVALID, &vector_cmpxchg_op, - &bswap_op, &bswap_op, &bswap_op, &bswap_op, - &bswap_op, &bswap_op, &bswap_op, &bswap_op, +/*c0*/ &complex_alux_store_op, &complex_alu_store_op, INVALID, INVALID, + INVALID, INVALID, INVALID, &complex_cmpxchg_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, -/*d0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, - INVALID, &load_mmx_mul_op, INVALID, INVALID, - &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, - &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, +/*d0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, + INVALID, &load_mmx_mul_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, -/*e0*/ &load_mmx_op, &load_mmx_shift_op, &load_mmx_shift_op, INVALID, - INVALID, &pmul_mem_op, INVALID, INVALID, - &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, - &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, +/*e0*/ &load_mmx_op, &load_mmx_shift_op, &load_mmx_shift_op, INVALID, + INVALID, &pmul_mem_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, -/*f0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, - INVALID, &pmul_mem_op, INVALID, INVALID, - &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, - &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, +/*f0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, + INVALID, &pmul_mem_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, + &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, }; static const risc86_instruction_t *opcode_timings_0f_mod3[256] = { -/*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, 
&vector_alu6_op, - INVALID, &vector_alu6_op, &vector_alu6_op, INVALID, - &vector_invd_op, &vector_wbinvd_op, INVALID, INVALID, - INVALID, INVALID, &vector_femms_op, INVALID, +/*00*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, + INVALID, &complex_alu6_op, &complex_alu6_op, INVALID, + &complex_invd_op, &complex_wbinvd_op, INVALID, INVALID, + INVALID, INVALID, &complex_femms_op, INVALID, -/*10*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, +/*10*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, -/*20*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, - &vector_alu6_op, &vector_alu6_op, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, +/*20*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, + &complex_alu6_op, &complex_alu6_op, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, -/*30*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, +/*30*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, -/*40*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, +/*40*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, -/*50*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, +/*50*/ 
INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, -/*60*/ &mmx_op, &mmx_op, &mmx_op, &mmx_op, - &mmx_op, &mmx_op, &mmx_op, &mmx_op, - &mmx_op, &mmx_op, &mmx_op, &mmx_op, - INVALID, INVALID, &mmx_op, &mmx_op, +/*60*/ &mmx_op, &mmx_op, &mmx_op, &mmx_op, + &mmx_op, &mmx_op, &mmx_op, &mmx_op, + &mmx_op, &mmx_op, &mmx_op, &mmx_op, + INVALID, INVALID, &mmx_op, &mmx_op, -/*70*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, - &mmx_op, &mmx_op, &mmx_op, &vector_emms_op, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, &mmx_op, &mmx_op, +/*70*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + &mmx_op, &mmx_op, &mmx_op, &complex_emms_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, &mmx_op, &mmx_op, /*80*/ &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, -/*90*/ &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, - &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, - &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, - &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, +/*90*/ &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, + &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, + &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, + &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, /* SETcc in the mod3 (register-operand) table: use the register form */ -/*a0*/ &push_seg_op, &vector_mov_seg_mem_op, &vector_cpuid_op, &vector_alu1_op, - &vector_alu1_op, &vector_alu1_op, INVALID, INVALID, - &push_seg_op, &vector_mov_seg_mem_op, INVALID, &vector_alu1_op, -
&vector_alu1_op, &vector_alu1_op, INVALID, &vector_mul_op, +/*a0*/ &push_seg_op, &complex_mov_seg_mem_op, &complex_cpuid_op, &complex_alu1_op, + &complex_alu1_op, &complex_alu1_op, INVALID, INVALID, + &push_seg_op, &complex_mov_seg_mem_op, INVALID, &complex_alu1_op, + &complex_alu1_op, &complex_alu1_op, INVALID, &complex_mul_op, -/*b0*/ &vector_cmpxchg_b_op, &vector_cmpxchg_op, &vector_lss_op, &vector_alu1_op, - &vector_lss_op, &vector_lss_op, &alux_op, &alu_op, - INVALID, INVALID, &vector_alu1_op, &vector_alu1_op, - &vector_bsx_op, &vector_bsx_op, &alux_op, &alu_op, +/*b0*/ &complex_cmpxchg_b_op, &complex_cmpxchg_op, &complex_lss_op, &complex_alu1_op, + &complex_lss_op, &complex_lss_op, &alux_op, &alu_op, + INVALID, INVALID, &complex_alu1_op, &complex_alu1_op, + &complex_bsx_op, &complex_bsx_op, &alux_op, &alu_op, -/*c0*/ &vector_alux1_op, &vector_alu1_op, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - &bswap_op, &bswap_op, &bswap_op, &bswap_op, - &bswap_op, &bswap_op, &bswap_op, &bswap_op, +/*c0*/ &complex_alux1_op, &complex_alu1_op, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, -/*d0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, - INVALID, &mmx_mul_op, INVALID, INVALID, - &mmx_op, &mmx_op, INVALID, &mmx_op, - &mmx_op, &mmx_op, INVALID, &mmx_op, +/*d0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + INVALID, &mmx_mul_op, INVALID, INVALID, + &mmx_op, &mmx_op, INVALID, &mmx_op, + &mmx_op, &mmx_op, INVALID, &mmx_op, -/*e0*/ &mmx_op, &mmx_shift_op, &mmx_shift_op, INVALID, - INVALID, &pmul_op, INVALID, INVALID, - &mmx_op, &mmx_op, INVALID, &mmx_op, - &mmx_op, &mmx_op, INVALID, &mmx_op, +/*e0*/ &mmx_op, &mmx_shift_op, &mmx_shift_op, INVALID, + INVALID, &pmul_op, INVALID, INVALID, + &mmx_op, &mmx_op, INVALID, &mmx_op, + &mmx_op, &mmx_op, INVALID, &mmx_op, -/*f0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, - INVALID, &pmul_op, 
INVALID, INVALID, - &mmx_op, &mmx_op, &mmx_op, INVALID, - &mmx_op, &mmx_op, &mmx_op, INVALID, +/*f0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + INVALID, &pmul_op, INVALID, INVALID, + &mmx_op, &mmx_op, &mmx_op, INVALID, + &mmx_op, &mmx_op, &mmx_op, INVALID, }; static const risc86_instruction_t *opcode_timings_0f0f[256] = @@ -1466,100 +1549,100 @@ static const risc86_instruction_t *opcode_timings_0f0f_mod3[256] = static const risc86_instruction_t *opcode_timings_shift[8] = { - &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, - &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op + &complex_alu_store_op, &complex_alu_store_op, &complex_alu_store_op, &complex_alu_store_op, + &complex_alu_store_op, &complex_alu_store_op, &complex_alu_store_op, &complex_alu_store_op }; static const risc86_instruction_t *opcode_timings_shift_b[8] = { - &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, - &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op + &complex_alux_store_op, &complex_alux_store_op, &complex_alux_store_op, &complex_alux_store_op, + &complex_alux_store_op, &complex_alux_store_op, &complex_alux_store_op, &complex_alux_store_op }; static const risc86_instruction_t *opcode_timings_shift_mod3[8] = { - &vector_alu1_op, &vector_alu1_op, &vector_alu1_op, &vector_alu1_op, - &alu_op, &alu_op, &alu_op, &alu_op + &complex_alu1_op, &complex_alu1_op, &complex_alu1_op, &complex_alu1_op, + &alu_op, &alu_op, &alu_op, &alu_op }; static const risc86_instruction_t *opcode_timings_shift_b_mod3[8] = { - &vector_alux1_op, &vector_alux1_op, &vector_alux1_op, &vector_alux1_op, - &alux_op, &alux_op, &alux_op, &alux_op + &complex_alux1_op, &complex_alux1_op, &complex_alux1_op, &complex_alux1_op, + &alux_op, &alux_op, &alux_op, &alux_op }; static const risc86_instruction_t *opcode_timings_80[8] = { - &alux_store_op, 
&alux_store_op, &vector_alux_store_op, &vector_alux_store_op, - &alux_store_op, &alux_store_op, &alux_store_op, &alux_store_op, + &alux_store_op, &alux_store_op, &complex_alux_store_op, &complex_alux_store_op, + &alux_store_op, &alux_store_op, &alux_store_op, &alux_store_op, }; static const risc86_instruction_t *opcode_timings_80_mod3[8] = { - &alux_op, &alux_op, &alux_store_op, &alux_store_op, - &alux_op, &alux_op, &alux_op, &alux_op, + &alux_op, &alux_op, &alux_store_op, &alux_store_op, + &alux_op, &alux_op, &alux_op, &alux_op, }; static const risc86_instruction_t *opcode_timings_8x[8] = { - &alu_store_op, &alu_store_op, &vector_alu_store_op, &vector_alu_store_op, - &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, + &alu_store_op, &alu_store_op, &complex_alu_store_op, &complex_alu_store_op, + &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, }; static const risc86_instruction_t *opcode_timings_8x_mod3[8] = { - &alu_op, &alu_op, &alu_store_op, &alu_store_op, - &alu_op, &alu_op, &alu_op, &alu_op, + &alu_op, &alu_op, &alu_store_op, &alu_store_op, + &alu_op, &alu_op, &alu_op, &alu_op, }; static const risc86_instruction_t *opcode_timings_f6[8] = { /* TST NOT NEG*/ - &test_mem_imm_b_op, INVALID, &vector_alux_store_op, &vector_alux_store_op, + &test_mem_imm_b_op, INVALID, &complex_alux_store_op, &complex_alux_store_op, /* MUL IMUL DIV IDIV*/ - &vector_mul_mem_op, &vector_mul_mem_op, &vector_div16_mem_op, &vector_div16_mem_op, + &complex_mul_mem_op, &complex_mul_mem_op, &complex_div16_mem_op, &complex_div16_mem_op, }; static const risc86_instruction_t *opcode_timings_f6_mod3[8] = { /* TST NOT NEG*/ &test_reg_b_op, INVALID, &alux_op, &alux_op, /* MUL IMUL DIV IDIV*/ - &vector_mul_op, &vector_mul_op, &vector_div16_op, &vector_div16_op, + &complex_mul_op, &complex_mul_op, &complex_div16_op, &complex_div16_op, }; static const risc86_instruction_t *opcode_timings_f7[8] = { /* TST NOT NEG*/ - &test_mem_imm_op, INVALID, &vector_alu_store_op, 
&vector_alu_store_op, + &test_mem_imm_op, INVALID, &complex_alu_store_op, &complex_alu_store_op, /* MUL IMUL DIV IDIV*/ - &vector_mul64_mem_op, &vector_mul64_mem_op, &vector_div32_mem_op, &vector_div32_mem_op, + &complex_mul64_mem_op, &complex_mul64_mem_op, &complex_div32_mem_op, &complex_div32_mem_op, }; static const risc86_instruction_t *opcode_timings_f7_mod3[8] = { /* TST NOT NEG*/ &test_reg_op, INVALID, &alu_op, &alu_op, /* MUL IMUL DIV IDIV*/ - &vector_mul64_op, &vector_mul64_op, &vector_div32_op, &vector_div32_op, + &complex_mul64_op, &complex_mul64_op, &complex_div32_op, &complex_div32_op, }; static const risc86_instruction_t *opcode_timings_ff[8] = { /* INC DEC CALL CALL far*/ - &alu_store_op, &alu_store_op, &store_op, &vector_call_far_op, + &alu_store_op, &alu_store_op, &store_op, &complex_call_far_op, /* JMP JMP far PUSH*/ - &branch_op, &vector_jmp_far_op, &push_mem_op, INVALID + &branch_op, &complex_jmp_far_op, &push_mem_op, INVALID }; static const risc86_instruction_t *opcode_timings_ff_mod3[8] = { /* INC DEC CALL CALL far*/ - &vector_alu1_op, &vector_alu1_op, &store_op, &vector_call_far_op, + &complex_alu1_op, &complex_alu1_op, &store_op, &complex_call_far_op, /* JMP JMP far PUSH*/ - &branch_op, &vector_jmp_far_op, &vector_push_mem_op, INVALID + &branch_op, &complex_jmp_far_op, &complex_push_mem_op, INVALID }; static const risc86_instruction_t *opcode_timings_d8[8] = { /* FADDs FMULs FCOMs FCOMPs*/ - &load_float_op, &load_float_op, &load_float_op, &load_float_op, + &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, /* FSUBs FSUBRs FDIVs FDIVRs*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, }; static const risc86_instruction_t *opcode_timings_d8_mod3[8] = { /* FADD FMUL FCOM FCOMP*/ - &float_op, &float_op, &float_op, &float_op, + &fadd_op, &fmul_op, &float_op, &float_op, /* FSUB FSUBR FDIV FDIVR*/ &float_op, &float_op, &fdiv_op, &fdiv_op, }; @@ -1569,7 +1652,7 @@ static const risc86_instruction_t *opcode_timings_d9[8] = /* 
FLDs FSTs FSTPs*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FLDENV FLDCW FSTENV FSTCW*/ - &vector_float_l_op, &vector_fldcw_op, &vector_float_l_op, &vector_float_op + &complex_float_l_op, &complex_fldcw_op, &complex_float_l_op, &complex_float_op }; static const risc86_instruction_t *opcode_timings_d9_mod3[64] = { @@ -1577,16 +1660,16 @@ static const risc86_instruction_t *opcode_timings_d9_mod3[64] = &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, /*FXCH*/ - &float_op, &float_op, &float_op, &float_op, - &float_op, &float_op, &float_op, &float_op, + &limm_op, &limm_op, &limm_op, &limm_op, + &limm_op, &limm_op, &limm_op, &limm_op, /*FNOP*/ &float_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*FSTP*/ - &float_op, &float_op, &float_op, &float_op, - &float_op, &float_op, &float_op, &float_op, + &float2_op, &float2_op, &float2_op, &float2_op, + &float2_op, &float2_op, &float2_op, &float2_op, /* opFCHS opFABS*/ - &float_op, &float_op, INVALID, INVALID, + &fchs_op, &float_op, INVALID, INVALID, /* opFTST opFXAM*/ &float_op, &float_op, INVALID, INVALID, /* opFLD1 opFLDL2T opFLDL2E opFLDPI*/ @@ -1606,7 +1689,7 @@ static const risc86_instruction_t *opcode_timings_d9_mod3[64] = static const risc86_instruction_t *opcode_timings_da[8] = { /* FIADDl FIMULl FICOMl FICOMPl*/ - &load_float_op, &load_float_op, &load_float_op, &load_float_op, + &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, /* FISUBl FISUBRl FIDIVl FIDIVRl*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, }; @@ -1622,7 +1705,7 @@ static const risc86_instruction_t *opcode_timings_db[8] = /* FLDil FSTil FSTPil*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FLDe FSTPe*/ - INVALID, &vector_flde_op, INVALID, &vector_fste_op + INVALID, &complex_flde_op, INVALID, &complex_fste_op }; static const risc86_instruction_t *opcode_timings_db_mod3[64] = { @@ -1656,14 +1739,14 @@ static const risc86_instruction_t 
*opcode_timings_db_mod3[64] = static const risc86_instruction_t *opcode_timings_dc[8] = { /* FADDd FMULd FCOMd FCOMPd*/ - &load_float_op, &load_float_op, &load_float_op, &load_float_op, + &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, /* FSUBd FSUBRd FDIVd FDIVRd*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, }; static const risc86_instruction_t *opcode_timings_dc_mod3[8] = { /* opFADDr opFMULr*/ - &float_op, &float_op, INVALID, INVALID, + &fadd_op, &fmul_op, INVALID, INVALID, /* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ &float_op, &float_op, &fdiv_op, &fdiv_op }; @@ -1673,7 +1756,7 @@ static const risc86_instruction_t *opcode_timings_dd[8] = /* FLDd FSTd FSTPd*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FRSTOR FSAVE FSTSW*/ - &vector_float_l_op, INVALID, &vector_float_l_op, &vector_float_l_op + &complex_float_l_op, INVALID, &complex_float_l_op, &complex_float_l_op }; static const risc86_instruction_t *opcode_timings_dd_mod3[8] = { @@ -1686,14 +1769,14 @@ static const risc86_instruction_t *opcode_timings_dd_mod3[8] = static const risc86_instruction_t *opcode_timings_de[8] = { /* FIADDw FIMULw FICOMw FICOMPw*/ - &load_float_op, &load_float_op, &load_float_op, &load_float_op, + &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, /* FISUBw FISUBRw FIDIVw FIDIVRw*/ - &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, }; static const risc86_instruction_t *opcode_timings_de_mod3[8] = { /* FADDP FMULP FCOMPP*/ - &float_op, &float_op, INVALID, &float_op, + &fadd_op, &fmul_op, INVALID, &float_op, /* FSUBP FSUBRP FDIVP FDIVRP*/ &float_op, &float_op, &fdiv_op, &fdiv_op, }; @@ -1703,7 +1786,7 @@ static const risc86_instruction_t *opcode_timings_df[8] = /* FILDiw FISTiw FISTPiw*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FILDiq FBSTP FISTPiq*/ - INVALID, &load_float_op, &vector_float_l_op, &fstore_op, + INVALID, &load_float_op, &complex_float_l_op, 
&fstore_op, }; static const risc86_instruction_t *opcode_timings_df_mod3[8] = { @@ -1731,34 +1814,27 @@ static p6_unit_t *units; /*Pentium Pro has no MMX*/ static p6_unit_t ppro_units[] = { - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX)}, /*Integer X*/ - {.uop_mask = (1 << UOP_ALU)}, /*Integer Y*/ - {.uop_mask = (1 << UOP_FLOAT)}, /*Floating point*/ - {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD)}, /*Load*/ - {.uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE)}, /*Store*/ - {.uop_mask = (1 << UOP_BRANCH)} /*Branch*/ + {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) | (1 << UOP_FLOAT)}, /*Integer X & Floating point*/ + {.uop_mask = (1 << UOP_ALU) | (1 << UOP_BRANCH)}, /*Integer Y*/ + {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD)}, /*Load*/ + {.uop_mask = (1 << UOP_STORED) | (1 << UOP_FSTORED)}, /*Data Store*/ + {.uop_mask = (1 << UOP_STOREA) | (1 << UOP_FSTOREA)}, /*Address Store*/ }; #define NR_PPRO_UNITS (sizeof(ppro_units) / sizeof(p6_unit_t)) -/*Well, it works I guess*/ +/*Pentium II/Celeron assigns the multiplier to port 0, the shifter to port 1, and shares the MMX ALU*/ static p6_unit_t p2_units[] = { - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) | (1 << UOP_MEU) | /*Integer X*/ - (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL)}, - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_MEU) | /*Integer Y*/ - (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL)}, - {.uop_mask = (1 << UOP_FLOAT)}, /*Floating point*/ + {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) | (1 << UOP_FLOAT) | /*Integer X & Floating point*/ + (1 << UOP_MMX) | (1 << UOP_MMX_MUL)}, + {.uop_mask = (1 << UOP_ALU) | (1 << UOP_BRANCH) | /*Integer Y*/ + (1 << UOP_MMX) | (1 << UOP_MMX_SHIFT)}, {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD)}, /*Load*/ - {.uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE)}, /*Store*/ - {.uop_mask = (1 << UOP_BRANCH)} /*Branch*/ + {.uop_mask = (1 << UOP_STORED) | (1 << UOP_FSTORED) | (1 << UOP_MSTORED)}, /*Data Store*/ 
+ {.uop_mask = (1 << UOP_STOREA) | (1 << UOP_FSTOREA) | (1 << UOP_MSTOREA)}, /*Address Store*/ }; #define NR_P2_UNITS (sizeof(p2_units) / sizeof(p6_unit_t)) -/*First available cycles of shared execution units. Each of these can be submitted - to by ALU X and Y*/ -static int mul_first_available_cycle; -static int shift_first_available_cycle; - static int uop_run(const risc86_uop_t *uop, int decode_time) { int c; @@ -1767,16 +1843,8 @@ static int uop_run(const risc86_uop_t *uop, int decode_time) /*UOP_LIMM does not require execution*/ if (uop->type == UOP_LIMM) - return decode_time; + return decode_time; - if (units == p2_units) /*More hackyness*/ - { - if (uop->type == UOP_MEU_MUL && decode_time < mul_first_available_cycle) - decode_time = mul_first_available_cycle; - else if (uop->type == UOP_MEU_SHIFT && decode_time < mul_first_available_cycle) - decode_time = shift_first_available_cycle; - } - /*Find execution unit for this uOP*/ for (c = 0; c < nr_units; c++) { @@ -1790,43 +1858,36 @@ static int uop_run(const risc86_uop_t *uop, int decode_time) } } if (!best_unit) - fatal("uop_run: can not find execution unit\n"); + fatal("uop_run: can not find execution unit\n"); if (best_start_cycle < decode_time) best_start_cycle = decode_time; - best_unit->first_available_cycle = best_start_cycle + uop->throughput; + best_unit->first_available_cycle = best_start_cycle + uop->latency; - if (units == p2_units) /*More hackyness*/ - { - if (uop->type == UOP_MEU_MUL) - mul_first_available_cycle = best_start_cycle + uop->throughput; - else if (uop->type == UOP_MEU_SHIFT) - shift_first_available_cycle = best_start_cycle + uop->throughput; - } - return best_start_cycle + uop->throughput; + + return best_start_cycle + uop->latency; } -/*The K6 decoder can decode, per clock : - - 1 or 2 'short' instructions, each up to 2 uOPs and 7 bytes long - - 1 'long' instruction, up to 4 uOPs - - 1 'vector' instruction, up to 4 uOPs per cycle, plus (I think) 1 cycle startup delay) +/*The P6 
decoders can decode, per clock : + - 1 to 3 'simple' instructions, each up to 1 uOP and 7 bytes long + - 1 'complex' instruction, up to 4 uOPs or 3 per cycle for instructions longer than 4 uOPs */ static struct { int nr_uops; - const risc86_uop_t *uops[4]; + const risc86_uop_t *uops[6]; /*Earliest time a uop can start. If the timestamp is -1, then the uop is part of a dependency chain and the start time is the completion time of the previous uop*/ - int earliest_start[4]; + int earliest_start[6]; } decode_buffer; -#define NR_OPQUADS 6 -/*Timestamps of when the last six opquads completed. The K6 scheduler retires - opquads in order, so this is needed to determine when the next can be scheduled*/ -static int opquad_completion_timestamp[NR_OPQUADS]; -static int next_opquad = 0; +#define NR_OPSEQS 3 +/*Timestamps of when the last three op sequences completed. Technically this is incorrect, +as the actual size of the opseq buffer is 20 bytes and not 18, but I'm restricted to multiples of 6*/ +static int opseq_completion_timestamp[NR_OPSEQS]; +static int next_opseq = 0; #define NR_REGS 8 /*Timestamp of when last operation on an integer register completed*/ @@ -1841,50 +1902,48 @@ void decode_flush_p6() { int c; int uop_timestamp = 0; - - /*Decoded opquad can not be submitted if there are no free spaces in the - opquad buffer*/ - if (decode_timestamp < opquad_completion_timestamp[next_opquad]) - decode_timestamp = opquad_completion_timestamp[next_opquad]; + + /*Decoded opseq can not be submitted if there are no free spaces in the + opseq buffer*/ + if (decode_timestamp < opseq_completion_timestamp[next_opseq]) + decode_timestamp = opseq_completion_timestamp[next_opseq]; /*Ensure that uops can not be submitted before they have been decoded*/ if (decode_timestamp > last_uop_timestamp) last_uop_timestamp = decode_timestamp; /*Submit uops to execution units, and determine the latest completion time*/ - for (c = 0; c < decode_buffer.nr_uops; c++) + for (c = 0; c < 
(decode_buffer.nr_uops); c++) { int start_timestamp; - if (decode_buffer.earliest_start[c] == -1) - start_timestamp = last_uop_timestamp; - else - start_timestamp = decode_buffer.earliest_start[c]; + + start_timestamp = decode_buffer.earliest_start[c]; last_uop_timestamp = uop_run(decode_buffer.uops[c], start_timestamp); if (last_uop_timestamp > uop_timestamp) uop_timestamp = last_uop_timestamp; } - /*Calculate opquad completion time. Since opquads complete in order, it + /*Calculate opseq completion time. Since opseqs complete in order, it must be after the last completion.*/ if (uop_timestamp <= last_complete_timestamp) last_complete_timestamp = last_complete_timestamp + 1; else last_complete_timestamp = uop_timestamp; - /*Advance to next opquad in buffer*/ - opquad_completion_timestamp[next_opquad] = last_complete_timestamp; - next_opquad++; - if (next_opquad == NR_OPQUADS) - next_opquad = 0; + /*Advance to next opseq in buffer*/ + opseq_completion_timestamp[next_opseq] = last_complete_timestamp; + next_opseq++; + if (next_opseq == NR_OPSEQS) + next_opseq = 0; decode_timestamp++; decode_buffer.nr_uops = 0; } /*The instruction is only of interest here if it's longer than 7 bytes, as that's the - limit on K6 short decoding*/ + limit on P6 simple decoding*/ static int codegen_timing_instr_length(uint64_t deps, uint32_t fetchdat, int op_32) { int len = prefixes + 1; /*Opcode*/ @@ -1936,8 +1995,9 @@ static int codegen_timing_instr_length(uint64_t deps, uint32_t fetchdat, int op_ static void decode_instruction(const risc86_instruction_t *ins, uint64_t deps, uint32_t fetchdat, int op_32, int bit8) { uint32_t regmask_required; - uint32_t regmask_modified; - int c, d; + uint32_t regmask_modified; + int c; + int d = 0; /*Complex decoder uOPs*/ int earliest_start = 0; decode_type_t decode_type = ins->decode_type; int instr_length = codegen_timing_instr_length(deps, fetchdat, op_32); @@ -1967,87 +2027,68 @@ static void decode_instruction(const risc86_instruction_t *ins, 
uint64_t deps, u earliest_start = fpu_st_timestamp[reg]; } - /*Short decoders are limited to 7 bytes*/ - if (decode_type == DECODE_SHORT && instr_length > 7) - decode_type = DECODE_LONG; - /*Long decoder is limited to 11 bytes*/ - else if (instr_length > 11) - decode_type = DECODE_VECTOR; + /*Simple decoders are limited to 7 bytes & 1 uOP*/ + if (decode_type == DECODE_SIMPLE && instr_length > 7) + decode_type = DECODE_COMPLEX; + else if (decode_type == DECODE_SIMPLE && ins->nr_uops > 1) + decode_type = DECODE_COMPLEX; switch (decode_type) - { - case DECODE_SHORT: - if (decode_buffer.nr_uops) + { + case DECODE_SIMPLE: + if (decode_buffer.nr_uops - d == 2) + { + decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; + decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; + decode_buffer.nr_uops = 3; + decode_flush_p6(); + } + else if (decode_buffer.nr_uops - d == 1) + { + decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; + decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; + decode_buffer.nr_uops = 2+d; + if (d) + decode_flush_p6(); + } + else if (decode_buffer.nr_uops) { decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; - if (ins->nr_uops > 1) - { - decode_buffer.uops[decode_buffer.nr_uops+1] = &ins->uop[1]; - decode_buffer.earliest_start[decode_buffer.nr_uops+1] = -1; - } - decode_buffer.nr_uops += ins->nr_uops; - - decode_flush_p6(); + decode_buffer.nr_uops = 1+d; } else { - decode_buffer.nr_uops = ins->nr_uops; + decode_buffer.nr_uops = 1; decode_buffer.uops[0] = &ins->uop[0]; decode_buffer.earliest_start[0] = earliest_start; - if (ins->nr_uops > 1) - { - decode_buffer.uops[1] = &ins->uop[1]; - decode_buffer.earliest_start[1] = -1; - } - } + } break; - case DECODE_LONG: + case DECODE_COMPLEX: if (decode_buffer.nr_uops) - decode_flush_p6(); + decode_flush_p6(); /*The 4-1-1 arrangement implies that a complex ins. 
can't be decoded after a simple one*/ - decode_buffer.nr_uops = ins->nr_uops; - for (c = 0; c < ins->nr_uops; c++) - { - decode_buffer.uops[c] = &ins->uop[c]; - if (c == 0) - decode_buffer.earliest_start[c] = earliest_start; - else - decode_buffer.earliest_start[c] = -1; - } - decode_flush_p6(); - break; - - case DECODE_VECTOR: - if (decode_buffer.nr_uops) - decode_flush_p6(); - - decode_timestamp++; d = 0; - + for (c = 0; c < ins->nr_uops; c++) { decode_buffer.uops[d] = &ins->uop[c]; - if (c == 0) - decode_buffer.earliest_start[d] = earliest_start; - else - decode_buffer.earliest_start[d] = -1; - d++; + decode_buffer.earliest_start[c] = earliest_start; + d++; - if (d == 4) + if (d == 3 && ins->nr_uops > 4) /*Ins. with >4 uOPs require the use of special units only present on 3 translate PLAs*/ { d = 0; - decode_buffer.nr_uops = 4; - decode_flush_p6(); + decode_buffer.nr_uops = 3; + decode_flush_p6(); /*The other two decoders are halted to preserve in-order issue*/ } } - if (d) - { - decode_buffer.nr_uops = d; - decode_flush_p6(); - } - break; + if (d) + { + decode_buffer.nr_uops = d; + } + break; } /*Update write timestamps for any output registers*/ @@ -2098,15 +2139,12 @@ void codegen_timing_p6_block_start() for (c = 0; c < nr_units; c++) units[c].first_available_cycle = 0; - mul_first_available_cycle = 0; - shift_first_available_cycle = 0; - decode_timestamp = 0; last_complete_timestamp = 0; - for (c = 0; c < NR_OPQUADS; c++) - opquad_completion_timestamp[c] = 0; - next_opquad = 0; + for (c = 0; c < NR_OPSEQS; c++) + opseq_completion_timestamp[c] = 0; + next_opseq = 0; for (c = 0; c < NR_REGS; c++) reg_available_timestamp[c] = 0; @@ -2298,7 +2336,7 @@ void codegen_timing_p6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint if (ins_table[opcode]) decode_instruction(ins_table[opcode], deps[opcode], fetchdat, op_32, bit8); else - decode_instruction(&vector_alu1_op, 0, fetchdat, op_32, bit8); + decode_instruction(&complex_alu1_op, 0, fetchdat, op_32, bit8); 
codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); } @@ -2327,4 +2365,4 @@ codegen_timing_t codegen_timing_p6 = codegen_timing_p6_block_start, codegen_timing_p6_block_end, codegen_timing_p6_jump_cycles -}; +}; \ No newline at end of file diff --git a/src/cpu_new/codegen_timing_p6.c b/src/cpu_new/codegen_timing_p6.c index d84a18904..845f46803 100644 --- a/src/cpu_new/codegen_timing_p6.c +++ b/src/cpu_new/codegen_timing_p6.c @@ -1,4 +1,5 @@ -/*Hacky P6 timings based on K6 timings*/ +/*Basic P6 timing model by plant/nerd73. Based on the K6 timing model*/ +/*Some cycle timings come from https://www.agner.org/optimize/instruction_tables.pdf*/ #include #include <86box/86box.h> #include "cpu.h" @@ -18,24 +19,26 @@ typedef enum uop_type_t UOP_ALU = 0, /*Executes in Integer X or Y units*/ UOP_ALUX, /*Executes in Integer X unit*/ UOP_LOAD, /*Executes in Load unit*/ - UOP_STORE, /*Executes in Store unit*/ + UOP_STORED, /*Executes in Data Store unit*/ + UOP_STOREA, /*Executes in Address Store unit*/ UOP_FLOAD, /*Executes in Load unit*/ - UOP_FSTORE, /*Executes in Store unit*/ + UOP_FSTORED, /*Executes in Data Store unit*/ + UOP_FSTOREA, /*Executes in Address Store unit*/ UOP_MLOAD, /*Executes in Load unit*/ - UOP_MSTORE, /*Executes in Store unit*/ + UOP_MSTORED, /*Executes in Data Store unit*/ + UOP_MSTOREA, /*Executes in Address Store unit*/ UOP_FLOAT, /*Executes in Floating Point unit*/ - UOP_MEU, /*Executes in Multimedia unit*/ - UOP_MEU_SHIFT, /*Executes in Multimedia unit or ALU X/Y. Uses MMX shifter*/ - UOP_MEU_MUL, /*Executes in Multimedia unit or ALU X/Y. Uses MMX multiplier*/ + UOP_MMX, /*Executes in Integer X or Y units as MMX*/ + UOP_MMX_SHIFT, /*Executes in Integer Y unit. Uses MMX shifter*/ + UOP_MMX_MUL, /*Executes in Integer X unit. 
Uses MMX multiplier*/ UOP_BRANCH, /*Executes in Branch unit*/ UOP_LIMM /*Does not require an execution unit*/ } uop_type_t; typedef enum decode_type_t { - DECODE_SHORT, - DECODE_LONG, - DECODE_VECTOR + DECODE_SIMPLE, + DECODE_COMPLEX, } decode_type_t; #define MAX_UOPS 10 @@ -43,7 +46,6 @@ typedef enum decode_type_t typedef struct risc86_uop_t { uop_type_t type; - double throughput; double latency; } risc86_uop_t; @@ -57,804 +59,885 @@ typedef struct risc86_instruction_t static const risc86_instruction_t alu_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t alux_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1} }; static const risc86_instruction_t load_alu_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t load_alux_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1} }; static const risc86_instruction_t alu_store_op = { - .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = 
UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} + }; static const risc86_instruction_t alux_store_op = { - .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} }; static const risc86_instruction_t branch_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_BRANCH, .latency = 1} }; static const risc86_instruction_t limm_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LIMM, .throughput = 1, .latency = 1} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_LIMM, .latency = 1} }; static const risc86_instruction_t load_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1} }; static const risc86_instruction_t store_op = { - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1} + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = {.type = UOP_STOREA, .latency = 1} }; static const risc86_instruction_t bswap_op = { - .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, 
}; static const risc86_instruction_t leave_op = { .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t lods_op = { .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t loop_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_BRANCH, .latency = 1} }; static const risc86_instruction_t mov_reg_seg_op = { .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, }; static const risc86_instruction_t movs_op = { .nr_uops = 4, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t pop_reg_op = { .nr_uops = 
2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t pop_mem_op = { - .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t push_imm_op = { - .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2}, + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = {.type = UOP_STOREA, .latency = 1}, }; static const risc86_instruction_t push_mem_op = { - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1} }; static const risc86_instruction_t push_seg_op = { - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} }; static 
const risc86_instruction_t stos_op = { - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t test_reg_op = { .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t test_reg_b_op = { .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1} }; static const risc86_instruction_t test_mem_imm_op = { .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} }; static const risc86_instruction_t test_mem_imm_b_op = { .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1} }; static const risc86_instruction_t xchg_op = { .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = 
UOP_ALU, .latency = 1} }; static const risc86_instruction_t mmx_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX, .latency = 1.5} }; static const risc86_instruction_t mmx_mul_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU_MUL, .throughput = 1.5, .latency = 3} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX_MUL, .latency = 1.5} }; static const risc86_instruction_t mmx_shift_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU_SHIFT, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX_SHIFT, .latency = 1.5} }; static const risc86_instruction_t load_mmx_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU, .throughput = 1.5, .latency = 3} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_MMX, .latency = 1.5} }; static const risc86_instruction_t load_mmx_mul_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU_MUL, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 3}, + .uop[1] = {.type = UOP_MMX_MUL, .latency = 1.5} }; static const risc86_instruction_t load_mmx_shift_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU_SHIFT, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 3}, + .uop[1] = {.type = UOP_MMX_SHIFT, .latency = 1.5} }; static const risc86_instruction_t mload_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MLOAD, .throughput = 1, .latency = 2} + 
.decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_MLOAD, .latency = 3}, }; static const risc86_instruction_t mstore_op = { - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MSTORE, .throughput = 1, .latency = 1} + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_MSTORED, .latency = 1}, + .uop[1] = {.type = UOP_MSTOREA, .latency = 1} }; static const risc86_instruction_t pmul_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX_MUL, .latency = 1.5} }; static const risc86_instruction_t pmul_mem_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 3}, + .uop[1] = {.type = UOP_MMX_MUL, .latency = 1.5} }; - static const risc86_instruction_t float_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 1} +}; +static const risc86_instruction_t fadd_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 3} +}; +static const risc86_instruction_t fmul_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_ALU, .latency = 5} +}; +static const risc86_instruction_t float2_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 1} +}; +static const risc86_instruction_t fchs_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 2}, + .uop[1] = {.type = UOP_FLOAT, .latency = 2}, + .uop[2] = {.type = UOP_FLOAT, .latency = 2} }; static const risc86_instruction_t 
load_float_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_FLOAT, .throughput = 1.5, .latency = 1.5} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 1} +}; +static const risc86_instruction_t load_fadd_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 3} +}; +static const risc86_instruction_t load_fmul_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 5} }; static const risc86_instruction_t fstore_op = { - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FSTORED, .latency = 1}, + .uop[1] = {.type = UOP_FSTOREA, .latency = 1}, +}; +static const risc86_instruction_t load_fiadd_op = +{ + .nr_uops = 7, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 1}, + .uop[2] = {.type = UOP_FLOAT, .latency = 1}, + .uop[3] = {.type = UOP_FLOAT, .latency = 1}, + .uop[4] = {.type = UOP_FLOAT, .latency = 1}, + .uop[5] = {.type = UOP_FLOAT, .latency = 1}, + .uop[6] = {.type = UOP_FLOAT, .latency = 1} }; - static const risc86_instruction_t fdiv_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 38, .latency = 38} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 32} }; static const risc86_instruction_t fdiv_mem_op = { .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_FLOAT, .throughput = 38, .latency = 38} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, 
.latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 38} }; static const risc86_instruction_t fsin_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 62, .latency = 62} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 60} }; static const risc86_instruction_t fsqrt_op = { .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 49, .latency = 49} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 69} }; -static const risc86_instruction_t vector_fldcw_op = +static const risc86_instruction_t complex_fldcw_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 8, .latency = 8} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 10} }; -static const risc86_instruction_t vector_float_op = +static const risc86_instruction_t complex_float_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 1} }; -static const risc86_instruction_t vector_float_l_op = +static const risc86_instruction_t complex_float_l_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 50, .latency = 50} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 50} }; -static const risc86_instruction_t vector_flde_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} -}; -static const risc86_instruction_t vector_fste_op = +static const risc86_instruction_t complex_flde_op = { .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2}, - .uop[1] = {.type = UOP_FSTORE, 
.throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = {.type = UOP_FLOAD, .latency = 1}, + .uop[2] = {.type = UOP_FLOAT, .latency = 2} +}; +static const risc86_instruction_t complex_fste_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 2}, + .uop[1] = {.type = UOP_FSTORED, .latency = 1}, + .uop[2] = {.type = UOP_FSTOREA, .latency = 1} }; -static const risc86_instruction_t vector_alu1_op = +static const risc86_instruction_t complex_alu1_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1} }; -static const risc86_instruction_t vector_alu2_op = +static const risc86_instruction_t complex_alu2_op = { .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} }; -static const risc86_instruction_t vector_alu3_op = +static const risc86_instruction_t complex_alu3_op = { .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} }; -static const risc86_instruction_t vector_alu6_op = +static const risc86_instruction_t complex_alu6_op = { .nr_uops = 6, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency 
= 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1}, + .uop[4] = {.type = UOP_ALU, .latency = 1}, + .uop[5] = {.type = UOP_ALU, .latency = 1} }; -static const risc86_instruction_t vector_alux1_op = +static const risc86_instruction_t complex_alux1_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1} }; -static const risc86_instruction_t vector_alux3_op = +static const risc86_instruction_t complex_alux3_op = { .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .latency = 1} }; -static const risc86_instruction_t vector_alux6_op = +static const risc86_instruction_t complex_alux6_op = { - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .nr_uops = 6, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 
1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .latency = 1}, + .uop[3] = {.type = UOP_ALUX, .latency = 1}, + .uop[4] = {.type = UOP_ALUX, .latency = 1}, + .uop[5] = {.type = UOP_ALUX, .latency = 1} }; -static const risc86_instruction_t vector_alu_store_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_alux_store_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_arpl_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, - .uop[1] = {.type = UOP_ALU, .throughput = 3, .latency = 3} -}; -static const risc86_instruction_t vector_bound_op = +static const risc86_instruction_t complex_alu_store_op = { .nr_uops = 4, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1}, }; -static const risc86_instruction_t vector_bsx_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 10, .latency = 10} -}; -static const risc86_instruction_t vector_call_far_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = 
UOP_ALU, .throughput = 3, .latency = 3}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_cli_sti_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 7, .latency = 7} -}; -static const risc86_instruction_t vector_cmps_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_cmpsb_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_cmpxchg_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, -}; -static const risc86_instruction_t vector_cmpxchg_b_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, -}; -static const risc86_instruction_t vector_cpuid_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 22, .latency = 22} -}; -static const risc86_instruction_t vector_div16_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 10, .latency = 10} -}; -static const risc86_instruction_t vector_div16_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = 
{.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 10, .latency = 10} -}; -static const risc86_instruction_t vector_div32_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 18, .latency = 18} -}; -static const risc86_instruction_t vector_div32_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 18, .latency = 18} -}; -static const risc86_instruction_t vector_emms_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 25, .latency = 25} -}; -static const risc86_instruction_t vector_enter_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 10, .latency = 10} -}; -static const risc86_instruction_t vector_femms_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 6, .latency = 6} -}; -static const risc86_instruction_t vector_in_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11} -}; -static const risc86_instruction_t vector_ins_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_int_op = -{ - .nr_uops = 5, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 20, .latency = 20}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; 
-static const risc86_instruction_t vector_iret_op = -{ - .nr_uops = 5, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[3] = {.type = UOP_ALU, .throughput = 20, .latency = 20}, - .uop[4] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_invd_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1000, .latency = 1000} -}; -static const risc86_instruction_t vector_jmp_far_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_load_alu_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_load_alux_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_loop_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_lss_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_ALU, .throughput = 3, .latency = 3} -}; -static const risc86_instruction_t vector_mov_mem_seg_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency 
= 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_mov_seg_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 3, .latency = 3} -}; -static const risc86_instruction_t vector_mov_seg_reg_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3} -}; -static const risc86_instruction_t vector_mul_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_mul_mem_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_mul64_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_mul64_mem_op = +static const risc86_instruction_t complex_alux_store_op = { .nr_uops = 4, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} }; -static const risc86_instruction_t vector_out_op = 
+static const risc86_instruction_t complex_arpl_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3}, + .uop[1] = {.type = UOP_ALU, .latency = 3} +}; +static const risc86_instruction_t complex_bound_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_LOAD, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_bsx_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 10, .latency = 10} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 10} }; -static const risc86_instruction_t vector_outs_op = +static const risc86_instruction_t complex_call_far_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_cli_sti_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 7} +}; +static const risc86_instruction_t complex_cmps_op = { .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 10, .latency = 10}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} }; -static const risc86_instruction_t vector_pusha_op = +static const risc86_instruction_t complex_cmpsb_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 
1} +}; +static const risc86_instruction_t complex_cmpxchg_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} +}; +static const risc86_instruction_t complex_cmpxchg_b_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} +}; +static const risc86_instruction_t complex_cpuid_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 23} +}; +static const risc86_instruction_t complex_div16_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 10} +}; +static const risc86_instruction_t complex_div16_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 10} +}; +static const risc86_instruction_t complex_div32_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 18} +}; +static const risc86_instruction_t complex_div32_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 18} +}; +static const risc86_instruction_t complex_emms_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 25} +}; +static const risc86_instruction_t complex_enter_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = {.type = UOP_STOREA, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 10} +}; +static const risc86_instruction_t complex_femms_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + 
.uop[0] = {.type = UOP_ALU, .latency = 6} +}; +static const risc86_instruction_t complex_in_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 10} +}; +static const risc86_instruction_t complex_ins_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 10}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_int_op = { .nr_uops = 8, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[6] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[7] = {.type = UOP_STORE, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 20}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_STORED, .latency = 1}, + .uop[4] = {.type = UOP_STOREA, .latency = 1}, + .uop[5] = {.type = UOP_STORED, .latency = 1}, + .uop[6] = {.type = UOP_STOREA, .latency = 1}, + .uop[7] = {.type = UOP_BRANCH, .latency = 1} }; -static const risc86_instruction_t vector_popa_op = +static const risc86_instruction_t complex_iret_op = +{ + .nr_uops = 5, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 3}, + .uop[1] = {.type = UOP_LOAD, .latency = 3}, + .uop[2] = {.type = UOP_LOAD, .latency = 3}, + .uop[3] = {.type = UOP_ALU, .latency = 20}, + .uop[4] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_invd_op = +{ + .nr_uops = 1, + 
.decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 500} +}; +static const risc86_instruction_t complex_jmp_far_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3}, + .uop[1] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_load_alu_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_load_alux_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t complex_loop_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_lss_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_LOAD, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 3} +}; +static const risc86_instruction_t complex_mov_mem_seg_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, +}; +static const risc86_instruction_t complex_mov_seg_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 3} +}; +static const risc86_instruction_t complex_mov_seg_reg_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3} +}; +static const risc86_instruction_t complex_mul_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t 
complex_mul_mem_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t complex_mul64_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t complex_mul64_mem_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .latency = 1}, + .uop[3] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t complex_out_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 10}, + .uop[1] = {.type = UOP_STOREA, .latency = 10}, +}; +static const risc86_instruction_t complex_outs_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 10}, + .uop[1] = {.type = UOP_STOREA, .latency = 10}, + .uop[2] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_pusha_op = { .nr_uops = 8, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[6] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[7] = {.type = UOP_LOAD, .throughput = 1, .latency = 1} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 2}, + .uop[1] = {.type = UOP_STOREA, .latency = 2}, + 
.uop[2] = {.type = UOP_STORED, .latency = 2}, + .uop[3] = {.type = UOP_STOREA, .latency = 2}, + .uop[4] = {.type = UOP_STORED, .latency = 2}, + .uop[5] = {.type = UOP_STOREA, .latency = 2}, + .uop[6] = {.type = UOP_STORED, .latency = 2}, + .uop[7] = {.type = UOP_STOREA, .latency = 2} }; -static const risc86_instruction_t vector_popf_op = +static const risc86_instruction_t complex_popa_op = +{ + .nr_uops = 8, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_LOAD, .latency = 1}, + .uop[2] = {.type = UOP_LOAD, .latency = 1}, + .uop[3] = {.type = UOP_LOAD, .latency = 1}, + .uop[4] = {.type = UOP_LOAD, .latency = 1}, + .uop[5] = {.type = UOP_LOAD, .latency = 1}, + .uop[6] = {.type = UOP_LOAD, .latency = 1}, + .uop[7] = {.type = UOP_LOAD, .latency = 1} +}; +static const risc86_instruction_t complex_popf_op = { .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 17, .latency = 17} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 3}, + .uop[1] = {.type = UOP_ALUX, .latency = 17} }; -static const risc86_instruction_t vector_push_mem_op = +static const risc86_instruction_t complex_push_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = {.type = UOP_STOREA, .latency = 1} +}; +static const risc86_instruction_t complex_pushf_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1} +}; +static const risc86_instruction_t complex_ret_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_retf_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + 
.uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 3}, + .uop[2] = {.type = UOP_BRANCH, .latency = 1} +}; +static const risc86_instruction_t complex_scas_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_scasb_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_setcc_mem_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_FSTORED, .latency = 1}, + .uop[3] = {.type = UOP_FSTOREA, .latency = 1} +}; +static const risc86_instruction_t complex_setcc_reg_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUX, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_test_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_test_mem_b_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .latency = 1} +}; +static const risc86_instruction_t complex_xchg_mem_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} +}; +static const risc86_instruction_t complex_xlat_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_LOAD, .latency = 1} +}; 
+static const risc86_instruction_t complex_wbinvd_op = { .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_pushf_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_ret_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_retf_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, - .uop[2] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_scas_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_scasb_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_setcc_mem_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_setcc_reg_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; 
-static const risc86_instruction_t vector_test_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_test_mem_b_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_xchg_mem_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} -}; -static const risc86_instruction_t vector_xlat_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2} -}; -static const risc86_instruction_t vector_wbinvd_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 10000, .latency = 10000} + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 10000} }; + #define INVALID NULL static const risc86_instruction_t *opcode_timings[256] = @@ -862,38 +945,38 @@ static const risc86_instruction_t *opcode_timings[256] = /* ADD ADD ADD ADD*/ /*00*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, /* ADD ADD PUSH ES POP ES*/ - &alux_op, &alu_op, &push_seg_op, &vector_mov_seg_mem_op, + &alux_op, &alu_op, &push_seg_op, &complex_mov_seg_mem_op, /* OR OR OR OR*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, /* OR OR PUSH CS */ &alux_op, &alu_op, &push_seg_op, INVALID, /* ADC ADC ADC ADC*/ -/*10*/ &vector_alux_store_op, &vector_alu_store_op, &vector_load_alux_op, &vector_load_alu_op, +/*10*/ &complex_alux_store_op,&complex_alu_store_op, 
&complex_load_alux_op,&complex_load_alu_op, /* ADC ADC PUSH SS POP SS*/ - &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, + &complex_alux1_op, &complex_alu1_op, &push_seg_op, &complex_mov_seg_mem_op, /* SBB SBB SBB SBB*/ -/*10*/ &vector_alux_store_op, &vector_alu_store_op, &vector_load_alux_op, &vector_load_alu_op, +/*10*/ &complex_alux_store_op,&complex_alu_store_op, &complex_load_alux_op,&complex_load_alu_op, /* SBB SBB PUSH DS POP DS*/ - &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, + &complex_alux1_op, &complex_alu1_op, &push_seg_op, &complex_mov_seg_mem_op, /* AND AND AND AND*/ /*20*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, /* AND AND DAA*/ - &alux_op, &alu_op, INVALID, &vector_alux1_op, + &alux_op, &alu_op, INVALID, &complex_alux1_op, /* SUB SUB SUB SUB*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, /* SUB SUB DAS*/ - &alux_op, &alu_op, INVALID, &vector_alux1_op, + &alux_op, &alu_op, INVALID, &complex_alux1_op, /* XOR XOR XOR XOR*/ /*30*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, /* XOR XOR AAA*/ - &alux_op, &alu_op, INVALID, &vector_alux6_op, + &alux_op, &alu_op, INVALID, &complex_alux6_op, /* CMP CMP CMP CMP*/ &load_alux_op, &load_alu_op, &load_alux_op, &load_alu_op, /* CMP CMP AAS*/ - &alux_op, &alu_op, INVALID, &vector_alux6_op, + &alux_op, &alu_op, INVALID, &complex_alux6_op, /* INC EAX INC ECX INC EDX INC EBX*/ /*40*/ &alu_op, &alu_op, &alu_op, &alu_op, @@ -914,12 +997,12 @@ static const risc86_instruction_t *opcode_timings[256] = &pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, /* PUSHA POPA BOUND ARPL*/ -/*60*/ &vector_pusha_op, &vector_popa_op, &vector_bound_op, &vector_arpl_op, +/*60*/ &complex_pusha_op, &complex_popa_op, &complex_bound_op, &complex_arpl_op, INVALID, INVALID, INVALID, INVALID, /* PUSH imm IMUL PUSH imm IMUL*/ - &push_imm_op, &vector_mul_op, &push_imm_op, &vector_mul_op, + &push_imm_op, &complex_mul_op, &push_imm_op, 
&complex_mul_op, /* INSB INSW OUTSB OUTSW*/ - &vector_ins_op, &vector_ins_op, &vector_outs_op, &vector_outs_op, + &complex_ins_op, &complex_ins_op, &complex_outs_op, &complex_outs_op, /* Jxx*/ /*70*/ &branch_op, &branch_op, &branch_op, &branch_op, @@ -929,29 +1012,29 @@ static const risc86_instruction_t *opcode_timings[256] = /*80*/ INVALID, INVALID, INVALID, INVALID, /* TEST TEST XCHG XCHG*/ - &vector_test_mem_b_op, &vector_test_mem_op, &vector_xchg_mem_op, &vector_xchg_mem_op, + &complex_test_mem_b_op, &complex_test_mem_op, &complex_xchg_mem_op, &complex_xchg_mem_op, /* MOV MOV MOV MOV*/ &store_op, &store_op, &load_op, &load_op, /* MOV from seg LEA MOV to seg POP*/ - &vector_mov_mem_seg_op, &store_op, &vector_mov_seg_mem_op, &pop_mem_op, + &complex_mov_mem_seg_op, &store_op, &complex_mov_seg_mem_op, &pop_mem_op, /* NOP XCHG XCHG XCHG*/ /*90*/ &limm_op, &xchg_op, &xchg_op, &xchg_op, /* XCHG XCHG XCHG XCHG*/ &xchg_op, &xchg_op, &xchg_op, &xchg_op, /* CBW CWD CALL far WAIT*/ - &vector_alu1_op, &vector_alu1_op, &vector_call_far_op, &limm_op, + &complex_alu1_op, &complex_alu1_op, &complex_call_far_op, &limm_op, /* PUSHF POPF SAHF LAHF*/ - &vector_pushf_op, &vector_popf_op, &vector_alux1_op, &vector_alux1_op, + &complex_pushf_op, &complex_popf_op, &complex_alux1_op, &complex_alux1_op, /* MOV MOV MOV MOV*/ /*a0*/ &load_op, &load_op, &store_op, &store_op, /* MOVSB MOVSW CMPSB CMPSW*/ - &movs_op, &movs_op, &vector_cmpsb_op, &vector_cmps_op, + &movs_op, &movs_op, &complex_cmpsb_op, &complex_cmps_op, /* TEST TEST STOSB STOSW*/ &test_reg_b_op, &test_reg_op, &stos_op, &stos_op, /* LODSB LODSW SCASB SCASW*/ - &lods_op, &lods_op, &vector_scasb_op, &vector_scas_op, + &lods_op, &lods_op, &complex_scasb_op, &complex_scas_op, /* MOV*/ /*b0*/ &limm_op, &limm_op, &limm_op, &limm_op, @@ -960,37 +1043,37 @@ static const risc86_instruction_t *opcode_timings[256] = &limm_op, &limm_op, &limm_op, &limm_op, /* RET imm RET*/ -/*c0*/ INVALID, INVALID, &vector_ret_op, &vector_ret_op, +/*c0*/ 
INVALID, INVALID, &complex_ret_op, &complex_ret_op, /* LES LDS MOV MOV*/ - &vector_lss_op, &vector_lss_op, &store_op, &store_op, + &complex_lss_op, &complex_lss_op, &store_op, &store_op, /* ENTER LEAVE RETF RETF*/ - &vector_enter_op, &leave_op, &vector_retf_op, &vector_retf_op, + &complex_enter_op, &leave_op, &complex_retf_op, &complex_retf_op, /* INT3 INT INTO IRET*/ - &vector_int_op, &vector_int_op, &vector_int_op, &vector_iret_op, + &complex_int_op, &complex_int_op, &complex_int_op, &complex_iret_op, /*d0*/ INVALID, INVALID, INVALID, INVALID, /* AAM AAD SETALC XLAT*/ - &vector_alux6_op, &vector_alux3_op, &vector_alux1_op, &vector_xlat_op, + &complex_alux6_op, &complex_alux3_op, &complex_alux1_op, &complex_xlat_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /* LOOPNE LOOPE LOOP JCXZ*/ -/*e0*/ &vector_loop_op, &vector_loop_op, &loop_op, &vector_loop_op, +/*e0*/ &complex_loop_op, &complex_loop_op, &loop_op, &complex_loop_op, /* IN AL IN AX OUT_AL OUT_AX*/ - &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, + &complex_in_op, &complex_in_op, &complex_out_op, &complex_out_op, /* CALL JMP JMP JMP*/ - &store_op, &branch_op, &vector_jmp_far_op, &branch_op, + &store_op, &branch_op, &complex_jmp_far_op, &branch_op, /* IN AL IN AX OUT_AL OUT_AX*/ - &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, + &complex_in_op, &complex_in_op, &complex_out_op, &complex_out_op, /* REPNE REPE*/ /*f0*/ INVALID, INVALID, INVALID, INVALID, /* HLT CMC*/ - &vector_alux1_op, &vector_alu2_op, INVALID, INVALID, + &complex_alux1_op, &complex_alu2_op, INVALID, INVALID, /* CLC STC CLI STI*/ - &vector_alu1_op, &vector_alu1_op, &vector_cli_sti_op, &vector_cli_sti_op, + &complex_alu1_op, &complex_alu1_op, &complex_cli_sti_op, &complex_cli_sti_op, /* CLD STD INCDEC*/ - &vector_alu1_op, &vector_alu1_op, &alux_store_op, INVALID + &complex_alu1_op, &complex_alu1_op, &alux_store_op, INVALID }; static const risc86_instruction_t *opcode_timings_mod3[256] = 
@@ -998,38 +1081,38 @@ static const risc86_instruction_t *opcode_timings_mod3[256] = /* ADD ADD ADD ADD*/ /*00*/ &alux_op, &alu_op, &alux_op, &alu_op, /* ADD ADD PUSH ES POP ES*/ - &alux_op, &alu_op, &push_seg_op, &vector_mov_seg_mem_op, + &alux_op, &alu_op, &push_seg_op, &complex_mov_seg_mem_op, /* OR OR OR OR*/ &alux_op, &alu_op, &alux_op, &alu_op, /* OR OR PUSH CS */ &alux_op, &alu_op, &push_seg_op, INVALID, /* ADC ADC ADC ADC*/ -/*10*/ &vector_alux1_op, &vector_alu1_op, &vector_alux1_op, &vector_alu1_op, +/*10*/ &complex_alux1_op, &complex_alu1_op, &complex_alux1_op, &complex_alu1_op, /* ADC ADC PUSH SS POP SS*/ - &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, + &complex_alux1_op, &complex_alu1_op, &push_seg_op, &complex_mov_seg_mem_op, /* SBB SBB SBB SBB*/ - &vector_alux1_op, &vector_alu1_op, &vector_alux1_op, &vector_alu1_op, + &complex_alux1_op, &complex_alu1_op, &complex_alux1_op, &complex_alu1_op, /* SBB SBB PUSH DS POP DS*/ - &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, + &complex_alux1_op, &complex_alu1_op, &push_seg_op, &complex_mov_seg_mem_op, /* AND AND AND AND*/ /*20*/ &alux_op, &alu_op, &alux_op, &alu_op, /* AND AND DAA*/ - &alux_op, &alu_op, INVALID, &vector_alux1_op, + &alux_op, &alu_op, INVALID, &complex_alux1_op, /* SUB SUB SUB SUB*/ &alux_op, &alu_op, &alux_op, &alu_op, /* SUB SUB DAS*/ - &alux_op, &alu_op, INVALID, &vector_alux1_op, + &alux_op, &alu_op, INVALID, &complex_alux1_op, /* XOR XOR XOR XOR*/ /*30*/ &alux_op, &alu_op, &alux_op, &alu_op, /* XOR XOR AAA*/ - &alux_op, &alu_op, INVALID, &vector_alux6_op, + &alux_op, &alu_op, INVALID, &complex_alux6_op, /* CMP CMP CMP CMP*/ &alux_op, &alu_op, &alux_op, &alu_op, /* CMP CMP AAS*/ - &alux_op, &alu_op, INVALID, &vector_alux6_op, + &alux_op, &alu_op, INVALID, &complex_alux6_op, /* INC EAX INC ECX INC EDX INC EBX*/ /*40*/ &alu_op, &alu_op, &alu_op, &alu_op, @@ -1050,12 +1133,12 @@ static const risc86_instruction_t *opcode_timings_mod3[256] = 
&pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, /* PUSHA POPA BOUND ARPL*/ -/*60*/ &vector_pusha_op, &vector_popa_op, &vector_bound_op, &vector_arpl_op, +/*60*/ &complex_pusha_op, &complex_popa_op, &complex_bound_op, &complex_arpl_op, INVALID, INVALID, INVALID, INVALID, /* PUSH imm IMUL PUSH imm IMUL*/ - &push_imm_op, &vector_mul_op, &push_imm_op, &vector_mul_op, + &push_imm_op, &complex_mul_op, &push_imm_op, &complex_mul_op, /* INSB INSW OUTSB OUTSW*/ - &vector_ins_op, &vector_ins_op, &vector_outs_op, &vector_outs_op, + &complex_ins_op, &complex_ins_op, &complex_outs_op, &complex_outs_op, /* Jxx*/ /*70*/ &branch_op, &branch_op, &branch_op, &branch_op, @@ -1065,29 +1148,29 @@ static const risc86_instruction_t *opcode_timings_mod3[256] = /*80*/ INVALID, INVALID, INVALID, INVALID, /* TEST TEST XCHG XCHG*/ - &vector_alu1_op, &vector_alu1_op, &vector_alu3_op, &vector_alu3_op, + &complex_alu1_op, &complex_alu1_op, &complex_alu3_op, &complex_alu3_op, /* MOV MOV MOV MOV*/ &store_op, &store_op, &load_op, &load_op, /* MOV from seg LEA MOV to seg POP*/ - &mov_reg_seg_op, &store_op, &vector_mov_seg_reg_op, &pop_reg_op, + &mov_reg_seg_op, &store_op, &complex_mov_seg_reg_op, &pop_reg_op, /* NOP XCHG XCHG XCHG*/ /*90*/ &limm_op, &xchg_op, &xchg_op, &xchg_op, /* XCHG XCHG XCHG XCHG*/ &xchg_op, &xchg_op, &xchg_op, &xchg_op, /* CBW CWD CALL far WAIT*/ - &vector_alu1_op, &vector_alu1_op, &vector_call_far_op, &limm_op, + &complex_alu1_op, &complex_alu1_op, &complex_call_far_op, &limm_op, /* PUSHF POPF SAHF LAHF*/ - &vector_pushf_op, &vector_popf_op, &vector_alux1_op, &vector_alux1_op, + &complex_pushf_op, &complex_popf_op, &complex_alux1_op, &complex_alux1_op, /* MOV MOV MOV MOV*/ /*a0*/ &load_op, &load_op, &store_op, &store_op, /* MOVSB MOVSW CMPSB CMPSW*/ - &movs_op, &movs_op, &vector_cmpsb_op, &vector_cmps_op, + &movs_op, &movs_op, &complex_cmpsb_op, &complex_cmps_op, /* TEST TEST STOSB STOSW*/ &test_reg_b_op, &test_reg_op, &stos_op, &stos_op, /* LODSB LODSW SCASB SCASW*/ - 
&lods_op, &lods_op, &vector_scasb_op, &vector_scas_op, + &lods_op, &lods_op, &complex_scasb_op, &complex_scas_op, /* MOV*/ /*b0*/ &limm_op, &limm_op, &limm_op, &limm_op, @@ -1096,57 +1179,57 @@ static const risc86_instruction_t *opcode_timings_mod3[256] = &limm_op, &limm_op, &limm_op, &limm_op, /* RET imm RET*/ -/*c0*/ INVALID, INVALID, &vector_ret_op, &vector_ret_op, +/*c0*/ INVALID, INVALID, &complex_ret_op, &complex_ret_op, /* LES LDS MOV MOV*/ - &vector_lss_op, &vector_lss_op, &store_op, &store_op, + &complex_lss_op, &complex_lss_op, &store_op, &store_op, /* ENTER LEAVE RETF RETF*/ - &vector_enter_op, &leave_op, &vector_retf_op, &vector_retf_op, + &complex_enter_op, &leave_op, &complex_retf_op, &complex_retf_op, /* INT3 INT INTO IRET*/ - &vector_int_op, &vector_int_op, &vector_int_op, &vector_iret_op, + &complex_int_op, &complex_int_op, &complex_int_op, &complex_iret_op, /*d0*/ INVALID, INVALID, INVALID, INVALID, /* AAM AAD SETALC XLAT*/ - &vector_alux6_op, &vector_alux3_op, &vector_alux1_op, &vector_xlat_op, + &complex_alux6_op, &complex_alux3_op, &complex_alux1_op, &complex_xlat_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /* LOOPNE LOOPE LOOP JCXZ*/ -/*e0*/ &vector_loop_op, &vector_loop_op, &loop_op, &vector_loop_op, +/*e0*/ &complex_loop_op, &complex_loop_op, &loop_op, &complex_loop_op, /* IN AL IN AX OUT_AL OUT_AX*/ - &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, + &complex_in_op, &complex_in_op, &complex_out_op, &complex_out_op, /* CALL JMP JMP JMP*/ - &store_op, &branch_op, &vector_jmp_far_op, &branch_op, + &store_op, &branch_op, &complex_jmp_far_op, &branch_op, /* IN AL IN AX OUT_AL OUT_AX*/ - &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, + &complex_in_op, &complex_in_op, &complex_out_op, &complex_out_op, /* REPNE REPE*/ /*f0*/ INVALID, INVALID, INVALID, INVALID, /* HLT CMC*/ - &vector_alux1_op, &vector_alu2_op, INVALID, INVALID, + &complex_alux1_op, &complex_alu2_op, INVALID, INVALID, /* 
CLC STC CLI STI*/ - &vector_alu1_op, &vector_alu1_op, &vector_cli_sti_op, &vector_cli_sti_op, + &complex_alu1_op, &complex_alu1_op, &complex_cli_sti_op, &complex_cli_sti_op, /* CLD STD INCDEC*/ - &vector_alu1_op, &vector_alu1_op, &vector_alux1_op, INVALID + &complex_alu1_op, &complex_alu1_op, &complex_alux1_op, INVALID }; static const risc86_instruction_t *opcode_timings_0f[256] = { -/*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, - INVALID, &vector_alu6_op, &vector_alu6_op, INVALID, - &vector_invd_op, &vector_wbinvd_op, INVALID, INVALID, - INVALID, &load_op, &vector_femms_op, INVALID, +/*00*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, + INVALID, &complex_alu6_op, &complex_alu6_op, INVALID, + &complex_invd_op, &complex_wbinvd_op, INVALID, INVALID, + INVALID, &load_op, &complex_femms_op, INVALID, /*10*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, -/*20*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, - &vector_alu6_op, &vector_alu6_op, INVALID, INVALID, +/*20*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, + &complex_alu6_op, &complex_alu6_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, -/*30*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID, +/*30*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -1167,7 +1250,7 @@ static const risc86_instruction_t *opcode_timings_0f[256] = INVALID, INVALID, &mload_op, &mload_op, /*70*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, - &load_mmx_op, &load_mmx_op, &load_mmx_op, &vector_emms_op, + &load_mmx_op, &load_mmx_op, &load_mmx_op, &complex_emms_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, &mstore_op, &mstore_op, 
@@ -1176,122 +1259,122 @@ static const risc86_instruction_t *opcode_timings_0f[256] = &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, -/*90*/ &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, - &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, - &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, - &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, +/*90*/ &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, + &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, + &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, + &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, -/*a0*/ &push_seg_op, &vector_mov_seg_mem_op, &vector_cpuid_op, &vector_load_alu_op, - &vector_alu_store_op, &vector_alu_store_op, INVALID, INVALID, - &push_seg_op, &vector_mov_seg_mem_op, INVALID, &vector_load_alu_op, - &vector_alu_store_op, &vector_alu_store_op, INVALID, &vector_mul_op, +/*a0*/ &push_seg_op, &complex_mov_seg_mem_op,&complex_cpuid_op, &complex_load_alu_op, + &complex_alu_store_op, &complex_alu_store_op, INVALID, INVALID, + &push_seg_op, &complex_mov_seg_mem_op,INVALID, &complex_load_alu_op, + &complex_alu_store_op, &complex_alu_store_op, INVALID, &complex_mul_op, -/*b0*/ &vector_cmpxchg_b_op, &vector_cmpxchg_op, &vector_lss_op, &vector_load_alu_op, - &vector_lss_op, &vector_lss_op, &load_alux_op, &load_alu_op, - INVALID, INVALID, &vector_load_alu_op, &vector_load_alu_op, - &vector_bsx_op, &vector_bsx_op, &load_alux_op, &load_alu_op, +/*b0*/ &complex_cmpxchg_b_op, &complex_cmpxchg_op, &complex_lss_op, &complex_load_alu_op, + &complex_lss_op, &complex_lss_op, &load_alux_op, &load_alu_op, + INVALID, INVALID, &complex_load_alu_op, 
&complex_load_alu_op, + &complex_bsx_op, &complex_bsx_op, &load_alux_op, &load_alu_op, -/*c0*/ &vector_alux_store_op, &vector_alu_store_op, INVALID, INVALID, - INVALID, INVALID, INVALID, &vector_cmpxchg_op, - &bswap_op, &bswap_op, &bswap_op, &bswap_op, - &bswap_op, &bswap_op, &bswap_op, &bswap_op, +/*c0*/ &complex_alux_store_op, &complex_alu_store_op, INVALID, INVALID, + INVALID, INVALID, INVALID, &complex_cmpxchg_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, -/*d0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, - INVALID, &load_mmx_mul_op, INVALID, INVALID, - &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, - &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, +/*d0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, + INVALID, &load_mmx_mul_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, -/*e0*/ &load_mmx_op, &load_mmx_shift_op, &load_mmx_shift_op, INVALID, - INVALID, &pmul_mem_op, INVALID, INVALID, - &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, - &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, +/*e0*/ &load_mmx_op, &load_mmx_shift_op, &load_mmx_shift_op, INVALID, + INVALID, &pmul_mem_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, -/*f0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, - INVALID, &pmul_mem_op, INVALID, INVALID, - &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, - &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, +/*f0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, + INVALID, &pmul_mem_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, + &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, }; static const risc86_instruction_t *opcode_timings_0f_mod3[256] = { -/*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, 
&vector_alu6_op, - INVALID, &vector_alu6_op, &vector_alu6_op, INVALID, - &vector_invd_op, &vector_wbinvd_op, INVALID, INVALID, - INVALID, INVALID, &vector_femms_op, INVALID, +/*00*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, + INVALID, &complex_alu6_op, &complex_alu6_op, INVALID, + &complex_invd_op, &complex_wbinvd_op, INVALID, INVALID, + INVALID, INVALID, &complex_femms_op, INVALID, -/*10*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, +/*10*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, -/*20*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, - &vector_alu6_op, &vector_alu6_op, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, +/*20*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, + &complex_alu6_op, &complex_alu6_op, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, -/*30*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, +/*30*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, -/*40*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, +/*40*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, -/*50*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, +/*50*/ 
INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, -/*60*/ &mmx_op, &mmx_op, &mmx_op, &mmx_op, - &mmx_op, &mmx_op, &mmx_op, &mmx_op, - &mmx_op, &mmx_op, &mmx_op, &mmx_op, - INVALID, INVALID, &mmx_op, &mmx_op, +/*60*/ &mmx_op, &mmx_op, &mmx_op, &mmx_op, + &mmx_op, &mmx_op, &mmx_op, &mmx_op, + &mmx_op, &mmx_op, &mmx_op, &mmx_op, + INVALID, INVALID, &mmx_op, &mmx_op, -/*70*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, - &mmx_op, &mmx_op, &mmx_op, &vector_emms_op, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, &mmx_op, &mmx_op, +/*70*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + &mmx_op, &mmx_op, &mmx_op, &complex_emms_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, &mmx_op, &mmx_op, /*80*/ &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, &branch_op, -/*90*/ &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, - &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, - &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, - &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, +/*90*/ &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, + &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, + &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, + &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, -/*a0*/ &push_seg_op, &vector_mov_seg_mem_op, &vector_cpuid_op, &vector_alu1_op, - &vector_alu1_op, &vector_alu1_op, INVALID, INVALID, - &push_seg_op, &vector_mov_seg_mem_op, INVALID, &vector_alu1_op, - 
&vector_alu1_op, &vector_alu1_op, INVALID, &vector_mul_op, +/*a0*/ &push_seg_op, &complex_mov_seg_mem_op, &complex_cpuid_op, &complex_alu1_op, + &complex_alu1_op, &complex_alu1_op, INVALID, INVALID, + &push_seg_op, &complex_mov_seg_mem_op, INVALID, &complex_alu1_op, + &complex_alu1_op, &complex_alu1_op, INVALID, &complex_mul_op, -/*b0*/ &vector_cmpxchg_b_op, &vector_cmpxchg_op, &vector_lss_op, &vector_alu1_op, - &vector_lss_op, &vector_lss_op, &alux_op, &alu_op, - INVALID, INVALID, &vector_alu1_op, &vector_alu1_op, - &vector_bsx_op, &vector_bsx_op, &alux_op, &alu_op, +/*b0*/ &complex_cmpxchg_b_op, &complex_cmpxchg_op, &complex_lss_op, &complex_alu1_op, + &complex_lss_op, &complex_lss_op, &alux_op, &alu_op, + INVALID, INVALID, &complex_alu1_op, &complex_alu1_op, + &complex_bsx_op, &complex_bsx_op, &alux_op, &alu_op, -/*c0*/ &vector_alux1_op, &vector_alu1_op, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - &bswap_op, &bswap_op, &bswap_op, &bswap_op, - &bswap_op, &bswap_op, &bswap_op, &bswap_op, +/*c0*/ &complex_alux1_op, &complex_alu1_op, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, -/*d0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, - INVALID, &mmx_mul_op, INVALID, INVALID, - &mmx_op, &mmx_op, INVALID, &mmx_op, - &mmx_op, &mmx_op, INVALID, &mmx_op, +/*d0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + INVALID, &mmx_mul_op, INVALID, INVALID, + &mmx_op, &mmx_op, INVALID, &mmx_op, + &mmx_op, &mmx_op, INVALID, &mmx_op, -/*e0*/ &mmx_op, &mmx_shift_op, &mmx_shift_op, INVALID, - INVALID, &pmul_op, INVALID, INVALID, - &mmx_op, &mmx_op, INVALID, &mmx_op, - &mmx_op, &mmx_op, INVALID, &mmx_op, +/*e0*/ &mmx_op, &mmx_shift_op, &mmx_shift_op, INVALID, + INVALID, &pmul_op, INVALID, INVALID, + &mmx_op, &mmx_op, INVALID, &mmx_op, + &mmx_op, &mmx_op, INVALID, &mmx_op, -/*f0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, - INVALID, &pmul_op, 
INVALID, INVALID, - &mmx_op, &mmx_op, &mmx_op, INVALID, - &mmx_op, &mmx_op, &mmx_op, INVALID, +/*f0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + INVALID, &pmul_op, INVALID, INVALID, + &mmx_op, &mmx_op, &mmx_op, INVALID, + &mmx_op, &mmx_op, &mmx_op, INVALID, }; static const risc86_instruction_t *opcode_timings_0f0f[256] = @@ -1463,100 +1546,100 @@ static const risc86_instruction_t *opcode_timings_0f0f_mod3[256] = static const risc86_instruction_t *opcode_timings_shift[8] = { - &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, - &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op + &complex_alu_store_op, &complex_alu_store_op, &complex_alu_store_op, &complex_alu_store_op, + &complex_alu_store_op, &complex_alu_store_op, &complex_alu_store_op, &complex_alu_store_op }; static const risc86_instruction_t *opcode_timings_shift_b[8] = { - &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, - &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op + &complex_alux_store_op, &complex_alux_store_op, &complex_alux_store_op, &complex_alux_store_op, + &complex_alux_store_op, &complex_alux_store_op, &complex_alux_store_op, &complex_alux_store_op }; static const risc86_instruction_t *opcode_timings_shift_mod3[8] = { - &vector_alu1_op, &vector_alu1_op, &vector_alu1_op, &vector_alu1_op, - &alu_op, &alu_op, &alu_op, &alu_op + &complex_alu1_op, &complex_alu1_op, &complex_alu1_op, &complex_alu1_op, + &alu_op, &alu_op, &alu_op, &alu_op }; static const risc86_instruction_t *opcode_timings_shift_b_mod3[8] = { - &vector_alux1_op, &vector_alux1_op, &vector_alux1_op, &vector_alux1_op, - &alux_op, &alux_op, &alux_op, &alux_op + &complex_alux1_op, &complex_alux1_op, &complex_alux1_op, &complex_alux1_op, + &alux_op, &alux_op, &alux_op, &alux_op }; static const risc86_instruction_t *opcode_timings_80[8] = { - &alux_store_op, 
&alux_store_op, &vector_alux_store_op, &vector_alux_store_op, - &alux_store_op, &alux_store_op, &alux_store_op, &alux_store_op, + &alux_store_op, &alux_store_op, &complex_alux_store_op, &complex_alux_store_op, + &alux_store_op, &alux_store_op, &alux_store_op, &alux_store_op, }; static const risc86_instruction_t *opcode_timings_80_mod3[8] = { - &alux_op, &alux_op, &alux_store_op, &alux_store_op, - &alux_op, &alux_op, &alux_op, &alux_op, + &alux_op, &alux_op, &alux_store_op, &alux_store_op, + &alux_op, &alux_op, &alux_op, &alux_op, }; static const risc86_instruction_t *opcode_timings_8x[8] = { - &alu_store_op, &alu_store_op, &vector_alu_store_op, &vector_alu_store_op, - &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, + &alu_store_op, &alu_store_op, &complex_alu_store_op, &complex_alu_store_op, + &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, }; static const risc86_instruction_t *opcode_timings_8x_mod3[8] = { - &alu_op, &alu_op, &alu_store_op, &alu_store_op, - &alu_op, &alu_op, &alu_op, &alu_op, + &alu_op, &alu_op, &alu_store_op, &alu_store_op, + &alu_op, &alu_op, &alu_op, &alu_op, }; static const risc86_instruction_t *opcode_timings_f6[8] = { /* TST NOT NEG*/ - &test_mem_imm_b_op, INVALID, &vector_alux_store_op, &vector_alux_store_op, + &test_mem_imm_b_op, INVALID, &complex_alux_store_op, &complex_alux_store_op, /* MUL IMUL DIV IDIV*/ - &vector_mul_mem_op, &vector_mul_mem_op, &vector_div16_mem_op, &vector_div16_mem_op, + &complex_mul_mem_op, &complex_mul_mem_op, &complex_div16_mem_op, &complex_div16_mem_op, }; static const risc86_instruction_t *opcode_timings_f6_mod3[8] = { /* TST NOT NEG*/ &test_reg_b_op, INVALID, &alux_op, &alux_op, /* MUL IMUL DIV IDIV*/ - &vector_mul_op, &vector_mul_op, &vector_div16_op, &vector_div16_op, + &complex_mul_op, &complex_mul_op, &complex_div16_op, &complex_div16_op, }; static const risc86_instruction_t *opcode_timings_f7[8] = { /* TST NOT NEG*/ - &test_mem_imm_op, INVALID, &vector_alu_store_op, 
&vector_alu_store_op, + &test_mem_imm_op, INVALID, &complex_alu_store_op, &complex_alu_store_op, /* MUL IMUL DIV IDIV*/ - &vector_mul64_mem_op, &vector_mul64_mem_op, &vector_div32_mem_op, &vector_div32_mem_op, + &complex_mul64_mem_op, &complex_mul64_mem_op, &complex_div32_mem_op, &complex_div32_mem_op, }; static const risc86_instruction_t *opcode_timings_f7_mod3[8] = { /* TST NOT NEG*/ &test_reg_op, INVALID, &alu_op, &alu_op, /* MUL IMUL DIV IDIV*/ - &vector_mul64_op, &vector_mul64_op, &vector_div32_op, &vector_div32_op, + &complex_mul64_op, &complex_mul64_op, &complex_div32_op, &complex_div32_op, }; static const risc86_instruction_t *opcode_timings_ff[8] = { /* INC DEC CALL CALL far*/ - &alu_store_op, &alu_store_op, &store_op, &vector_call_far_op, + &alu_store_op, &alu_store_op, &store_op, &complex_call_far_op, /* JMP JMP far PUSH*/ - &branch_op, &vector_jmp_far_op, &push_mem_op, INVALID + &branch_op, &complex_jmp_far_op, &push_mem_op, INVALID }; static const risc86_instruction_t *opcode_timings_ff_mod3[8] = { /* INC DEC CALL CALL far*/ - &vector_alu1_op, &vector_alu1_op, &store_op, &vector_call_far_op, + &complex_alu1_op, &complex_alu1_op, &store_op, &complex_call_far_op, /* JMP JMP far PUSH*/ - &branch_op, &vector_jmp_far_op, &vector_push_mem_op, INVALID + &branch_op, &complex_jmp_far_op, &complex_push_mem_op, INVALID }; static const risc86_instruction_t *opcode_timings_d8[8] = { /* FADDs FMULs FCOMs FCOMPs*/ - &load_float_op, &load_float_op, &load_float_op, &load_float_op, + &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, /* FSUBs FSUBRs FDIVs FDIVRs*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, }; static const risc86_instruction_t *opcode_timings_d8_mod3[8] = { /* FADD FMUL FCOM FCOMP*/ - &float_op, &float_op, &float_op, &float_op, + &fadd_op, &fmul_op, &float_op, &float_op, /* FSUB FSUBR FDIV FDIVR*/ &float_op, &float_op, &fdiv_op, &fdiv_op, }; @@ -1566,7 +1649,7 @@ static const risc86_instruction_t *opcode_timings_d9[8] = /* 
FLDs FSTs FSTPs*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FLDENV FLDCW FSTENV FSTCW*/ - &vector_float_l_op, &vector_fldcw_op, &vector_float_l_op, &vector_float_op + &complex_float_l_op, &complex_fldcw_op, &complex_float_l_op, &complex_float_op }; static const risc86_instruction_t *opcode_timings_d9_mod3[64] = { @@ -1574,16 +1657,16 @@ static const risc86_instruction_t *opcode_timings_d9_mod3[64] = &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, /*FXCH*/ - &float_op, &float_op, &float_op, &float_op, - &float_op, &float_op, &float_op, &float_op, + &limm_op, &limm_op, &limm_op, &limm_op, + &limm_op, &limm_op, &limm_op, &limm_op, /*FNOP*/ &float_op, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*FSTP*/ - &float_op, &float_op, &float_op, &float_op, - &float_op, &float_op, &float_op, &float_op, + &float2_op, &float2_op, &float2_op, &float2_op, + &float2_op, &float2_op, &float2_op, &float2_op, /* opFCHS opFABS*/ - &float_op, &float_op, INVALID, INVALID, + &fchs_op, &float_op, INVALID, INVALID, /* opFTST opFXAM*/ &float_op, &float_op, INVALID, INVALID, /* opFLD1 opFLDL2T opFLDL2E opFLDPI*/ @@ -1603,7 +1686,7 @@ static const risc86_instruction_t *opcode_timings_d9_mod3[64] = static const risc86_instruction_t *opcode_timings_da[8] = { /* FIADDl FIMULl FICOMl FICOMPl*/ - &load_float_op, &load_float_op, &load_float_op, &load_float_op, + &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, /* FISUBl FISUBRl FIDIVl FIDIVRl*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, }; @@ -1619,7 +1702,7 @@ static const risc86_instruction_t *opcode_timings_db[8] = /* FLDil FSTil FSTPil*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FLDe FSTPe*/ - INVALID, &vector_flde_op, INVALID, &vector_fste_op + INVALID, &complex_flde_op, INVALID, &complex_fste_op }; static const risc86_instruction_t *opcode_timings_db_mod3[64] = { @@ -1653,14 +1736,14 @@ static const risc86_instruction_t 
*opcode_timings_db_mod3[64] = static const risc86_instruction_t *opcode_timings_dc[8] = { /* FADDd FMULd FCOMd FCOMPd*/ - &load_float_op, &load_float_op, &load_float_op, &load_float_op, + &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, /* FSUBd FSUBRd FDIVd FDIVRd*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, }; static const risc86_instruction_t *opcode_timings_dc_mod3[8] = { /* opFADDr opFMULr*/ - &float_op, &float_op, INVALID, INVALID, + &fadd_op, &fmul_op, INVALID, INVALID, /* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ &float_op, &float_op, &fdiv_op, &fdiv_op }; @@ -1670,7 +1753,7 @@ static const risc86_instruction_t *opcode_timings_dd[8] = /* FLDd FSTd FSTPd*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FRSTOR FSAVE FSTSW*/ - &vector_float_l_op, INVALID, &vector_float_l_op, &vector_float_l_op + &complex_float_l_op, INVALID, &complex_float_l_op, &complex_float_l_op }; static const risc86_instruction_t *opcode_timings_dd_mod3[8] = { @@ -1683,14 +1766,14 @@ static const risc86_instruction_t *opcode_timings_dd_mod3[8] = static const risc86_instruction_t *opcode_timings_de[8] = { /* FIADDw FIMULw FICOMw FICOMPw*/ - &load_float_op, &load_float_op, &load_float_op, &load_float_op, + &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, /* FISUBw FISUBRw FIDIVw FIDIVRw*/ - &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, }; static const risc86_instruction_t *opcode_timings_de_mod3[8] = { /* FADDP FMULP FCOMPP*/ - &float_op, &float_op, INVALID, &float_op, + &fadd_op, &fmul_op, INVALID, &float_op, /* FSUBP FSUBRP FDIVP FDIVRP*/ &float_op, &float_op, &fdiv_op, &fdiv_op, }; @@ -1700,7 +1783,7 @@ static const risc86_instruction_t *opcode_timings_df[8] = /* FILDiw FISTiw FISTPiw*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FILDiq FBSTP FISTPiq*/ - INVALID, &load_float_op, &vector_float_l_op, &fstore_op, + INVALID, &load_float_op, &complex_float_l_op, 
&fstore_op, }; static const risc86_instruction_t *opcode_timings_df_mod3[8] = { @@ -1728,34 +1811,27 @@ static p6_unit_t *units; /*Pentium Pro has no MMX*/ static p6_unit_t ppro_units[] = { - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX)}, /*Integer X*/ - {.uop_mask = (1 << UOP_ALU)}, /*Integer Y*/ - {.uop_mask = (1 << UOP_FLOAT)}, /*Floating point*/ - {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD)}, /*Load*/ - {.uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE)}, /*Store*/ - {.uop_mask = (1 << UOP_BRANCH)} /*Branch*/ + {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) | (1 << UOP_FLOAT)}, /*Integer X & Floating point*/ + {.uop_mask = (1 << UOP_ALU) | (1 << UOP_BRANCH)}, /*Integer Y*/ + {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD)}, /*Load*/ + {.uop_mask = (1 << UOP_STORED) | (1 << UOP_FSTORED)}, /*Data Store*/ + {.uop_mask = (1 << UOP_STOREA) | (1 << UOP_FSTOREA)}, /*Address Store*/ }; #define NR_PPRO_UNITS (sizeof(ppro_units) / sizeof(p6_unit_t)) -/*Well, it works I guess*/ +/*Pentium II/Celeron assigns the multiplier to port 0, the shifter to port 1, and shares the MMX ALU*/ static p6_unit_t p2_units[] = { - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) | (1 << UOP_MEU) | /*Integer X*/ - (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL)}, - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_MEU) | /*Integer Y*/ - (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL)}, - {.uop_mask = (1 << UOP_FLOAT)}, /*Floating point*/ + {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) | (1 << UOP_FLOAT) | /*Integer X & Floating point*/ + (1 << UOP_MMX) | (1 << UOP_MMX_MUL)}, + {.uop_mask = (1 << UOP_ALU) | (1 << UOP_BRANCH) | /*Integer Y*/ + (1 << UOP_MMX) | (1 << UOP_MMX_SHIFT)}, {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD)}, /*Load*/ - {.uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE)}, /*Store*/ - {.uop_mask = (1 << UOP_BRANCH)} /*Branch*/ + {.uop_mask = (1 << UOP_STORED) | (1 << UOP_FSTORED) | (1 << UOP_MSTORED)}, /*Data Store*/ 
+ {.uop_mask = (1 << UOP_STOREA) | (1 << UOP_FSTOREA) | (1 << UOP_MSTOREA)}, /*Address Store*/ }; #define NR_P2_UNITS (sizeof(p2_units) / sizeof(p6_unit_t)) -/*First available cycles of shared execution units. Each of these can be submitted - to by ALU X and Y*/ -static int mul_first_available_cycle; -static int shift_first_available_cycle; - static int uop_run(const risc86_uop_t *uop, int decode_time) { int c; @@ -1764,16 +1840,8 @@ static int uop_run(const risc86_uop_t *uop, int decode_time) /*UOP_LIMM does not require execution*/ if (uop->type == UOP_LIMM) - return decode_time; + return decode_time; - if (units == p2_units) /*More hackyness*/ - { - if (uop->type == UOP_MEU_MUL && decode_time < mul_first_available_cycle) - decode_time = mul_first_available_cycle; - else if (uop->type == UOP_MEU_SHIFT && decode_time < mul_first_available_cycle) - decode_time = shift_first_available_cycle; - } - /*Find execution unit for this uOP*/ for (c = 0; c < nr_units; c++) { @@ -1787,43 +1855,36 @@ static int uop_run(const risc86_uop_t *uop, int decode_time) } } if (!best_unit) - fatal("uop_run: can not find execution unit\n"); + fatal("uop_run: can not find execution unit\n"); if (best_start_cycle < decode_time) best_start_cycle = decode_time; - best_unit->first_available_cycle = best_start_cycle + uop->throughput; + best_unit->first_available_cycle = best_start_cycle + uop->latency; - if (units == p2_units) /*More hackyness*/ - { - if (uop->type == UOP_MEU_MUL) - mul_first_available_cycle = best_start_cycle + uop->throughput; - else if (uop->type == UOP_MEU_SHIFT) - shift_first_available_cycle = best_start_cycle + uop->throughput; - } - return best_start_cycle + uop->throughput; + + return best_start_cycle + uop->latency; } -/*The K6 decoder can decode, per clock : - - 1 or 2 'short' instructions, each up to 2 uOPs and 7 bytes long - - 1 'long' instruction, up to 4 uOPs - - 1 'vector' instruction, up to 4 uOPs per cycle, plus (I think) 1 cycle startup delay) +/*The P6 
decoders can decode, per clock : + - 1 to 3 'simple' instructions, each up to 1 uOP and 7 bytes long + - 1 'complex' instruction, up to 4 uOPs or 3 per cycle for instructions longer than 4 uOPs */ static struct { int nr_uops; - const risc86_uop_t *uops[4]; + const risc86_uop_t *uops[6]; /*Earliest time a uop can start. If the timestamp is -1, then the uop is part of a dependency chain and the start time is the completion time of the previous uop*/ - int earliest_start[4]; + int earliest_start[6]; } decode_buffer; -#define NR_OPQUADS 6 -/*Timestamps of when the last six opquads completed. The K6 scheduler retires - opquads in order, so this is needed to determine when the next can be scheduled*/ -static int opquad_completion_timestamp[NR_OPQUADS]; -static int next_opquad = 0; +#define NR_OPSEQS 3 +/*Timestamps of when the last three op sequences completed. Technically this is incorrect, +as the actual size of the opseq buffer is 20 bytes and not 18, but I'm restricted to multiples of 6*/ +static int opseq_completion_timestamp[NR_OPSEQS]; +static int next_opseq = 0; #define NR_REGS 8 /*Timestamp of when last operation on an integer register completed*/ @@ -1838,50 +1899,48 @@ void decode_flush_p6() { int c; int uop_timestamp = 0; - - /*Decoded opquad can not be submitted if there are no free spaces in the - opquad buffer*/ - if (decode_timestamp < opquad_completion_timestamp[next_opquad]) - decode_timestamp = opquad_completion_timestamp[next_opquad]; + + /*Decoded opseq can not be submitted if there are no free spaces in the + opseq buffer*/ + if (decode_timestamp < opseq_completion_timestamp[next_opseq]) + decode_timestamp = opseq_completion_timestamp[next_opseq]; /*Ensure that uops can not be submitted before they have been decoded*/ if (decode_timestamp > last_uop_timestamp) last_uop_timestamp = decode_timestamp; /*Submit uops to execution units, and determine the latest completion time*/ - for (c = 0; c < decode_buffer.nr_uops; c++) + for (c = 0; c < 
(decode_buffer.nr_uops); c++) { int start_timestamp; - if (decode_buffer.earliest_start[c] == -1) - start_timestamp = last_uop_timestamp; - else - start_timestamp = decode_buffer.earliest_start[c]; + + start_timestamp = decode_buffer.earliest_start[c]; last_uop_timestamp = uop_run(decode_buffer.uops[c], start_timestamp); if (last_uop_timestamp > uop_timestamp) uop_timestamp = last_uop_timestamp; } - /*Calculate opquad completion time. Since opquads complete in order, it + /*Calculate opseq completion time. Since opseqs complete in order, it must be after the last completion.*/ if (uop_timestamp <= last_complete_timestamp) last_complete_timestamp = last_complete_timestamp + 1; else last_complete_timestamp = uop_timestamp; - /*Advance to next opquad in buffer*/ - opquad_completion_timestamp[next_opquad] = last_complete_timestamp; - next_opquad++; - if (next_opquad == NR_OPQUADS) - next_opquad = 0; + /*Advance to next opseq in buffer*/ + opseq_completion_timestamp[next_opseq] = last_complete_timestamp; + next_opseq++; + if (next_opseq == NR_OPSEQS) + next_opseq = 0; decode_timestamp++; decode_buffer.nr_uops = 0; } /*The instruction is only of interest here if it's longer than 7 bytes, as that's the - limit on K6 short decoding*/ + limit on P6 simple decoding*/ static int codegen_timing_instr_length(uint64_t deps, uint32_t fetchdat, int op_32) { int len = prefixes + 1; /*Opcode*/ @@ -1933,8 +1992,9 @@ static int codegen_timing_instr_length(uint64_t deps, uint32_t fetchdat, int op_ static void decode_instruction(const risc86_instruction_t *ins, uint64_t deps, uint32_t fetchdat, int op_32, int bit8) { uint32_t regmask_required; - uint32_t regmask_modified; - int c, d; + uint32_t regmask_modified; + int c; + int d = 0; /*Complex decoder uOPs*/ int earliest_start = 0; decode_type_t decode_type = ins->decode_type; int instr_length = codegen_timing_instr_length(deps, fetchdat, op_32); @@ -1964,87 +2024,68 @@ static void decode_instruction(const risc86_instruction_t *ins, 
uint64_t deps, u earliest_start = fpu_st_timestamp[reg]; } - /*Short decoders are limited to 7 bytes*/ - if (decode_type == DECODE_SHORT && instr_length > 7) - decode_type = DECODE_LONG; - /*Long decoder is limited to 11 bytes*/ - else if (instr_length > 11) - decode_type = DECODE_VECTOR; + /*Simple decoders are limited to 7 bytes & 1 uOP*/ + if (decode_type == DECODE_SIMPLE && instr_length > 7) + decode_type = DECODE_COMPLEX; + else if (decode_type == DECODE_SIMPLE && ins->nr_uops > 1) + decode_type = DECODE_COMPLEX; switch (decode_type) - { - case DECODE_SHORT: - if (decode_buffer.nr_uops) + { + case DECODE_SIMPLE: + if (decode_buffer.nr_uops - d == 2) + { + decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; + decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; + decode_buffer.nr_uops = 3; + decode_flush_p6(); + } + else if (decode_buffer.nr_uops - d == 1) + { + decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; + decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; + decode_buffer.nr_uops = 2+d; + if (d) + decode_flush_p6(); + } + else if (decode_buffer.nr_uops) { decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; - if (ins->nr_uops > 1) - { - decode_buffer.uops[decode_buffer.nr_uops+1] = &ins->uop[1]; - decode_buffer.earliest_start[decode_buffer.nr_uops+1] = -1; - } - decode_buffer.nr_uops += ins->nr_uops; - - decode_flush_p6(); + decode_buffer.nr_uops = 1+d; } else { - decode_buffer.nr_uops = ins->nr_uops; + decode_buffer.nr_uops = 1; decode_buffer.uops[0] = &ins->uop[0]; decode_buffer.earliest_start[0] = earliest_start; - if (ins->nr_uops > 1) - { - decode_buffer.uops[1] = &ins->uop[1]; - decode_buffer.earliest_start[1] = -1; - } - } + } break; - case DECODE_LONG: + case DECODE_COMPLEX: if (decode_buffer.nr_uops) - decode_flush_p6(); + decode_flush_p6(); /*The 4-1-1 arrangement implies that a complex ins. 
can't be decoded after a simple one*/ - decode_buffer.nr_uops = ins->nr_uops; - for (c = 0; c < ins->nr_uops; c++) - { - decode_buffer.uops[c] = &ins->uop[c]; - if (c == 0) - decode_buffer.earliest_start[c] = earliest_start; - else - decode_buffer.earliest_start[c] = -1; - } - decode_flush_p6(); - break; - - case DECODE_VECTOR: - if (decode_buffer.nr_uops) - decode_flush_p6(); - - decode_timestamp++; d = 0; - + for (c = 0; c < ins->nr_uops; c++) { decode_buffer.uops[d] = &ins->uop[c]; - if (c == 0) - decode_buffer.earliest_start[d] = earliest_start; - else - decode_buffer.earliest_start[d] = -1; - d++; + decode_buffer.earliest_start[c] = earliest_start; + d++; - if (d == 4) + if (d == 3 && ins->nr_uops > 4) /*Ins. with >4 uOPs require the use of special units only present on 3 translate PLAs*/ { d = 0; - decode_buffer.nr_uops = 4; - decode_flush_p6(); + decode_buffer.nr_uops = 3; + decode_flush_p6(); /*The other two decoders are halted to preserve in-order issue*/ } } - if (d) - { - decode_buffer.nr_uops = d; - decode_flush_p6(); - } - break; + if (d) + { + decode_buffer.nr_uops = d; + } + break; } /*Update write timestamps for any output registers*/ @@ -2095,15 +2136,12 @@ void codegen_timing_p6_block_start() for (c = 0; c < nr_units; c++) units[c].first_available_cycle = 0; - mul_first_available_cycle = 0; - shift_first_available_cycle = 0; - decode_timestamp = 0; last_complete_timestamp = 0; - for (c = 0; c < NR_OPQUADS; c++) - opquad_completion_timestamp[c] = 0; - next_opquad = 0; + for (c = 0; c < NR_OPSEQS; c++) + opseq_completion_timestamp[c] = 0; + next_opseq = 0; for (c = 0; c < NR_REGS; c++) reg_available_timestamp[c] = 0; @@ -2295,7 +2333,7 @@ void codegen_timing_p6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint if (ins_table[opcode]) decode_instruction(ins_table[opcode], deps[opcode], fetchdat, op_32, bit8); else - decode_instruction(&vector_alu1_op, 0, fetchdat, op_32, bit8); + decode_instruction(&complex_alu1_op, 0, fetchdat, op_32, bit8); 
codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); } @@ -2324,4 +2362,4 @@ codegen_timing_t codegen_timing_p6 = codegen_timing_p6_block_start, codegen_timing_p6_block_end, codegen_timing_p6_jump_cycles -}; +}; \ No newline at end of file From d8f9a5528601716914cb84d4d244dfe8a5adb50b Mon Sep 17 00:00:00 2001 From: daviunic Date: Sat, 25 Apr 2020 18:12:30 +0200 Subject: [PATCH 08/12] Renamed logi bus mouse to logi/ms for clarity --- src/mouse_bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mouse_bus.c b/src/mouse_bus.c index e2146a5af..c2b4afad7 100644 --- a/src/mouse_bus.c +++ b/src/mouse_bus.c @@ -832,7 +832,7 @@ static const device_config_t ms_config[] = { const device_t mouse_logibus_device = { - "Logitech Bus Mouse", + "Logitech/Microsoft Bus Mouse", DEVICE_ISA, MOUSE_TYPE_LOGIBUS, bm_init, bm_close, NULL, From 6033f4480cc1641784e179213cae4890e322372f Mon Sep 17 00:00:00 2001 From: OBattler Date: Sat, 25 Apr 2020 22:02:48 +0200 Subject: [PATCH 09/12] Fixed P6 timings so they no longer fatal (and removed the 3DNow! stuff from them), and also fixed the Distributed DMA bug reported by TheCollector1995. 
--- src/cpu/codegen_timing_p6.c | 243 ++------------------------------ src/cpu_new/codegen_timing_p6.c | 243 ++------------------------------ src/ddma.c | 4 +- 3 files changed, 24 insertions(+), 466 deletions(-) diff --git a/src/cpu/codegen_timing_p6.c b/src/cpu/codegen_timing_p6.c index a0da30304..db0c965e1 100644 --- a/src/cpu/codegen_timing_p6.c +++ b/src/cpu/codegen_timing_p6.c @@ -1380,173 +1380,6 @@ static const risc86_instruction_t *opcode_timings_0f_mod3[256] = &mmx_op, &mmx_op, &mmx_op, INVALID, }; -static const risc86_instruction_t *opcode_timings_0f0f[256] = -{ -/*00*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*10*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*20*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*30*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*40*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*50*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*60*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*70*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*80*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, 
INVALID, INVALID, - -/*90*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*a0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, &load_mmx_mul_op, &load_mmx_mul_op, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*b0*/ INVALID, INVALID, INVALID, INVALID, - &load_mmx_mul_op, INVALID, &load_mmx_mul_op, &load_mmx_mul_op, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, &load_mmx_op, - -/*c0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*d0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*e0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*f0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -}; -static const risc86_instruction_t *opcode_timings_0f0f_mod3[256] = -{ -/*00*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*10*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*20*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*30*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*40*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, 
INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*50*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*60*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*70*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*80*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*90*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*a0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, &mmx_mul_op, &mmx_mul_op, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*b0*/ INVALID, INVALID, INVALID, INVALID, - &mmx_mul_op, INVALID, &mmx_mul_op, &mmx_mul_op, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, &mmx_op, - -/*c0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*d0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*e0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*f0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -}; - static const risc86_instruction_t *opcode_timings_shift[8] = { &complex_alu_store_op, &complex_alu_store_op, &complex_alu_store_op, 
&complex_alu_store_op, @@ -1876,11 +1709,11 @@ static int uop_run(const risc86_uop_t *uop, int decode_time) static struct { int nr_uops; - const risc86_uop_t *uops[6]; + const risc86_uop_t *uops[MAX_UOPS]; /*Earliest time a uop can start. If the timestamp is -1, then the uop is part of a dependency chain and the start time is the completion time of the previous uop*/ - int earliest_start[6]; + int earliest_start[MAX_UOPS]; } decode_buffer; #define NR_OPSEQS 3 @@ -1901,7 +1734,7 @@ static int last_uop_timestamp = 0; void decode_flush_p6() { int c; - int uop_timestamp = 0; + int start_timestamp, uop_timestamp = 0; /*Decoded opseq can not be submitted if there are no free spaces in the opseq buffer*/ @@ -1915,9 +1748,6 @@ void decode_flush_p6() /*Submit uops to execution units, and determine the latest completion time*/ for (c = 0; c < (decode_buffer.nr_uops); c++) { - int start_timestamp; - - start_timestamp = decode_buffer.earliest_start[c]; last_uop_timestamp = uop_run(decode_buffer.uops[c], start_timestamp); @@ -2037,7 +1867,7 @@ static void decode_instruction(const risc86_instruction_t *ins, uint64_t deps, u { case DECODE_SIMPLE: if (decode_buffer.nr_uops - d == 2) - { + { decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; decode_buffer.nr_uops = 3; @@ -2084,10 +1914,10 @@ static void decode_instruction(const risc86_instruction_t *ins, uint64_t deps, u decode_flush_p6(); /*The other two decoders are halted to preserve in-order issue*/ } } - if (d) - { - decode_buffer.nr_uops = d; - } + if (d) + { + decode_buffer.nr_uops = d; + } break; } @@ -2188,59 +2018,8 @@ void codegen_timing_p6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint switch (last_prefix) { case 0x0f: - if (opcode == 0x0f) - { - /*3DNow has the actual opcode after ModR/M, SIB and any offset*/ - uint32_t opcode_pc = op_pc + 1; /*Byte after ModR/M*/ - uint8_t modrm = fetchdat & 0xff; - uint8_t sib = (fetchdat >> 8) & 
0xff; - - if ((modrm & 0xc0) != 0xc0) - { - if (op_32 & 0x200) - { - if ((modrm & 7) == 4) - { - /* Has SIB*/ - opcode_pc++; - if ((modrm & 0xc0) == 0x40) - opcode_pc++; - else if ((modrm & 0xc0) == 0x80) - opcode_pc += 4; - else if ((sib & 0x07) == 0x05) - opcode_pc += 4; - } - else - { - if ((modrm & 0xc0) == 0x40) - opcode_pc++; - else if ((modrm & 0xc0) == 0x80) - opcode_pc += 4; - else if ((modrm & 0xc7) == 0x05) - opcode_pc += 4; - } - } - else - { - if ((modrm & 0xc0) == 0x40) - opcode_pc++; - else if ((modrm & 0xc0) == 0x80) - opcode_pc += 2; - else if ((modrm & 0xc7) == 0x06) - opcode_pc += 2; - } - } - - opcode = fastreadb(cs + opcode_pc); - - ins_table = mod3 ? opcode_timings_0f0f_mod3 : opcode_timings_0f0f; - deps = mod3 ? opcode_deps_0f0f_mod3 : opcode_deps_0f0f; - } - else - { - ins_table = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; - deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; - } + ins_table = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? 
opcode_deps_0f_mod3 : opcode_deps_0f; break; case 0xd8: @@ -2365,4 +2144,4 @@ codegen_timing_t codegen_timing_p6 = codegen_timing_p6_block_start, codegen_timing_p6_block_end, codegen_timing_p6_jump_cycles -}; \ No newline at end of file +}; diff --git a/src/cpu_new/codegen_timing_p6.c b/src/cpu_new/codegen_timing_p6.c index 845f46803..d002631bc 100644 --- a/src/cpu_new/codegen_timing_p6.c +++ b/src/cpu_new/codegen_timing_p6.c @@ -1377,173 +1377,6 @@ static const risc86_instruction_t *opcode_timings_0f_mod3[256] = &mmx_op, &mmx_op, &mmx_op, INVALID, }; -static const risc86_instruction_t *opcode_timings_0f0f[256] = -{ -/*00*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*10*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*20*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*30*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*40*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*50*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*60*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*70*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*80*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, 
INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*90*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*a0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, &load_mmx_mul_op, &load_mmx_mul_op, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*b0*/ INVALID, INVALID, INVALID, INVALID, - &load_mmx_mul_op, INVALID, &load_mmx_mul_op, &load_mmx_mul_op, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, &load_mmx_op, - -/*c0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*d0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*e0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*f0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -}; -static const risc86_instruction_t *opcode_timings_0f0f_mod3[256] = -{ -/*00*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*10*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*20*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*30*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*40*/ INVALID, INVALID, INVALID, INVALID, - 
INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*50*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*60*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*70*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*80*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*90*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*a0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, &mmx_mul_op, &mmx_mul_op, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*b0*/ INVALID, INVALID, INVALID, INVALID, - &mmx_mul_op, INVALID, &mmx_mul_op, &mmx_mul_op, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, &mmx_op, - -/*c0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*d0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*e0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -/*f0*/ INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - INVALID, INVALID, INVALID, INVALID, - -}; - static const risc86_instruction_t *opcode_timings_shift[8] = { &complex_alu_store_op, 
&complex_alu_store_op, &complex_alu_store_op, &complex_alu_store_op, @@ -1873,11 +1706,11 @@ static int uop_run(const risc86_uop_t *uop, int decode_time) static struct { int nr_uops; - const risc86_uop_t *uops[6]; + const risc86_uop_t *uops[MAX_UOPS]; /*Earliest time a uop can start. If the timestamp is -1, then the uop is part of a dependency chain and the start time is the completion time of the previous uop*/ - int earliest_start[6]; + int earliest_start[MAX_UOPS]; } decode_buffer; #define NR_OPSEQS 3 @@ -1898,7 +1731,7 @@ static int last_uop_timestamp = 0; void decode_flush_p6() { int c; - int uop_timestamp = 0; + int start_timestamp, uop_timestamp = 0; /*Decoded opseq can not be submitted if there are no free spaces in the opseq buffer*/ @@ -1912,9 +1745,6 @@ void decode_flush_p6() /*Submit uops to execution units, and determine the latest completion time*/ for (c = 0; c < (decode_buffer.nr_uops); c++) { - int start_timestamp; - - start_timestamp = decode_buffer.earliest_start[c]; last_uop_timestamp = uop_run(decode_buffer.uops[c], start_timestamp); @@ -2034,7 +1864,7 @@ static void decode_instruction(const risc86_instruction_t *ins, uint64_t deps, u { case DECODE_SIMPLE: if (decode_buffer.nr_uops - d == 2) - { + { decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; decode_buffer.nr_uops = 3; @@ -2081,10 +1911,10 @@ static void decode_instruction(const risc86_instruction_t *ins, uint64_t deps, u decode_flush_p6(); /*The other two decoders are halted to preserve in-order issue*/ } } - if (d) - { - decode_buffer.nr_uops = d; - } + if (d) + { + decode_buffer.nr_uops = d; + } break; } @@ -2185,59 +2015,8 @@ void codegen_timing_p6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint switch (last_prefix) { case 0x0f: - if (opcode == 0x0f) - { - /*3DNow has the actual opcode after ModR/M, SIB and any offset*/ - uint32_t opcode_pc = op_pc + 1; /*Byte after ModR/M*/ - uint8_t modrm = 
fetchdat & 0xff; - uint8_t sib = (fetchdat >> 8) & 0xff; - - if ((modrm & 0xc0) != 0xc0) - { - if (op_32 & 0x200) - { - if ((modrm & 7) == 4) - { - /* Has SIB*/ - opcode_pc++; - if ((modrm & 0xc0) == 0x40) - opcode_pc++; - else if ((modrm & 0xc0) == 0x80) - opcode_pc += 4; - else if ((sib & 0x07) == 0x05) - opcode_pc += 4; - } - else - { - if ((modrm & 0xc0) == 0x40) - opcode_pc++; - else if ((modrm & 0xc0) == 0x80) - opcode_pc += 4; - else if ((modrm & 0xc7) == 0x05) - opcode_pc += 4; - } - } - else - { - if ((modrm & 0xc0) == 0x40) - opcode_pc++; - else if ((modrm & 0xc0) == 0x80) - opcode_pc += 2; - else if ((modrm & 0xc7) == 0x06) - opcode_pc += 2; - } - } - - opcode = fastreadb(cs + opcode_pc); - - ins_table = mod3 ? opcode_timings_0f0f_mod3 : opcode_timings_0f0f; - deps = mod3 ? opcode_deps_0f0f_mod3 : opcode_deps_0f0f; - } - else - { - ins_table = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; - deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; - } + ins_table = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; break; case 0xd8: @@ -2362,4 +2141,4 @@ codegen_timing_t codegen_timing_p6 = codegen_timing_p6_block_start, codegen_timing_p6_block_end, codegen_timing_p6_jump_cycles -}; \ No newline at end of file +}; diff --git a/src/ddma.c b/src/ddma.c index 98e4da293..92ee1f4d3 100644 --- a/src/ddma.c +++ b/src/ddma.c @@ -107,9 +107,9 @@ ddma_reg_write(uint16_t addr, uint8_t val, void *p) break; case 0x02: if (ch >= 4) - outb(0x88 + page_regs[ch], val); + outb(0x88 + page_regs[ch & 3], val); else - outb(0x80 + page_regs[ch], val); + outb(0x80 + page_regs[ch & 3], val); break; case 0x04: dma[ch].cb = (dma[ch].cb & 0xffff00) | val; From 20cf306d44dbc15d70f6fab1f1ca5fe94b91d5d7 Mon Sep 17 00:00:00 2001 From: OBattler Date: Sat, 25 Apr 2020 22:14:43 +0200 Subject: [PATCH 10/12] Better fix for the P6 timings. 
--- src/cpu/codegen_timing_p6.c | 8 ++++---- src/cpu_new/codegen_timing_p6.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/cpu/codegen_timing_p6.c b/src/cpu/codegen_timing_p6.c index db0c965e1..ec5c54640 100644 --- a/src/cpu/codegen_timing_p6.c +++ b/src/cpu/codegen_timing_p6.c @@ -1709,11 +1709,11 @@ static int uop_run(const risc86_uop_t *uop, int decode_time) static struct { int nr_uops; - const risc86_uop_t *uops[MAX_UOPS]; + const risc86_uop_t *uops[6]; /*Earliest time a uop can start. If the timestamp is -1, then the uop is part of a dependency chain and the start time is the completion time of the previous uop*/ - int earliest_start[MAX_UOPS]; + int earliest_start[6]; } decode_buffer; #define NR_OPSEQS 3 @@ -1904,10 +1904,10 @@ static void decode_instruction(const risc86_instruction_t *ins, uint64_t deps, u for (c = 0; c < ins->nr_uops; c++) { decode_buffer.uops[d] = &ins->uop[c]; - decode_buffer.earliest_start[c] = earliest_start; + decode_buffer.earliest_start[d] = earliest_start; d++; - if (d == 3 && ins->nr_uops > 4) /*Ins. with >4 uOPs require the use of special units only present on 3 translate PLAs*/ + if ((d == 3) && (ins->nr_uops > 4)) /*Ins. with >4 uOPs require the use of special units only present on 3 translate PLAs*/ { d = 0; decode_buffer.nr_uops = 3; diff --git a/src/cpu_new/codegen_timing_p6.c b/src/cpu_new/codegen_timing_p6.c index d002631bc..a51b4df32 100644 --- a/src/cpu_new/codegen_timing_p6.c +++ b/src/cpu_new/codegen_timing_p6.c @@ -1706,11 +1706,11 @@ static int uop_run(const risc86_uop_t *uop, int decode_time) static struct { int nr_uops; - const risc86_uop_t *uops[MAX_UOPS]; + const risc86_uop_t *uops[6]; /*Earliest time a uop can start. 
If the timestamp is -1, then the uop is part of a dependency chain and the start time is the completion time of the previous uop*/ - int earliest_start[MAX_UOPS]; + int earliest_start[6]; } decode_buffer; #define NR_OPSEQS 3 @@ -1901,10 +1901,10 @@ static void decode_instruction(const risc86_instruction_t *ins, uint64_t deps, u for (c = 0; c < ins->nr_uops; c++) { decode_buffer.uops[d] = &ins->uop[c]; - decode_buffer.earliest_start[c] = earliest_start; + decode_buffer.earliest_start[d] = earliest_start; d++; - if (d == 3 && ins->nr_uops > 4) /*Ins. with >4 uOPs require the use of special units only present on 3 translate PLAs*/ + if ((d == 3) && (ins->nr_uops > 4)) /*Ins. with >4 uOPs require the use of special units only present on 3 translate PLAs*/ { d = 0; decode_buffer.nr_uops = 3; From e6ccbd2406b741d6a5b73e5139f6258451bf7367 Mon Sep 17 00:00:00 2001 From: OBattler Date: Sat, 25 Apr 2020 22:37:07 +0200 Subject: [PATCH 11/12] Bumped maximum UDMA mode from 2 (ATA 33) to 4 (ATA 66) so it can be used where supported. --- src/disk/hdc_ide.c | 2 +- src/disk/mo.c | 2 +- src/disk/zip.c | 2 +- src/scsi/scsi_cdrom.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/disk/hdc_ide.c b/src/disk/hdc_ide.c index 2b6104ee8..0c0453cb1 100644 --- a/src/disk/hdc_ide.c +++ b/src/disk/hdc_ide.c @@ -410,7 +410,7 @@ ide_get_max(ide_t *ide, int type) return -1; case TYPE_UDMA: /* UDMA */ if (!ide_boards[ide->board]->force_ata3 && (ide_bm[ide->board] != NULL)) - return 2; + return 4 /*2*/; return -1; default: diff --git a/src/disk/mo.c b/src/disk/mo.c index 289a3d187..1687d37f3 100644 --- a/src/disk/mo.c +++ b/src/disk/mo.c @@ -1983,7 +1983,7 @@ mo_get_max(int ide_has_dma, int type) ret = ide_has_dma ? 1 : -1; break; case TYPE_UDMA: - ret = ide_has_dma ? 2 : -1; + ret = ide_has_dma ? 
4 /*2*/ : -1; break; } diff --git a/src/disk/zip.c b/src/disk/zip.c index 850468573..8a6253100 100644 --- a/src/disk/zip.c +++ b/src/disk/zip.c @@ -2243,7 +2243,7 @@ zip_get_max(int ide_has_dma, int type) ret = ide_has_dma ? 1 : -1; break; case TYPE_UDMA: - ret = ide_has_dma ? 2 : -1; + ret = ide_has_dma ? 4 /*2*/ : -1; break; } diff --git a/src/scsi/scsi_cdrom.c b/src/scsi/scsi_cdrom.c index 2944335ee..106e2bb87 100644 --- a/src/scsi/scsi_cdrom.c +++ b/src/scsi/scsi_cdrom.c @@ -2595,7 +2595,7 @@ scsi_cdrom_get_max(int ide_has_dma, int type) ret = ide_has_dma ? 2 : -1; break; case TYPE_UDMA: - ret = ide_has_dma ? 2 : -1; + ret = ide_has_dma ? 4 /*2*/ : -1; break; default: ret = -1; From 3aaf9ac6fe241d5a81277f890dff11fad0f3e9cd Mon Sep 17 00:00:00 2001 From: OBattler Date: Sun, 26 Apr 2020 00:49:46 +0200 Subject: [PATCH 12/12] More ACPI improvements, implementation of VIA ACPI (yet to be hooked to the southbridges), removed ISABugger stuff from acpi.h, apm.h, ddma.h, and usb.h, and updated the descriptions in intel_piix.c and piix.h. --- src/acpi.c | 412 ++++++++++++++++++++++++++++++++++++--- src/apm.c | 18 +- src/include/86box/acpi.h | 30 +-- src/include/86box/apm.h | 9 +- src/include/86box/ddma.h | 7 - src/include/86box/piix.h | 3 +- src/include/86box/usb.h | 7 - src/intel_piix.c | 23 ++- 8 files changed, 437 insertions(+), 72 deletions(-) diff --git a/src/acpi.c b/src/acpi.c index 26855a743..d8555cb95 100644 --- a/src/acpi.c +++ b/src/acpi.c @@ -32,6 +32,7 @@ #include <86box/keyboard.h> #include <86box/nvr.h> #include <86box/pit.h> +#include <86box/apm.h> #include <86box/acpi.h> @@ -77,8 +78,28 @@ acpi_update_irq(void *priv) } +static void +acpi_raise_smi(void *priv) +{ + acpi_t *dev = (acpi_t *) priv; + + if (dev->vendor == VEN_VIA) { + if ((dev->regs.glbctl & 0x01) && (!dev->regs.smi_lock || !dev->regs.smi_active)) { + smi_line = 1; + dev->regs.smi_active = 1; + } + } else { + if (dev->regs.glbctl & 0x01) { + smi_line = 1; + /* Clear bit 16 of GLBCTL. 
*/ + dev->regs.glbctl &= ~0x00010000; + } + } +} + + static uint32_t -acpi_reg_read_common(int size, uint16_t addr, void *p) +acpi_reg_read_intel(int size, uint16_t addr, void *p) { acpi_t *dev = (acpi_t *) p; uint32_t ret = 0x00000000; @@ -117,16 +138,6 @@ acpi_reg_read_common(int size, uint16_t addr, void *p) /* PCNTRL - Processor Control Register (IO) */ ret = (dev->regs.pcntrl >> shift32) & 0xff; break; - case 0x14: - /* PLVL2 - Processor Level 2 Register (IO) */ - if (size == 1) - ret = dev->regs.plvl2; - break; - case 0x15: - /* PLVL3 - Processor Level 3 Register (IO) */ - if (size == 1) - ret = dev->regs.plvl3; - break; case 0x18: case 0x19: /* GLBSTS - Global Status Register (IO) */ ret = (dev->regs.glbsts >> shift16) & 0xff; @@ -173,8 +184,118 @@ acpi_reg_read_common(int size, uint16_t addr, void *p) } +static uint32_t +acpi_reg_read_via(int size, uint16_t addr, void *p) +{ + acpi_t *dev = (acpi_t *) p; + uint32_t ret = 0x00000000; + int shift16, shift32; + + addr &= 0xff; + shift16 = (addr & 1) << 3; + shift32 = (addr & 3) << 3; + + switch (addr) { + case 0x00: case 0x01: + /* PMSTS - Power Management Status Register (IO) */ + ret = (dev->regs.pmsts >> shift16) & 0xff; + break; + case 0x02: case 0x03: + /* PMEN - Power Management Resume Enable Register (IO) */ + ret = (dev->regs.pmen >> shift16) & 0xff; + break; + case 0x04: case 0x05: + /* PMCNTRL - Power Management Control Register (IO) */ + ret = (dev->regs.pmcntrl >> shift16) & 0xff; + break; + case 0x08: case 0x09: case 0x0a: case 0x0b: + /* PMTMR - Power Management Timer Register (IO) */ + ret = (dev->regs.timer_val >> shift32) & 0xff; + break; + case 0x10: case 0x11: case 0x12: case 0x13: + /* PCNTRL - Processor Control Register (IO) */ + ret = (dev->regs.pcntrl >> shift32) & 0xff; + break; + case 0x20: case 0x21: + /* GPSTS - General Purpose Status Register (IO) */ + ret = (dev->regs.gpsts >> shift16) & 0xff; + break; + case 0x22: case 0x23: + /* General Purpose SCI Enable */ + ret = 
(dev->regs.gpscien >> shift16) & 0xff; + break; + case 0x24: case 0x25: + /* General Purpose SMI Enable */ + ret = (dev->regs.gpsmien >> shift16) & 0xff; + break; + case 0x26: case 0x27: + /* Power Supply Control */ + ret = (dev->regs.pscntrl >> shift16) & 0xff; + break; + case 0x28: case 0x29: + /* GLBSTS - Global Status Register (IO) */ + ret = (dev->regs.glbsts >> shift16) & 0xff; + break; + case 0x2a: case 0x2b: + /* GLBEN - Global Enable Register (IO) */ + ret = (dev->regs.glben >> shift16) & 0xff; + break; + case 0x2c: case 0x2d: + /* GLBCTL - Global Control Register (IO) */ + ret = (dev->regs.glbctl >> shift16) & 0xff; + ret &= ~0x0110; + ret |= (dev->regs.smi_lock ? 0x10 : 0x00); + ret |= (dev->regs.smi_active ? 0x01 : 0x00); + break; + case 0x2f: + /* SMI Command */ + if (size == 1) + ret = dev->regs.smicmd & 0xff; + break; + case 0x30: case 0x31: case 0x32: case 0x33: + /* Primary Activity Detect Status */ + ret = (dev->regs.padsts >> shift32) & 0xff; + break; + case 0x34: case 0x35: case 0x36: case 0x37: + /* Primary Activity Detect Enable */ + ret = (dev->regs.paden >> shift32) & 0xff; + break; + case 0x38: case 0x39: case 0x3a: case 0x3b: + /* GP Timer Reload Enable */ + ret = (dev->regs.gptren >> shift32) & 0xff; + break; + case 0x40: + /* GPIO Direction Control */ + if (size == 1) + ret = dev->regs.gpio_dir & 0xff; + break; + case 0x42: + /* GPIO port Output Value */ + if (size == 1) + ret = dev->regs.gpio_val & 0xff; + break; + case 0x44: + /* GPIO port Output Value */ + if (size == 1) + ret = dev->regs.extsmi_val & 0xff; + break; + case 0x46: case 0x47: + /* GPO Port Output Value */ + ret = (dev->regs.gpo_val >> shift16) & 0xff; + break; + case 0x48: case 0x49: + /* GPO Port Input Value */ + ret = (dev->regs.gpi_val >> shift16) & 0xff; + break; + } + + acpi_log("(%i) ACPI Read (%i) %02X: %02X\n", in_smm, size, addr, ret); + return ret; +} + + static void -acpi_reg_write_common(int size, uint16_t addr, uint8_t val, void *p) +acpi_reg_write_intel(int 
size, uint16_t addr, uint8_t val, void *p) { acpi_t *dev = (acpi_t *) p; int shift16, shift32; @@ -199,6 +320,12 @@ acpi_reg_write_common(int size, uint16_t addr, uint8_t val, void *p) case 0x04: case 0x05: /* PMCNTRL - Power Management Control Register (IO) */ dev->regs.pmcntrl = ((dev->regs.pmcntrl & ~(0xff << shift16)) | (val << shift16)) & 0x3c07; + /* Setting GBL_RLS also sets BIOS_STS and generates SMI. */ + if ((addr == 0x04) && (dev->regs.pmcntrl & 0x0004)) { + dev->regs.glbsts |= 0x01; + if (dev->regs.glben & 0x02) + acpi_raise_smi(dev); + } if (dev->regs.pmcntrl & 0x2000) { sus_typ = (dev->regs.pmcntrl >> 10) & 7; switch (sus_typ) { @@ -240,16 +367,6 @@ acpi_reg_write_common(int size, uint16_t addr, uint8_t val, void *p) /* PCNTRL - Processor Control Register (IO) */ dev->regs.pcntrl = ((dev->regs.pcntrl & ~(0xff << shift32)) | (val << shift32)) & 0x00023e1e; break; - case 0x14: - /* PLVL2 - Processor Level 2 Register (IO) */ - if (size == 1) - dev->regs.plvl2 = val; - break; - case 0x15: - /* PLVL3 - Processor Level 3 Register (IO) */ - if (size == 1) - dev->regs.plvl3 = val; - break; case 0x18: case 0x19: /* GLBSTS - Global Status Register (IO) */ dev->regs.glbsts &= ~((val << shift16) & 0x0df7); @@ -264,8 +381,13 @@ acpi_reg_write_common(int size, uint16_t addr, uint8_t val, void *p) break; case 0x28: case 0x29: case 0x2a: case 0x2b: /* GLBCTL - Global Control Register (IO) */ - // dev->regs.glbctl = ((dev->regs.glbctl & ~(0xff << shift32)) | (val << shift32)) & 0x0701ff07; - dev->regs.glbctl = ((dev->regs.glbctl & ~(0xff << shift32)) | (val << shift32)) & 0x0700ff07; + dev->regs.glbctl = ((dev->regs.glbctl & ~(0xff << shift32)) | (val << shift32)) & 0x0701ff07; + /* Setting BIOS_RLS also sets GBL_STS and generates SMI. 
*/ + if (dev->regs.glbctl & 0x00000002) { + dev->regs.pmsts |= 0x20; + if (dev->regs.pmen & 0x20) + acpi_update_irq(dev); + } break; case 0x2c: case 0x2d: case 0x2e: case 0x2f: /* DEVCTL - Device Control Register (IO) */ @@ -280,6 +402,180 @@ acpi_reg_write_common(int size, uint16_t addr, uint8_t val, void *p) } +static void +acpi_reg_write_via(int size, uint16_t addr, uint8_t val, void *p) +{ + acpi_t *dev = (acpi_t *) p; + int shift16, shift32; + int sus_typ; + + addr &= 0xff; + acpi_log("(%i) ACPI Write (%i) %02X: %02X\n", in_smm, size, addr, val); + shift16 = (addr & 1) << 3; + shift32 = (addr & 3) << 3; + + switch (addr) { + case 0x00: case 0x01: + /* PMSTS - Power Management Status Register (IO) */ + dev->regs.pmsts &= ~((val << shift16) & 0x8d31); + acpi_update_irq(dev); + if ((addr == 0x00) && !(dev->regs.pmsts & 0x20)) + dev->regs.glbctl &= ~0x0002; + break; + case 0x02: case 0x03: + /* PMEN - Power Management Resume Enable Register (IO) */ + dev->regs.pmen = ((dev->regs.pmen & ~(0xff << shift16)) | (val << shift16)) & 0x0521; + acpi_update_irq(dev); + break; + case 0x04: case 0x05: + /* PMCNTRL - Power Management Control Register (IO) */ + dev->regs.pmcntrl = ((dev->regs.pmcntrl & ~(0xff << shift16)) | (val << shift16)) & 0x3c07; + /* Setting GBL_RLS also sets BIOS_STS and generates SMI. */ + if ((addr == 0x04) && (dev->regs.pmcntrl & 0x0004)) { + dev->regs.glbsts |= 0x20; + if (dev->regs.glben & 0x20) + acpi_raise_smi(dev); + } + if (dev->regs.pmcntrl & 0x2000) { + sus_typ = (dev->regs.pmcntrl >> 10) & 7; + switch (sus_typ) { + case 0: + /* Soft power off. */ + exit(-1); + break; + case 1: + /* Suspend to RAM. */ + nvr_reg_write(0x000f, 0xff, dev->nvr); + + /* Do a hard reset. 
*/ + device_reset_all_pci(); + + cpu_alt_reset = 0; + + pci_reset(); + keyboard_at_reset(); + + mem_a20_alt = 0; + mem_a20_recalc(); + + flushmmucache(); + + resetx86(); + break; + } + } + break; + case 0x10: case 0x11: case 0x12: case 0x13: + /* PCNTRL - Processor Control Register (IO) */ + dev->regs.pcntrl = ((dev->regs.pcntrl & ~(0xff << shift32)) | (val << shift32)) & 0x0000001e; + break; + case 0x20: case 0x21: + /* GPSTS - General Purpose Status Register (IO) */ + dev->regs.gpsts &= ~((val << shift16) & 0x03ff); + break; + case 0x22: case 0x23: + /* General Purpose SCI Enable */ + dev->regs.gpscien = ((dev->regs.gpscien & ~(0xff << shift16)) | (val << shift16)) & 0x03ff; + break; + case 0x24: case 0x25: + /* General Purpose SMI Enable */ + dev->regs.gpsmien = ((dev->regs.gpsmien & ~(0xff << shift16)) | (val << shift16)) & 0x03ff; + break; + case 0x26: case 0x27: + /* Power Supply Control */ + dev->regs.pscntrl = ((dev->regs.pscntrl & ~(0xff << shift16)) | (val << shift16)) & 0x0701; + break; + case 0x28: case 0x29: + /* GLBSTS - Global Status Register (IO) */ + dev->regs.glbsts &= ~((val << shift16) & 0x007f); + break; + case 0x2a: case 0x2b: + /* GLBEN - Global Enable Register (IO) */ + dev->regs.glben = ((dev->regs.glben & ~(0xff << shift16)) | (val << shift16)) & 0x007f; + break; + case 0x2c: + /* GLBCTL - Global Control Register (IO) */ + dev->regs.glbctl = (dev->regs.glbctl & ~0xff) | (val & 0xff); + dev->regs.smi_lock = !!(dev->regs.glbctl & 0x0010); + /* Setting BIOS_RLS also sets GBL_STS and generates SMI. 
*/ + if (dev->regs.glbctl & 0x0002) { + dev->regs.pmsts |= 0x20; + if (dev->regs.pmen & 0x20) + acpi_update_irq(dev); + } + break; + case 0x2d: + /* GLBCTL - Global Control Register (IO) */ + dev->regs.glbctl &= ~((val << 8) & 0x0100); + if (val & 0x01) + dev->regs.smi_active = 0; + break; + case 0x2f: + /* SMI Command */ + if (size == 1) { + dev->regs.smicmd = val & 0xff; + dev->regs.glbsts |= 0x40; + if (dev->regs.glben & 0x40) + acpi_raise_smi(dev); + } + break; + case 0x30: case 0x31: case 0x32: case 0x33: + /* Primary Activity Detect Status */ + dev->regs.padsts &= ~((val << shift32) & 0x000000fd); + break; + case 0x34: case 0x35: case 0x36: case 0x37: + /* Primary Activity Detect Enable */ + dev->regs.paden = ((dev->regs.paden & ~(0xff << shift32)) | (val << shift32)) & 0x000000fd; + break; + case 0x38: case 0x39: case 0x3a: case 0x3b: + /* GP Timer Reload Enable */ + dev->regs.gptren = ((dev->regs.gptren & ~(0xff << shift32)) | (val << shift32)) & 0x000000d9; + break; + case 0x40: + /* GPIO Direction Control */ + if (size == 1) + dev->regs.gpio_dir = val & 0xff; + break; + case 0x42: + /* GPIO port Output Value */ + if (size == 1) + dev->regs.gpio_val = val & 0xff; + break; + case 0x46: case 0x47: + /* GPO Port Output Value */ + dev->regs.gpo_val = ((dev->regs.gpo_val & ~(0xff << shift16)) | (val << shift16)) & 0xffff; + break; + } +} + + +static uint32_t +acpi_reg_read_common(int size, uint16_t addr, void *p) +{ + acpi_t *dev = (acpi_t *) p; + uint8_t ret = 0xff; + + if (dev->vendor == VEN_VIA) + ret = acpi_reg_read_via(size, addr, p); + else + ret = acpi_reg_read_intel(size, addr, p); + + return ret; +} + + +static void +acpi_reg_write_common(int size, uint16_t addr, uint8_t val, void *p) +{ + acpi_t *dev = (acpi_t *) p; + + if (dev->vendor == VEN_VIA) + acpi_reg_write_via(size, addr, val, p); + else + acpi_reg_write_intel(size, addr, val, p); +} + + static uint32_t acpi_reg_readl(uint16_t addr, void *p) { @@ -440,6 +736,46 @@ acpi_set_nvr(acpi_t *dev, 
nvr_t *nvr) } +static void +acpi_apm_out(uint16_t port, uint8_t val, void *p) +{ + acpi_t *dev = (acpi_t *) p; + + acpi_log("[%04X:%08X] APM write: %04X = %02X (BX = %04X, CX = %04X)\n", CS, cpu_state.pc, port, val, BX, CX); + + port &= 0x0001; + + if (port == 0x0000) { + dev->apm->cmd = val; + if (dev->apm->do_smi) { + if (dev->vendor == VEN_INTEL) + dev->regs.glbsts |= 0x20; + acpi_raise_smi(dev); + } + } else + dev->apm->stat = val; +} + + +static uint8_t +acpi_apm_in(uint16_t port, void *p) +{ + acpi_t *dev = (acpi_t *) p; + uint8_t ret = 0xff; + + port &= 0x0001; + + if (port == 0x0000) + ret = dev->apm->cmd; + else + ret = dev->apm->stat; + + acpi_log("[%04X:%08X] APM read: %04X = %02X\n", CS, cpu_state.pc, port, ret); + + return ret; +} + + static void acpi_reset(void *priv) { @@ -482,6 +818,13 @@ acpi_init(const device_t *info) if (dev == NULL) return(NULL); memset(dev, 0x00, sizeof(acpi_t)); + dev->vendor = info->local; + + if (dev->vendor == VEN_INTEL) { + dev->apm = device_add(&apm_pci_acpi_device); + io_sethandler(0x00b2, 0x0002, acpi_apm_in, NULL, NULL, acpi_apm_out, NULL, NULL, dev); + } + timer_add(&dev->timer, acpi_timer_count, dev, 0); timer_set_delay_u64(&dev->timer, ACPICONST); @@ -489,11 +832,26 @@ acpi_init(const device_t *info) } -const device_t acpi_device = +const device_t acpi_intel_device = { - "ACPI", + "ACPI v1.0", DEVICE_PCI, - 0, + VEN_INTEL, + acpi_init, + acpi_close, + acpi_reset, + NULL, + acpi_speed_changed, + NULL, + NULL +}; + + +const device_t acpi_via_device = +{ + "ACPI v1.2", + DEVICE_PCI, + VEN_VIA, acpi_init, acpi_close, acpi_reset, diff --git a/src/apm.c b/src/apm.c index b197c0964..38d223e88 100644 --- a/src/apm.c +++ b/src/apm.c @@ -116,7 +116,8 @@ static void apm_t *dev = (apm_t *) malloc(sizeof(apm_t)); memset(dev, 0, sizeof(apm_t)); - io_sethandler(0x00b2, 0x0002, apm_in, NULL, NULL, apm_out, NULL, NULL, dev); + if (info->local == 0) + io_sethandler(0x00b2, 0x0002, apm_in, NULL, NULL, apm_out, NULL, NULL, dev); return 
dev; } @@ -150,3 +151,18 @@ const device_t apm_pci_device = NULL, NULL }; + + +const device_t apm_pci_acpi_device = +{ + "Advanced Power Management (PCI)", + DEVICE_PCI, + 1, + apm_init, + apm_close, + apm_reset, + NULL, + NULL, + NULL, + NULL +}; diff --git a/src/include/86box/acpi.h b/src/include/86box/acpi.h index ca0cb3b93..fa10aaa2f 100644 --- a/src/include/86box/acpi.h +++ b/src/include/86box/acpi.h @@ -6,13 +6,6 @@ * * This file is part of the 86Box distribution. * - * Implementation of the ISA Bus (de)Bugger expansion card - * sold as a DIY kit in the late 1980's in The Netherlands. - * This card was a assemble-yourself 8bit ISA addon card for - * PC and AT systems that had several tools to aid in low- - * level debugging (mostly for faulty BIOSes, bootloaders - * and system kernels...) - * * Definitions for the ACPI emulation. * * @@ -46,21 +39,31 @@ extern "C" { #define ACPI_ENABLE 0xf1 #define ACPI_DISABLE 0xf0 +#define VEN_INTEL 0x8086 +#define VEN_VIA 0x1106 + typedef struct { uint8_t plvl2, plvl3, + smicmd, gpio_dir, + gpio_val, extsmi_val, timer32, gpireg[3], gporeg[4]; uint16_t pmsts, pmen, pmcntrl, gpsts, - gpen, io_base; - int slot, - irq_mode, irq_pin; + gpen, io_base, + gpscien, gpsmien, + pscntrl, gpo_val, + gpi_val; + int slot, irq_mode, + irq_pin, smi_lock, + smi_active; uint32_t pcntrl, glbsts, devsts, glben, glbctl, devctl, - timer_val; + padsts, paden, + gptren, timer_val; uint64_t tmr_overflow_time; } acpi_regs_t; @@ -69,13 +72,16 @@ typedef struct { acpi_regs_t regs; uint8_t gporeg_default[4]; + int vendor; pc_timer_t timer; nvr_t *nvr; + apm_t *apm; } acpi_t; /* Global variables. */ -extern const device_t acpi_device; +extern const device_t acpi_intel_device; +extern const device_t acpi_via_device; /* Functions. */ diff --git a/src/include/86box/apm.h b/src/include/86box/apm.h index b7754f78a..1fd985951 100644 --- a/src/include/86box/apm.h +++ b/src/include/86box/apm.h @@ -6,13 +6,6 @@ * * This file is part of the 86Box distribution. 
* - * Implementation of the ISA Bus (de)Bugger expansion card - * sold as a DIY kit in the late 1980's in The Netherlands. - * This card was a assemble-yourself 8bit ISA addon card for - * PC and AT systems that had several tools to aid in low- - * level debugging (mostly for faulty BIOSes, bootloaders - * and system kernels...) - * * Definitions for the Advanced Power Management emulation. * * @@ -38,7 +31,9 @@ typedef struct /* Global variables. */ extern const device_t apm_device; + extern const device_t apm_pci_device; +extern const device_t apm_pci_acpi_device; /* Functions. */ diff --git a/src/include/86box/ddma.h b/src/include/86box/ddma.h index 2d0a9f8d9..64642f2ae 100644 --- a/src/include/86box/ddma.h +++ b/src/include/86box/ddma.h @@ -6,13 +6,6 @@ * * This file is part of the 86Box distribution. * - * Implementation of the ISA Bus (de)Bugger expansion card - * sold as a DIY kit in the late 1980's in The Netherlands. - * This card was a assemble-yourself 8bit ISA addon card for - * PC and AT systems that had several tools to aid in low- - * level debugging (mostly for faulty BIOSes, bootloaders - * and system kernels...) - * * Definitions for the Distributed DMA emulation. * * diff --git a/src/include/86box/piix.h b/src/include/86box/piix.h index e07607606..9f6724fae 100644 --- a/src/include/86box/piix.h +++ b/src/include/86box/piix.h @@ -4,7 +4,8 @@ * PC systems and compatibles from 1981 through fairly recent * system designs based on the PCI bus. * - * Emulation of the Intel PIIX and PIIX3 Xcelerators. + * Emulation of the Intel PIIX, PIIX3, PIIX4, PIIX4E, and SMSC + * SLC90E66 (Victory66) Xcelerators. * * Emulation core dispatcher. * diff --git a/src/include/86box/usb.h b/src/include/86box/usb.h index b027796a2..aced538c8 100644 --- a/src/include/86box/usb.h +++ b/src/include/86box/usb.h @@ -6,13 +6,6 @@ * * This file is part of the 86Box distribution. 
* - * Implementation of the ISA Bus (de)Bugger expansion card - * sold as a DIY kit in the late 1980's in The Netherlands. - * This card was a assemble-yourself 8bit ISA addon card for - * PC and AT systems that had several tools to aid in low- - * level debugging (mostly for faulty BIOSes, bootloaders - * and system kernels...) - * * Definitions for the Distributed DMA emulation. * * diff --git a/src/intel_piix.c b/src/intel_piix.c index 257b9be9a..4a9a80f1e 100644 --- a/src/intel_piix.c +++ b/src/intel_piix.c @@ -4,7 +4,8 @@ * PC systems and compatibles from 1981 through fairly recent * system designs based on the PCI bus. * - * Emulation of the Intel PIIX and PIIX3 Xcelerators. + * Emulation of the Intel PIIX, PIIX3, PIIX4, PIIX4E, and SMSC + * SLC90E66 (Victory66) Xcelerators. * * PRD format : * word 0 - base address @@ -779,7 +780,7 @@ piix_write(int func, int addr, uint8_t val, void *priv) case 0x04: fregs[0x04] = (val & 0x01); smbus_update_io_mapping(dev); - apm_set_do_smi(dev->apm, !!(fregs[0x5b] & 0x02) && !!(val & 0x01)); + apm_set_do_smi(dev->acpi->apm, !!(fregs[0x5b] & 0x02) && !!(val & 0x01)); break; case 0x07: if (val & 0x08) @@ -844,7 +845,7 @@ piix_write(int func, int addr, uint8_t val, void *priv) break; case 0x5b: fregs[addr] = val & 0x03; - apm_set_do_smi(dev->apm, !!(val & 0x02) && !!(fregs[0x04] & 0x01)); + apm_set_do_smi(dev->acpi->apm, !!(val & 0x02) && !!(fregs[0x04] & 0x01)); break; case 0x63: fregs[addr] = val & 0xf7; @@ -1109,9 +1110,7 @@ piix_apm_out(uint16_t port, uint8_t val, void *p) piix_t *dev = (piix_t *) p; if (dev->apm->do_smi) { - if (dev->type > 3) - dev->acpi->regs.glbsts |= 0x20; - else + if (dev->type < 4) dev->regs[0][0xaa] |= 0x80; } } @@ -1189,7 +1188,7 @@ static void dev->nvr = device_add(&piix4_nvr_device); dev->smbus = device_add(&piix4_smbus_device); - dev->acpi = device_add(&acpi_device); + dev->acpi = device_add(&acpi_intel_device); acpi_set_slot(dev->acpi, dev->pci_slot); acpi_set_nvr(dev->acpi, dev->nvr); @@ 
-1206,9 +1205,13 @@ static void } else cpu_fast_off_val = cpu_fast_off_count = 0; - dev->apm = device_add(&apm_pci_device); - /* APM intercept handler to update PIIX/PIIX3 and PIIX4/4E/SMSC ACPI SMI status on APM SMI. */ - io_sethandler(0x00b2, 0x0001, NULL, NULL, NULL, piix_apm_out, NULL, NULL, dev); + /* On PIIX4, PIIX4E, and SMSC, APM is added by the ACPI device. */ + if (dev->type < 4) { + dev->apm = device_add(&apm_pci_device); + /* APM intercept handler to update PIIX/PIIX3 and PIIX4/4E/SMSC ACPI SMI status on APM SMI. */ + io_sethandler(0x00b2, 0x0001, NULL, NULL, NULL, piix_apm_out, NULL, NULL, dev); + } + dev->port_92 = device_add(&port_92_pci_device); dma_alias_set();