diff --git a/CMakeLists.txt b/CMakeLists.txt index ca75ebc25..2d5a9dc1e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -151,6 +151,7 @@ endif() # ------ ----------- ---- --------- --------- cmake_dependent_option(AMD_K5 "AMD K5" ON "DEV_BRANCH" OFF) cmake_dependent_option(AN430TX "Intel AN430TX" ON "DEV_BRANCH" OFF) +cmake_dependent_option(CDROM_MITSUMI "Mitsumi CDROM" ON "DEV_BRANCH" OFF) cmake_dependent_option(CYRIX_6X86 "Cyrix 6x86" ON "DEV_BRANCH" OFF) cmake_dependent_option(G100 "Matrox Productiva G100" ON "DEV_BRANCH" OFF) cmake_dependent_option(GUSMAX "Gravis UltraSound MAX" ON "DEV_BRANCH" OFF) @@ -163,6 +164,7 @@ cmake_dependent_option(OPEN_AT "OpenAT" cmake_dependent_option(OPL4ML "OPL4-ML daughterboard" ON "DEV_BRANCH" OFF) cmake_dependent_option(PCL "Generic PCL5e Printer" ON "DEV_BRANCH" OFF) cmake_dependent_option(SIO_DETECT "Super I/O Detection Helper" ON "DEV_BRANCH" OFF) +cmake_dependent_option(WACOM "Wacom Input Devices" ON "DEV_BRANCH" OFF) cmake_dependent_option(XL24 "ATI VGA Wonder XL24 (ATI-28800-6)" ON "DEV_BRANCH" OFF) # Ditto but for Qt diff --git a/README.md b/README.md index b4b4142be..ce38632e6 100644 --- a/README.md +++ b/README.md @@ -53,10 +53,14 @@ We operate an IRC channel and a Discord server for discussing 86Box, its develop [![Visit our Discord server](https://discordapp.com/api/guilds/262614059009048590/embed.png)](https://discord.gg/QXK9XTv) Contributions ---------- +------------- We welcome all contributions to the project, as long as the [contribution guidelines](CONTRIBUTING.md) are followed. +Building +--------- +For instructions on how to build 86Box from source, see the [build guide](https://86box.readthedocs.io/en/latest/dev/buildguide.html). + Licensing --------- diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a36dd796c..ad339040f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -27,11 +27,9 @@ if(CMAKE_SYSTEM_NAME MATCHES "Linux") add_compile_definitions(_FILE_OFFSET_BITS=64 _LARGEFILE_SOURCE=1 _LARGEFILE64_SOURCE=1) endif() -if(PCL) - target_compile_definitions(86Box PRIVATE USE_PCL) -endif() - -if(CPPTHREADS) +if(WIN32) + target_sources(86Box PRIVATE qt/win_thread.c) +else() target_sources(86Box PRIVATE thread.cpp) endif() @@ -52,10 +50,6 @@ if(DYNAREC) add_compile_definitions(USE_DYNAREC) endif() -if(DEV_BRANCH) - add_compile_definitions(DEV_BRANCH) -endif() - if(DISCORD) add_compile_definitions(DISCORD) target_sources(86Box PRIVATE discord.c) @@ -147,8 +141,10 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR}/include) include_directories(include) if(NEW_DYNAREC) include_directories(cpu codegen_new) -else() +elseif(DYNAREC) include_directories(cpu codegen) +else() + include_directories(cpu) endif() add_subdirectory(cdrom) @@ -157,7 +153,7 @@ add_subdirectory(chipset) add_subdirectory(cpu) if(NEW_DYNAREC) add_subdirectory(codegen_new) -else() +elseif(DYNAREC) add_subdirectory(codegen) endif() diff --git a/src/cdrom/CMakeLists.txt b/src/cdrom/CMakeLists.txt index 897b353cb..0b4455eda 100644 --- a/src/cdrom/CMakeLists.txt +++ b/src/cdrom/CMakeLists.txt @@ -17,8 +17,14 @@ find_package(PkgConfig REQUIRED) pkg_check_modules(SNDFILE REQUIRED IMPORTED_TARGET sndfile) -add_library(cdrom OBJECT cdrom.c cdrom_image_backend.c cdrom_image_viso.c cdrom_image.c cdrom_ioctl.c cdrom_mitsumi.c) +add_library(cdrom OBJECT cdrom.c cdrom_image_backend.c cdrom_image_viso.c cdrom_image.c cdrom_ioctl.c) target_link_libraries(86Box PkgConfig::SNDFILE) + +if(CDROM_MITSUMI) + target_compile_definitions(cdrom PRIVATE USE_CDROM_MITSUMI) + target_sources(cdrom PRIVATE cdrom_mitsumi.c) +endif() + if (WIN32) # MSYS2 target_link_libraries(86Box -static ${SNDFILE_STATIC_LIBRARIES}) diff --git a/src/cdrom/cdrom_image_backend.c b/src/cdrom/cdrom_image_backend.c index 25085285c..7dcde1cdb 100644 --- a/src/cdrom/cdrom_image_backend.c +++ b/src/cdrom/cdrom_image_backend.c @@ -170,25 +170,23 @@ cleanup_error: static int bin_read(void *priv, uint8_t *buffer, uint64_t seek, size_t count) { - track_file_t *tf; - - cdrom_image_backend_log("CDROM: binary_read(%08lx, pos=%" PRIu64 " count=%lu)\n", - tf->fp, seek, count); + track_file_t *tf = NULL; if ((tf = (track_file_t *) priv)->fp == NULL) return 0; + cdrom_image_backend_log("CDROM: binary_read(%08lx, pos=%" PRIu64 " count=%lu)\n", + tf->fp, seek, count); + if (fseeko64(tf->fp, seek, SEEK_SET) == -1) { -#ifdef ENABLE_CDROM_IMAGE_BACKEND_LOG cdrom_image_backend_log("CDROM: binary_read failed during seek!\n"); -#endif + return 0; } if (fread(buffer, count, 1, tf->fp) != 1) { -#ifdef ENABLE_CDROM_IMAGE_BACKEND_LOG cdrom_image_backend_log("CDROM: binary_read failed during read!\n"); -#endif + return 0; } @@ -207,9 +205,7 @@ bin_read(void *priv, uint8_t *buffer, uint64_t seek, size_t count) static uint64_t bin_get_length(void *priv) { - track_file_t *tf; - - cdrom_image_backend_log("CDROM: binary_length(%08lx)\n", tf->fp); + track_file_t *tf = NULL; if ((tf = (track_file_t *) priv)->fp == NULL) return 0; @@ -1159,10 +1155,9 @@ cdi_load_cue(cd_img_t *cdi, const char *cuefile) trk.file = audio_init(filename, &error); } if (error) { -#ifdef ENABLE_CDROM_IMAGE_BACKEND_LOG cdrom_image_backend_log("CUE: cannot open file '%s' in cue sheet!\n", filename); -#endif + if (trk.file != NULL) { trk.file->close(trk.file); trk.file = NULL; @@ -1177,9 +1172,8 @@ cdi_load_cue(cd_img_t *cdi, const char *cuefile) /* Ignored commands. */ success = 1; } else { -#ifdef ENABLE_CDROM_IMAGE_BACKEND_LOG cdrom_image_backend_log("CUE: unsupported command '%s' in cue sheet!\n", command); -#endif + success = 0; } diff --git a/src/chipset/CMakeLists.txt b/src/chipset/CMakeLists.txt index 9a706e15b..1b3de5bf2 100644 --- a/src/chipset/CMakeLists.txt +++ b/src/chipset/CMakeLists.txt @@ -21,7 +21,7 @@ add_library(chipset OBJECT 82c100.c acc2168.c cs8230.c ali1429.c ali1435.c ali14 sis_85c496.c sis_85c50x.c sis_5511.c sis_5571.c sis_5581.c sis_5591.c sis_5600.c sis_5511_h2p.c sis_5571_h2p.c sis_5581_h2p.c sis_5591_h2p.c sis_5600_h2p.c sis_5513_p2i.c sis_5513_ide.c sis_5572_usb.c sis_5595_pmu.c sis_55xx.c via_vt82c49x.c - via_vt82c505.c sis_85c310.c sis_85c4xx.c sis_85c496.c sis_85c50x.c gc100.c stpc.c + via_vt82c505.c gc100.c stpc.c umc_8886.c umc_hb4.c umc_8890.c via_apollo.c via_pipc.c vl82c480.c wd76c10.c) if(OLIVETTI) diff --git a/src/chipset/sis_85c50x.c b/src/chipset/sis_85c50x.c index 137ddb5cf..2286105ce 100644 --- a/src/chipset/sis_85c50x.c +++ b/src/chipset/sis_85c50x.c @@ -107,8 +107,8 @@ sis_85c50x_shadow_recalc(sis_85c50x_t *dev) if (dev->states[8 + i] != state) { mem_set_mem_state_both(base, 0x00004000, state); sis_85c50x_log("%05X-%05X: R%c, W%c\n", base, base + 0x3fff, - (dev->pci_conf[0x543 & (0x80 >> i)) ? - ((dev->pci_conf[0x54] & 0x40) ? 'I' : 'D') : 'E', + (dev->pci_conf[0x54] & (0x80 >> i)) ? + ((dev->pci_conf[0x53] & 0x40) ? 'I' : 'D') : 'E', (dev->pci_conf[0x54] & (0x80 >> i)) ? ((dev->pci_conf[0x53] & 0x20) ? 'P' : 'I') : 'E'); dev->states[8 + i] = state; diff --git a/src/codegen/codegen.h b/src/codegen/codegen.h index 6d30211a6..d020fc57f 100644 --- a/src/codegen/codegen.h +++ b/src/codegen/codegen.h @@ -311,6 +311,7 @@ extern codegen_timing_t codegen_timing_686; extern codegen_timing_t codegen_timing_486; extern codegen_timing_t codegen_timing_winchip; extern codegen_timing_t codegen_timing_winchip2; +extern codegen_timing_t codegen_timing_k5; extern codegen_timing_t codegen_timing_k6; extern codegen_timing_t codegen_timing_p6; diff --git a/src/codegen_new/codegen.h b/src/codegen_new/codegen.h index deeeb899c..eecfa249b 100644 --- a/src/codegen_new/codegen.h +++ b/src/codegen_new/codegen.h @@ -341,6 +341,7 @@ extern codegen_timing_t codegen_timing_686; extern codegen_timing_t codegen_timing_486; extern codegen_timing_t codegen_timing_winchip; extern codegen_timing_t codegen_timing_winchip2; +extern codegen_timing_t codegen_timing_k5; extern codegen_timing_t codegen_timing_k6; extern codegen_timing_t codegen_timing_p6; diff --git a/src/cpu/386.c b/src/cpu/386.c index 657bd0894..3524e0d1e 100644 --- a/src/cpu/386.c +++ b/src/cpu/386.c @@ -279,7 +279,7 @@ exec386_2386(int32_t cycs) if (!cpu_state.abrt) { #ifdef ENABLE_386_LOG if (in_smm) - x386_2386_log("[%04X:%08X] %08X\n", CS, cpu_state.pc, fetchdat); + x386_log("[%04X:%08X] %08X\n", CS, cpu_state.pc, fetchdat); #endif opcode = fetchdat & 0xFF; fetchdat >>= 8; diff --git a/src/cpu/386_ops.h b/src/cpu/386_ops.h index 3e0d191f2..130166344 100644 --- a/src/cpu/386_ops.h +++ b/src/cpu/386_ops.h @@ -1384,7 +1384,7 @@ const OpFn OP_TABLE(pentium_0f)[1024] = { // clang-format on }; -# if defined(DEV_BRANCH) && defined(USE_CYRIX_6X86) +# ifdef USE_CYRIX_6X86 const OpFn OP_TABLE(c6x86_0f)[1024] = { // clang-format off /*16-bit data, 16-bit addr*/ @@ -1476,7 +1476,7 @@ const OpFn OP_TABLE(c6x86_0f)[1024] = { /*f0*/ ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, // clang-format on }; -# endif +# endif /* USE_CYRIX_6X86 */ const OpFn OP_TABLE(pentiummmx_0f)[1024] = { // clang-format off @@ -1754,7 +1754,7 @@ const OpFn OP_TABLE(k62_0f)[1024] = { // clang-format on }; -# if defined(DEV_BRANCH) && defined(USE_CYRIX_6X86) +# ifdef USE_CYRIX_6X86 const OpFn OP_TABLE(c6x86mx_0f)[1024] = { // clang-format off /*16-bit data, 16-bit addr*/ @@ -1846,7 +1846,7 @@ const OpFn OP_TABLE(c6x86mx_0f)[1024] = { /*f0*/ ILLEGAL, opPSLLW_a32, opPSLLD_a32, opPSLLQ_a32, ILLEGAL, opPMADDWD_a32, ILLEGAL, ILLEGAL, opPSUBB_a32, opPSUBW_a32, opPSUBD_a32, ILLEGAL, opPADDB_a32, opPADDW_a32, opPADDD_a32, ILLEGAL, // clang-format on }; -# endif +# endif /* USE_CYRIX_6X86 */ const OpFn OP_TABLE(pentiumpro_0f)[1024] = { // clang-format off diff --git a/src/cpu/808x.c b/src/cpu/808x.c index 6705563c0..990f9ae87 100644 --- a/src/cpu/808x.c +++ b/src/cpu/808x.c @@ -130,10 +130,10 @@ typedef int (*OpFn)(uint32_t fetchdat); static int tempc_fpu = 0; #ifdef ENABLE_808X_LOG +#if 0 void dumpregs(int); - +#endif int x808x_do_log = ENABLE_808X_LOG; -int indump = 0; static void x808x_log(const char *fmt, ...) diff --git a/src/cpu/CMakeLists.txt b/src/cpu/CMakeLists.txt index a3677767d..8ae97e2e6 100644 --- a/src/cpu/CMakeLists.txt +++ b/src/cpu/CMakeLists.txt @@ -19,14 +19,22 @@ add_library(cpu OBJECT cpu.c cpu_table.c fpu.c x86.c 808x.c 386.c 386_common.c if(AMD_K5) target_compile_definitions(cpu PRIVATE USE_AMD_K5) + +if(DYNAREC) + add_library(ctk5 OBJECT codegen_timing_k5.c) + target_link_libraries(86Box ctk5) +endif() + endif() if(CYRIX_6X86) target_compile_definitions(cpu PRIVATE USE_CYRIX_6X86) +if(DYNAREC) add_library(ct686 OBJECT codegen_timing_686.c) target_link_libraries(86Box ct686) endif() +endif() if(DYNAREC) target_sources(cpu PRIVATE 386_dynarec_ops.c) diff --git a/src/cpu/codegen_timing_486.c b/src/cpu/codegen_timing_486.c index 548a9ec28..90d682211 100644 --- a/src/cpu/codegen_timing_486.c +++ b/src/cpu/codegen_timing_486.c @@ -18,7 +18,7 @@ #define CYCLES(c) (int *) c #define CYCLES2(c16, c32) (int *) ((-1 & ~0xffff) | c16 | (c32 << 8)) -static int *opcode_timings[256] = { +static int *opcode_timings_486[256] = { // clang-format off /*00*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), NULL, /*10*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), @@ -42,7 +42,7 @@ static int *opcode_timings[256] = { // clang-format on }; -static int *opcode_timings_mod3[256] = { +static int *opcode_timings_486_mod3[256] = { // clang-format off /*00*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), NULL, /*10*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), @@ -66,7 +66,7 @@ static int *opcode_timings_mod3[256] = { // clang-format on }; -static int *opcode_timings_0f[256] = { +static int *opcode_timings_486_0f[256] = { // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), NULL, CYCLES(195), CYCLES(7), NULL, CYCLES(1000), CYCLES(10000), NULL, NULL, NULL, NULL, NULL, NULL, /*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -89,7 +89,7 @@ static int *opcode_timings_0f[256] = { /*f0*/ NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, // clang-format on }; -static int *opcode_timings_0f_mod3[256] = { +static int *opcode_timings_486_0f_mod3[256] = { // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), NULL, CYCLES(195), CYCLES(7), NULL, CYCLES(1000), CYCLES(10000), NULL, NULL, NULL, NULL, NULL, NULL, /*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -113,65 +113,65 @@ static int *opcode_timings_0f_mod3[256] = { // clang-format on }; -static int *opcode_timings_shift[8] = { +static int *opcode_timings_486_shift[8] = { // clang-format off CYCLES(7), CYCLES(7), CYCLES(10), CYCLES(10), CYCLES(7), CYCLES(7), CYCLES(7), CYCLES(7) }; -static int *opcode_timings_shift_mod3[8] = { +static int *opcode_timings_486_shift_mod3[8] = { // clang-format off CYCLES(3), CYCLES(3), CYCLES(9), CYCLES(9), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3) // clang-format on }; -static int *opcode_timings_f6[8] = { +static int *opcode_timings_486_f6[8] = { // clang-format off &timing_rm, NULL, &timing_mm, &timing_mm, CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) // clang-format on }; -static int *opcode_timings_f6_mod3[8] = { +static int *opcode_timings_486_f6_mod3[8] = { // clang-format off &timing_rr, NULL, &timing_rr, &timing_rr, CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) // clang-format on }; -static int *opcode_timings_f7[8] = { +static int *opcode_timings_486_f7[8] = { // clang-format off &timing_rm, NULL, &timing_mm, &timing_mm, CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) // clang-format on }; -static int *opcode_timings_f7_mod3[8] = { +static int *opcode_timings_486_f7_mod3[8] = { // clang-format off &timing_rr, NULL, &timing_rr, &timing_rr, CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) }; -static int *opcode_timings_ff[8] = { +static int *opcode_timings_486_ff[8] = { // clang-format off &timing_mm, &timing_mm, CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), NULL }; -static int *opcode_timings_ff_mod3[8] = { +static int *opcode_timings_486_ff_mod3[8] = { // clang-format off &timing_rr, &timing_rr, CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), NULL // clang-format on }; -static int *opcode_timings_d8[8] = { +static int *opcode_timings_486_d8[8] = { // clang-format off /* FADDil FMULil FCOMil FCOMPil FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(8), CYCLES(11), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) // clang-format on }; -static int *opcode_timings_d8_mod3[8] = { +static int *opcode_timings_486_d8_mod3[8] = { // clang-format off /* FADD FMUL FCOM FCOMP FSUB FSUBR FDIV FDIVR*/ CYCLES(8), CYCLES(16), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) // clang-format on }; -static int *opcode_timings_d9[8] = { +static int *opcode_timings_486_d9[8] = { // clang-format off /* FLDs FSTs FSTPs FLDENV FLDCW FSTENV FSTCW*/ CYCLES(3), NULL, CYCLES(7), CYCLES(7), CYCLES(34), CYCLES(4), CYCLES(67), CYCLES(3) // clang-format on }; -static int *opcode_timings_d9_mod3[64] = { +static int *opcode_timings_486_d9_mod3[64] = { // clang-format off /*FLD*/ CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), @@ -192,25 +192,25 @@ static int *opcode_timings_d9_mod3[64] = { // clang-format on }; -static int *opcode_timings_da[8] = { +static int *opcode_timings_486_da[8] = { // clang-format off /* FADDil FMULil FCOMil FCOMPil FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(8), CYCLES(11), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) // clang-format on }; -static int *opcode_timings_da_mod3[8] = { +static int *opcode_timings_486_da_mod3[8] = { // clang-format off NULL, NULL, NULL, NULL, NULL, CYCLES(5), NULL, NULL // clang-format on }; -static int *opcode_timings_db[8] = { +static int *opcode_timings_486_db[8] = { // clang-format off /* FLDil FSTil FSTPil FLDe FSTPe*/ CYCLES(9), NULL, CYCLES(28), CYCLES(28), NULL, CYCLES(5), NULL, CYCLES(6) // clang-format on }; -static int *opcode_timings_db_mod3[64] = { +static int *opcode_timings_486_db_mod3[64] = { // clang-format off NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -224,74 +224,74 @@ static int *opcode_timings_db_mod3[64] = { // clang-format on }; -static int *opcode_timings_dc[8] = { +static int *opcode_timings_486_dc[8] = { // clang-format off /* opFADDd_a16 opFMULd_a16 opFCOMd_a16 opFCOMPd_a16 opFSUBd_a16 opFSUBRd_a16 opFDIVd_a16 opFDIVRd_a16*/ CYCLES(8), CYCLES(11), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) // clang-format on }; -static int *opcode_timings_dc_mod3[8] = { +static int *opcode_timings_486_dc_mod3[8] = { // clang-format off /* opFADDr opFMULr opFSUBRr opFSUBr opFDIVRr opFDIVr*/ CYCLES(8), CYCLES(16), NULL, NULL, CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) // clang-format on }; -static int *opcode_timings_dd[8] = { +static int *opcode_timings_486_dd[8] = { // clang-format off /* FLDd FSTd FSTPd FRSTOR FSAVE FSTSW*/ CYCLES(3), NULL, CYCLES(8), CYCLES(8), CYCLES(131), NULL, CYCLES(154), CYCLES(3) // clang-format on }; -static int *opcode_timings_dd_mod3[8] = { +static int *opcode_timings_486_dd_mod3[8] = { // clang-format off /* FFFREE FST FSTP FUCOM FUCOMP*/ CYCLES(3), NULL, CYCLES(3), CYCLES(3), CYCLES(4), CYCLES(4), NULL, NULL // clang-format on }; -static int *opcode_timings_de[8] = { +static int *opcode_timings_486_de[8] = { // clang-format off /* FADDiw FMULiw FCOMiw FCOMPiw FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(8), CYCLES(11), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) // clang-format on }; -static int *opcode_timings_de_mod3[8] = { +static int *opcode_timings_486_de_mod3[8] = { // clang-format off /* FADD FMUL FCOMPP FSUB FSUBR FDIV FDIVR*/ CYCLES(8), CYCLES(16), NULL, CYCLES(5), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) // clang-format on }; -static int *opcode_timings_df[8] = { +static int *opcode_timings_486_df[8] = { // clang-format off /* FILDiw FISTiw FISTPiw FILDiq FBSTP FISTPiq*/ CYCLES(13), NULL, CYCLES(29), CYCLES(29), NULL, CYCLES(10), CYCLES(172), CYCLES(28) // clang-format on }; -static int *opcode_timings_df_mod3[8] = { +static int *opcode_timings_486_df_mod3[8] = { // clang-format off /* FFREE FST FSTP FUCOM FUCOMP*/ CYCLES(3), NULL, CYCLES(3), CYCLES(3), CYCLES(4), CYCLES(4), NULL, NULL // clang-format on }; -static int *opcode_timings_8x[8] = { +static int *opcode_timings_486_8x[8] = { // clang-format off &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm // clang-format on }; -static int *opcode_timings_8x_mod3[8] = { +static int *opcode_timings_486_8x_mod3[8] = { // clang-format off &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm // clang-format on }; -static int *opcode_timings_81[8] = { +static int *opcode_timings_486_81[8] = { // clang-format off &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm // clang-format on }; -static int *opcode_timings_81_mod3[8] = { +static int *opcode_timings_486_81_mod3[8] = { // clang-format off &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm // clang-format on @@ -330,7 +330,7 @@ codegen_timing_486_start(void) void codegen_timing_486_prefix(uint8_t prefix, uint32_t fetchdat) { - timing_count += COUNT(opcode_timings[prefix], 0); + timing_count += COUNT(opcode_timings_486[prefix], 0); last_prefix = prefix; } @@ -344,47 +344,47 @@ codegen_timing_486_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(u switch (last_prefix) { case 0x0f: - timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + timings = mod3 ? opcode_timings_486_0f_mod3 : opcode_timings_486_0f; deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; break; case 0xd8: - timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + timings = mod3 ? opcode_timings_486_d8_mod3 : opcode_timings_486_d8; deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; opcode = (opcode >> 3) & 7; break; case 0xd9: - timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + timings = mod3 ? opcode_timings_486_d9_mod3 : opcode_timings_486_d9; deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xda: - timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + timings = mod3 ? opcode_timings_486_da_mod3 : opcode_timings_486_da; deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; opcode = (opcode >> 3) & 7; break; case 0xdb: - timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + timings = mod3 ? opcode_timings_486_db_mod3 : opcode_timings_486_db; deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xdc: - timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + timings = mod3 ? opcode_timings_486_dc_mod3 : opcode_timings_486_dc; deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; opcode = (opcode >> 3) & 7; break; case 0xdd: - timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + timings = mod3 ? opcode_timings_486_dd_mod3 : opcode_timings_486_dd; deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; opcode = (opcode >> 3) & 7; break; case 0xde: - timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + timings = mod3 ? opcode_timings_486_de_mod3 : opcode_timings_486_de; deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; opcode = (opcode >> 3) & 7; break; case 0xdf: - timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + timings = mod3 ? opcode_timings_486_df_mod3 : opcode_timings_486_df; deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; opcode = (opcode >> 3) & 7; break; @@ -394,12 +394,12 @@ codegen_timing_486_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(u case 0x80: case 0x82: case 0x83: - timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + timings = mod3 ? opcode_timings_486_8x_mod3 : opcode_timings_486_8x; deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; opcode = (fetchdat >> 3) & 7; break; case 0x81: - timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + timings = mod3 ? opcode_timings_486_81_mod3 : opcode_timings_486_81; deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; opcode = (fetchdat >> 3) & 7; break; @@ -410,29 +410,29 @@ codegen_timing_486_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(u case 0xd1: case 0xd2: case 0xd3: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + timings = mod3 ? opcode_timings_486_shift_mod3 : opcode_timings_486_shift; deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; opcode = (fetchdat >> 3) & 7; break; case 0xf6: - timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + timings = mod3 ? opcode_timings_486_f6_mod3 : opcode_timings_486_f6; deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; opcode = (fetchdat >> 3) & 7; break; case 0xf7: - timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + timings = mod3 ? opcode_timings_486_f7_mod3 : opcode_timings_486_f7; deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; opcode = (fetchdat >> 3) & 7; break; case 0xff: - timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + timings = mod3 ? opcode_timings_486_ff_mod3 : opcode_timings_486_ff; deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; opcode = (fetchdat >> 3) & 7; break; default: - timings = mod3 ? opcode_timings_mod3 : opcode_timings; + timings = mod3 ? opcode_timings_486_mod3 : opcode_timings_486; deps = mod3 ? opcode_deps_mod3 : opcode_deps; break; } diff --git a/src/cpu/codegen_timing_686.c b/src/cpu/codegen_timing_686.c index 6ea5ac543..285881956 100644 --- a/src/cpu/codegen_timing_686.c +++ b/src/cpu/codegen_timing_686.c @@ -65,7 +65,7 @@ static uint32_t prev_fetchdat; static uint32_t last_regmask_modified; static uint32_t regmask_modified; -static uint32_t opcode_timings[256] = { +static uint32_t opcode_timings_686[256] = { // clang-format off /* ADD ADD ADD ADD*/ /*00*/ PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RM, PAIR_XY | CYCLES_RM, @@ -202,7 +202,7 @@ static uint32_t opcode_timings[256] = { // clang-format on }; -static uint32_t opcode_timings_mod3[256] = { +static uint32_t opcode_timings_686_mod3[256] = { // clang-format off /* ADD ADD ADD ADD*/ /*00*/ PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, @@ -340,7 +340,7 @@ static uint32_t opcode_timings_mod3[256] = { // clang-format on }; -static uint32_t opcode_timings_0f[256] = { +static uint32_t opcode_timings_686_0f[256] = { // clang-format off /*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10), INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID, @@ -423,7 +423,7 @@ static uint32_t opcode_timings_0f[256] = { PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, INVALID, // clang-format on }; -static uint32_t opcode_timings_0f_mod3[256] = { +static uint32_t opcode_timings_686_0f_mod3[256] = { // clang-format off /*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10), INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID, @@ -506,44 +506,44 @@ static uint32_t opcode_timings_0f_mod3[256] = { // clang-format on }; -static uint32_t opcode_timings_shift[8] = { +static uint32_t opcode_timings_686_shift[8] = { // clang-format off PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(4), PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, // clang-format on }; -static uint32_t opcode_timings_shift_mod3[8] = { +static uint32_t opcode_timings_686_shift_mod3[8] = { // clang-format off PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(4), PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, // clang-format on }; -static uint32_t opcode_timings_shift_imm[8] = { +static uint32_t opcode_timings_686_shift_imm[8] = { // clang-format off PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES(8), PAIR_XY | CYCLES(9), PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, // clang-format on }; -static uint32_t opcode_timings_shift_imm_mod3[8] = { +static uint32_t opcode_timings_686_shift_imm_mod3[8] = { // clang-format off PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(4), PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, // clang-format on }; -static uint32_t opcode_timings_shift_cl[8] = { +static uint32_t opcode_timings_686_shift_cl[8] = { // clang-format off PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(8), PAIR_XY | CYCLES(9), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), // clang-format on }; -static uint32_t opcode_timings_shift_cl_mod3[8] = { +static uint32_t opcode_timings_686_shift_cl_mod3[8] = { // clang-format off PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(8), PAIR_XY | CYCLES(9), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), // clang-format on }; -static uint32_t opcode_timings_f6[8] = { +static uint32_t opcode_timings_686_f6[8] = { // clang-format off /* TST NOT NEG*/ PAIR_XY | CYCLES_RM, INVALID, PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), @@ -551,7 +551,7 @@ static uint32_t opcode_timings_f6[8] = { PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(18), PAIR_NP | CYCLES(18) // clang-format on }; -static uint32_t opcode_timings_f6_mod3[8] = { +static uint32_t opcode_timings_686_f6_mod3[8] = { // clang-format off /* TST NOT NEG*/ PAIR_XY | CYCLES_REG, INVALID, PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), @@ -559,7 +559,7 @@ static uint32_t opcode_timings_f6_mod3[8] = { PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(18), PAIR_NP | CYCLES(18) // clang-format on }; -static uint32_t opcode_timings_f7[8] = { +static uint32_t opcode_timings_686_f7[8] = { // clang-format off /* TST NOT NEG*/ PAIR_XY | CYCLES_REG, INVALID, PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), @@ -567,7 +567,7 @@ static uint32_t opcode_timings_f7[8] = { PAIR_NP | CYCLES_MULTI(4,10), PAIR_NP | CYCLES_MULTI(4,10), PAIR_NP | CYCLES_MULTI(19,27), PAIR_NP | CYCLES_MULTI(22,30) // clang-format on }; -static uint32_t opcode_timings_f7_mod3[8] = { +static uint32_t opcode_timings_686_f7_mod3[8] = { // clang-format off /* TST NOT NEG*/ PAIR_XY | CYCLES_REG, INVALID, PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), @@ -575,7 +575,7 @@ static uint32_t opcode_timings_f7_mod3[8] = { PAIR_NP | CYCLES_MULTI(4,10), PAIR_NP | CYCLES_MULTI(4,10), PAIR_NP | CYCLES_MULTI(19,27), PAIR_NP | CYCLES_MULTI(22,30) // clang-format on }; -static uint32_t opcode_timings_ff[8] = { +static uint32_t opcode_timings_686_ff[8] = { // clang-format off /* INC DEC CALL CALL far*/ PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_X_BRANCH | CYCLES(3), PAIR_NP | CYCLES(5), @@ -583,7 +583,7 @@ static uint32_t opcode_timings_ff[8] = { PAIR_X_BRANCH | CYCLES(3), PAIR_NP | CYCLES(5), PAIR_XY | CYCLES(1), INVALID // clang-format on }; -static uint32_t opcode_timings_ff_mod3[8] = { +static uint32_t opcode_timings_686_ff_mod3[8] = { // clang-format off /* INC DEC CALL CALL far*/ PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_X_BRANCH | CYCLES(1), PAIR_XY | CYCLES(5), @@ -592,7 +592,7 @@ static uint32_t opcode_timings_ff_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_d8[8] = { +static uint32_t opcode_timings_686_d8[8] = { // clang-format off /* FADDs FMULs FCOMs FCOMPs*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(6), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), @@ -600,7 +600,7 @@ static uint32_t opcode_timings_d8[8] = { PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(34), PAIR_X | CYCLES(34) // clang-format on }; -static uint32_t opcode_timings_d8_mod3[8] = { +static uint32_t opcode_timings_686_d8_mod3[8] = { // clang-format off /* FADD FMUL FCOM FCOMP*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(6), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), @@ -609,7 +609,7 @@ static uint32_t opcode_timings_d8_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_d9[8] = { +static uint32_t opcode_timings_686_d9[8] = { // clang-format off /* FLDs FSTs FSTPs*/ PAIR_X | CYCLES(2), INVALID, PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), @@ -617,7 +617,7 @@ static uint32_t opcode_timings_d9[8] = { PAIR_X | CYCLES(30), PAIR_X | CYCLES(4), PAIR_X | CYCLES(24), PAIR_X | CYCLES(5) // clang-format on }; -static uint32_t opcode_timings_d9_mod3[64] = { +static uint32_t opcode_timings_686_d9_mod3[64] = { // clang-format off /*FLD*/ PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), @@ -650,7 +650,7 @@ static uint32_t opcode_timings_d9_mod3[64] = { // clang-format on }; -static uint32_t opcode_timings_da[8] = { +static uint32_t opcode_timings_686_da[8] = { // clang-format off /* FIADDl FIMULl FICOMl FICOMPl*/ PAIR_X | CYCLES(12), PAIR_X | CYCLES(11), PAIR_X | CYCLES(10), PAIR_X | CYCLES(10), @@ -658,14 +658,14 @@ static uint32_t opcode_timings_da[8] = { PAIR_X | CYCLES(29), PAIR_X | CYCLES(27), PAIR_X | CYCLES(38), PAIR_X | CYCLES(48) // clang-format on }; -static uint32_t opcode_timings_da_mod3[8] = { +static uint32_t opcode_timings_686_da_mod3[8] = { // clang-format off PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), INVALID, PAIR_X | CYCLES(5), INVALID, INVALID // clang-format on }; -static uint32_t opcode_timings_db[8] = { +static uint32_t opcode_timings_686_db[8] = { // clang-format off /* FLDil FSTil FSTPil*/ PAIR_X | CYCLES(2), INVALID, PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), @@ -673,7 +673,7 @@ static uint32_t opcode_timings_db[8] = { INVALID, PAIR_X | CYCLES(2), INVALID, PAIR_X | CYCLES(2) // clang-format on }; -static uint32_t opcode_timings_db_mod3[64] = { +static uint32_t opcode_timings_686_db_mod3[64] = { // clang-format off PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), @@ -703,7 +703,7 @@ static uint32_t opcode_timings_db_mod3[64] = { // clang-format on }; -static uint32_t opcode_timings_dc[8] = { +static uint32_t opcode_timings_686_dc[8] = { // clang-format off /* FADDd FMULd FCOMd FCOMPd*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), @@ -711,7 +711,7 @@ static uint32_t opcode_timings_dc[8] = { PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(34), PAIR_X | CYCLES(34) // clang-format on }; -static uint32_t opcode_timings_dc_mod3[8] = { +static uint32_t opcode_timings_686_dc_mod3[8] = { // clang-format off /* opFADDr opFMULr*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), INVALID, INVALID, @@ -720,7 +720,7 @@ static uint32_t opcode_timings_dc_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_dd[8] = { +static uint32_t opcode_timings_686_dd[8] = { // clang-format off /* FLDd FSTd FSTPd*/ PAIR_X | CYCLES(2), INVALID, PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), @@ -728,7 +728,7 @@ static uint32_t opcode_timings_dd[8] = { PAIR_X | CYCLES(72), INVALID, PAIR_X | CYCLES(67), PAIR_X | CYCLES(2) // clang-format on }; -static uint32_t opcode_timings_dd_mod3[8] = { +static uint32_t opcode_timings_686_dd_mod3[8] = { // clang-format off /* FFFREE FST FSTP*/ PAIR_X | CYCLES(3), INVALID, PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), @@ -737,14 +737,14 @@ static uint32_t opcode_timings_dd_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_de[8] = { +static uint32_t opcode_timings_686_de[8] = { // clang-format off /* FIADDw FIMULw FICOMw FICOMPw*/ PAIR_X | CYCLES(12), PAIR_X | CYCLES(11), PAIR_X | CYCLES(10), PAIR_X | CYCLES(10), /* FISUBw FISUBRw FIDIVw FIDIVRw*/ PAIR_X | CYCLES(27), PAIR_X | CYCLES(27), PAIR_X | CYCLES(38), PAIR_X | CYCLES(38) }; -static uint32_t opcode_timings_de_mod3[8] = { +static uint32_t opcode_timings_686_de_mod3[8] = { // clang-format off /* FADD FMUL FCOMPP*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), INVALID, PAIR_X | CYCLES(7), @@ -753,7 +753,7 @@ static uint32_t opcode_timings_de_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_df[8] = { +static uint32_t opcode_timings_686_df[8] = { // clang-format off /* FILDiw FISTiw FISTPiw*/ PAIR_X | CYCLES(8), INVALID, PAIR_X | CYCLES(10), PAIR_X | CYCLES(13), @@ -761,7 +761,7 @@ static uint32_t opcode_timings_df[8] = { INVALID, PAIR_X | CYCLES(8), PAIR_X | CYCLES(63), PAIR_X | CYCLES(13) // clang-format on }; -static uint32_t opcode_timings_df_mod3[8] = { +static uint32_t opcode_timings_686_df_mod3[8] = { // clang-format off INVALID, INVALID, INVALID, INVALID, /* FSTSW AX*/ @@ -769,25 +769,25 @@ static uint32_t opcode_timings_df_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_8x[8] = { +static uint32_t opcode_timings_686_8x[8] = { // clang-format off PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RM // clang-format on }; -static uint32_t opcode_timings_8x_mod3[8] = { +static uint32_t opcode_timings_686_8x_mod3[8] = { // clang-format off PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG // clang-format on }; -static uint32_t opcode_timings_81[8] = { +static uint32_t opcode_timings_686_81[8] = { // clang-format off PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RM // clang-format on }; -static uint32_t opcode_timings_81_mod3[8] = { +static uint32_t opcode_timings_686_81_mod3[8] = { // clang-format off PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG @@ -874,47 +874,47 @@ codegen_timing_686_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(u switch (last_prefix) { case 0x0f: - timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + timings = mod3 ? opcode_timings_686_0f_mod3 : opcode_timings_686_0f; deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; break; case 0xd8: - timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + timings = mod3 ? opcode_timings_686_d8_mod3 : opcode_timings_686_d8; deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; opcode = (opcode >> 3) & 7; break; case 0xd9: - timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + timings = mod3 ? opcode_timings_686_d9_mod3 : opcode_timings_686_d9; deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xda: - timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + timings = mod3 ? opcode_timings_686_da_mod3 : opcode_timings_686_da; deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; opcode = (opcode >> 3) & 7; break; case 0xdb: - timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + timings = mod3 ? opcode_timings_686_db_mod3 : opcode_timings_686_db; deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xdc: - timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + timings = mod3 ? opcode_timings_686_dc_mod3 : opcode_timings_686_dc; deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; opcode = (opcode >> 3) & 7; break; case 0xdd: - timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + timings = mod3 ? opcode_timings_686_dd_mod3 : opcode_timings_686_dd; deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; opcode = (opcode >> 3) & 7; break; case 0xde: - timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + timings = mod3 ? opcode_timings_686_de_mod3 : opcode_timings_686_de; deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; opcode = (opcode >> 3) & 7; break; case 0xdf: - timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + timings = mod3 ? opcode_timings_686_df_mod3 : opcode_timings_686_df; deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; opcode = (opcode >> 3) & 7; break; @@ -924,55 +924,55 @@ codegen_timing_686_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(u case 0x80: case 0x82: case 0x83: - timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + timings = mod3 ? opcode_timings_686_8x_mod3 : opcode_timings_686_8x; deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; opcode = (fetchdat >> 3) & 7; break; case 0x81: - timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + timings = mod3 ? opcode_timings_686_81_mod3 : opcode_timings_686_81; deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; opcode = (fetchdat >> 3) & 7; break; case 0xc0: case 0xc1: - timings = mod3 ? opcode_timings_shift_imm_mod3 : opcode_timings_shift_imm; + timings = mod3 ? opcode_timings_686_shift_imm_mod3 : opcode_timings_686_shift_imm; deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; opcode = (fetchdat >> 3) & 7; break; case 0xd0: case 0xd1: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + timings = mod3 ? opcode_timings_686_shift_mod3 : opcode_timings_686_shift; deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; opcode = (fetchdat >> 3) & 7; break; case 0xd2: case 0xd3: - timings = mod3 ? opcode_timings_shift_cl_mod3 : opcode_timings_shift_cl; + timings = mod3 ? opcode_timings_686_shift_cl_mod3 : opcode_timings_686_shift_cl; deps = mod3 ? opcode_deps_shift_cl_mod3 : opcode_deps_shift_cl; opcode = (fetchdat >> 3) & 7; break; case 0xf6: - timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + timings = mod3 ? opcode_timings_686_f6_mod3 : opcode_timings_686_f6; deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; opcode = (fetchdat >> 3) & 7; break; case 0xf7: - timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + timings = mod3 ? opcode_timings_686_f7_mod3 : opcode_timings_686_f7; deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; opcode = (fetchdat >> 3) & 7; break; case 0xff: - timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + timings = mod3 ? opcode_timings_686_ff_mod3 : opcode_timings_686_ff; deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; opcode = (fetchdat >> 3) & 7; break; default: - timings = mod3 ? opcode_timings_mod3 : opcode_timings; + timings = mod3 ? opcode_timings_686_mod3 : opcode_timings_686; deps = mod3 ? opcode_deps_mod3 : opcode_deps; break; } diff --git a/src/cpu/codegen_timing_k5.c b/src/cpu/codegen_timing_k5.c new file mode 100644 index 000000000..42b1129fe --- /dev/null +++ b/src/cpu/codegen_timing_k5.c @@ -0,0 +1,2232 @@ +/*Most of the vector instructions here are a total guess. + Some of the timings are based on https://web.archive.org/web/20181122095446/http://users.atw.hu/instlatx64/AuthenticAMD0000502_k5_InstLatX86.txt*/ +#include +#include +#include +#include +#include <86box/86box.h> +#include <86box/mem.h> +#include "cpu.h" +#include <86box/machine.h> + +#include "x86.h" +#include "x86_ops.h" +#include "x86seg_common.h" +#include "x87_sf.h" +#include "x87.h" +#include "386_common.h" +#include "codegen.h" +#include "codegen_ops.h" +#include "codegen_timing_common.h" + +typedef enum uop_type_t { + UOP_ALU = 0, /*Executes in Integer X or Y units*/ + UOP_ALUX, /*Executes in Integer X unit*/ + UOP_LOAD, /*Executes in Load unit*/ + UOP_STORE, /*Executes in Store unit*/ + UOP_FLOAD, /*Executes in Load unit*/ + UOP_FSTORE, /*Executes in Store unit*/ + UOP_MLOAD, /*Executes in Load unit*/ + UOP_MSTORE, /*Executes in Store unit*/ + UOP_FLOAT, /*Executes in Floating Point unit*/ + UOP_MEU, /*Executes in Multimedia unit*/ + UOP_MEU_SHIFT, /*Executes in Multimedia unit or ALU X/Y. Uses MMX shifter*/ + UOP_MEU_MUL, /*Executes in Multimedia unit or ALU X/Y. Uses MMX/3DNow multiplier*/ + UOP_MEU_3DN, /*Executes in Multimedia unit or ALU X/Y. Uses 3DNow ALU*/ + UOP_BRANCH, /*Executes in Branch unit*/ + UOP_LIMM /*Does not require an execution unit*/ +} uop_type_t; + +typedef enum decode_type_t { + DECODE_SHORT, + DECODE_LONG, + DECODE_VECTOR +} decode_type_t; + +#define MAX_UOPS 10 + +typedef struct risc86_uop_t { + uop_type_t type; + int throughput; + int latency; +} risc86_uop_t; + +typedef struct risc86_instruction_t { + int nr_uops; + decode_type_t decode_type; + risc86_uop_t uop[MAX_UOPS]; +} risc86_instruction_t; + +static const risc86_instruction_t alu_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t alux_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t load_alu_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t load_alux_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t alu_store_op = { + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t alux_store_op = { + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1} +}; + +static const risc86_instruction_t branch_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +}; + +static const risc86_instruction_t limm_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LIMM, .throughput = 1, .latency = 1} +}; + +static const risc86_instruction_t load_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2} +}; + +static const risc86_instruction_t store_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +}; + +static const risc86_instruction_t bswap_op = { + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t leave_op = { + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t lods_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t loop_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t mov_reg_seg_op = { + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, +}; +static const risc86_instruction_t movs_op = { + .nr_uops = 4, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t pop_reg_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t pop_mem_op = { + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t push_imm_op = { + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2}, +}; +static const risc86_instruction_t push_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t push_seg_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t stos_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t test_reg_op = { + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t test_reg_b_op = { + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t test_mem_imm_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t test_mem_imm_b_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t xchg_op = { + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; + +static const risc86_instruction_t m3dn_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU_3DN, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t mmx_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t mmx_mul_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} +}; +static const risc86_instruction_t mmx_shift_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU_SHIFT, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t load_3dn_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_MEU_3DN, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t load_mmx_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_MEU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t load_mmx_mul_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_MEU_MUL, .throughput = 1, .latency = 2} +}; +static const risc86_instruction_t load_mmx_shift_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_MEU_SHIFT, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t mload_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MLOAD, .throughput = 1, .latency = 2} +}; + +static const risc86_instruction_t mstore_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MSTORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t pmul_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} +}; +static const risc86_instruction_t pmul_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_MEU_MUL, .throughput = 1, .latency = 2} +}; + +static const risc86_instruction_t float_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} +}; +static const risc86_instruction_t load_float_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_FLOAT, .throughput = 2, .latency = 2} +}; +static const risc86_instruction_t fstore_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} +}; + +static const risc86_instruction_t fdiv_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAT, .throughput = 40, .latency = 40} +}; +static const risc86_instruction_t fdiv_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_FLOAT, .throughput = 40, .latency = 40} +}; +static const risc86_instruction_t fsin_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAT, .throughput = 62, .latency = 62} +}; +static const risc86_instruction_t fsqrt_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAT, .throughput = 41, .latency = 41} +}; + +static const risc86_instruction_t vector_fldcw_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAT, .throughput = 8, .latency = 8} +}; +static const risc86_instruction_t vector_float_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} +}; +static const risc86_instruction_t vector_float_l_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAT, .throughput = 50, .latency = 50} +}; +static const risc86_instruction_t vector_flde_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_FLOAD, .throughput = 1, .latency = 2}, + .uop[2] = { .type = UOP_FLOAT, .throughput = 2, .latency = 2} +}; +static const risc86_instruction_t vector_fste_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2}, + .uop[1] = { .type = UOP_FSTORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_FSTORE, .throughput = 1, .latency = 1} +}; + +static const risc86_instruction_t vector_alu1_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alu2_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alu3_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alu6_op = { + .nr_uops = 6, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[4] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[5] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alux1_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alux3_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alux6_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[4] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[5] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alu_store_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alux_store_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_arpl_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, + .uop[1] = { .type = UOP_ALU, .throughput = 3, .latency = 3} +}; +static const risc86_instruction_t vector_bound_op = { + .nr_uops = 4, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_bsx_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 10, .latency = 10} +}; +static const risc86_instruction_t vector_call_far_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_cli_sti_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 7, .latency = 7} +}; +static const risc86_instruction_t vector_cmps_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_cmpsb_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_cmpxchg_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, +}; +static const risc86_instruction_t vector_cmpxchg_b_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, +}; +static const risc86_instruction_t vector_cpuid_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 22, .latency = 22} +}; +static const risc86_instruction_t vector_div16_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 10, .latency = 10} +}; +static const risc86_instruction_t vector_div16_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_ALUX, .throughput = 10, .latency = 10} +}; +static const risc86_instruction_t vector_div32_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 18, .latency = 18} +}; +static const risc86_instruction_t vector_div32_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_ALUX, .throughput = 18, .latency = 18} +}; +static const risc86_instruction_t vector_emms_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 25, .latency = 25} +}; +static const risc86_instruction_t vector_enter_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_ALU, .throughput = 10, .latency = 10} +}; +static const risc86_instruction_t vector_femms_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 6, .latency = 6} +}; +static const risc86_instruction_t vector_in_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11} +}; +static const risc86_instruction_t vector_ins_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1 }, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1 } +}; +static const risc86_instruction_t vector_int_op = { + .nr_uops = 5, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 20, .latency = 20}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1 }, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1 }, + .uop[3] = { .type = UOP_STORE, .throughput = 1, .latency = 1 }, + .uop[4] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1 } +}; +static const risc86_instruction_t vector_iret_op = { + .nr_uops = 5, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[2] = { .type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[3] = { .type = UOP_ALU, .throughput = 20, .latency = 20}, + .uop[4] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1 } +}; +static const risc86_instruction_t vector_invd_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1000, .latency = 1000} +}; +static const risc86_instruction_t vector_jmp_far_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, + .uop[1] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_load_alu_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_load_alux_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_loop_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_lss_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[2] = { .type = UOP_ALU, .throughput = 3, .latency = 3} +}; +static const risc86_instruction_t vector_mov_mem_seg_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_mov_seg_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 3, .latency = 3} +}; +static const risc86_instruction_t vector_mov_seg_reg_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3} +}; +static const risc86_instruction_t vector_mul_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_mul_mem_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_mul64_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_mul64_mem_op = { + .nr_uops = 4, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_out_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_STORE, .throughput = 10, .latency = 10} +}; +static const risc86_instruction_t vector_outs_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1 }, + .uop[1] = { .type = UOP_STORE, .throughput = 10, .latency = 10}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1 } +}; +static const risc86_instruction_t vector_pusha_op = { + .nr_uops = 8, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[4] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[5] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[6] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[7] = { .type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_popa_op = { + .nr_uops = 8, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[4] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[5] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[6] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[7] = { .type = UOP_LOAD, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_popf_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_ALUX, .throughput = 17, .latency = 17} +}; +static const risc86_instruction_t vector_push_mem_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_pushf_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_ret_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_retf_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 3, .latency = 3}, + .uop[2] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_scas_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_scasb_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_setcc_mem_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_FSTORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_setcc_reg_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_test_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_test_mem_b_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_xchg_mem_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_xlat_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 2} +}; +static const risc86_instruction_t vector_wbinvd_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 10000, .latency = 10000} +}; + +#define INVALID NULL + +static const risc86_instruction_t *opcode_timings_k5[256] = { + // clang-format off +/* ADD ADD ADD ADD*/ +/*00*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, +/* ADD ADD PUSH ES POP ES*/ + &alux_op, &alu_op, &push_seg_op, &vector_mov_seg_mem_op, +/* OR OR OR OR*/ + &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, +/* OR OR PUSH CS */ + &alux_op, &alu_op, &push_seg_op, INVALID, + +/* ADC ADC ADC ADC*/ +/*10*/ &vector_alux_store_op, &vector_alu_store_op, &vector_load_alux_op, &vector_load_alu_op, +/* ADC ADC PUSH SS POP SS*/ + &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, +/* SBB SBB SBB SBB*/ +/*10*/ &vector_alux_store_op, &vector_alu_store_op, &vector_load_alux_op, &vector_load_alu_op, +/* SBB SBB PUSH DS POP DS*/ + &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, + +/* AND AND AND AND*/ +/*20*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, +/* AND AND DAA*/ + &alux_op, &alu_op, INVALID, &vector_alux1_op, +/* SUB SUB SUB SUB*/ + &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, +/* SUB SUB DAS*/ + &alux_op, &alu_op, INVALID, &vector_alux1_op, + +/* XOR XOR XOR XOR*/ +/*30*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, +/* XOR XOR AAA*/ + &alux_op, &alu_op, INVALID, &vector_alux6_op, +/* CMP CMP CMP CMP*/ + &load_alux_op, &load_alu_op, &load_alux_op, &load_alu_op, +/* CMP CMP AAS*/ + &alux_op, &alu_op, INVALID, &vector_alux6_op, + +/* INC EAX INC ECX INC EDX INC EBX*/ +/*40*/ &alu_op, &alu_op, &alu_op, &alu_op, +/* INC ESP INC EBP INC ESI INC EDI*/ + &alu_op, &alu_op, &alu_op, &alu_op, +/* DEC EAX DEC ECX DEC EDX DEC EBX*/ + &alu_op, &alu_op, &alu_op, &alu_op, +/* DEC ESP DEC EBP DEC ESI DEC EDI*/ + &alu_op, &alu_op, &alu_op, &alu_op, + +/* PUSH EAX PUSH ECX PUSH EDX PUSH EBX*/ +/*50*/ &store_op, &store_op, &store_op, &store_op, +/* PUSH ESP PUSH EBP PUSH ESI PUSH EDI*/ + &store_op, &store_op, &store_op, &store_op, +/* POP EAX POP ECX POP EDX POP EBX*/ + &pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, +/* POP ESP POP EBP POP ESI POP EDI*/ + &pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, + +/* PUSHA POPA BOUND ARPL*/ +/*60*/ &vector_pusha_op, &vector_popa_op, &vector_bound_op, &vector_arpl_op, + INVALID, INVALID, INVALID, INVALID, +/* PUSH imm IMUL PUSH imm IMUL*/ + &push_imm_op, &vector_mul_op, &push_imm_op, &vector_mul_op, +/* INSB INSW OUTSB OUTSW*/ + &vector_ins_op, &vector_ins_op, &vector_outs_op, &vector_outs_op, + +/* Jxx*/ +/*70*/ &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + +/*80*/ INVALID, INVALID, INVALID, INVALID, +/* TEST TEST XCHG XCHG*/ + &vector_test_mem_b_op, &vector_test_mem_op, &vector_xchg_mem_op, &vector_xchg_mem_op, +/* MOV MOV MOV MOV*/ + &store_op, &store_op, &load_op, &load_op, +/* MOV from seg LEA MOV to seg POP*/ + &vector_mov_mem_seg_op, &store_op, &vector_mov_seg_mem_op, &pop_mem_op, + +/* NOP XCHG XCHG XCHG*/ +/*90*/ &limm_op, &xchg_op, &xchg_op, &xchg_op, +/* XCHG XCHG XCHG XCHG*/ + &xchg_op, &xchg_op, &xchg_op, &xchg_op, +/* CBW CWD CALL far WAIT*/ + &vector_alu1_op, &vector_alu1_op, &vector_call_far_op, &limm_op, +/* PUSHF POPF SAHF LAHF*/ + &vector_pushf_op, &vector_popf_op, &vector_alux1_op, &vector_alux1_op, + +/* MOV MOV MOV MOV*/ +/*a0*/ &load_op, &load_op, &store_op, &store_op, +/* MOVSB MOVSW CMPSB CMPSW*/ + &movs_op, &movs_op, &vector_cmpsb_op, &vector_cmps_op, +/* TEST TEST STOSB STOSW*/ + &test_reg_b_op, &test_reg_op, &stos_op, &stos_op, +/* LODSB LODSW SCASB SCASW*/ + &lods_op, &lods_op, &vector_scasb_op, &vector_scas_op, + +/* MOV*/ +/*b0*/ &limm_op, &limm_op, &limm_op, &limm_op, + &limm_op, &limm_op, &limm_op, &limm_op, + &limm_op, &limm_op, &limm_op, &limm_op, + &limm_op, &limm_op, &limm_op, &limm_op, + +/* RET imm RET*/ +/*c0*/ INVALID, INVALID, &vector_ret_op, &vector_ret_op, +/* LES LDS MOV MOV*/ + &vector_lss_op, &vector_lss_op, &store_op, &store_op, +/* ENTER LEAVE RETF RETF*/ + &vector_enter_op, &leave_op, &vector_retf_op, &vector_retf_op, +/* INT3 INT INTO IRET*/ + &vector_int_op, &vector_int_op, &vector_int_op, &vector_iret_op, + + +/*d0*/ INVALID, INVALID, INVALID, INVALID, +/* AAM AAD SETALC XLAT*/ + &vector_alux6_op, &vector_alux3_op, &vector_alux1_op, &vector_xlat_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, +/* LOOPNE LOOPE LOOP JCXZ*/ +/*e0*/ &vector_loop_op, &vector_loop_op, &loop_op, &vector_loop_op, +/* IN AL IN AX OUT_AL OUT_AX*/ + &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, +/* CALL JMP JMP JMP*/ + &store_op, &branch_op, &vector_jmp_far_op, &branch_op, +/* IN AL IN AX OUT_AL OUT_AX*/ + &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, + +/* REPNE REPE*/ +/*f0*/ INVALID, INVALID, INVALID, INVALID, +/* HLT CMC*/ + &vector_alux1_op, &vector_alu2_op, INVALID, INVALID, +/* CLC STC CLI STI*/ + &vector_alu1_op, &vector_alu1_op, &vector_cli_sti_op, &vector_cli_sti_op, +/* CLD STD INCDEC*/ + &vector_alu1_op, &vector_alu1_op, &alux_store_op, INVALID + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_mod3[256] = { + // clang-format off +/* ADD ADD ADD ADD*/ +/*00*/ &alux_op, &alu_op, &alux_op, &alu_op, +/* ADD ADD PUSH ES POP ES*/ + &alux_op, &alu_op, &push_seg_op, &vector_mov_seg_mem_op, +/* OR OR OR OR*/ + &alux_op, &alu_op, &alux_op, &alu_op, +/* OR OR PUSH CS */ + &alux_op, &alu_op, &push_seg_op, INVALID, + +/* ADC ADC ADC ADC*/ +/*10*/ &vector_alux1_op, &vector_alu1_op, &vector_alux1_op, &vector_alu1_op, +/* ADC ADC PUSH SS POP SS*/ + &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, +/* SBB SBB SBB SBB*/ + &vector_alux1_op, &vector_alu1_op, &vector_alux1_op, &vector_alu1_op, +/* SBB SBB PUSH DS POP DS*/ + &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, + +/* AND AND AND AND*/ +/*20*/ &alux_op, &alu_op, &alux_op, &alu_op, +/* AND AND DAA*/ + &alux_op, &alu_op, INVALID, &vector_alux1_op, +/* SUB SUB SUB SUB*/ + &alux_op, &alu_op, &alux_op, &alu_op, +/* SUB SUB DAS*/ + &alux_op, &alu_op, INVALID, &vector_alux1_op, + +/* XOR XOR XOR XOR*/ +/*30*/ &alux_op, &alu_op, &alux_op, &alu_op, +/* XOR XOR AAA*/ + &alux_op, &alu_op, INVALID, &vector_alux6_op, +/* CMP CMP CMP CMP*/ + &alux_op, &alu_op, &alux_op, &alu_op, +/* CMP CMP AAS*/ + &alux_op, &alu_op, INVALID, &vector_alux6_op, + +/* INC EAX INC ECX INC EDX INC EBX*/ +/*40*/ &alu_op, &alu_op, &alu_op, &alu_op, +/* INC ESP INC EBP INC ESI INC EDI*/ + &alu_op, &alu_op, &alu_op, &alu_op, +/* DEC EAX DEC ECX DEC EDX DEC EBX*/ + &alu_op, &alu_op, &alu_op, &alu_op, +/* DEC ESP DEC EBP DEC ESI DEC EDI*/ + &alu_op, &alu_op, &alu_op, &alu_op, + +/* PUSH EAX PUSH ECX PUSH EDX PUSH EBX*/ +/*50*/ &store_op, &store_op, &store_op, &store_op, +/* PUSH ESP PUSH EBP PUSH ESI PUSH EDI*/ + &store_op, &store_op, &store_op, &store_op, +/* POP EAX POP ECX POP EDX POP EBX*/ + &pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, +/* POP ESP POP EBP POP ESI POP EDI*/ + &pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, + +/* PUSHA POPA BOUND ARPL*/ +/*60*/ &vector_pusha_op, &vector_popa_op, &vector_bound_op, &vector_arpl_op, + INVALID, INVALID, INVALID, INVALID, +/* PUSH imm IMUL PUSH imm IMUL*/ + &push_imm_op, &vector_mul_op, &push_imm_op, &vector_mul_op, +/* INSB INSW OUTSB OUTSW*/ + &vector_ins_op, &vector_ins_op, &vector_outs_op, &vector_outs_op, + +/* Jxx*/ +/*70*/ &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + +/*80*/ INVALID, INVALID, INVALID, INVALID, +/* TEST TEST XCHG XCHG*/ + &vector_alu1_op, &vector_alu1_op, &vector_alu3_op, &vector_alu3_op, +/* MOV MOV MOV MOV*/ + &store_op, &store_op, &load_op, &load_op, +/* MOV from seg LEA MOV to seg POP*/ + &mov_reg_seg_op, &store_op, &vector_mov_seg_reg_op, &pop_reg_op, + +/* NOP XCHG XCHG XCHG*/ +/*90*/ &limm_op, &xchg_op, &xchg_op, &xchg_op, +/* XCHG XCHG XCHG XCHG*/ + &xchg_op, &xchg_op, &xchg_op, &xchg_op, +/* CBW CWD CALL far WAIT*/ + &vector_alu1_op, &vector_alu1_op, &vector_call_far_op, &limm_op, +/* PUSHF POPF SAHF LAHF*/ + &vector_pushf_op, &vector_popf_op, &vector_alux1_op, &vector_alux1_op, + +/* MOV MOV MOV MOV*/ +/*a0*/ &load_op, &load_op, &store_op, &store_op, +/* MOVSB MOVSW CMPSB CMPSW*/ + &movs_op, &movs_op, &vector_cmpsb_op, &vector_cmps_op, +/* TEST TEST STOSB STOSW*/ + &test_reg_b_op, &test_reg_op, &stos_op, &stos_op, +/* LODSB LODSW SCASB SCASW*/ + &lods_op, &lods_op, &vector_scasb_op, &vector_scas_op, + +/* MOV*/ +/*b0*/ &limm_op, &limm_op, &limm_op, &limm_op, + &limm_op, &limm_op, &limm_op, &limm_op, + &limm_op, &limm_op, &limm_op, &limm_op, + &limm_op, &limm_op, &limm_op, &limm_op, + +/* RET imm RET*/ +/*c0*/ INVALID, INVALID, &vector_ret_op, &vector_ret_op, +/* LES LDS MOV MOV*/ + &vector_lss_op, &vector_lss_op, &store_op, &store_op, +/* ENTER LEAVE RETF RETF*/ + &vector_enter_op, &leave_op, &vector_retf_op, &vector_retf_op, +/* INT3 INT INTO IRET*/ + &vector_int_op, &vector_int_op, &vector_int_op, &vector_iret_op, + + +/*d0*/ INVALID, INVALID, INVALID, INVALID, +/* AAM AAD SETALC XLAT*/ + &vector_alux6_op, &vector_alux3_op, &vector_alux1_op, &vector_xlat_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, +/* LOOPNE LOOPE LOOP JCXZ*/ +/*e0*/ &vector_loop_op, &vector_loop_op, &loop_op, &vector_loop_op, +/* IN AL IN AX OUT_AL OUT_AX*/ + &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, +/* CALL JMP JMP JMP*/ + &store_op, &branch_op, &vector_jmp_far_op, &branch_op, +/* IN AL IN AX OUT_AL OUT_AX*/ + &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, + +/* REPNE REPE*/ +/*f0*/ INVALID, INVALID, INVALID, INVALID, +/* HLT CMC*/ + &vector_alux1_op, &vector_alu2_op, INVALID, INVALID, +/* CLC STC CLI STI*/ + &vector_alu1_op, &vector_alu1_op, &vector_cli_sti_op, &vector_cli_sti_op, +/* CLD STD INCDEC*/ + &vector_alu1_op, &vector_alu1_op, &vector_alux1_op, INVALID + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_0f[256] = { + // clang-format off +/*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, + INVALID, &vector_alu6_op, &vector_alu6_op, INVALID, + &vector_invd_op, &vector_wbinvd_op, INVALID, INVALID, + INVALID, &load_op, &vector_femms_op, &load_3dn_op, + +/*10*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*20*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, + &vector_alu6_op, &vector_alu6_op, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*30*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*40*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*50*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*60*/ &load_mmx_op, &load_mmx_op, &load_mmx_op, &load_mmx_op, + &load_mmx_op, &load_mmx_op, &load_mmx_op, &load_mmx_op, + &load_mmx_op, &load_mmx_op, &load_mmx_op, &load_mmx_op, + INVALID, INVALID, &mload_op, &mload_op, + +/*70*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, + &load_mmx_op, &load_mmx_op, &load_mmx_op, &vector_emms_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, &mstore_op, &mstore_op, + +/*80*/ &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + +/*90*/ &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, + &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, + &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, + &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, + +/*a0*/ &push_seg_op, &vector_mov_seg_mem_op, &vector_cpuid_op, &vector_load_alu_op, + &vector_alu_store_op, &vector_alu_store_op, INVALID, INVALID, + &push_seg_op, &vector_mov_seg_mem_op, INVALID, &vector_load_alu_op, + &vector_alu_store_op, &vector_alu_store_op, INVALID, &vector_mul_op, + +/*b0*/ &vector_cmpxchg_b_op, &vector_cmpxchg_op, &vector_lss_op, &vector_load_alu_op, + &vector_lss_op, &vector_lss_op, &load_alux_op, &load_alu_op, + INVALID, INVALID, &vector_load_alu_op, &vector_load_alu_op, + &vector_bsx_op, &vector_bsx_op, &load_alux_op, &load_alu_op, + +/*c0*/ &vector_alux_store_op, &vector_alu_store_op, INVALID, INVALID, + INVALID, INVALID, INVALID, &vector_cmpxchg_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + +/*d0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, + INVALID, &load_mmx_mul_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + +/*e0*/ &load_mmx_op, &load_mmx_shift_op, &load_mmx_shift_op, INVALID, + INVALID, &pmul_mem_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + +/*f0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, + INVALID, &pmul_mem_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, + &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_0f_mod3[256] = { + // clang-format off +/*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, + INVALID, &vector_alu6_op, &vector_alu6_op, INVALID, + &vector_invd_op, &vector_wbinvd_op, INVALID, INVALID, + INVALID, INVALID, &vector_femms_op, &m3dn_op, + +/*10*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*20*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, + &vector_alu6_op, &vector_alu6_op, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*30*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*40*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*50*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*60*/ &mmx_op, &mmx_op, &mmx_op, &mmx_op, + &mmx_op, &mmx_op, &mmx_op, &mmx_op, + &mmx_op, &mmx_op, &mmx_op, &mmx_op, + INVALID, INVALID, &mmx_op, &mmx_op, + +/*70*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + &mmx_op, &mmx_op, &mmx_op, &vector_emms_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, &mmx_op, &mmx_op, + +/*80*/ &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + +/*90*/ &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, + &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, + &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, + &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, + +/*a0*/ &push_seg_op, &vector_mov_seg_mem_op, &vector_cpuid_op, &vector_alu1_op, + &vector_alu1_op, &vector_alu1_op, INVALID, INVALID, + &push_seg_op, &vector_mov_seg_mem_op, INVALID, &vector_alu1_op, + &vector_alu1_op, &vector_alu1_op, INVALID, &vector_mul_op, + +/*b0*/ &vector_cmpxchg_b_op, &vector_cmpxchg_op, &vector_lss_op, &vector_alu1_op, + &vector_lss_op, &vector_lss_op, &alux_op, &alu_op, + INVALID, INVALID, &vector_alu1_op, &vector_alu1_op, + &vector_bsx_op, &vector_bsx_op, &alux_op, &alu_op, + +/*c0*/ &vector_alux1_op, &vector_alu1_op, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + +/*d0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + INVALID, &mmx_mul_op, INVALID, INVALID, + &mmx_op, &mmx_op, INVALID, &mmx_op, + &mmx_op, &mmx_op, INVALID, &mmx_op, + +/*e0*/ &mmx_op, &mmx_shift_op, &mmx_shift_op, INVALID, + INVALID, &pmul_op, INVALID, INVALID, + &mmx_op, &mmx_op, INVALID, &mmx_op, + &mmx_op, &mmx_op, INVALID, &mmx_op, + +/*f0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + INVALID, &pmul_op, INVALID, INVALID, + &mmx_op, &mmx_op, &mmx_op, INVALID, + &mmx_op, &mmx_op, &mmx_op, INVALID, + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_0f0f[256] = { + // clang-format off +/*00*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, &load_3dn_op, INVALID, INVALID, + +/*10*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, &load_3dn_op, INVALID, INVALID, + +/*20*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*30*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*40*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*50*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*60*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*70*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*80*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*90*/ &load_3dn_op, INVALID, INVALID, INVALID, + &load_3dn_op, INVALID, &load_3dn_op, &load_3dn_op, + INVALID, INVALID, &load_3dn_op, INVALID, + INVALID, INVALID, &load_3dn_op, INVALID, + +/*a0*/ &load_3dn_op, INVALID, INVALID, INVALID, + &load_3dn_op, INVALID, &load_mmx_mul_op, &load_mmx_mul_op, + INVALID, INVALID, &load_3dn_op, INVALID, + INVALID, INVALID, &load_3dn_op, INVALID, + +/*b0*/ &load_3dn_op, INVALID, INVALID, INVALID, + &load_mmx_mul_op, INVALID, &load_mmx_mul_op, &load_mmx_mul_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, &load_mmx_op, + +/*c0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*d0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*e0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*f0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_0f0f_mod3[256] = { + // clang-format off +/*00*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, &m3dn_op, INVALID, INVALID, + +/*10*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, &m3dn_op, INVALID, INVALID, + +/*20*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*30*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*40*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*50*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*60*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*70*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*80*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*90*/ &m3dn_op, INVALID, INVALID, INVALID, + &m3dn_op, INVALID, &m3dn_op, &m3dn_op, + INVALID, INVALID, &m3dn_op, INVALID, + INVALID, INVALID, &m3dn_op, INVALID, + +/*a0*/ &m3dn_op, INVALID, INVALID, INVALID, + &m3dn_op, INVALID, &mmx_mul_op, &mmx_mul_op, + INVALID, INVALID, &m3dn_op, INVALID, + INVALID, INVALID, &m3dn_op, INVALID, + +/*b0*/ &m3dn_op, INVALID, INVALID, INVALID, + &mmx_mul_op, INVALID, &mmx_mul_op, &mmx_mul_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, &mmx_op, + +/*c0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*d0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*e0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*f0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_shift[8] = { + // clang-format off + &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, + &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_shift_b[8] = { + // clang-format off + &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, + &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_shift_mod3[8] = { + // clang-format off + &vector_alu1_op, &vector_alu1_op, &vector_alu1_op, &vector_alu1_op, + &alu_op, &alu_op, &alu_op, &alu_op + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_shift_b_mod3[8] = { + // clang-format off + &vector_alux1_op, &vector_alux1_op, &vector_alux1_op, &vector_alux1_op, + &alux_op, &alux_op, &alux_op, &alux_op + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_80[8] = { + // clang-format off + &alux_store_op, &alux_store_op, &vector_alux_store_op, &vector_alux_store_op, + &alux_store_op, &alux_store_op, &alux_store_op, &alux_store_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_80_mod3[8] = { + // clang-format off + &alux_op, &alux_op, &alux_store_op, &alux_store_op, + &alux_op, &alux_op, &alux_op, &alux_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_8x[8] = { + // clang-format off + &alu_store_op, &alu_store_op, &vector_alu_store_op, &vector_alu_store_op, + &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_8x_mod3[8] = { + // clang-format off + &alu_op, &alu_op, &alu_store_op, &alu_store_op, + &alu_op, &alu_op, &alu_op, &alu_op, + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_f6[8] = { + // clang-format off +/* TST NOT NEG*/ + &test_mem_imm_b_op, INVALID, &vector_alux_store_op, &vector_alux_store_op, +/* MUL IMUL DIV IDIV*/ + &vector_mul_mem_op, &vector_mul_mem_op, &vector_div16_mem_op, &vector_div16_mem_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_f6_mod3[8] = { + // clang-format off +/* TST NOT NEG*/ + &test_reg_b_op, INVALID, &alux_op, &alux_op, +/* MUL IMUL DIV IDIV*/ + &vector_mul_op, &vector_mul_op, &vector_div16_op, &vector_div16_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_f7[8] = { + // clang-format off +/* TST NOT NEG*/ + &test_mem_imm_op, INVALID, &vector_alu_store_op, &vector_alu_store_op, +/* MUL IMUL DIV IDIV*/ + &vector_mul64_mem_op, &vector_mul64_mem_op, &vector_div32_mem_op, &vector_div32_mem_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_f7_mod3[8] = { + // clang-format off +/* TST NOT NEG*/ + &test_reg_op, INVALID, &alu_op, &alu_op, +/* MUL IMUL DIV IDIV*/ + &vector_mul64_op, &vector_mul64_op, &vector_div32_op, &vector_div32_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_ff[8] = { + // clang-format off +/* INC DEC CALL CALL far*/ + &alu_store_op, &alu_store_op, &store_op, &vector_call_far_op, +/* JMP JMP far PUSH*/ + &branch_op, &vector_jmp_far_op, &push_mem_op, INVALID + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_ff_mod3[8] = { + // clang-format off +/* INC DEC CALL CALL far*/ + &vector_alu1_op, &vector_alu1_op, &store_op, &vector_call_far_op, +/* JMP JMP far PUSH*/ + &branch_op, &vector_jmp_far_op, &vector_push_mem_op, INVALID + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_d8[8] = { + // clang-format off +/* FADDs FMULs FCOMs FCOMPs*/ + &load_float_op, &load_float_op, &load_float_op, &load_float_op, +/* FSUBs FSUBRs FDIVs FDIVRs*/ + &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_d8_mod3[8] = { + // clang-format off +/* FADD FMUL FCOM FCOMP*/ + &float_op, &float_op, &float_op, &float_op, +/* FSUB FSUBR FDIV FDIVR*/ + &float_op, &float_op, &fdiv_op, &fdiv_op, + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_d9[8] = { + // clang-format off +/* FLDs FSTs FSTPs*/ + &load_float_op, INVALID, &fstore_op, &fstore_op, +/* FLDENV FLDCW FSTENV FSTCW*/ + &vector_float_l_op, &vector_fldcw_op, &vector_float_l_op, &vector_float_op + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_d9_mod3[64] = { + // clang-format off + /*FLD*/ + &float_op, &float_op, &float_op, &float_op, + &float_op, &float_op, &float_op, &float_op, + /*FXCH*/ + &float_op, &float_op, &float_op, &float_op, + &float_op, &float_op, &float_op, &float_op, + /*FNOP*/ + &float_op, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + /*FSTP*/ + &float_op, &float_op, &float_op, &float_op, + &float_op, &float_op, &float_op, &float_op, +/* opFCHS opFABS*/ + &float_op, &float_op, INVALID, INVALID, +/* opFTST opFXAM*/ + &float_op, &float_op, INVALID, INVALID, +/* opFLD1 opFLDL2T opFLDL2E opFLDPI*/ + &float_op, &float_op, &float_op, &float_op, +/* opFLDEG2 opFLDLN2 opFLDZ*/ + &float_op, &float_op, &float_op, INVALID, +/* opF2XM1 opFYL2X opFPTAN opFPATAN*/ + &fsin_op, &fsin_op, &fsin_op, &fsin_op, +/* opFDECSTP opFINCSTP,*/ + INVALID, INVALID, &float_op, &float_op, +/* opFPREM opFSQRT opFSINCOS*/ + &fdiv_op, INVALID, &fsqrt_op, &fsin_op, +/* opFRNDINT opFSCALE opFSIN opFCOS*/ + &float_op, &fdiv_op, &fsin_op, &fsin_op + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_da[8] = { + // clang-format off +/* FIADDl FIMULl FICOMl FICOMPl*/ + &load_float_op, &load_float_op, &load_float_op, &load_float_op, +/* FISUBl FISUBRl FIDIVl FIDIVRl*/ + &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_da_mod3[8] = { + // clang-format off + INVALID, INVALID, INVALID, INVALID, +/* FCOMPP*/ + INVALID, &float_op, INVALID, INVALID + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_db[8] = { + // clang-format off +/* FLDil FSTil FSTPil*/ + &load_float_op, INVALID, &fstore_op, &fstore_op, +/* FLDe FSTPe*/ + INVALID, &vector_flde_op, INVALID, &vector_fste_op + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_db_mod3[64] = { + // clang-format off + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/* opFNOP opFCLEX opFINIT*/ + INVALID, &float_op, &float_op, &float_op, +/* opFNOP opFNOP*/ + &float_op, &float_op, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_dc[8] = { + // clang-format off +/* FADDd FMULd FCOMd FCOMPd*/ + &load_float_op, &load_float_op, &load_float_op, &load_float_op, +/* FSUBd FSUBRd FDIVd FDIVRd*/ + &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_dc_mod3[8] = { + // clang-format off +/* opFADDr opFMULr*/ + &float_op, &float_op, INVALID, INVALID, +/* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ + &float_op, &float_op, &fdiv_op, &fdiv_op + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_dd[8] = { + // clang-format off +/* FLDd FSTd FSTPd*/ + &load_float_op, INVALID, &fstore_op, &fstore_op, +/* FRSTOR FSAVE FSTSW*/ + &vector_float_l_op, INVALID, &vector_float_l_op, &vector_float_l_op + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_dd_mod3[8] = { + // clang-format off +/* FFFREE FST FSTP*/ + &float_op, INVALID, &float_op, &float_op, +/* FUCOM FUCOMP*/ + &float_op, &float_op, INVALID, INVALID + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_de[8] = { + // clang-format off +/* FIADDw FIMULw FICOMw FICOMPw*/ + &load_float_op, &load_float_op, &load_float_op, &load_float_op, +/* FISUBw FISUBRw FIDIVw FIDIVRw*/ + &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_de_mod3[8] = { + // clang-format off +/* FADDP FMULP FCOMPP*/ + &float_op, &float_op, INVALID, &float_op, +/* FSUBP FSUBRP FDIVP FDIVRP*/ + &float_op, &float_op, &fdiv_op, &fdiv_op, + // clang-format on +}; + +static const risc86_instruction_t *opcode_timings_k5_df[8] = { + // clang-format off +/* FILDiw FISTiw FISTPiw*/ + &load_float_op, INVALID, &fstore_op, &fstore_op, +/* FILDiq FBSTP FISTPiq*/ + INVALID, &load_float_op, &vector_float_l_op, &fstore_op, + // clang-format on +}; +static const risc86_instruction_t *opcode_timings_k5_df_mod3[8] = { + // clang-format off + INVALID, INVALID, INVALID, INVALID, +/* FSTSW AX*/ + &float_op, INVALID, INVALID, INVALID + // clang-format on +}; + +static uint8_t last_prefix; +static int prefixes; + +static int decode_timestamp; +static int last_complete_timestamp; + +typedef struct k5_unit_t { + uint32_t uop_mask; + int first_available_cycle; +} k5_unit_t; + +static int nr_units; +static k5_unit_t *units; + +/*k5 has dedicated MMX unit*/ +static k5_unit_t k5_units[] = { + { .uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) }, /*Integer X*/ + { .uop_mask = (1 << UOP_ALU) }, /*Integer Y*/ + { .uop_mask = (1 << UOP_MEU) | (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL) }, /*Multimedia*/ + { .uop_mask = (1 << UOP_FLOAT) }, /*Floating point*/ + { .uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD) }, /*Load*/ + { .uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE) }, /*Store*/ + { .uop_mask = (1 << UOP_BRANCH) } /*Branch*/ +}; +#define NR_k5_UNITS (sizeof(k5_units) / sizeof(k5_unit_t)) + +/*k5-2 and later integrate MMX into ALU X & Y, sharing multiplier, shifter and + 3DNow ALU between two execution units*/ +static k5_unit_t k5_2_units[] = { + { .uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) | (1 << UOP_MEU) | /*Integer X*/ + (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL) | (1 << UOP_MEU_3DN) }, + { .uop_mask = (1 << UOP_ALU) | (1 << UOP_MEU) | /*Integer Y*/ + (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL) | (1 << UOP_MEU_3DN) }, + { .uop_mask = (1 << UOP_FLOAT) }, /*Floating point*/ + { .uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD) }, /*Load*/ + { .uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE) }, /*Store*/ + { .uop_mask = (1 << UOP_BRANCH) } /*Branch*/ +}; +#define NR_k5_2_UNITS (sizeof(k5_2_units) / sizeof(k5_unit_t)) + +/*First available cycles of shared execution units. Each of these can be submitted + to by ALU X and Y*/ +static int mul_first_available_cycle; +static int shift_first_available_cycle; +static int m3dnow_first_available_cycle; + +static int +uop_run(const risc86_uop_t *uop, int decode_time) +{ + k5_unit_t *best_unit = NULL; + int best_start_cycle = 99999; + + /*UOP_LIMM does not require execution*/ + if (uop->type == UOP_LIMM) + return decode_time; + + /*Handle shared units on k5-2 and later*/ + if (units == k5_2_units) { + if (uop->type == UOP_MEU_MUL && decode_time < mul_first_available_cycle) + decode_time = mul_first_available_cycle; + else if (uop->type == UOP_MEU_SHIFT && decode_time < mul_first_available_cycle) + decode_time = shift_first_available_cycle; + else if (uop->type == UOP_MEU_3DN && decode_time < mul_first_available_cycle) + decode_time = m3dnow_first_available_cycle; + } + + /*Find execution unit for this uOP*/ + for (int c = 0; c < nr_units; c++) { + if (units[c].uop_mask & (1 << uop->type)) { + if (units[c].first_available_cycle < best_start_cycle) { + best_unit = &units[c]; + best_start_cycle = units[c].first_available_cycle; + } + } + } + if (!best_unit) + fatal("uop_run: can not find execution unit\n"); + + if (best_start_cycle < decode_time) + best_start_cycle = decode_time; + best_unit->first_available_cycle = best_start_cycle + uop->throughput; + + if (units == k5_2_units) { + if (uop->type == UOP_MEU_MUL) + mul_first_available_cycle = best_start_cycle + uop->throughput; + else if (uop->type == UOP_MEU_SHIFT) + shift_first_available_cycle = best_start_cycle + uop->throughput; + else if (uop->type == UOP_MEU_3DN) + m3dnow_first_available_cycle = best_start_cycle + uop->throughput; + } + + return best_start_cycle + uop->throughput; +} + +/*The k5 decoder can decode, per clock : + - 1 or 2 'short' instructions, each up to 2 uOPs and 7 bytes long + - 1 'long' instruction, up to 4 uOPs + - 1 'vector' instruction, up to 4 uOPs per cycle, plus (I think) 1 cycle startup delay) +*/ +static struct { + int nr_uops; + const risc86_uop_t *uops[4]; + /*Earliest time a uop can start. If the timestamp is -1, then the uop is + part of a dependency chain and the start time is the completion time of + the previous uop*/ + int earliest_start[4]; +} decode_buffer; + +#define NR_OPQUADS 6 +/*Timestamps of when the last six opquads completed. The k5 scheduler retires + opquads in order, so this is needed to determine when the next can be scheduled*/ +static int opquad_completion_timestamp[NR_OPQUADS]; +static int next_opquad = 0; + +#define NR_REGS 8 +/*Timestamp of when last operation on an integer register completed*/ +static int reg_available_timestamp[NR_REGS]; +/*Timestamp of when last operation on an FPU register completed*/ +static int fpu_st_timestamp[8]; +/*Completion time of the last uop to be processed. Used to calculate timing of + dependent uop chains*/ +static int last_uop_timestamp = 0; + +void +decode_flush_k5(void) +{ + int uop_timestamp = 0; + + /*Decoded opquad can not be submitted if there are no free spaces in the + opquad buffer*/ + if (decode_timestamp < opquad_completion_timestamp[next_opquad]) + decode_timestamp = opquad_completion_timestamp[next_opquad]; + + /*Ensure that uops can not be submitted before they have been decoded*/ + if (decode_timestamp > last_uop_timestamp) + last_uop_timestamp = decode_timestamp; + + /*Submit uops to execution units, and determine the latest completion time*/ + for (int c = 0; c < decode_buffer.nr_uops; c++) { + int start_timestamp; + + if (decode_buffer.earliest_start[c] == -1) + start_timestamp = last_uop_timestamp; + else + start_timestamp = decode_buffer.earliest_start[c]; + + last_uop_timestamp = uop_run(decode_buffer.uops[c], start_timestamp); + if (last_uop_timestamp > uop_timestamp) + uop_timestamp = last_uop_timestamp; + } + + /*Calculate opquad completion time. Since opquads complete in order, it + must be after the last completion.*/ + if (uop_timestamp <= last_complete_timestamp) + last_complete_timestamp = last_complete_timestamp + 1; + else + last_complete_timestamp = uop_timestamp; + + /*Advance to next opquad in buffer*/ + opquad_completion_timestamp[next_opquad] = last_complete_timestamp; + next_opquad++; + if (next_opquad == NR_OPQUADS) + next_opquad = 0; + + decode_timestamp++; + decode_buffer.nr_uops = 0; +} + +/*The instruction is only of interest here if it's longer than 7 bytes, as that's the + limit on k5 short decoding*/ +static int +codegen_timing_instr_length(uint64_t deps, uint32_t fetchdat, int op_32) +{ + int len = prefixes + 1; /*Opcode*/ + if (deps & MODRM) { + len++; /*ModR/M*/ + if (deps & HAS_IMM8) + len++; + if (deps & HAS_IMM1632) + len += (op_32 & 0x100) ? 4 : 2; + + if (op_32 & 0x200) { + if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) { + /* Has SIB*/ + len++; + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 4; + else if ((fetchdat & 0x700) == 0x500) + len += 4; + } else { + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 4; + else if ((fetchdat & 0xc7) == 0x05) + len += 4; + } + } else { + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 2; + else if ((fetchdat & 0xc7) == 0x06) + len += 2; + } + } + + return len; +} + +static void +decode_instruction(const risc86_instruction_t *ins, uint64_t deps, uint32_t fetchdat, int op_32, int bit8) +{ + uint32_t regmask_required; + uint32_t regmask_modified; + int c; + int d; + int earliest_start = 0; + decode_type_t decode_type = ins->decode_type; + int instr_length = codegen_timing_instr_length(deps, fetchdat, op_32); + + /*Generate input register mask, and determine the earliest time this + instruction can start. This is not accurate, as this is calculated per + x86 instruction when it should be handled per uop*/ + regmask_required = get_dstdep_mask(deps, fetchdat, bit8); + regmask_required |= get_addr_regmask(deps, fetchdat, op_32); + for (c = 0; c < 8; c++) { + if (regmask_required & (1 << c)) { + if (reg_available_timestamp[c] > decode_timestamp) + earliest_start = reg_available_timestamp[c]; + } + } + if ((deps & FPU_RW_ST0) && fpu_st_timestamp[0] > decode_timestamp) + earliest_start = fpu_st_timestamp[0]; + if ((deps & FPU_RW_ST1) && fpu_st_timestamp[1] > decode_timestamp) + earliest_start = fpu_st_timestamp[1]; + if (deps & FPU_RW_STREG) { + int reg = fetchdat & 7; + + if (fpu_st_timestamp[reg] > decode_timestamp) + earliest_start = fpu_st_timestamp[reg]; + } + + /*Short decoders are limited to 7 bytes*/ + if (decode_type == DECODE_SHORT && instr_length > 7) + decode_type = DECODE_LONG; + /*Long decoder is limited to 11 bytes*/ + else if (instr_length > 11) + decode_type = DECODE_VECTOR; + + switch (decode_type) { + case DECODE_SHORT: + if (decode_buffer.nr_uops) { + decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; + decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; + if (ins->nr_uops > 1) { + decode_buffer.uops[decode_buffer.nr_uops + 1] = &ins->uop[1]; + decode_buffer.earliest_start[decode_buffer.nr_uops + 1] = -1; + } + decode_buffer.nr_uops += ins->nr_uops; + + decode_flush_k5(); + } else { + decode_buffer.nr_uops = ins->nr_uops; + decode_buffer.uops[0] = &ins->uop[0]; + decode_buffer.earliest_start[0] = earliest_start; + if (ins->nr_uops > 1) { + decode_buffer.uops[1] = &ins->uop[1]; + decode_buffer.earliest_start[1] = -1; + } + } + break; + + case DECODE_LONG: + if (decode_buffer.nr_uops) + decode_flush_k5(); + + decode_buffer.nr_uops = ins->nr_uops; + for (c = 0; c < ins->nr_uops; c++) { + decode_buffer.uops[c] = &ins->uop[c]; + if (c == 0) + decode_buffer.earliest_start[c] = earliest_start; + else + decode_buffer.earliest_start[c] = -1; + } + decode_flush_k5(); + break; + + case DECODE_VECTOR: + if (decode_buffer.nr_uops) + decode_flush_k5(); + + decode_timestamp++; + d = 0; + + for (c = 0; c < ins->nr_uops; c++) { + decode_buffer.uops[d] = &ins->uop[c]; + if (c == 0) + decode_buffer.earliest_start[d] = earliest_start; + else + decode_buffer.earliest_start[d] = -1; + d++; + + if (d == 4) { + d = 0; + decode_buffer.nr_uops = 4; + decode_flush_k5(); + } + } + if (d) { + decode_buffer.nr_uops = d; + decode_flush_k5(); + } + break; + } + + /*Update write timestamps for any output registers*/ + regmask_modified = get_dstdep_mask(deps, fetchdat, bit8); + for (c = 0; c < 8; c++) { + if (regmask_modified & (1 << c)) + reg_available_timestamp[c] = last_complete_timestamp; + } + if (deps & FPU_POP) { + for (c = 0; c < 7; c++) + fpu_st_timestamp[c] = fpu_st_timestamp[c + 1]; + fpu_st_timestamp[7] = 0; + } + if (deps & FPU_POP2) { + for (c = 0; c < 6; c++) + fpu_st_timestamp[c] = fpu_st_timestamp[c + 2]; + fpu_st_timestamp[6] = fpu_st_timestamp[7] = 0; + } + if (deps & FPU_PUSH) { + for (c = 0; c < 7; c++) + fpu_st_timestamp[c + 1] = fpu_st_timestamp[c]; + fpu_st_timestamp[0] = 0; + } + if (deps & FPU_WRITE_ST0) + fpu_st_timestamp[0] = last_complete_timestamp; + if (deps & FPU_WRITE_ST1) + fpu_st_timestamp[1] = last_complete_timestamp; + if (deps & FPU_WRITE_STREG) { + int reg = fetchdat & 7; + if (deps & FPU_POP) + reg--; + if (reg >= 0 && !(reg == 0 && (deps & FPU_WRITE_ST0)) && !(reg == 1 && (deps & FPU_WRITE_ST1))) + fpu_st_timestamp[reg] = last_complete_timestamp; + } +} + +void +codegen_timing_k5_block_start(void) +{ + int c; + + for (c = 0; c < nr_units; c++) + units[c].first_available_cycle = 0; + + mul_first_available_cycle = 0; + shift_first_available_cycle = 0; + m3dnow_first_available_cycle = 0; + + decode_timestamp = 0; + last_complete_timestamp = 0; + + for (c = 0; c < NR_OPQUADS; c++) + opquad_completion_timestamp[c] = 0; + next_opquad = 0; + + for (c = 0; c < NR_REGS; c++) + reg_available_timestamp[c] = 0; + for (c = 0; c < 8; c++) + fpu_st_timestamp[c] = 0; +} + +void +codegen_timing_k5_start(void) +{ + if (cpu_s->cpu_type == CPU_K5) { + units = k5_units; + nr_units = NR_k5_UNITS; + } else { + units = k5_2_units; + nr_units = NR_k5_2_UNITS; + } + last_prefix = 0; + prefixes = 0; +} + +void +codegen_timing_k5_prefix(uint8_t prefix, uint32_t fetchdat) +{ + if (prefix != 0x0f) + decode_timestamp++; + + last_prefix = prefix; + prefixes++; +} + +void +codegen_timing_k5_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) +{ + const risc86_instruction_t **ins_table; + const uint64_t *deps; + int mod3 = ((fetchdat & 0xc0) == 0xc0); + int old_last_complete_timestamp = last_complete_timestamp; + int bit8 = !(opcode & 1); + + switch (last_prefix) { + case 0x0f: + if (opcode == 0x0f) { + /*3DNow has the actual opcode after ModR/M, SIB and any offset*/ + uint32_t opcode_pc = op_pc + 1; /*Byte after ModR/M*/ + uint8_t modrm = fetchdat & 0xff; + uint8_t sib = (fetchdat >> 8) & 0xff; + + if ((modrm & 0xc0) != 0xc0) { + if (op_32 & 0x200) { + if ((modrm & 7) == 4) { + /* Has SIB*/ + opcode_pc++; + if ((modrm & 0xc0) == 0x40) + opcode_pc++; + else if ((modrm & 0xc0) == 0x80) + opcode_pc += 4; + else if ((sib & 0x07) == 0x05) + opcode_pc += 4; + } else { + if ((modrm & 0xc0) == 0x40) + opcode_pc++; + else if ((modrm & 0xc0) == 0x80) + opcode_pc += 4; + else if ((modrm & 0xc7) == 0x05) + opcode_pc += 4; + } + } else { + if ((modrm & 0xc0) == 0x40) + opcode_pc++; + else if ((modrm & 0xc0) == 0x80) + opcode_pc += 2; + else if ((modrm & 0xc7) == 0x06) + opcode_pc += 2; + } + } + + opcode = fastreadb(cs + opcode_pc); + + ins_table = mod3 ? opcode_timings_k5_0f0f_mod3 : opcode_timings_k5_0f0f; + deps = mod3 ? opcode_deps_0f0f_mod3 : opcode_deps_0f0f; + } else { + ins_table = mod3 ? opcode_timings_k5_0f_mod3 : opcode_timings_k5_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; + } + break; + + case 0xd8: + ins_table = mod3 ? opcode_timings_k5_d8_mod3 : opcode_timings_k5_d8; + deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; + opcode = (opcode >> 3) & 7; + break; + case 0xd9: + ins_table = mod3 ? opcode_timings_k5_d9_mod3 : opcode_timings_k5_d9; + deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xda: + ins_table = mod3 ? opcode_timings_k5_da_mod3 : opcode_timings_k5_da; + deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; + opcode = (opcode >> 3) & 7; + break; + case 0xdb: + ins_table = mod3 ? opcode_timings_k5_db_mod3 : opcode_timings_k5_db; + deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xdc: + ins_table = mod3 ? opcode_timings_k5_dc_mod3 : opcode_timings_k5_dc; + deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; + opcode = (opcode >> 3) & 7; + break; + case 0xdd: + ins_table = mod3 ? opcode_timings_k5_dd_mod3 : opcode_timings_k5_dd; + deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; + opcode = (opcode >> 3) & 7; + break; + case 0xde: + ins_table = mod3 ? opcode_timings_k5_de_mod3 : opcode_timings_k5_de; + deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; + opcode = (opcode >> 3) & 7; + break; + case 0xdf: + ins_table = mod3 ? opcode_timings_k5_df_mod3 : opcode_timings_k5_df; + deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; + opcode = (opcode >> 3) & 7; + break; + + default: + switch (opcode) { + case 0x80: + case 0x82: + ins_table = mod3 ? opcode_timings_k5_80_mod3 : opcode_timings_k5_80; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + case 0x81: + case 0x83: + ins_table = mod3 ? opcode_timings_k5_8x_mod3 : opcode_timings_k5_8x; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc0: + case 0xd0: + case 0xd2: + ins_table = mod3 ? opcode_timings_k5_shift_b_mod3 : opcode_timings_k5_shift_b; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc1: + case 0xd1: + case 0xd3: + ins_table = mod3 ? opcode_timings_k5_shift_mod3 : opcode_timings_k5_shift; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xf6: + ins_table = mod3 ? opcode_timings_k5_f6_mod3 : opcode_timings_k5_f6; + deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; + opcode = (fetchdat >> 3) & 7; + break; + case 0xf7: + ins_table = mod3 ? opcode_timings_k5_f7_mod3 : opcode_timings_k5_f7; + deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; + opcode = (fetchdat >> 3) & 7; + break; + case 0xff: + ins_table = mod3 ? opcode_timings_k5_ff_mod3 : opcode_timings_k5_ff; + deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; + opcode = (fetchdat >> 3) & 7; + break; + + default: + ins_table = mod3 ? opcode_timings_k5_mod3 : opcode_timings_k5; + deps = mod3 ? opcode_deps_mod3 : opcode_deps; + break; + } + } + + if (ins_table[opcode]) + decode_instruction(ins_table[opcode], deps[opcode], fetchdat, op_32, bit8); + else + decode_instruction(&vector_alu1_op, 0, fetchdat, op_32, bit8); + codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); +} + +void +codegen_timing_k5_block_end(void) +{ + if (decode_buffer.nr_uops) { + int old_last_complete_timestamp = last_complete_timestamp; + decode_flush_k5(); + codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); + } +} + +int +codegen_timing_k5_jump_cycles(void) +{ + if (decode_buffer.nr_uops) + return 1; + return 0; +} + +codegen_timing_t codegen_timing_k5 = { + codegen_timing_k5_start, + codegen_timing_k5_prefix, + codegen_timing_k5_opcode, + codegen_timing_k5_block_start, + codegen_timing_k5_block_end, + codegen_timing_k5_jump_cycles +}; diff --git a/src/cpu/codegen_timing_k6.c b/src/cpu/codegen_timing_k6.c index 6a2871884..5566fbbcd 100644 --- a/src/cpu/codegen_timing_k6.c +++ b/src/cpu/codegen_timing_k6.c @@ -1,5 +1,5 @@ /*Most of the vector instructions here are a total guess. - Some of the timings are based on http://http://web.archive.org/web/20181122095446/http://users.atw.hu/instlatx64/AuthenticAMD0000562_K6_InstLatX86.txt*/ + Some of the timings are based on https://web.archive.org/web/20181122095446/http://users.atw.hu/instlatx64/AuthenticAMD0000562_K6_InstLatX86.txt*/ #include #include #include @@ -759,7 +759,7 @@ static const risc86_instruction_t vector_wbinvd_op = { #define INVALID NULL -static const risc86_instruction_t *opcode_timings[256] = { +static const risc86_instruction_t *opcode_timings_k6[256] = { // clang-format off /* ADD ADD ADD ADD*/ /*00*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, @@ -896,7 +896,7 @@ static const risc86_instruction_t *opcode_timings[256] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_mod3[256] = { +static const risc86_instruction_t *opcode_timings_k6_mod3[256] = { // clang-format off /* ADD ADD ADD ADD*/ /*00*/ &alux_op, &alu_op, &alux_op, &alu_op, @@ -1033,7 +1033,7 @@ static const risc86_instruction_t *opcode_timings_mod3[256] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_0f[256] = { +static const risc86_instruction_t *opcode_timings_k6_0f[256] = { // clang-format off /*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID, &vector_alu6_op, &vector_alu6_op, INVALID, @@ -1116,7 +1116,7 @@ static const risc86_instruction_t *opcode_timings_0f[256] = { &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, // clang-format on }; -static const risc86_instruction_t *opcode_timings_0f_mod3[256] = { +static const risc86_instruction_t *opcode_timings_k6_0f_mod3[256] = { // clang-format off /*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID, &vector_alu6_op, &vector_alu6_op, INVALID, @@ -1200,7 +1200,7 @@ static const risc86_instruction_t *opcode_timings_0f_mod3[256] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_0f0f[256] = { +static const risc86_instruction_t *opcode_timings_k6_0f0f[256] = { // clang-format off /*00*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -1283,7 +1283,7 @@ static const risc86_instruction_t *opcode_timings_0f0f[256] = { INVALID, INVALID, INVALID, INVALID, // clang-format on }; -static const risc86_instruction_t *opcode_timings_0f0f_mod3[256] = { +static const risc86_instruction_t *opcode_timings_k6_0f0f_mod3[256] = { // clang-format off /*00*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -1367,57 +1367,57 @@ static const risc86_instruction_t *opcode_timings_0f0f_mod3[256] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_shift[8] = { +static const risc86_instruction_t *opcode_timings_k6_shift[8] = { // clang-format off &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op // clang-format on }; -static const risc86_instruction_t *opcode_timings_shift_b[8] = { +static const risc86_instruction_t *opcode_timings_k6_shift_b[8] = { // clang-format off &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op // clang-format on }; -static const risc86_instruction_t *opcode_timings_shift_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_shift_mod3[8] = { // clang-format off &vector_alu1_op, &vector_alu1_op, &vector_alu1_op, &vector_alu1_op, &alu_op, &alu_op, &alu_op, &alu_op // clang-format on }; -static const risc86_instruction_t *opcode_timings_shift_b_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_shift_b_mod3[8] = { // clang-format off &vector_alux1_op, &vector_alux1_op, &vector_alux1_op, &vector_alux1_op, &alux_op, &alux_op, &alux_op, &alux_op // clang-format on }; -static const risc86_instruction_t *opcode_timings_80[8] = { +static const risc86_instruction_t *opcode_timings_k6_80[8] = { // clang-format off &alux_store_op, &alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &alux_store_op, &alux_store_op, &alux_store_op, &alux_store_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_80_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_80_mod3[8] = { // clang-format off &alux_op, &alux_op, &alux_store_op, &alux_store_op, &alux_op, &alux_op, &alux_op, &alux_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_8x[8] = { +static const risc86_instruction_t *opcode_timings_k6_8x[8] = { // clang-format off &alu_store_op, &alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_8x_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_8x_mod3[8] = { // clang-format off &alu_op, &alu_op, &alu_store_op, &alu_store_op, &alu_op, &alu_op, &alu_op, &alu_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_f6[8] = { +static const risc86_instruction_t *opcode_timings_k6_f6[8] = { // clang-format off /* TST NOT NEG*/ &test_mem_imm_b_op, INVALID, &vector_alux_store_op, &vector_alux_store_op, @@ -1425,7 +1425,7 @@ static const risc86_instruction_t *opcode_timings_f6[8] = { &vector_mul_mem_op, &vector_mul_mem_op, &vector_div16_mem_op, &vector_div16_mem_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_f6_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_f6_mod3[8] = { // clang-format off /* TST NOT NEG*/ &test_reg_b_op, INVALID, &alux_op, &alux_op, @@ -1433,7 +1433,7 @@ static const risc86_instruction_t *opcode_timings_f6_mod3[8] = { &vector_mul_op, &vector_mul_op, &vector_div16_op, &vector_div16_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_f7[8] = { +static const risc86_instruction_t *opcode_timings_k6_f7[8] = { // clang-format off /* TST NOT NEG*/ &test_mem_imm_op, INVALID, &vector_alu_store_op, &vector_alu_store_op, @@ -1441,7 +1441,7 @@ static const risc86_instruction_t *opcode_timings_f7[8] = { &vector_mul64_mem_op, &vector_mul64_mem_op, &vector_div32_mem_op, &vector_div32_mem_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_f7_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_f7_mod3[8] = { // clang-format off /* TST NOT NEG*/ &test_reg_op, INVALID, &alu_op, &alu_op, @@ -1449,7 +1449,7 @@ static const risc86_instruction_t *opcode_timings_f7_mod3[8] = { &vector_mul64_op, &vector_mul64_op, &vector_div32_op, &vector_div32_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_ff[8] = { +static const risc86_instruction_t *opcode_timings_k6_ff[8] = { // clang-format off /* INC DEC CALL CALL far*/ &alu_store_op, &alu_store_op, &store_op, &vector_call_far_op, @@ -1457,7 +1457,7 @@ static const risc86_instruction_t *opcode_timings_ff[8] = { &branch_op, &vector_jmp_far_op, &push_mem_op, INVALID // clang-format on }; -static const risc86_instruction_t *opcode_timings_ff_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_ff_mod3[8] = { // clang-format off /* INC DEC CALL CALL far*/ &vector_alu1_op, &vector_alu1_op, &store_op, &vector_call_far_op, @@ -1466,7 +1466,7 @@ static const risc86_instruction_t *opcode_timings_ff_mod3[8] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_d8[8] = { +static const risc86_instruction_t *opcode_timings_k6_d8[8] = { // clang-format off /* FADDs FMULs FCOMs FCOMPs*/ &load_float_op, &load_float_op, &load_float_op, &load_float_op, @@ -1474,7 +1474,7 @@ static const risc86_instruction_t *opcode_timings_d8[8] = { &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_d8_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_d8_mod3[8] = { // clang-format off /* FADD FMUL FCOM FCOMP*/ &float_op, &float_op, &float_op, &float_op, @@ -1483,7 +1483,7 @@ static const risc86_instruction_t *opcode_timings_d8_mod3[8] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_d9[8] = { +static const risc86_instruction_t *opcode_timings_k6_d9[8] = { // clang-format off /* FLDs FSTs FSTPs*/ &load_float_op, INVALID, &fstore_op, &fstore_op, @@ -1491,7 +1491,7 @@ static const risc86_instruction_t *opcode_timings_d9[8] = { &vector_float_l_op, &vector_fldcw_op, &vector_float_l_op, &vector_float_op // clang-format on }; -static const risc86_instruction_t *opcode_timings_d9_mod3[64] = { +static const risc86_instruction_t *opcode_timings_k6_d9_mod3[64] = { // clang-format off /*FLD*/ &float_op, &float_op, &float_op, &float_op, @@ -1524,7 +1524,7 @@ static const risc86_instruction_t *opcode_timings_d9_mod3[64] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_da[8] = { +static const risc86_instruction_t *opcode_timings_k6_da[8] = { // clang-format off /* FIADDl FIMULl FICOMl FICOMPl*/ &load_float_op, &load_float_op, &load_float_op, &load_float_op, @@ -1532,7 +1532,7 @@ static const risc86_instruction_t *opcode_timings_da[8] = { &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_da_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_da_mod3[8] = { // clang-format off INVALID, INVALID, INVALID, INVALID, /* FCOMPP*/ @@ -1540,7 +1540,7 @@ static const risc86_instruction_t *opcode_timings_da_mod3[8] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_db[8] = { +static const risc86_instruction_t *opcode_timings_k6_db[8] = { // clang-format off /* FLDil FSTil FSTPil*/ &load_float_op, INVALID, &fstore_op, &fstore_op, @@ -1548,7 +1548,7 @@ static const risc86_instruction_t *opcode_timings_db[8] = { INVALID, &vector_flde_op, INVALID, &vector_fste_op // clang-format on }; -static const risc86_instruction_t *opcode_timings_db_mod3[64] = { +static const risc86_instruction_t *opcode_timings_k6_db_mod3[64] = { // clang-format off INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -1578,7 +1578,7 @@ static const risc86_instruction_t *opcode_timings_db_mod3[64] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_dc[8] = { +static const risc86_instruction_t *opcode_timings_k6_dc[8] = { // clang-format off /* FADDd FMULd FCOMd FCOMPd*/ &load_float_op, &load_float_op, &load_float_op, &load_float_op, @@ -1586,7 +1586,7 @@ static const risc86_instruction_t *opcode_timings_dc[8] = { &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_dc_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_dc_mod3[8] = { // clang-format off /* opFADDr opFMULr*/ &float_op, &float_op, INVALID, INVALID, @@ -1595,7 +1595,7 @@ static const risc86_instruction_t *opcode_timings_dc_mod3[8] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_dd[8] = { +static const risc86_instruction_t *opcode_timings_k6_dd[8] = { // clang-format off /* FLDd FSTd FSTPd*/ &load_float_op, INVALID, &fstore_op, &fstore_op, @@ -1603,7 +1603,7 @@ static const risc86_instruction_t *opcode_timings_dd[8] = { &vector_float_l_op, INVALID, &vector_float_l_op, &vector_float_l_op // clang-format on }; -static const risc86_instruction_t *opcode_timings_dd_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_dd_mod3[8] = { // clang-format off /* FFFREE FST FSTP*/ &float_op, INVALID, &float_op, &float_op, @@ -1612,7 +1612,7 @@ static const risc86_instruction_t *opcode_timings_dd_mod3[8] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_de[8] = { +static const risc86_instruction_t *opcode_timings_k6_de[8] = { // clang-format off /* FIADDw FIMULw FICOMw FICOMPw*/ &load_float_op, &load_float_op, &load_float_op, &load_float_op, @@ -1620,7 +1620,7 @@ static const risc86_instruction_t *opcode_timings_de[8] = { &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_de_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_de_mod3[8] = { // clang-format off /* FADDP FMULP FCOMPP*/ &float_op, &float_op, INVALID, &float_op, @@ -1629,7 +1629,7 @@ static const risc86_instruction_t *opcode_timings_de_mod3[8] = { // clang-format on }; -static const risc86_instruction_t *opcode_timings_df[8] = { +static const risc86_instruction_t *opcode_timings_k6_df[8] = { // clang-format off /* FILDiw FISTiw FISTPiw*/ &load_float_op, INVALID, &fstore_op, &fstore_op, @@ -1637,7 +1637,7 @@ static const risc86_instruction_t *opcode_timings_df[8] = { INVALID, &load_float_op, &vector_float_l_op, &fstore_op, // clang-format on }; -static const risc86_instruction_t *opcode_timings_df_mod3[8] = { +static const risc86_instruction_t *opcode_timings_k6_df_mod3[8] = { // clang-format off INVALID, INVALID, INVALID, INVALID, /* FSTSW AX*/ @@ -1769,7 +1769,7 @@ static int fpu_st_timestamp[8]; static int last_uop_timestamp = 0; void -decode_flush(void) +decode_flush_k6(void) { int uop_timestamp = 0; @@ -1908,7 +1908,7 @@ decode_instruction(const risc86_instruction_t *ins, uint64_t deps, uint32_t fetc } decode_buffer.nr_uops += ins->nr_uops; - decode_flush(); + decode_flush_k6(); } else { decode_buffer.nr_uops = ins->nr_uops; decode_buffer.uops[0] = &ins->uop[0]; @@ -1922,7 +1922,7 @@ decode_instruction(const risc86_instruction_t *ins, uint64_t deps, uint32_t fetc case DECODE_LONG: if (decode_buffer.nr_uops) - decode_flush(); + decode_flush_k6(); decode_buffer.nr_uops = ins->nr_uops; for (c = 0; c < ins->nr_uops; c++) { @@ -1932,12 +1932,12 @@ decode_instruction(const risc86_instruction_t *ins, uint64_t deps, uint32_t fetc else decode_buffer.earliest_start[c] = -1; } - decode_flush(); + decode_flush_k6(); break; case DECODE_VECTOR: if (decode_buffer.nr_uops) - decode_flush(); + decode_flush_k6(); decode_timestamp++; d = 0; @@ -1953,12 +1953,12 @@ decode_instruction(const risc86_instruction_t *ins, uint64_t deps, uint32_t fetc if (d == 4) { d = 0; decode_buffer.nr_uops = 4; - decode_flush(); + decode_flush_k6(); } } if (d) { decode_buffer.nr_uops = d; - decode_flush(); + decode_flush_k6(); } break; } @@ -2094,51 +2094,51 @@ codegen_timing_k6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t opcode = fastreadb(cs + opcode_pc); - ins_table = mod3 ? opcode_timings_0f0f_mod3 : opcode_timings_0f0f; + ins_table = mod3 ? opcode_timings_k6_0f0f_mod3 : opcode_timings_k6_0f0f; deps = mod3 ? opcode_deps_0f0f_mod3 : opcode_deps_0f0f; } else { - ins_table = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + ins_table = mod3 ? opcode_timings_k6_0f_mod3 : opcode_timings_k6_0f; deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; } break; case 0xd8: - ins_table = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + ins_table = mod3 ? opcode_timings_k6_d8_mod3 : opcode_timings_k6_d8; deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; opcode = (opcode >> 3) & 7; break; case 0xd9: - ins_table = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + ins_table = mod3 ? opcode_timings_k6_d9_mod3 : opcode_timings_k6_d9; deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xda: - ins_table = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + ins_table = mod3 ? opcode_timings_k6_da_mod3 : opcode_timings_k6_da; deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; opcode = (opcode >> 3) & 7; break; case 0xdb: - ins_table = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + ins_table = mod3 ? opcode_timings_k6_db_mod3 : opcode_timings_k6_db; deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xdc: - ins_table = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + ins_table = mod3 ? opcode_timings_k6_dc_mod3 : opcode_timings_k6_dc; deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; opcode = (opcode >> 3) & 7; break; case 0xdd: - ins_table = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + ins_table = mod3 ? opcode_timings_k6_dd_mod3 : opcode_timings_k6_dd; deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; opcode = (opcode >> 3) & 7; break; case 0xde: - ins_table = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + ins_table = mod3 ? opcode_timings_k6_de_mod3 : opcode_timings_k6_de; deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; opcode = (opcode >> 3) & 7; break; case 0xdf: - ins_table = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + ins_table = mod3 ? opcode_timings_k6_df_mod3 : opcode_timings_k6_df; deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; opcode = (opcode >> 3) & 7; break; @@ -2147,13 +2147,13 @@ codegen_timing_k6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t switch (opcode) { case 0x80: case 0x82: - ins_table = mod3 ? opcode_timings_80_mod3 : opcode_timings_80; + ins_table = mod3 ? opcode_timings_k6_80_mod3 : opcode_timings_k6_80; deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; opcode = (fetchdat >> 3) & 7; break; case 0x81: case 0x83: - ins_table = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + ins_table = mod3 ? opcode_timings_k6_8x_mod3 : opcode_timings_k6_8x; deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; opcode = (fetchdat >> 3) & 7; break; @@ -2161,7 +2161,7 @@ codegen_timing_k6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t case 0xc0: case 0xd0: case 0xd2: - ins_table = mod3 ? opcode_timings_shift_b_mod3 : opcode_timings_shift_b; + ins_table = mod3 ? opcode_timings_k6_shift_b_mod3 : opcode_timings_k6_shift_b; deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; opcode = (fetchdat >> 3) & 7; break; @@ -2169,29 +2169,29 @@ codegen_timing_k6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t case 0xc1: case 0xd1: case 0xd3: - ins_table = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + ins_table = mod3 ? opcode_timings_k6_shift_mod3 : opcode_timings_k6_shift; deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; opcode = (fetchdat >> 3) & 7; break; case 0xf6: - ins_table = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + ins_table = mod3 ? opcode_timings_k6_f6_mod3 : opcode_timings_k6_f6; deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; opcode = (fetchdat >> 3) & 7; break; case 0xf7: - ins_table = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + ins_table = mod3 ? opcode_timings_k6_f7_mod3 : opcode_timings_k6_f7; deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; opcode = (fetchdat >> 3) & 7; break; case 0xff: - ins_table = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + ins_table = mod3 ? opcode_timings_k6_ff_mod3 : opcode_timings_k6_ff; deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; opcode = (fetchdat >> 3) & 7; break; default: - ins_table = mod3 ? opcode_timings_mod3 : opcode_timings; + ins_table = mod3 ? opcode_timings_k6_mod3 : opcode_timings_k6; deps = mod3 ? opcode_deps_mod3 : opcode_deps; break; } @@ -2209,7 +2209,7 @@ codegen_timing_k6_block_end(void) { if (decode_buffer.nr_uops) { int old_last_complete_timestamp = last_complete_timestamp; - decode_flush(); + decode_flush_k6(); codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); } } diff --git a/src/cpu/codegen_timing_p6.c b/src/cpu/codegen_timing_p6.c index 25c098ca4..a22813068 100644 --- a/src/cpu/codegen_timing_p6.c +++ b/src/cpu/codegen_timing_p6.c @@ -787,7 +787,7 @@ static const macro_op_t wbinvd_op = { }; #define INVALID NULL -static const macro_op_t *opcode_timings[256] = { +static const macro_op_t *opcode_timings_p6[256] = { // clang-format off /* ADD ADD ADD ADD*/ /*00*/ &alup0_store_op, &alu_store_op, &load_alup0_op, &load_alu_op, @@ -924,7 +924,7 @@ static const macro_op_t *opcode_timings[256] = { // clang-format on }; -static const macro_op_t *opcode_timings_mod3[256] = { +static const macro_op_t *opcode_timings_p6_mod3[256] = { // clang-format off /* ADD ADD ADD ADD*/ /*00*/ &alup0_op, &alu_op, &alup0_op, &alu_op, @@ -1062,7 +1062,7 @@ static const macro_op_t *opcode_timings_mod3[256] = { // clang-format on }; -static const macro_op_t *opcode_timings_0f[256] = { +static const macro_op_t *opcode_timings_p6_0f[256] = { // clang-format off /*00*/ &alu6_op, &alu6_op, &alu6_op, &alu6_op, INVALID, &alu6_op, &alu6_op, INVALID, @@ -1145,7 +1145,7 @@ static const macro_op_t *opcode_timings_0f[256] = { &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, // clang-format on }; -static const macro_op_t *opcode_timings_0f_mod3[256] = { +static const macro_op_t *opcode_timings_p6_0f_mod3[256] = { // clang-format off /*00*/ &alu6_op, &alu6_op, &alu6_op, &alu6_op, INVALID, &alu6_op, &alu6_op, INVALID, @@ -1228,58 +1228,58 @@ static const macro_op_t *opcode_timings_0f_mod3[256] = { &mmx_op, &mmx_op, &mmx_op, INVALID, }; -static const macro_op_t *opcode_timings_shift[8] = +static const macro_op_t *opcode_timings_p6_shift[8] = { // clang-format off &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op // clang-format on }; -static const macro_op_t *opcode_timings_shift_b[8] = { +static const macro_op_t *opcode_timings_p6_shift_b[8] = { // clang-format off &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op // clang-format on }; -static const macro_op_t *opcode_timings_shift_mod3[8] = { +static const macro_op_t *opcode_timings_p6_shift_mod3[8] = { // clang-format off &complex_alu1_op, &complex_alu1_op, &complex_alu1_op, &complex_alu1_op, &alu_op, &alu_op, &alu_op, &alu_op // clang-format on }; -static const macro_op_t *opcode_timings_shift_b_mod3[8] = { +static const macro_op_t *opcode_timings_p6_shift_b_mod3[8] = { // clang-format off &complex_alup0_1_op, &complex_alup0_1_op, &complex_alup0_1_op, &complex_alup0_1_op, &alup0_op, &alup0_op, &alup0_op, &alup0_op // clang-format on }; -static const macro_op_t *opcode_timings_80[8] = { +static const macro_op_t *opcode_timings_p6_80[8] = { // clang-format off &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, // clang-format on }; -static const macro_op_t *opcode_timings_80_mod3[8] = { +static const macro_op_t *opcode_timings_p6_80_mod3[8] = { // clang-format off &alup0_op, &alup0_op, &alup0_store_op, &alup0_store_op, &alup0_op, &alup0_op, &alup0_op, &alup0_op, // clang-format on }; -static const macro_op_t *opcode_timings_8x[8] = { +static const macro_op_t *opcode_timings_p6_8x[8] = { // clang-format off &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, // clang-format on }; -static const macro_op_t *opcode_timings_8x_mod3[8] = { +static const macro_op_t *opcode_timings_p6_8x_mod3[8] = { // clang-format off &alu_op, &alu_op, &alu_store_op, &alu_store_op, &alu_op, &alu_op, &alu_op, &alu_op, // clang-format on }; -static const macro_op_t *opcode_timings_f6[8] = { +static const macro_op_t *opcode_timings_p6_f6[8] = { // clang-format off /* TST NOT NEG*/ &test_mem_imm_b_op, INVALID, &alup0_store_op, &alup0_store_op, @@ -1287,7 +1287,7 @@ static const macro_op_t *opcode_timings_f6[8] = { &mul_mem_op, &mul_mem_op, &div16_mem_op, &div16_mem_op, // clang-format on }; -static const macro_op_t *opcode_timings_f6_mod3[8] = { +static const macro_op_t *opcode_timings_p6_f6_mod3[8] = { // clang-format off /* TST NOT NEG*/ &test_reg_b_op, INVALID, &alup0_op, &alup0_op, @@ -1295,7 +1295,7 @@ static const macro_op_t *opcode_timings_f6_mod3[8] = { &mul_op, &mul_op, &div16_op, &div16_op, // clang-format on }; -static const macro_op_t *opcode_timings_f7[8] = { +static const macro_op_t *opcode_timings_p6_f7[8] = { // clang-format off /* TST NOT NEG*/ &test_mem_imm_op, INVALID, &alu_store_op, &alu_store_op, @@ -1303,7 +1303,7 @@ static const macro_op_t *opcode_timings_f7[8] = { &mul64_mem_op, &mul64_mem_op, &div32_mem_op, &div32_mem_op, // clang-format on }; -static const macro_op_t *opcode_timings_f7_mod3[8] = { +static const macro_op_t *opcode_timings_p6_f7_mod3[8] = { // clang-format off /* TST NOT NEG*/ &test_reg_op, INVALID, &alu_op, &alu_op, @@ -1311,7 +1311,7 @@ static const macro_op_t *opcode_timings_f7_mod3[8] = { &mul64_op, &mul64_op, &div32_op, &div32_op, // clang-format on }; -static const macro_op_t *opcode_timings_ff[8] = { +static const macro_op_t *opcode_timings_p6_ff[8] = { // clang-format off /* INC DEC CALL CALL far*/ &alu_store_op, &alu_store_op, &store_op, &call_far_op, @@ -1319,7 +1319,7 @@ static const macro_op_t *opcode_timings_ff[8] = { &branch_op, &jmp_far_op, &push_mem_op, INVALID // clang-format on }; -static const macro_op_t *opcode_timings_ff_mod3[8] = { +static const macro_op_t *opcode_timings_p6_ff_mod3[8] = { // clang-format off /* INC DEC CALL CALL far*/ &complex_alu1_op, &complex_alu1_op, &store_op, &call_far_op, @@ -1328,7 +1328,7 @@ static const macro_op_t *opcode_timings_ff_mod3[8] = { // clang-format on }; -static const macro_op_t *opcode_timings_d8[8] = { +static const macro_op_t *opcode_timings_p6_d8[8] = { // clang-format off /* FADDs FMULs FCOMs FCOMPs*/ &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, @@ -1336,7 +1336,7 @@ static const macro_op_t *opcode_timings_d8[8] = { &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, // clang-format on }; -static const macro_op_t *opcode_timings_d8_mod3[8] = { +static const macro_op_t *opcode_timings_p6_d8_mod3[8] = { // clang-format off /* FADD FMUL FCOM FCOMP*/ &fadd_op, &fmul_op, &float_op, &float_op, @@ -1345,7 +1345,7 @@ static const macro_op_t *opcode_timings_d8_mod3[8] = { // clang-format on }; -static const macro_op_t *opcode_timings_d9[8] = { +static const macro_op_t *opcode_timings_p6_d9[8] = { // clang-format off /* FLDs FSTs FSTPs*/ &load_float_op, INVALID, &fstore_op, &fstore_op, @@ -1353,7 +1353,7 @@ static const macro_op_t *opcode_timings_d9[8] = { &complex_float_l_op, &fldcw_op, &complex_float_l_op, &complex_float_op // clang-format on }; -static const macro_op_t *opcode_timings_d9_mod3[64] = { +static const macro_op_t *opcode_timings_p6_d9_mod3[64] = { // clang-format off /*FLD*/ &float_op, &float_op, &float_op, &float_op, @@ -1386,7 +1386,7 @@ static const macro_op_t *opcode_timings_d9_mod3[64] = { // clang-format on }; -static const macro_op_t *opcode_timings_da[8] = { +static const macro_op_t *opcode_timings_p6_da[8] = { // clang-format off /* FIADDl FIMULl FICOMl FICOMPl*/ &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, @@ -1394,7 +1394,7 @@ static const macro_op_t *opcode_timings_da[8] = { &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, // clang-format on }; -static const macro_op_t *opcode_timings_da_mod3[8] = { +static const macro_op_t *opcode_timings_p6_da_mod3[8] = { // clang-format off INVALID, INVALID, INVALID, INVALID, /* FCOMPP*/ @@ -1402,7 +1402,7 @@ static const macro_op_t *opcode_timings_da_mod3[8] = { // clang-format on }; -static const macro_op_t *opcode_timings_db[8] = { +static const macro_op_t *opcode_timings_p6_db[8] = { // clang-format off /* FLDil FSTil FSTPil*/ &load_float_op, INVALID, &fstore_op, &fstore_op, @@ -1410,7 +1410,7 @@ static const macro_op_t *opcode_timings_db[8] = { INVALID, &flde_op, INVALID, &fste_op // clang-format on }; -static const macro_op_t *opcode_timings_db_mod3[64] = { +static const macro_op_t *opcode_timings_p6_db_mod3[64] = { // clang-format off INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -1440,7 +1440,7 @@ static const macro_op_t *opcode_timings_db_mod3[64] = { // clang-format on }; -static const macro_op_t *opcode_timings_dc[8] = { +static const macro_op_t *opcode_timings_p6_dc[8] = { // clang-format off /* FADDd FMULd FCOMd FCOMPd*/ &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, @@ -1448,7 +1448,7 @@ static const macro_op_t *opcode_timings_dc[8] = { &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, // clang-format on }; -static const macro_op_t *opcode_timings_dc_mod3[8] = { +static const macro_op_t *opcode_timings_p6_dc_mod3[8] = { // clang-format off /* opFADDr opFMULr*/ &fadd_op, &fmul_op, INVALID, INVALID, @@ -1457,7 +1457,7 @@ static const macro_op_t *opcode_timings_dc_mod3[8] = { // clang-format on }; -static const macro_op_t *opcode_timings_dd[8] = { +static const macro_op_t *opcode_timings_p6_dd[8] = { // clang-format off /* FLDd FSTd FSTPd*/ &load_float_op, INVALID, &fstore_op, &fstore_op, @@ -1465,7 +1465,7 @@ static const macro_op_t *opcode_timings_dd[8] = { &complex_float_l_op, INVALID, &complex_float_l_op, &complex_float_l_op // clang-format on }; -static const macro_op_t *opcode_timings_dd_mod3[8] = { +static const macro_op_t *opcode_timings_p6_dd_mod3[8] = { // clang-format off /* FFFREE FST FSTP*/ &float_op, INVALID, &float_op, &float_op, @@ -1474,7 +1474,7 @@ static const macro_op_t *opcode_timings_dd_mod3[8] = { // clang-format on }; -static const macro_op_t *opcode_timings_de[8] = { +static const macro_op_t *opcode_timings_p6_de[8] = { // clang-format off /* FIADDw FIMULw FICOMw FICOMPw*/ &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, @@ -1482,7 +1482,7 @@ static const macro_op_t *opcode_timings_de[8] = { &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, // clang-format on }; -static const macro_op_t *opcode_timings_de_mod3[8] = { +static const macro_op_t *opcode_timings_p6_de_mod3[8] = { // clang-format off /* FADDP FMULP FCOMPP*/ &fadd_op, &fmul_op, INVALID, &float_op, @@ -1491,7 +1491,7 @@ static const macro_op_t *opcode_timings_de_mod3[8] = { // clang-format on }; -static const macro_op_t *opcode_timings_df[8] = { +static const macro_op_t *opcode_timings_p6_df[8] = { // clang-format off /* FILDiw FISTiw FISTPiw*/ &load_float_op, INVALID, &fstore_op, &fstore_op, @@ -1499,7 +1499,7 @@ static const macro_op_t *opcode_timings_df[8] = { INVALID, &load_float_op, &complex_float_l_op, &fstore_op, // clang-format on }; -static const macro_op_t *opcode_timings_df_mod3[8] = { +static const macro_op_t *opcode_timings_p6_df_mod3[8] = { // clang-format off INVALID, INVALID, INVALID, INVALID, /* FSTSW AX*/ @@ -1865,47 +1865,47 @@ codegen_timing_p6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(ui switch (last_prefix) { case 0x0f: - ins_table = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + ins_table = mod3 ? opcode_timings_p6_0f_mod3 : opcode_timings_p6_0f; deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; break; case 0xd8: - ins_table = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + ins_table = mod3 ? opcode_timings_p6_d8_mod3 : opcode_timings_p6_d8; deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; opcode = (opcode >> 3) & 7; break; case 0xd9: - ins_table = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + ins_table = mod3 ? opcode_timings_p6_d9_mod3 : opcode_timings_p6_d9; deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xda: - ins_table = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + ins_table = mod3 ? opcode_timings_p6_da_mod3 : opcode_timings_p6_da; deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; opcode = (opcode >> 3) & 7; break; case 0xdb: - ins_table = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + ins_table = mod3 ? opcode_timings_p6_db_mod3 : opcode_timings_p6_db; deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xdc: - ins_table = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + ins_table = mod3 ? opcode_timings_p6_dc_mod3 : opcode_timings_p6_dc; deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; opcode = (opcode >> 3) & 7; break; case 0xdd: - ins_table = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + ins_table = mod3 ? opcode_timings_p6_dd_mod3 : opcode_timings_p6_dd; deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; opcode = (opcode >> 3) & 7; break; case 0xde: - ins_table = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + ins_table = mod3 ? opcode_timings_p6_de_mod3 : opcode_timings_p6_de; deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; opcode = (opcode >> 3) & 7; break; case 0xdf: - ins_table = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + ins_table = mod3 ? opcode_timings_p6_df_mod3 : opcode_timings_p6_df; deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; opcode = (opcode >> 3) & 7; break; @@ -1914,13 +1914,13 @@ codegen_timing_p6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(ui switch (opcode) { case 0x80: case 0x82: - ins_table = mod3 ? opcode_timings_80_mod3 : opcode_timings_80; + ins_table = mod3 ? opcode_timings_p6_80_mod3 : opcode_timings_p6_80; deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; opcode = (fetchdat >> 3) & 7; break; case 0x81: case 0x83: - ins_table = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + ins_table = mod3 ? opcode_timings_p6_8x_mod3 : opcode_timings_p6_8x; deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; opcode = (fetchdat >> 3) & 7; break; @@ -1928,7 +1928,7 @@ codegen_timing_p6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(ui case 0xc0: case 0xd0: case 0xd2: - ins_table = mod3 ? opcode_timings_shift_b_mod3 : opcode_timings_shift_b; + ins_table = mod3 ? opcode_timings_p6_shift_b_mod3 : opcode_timings_p6_shift_b; deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; opcode = (fetchdat >> 3) & 7; break; @@ -1936,29 +1936,29 @@ codegen_timing_p6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(ui case 0xc1: case 0xd1: case 0xd3: - ins_table = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + ins_table = mod3 ? opcode_timings_p6_shift_mod3 : opcode_timings_p6_shift; deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; opcode = (fetchdat >> 3) & 7; break; case 0xf6: - ins_table = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + ins_table = mod3 ? opcode_timings_p6_f6_mod3 : opcode_timings_p6_f6; deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; opcode = (fetchdat >> 3) & 7; break; case 0xf7: - ins_table = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + ins_table = mod3 ? opcode_timings_p6_f7_mod3 : opcode_timings_p6_f7; deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; opcode = (fetchdat >> 3) & 7; break; case 0xff: - ins_table = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + ins_table = mod3 ? opcode_timings_p6_ff_mod3 : opcode_timings_p6_ff; deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; opcode = (fetchdat >> 3) & 7; break; default: - ins_table = mod3 ? opcode_timings_mod3 : opcode_timings; + ins_table = mod3 ? opcode_timings_p6_mod3 : opcode_timings_p6; deps = mod3 ? opcode_deps_mod3 : opcode_deps; break; } diff --git a/src/cpu/codegen_timing_pentium.c b/src/cpu/codegen_timing_pentium.c index af344307f..aa78ee1c9 100644 --- a/src/cpu/codegen_timing_pentium.c +++ b/src/cpu/codegen_timing_pentium.c @@ -110,7 +110,7 @@ static uint32_t addr_regmask; static int fpu_latency; static int fpu_st_latency[8]; -static uint64_t opcode_timings[256] = { +static uint64_t opcode_timings_p6[256] = { // clang-format off /* ADD ADD ADD ADD*/ /*00*/ PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, @@ -247,7 +247,7 @@ static uint64_t opcode_timings[256] = { // clang-format on }; -static uint64_t opcode_timings_mod3[256] = { +static uint64_t opcode_timings_p6_mod3[256] = { // clang-format off /* ADD ADD ADD ADD*/ /*00*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, @@ -385,7 +385,7 @@ static uint64_t opcode_timings_mod3[256] = { // clang-format on }; -static uint64_t opcode_timings_0f[256] = { +static uint64_t opcode_timings_p6_0f[256] = { // clang-format off /*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10), INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID, @@ -468,7 +468,7 @@ static uint64_t opcode_timings_0f[256] = { PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, // clang-format on }; -static uint64_t opcode_timings_0f_mod3[256] = { +static uint64_t opcode_timings_p6_0f_mod3[256] = { // clang-format off /*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10), INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID, @@ -552,20 +552,20 @@ static uint64_t opcode_timings_0f_mod3[256] = { // clang-format on }; -static uint64_t opcode_timings_shift[8] = { +static uint64_t opcode_timings_p6_shift[8] = { // clang-format off PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, // clang-format on }; -static uint64_t opcode_timings_shift_mod3[8] = { +static uint64_t opcode_timings_p6_shift_mod3[8] = { // clang-format off PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, // clang-format on }; -static uint64_t opcode_timings_f6[8] = { +static uint64_t opcode_timings_p6_f6[8] = { // clang-format off /* TST NOT NEG*/ PAIR_UV | CYCLES_RM, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), @@ -573,7 +573,7 @@ static uint64_t opcode_timings_f6[8] = { PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(17), PAIR_NP | CYCLES(22) // clang-format on }; -static uint64_t opcode_timings_f6_mod3[8] = { +static uint64_t opcode_timings_p6_f6_mod3[8] = { // clang-format off /* TST NOT NEG*/ PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), @@ -581,7 +581,7 @@ static uint64_t opcode_timings_f6_mod3[8] = { PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(17), PAIR_NP | CYCLES(22) // clang-format on }; -static uint64_t opcode_timings_f7[8] = { +static uint64_t opcode_timings_p6_f7[8] = { // clang-format off /* TST NOT NEG*/ PAIR_UV | CYCLES_RM, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), @@ -589,7 +589,7 @@ static uint64_t opcode_timings_f7[8] = { PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(25,41), PAIR_NP | CYCLES_MULTI(30,46) // clang-format on }; -static uint64_t opcode_timings_f7_mod3[8] = { +static uint64_t opcode_timings_p6_f7_mod3[8] = { // clang-format off /* TST NOT NEG*/ PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), @@ -597,7 +597,7 @@ static uint64_t opcode_timings_f7_mod3[8] = { PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(25,41), PAIR_NP | CYCLES_MULTI(30,46) // clang-format on }; -static uint64_t opcode_timings_ff[8] = { +static uint64_t opcode_timings_p6_ff[8] = { // clang-format off /* INC DEC CALL CALL far*/ PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(0), @@ -605,7 +605,7 @@ static uint64_t opcode_timings_ff[8] = { PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(2), INVALID // clang-format on }; -static uint64_t opcode_timings_ff_mod3[8] = { +static uint64_t opcode_timings_p6_ff_mod3[8] = { // clang-format off /* INC DEC CALL CALL far*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(0), @@ -614,7 +614,7 @@ static uint64_t opcode_timings_ff_mod3[8] = { // clang-format on }; -static uint64_t opcode_timings_d8[8] = { +static uint64_t opcode_timings_p6_d8[8] = { // clang-format off /* FADDs FMULs FCOMs FCOMPs*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), @@ -622,7 +622,7 @@ static uint64_t opcode_timings_d8[8] = { PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) // clang-format on }; -static uint64_t opcode_timings_d8_mod3[8] = { +static uint64_t opcode_timings_p6_d8_mod3[8] = { // clang-format off /* FADD FMUL FCOM FCOMP*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), @@ -631,7 +631,7 @@ static uint64_t opcode_timings_d8_mod3[8] = { // clang-format on }; -static uint64_t opcode_timings_d9[8] = { +static uint64_t opcode_timings_p6_d9[8] = { // clang-format off /* FLDs FSTs FSTPs*/ PAIR_FX | FPU_CYCLES(1,0,0), INVALID, PAIR_NP | FPU_CYCLES(2,0,0), PAIR_NP | FPU_CYCLES(2,0,0), @@ -639,7 +639,7 @@ static uint64_t opcode_timings_d9[8] = { PAIR_NP | FPU_CYCLES(32,0,0), PAIR_NP | FPU_CYCLES(8,0,0), PAIR_NP | FPU_CYCLES(48,0,0), PAIR_NP | FPU_CYCLES(2,0,0) // clang-format on }; -static uint64_t opcode_timings_d9_mod3[64] = { +static uint64_t opcode_timings_p6_d9_mod3[64] = { // clang-format off /*FLD*/ PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), @@ -672,7 +672,7 @@ static uint64_t opcode_timings_d9_mod3[64] = { // clang-format on }; -static uint64_t opcode_timings_da[8] = { +static uint64_t opcode_timings_p6_da[8] = { // clang-format off /* FIADDl FIMULl FICOMl FICOMPl*/ PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(4,0,0), PAIR_NP | FPU_CYCLES(4,0,0), @@ -680,7 +680,7 @@ static uint64_t opcode_timings_da[8] = { PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(42,38,2), PAIR_NP | FPU_CYCLES(42,38,2) // clang-format on }; -static uint64_t opcode_timings_da_mod3[8] = { +static uint64_t opcode_timings_p6_da_mod3[8] = { // clang-format off INVALID, INVALID, INVALID, INVALID, /* FCOMPP*/ @@ -688,7 +688,7 @@ static uint64_t opcode_timings_da_mod3[8] = { // clang-format on }; -static uint64_t opcode_timings_db[8] = { +static uint64_t opcode_timings_p6_db[8] = { // clang-format off /* FLDil FSTil FSTPil*/ PAIR_NP | FPU_CYCLES(3,2,2), INVALID, PAIR_NP | FPU_CYCLES(6,0,0), PAIR_NP | FPU_CYCLES(6,0,0), @@ -696,7 +696,7 @@ static uint64_t opcode_timings_db[8] = { INVALID, PAIR_NP | FPU_CYCLES(3,0,0), INVALID, PAIR_NP | FPU_CYCLES(3,0,0) // clang-format on }; -static uint64_t opcode_timings_db_mod3[64] = { +static uint64_t opcode_timings_p6_db_mod3[64] = { // clang-format off INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -726,7 +726,7 @@ static uint64_t opcode_timings_db_mod3[64] = { // clang-format on }; -static uint64_t opcode_timings_dc[8] = { +static uint64_t opcode_timings_p6_dc[8] = { // clang-format off /* FADDd FMULd FCOMd FCOMPd*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), @@ -734,7 +734,7 @@ static uint64_t opcode_timings_dc[8] = { PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) // clang-format on }; -static uint64_t opcode_timings_dc_mod3[8] = { +static uint64_t opcode_timings_p6_dc_mod3[8] = { // clang-format off /* opFADDr opFMULr*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), INVALID, INVALID, @@ -743,7 +743,7 @@ static uint64_t opcode_timings_dc_mod3[8] = { // clang-format on }; -static uint64_t opcode_timings_dd[8] = { +static uint64_t opcode_timings_p6_dd[8] = { // clang-format off /* FLDd FSTd FSTPd*/ PAIR_FX | FPU_CYCLES(1,0,0), INVALID, PAIR_NP | FPU_CYCLES(2,0,0), PAIR_NP | FPU_CYCLES(2,0,0), @@ -751,7 +751,7 @@ static uint64_t opcode_timings_dd[8] = { PAIR_NP | FPU_CYCLES(70,0,0), INVALID, PAIR_NP | FPU_CYCLES(127,0,0), PAIR_NP | FPU_CYCLES(6,0,0) // clang-format on }; -static uint64_t opcode_timings_dd_mod3[8] = { +static uint64_t opcode_timings_p6_dd_mod3[8] = { // clang-format off /* FFFREE FST FSTP*/ PAIR_NP | FPU_CYCLES(2,0,0), INVALID, PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), @@ -760,7 +760,7 @@ static uint64_t opcode_timings_dd_mod3[8] = { // clang-format on }; -static uint64_t opcode_timings_de[8] = { +static uint64_t opcode_timings_p6_de[8] = { // clang-format off /* FIADDw FIMULw FICOMw FICOMPw*/ PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(4,0,0), PAIR_NP | FPU_CYCLES(4,0,0), @@ -768,7 +768,7 @@ static uint64_t opcode_timings_de[8] = { PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(42,38,2), PAIR_NP | FPU_CYCLES(42,38,2) // clang-format on }; -static uint64_t opcode_timings_de_mod3[8] = { +static uint64_t opcode_timings_p6_de_mod3[8] = { // clang-format off /* FADDP FMULP FCOMPP*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), INVALID, PAIR_FX | FPU_CYCLES(1,0,0), @@ -777,7 +777,7 @@ static uint64_t opcode_timings_de_mod3[8] = { // clang-format on }; -static uint64_t opcode_timings_df[8] = { +static uint64_t opcode_timings_p6_df[8] = { // clang-format off /* FILDiw FISTiw FISTPiw*/ PAIR_NP | FPU_CYCLES(3,2,2), INVALID, PAIR_NP | FPU_CYCLES(6,0,0), PAIR_NP | FPU_CYCLES(6,0,0), @@ -785,7 +785,7 @@ static uint64_t opcode_timings_df[8] = { INVALID, PAIR_NP | FPU_CYCLES(3,2,2), PAIR_NP | FPU_CYCLES(148,0,0), PAIR_NP | FPU_CYCLES(6,0,0) // clang-format on }; -static uint64_t opcode_timings_df_mod3[8] = { +static uint64_t opcode_timings_p6_df_mod3[8] = { // clang-format off INVALID, INVALID, INVALID, INVALID, /* FSTSW AX*/ @@ -793,25 +793,25 @@ static uint64_t opcode_timings_df_mod3[8] = { // clang-format on }; -static uint64_t opcode_timings_81[8] = { +static uint64_t opcode_timings_p6_81[8] = { // clang-format off PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RM | CYCLES_IMM1632 // clang-format on }; -static uint64_t opcode_timings_81_mod3[8] = { +static uint64_t opcode_timings_p6_81_mod3[8] = { // clang-format off PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG // clang-format on }; -static uint64_t opcode_timings_8x[8] = { +static uint64_t opcode_timings_p6_8x[8] = { // clang-format off PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RM | CYCLES_IMM8 // clang-format on }; -static uint64_t opcode_timings_8x_mod3[8] = { +static uint64_t opcode_timings_p6_8x_mod3[8] = { // clang-format off PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG @@ -1097,47 +1097,47 @@ codegen_timing_pentium_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUS switch (last_prefix) { case 0x0f: - timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + timings = mod3 ? opcode_timings_p6_0f_mod3 : opcode_timings_p6_0f; deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; break; case 0xd8: - timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + timings = mod3 ? opcode_timings_p6_d8_mod3 : opcode_timings_p6_d8; deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; opcode = (opcode >> 3) & 7; break; case 0xd9: - timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + timings = mod3 ? opcode_timings_p6_d9_mod3 : opcode_timings_p6_d9; deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xda: - timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + timings = mod3 ? opcode_timings_p6_da_mod3 : opcode_timings_p6_da; deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; opcode = (opcode >> 3) & 7; break; case 0xdb: - timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + timings = mod3 ? opcode_timings_p6_db_mod3 : opcode_timings_p6_db; deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xdc: - timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + timings = mod3 ? opcode_timings_p6_dc_mod3 : opcode_timings_p6_dc; deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; opcode = (opcode >> 3) & 7; break; case 0xdd: - timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + timings = mod3 ? opcode_timings_p6_dd_mod3 : opcode_timings_p6_dd; deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; opcode = (opcode >> 3) & 7; break; case 0xde: - timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + timings = mod3 ? opcode_timings_p6_de_mod3 : opcode_timings_p6_de; deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; opcode = (opcode >> 3) & 7; break; case 0xdf: - timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + timings = mod3 ? opcode_timings_p6_df_mod3 : opcode_timings_p6_df; deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; opcode = (opcode >> 3) & 7; break; @@ -1147,12 +1147,12 @@ codegen_timing_pentium_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUS case 0x80: case 0x82: case 0x83: - timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + timings = mod3 ? opcode_timings_p6_8x_mod3 : opcode_timings_p6_8x; deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; opcode = (fetchdat >> 3) & 7; break; case 0x81: - timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + timings = mod3 ? opcode_timings_p6_81_mod3 : opcode_timings_p6_81; deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; opcode = (fetchdat >> 3) & 7; break; @@ -1161,36 +1161,36 @@ codegen_timing_pentium_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUS case 0xc1: case 0xd0: case 0xd1: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + timings = mod3 ? opcode_timings_p6_shift_mod3 : opcode_timings_p6_shift; deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; opcode = (fetchdat >> 3) & 7; break; case 0xd2: case 0xd3: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + timings = mod3 ? opcode_timings_p6_shift_mod3 : opcode_timings_p6_shift; deps = mod3 ? opcode_deps_shift_cl_mod3 : opcode_deps_shift_cl; opcode = (fetchdat >> 3) & 7; break; case 0xf6: - timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + timings = mod3 ? opcode_timings_p6_f6_mod3 : opcode_timings_p6_f6; deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; opcode = (fetchdat >> 3) & 7; break; case 0xf7: - timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + timings = mod3 ? opcode_timings_p6_f7_mod3 : opcode_timings_p6_f7; deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; opcode = (fetchdat >> 3) & 7; break; case 0xff: - timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + timings = mod3 ? opcode_timings_p6_ff_mod3 : opcode_timings_p6_ff; deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; opcode = (fetchdat >> 3) & 7; break; default: - timings = mod3 ? opcode_timings_mod3 : opcode_timings; + timings = mod3 ? opcode_timings_p6_mod3 : opcode_timings_p6; deps = mod3 ? opcode_deps_mod3 : opcode_deps; break; } diff --git a/src/cpu/codegen_timing_winchip.c b/src/cpu/codegen_timing_winchip.c index 76ea8b954..3597517ce 100644 --- a/src/cpu/codegen_timing_winchip.c +++ b/src/cpu/codegen_timing_winchip.c @@ -18,7 +18,7 @@ #define CYCLES(c) (int *) c #define CYCLES2(c16, c32) (int *) ((-1 & ~0xffff) | c16 | (c32 << 8)) -static int *opcode_timings[256] = { +static int *opcode_timings_winchip[256] = { // clang-format off /*00*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), NULL, /*10*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), @@ -42,7 +42,7 @@ static int *opcode_timings[256] = { // clang-format on }; -static int *opcode_timings_mod3[256] = { +static int *opcode_timings_winchip_mod3[256] = { // clang-format off /*00*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), NULL, /*10*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), @@ -66,7 +66,7 @@ static int *opcode_timings_mod3[256] = { // clang-format on }; -static int *opcode_timings_0f[256] = { +static int *opcode_timings_winchip_0f[256] = { // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), NULL, CYCLES(195), CYCLES(7), NULL, CYCLES(1000), CYCLES(10000), NULL, NULL, NULL, NULL, NULL, NULL, /*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -89,7 +89,7 @@ static int *opcode_timings_0f[256] = { /*f0*/ NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, // clang-format on }; -static int *opcode_timings_0f_mod3[256] = { +static int *opcode_timings_winchip_0f_mod3[256] = { // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), NULL, CYCLES(195), CYCLES(7), NULL, CYCLES(1000), CYCLES(10000), NULL, NULL, NULL, NULL, NULL, NULL, /*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -113,68 +113,68 @@ static int *opcode_timings_0f_mod3[256] = { // clang-format on }; -static int *opcode_timings_shift[8] = { +static int *opcode_timings_winchip_shift[8] = { // clang-format off CYCLES(7), CYCLES(7), CYCLES(10), CYCLES(10), CYCLES(7), CYCLES(7), CYCLES(7), CYCLES(7) // clang-format on }; -static int *opcode_timings_shift_mod3[8] = { +static int *opcode_timings_winchip_shift_mod3[8] = { // clang-format off CYCLES(3), CYCLES(3), CYCLES(9), CYCLES(9), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3) // clang-format on }; -static int *opcode_timings_f6[8] = { +static int *opcode_timings_winchip_f6[8] = { // clang-format off &timing_rm, NULL, &timing_mm, &timing_mm, CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) // clang-format on }; -static int *opcode_timings_f6_mod3[8] = { +static int *opcode_timings_winchip_f6_mod3[8] = { // clang-format off &timing_rr, NULL, &timing_rr, &timing_rr, CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) // clang-format on }; -static int *opcode_timings_f7[8] = { +static int *opcode_timings_winchip_f7[8] = { // clang-format off &timing_rm, NULL, &timing_mm, &timing_mm, CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) // clang-format on }; -static int *opcode_timings_f7_mod3[8] = { +static int *opcode_timings_winchip_f7_mod3[8] = { // clang-format off &timing_rr, NULL, &timing_rr, &timing_rr, CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) // clang-format on }; -static int *opcode_timings_ff[8] = { +static int *opcode_timings_winchip_ff[8] = { // clang-format off &timing_mm, &timing_mm, CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), NULL // clang-format on }; -static int *opcode_timings_ff_mod3[8] = { +static int *opcode_timings_winchip_ff_mod3[8] = { // clang-format off &timing_rr, &timing_rr, CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), NULL // clang-format on }; -static int *opcode_timings_d8[8] = { +static int *opcode_timings_winchip_d8[8] = { // clang-format off /* FADDil FMULil FCOMil FCOMPil FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(10), CYCLES(12), CYCLES(9), CYCLES(9), CYCLES(10), CYCLES(10), CYCLES(78), CYCLES(78) // clang-format on }; -static int *opcode_timings_d8_mod3[8] = { +static int *opcode_timings_winchip_d8_mod3[8] = { // clang-format off /* FADD FMUL FCOM FCOMP FSUB FSUBR FDIV FDIVR*/ CYCLES(4), CYCLES(6), CYCLES(3), CYCLES(3), CYCLES(4), CYCLES(4), CYCLES(72), CYCLES(72) // clang-format on }; -static int *opcode_timings_d9[8] = { +static int *opcode_timings_winchip_d9[8] = { // clang-format off /* FLDs FSTs FSTPs FLDENV FLDCW FSTENV FSTCW*/ CYCLES(2), NULL, CYCLES(7), CYCLES(7), CYCLES(34), CYCLES(4), CYCLES(67), CYCLES(3) // clang-format on }; -static int *opcode_timings_d9_mod3[64] = { +static int *opcode_timings_winchip_d9_mod3[64] = { // clang-format off /*FLD*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), @@ -195,25 +195,25 @@ static int *opcode_timings_d9_mod3[64] = { // clang-format on }; -static int *opcode_timings_da[8] = { +static int *opcode_timings_winchip_da[8] = { // clang-format off /* FADDil FMULil FCOMil FCOMPil FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(10), CYCLES(12), CYCLES(9), CYCLES(9), CYCLES(10), CYCLES(10), CYCLES(78), CYCLES(78) // clang-format on }; -static int *opcode_timings_da_mod3[8] = { +static int *opcode_timings_winchip_da_mod3[8] = { // clang-format off NULL, NULL, NULL, NULL, NULL, CYCLES(5), NULL, NULL // clang-format on }; -static int *opcode_timings_db[8] = { +static int *opcode_timings_winchip_db[8] = { // clang-format off /* FLDil FSTil FSTPil FLDe FSTPe*/ CYCLES(6), NULL, CYCLES(7), CYCLES(7), NULL, CYCLES(8), NULL, CYCLES(8) // clang-format on }; -static int *opcode_timings_db_mod3[64] = { +static int *opcode_timings_winchip_db_mod3[64] = { // clang-format off NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -227,71 +227,71 @@ static int *opcode_timings_db_mod3[64] = { // clang-format on }; -static int *opcode_timings_dc[8] = { +static int *opcode_timings_winchip_dc[8] = { // clang-format off /* opFADDd_a16 opFMULd_a16 opFCOMd_a16 opFCOMPd_a16 opFSUBd_a16 opFSUBRd_a16 opFDIVd_a16 opFDIVRd_a16*/ CYCLES(6), CYCLES(8), CYCLES(5), CYCLES(5), CYCLES(6), CYCLES(6), CYCLES(74), CYCLES(74) // clang-format on }; -static int *opcode_timings_dc_mod3[8] = { +static int *opcode_timings_winchip_dc_mod3[8] = { // clang-format off /* opFADDr opFMULr opFSUBRr opFSUBr opFDIVRr opFDIVr*/ CYCLES(4), CYCLES(6), NULL, NULL, CYCLES(4), CYCLES(4), CYCLES(72), CYCLES(72) // clang-format on }; -static int *opcode_timings_dd[8] = { +static int *opcode_timings_winchip_dd[8] = { // clang-format off /* FLDd FSTd FSTPd FRSTOR FSAVE FSTSW*/ CYCLES(2), NULL, CYCLES(8), CYCLES(8), CYCLES(131), NULL, CYCLES(154), CYCLES(5) // clang-format on }; -static int *opcode_timings_dd_mod3[8] = { +static int *opcode_timings_winchip_dd_mod3[8] = { // clang-format off /* FFFREE FST FSTP FUCOM FUCOMP*/ CYCLES(3), NULL, CYCLES(1), CYCLES(1), CYCLES(3), CYCLES(3), NULL, NULL // clang-format on }; -static int *opcode_timings_de[8] = { +static int *opcode_timings_winchip_de[8] = { // clang-format off /* FADDiw FMULiw FCOMiw FCOMPiw FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(10), CYCLES(12), CYCLES(9), CYCLES(9), CYCLES(10), CYCLES(10), CYCLES(78), CYCLES(78) // clang-format on }; -static int *opcode_timings_de_mod3[8] = { +static int *opcode_timings_winchip_de_mod3[8] = { // clang-format off /* FADD FMUL FCOMPP FSUB FSUBR FDIV FDIVR*/ CYCLES(4), CYCLES(6), NULL, CYCLES(3), CYCLES(4), CYCLES(4), CYCLES(72), CYCLES(72) // clang-format on }; -static int *opcode_timings_df[8] = { +static int *opcode_timings_winchip_df[8] = { // clang-format off /* FILDiw FISTiw FISTPiw FILDiq FBSTP FISTPiq*/ CYCLES(6), NULL, CYCLES(7), CYCLES(7), NULL, CYCLES(8), CYCLES(172), CYCLES(8) // clang-format on }; -static int *opcode_timings_df_mod3[8] = { +static int *opcode_timings_winchip_df_mod3[8] = { // clang-format off /* FFREE FST FSTP FUCOM FUCOMP*/ CYCLES(3), NULL, CYCLES(1), CYCLES(1), CYCLES(3), CYCLES(3), NULL, NULL // clang-format on }; -static int *opcode_timings_8x[8] = { +static int *opcode_timings_winchip_8x[8] = { // clang-format off &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm }; -static int *opcode_timings_8x_mod3[8] = +static int *opcode_timings_winchip_8x_mod3[8] = { &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm }; -static int *opcode_timings_81[8] = +static int *opcode_timings_winchip_81[8] = { &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm }; -static int *opcode_timings_81_mod3[8] = +static int *opcode_timings_winchip_81_mod3[8] = { &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm // clang-format on @@ -330,7 +330,7 @@ codegen_timing_winchip_start(void) void codegen_timing_winchip_prefix(uint8_t prefix, uint32_t fetchdat) { - timing_count += COUNT(opcode_timings[prefix], 0); + timing_count += COUNT(opcode_timings_winchip[prefix], 0); last_prefix = prefix; } @@ -344,47 +344,47 @@ codegen_timing_winchip_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUS switch (last_prefix) { case 0x0f: - timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + timings = mod3 ? opcode_timings_winchip_0f_mod3 : opcode_timings_winchip_0f; deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; break; case 0xd8: - timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + timings = mod3 ? opcode_timings_winchip_d8_mod3 : opcode_timings_winchip_d8; deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; opcode = (opcode >> 3) & 7; break; case 0xd9: - timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + timings = mod3 ? opcode_timings_winchip_d9_mod3 : opcode_timings_winchip_d9; deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xda: - timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + timings = mod3 ? opcode_timings_winchip_da_mod3 : opcode_timings_winchip_da; deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; opcode = (opcode >> 3) & 7; break; case 0xdb: - timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + timings = mod3 ? opcode_timings_winchip_db_mod3 : opcode_timings_winchip_db; deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xdc: - timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + timings = mod3 ? opcode_timings_winchip_dc_mod3 : opcode_timings_winchip_dc; deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; opcode = (opcode >> 3) & 7; break; case 0xdd: - timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + timings = mod3 ? opcode_timings_winchip_dd_mod3 : opcode_timings_winchip_dd; deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; opcode = (opcode >> 3) & 7; break; case 0xde: - timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + timings = mod3 ? opcode_timings_winchip_de_mod3 : opcode_timings_winchip_de; deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; opcode = (opcode >> 3) & 7; break; case 0xdf: - timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + timings = mod3 ? opcode_timings_winchip_df_mod3 : opcode_timings_winchip_df; deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; opcode = (opcode >> 3) & 7; break; @@ -394,12 +394,12 @@ codegen_timing_winchip_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUS case 0x80: case 0x82: case 0x83: - timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + timings = mod3 ? opcode_timings_winchip_8x_mod3 : opcode_timings_winchip_8x; deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; opcode = (fetchdat >> 3) & 7; break; case 0x81: - timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + timings = mod3 ? opcode_timings_winchip_81_mod3 : opcode_timings_winchip_81; deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; opcode = (fetchdat >> 3) & 7; break; @@ -410,29 +410,29 @@ codegen_timing_winchip_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUS case 0xd1: case 0xd2: case 0xd3: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + timings = mod3 ? opcode_timings_winchip_shift_mod3 : opcode_timings_winchip_shift; deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; opcode = (fetchdat >> 3) & 7; break; case 0xf6: - timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + timings = mod3 ? opcode_timings_winchip_f6_mod3 : opcode_timings_winchip_f6; deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; opcode = (fetchdat >> 3) & 7; break; case 0xf7: - timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + timings = mod3 ? opcode_timings_winchip_f7_mod3 : opcode_timings_winchip_f7; deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; opcode = (fetchdat >> 3) & 7; break; case 0xff: - timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + timings = mod3 ? opcode_timings_winchip_ff_mod3 : opcode_timings_winchip_ff; deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; opcode = (fetchdat >> 3) & 7; break; default: - timings = mod3 ? opcode_timings_mod3 : opcode_timings; + timings = mod3 ? opcode_timings_winchip_mod3 : opcode_timings_winchip; deps = mod3 ? opcode_deps_mod3 : opcode_deps; break; } diff --git a/src/cpu/codegen_timing_winchip2.c b/src/cpu/codegen_timing_winchip2.c index ea206f068..f37fe3366 100644 --- a/src/cpu/codegen_timing_winchip2.c +++ b/src/cpu/codegen_timing_winchip2.c @@ -63,7 +63,7 @@ #define INVALID 0 -static uint32_t opcode_timings[256] = { +static uint32_t opcode_timings_winchip2[256] = { // clang-format off /*00*/ CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), INVALID, /*10*/ CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), @@ -87,7 +87,7 @@ static uint32_t opcode_timings[256] = { // clang-format on }; -static uint32_t opcode_timings_mod3[256] = { +static uint32_t opcode_timings_winchip2_mod3[256] = { // clang-format off /*00*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), INVALID, /*10*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), @@ -111,7 +111,7 @@ static uint32_t opcode_timings_mod3[256] = { // clang-format on }; -static uint32_t opcode_timings_0f[256] = { +static uint32_t opcode_timings_winchip2_0f[256] = { // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), INVALID, CYCLES(195), CYCLES(7), INVALID, CYCLES(1000), CYCLES(10000), INVALID, INVALID, INVALID, CYCLES_3DNOW(1), CYCLES(1), CYCLES_3DNOW(1), /*10*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -134,7 +134,7 @@ static uint32_t opcode_timings_0f[256] = { /*f0*/ INVALID, CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), INVALID, CYCLES_MMX_MUL(2), INVALID, INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, // clang-format on }; -static uint32_t opcode_timings_0f_mod3[256] = { +static uint32_t opcode_timings_winchip2_0f_mod3[256] = { // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), INVALID, CYCLES(195), CYCLES(7), INVALID, CYCLES(1000), CYCLES(10000), INVALID, INVALID, INVALID, CYCLES_3DNOW(1), CYCLES(1), CYCLES_3DNOW(1), /*10*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -158,49 +158,49 @@ static uint32_t opcode_timings_0f_mod3[256] = { // clang-format on }; -static uint32_t opcode_timings_shift[8] = { +static uint32_t opcode_timings_winchip2_shift[8] = { // clang-format off CYCLES(7), CYCLES(7), CYCLES(10), CYCLES(10), CYCLES(7), CYCLES(7), CYCLES(7), CYCLES(7) // clang-format on }; -static uint32_t opcode_timings_shift_mod3[8] = { +static uint32_t opcode_timings_winchip2_shift_mod3[8] = { // clang-format off CYCLES(3), CYCLES(3), CYCLES(9), CYCLES(9), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3) // clang-format on }; -static uint32_t opcode_timings_f6[8] = { +static uint32_t opcode_timings_winchip2_f6[8] = { // clang-format off CYCLES(2), INVALID, CYCLES(2), CYCLES(2), CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) // clang-format on }; -static uint32_t opcode_timings_f6_mod3[8] = { +static uint32_t opcode_timings_winchip2_f6_mod3[8] = { // clang-format off CYCLES(1), INVALID, CYCLES(1), CYCLES(1), CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) // clang-format on }; -static uint32_t opcode_timings_f7[8] = { +static uint32_t opcode_timings_winchip2_f7[8] = { // clang-format off CYCLES(2), INVALID, CYCLES(2), CYCLES(2), CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) // clang-format on }; -static uint32_t opcode_timings_f7_mod3[8] = { +static uint32_t opcode_timings_winchip2_f7_mod3[8] = { // clang-format off CYCLES(1), INVALID, CYCLES(1), CYCLES(1), CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) // clang-format on }; -static uint32_t opcode_timings_ff[8] = { +static uint32_t opcode_timings_winchip2_ff[8] = { // clang-format off CYCLES(2), CYCLES(2), CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), INVALID // clang-format on }; -static uint32_t opcode_timings_ff_mod3[8] = { +static uint32_t opcode_timings_winchip2_ff_mod3[8] = { // clang-format off CYCLES(1), CYCLES(1), CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), INVALID // clang-format on }; -static uint32_t opcode_timings_d8[8] = { +static uint32_t opcode_timings_winchip2_d8[8] = { // clang-format off /* FADDs FMULs FCOMs FCOMPs*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), @@ -208,7 +208,7 @@ static uint32_t opcode_timings_d8[8] = { FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2) // clang-format on }; -static uint32_t opcode_timings_d8_mod3[8] = { +static uint32_t opcode_timings_winchip2_d8_mod3[8] = { // clang-format off /* FADD FMUL FCOM FCOMP*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), @@ -217,7 +217,7 @@ static uint32_t opcode_timings_d8_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_d9[8] = { +static uint32_t opcode_timings_winchip2_d9[8] = { // clang-format off /* FLDs FSTs FSTPs*/ FPU_CYCLES(1,0,0), INVALID, FPU_CYCLES(2,0,0), FPU_CYCLES(2,0,0), @@ -225,7 +225,7 @@ static uint32_t opcode_timings_d9[8] = { FPU_CYCLES(32,0,0), FPU_CYCLES(8,0,0), FPU_CYCLES(48,0,0), FPU_CYCLES(2,0,0) // clang-format on }; -static uint32_t opcode_timings_d9_mod3[64] = { +static uint32_t opcode_timings_winchip2_d9_mod3[64] = { // clang-format off /*FLD*/ FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), @@ -258,7 +258,7 @@ static uint32_t opcode_timings_d9_mod3[64] = { // clang-format on }; -static uint32_t opcode_timings_da[8] = { +static uint32_t opcode_timings_winchip2_da[8] = { // clang-format off /* FIADDl FIMULl FICOMl FICOMPl*/ FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), FPU_CYCLES(4,0,0), FPU_CYCLES(4,0,0), @@ -266,7 +266,7 @@ static uint32_t opcode_timings_da[8] = { FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), FPU_CYCLES(42,38,2), FPU_CYCLES(42,38,2) // clang-format on }; -static uint32_t opcode_timings_da_mod3[8] = { +static uint32_t opcode_timings_winchip2_da_mod3[8] = { // clang-format off INVALID, INVALID, INVALID, INVALID, /* FCOMPP*/ @@ -274,7 +274,7 @@ static uint32_t opcode_timings_da_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_db[8] = { +static uint32_t opcode_timings_winchip2_db[8] = { // clang-format off /* FLDil FSTil FSTPil*/ FPU_CYCLES(3,2,2), INVALID, FPU_CYCLES(6,0,0), FPU_CYCLES(6,0,0), @@ -282,7 +282,7 @@ static uint32_t opcode_timings_db[8] = { INVALID, FPU_CYCLES(3,0,0), INVALID, FPU_CYCLES(3,0,0) // clang-format on }; -static uint32_t opcode_timings_db_mod3[64] = { +static uint32_t opcode_timings_winchip2_db_mod3[64] = { // clang-format off INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -312,7 +312,7 @@ static uint32_t opcode_timings_db_mod3[64] = { // clang-format on }; -static uint32_t opcode_timings_dc[8] = { +static uint32_t opcode_timings_winchip2_dc[8] = { // clang-format off /* FADDd FMULd FCOMd FCOMPd*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), @@ -320,7 +320,7 @@ static uint32_t opcode_timings_dc[8] = { FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2) // clang-format on }; -static uint32_t opcode_timings_dc_mod3[8] = { +static uint32_t opcode_timings_winchip2_dc_mod3[8] = { // clang-format off /* opFADDr opFMULr*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2),INVALID, INVALID, @@ -329,7 +329,7 @@ static uint32_t opcode_timings_dc_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_dd[8] = { +static uint32_t opcode_timings_winchip2_dd[8] = { // clang-format off /* FLDd FSTd FSTPd*/ FPU_CYCLES(1,0,0), INVALID, FPU_CYCLES(2,0,0), FPU_CYCLES(2,0,0), @@ -337,7 +337,7 @@ static uint32_t opcode_timings_dd[8] = { FPU_CYCLES(70,0,0), INVALID, FPU_CYCLES(127,0,0), FPU_CYCLES(6,0,0) // clang-format on }; -static uint32_t opcode_timings_dd_mod3[8] = { +static uint32_t opcode_timings_winchip2_dd_mod3[8] = { // clang-format off /* FFFREE FST FSTP*/ FPU_CYCLES(2,0,0), INVALID, FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), @@ -346,7 +346,7 @@ static uint32_t opcode_timings_dd_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_de[8] = { +static uint32_t opcode_timings_winchip2_de[8] = { // clang-format off /* FIADDw FIMULw FICOMw FICOMPw*/ FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), FPU_CYCLES(4,0,0), FPU_CYCLES(4,0,0), @@ -354,7 +354,7 @@ static uint32_t opcode_timings_de[8] = { FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), FPU_CYCLES(42,38,2), FPU_CYCLES(42,38,2) // clang-format on }; -static uint32_t opcode_timings_de_mod3[8] = { +static uint32_t opcode_timings_winchip2_de_mod3[8] = { // clang-format off /* FADDP FMULP FCOMPP*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), INVALID, FPU_CYCLES(1,0,0), @@ -363,7 +363,7 @@ static uint32_t opcode_timings_de_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_df[8] = { +static uint32_t opcode_timings_winchip2_df[8] = { // clang-format off /* FILDiw FISTiw FISTPiw*/ FPU_CYCLES(3,2,2), INVALID, FPU_CYCLES(6,0,0), FPU_CYCLES(6,0,0), @@ -371,7 +371,7 @@ static uint32_t opcode_timings_df[8] = { INVALID, FPU_CYCLES(3,2,2), FPU_CYCLES(148,0,0), FPU_CYCLES(6,0,0) // clang-format on }; -static uint32_t opcode_timings_df_mod3[8] = { +static uint32_t opcode_timings_winchip2_df_mod3[8] = { // clang-format off INVALID, INVALID, INVALID, INVALID, /* FSTSW AX*/ @@ -379,22 +379,22 @@ static uint32_t opcode_timings_df_mod3[8] = { // clang-format on }; -static uint32_t opcode_timings_8x[8] = { +static uint32_t opcode_timings_winchip2_8x[8] = { // clang-format off CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2) // clang-format on }; -static uint32_t opcode_timings_8x_mod3[8] = { +static uint32_t opcode_timings_winchip2_8x_mod3[8] = { // clang-format off CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2) // clang-format on }; -static uint32_t opcode_timings_81[8] = { +static uint32_t opcode_timings_winchip2_81[8] = { // clang-format off CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2) // clang-format on }; -static uint32_t opcode_timings_81_mod3[8] = { +static uint32_t opcode_timings_winchip2_81_mod3[8] = { // clang-format off CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2) // clang-format on @@ -613,47 +613,47 @@ codegen_timing_winchip2_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNU switch (last_prefix) { case 0x0f: - timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + timings = mod3 ? opcode_timings_winchip2_0f_mod3 : opcode_timings_winchip2_0f; deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; break; case 0xd8: - timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + timings = mod3 ? opcode_timings_winchip2_d8_mod3 : opcode_timings_winchip2_d8; deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; opcode = (opcode >> 3) & 7; break; case 0xd9: - timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + timings = mod3 ? opcode_timings_winchip2_d9_mod3 : opcode_timings_winchip2_d9; deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xda: - timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + timings = mod3 ? opcode_timings_winchip2_da_mod3 : opcode_timings_winchip2_da; deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; opcode = (opcode >> 3) & 7; break; case 0xdb: - timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + timings = mod3 ? opcode_timings_winchip2_db_mod3 : opcode_timings_winchip2_db; deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xdc: - timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + timings = mod3 ? opcode_timings_winchip2_dc_mod3 : opcode_timings_winchip2_dc; deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; opcode = (opcode >> 3) & 7; break; case 0xdd: - timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + timings = mod3 ? opcode_timings_winchip2_dd_mod3 : opcode_timings_winchip2_dd; deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; opcode = (opcode >> 3) & 7; break; case 0xde: - timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + timings = mod3 ? opcode_timings_winchip2_de_mod3 : opcode_timings_winchip2_de; deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; opcode = (opcode >> 3) & 7; break; case 0xdf: - timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + timings = mod3 ? opcode_timings_winchip2_df_mod3 : opcode_timings_winchip2_df; deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; opcode = (opcode >> 3) & 7; break; @@ -663,12 +663,12 @@ codegen_timing_winchip2_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNU case 0x80: case 0x82: case 0x83: - timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + timings = mod3 ? opcode_timings_winchip2_8x_mod3 : opcode_timings_winchip2_8x; deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; opcode = (fetchdat >> 3) & 7; break; case 0x81: - timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + timings = mod3 ? opcode_timings_winchip2_81_mod3 : opcode_timings_winchip2_81; deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; opcode = (fetchdat >> 3) & 7; break; @@ -679,29 +679,29 @@ codegen_timing_winchip2_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNU case 0xd1: case 0xd2: case 0xd3: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + timings = mod3 ? opcode_timings_winchip2_shift_mod3 : opcode_timings_winchip2_shift; deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; opcode = (fetchdat >> 3) & 7; break; case 0xf6: - timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + timings = mod3 ? opcode_timings_winchip2_f6_mod3 : opcode_timings_winchip2_f6; deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; opcode = (fetchdat >> 3) & 7; break; case 0xf7: - timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + timings = mod3 ? opcode_timings_winchip2_f7_mod3 : opcode_timings_winchip2_f7; deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; opcode = (fetchdat >> 3) & 7; break; case 0xff: - timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + timings = mod3 ? opcode_timings_winchip2_ff_mod3 : opcode_timings_winchip2_ff; deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; opcode = (fetchdat >> 3) & 7; break; default: - timings = mod3 ? opcode_timings_mod3 : opcode_timings; + timings = mod3 ? opcode_timings_winchip2_mod3 : opcode_timings_winchip2; deps = mod3 ? opcode_deps_mod3 : opcode_deps; break; } diff --git a/src/cpu/cpu.c b/src/cpu/cpu.c index 943715a4e..12ea4bdbc 100644 --- a/src/cpu/cpu.c +++ b/src/cpu/cpu.c @@ -47,7 +47,7 @@ #ifdef USE_DYNAREC # include "codegen.h" -#endif +#endif /* USE_DYNAREC */ #include "x87_timings.h" #define CCR1_USE_SMI (1 << 1) @@ -119,7 +119,7 @@ const OpFn *x86_dynarec_opcodes_df_a32; const OpFn *x86_dynarec_opcodes_REPE; const OpFn *x86_dynarec_opcodes_REPNE; const OpFn *x86_dynarec_opcodes_3DNOW; -#endif +#endif /* USE_DYNAREC */ const OpFn *x86_opcodes; const OpFn *x86_opcodes_0f; @@ -504,7 +504,7 @@ cpu_set(void) #ifdef USE_ACYCS acycs = 0; -#endif +#endif /* USE_ACYCS */ soft_reset_pci = 0; cpu_init = 0; @@ -576,7 +576,7 @@ cpu_set(void) x86_setopcodes(ops_386, ops_386_0f, dynarec_ops_386, dynarec_ops_386_0f); #else x86_setopcodes(ops_386, ops_386_0f); -#endif +#endif /* USE_DYNAREC */ x86_setopcodes_2386(ops_2386_386, ops_2386_386_0f); x86_opcodes_REPE = ops_REPE; x86_opcodes_REPNE = ops_REPNE; @@ -587,7 +587,7 @@ cpu_set(void) x86_dynarec_opcodes_REPE = dynarec_ops_REPE; x86_dynarec_opcodes_REPNE = dynarec_ops_REPNE; x86_dynarec_opcodes_3DNOW = dynarec_ops_3DNOW; -#endif +#endif /* USE_DYNAREC */ if (hasfpu) { #ifdef USE_DYNAREC @@ -626,7 +626,7 @@ cpu_set(void) x86_dynarec_opcodes_df_a16 = dynarec_ops_fpu_df_a16; x86_dynarec_opcodes_df_a32 = dynarec_ops_fpu_df_a32; } -#endif +#endif /* USE_DYNAREC */ if (fpu_softfloat) { x86_opcodes_d8_a16 = ops_sf_fpu_d8_a16; x86_opcodes_d8_a32 = ops_sf_fpu_d8_a32; @@ -714,7 +714,7 @@ cpu_set(void) x86_dynarec_opcodes_de_a32 = dynarec_ops_nofpu_a32; x86_dynarec_opcodes_df_a16 = dynarec_ops_nofpu_a16; x86_dynarec_opcodes_df_a32 = dynarec_ops_nofpu_a32; -#endif +#endif /* USE_DYNAREC */ x86_opcodes_d8_a16 = ops_nofpu_a16; x86_opcodes_d8_a32 = ops_nofpu_a32; x86_opcodes_d9_a16 = ops_nofpu_a16; @@ -752,7 +752,7 @@ cpu_set(void) #ifdef USE_DYNAREC codegen_timing_set(&codegen_timing_486); -#endif +#endif /* USE_DYNAREC */ memset(&msr, 0, sizeof(msr)); @@ -774,7 +774,7 @@ cpu_set(void) x86_setopcodes(ops_186, ops_186_0f, dynarec_ops_186, dynarec_ops_186_0f); #else x86_setopcodes(ops_186, ops_186_0f); -#endif +#endif /* USE_DYNAREC */ x86_setopcodes_2386(ops_2386_186, ops_2386_186_0f); break; @@ -783,7 +783,7 @@ cpu_set(void) x86_setopcodes(ops_286, ops_286_0f, dynarec_ops_286, dynarec_ops_286_0f); #else x86_setopcodes(ops_286, ops_286_0f); -#endif +#endif /* USE_DYNAREC */ x86_setopcodes_2386(ops_2386_286, ops_2386_286_0f); if (fpu_type == FPU_287) { @@ -819,7 +819,7 @@ cpu_set(void) x86_dynarec_opcodes_df_a16 = dynarec_ops_fpu_287_df_a16; x86_dynarec_opcodes_df_a32 = dynarec_ops_fpu_287_df_a32; } -#endif +#endif /* USE_DYNAREC */ if (fpu_softfloat) { x86_opcodes_d9_a16 = ops_sf_fpu_287_d9_a16; x86_opcodes_d9_a32 = ops_sf_fpu_287_d9_a32; @@ -921,7 +921,7 @@ cpu_set(void) x86_setopcodes(ops_386, ops_ibm486_0f, dynarec_ops_386, dynarec_ops_ibm486_0f); #else x86_setopcodes(ops_386, ops_ibm486_0f); -#endif +#endif /* USE_DYNAREC */ x86_setopcodes_2386(ops_2386_386, ops_2386_ibm486_0f); cpu_features = CPU_FEATURE_MSR; fallthrough; @@ -961,7 +961,7 @@ cpu_set(void) x86_dynarec_opcodes_df_a16 = dynarec_ops_fpu_287_df_a16; x86_dynarec_opcodes_df_a32 = dynarec_ops_fpu_287_df_a32; } -#endif +#endif /* USE_DYNAREC */ if (fpu_softfloat) { x86_opcodes_d9_a16 = ops_sf_fpu_287_d9_a16; x86_opcodes_d9_a32 = ops_sf_fpu_287_d9_a32; @@ -1067,7 +1067,7 @@ cpu_set(void) x86_setopcodes(ops_386, ops_486_0f, dynarec_ops_386, dynarec_ops_486_0f); #else x86_setopcodes(ops_386, ops_486_0f); -#endif +#endif /* USE_DYNAREC */ x86_setopcodes_2386(ops_2386_386, ops_2386_486_0f); timing_rr = 1; /* register dest - register src */ @@ -1107,7 +1107,7 @@ cpu_set(void) x86_setopcodes(ops_386, ops_486_0f, dynarec_ops_386, dynarec_ops_486_0f); #else x86_setopcodes(ops_386, ops_486_0f); -#endif +#endif /* USE_DYNAREC */ x86_setopcodes_2386(ops_2386_386, ops_2386_486_0f); timing_rr = 1; /* register dest - register src */ @@ -1160,7 +1160,7 @@ cpu_set(void) x86_setopcodes(ops_386, ops_486_0f, dynarec_ops_386, dynarec_ops_486_0f); #else x86_setopcodes(ops_386, ops_486_0f); -#endif +#endif /* USE_DYNAREC */ x86_setopcodes_2386(ops_2386_386, ops_2386_486_0f); timing_rr = 1; /* register dest - register src */ @@ -1209,7 +1209,7 @@ cpu_set(void) x86_setopcodes(ops_386, ops_stpc_0f); else x86_setopcodes(ops_386, ops_c486_0f); -#endif +#endif /* USE_DYNAREC */ timing_rr = 1; /* register dest - register src */ timing_rm = 3; /* register dest - memory src */ @@ -1252,7 +1252,7 @@ cpu_set(void) x86_setopcodes(ops_386, ops_c486_0f, dynarec_ops_386, dynarec_ops_c486_0f); #else x86_setopcodes(ops_386, ops_c486_0f); -#endif +#endif /* USE_DYNAREC */ timing_rr = 1; /* register dest - register src */ timing_rm = 1; /* register dest - memory src */ @@ -1301,7 +1301,7 @@ cpu_set(void) x86_setopcodes(ops_386, ops_winchip2_0f); else x86_setopcodes(ops_386, ops_winchip_0f); -#endif +#endif /* USE_DYNAREC */ timing_rr = 1; /* register dest - register src */ timing_rm = 2; /* register dest - memory src */ @@ -1350,7 +1350,7 @@ cpu_set(void) codegen_timing_set(&codegen_timing_winchip2); else codegen_timing_set(&codegen_timing_winchip); -#endif +#endif /* USE_DYNAREC */ break; case CPU_P24T: @@ -1366,7 +1366,7 @@ cpu_set(void) x86_setopcodes(ops_386, ops_pentiummmx_0f); else x86_setopcodes(ops_386, ops_pentium_0f); -#endif +#endif /* USE_DYNAREC */ timing_rr = 1; /* register dest - register src */ timing_rm = 2; /* register dest - memory src */ @@ -1409,10 +1409,10 @@ cpu_set(void) cpu_CR4_mask = CR4_VME | CR4_PVI | CR4_TSD | CR4_DE | CR4_PSE | CR4_MCE | CR4_PCE; #ifdef USE_DYNAREC codegen_timing_set(&codegen_timing_pentium); -#endif +#endif /* USE_DYNAREC */ break; -#if defined(DEV_BRANCH) && defined(USE_CYRIX_6X86) +#ifdef USE_CYRIX_6X86 case CPU_Cx6x86: case CPU_Cx6x86L: case CPU_CxGX1: @@ -1434,7 +1434,7 @@ cpu_set(void) x86_dynarec_opcodes_df_a16 = dynarec_ops_fpu_686_df_a16; x86_dynarec_opcodes_df_a32 = dynarec_ops_fpu_686_df_a32; } -# endif +# endif /* USE_DYNAREC */ if (fpu_softfloat) { x86_opcodes_da_a16 = ops_sf_fpu_686_da_a16; x86_opcodes_da_a32 = ops_sf_fpu_686_da_a32; @@ -1472,7 +1472,7 @@ cpu_set(void) # if 0 x86_setopcodes(ops_386, ops_c6x86_0f); # endif -# endif +# endif /* USE_DYNAREC */ timing_rr = 1; /* register dest - register src */ timing_rm = 1; /* register dest - memory src */ @@ -1524,19 +1524,19 @@ cpu_set(void) # ifdef USE_DYNAREC codegen_timing_set(&codegen_timing_686); -# endif +# endif /* USE_DYNAREC */ if ((cpu_s->cpu_type == CPU_Cx6x86L) || (cpu_s->cpu_type == CPU_Cx6x86MX)) ccr4 = 0x80; else if (CPU_Cx6x86) CPUID = 0; /* Disabled on powerup by default */ break; -#endif +#endif /* USE_CYRIX_6X86 */ -#if defined(DEV_BRANCH) && defined(USE_AMD_K5) +#ifdef USE_AMD_K5 case CPU_K5: case CPU_5K86: -#endif +#endif /* USE_AMD_K5 */ case CPU_K6: case CPU_K6_2: case CPU_K6_2C: @@ -1546,7 +1546,7 @@ cpu_set(void) #ifdef USE_DYNAREC if (cpu_s->cpu_type >= CPU_K6_2) x86_setopcodes(ops_386, ops_k62_0f, dynarec_ops_386, dynarec_ops_k62_0f); -# if defined(DEV_BRANCH) && defined(USE_AMD_K5) +# ifdef USE_AMD_K5 else if (cpu_s->cpu_type == CPU_K6) x86_setopcodes(ops_386, ops_k6_0f, dynarec_ops_386, dynarec_ops_k6_0f); else @@ -1554,11 +1554,11 @@ cpu_set(void) # else else x86_setopcodes(ops_386, ops_k6_0f, dynarec_ops_386, dynarec_ops_k6_0f); -# endif +# endif /* USE_AMD_K5 */ #else if (cpu_s->cpu_type >= CPU_K6_2) x86_setopcodes(ops_386, ops_k62_0f); -# if defined(DEV_BRANCH) && defined(USE_AMD_K5) +# ifdef USE_AMD_K5 else if (cpu_s->cpu_type == CPU_K6) x86_setopcodes(ops_386, ops_k6_0f); else @@ -1566,14 +1566,14 @@ cpu_set(void) # else else x86_setopcodes(ops_386, ops_k6_0f); -# endif -#endif +# endif /* USE_AMD_K5 */ +#endif /* USE_DYNAREC */ if ((cpu_s->cpu_type == CPU_K6_2P) || (cpu_s->cpu_type == CPU_K6_3P)) { x86_opcodes_3DNOW = ops_3DNOWE; #ifdef USE_DYNAREC x86_dynarec_opcodes_3DNOW = dynarec_ops_3DNOWE; -#endif +#endif /* USE_DYNAREC */ } timing_rr = 1; /* register dest - register src */ @@ -1613,7 +1613,7 @@ cpu_set(void) cpu_features |= CPU_FEATURE_3DNOW; if ((cpu_s->cpu_type == CPU_K6_2P) || (cpu_s->cpu_type == CPU_K6_3P)) cpu_features |= CPU_FEATURE_3DNOWE; -#if defined(DEV_BRANCH) && defined(USE_AMD_K5) +#ifdef USE_AMD_K5 cpu_CR4_mask = CR4_TSD | CR4_DE | CR4_MCE; if (cpu_s->cpu_type >= CPU_K6) { cpu_CR4_mask |= (CR4_VME | CR4_PVI | CR4_PSE); @@ -1629,11 +1629,11 @@ cpu_set(void) cpu_CR4_mask |= CR4_PCE; else if (cpu_s->cpu_type >= CPU_K6_2C) cpu_CR4_mask |= CR4_PGE; -#endif +#endif /* USE_AMD_K5 */ #ifdef USE_DYNAREC codegen_timing_set(&codegen_timing_k6); -#endif +#endif /* USE_DYNAREC */ break; case CPU_PENTIUMPRO: @@ -1668,7 +1668,7 @@ cpu_set(void) x86_setopcodes(ops_386, ops_pentium2d_0f); else x86_setopcodes(ops_386, ops_pentium2_0f); -#endif +#endif /* USE_DYNAREC */ if (fpu_softfloat) { x86_opcodes_da_a16 = ops_sf_fpu_686_da_a16; x86_opcodes_da_a32 = ops_sf_fpu_686_da_a32; @@ -1726,7 +1726,7 @@ cpu_set(void) #ifdef USE_DYNAREC codegen_timing_set(&codegen_timing_p6); -#endif +#endif /* USE_DYNAREC */ break; case CPU_CYRIX3S: @@ -1734,7 +1734,7 @@ cpu_set(void) x86_setopcodes(ops_386, ops_winchip2_0f, dynarec_ops_386, dynarec_ops_winchip2_0f); #else x86_setopcodes(ops_386, ops_winchip2_0f); -#endif +#endif /* USE_DYNAREC */ timing_rr = 1; /* register dest - register src */ timing_rm = 2; /* register dest - memory src */ timing_mr = 2; /* memory dest - register src */ @@ -1774,7 +1774,7 @@ cpu_set(void) #ifdef USE_DYNAREC codegen_timing_set(&codegen_timing_winchip); -#endif +#endif /* USE_DYNAREC */ break; default: @@ -1812,7 +1812,7 @@ cpu_set(void) cpu_exec = exec386_dynarec; cpu_use_exec = 1; } else -#endif +#endif /* defined(USE_DYNAREC) && !defined(USE_GDBSTUB) */ /* Use exec386 for CPU_IBM486SLC because it can reach 100 MHz. */ if ((cpu_s->cpu_type == CPU_IBM486SLC) || (cpu_s->cpu_type == CPU_IBM486BL) || cpu_iscyrix || (cpu_s->cpu_type > CPU_486DLC) || cpu_override_interpreter) { @@ -2065,7 +2065,7 @@ cpu_CPUID(void) EAX = EBX = ECX = EDX = 0; break; -#if defined(DEV_BRANCH) && defined(USE_AMD_K5) +#ifdef USE_AMD_K5 case CPU_K5: if (!EAX) { EAX = 0x00000001; @@ -2123,7 +2123,7 @@ cpu_CPUID(void) break; } break; -#endif +#endif /* USE_AMD_K5 */ case CPU_K6: switch (EAX) { @@ -2354,7 +2354,7 @@ cpu_CPUID(void) EAX = EBX = ECX = EDX = 0; break; -#if defined(DEV_BRANCH) && defined(USE_CYRIX_6X86) +#ifdef USE_CYRIX_6X86 case CPU_Cx6x86: if (!EAX) { EAX = 0x00000001; @@ -2410,7 +2410,7 @@ cpu_CPUID(void) } else EAX = EBX = ECX = EDX = 0; break; -#endif +#endif /* USE_CYRIX_6X86 */ case CPU_PENTIUMPRO: if (!EAX) { @@ -2565,10 +2565,10 @@ cpu_ven_reset(void) msr.amd_psor = (cpu_s->cpu_type >= CPU_K6_3) ? 0x008cULL : 0x018cULL; fallthrough; case CPU_K6_2: -#if defined(DEV_BRANCH) && defined(USE_AMD_K5) +#ifdef USE_AMD_K5 case CPU_K5: case CPU_5K86: -#endif +#endif /* USE_AMD_K5 */ case CPU_K6: msr.amd_efer = (cpu_s->cpu_type >= CPU_K6_2C) ? 2ULL : 0ULL; break; @@ -2789,10 +2789,10 @@ cpu_RDMSR(void) } break; -#if defined(DEV_BRANCH) && defined(USE_AMD_K5) +#ifdef USE_AMD_K5 case CPU_K5: case CPU_5K86: -#endif +#endif /* USE_AMD_K5 */ case CPU_K6: case CPU_K6_2: case CPU_K6_2C: @@ -3075,7 +3075,7 @@ pentium_invalid_rdmsr: cpu_log("RDMSR: ECX = %08X, val = %08X%08X\n", ECX, EDX, EAX); break; -#if defined(DEV_BRANCH) && defined(USE_CYRIX_6X86) +#ifdef USE_CYRIX_6X86 case CPU_Cx6x86: case CPU_Cx6x86L: case CPU_CxGX1: @@ -3115,7 +3115,7 @@ pentium_invalid_rdmsr: } cpu_log("RDMSR: ECX = %08X, val = %08X%08X\n", ECX, EDX, EAX); break; -#endif +#endif /* USE_CYRIX_6X86 */ case CPU_PENTIUMPRO: case CPU_PENTIUM2: @@ -3654,10 +3654,10 @@ cpu_WRMSR(void) } break; -#if defined(DEV_BRANCH) && defined(USE_AMD_K5) +#ifdef USE_AMD_K5 case CPU_K5: case CPU_5K86: -#endif +#endif /* USE_AMD_K5 */ case CPU_K6: case CPU_K6_2: case CPU_K6_2C: @@ -3918,7 +3918,7 @@ pentium_invalid_wrmsr: } break; -#if defined(DEV_BRANCH) && defined(USE_CYRIX_6X86) +#ifdef USE_CYRIX_6X86 case CPU_Cx6x86: case CPU_Cx6x86L: case CPU_CxGX1: @@ -3952,7 +3952,7 @@ pentium_invalid_wrmsr: break; } break; -#endif +#endif /* USE_CYRIX_6X86 */ case CPU_PENTIUMPRO: case CPU_PENTIUM2: @@ -4277,14 +4277,14 @@ cpu_write(uint16_t addr, uint8_t val, UNUSED(void *priv)) case 0xe8: /* CCR4 */ if ((ccr3 & 0xf0) == 0x10) { ccr4 = val; -#if defined(DEV_BRANCH) && defined(USE_CYRIX_6X86) +#ifdef USE_CYRIX_6X86 if (cpu_s->cpu_type >= CPU_Cx6x86) { if (val & 0x80) CPUID = cpu_s->cpuid_model; else CPUID = 0; } -#endif +#endif /* USE_CYRIX_6X86 */ } break; case 0xe9: /* CCR5 */ @@ -4358,7 +4358,7 @@ x86_setopcodes(const OpFn *opcodes, const OpFn *opcodes_0f) x86_opcodes = opcodes; x86_opcodes_0f = opcodes_0f; } -#endif +#endif /* USE_DYNAREC */ void x86_setopcodes_2386(const OpFn *opcodes, const OpFn *opcodes_0f) diff --git a/src/cpu/cpu_table.c b/src/cpu/cpu_table.c index ace51eed8..120cc6a1e 100644 --- a/src/cpu/cpu_table.c +++ b/src/cpu/cpu_table.c @@ -4474,7 +4474,7 @@ const cpu_family_t cpu_families[] = { { .name = "", 0 } } }, -#if defined(DEV_BRANCH) && defined(USE_AMD_K5) +#ifdef USE_AMD_K5 { .package = CPU_PKG_SOCKET5_7, .manufacturer = "AMD", @@ -4629,7 +4629,7 @@ const cpu_family_t cpu_families[] = { { .name = "", 0 } } }, -#endif +#endif /* USE_AMD_K5 */ { .package = CPU_PKG_SOCKET5_7, .manufacturer = "AMD", @@ -5982,7 +5982,7 @@ const cpu_family_t cpu_families[] = { { .name = "", 0 } } }, -#if defined(DEV_BRANCH) && defined(USE_CYRIX_6X86) +#ifdef USE_CYRIX_6X86 { .package = CPU_PKG_SOCKET5_7, .manufacturer = "Cyrix", @@ -6342,7 +6342,7 @@ const cpu_family_t cpu_families[] = { { .name = "", 0 } } }, -#endif +#endif /* USE_CYRIX_6X86 */ { .package = CPU_PKG_SOCKET8, .manufacturer = "Intel", diff --git a/src/cpu/x86.c b/src/cpu/x86.c index 5435f366d..4bf381ee4 100644 --- a/src/cpu/x86.c +++ b/src/cpu/x86.c @@ -83,7 +83,9 @@ int fpu_cycles = 0; int in_lock = 0; #ifdef ENABLE_X86_LOG +#if 0 void dumpregs(int); +#endif int x86_do_log = ENABLE_X86_LOG; int indump = 0; @@ -93,13 +95,14 @@ x86_log(const char *fmt, ...) { va_list ap; - if (x808x_do_log) { + if (x86_do_log) { va_start(ap, fmt); pclog_ex(fmt, ap); va_end(ap); } } +#if 0 void dumpregs(int force) { @@ -144,6 +147,7 @@ dumpregs(int force) x87_dumpregs(); indump = 0; } +#endif #else # define x86_log(fmt, ...) #endif diff --git a/src/cpu/x86_ops.h b/src/cpu/x86_ops.h index 86f54ee3b..3dfb3f917 100644 --- a/src/cpu/x86_ops.h +++ b/src/cpu/x86_ops.h @@ -90,10 +90,10 @@ extern const OpFn dynarec_ops_winchip2_0f[1024]; extern const OpFn dynarec_ops_pentium_0f[1024]; extern const OpFn dynarec_ops_pentiummmx_0f[1024]; -# if defined(DEV_BRANCH) && defined(USE_CYRIX_6X86) +# ifdef USE_CYRIX_6X86 extern const OpFn dynarec_ops_c6x86_0f[1024]; extern const OpFn dynarec_ops_c6x86mx_0f[1024]; -# endif +# endif /* USE_CYRIX_6X86 */ extern const OpFn dynarec_ops_k6_0f[1024]; extern const OpFn dynarec_ops_k62_0f[1024]; @@ -232,10 +232,10 @@ extern const OpFn ops_winchip2_0f[1024]; extern const OpFn ops_pentium_0f[1024]; extern const OpFn ops_pentiummmx_0f[1024]; -#if defined(DEV_BRANCH) && defined(USE_CYRIX_6X86) +#ifdef USE_CYRIX_6X86 extern const OpFn ops_c6x86_0f[1024]; extern const OpFn ops_c6x86mx_0f[1024]; -#endif +#endif /* USE_CYRIX_6X86 */ extern const OpFn ops_k6_0f[1024]; extern const OpFn ops_k62_0f[1024]; diff --git a/src/cpu/x86_ops_mmx_mov.h b/src/cpu/x86_ops_mmx_mov.h index c72c8143d..8855f8ccd 100644 --- a/src/cpu/x86_ops_mmx_mov.h +++ b/src/cpu/x86_ops_mmx_mov.h @@ -110,7 +110,7 @@ opMOVD_mm_l_a32(uint32_t fetchdat) return 0; } -#if defined(DEV_BRANCH) && defined(USE_CYRIX_6X86) +#ifdef USE_CYRIX_6X86 /*Cyrix maps both MOVD and SMINT to the same opcode*/ static int opMOVD_mm_l_a16_cx(uint32_t fetchdat) @@ -170,7 +170,7 @@ opMOVD_mm_l_a32_cx(uint32_t fetchdat) return 0; } -#endif +#endif /* USE_CYRIX_6X86 */ static int opMOVQ_q_mm_a16(uint32_t fetchdat) diff --git a/src/cpu/x87.c b/src/cpu/x87.c index 2aafd44e6..390c06dc2 100644 --- a/src/cpu/x87.c +++ b/src/cpu/x87.c @@ -26,22 +26,22 @@ uint32_t x87_op_off; uint16_t x87_pc_seg; uint16_t x87_op_seg; -#ifdef ENABLE_FPU_LOG -int fpu_do_log = ENABLE_FPU_LOG; +#ifdef ENABLE_FPU_X87_LOG +int fpu_x87_do_log = ENABLE_FPU_X87_LOG; void -fpu_log(const char *fmt, ...) +fpu_x87_log(const char *fmt, ...) { va_list ap; - if (fpu_do_log) { + if (fpu_x87_do_log) { va_start(ap, fmt); pclog_ex(fmt, ap); va_end(ap); } } #else -# define fpu_log(fmt, ...) +# define fpu_x87_log(fmt, ...) #endif #ifdef USE_NEW_DYNAREC @@ -546,17 +546,17 @@ unpack_FPU_TW(uint16_t tag_byte) return (twd >> 2); } -#ifdef ENABLE_808X_LOG +#ifdef ENABLE_FPU_X87_LOG void x87_dumpregs(void) { if (cpu_state.ismmx) { - fpu_log("MM0=%016llX\tMM1=%016llX\tMM2=%016llX\tMM3=%016llX\n", cpu_state.MM[0].q, cpu_state.MM[1].q, cpu_state.MM[2].q, cpu_state.MM[3].q); - fpu_log("MM4=%016llX\tMM5=%016llX\tMM6=%016llX\tMM7=%016llX\n", cpu_state.MM[4].q, cpu_state.MM[5].q, cpu_state.MM[6].q, cpu_state.MM[7].q); + fpu_x87_log("MM0=%016llX\tMM1=%016llX\tMM2=%016llX\tMM3=%016llX\n", cpu_state.MM[0].q, cpu_state.MM[1].q, cpu_state.MM[2].q, cpu_state.MM[3].q); + fpu_x87_log("MM4=%016llX\tMM5=%016llX\tMM6=%016llX\tMM7=%016llX\n", cpu_state.MM[4].q, cpu_state.MM[5].q, cpu_state.MM[6].q, cpu_state.MM[7].q); } else { - fpu_log("ST(0)=%f\tST(1)=%f\tST(2)=%f\tST(3)=%f\t\n", cpu_state.ST[cpu_state.TOP], cpu_state.ST[(cpu_state.TOP + 1) & 7], cpu_state.ST[(cpu_state.TOP + 2) & 7], cpu_state.ST[(cpu_state.TOP + 3) & 7]); - fpu_log("ST(4)=%f\tST(5)=%f\tST(6)=%f\tST(7)=%f\t\n", cpu_state.ST[(cpu_state.TOP + 4) & 7], cpu_state.ST[(cpu_state.TOP + 5) & 7], cpu_state.ST[(cpu_state.TOP + 6) & 7], cpu_state.ST[(cpu_state.TOP + 7) & 7]); + fpu_x87_log("ST(0)=%f\tST(1)=%f\tST(2)=%f\tST(3)=%f\t\n", cpu_state.ST[cpu_state.TOP], cpu_state.ST[(cpu_state.TOP + 1) & 7], cpu_state.ST[(cpu_state.TOP + 2) & 7], cpu_state.ST[(cpu_state.TOP + 3) & 7]); + fpu_x87_log("ST(4)=%f\tST(5)=%f\tST(6)=%f\tST(7)=%f\t\n", cpu_state.ST[(cpu_state.TOP + 4) & 7], cpu_state.ST[(cpu_state.TOP + 5) & 7], cpu_state.ST[(cpu_state.TOP + 6) & 7], cpu_state.ST[(cpu_state.TOP + 7) & 7]); } - fpu_log("Status = %04X Control = %04X Tag = %04X\n", cpu_state.npxs, cpu_state.npxc, x87_gettag()); + fpu_x87_log("Status = %04X Control = %04X Tag = %04X\n", cpu_state.npxs, cpu_state.npxc, x87_gettag()); } #endif diff --git a/src/device/CMakeLists.txt b/src/device/CMakeLists.txt index e521fc465..a044cb1b0 100644 --- a/src/device/CMakeLists.txt +++ b/src/device/CMakeLists.txt @@ -47,3 +47,12 @@ endif() if(LASERXT) target_compile_definitions(dev PRIVATE USE_LASERXT) endif() + +if(PCL) + target_compile_definitions(dev PRIVATE USE_PCL) +endif() + +if(WACOM) + target_compile_definitions(dev PRIVATE USE_WACOM) + target_sources(dev PRIVATE mouse_wacom_tablet.c) +endif() diff --git a/src/device/isamem.c b/src/device/isamem.c index d6fa04a64..f2332a5cc 100644 --- a/src/device/isamem.c +++ b/src/device/isamem.c @@ -1699,7 +1699,7 @@ static const device_t brxt_device = { .config = brxt_config }; -#if defined(DEV_BRANCH) && defined(USE_ISAMEM_BRAT) +#ifdef USE_ISAMEM_BRAT static const device_config_t brat_config[] = { // clang-format off { @@ -1804,7 +1804,7 @@ static const device_t brat_device = { .force_redraw = NULL, .config = brat_config }; -#endif +#endif /* USE_ISAMEM_BRAT */ static const device_config_t lotech_config[] = { // clang-format off @@ -1871,7 +1871,7 @@ static const device_t lotech_device = { .config = lotech_config }; -#if defined(DEV_BRANCH) && defined(USE_ISAMEM_RAMPAGE) +#ifdef USE_ISAMEM_RAMPAGE // TODO: Dual Paging support // TODO: Conventional memory suppport static const device_config_t rampage_config[] = { @@ -1939,9 +1939,9 @@ static const device_t rampage_device = { .force_redraw = NULL, .config = rampage_config }; -#endif +#endif /* USE_ISAMEM_RAMPAGE */ -#if defined(DEV_BRANCH) && defined(USE_ISAMEM_IAB) +#ifdef USE_ISAMEM_IAB static const device_config_t iab_config[] = { // clang-format off { @@ -2038,7 +2038,7 @@ static const device_t iab_device = { .force_redraw = NULL, .config = iab_config }; -#endif +#endif /* USE_ISAMEM_IAB */ static const struct { const device_t *dev; @@ -2063,15 +2063,15 @@ static const struct { { &ev159_device }, { &ev165a_device }, { &brxt_device }, -#if defined(DEV_BRANCH) && defined(USE_ISAMEM_BRAT) +#ifdef USE_ISAMEM_BRAT { &brat_device }, -#endif -#if defined(DEV_BRANCH) && defined(USE_ISAMEM_RAMPAGE) +#endif /* USE_ISAMEM_BRAT */ +#ifdef USE_ISAMEM_RAMPAGE { &rampage_device }, -#endif -#if defined(DEV_BRANCH) && defined(USE_ISAMEM_IAB) +#endif /* USE_ISAMEM_RAMPAGE */ +#ifdef USE_ISAMEM_IAB { &iab_device }, -#endif +#endif /* USE_ISAMEM_IAB */ { &lotech_device }, { NULL } // clang-format on diff --git a/src/device/keyboard.c b/src/device/keyboard.c index 5f9986d7b..ea81e7525 100644 --- a/src/device/keyboard.c +++ b/src/device/keyboard.c @@ -25,6 +25,7 @@ #include <86box/86box.h> #include <86box/machine.h> #include <86box/keyboard.h> +#include <86box/plat.h> #include "cpu.h" @@ -50,7 +51,8 @@ uint16_t key_uncapture_2 = 0x14f; /* End */ void (*keyboard_send)(uint16_t val); -static int recv_key[512]; /* keyboard input buffer */ +static int recv_key[512] = { 0 }; /* keyboard input buffer */ +static int recv_key_ui[512] = { 0 }; /* keyboard input buffer */ static int oldkey[512]; #if 0 static int keydelay[512]; @@ -238,16 +240,13 @@ keyboard_input(int down, uint16_t scan) } } - /* NOTE: Shouldn't this be some sort of bit shift? An array of 8 unsigned 64-bit integers - should be enough. */ -#if 0 - recv_key[scan >> 6] |= ((uint64_t) down << ((uint64_t) scan & 0x3fLL)); -#endif - /* pclog("Received scan code: %03X (%s)\n", scan & 0x1ff, down ? "down" : "up"); */ - recv_key[scan & 0x1ff] = down; + recv_key_ui[scan & 0x1ff] = down; - key_process(scan & 0x1ff, down); + if (mouse_capture || !kbd_req_capture || video_fullscreen) { + recv_key[scan & 0x1ff] = down; + key_process(scan & 0x1ff, down); + } } static uint8_t @@ -343,30 +342,36 @@ keyboard_recv(uint16_t key) return recv_key[key]; } +int +keyboard_recv_ui(uint16_t key) +{ + return recv_key_ui[key]; +} + /* Do we have Control-Alt-PgDn in the keyboard buffer? */ int keyboard_isfsenter(void) { - return ((recv_key[0x01d] || recv_key[0x11d]) && (recv_key[0x038] || recv_key[0x138]) && (recv_key[0x049] || recv_key[0x149])); + return ((recv_key_ui[0x01d] || recv_key_ui[0x11d]) && (recv_key_ui[0x038] || recv_key_ui[0x138]) && (recv_key_ui[0x049] || recv_key_ui[0x149])); } int keyboard_isfsenter_up(void) { - return (!recv_key[0x01d] && !recv_key[0x11d] && !recv_key[0x038] && !recv_key[0x138] && !recv_key[0x049] && !recv_key[0x149]); + return (!recv_key_ui[0x01d] && !recv_key_ui[0x11d] && !recv_key_ui[0x038] && !recv_key_ui[0x138] && !recv_key_ui[0x049] && !recv_key_ui[0x149]); } /* Do we have Control-Alt-PgDn in the keyboard buffer? */ int keyboard_isfsexit(void) { - return ((recv_key[0x01d] || recv_key[0x11d]) && (recv_key[0x038] || recv_key[0x138]) && (recv_key[0x051] || recv_key[0x151])); + return ((recv_key_ui[0x01d] || recv_key_ui[0x11d]) && (recv_key_ui[0x038] || recv_key_ui[0x138]) && (recv_key_ui[0x051] || recv_key_ui[0x151])); } int keyboard_isfsexit_up(void) { - return (!recv_key[0x01d] && !recv_key[0x11d] && !recv_key[0x038] && !recv_key[0x138] && !recv_key[0x051] && !recv_key[0x151]); + return (!recv_key_ui[0x01d] && !recv_key_ui[0x11d] && !recv_key_ui[0x038] && !recv_key_ui[0x138] && !recv_key_ui[0x051] && !recv_key_ui[0x151]); } /* Do we have the mouse uncapture combination in the keyboard buffer? */ @@ -374,10 +379,10 @@ int keyboard_ismsexit(void) { if ((key_prefix_2_1 != 0x000) || (key_prefix_2_2 != 0x000)) - return ((recv_key[key_prefix_1_1] || recv_key[key_prefix_1_2]) && - (recv_key[key_prefix_2_1] || recv_key[key_prefix_2_2]) && - (recv_key[key_uncapture_1] || recv_key[key_uncapture_2])); + return ((recv_key_ui[key_prefix_1_1] || recv_key_ui[key_prefix_1_2]) && + (recv_key_ui[key_prefix_2_1] || recv_key_ui[key_prefix_2_2]) && + (recv_key_ui[key_uncapture_1] || recv_key_ui[key_uncapture_2])); else - return ((recv_key[key_prefix_1_1] || recv_key[key_prefix_1_2]) && - (recv_key[key_uncapture_1] || recv_key[key_uncapture_2])); + return ((recv_key_ui[key_prefix_1_1] || recv_key_ui[key_prefix_1_2]) && + (recv_key_ui[key_uncapture_1] || recv_key_ui[key_uncapture_2])); } diff --git a/src/device/keyboard_at.c b/src/device/keyboard_at.c index bcb4d646b..882174e4e 100644 --- a/src/device/keyboard_at.c +++ b/src/device/keyboard_at.c @@ -2102,6 +2102,7 @@ keyboard_at_write(void *priv) /* TODO: This is supposed to resend multiple bytes after some commands. */ case 0xfe: /* resend last scan code */ keyboard_at_log("%s: resend last scan code\n", dev->name); + kbc_at_dev_queue_add(dev, 0xfa, 0); kbc_at_dev_queue_add(dev, dev->last_scan_code, 0); break; diff --git a/src/device/keyboard_xt.c b/src/device/keyboard_xt.c index 63882222e..b90f4a77a 100644 --- a/src/device/keyboard_xt.c +++ b/src/device/keyboard_xt.c @@ -928,11 +928,11 @@ kbd_read(uint16_t port, void *priv) else { /* LaserXT = Always 512k RAM; LaserXT/3 = Bit 0: set = 512k, clear = 256k. */ -#if defined(DEV_BRANCH) && defined(USE_LASERXT) +#ifdef USE_LASERXT if (kbd->type == KBD_TYPE_VTECH) ret = ((mem_size == 512) ? 0x0d : 0x0c) | (hasfpu ? 0x02 : 0x00); else -#endif +#endif /* USE_LASERXT */ ret = (kbd->pd & 0x0d) | (hasfpu ? 0x02 : 0x00); } } @@ -1292,7 +1292,7 @@ const device_t keyboard_xt_t1x00_device = { .config = NULL }; -#if defined(DEV_BRANCH) && defined(USE_LASERXT) +#ifdef USE_LASERXT const device_t keyboard_xt_lxt3_device = { .name = "VTech Laser XT3 Keyboard", .internal_name = "keyboard_xt_lxt3", @@ -1306,7 +1306,7 @@ const device_t keyboard_xt_lxt3_device = { .force_redraw = NULL, .config = NULL }; -#endif +#endif /* USE_LASERXT */ const device_t keyboard_xt_olivetti_device = { .name = "Olivetti XT Keyboard", diff --git a/src/device/mouse.c b/src/device/mouse.c index 8fd783ee2..fc193398b 100644 --- a/src/device/mouse.c +++ b/src/device/mouse.c @@ -98,7 +98,7 @@ static mouse_t mouse_devices[] = { { &mouse_wacom_device }, { &mouse_wacom_artpad_device }, #endif - { &mouse_mtouch_device }, + { &mouse_mtouch_device }, { NULL } // clang-format on }; diff --git a/src/device/smbus_sis5595.c b/src/device/smbus_sis5595.c index b76b38e17..191e7a6a5 100644 --- a/src/device/smbus_sis5595.c +++ b/src/device/smbus_sis5595.c @@ -148,7 +148,7 @@ smbus_sis5595_read_data(void *priv) break; } - smbus_sis5595_log("SMBus SIS5595: read(%02X) = %02x\n", addr, ret); + smbus_sis5595_log("SMBus SIS5595: read(%02X) = %02x\n", dev->addr, ret); return ret; } @@ -171,7 +171,7 @@ smbus_sis5595_write_data(void *priv, uint8_t val) uint16_t prev_stat; uint16_t timer_bytes = 0; - smbus_sis5595_log("SMBus SIS5595: write(%02X, %02X)\n", addr, val); + smbus_sis5595_log("SMBus SIS5595: write(%02X, %02X)\n", dev->addr, val); prev_stat = dev->next_stat; dev->next_stat = 0x0000; diff --git a/src/disk/hdc_ide_ali5213.c b/src/disk/hdc_ide_ali5213.c index eee3844c4..8ff3d392c 100644 --- a/src/disk/hdc_ide_ali5213.c +++ b/src/disk/hdc_ide_ali5213.c @@ -83,7 +83,7 @@ ali5213_write(uint16_t addr, uint8_t val, void *priv) { ali5213_t *dev = (ali5213_t *) priv; - ali5213_log("[%04X:%08X] [W] %02X = %02X (%i)\n", CS, cpu_state.pc, port, val, dev->tries); + ali5213_log("[%04X:%08X] [W] %02X = %02X\n", CS, cpu_state.pc, addr, val); switch (addr) { case 0xf4: /* Usually it writes 30h here */ @@ -179,7 +179,7 @@ ali5213_read(uint16_t addr, void *priv) break; } - ali5213_log("[%04X:%08X] [R] %02X = %02X\n", CS, cpu_state.pc, port, ret); + ali5213_log("[%04X:%08X] [R] %02X = %02X\n", CS, cpu_state.pc, addr, ret); return ret; } diff --git a/src/disk/hdc_ide_um8673f.c b/src/disk/hdc_ide_um8673f.c index 9ee149c7f..bc046dd26 100644 --- a/src/disk/hdc_ide_um8673f.c +++ b/src/disk/hdc_ide_um8673f.c @@ -79,7 +79,7 @@ um8673f_write(uint16_t addr, uint8_t val, void *priv) { um8673f_t *dev = (um8673f_t *) priv; - um8673f_log("[%04X:%08X] [W] %02X = %02X (%i)\n", CS, cpu_state.pc, port, val, dev->tries); + um8673f_log("[%04X:%08X] [W] %02X = %02X (%i)\n", CS, cpu_state.pc, addr, val, dev->tries); switch (addr) { case 0x108: @@ -140,7 +140,7 @@ um8673f_read(uint16_t addr, void *priv) break; } - um8673f_log("[%04X:%08X] [R] %02X = %02X\n", CS, cpu_state.pc, port, ret); + um8673f_log("[%04X:%08X] [R] %02X = %02X\n", CS, cpu_state.pc, addr, ret); return ret; } diff --git a/src/disk/hdc_ide_w83769f.c b/src/disk/hdc_ide_w83769f.c index ed34bc9fc..c2b053814 100644 --- a/src/disk/hdc_ide_w83769f.c +++ b/src/disk/hdc_ide_w83769f.c @@ -59,7 +59,7 @@ w83769f_log(const char *fmt, ...) { va_list ap; - if (cmd640_do_log) { + if (w83769f_do_log) { va_start(ap, fmt); pclog_ex(fmt, ap); va_end(ap); diff --git a/src/dma.c b/src/dma.c index cf7a75169..917bf5529 100644 --- a/src/dma.c +++ b/src/dma.c @@ -18,6 +18,7 @@ * Copyright 2016-2020 Miran Grca. * Copyright 2017-2020 Fred N. van Kempen. */ +#include #include #include #include diff --git a/src/fifo8.c b/src/fifo8.c index feef0deb2..9c73b4e82 100644 --- a/src/fifo8.c +++ b/src/fifo8.c @@ -21,14 +21,19 @@ #include <86box/86box.h> #include <86box/fifo8.h> +void +fifo8_reset(Fifo8 *fifo) +{ + fifo->num = 0; + fifo->head = 0; +} + void fifo8_create(Fifo8 *fifo, uint32_t capacity) { - fifo->data = (uint8_t *) malloc(capacity); - memset(fifo->data, 0, capacity); + fifo->data = (uint8_t *) calloc(1, capacity); fifo->capacity = capacity; - fifo->head = 0; - fifo->num = 0; + fifo8_reset(fifo); } void @@ -54,7 +59,7 @@ fifo8_push_all(Fifo8 *fifo, const uint8_t *data, uint32_t num) uint32_t start; uint32_t avail; - assert(fifo->num + num <= fifo->capacity); + assert((fifo->num + num) <= fifo->capacity); start = (fifo->head + fifo->num) % fifo->capacity; @@ -81,25 +86,72 @@ fifo8_pop(Fifo8 *fifo) return ret; } -const uint8_t * -fifo8_pop_buf(Fifo8 *fifo, uint32_t max, uint32_t *num) +static const uint8_t +*fifo8_peekpop_buf(Fifo8 *fifo, uint32_t max, uint32_t *numptr, int do_pop) { - const uint8_t *ret; + uint8_t *ret; + uint32_t num; + + assert((max > 0) && (max <= fifo->num)); + num = MIN(fifo->capacity - fifo->head, max); + ret = &fifo->data[fifo->head]; + + if (do_pop) { + fifo->head += num; + fifo->head %= fifo->capacity; + fifo->num -= num; + } + if (numptr) + *numptr = num; - assert(max > 0 && max <= fifo->num); - *num = MIN(fifo->capacity - fifo->head, max); - ret = &fifo->data[fifo->head]; - fifo->head += *num; - fifo->head %= fifo->capacity; - fifo->num -= *num; return ret; } -void -fifo8_reset(Fifo8 *fifo) +const uint8_t +*fifo8_peek_bufptr(Fifo8 *fifo, uint32_t max, uint32_t *numptr) { - fifo->num = 0; - fifo->head = 0; + return fifo8_peekpop_buf(fifo, max, numptr, 0); +} + +const uint8_t +*fifo8_pop_bufptr(Fifo8 *fifo, uint32_t max, uint32_t *numptr) +{ + return fifo8_peekpop_buf(fifo, max, numptr, 1); +} + +uint32_t +fifo8_pop_buf(Fifo8 *fifo, uint8_t *dest, uint32_t destlen) +{ + const uint8_t *buf; + uint32_t n1, n2 = 0; + uint32_t len; + + if (destlen == 0) + return 0; + + len = destlen; + buf = fifo8_pop_bufptr(fifo, len, &n1); + if (dest) + memcpy(dest, buf, n1); + + /* Add FIFO wraparound if needed */ + len -= n1; + len = MIN(len, fifo8_num_used(fifo)); + if (len) { + buf = fifo8_pop_bufptr(fifo, len, &n2); + if (dest) { + memcpy(&dest[n1], buf, n2); + } + } + + return n1 + n2; +} + +void +fifo8_drop(Fifo8 *fifo, uint32_t len) +{ + len -= fifo8_pop_buf(fifo, NULL, len); + assert(len == 0); } int diff --git a/src/floppy/fdc.c b/src/floppy/fdc.c index 3fd492bd2..6baef8ab8 100644 --- a/src/floppy/fdc.c +++ b/src/floppy/fdc.c @@ -1635,6 +1635,8 @@ fdc_callback(void *priv) return; } if (fdd_get_head(real_drive(fdc, fdc->drive)) == 0) { + fdc->sector = 1; + fdc->head |= 1; fdd_set_head(real_drive(fdc, fdc->drive), 1); if (!fdd_is_double_sided(real_drive(fdc, fdc->drive))) { fdc_noidam(fdc); diff --git a/src/floppy/fdc_monster.c b/src/floppy/fdc_monster.c index d2dedb5c0..504254ee0 100644 --- a/src/floppy/fdc_monster.c +++ b/src/floppy/fdc_monster.c @@ -11,10 +11,11 @@ * * * Authors: Jasmine Iwanek, + * Miran Grca, * - * Copyright 2022 Jasmine Iwanek. + * Copyright 2022-2024 Jasmine Iwanek. + * Copyright 2024 Miran Grca. */ - #include #include #include @@ -103,10 +104,10 @@ monster_fdc_close(void *priv) monster_fdc_t *dev = (monster_fdc_t *) priv; if (dev->nvr_path[0] != 0x00) { - FILE *f = nvr_fopen(dev->nvr_path, "wb"); - if (f != NULL) { - fwrite(dev->bios_rom.rom, 1, 0x2000, f); - fclose(f); + FILE *fp = nvr_fopen(dev->nvr_path, "wb"); + if (fp != NULL) { + fwrite(dev->bios_rom.rom, 1, 0x2000, fp); + fclose(fp); } } @@ -144,10 +145,10 @@ monster_fdc_init(UNUSED(const device_t *info)) if (rom_writes_enabled) { mem_mapping_set_write_handler(&dev->bios_rom.mapping, rom_write, rom_writew, rom_writel); sprintf(dev->nvr_path, "monster_fdc_%i.nvr", device_get_instance()); - FILE *f = nvr_fopen(dev->nvr_path, "rb"); - if (f != NULL) { - fread(dev->bios_rom.rom, 1, 0x2000, f); - fclose(f); + FILE *fp = nvr_fopen(dev->nvr_path, "rb"); + if (fp != NULL) { + (void) !fread(dev->bios_rom.rom, 1, 0x2000, fp); + fclose(fp); } } diff --git a/src/gdbstub.c b/src/gdbstub.c index 8322c5ef6..900f40dab 100644 --- a/src/gdbstub.c +++ b/src/gdbstub.c @@ -40,6 +40,7 @@ #include <86box/86box.h> #include "cpu.h" #include "x86seg.h" +#include "x87_sf.h" #include "x87.h" #include "x87_ops_conv.h" #include <86box/io.h> diff --git a/src/include/86box/chipset.h b/src/include/86box/chipset.h index f92c4e0f4..edec4b022 100644 --- a/src/include/86box/chipset.h +++ b/src/include/86box/chipset.h @@ -199,7 +199,7 @@ extern const device_t nec_mate_unk_device; extern const device_t phoenix_486_jumper_device; extern const device_t phoenix_486_jumper_pci_device; -#if defined(DEV_BRANCH) && defined(USE_OLIVETTI) +#ifdef USE_OLIVETTI extern const device_t olivetti_eva_device; -#endif +#endif /* USE_OLIVETTI */ #endif /*EMU_CHIPSET_H*/ diff --git a/src/include/86box/fifo8.h b/src/include/86box/fifo8.h index 9f88ec408..7dec1d57d 100644 --- a/src/include/86box/fifo8.h +++ b/src/include/86box/fifo8.h @@ -69,28 +69,80 @@ extern uint8_t fifo8_pop(Fifo8 *fifo); /** * fifo8_pop_buf: * @fifo: FIFO to pop from - * @max: maximum number of bytes to pop - * @num: actual number of returned bytes + * @dest: the buffer to write the data into (can be NULL) + * @destlen: size of @dest and maximum number of bytes to pop * - * Pop a number of elements from the FIFO up to a maximum of max. The buffer + * Pop a number of elements from the FIFO up to a maximum of @destlen. + * The popped data is copied into the @dest buffer. + * Care is taken when the data wraps around in the ring buffer. + * + * Returns: number of bytes popped. + */ +extern uint32_t fifo8_pop_buf(Fifo8 *fifo, uint8_t *dest, uint32_t destlen); + +/** + * fifo8_pop_bufptr: + * @fifo: FIFO to pop from + * @max: maximum number of bytes to pop + * @numptr: pointer filled with number of bytes returned (can be NULL) + * + * New code should prefer to use fifo8_pop_buf() instead of fifo8_pop_bufptr(). + * + * Pop a number of elements from the FIFO up to a maximum of @max. The buffer * containing the popped data is returned. This buffer points directly into - * the FIFO backing store and data is invalidated once any of the fifo8_* APIs - * are called on the FIFO. + * the internal FIFO backing store and data (without checking for overflow!) + * and is invalidated once any of the fifo8_* APIs are called on the FIFO. * * The function may return fewer bytes than requested when the data wraps * around in the ring buffer; in this case only a contiguous part of the data * is returned. * - * The number of valid bytes returned is populated in *num; will always return - * at least 1 byte. max must not be 0 or greater than the number of bytes in - * the FIFO. + * The number of valid bytes returned is populated in *@numptr; will always + * return at least 1 byte. max must not be 0 or greater than the number of + * bytes in the FIFO. * * Clients are responsible for checking the availability of requested data * using fifo8_num_used(). * * Returns: A pointer to popped data. */ -extern const uint8_t *fifo8_pop_buf(Fifo8 *fifo, uint32_t max, uint32_t *num); +extern const uint8_t *fifo8_pop_bufptr(Fifo8 *fifo, uint32_t max, uint32_t *numptr); + +/** + * fifo8_peek_bufptr: read upto max bytes from the fifo + * @fifo: FIFO to read from + * @max: maximum number of bytes to peek + * @numptr: pointer filled with number of bytes returned (can be NULL) + * + * Peek into a number of elements from the FIFO up to a maximum of @max. + * The buffer containing the data peeked into is returned. This buffer points + * directly into the FIFO backing store. Since data is invalidated once any + * of the fifo8_* APIs are called on the FIFO, it is the caller responsibility + * to access it before doing further API calls. + * + * The function may return fewer bytes than requested when the data wraps + * around in the ring buffer; in this case only a contiguous part of the data + * is returned. + * + * The number of valid bytes returned is populated in *@numptr; will always + * return at least 1 byte. max must not be 0 or greater than the number of + * bytes in the FIFO. + * + * Clients are responsible for checking the availability of requested data + * using fifo8_num_used(). + * + * Returns: A pointer to peekable data. + */ +extern const uint8_t *fifo8_peek_bufptr(Fifo8 *fifo, uint32_t max, uint32_t *numptr); + +/** + * fifo8_drop: + * @fifo: FIFO to drop bytes + * @len: number of bytes to drop + * + * Drop (consume) bytes from a FIFO. + */ +extern void fifo8_drop(Fifo8 *fifo, uint32_t len); /** * fifo8_reset: diff --git a/src/include/86box/keyboard.h b/src/include/86box/keyboard.h index bf9a76d73..f233637ff 100644 --- a/src/include/86box/keyboard.h +++ b/src/include/86box/keyboard.h @@ -222,9 +222,9 @@ extern const device_t keyboard_xt86_device; extern const device_t keyboard_xt_compaq_device; extern const device_t keyboard_xt_t1x00_device; extern const device_t keyboard_tandy_device; -# if defined(DEV_BRANCH) && defined(USE_LASERXT) +# ifdef USE_LASERXT extern const device_t keyboard_xt_lxt3_device; -# endif /*defined(DEV_BRANCH) && defined(USE_LASERXT) */ +# endif /* USE_LASERXT */ extern const device_t keyboard_xt_olivetti_device; extern const device_t keyboard_xt_zenith_device; extern const device_t keyboard_xt_hyundai_device; @@ -272,6 +272,7 @@ extern uint8_t keyboard_get_shift(void); extern void keyboard_get_states(uint8_t *cl, uint8_t *nl, uint8_t *sl); extern void keyboard_set_states(uint8_t cl, uint8_t nl, uint8_t sl); extern int keyboard_recv(uint16_t key); +extern int keyboard_recv_ui(uint16_t key); extern int keyboard_isfsenter(void); extern int keyboard_isfsenter_up(void); extern int keyboard_isfsexit(void); diff --git a/src/include/86box/machine.h b/src/include/86box/machine.h index 65cc0c71f..761e8ad04 100644 --- a/src/include/86box/machine.h +++ b/src/include/86box/machine.h @@ -438,9 +438,9 @@ extern int machine_at_ibmxt286_init(const machine_t *); extern int machine_at_siemens_init(const machine_t *); // Siemens PCD-2L. N82330 discrete machine. It segfaults in some places extern int machine_at_wellamerastar_init(const machine_t *); // Wells American A*Star with custom award BIOS -#if defined(DEV_BRANCH) && defined(USE_OPEN_AT) +#ifdef USE_OPEN_AT extern int machine_at_openat_init(const machine_t *); -#endif +#endif /* USE_OPEN_AT */ /* m_at_286_386sx.c */ extern int machine_at_tg286m_init(const machine_t *); @@ -469,9 +469,9 @@ extern int machine_at_deskmaster286_init(const machine_t *); extern int machine_at_pc8_init(const machine_t *); extern int machine_at_3302_init(const machine_t *); -#if defined(DEV_BRANCH) && defined(USE_OLIVETTI) +#ifdef USE_OLIVETTI extern int machine_at_m290_init(const machine_t *); -#endif +#endif /* USE_OLIVETTI */ extern int machine_at_shuttle386sx_init(const machine_t *); extern int machine_at_adi386sx_init(const machine_t *); @@ -727,9 +727,9 @@ extern int machine_at_i430vx_init(const machine_t *); extern int machine_at_ma23c_init(const machine_t *); extern int machine_at_nupro592_init(const machine_t *); extern int machine_at_tx97_init(const machine_t *); -#if defined(DEV_BRANCH) && defined(USE_AN430TX) +#ifdef USE_AN430TX extern int machine_at_an430tx_init(const machine_t *); -#endif +#endif /* USE_AN430TX */ extern int machine_at_ym430tx_init(const machine_t *); extern int machine_at_thunderbolt_init(const machine_t *); extern int machine_at_mb540n_init(const machine_t *); @@ -896,7 +896,7 @@ extern int machine_ps2_model_70_type4_init(const machine_t *); /* m_tandy.c */ extern int tandy1k_eeprom_read(void); -extern int machine_tandy_init(const machine_t *); +extern int machine_tandy1000sx_init(const machine_t *); extern int machine_tandy1000hx_init(const machine_t *); extern int machine_tandy1000sl2_init(const machine_t *); @@ -947,10 +947,10 @@ extern int machine_xt_compaq_deskpro_init(const machine_t *); extern int machine_xt_compaq_portable_init(const machine_t *); /* m_xt_laserxt.c */ -#if defined(DEV_BRANCH) && defined(USE_LASERXT) +#ifdef USE_LASERXT extern int machine_xt_laserxt_init(const machine_t *); extern int machine_xt_lxt3_init(const machine_t *); -#endif +#endif /* USE_LASERXT */ /* m_xt_philips.c */ extern int machine_xt_p3105_init(const machine_t *); diff --git a/src/include/86box/midi.h b/src/include/86box/midi.h index 57a20ae5c..3a41c2cb5 100644 --- a/src/include/86box/midi.h +++ b/src/include/86box/midi.h @@ -104,18 +104,18 @@ extern void midi_in_sysex(uint8_t *buffer, uint32_t len); #ifdef EMU_DEVICE_H extern const device_t rtmidi_output_device; extern const device_t rtmidi_input_device; -# if defined(DEV_BRANCH) && defined(USE_OPL4ML) +# ifdef USE_OPL4ML extern const device_t opl4_midi_device; -# endif +# endif /* USE_OPL4ML */ # ifdef USE_FLUIDSYNTH extern const device_t fluidsynth_device; -# endif +# endif /* USE_FLUIDSYNTH */ # ifdef USE_MUNT extern const device_t mt32_old_device; extern const device_t mt32_new_device; extern const device_t cm32l_device; extern const device_t cm32ln_device; -# endif +# endif /* USE_MUNT */ #endif #endif /*EMU_SOUND_MIDI_H*/ diff --git a/src/include/86box/mouse.h b/src/include/86box/mouse.h index 810d62293..bbe78413b 100644 --- a/src/include/86box/mouse.h +++ b/src/include/86box/mouse.h @@ -71,8 +71,10 @@ extern const device_t mouse_mssystems_device; extern const device_t mouse_msserial_device; extern const device_t mouse_ltserial_device; extern const device_t mouse_ps2_device; +# ifdef USE_WACOM extern const device_t mouse_wacom_device; extern const device_t mouse_wacom_artpad_device; +# endif extern const device_t mouse_mtouch_device; #endif diff --git a/src/include/86box/pit.h b/src/include/86box/pit.h index eb03fd2c9..3c5a9cb52 100644 --- a/src/include/86box/pit.h +++ b/src/include/86box/pit.h @@ -19,6 +19,8 @@ #ifndef EMU_PIT_H #define EMU_PIT_H +#define NUM_COUNTERS 3 + typedef struct ctr_t { uint8_t m; uint8_t ctrl; @@ -68,7 +70,7 @@ typedef struct PIT { int clock; pc_timer_t callback_timer; - ctr_t counters[3]; + ctr_t counters[NUM_COUNTERS]; uint8_t ctrl; diff --git a/src/include/86box/pit_fast.h b/src/include/86box/pit_fast.h index 5650ffb4d..f824bad68 100644 --- a/src/include/86box/pit_fast.h +++ b/src/include/86box/pit_fast.h @@ -68,7 +68,7 @@ typedef struct ctrf_t { typedef struct pitf_t { int flags; - ctrf_t counters[3]; + ctrf_t counters[NUM_COUNTERS]; uint8_t ctrl; diff --git a/src/include/86box/scsi_pcscsi.h b/src/include/86box/scsi_pcscsi.h index 3acee78f9..8547aba25 100644 --- a/src/include/86box/scsi_pcscsi.h +++ b/src/include/86box/scsi_pcscsi.h @@ -25,6 +25,7 @@ #ifndef SCSI_PCSCSI_H #define SCSI_PCSCSI_H +extern const device_t am53c974_pci_device; extern const device_t dc390_pci_device; extern const device_t ncr53c90a_mca_device; diff --git a/src/include/86box/sio.h b/src/include/86box/sio.h index 0e699f12c..c349b0422 100644 --- a/src/include/86box/sio.h +++ b/src/include/86box/sio.h @@ -83,9 +83,9 @@ extern const device_t prime3b_ide_device; extern const device_t prime3c_device; extern const device_t prime3c_ide_device; extern const device_t ps1_m2133_sio; -#if defined(DEV_BRANCH) && defined(USE_SIO_DETECT) +#ifdef USE_SIO_DETECT extern const device_t sio_detect_device; -#endif +#endif /* USE_SIO_DETECT */ extern const device_t um8663af_device; extern const device_t um8663af_ide_device; extern const device_t um8663af_sec_device; diff --git a/src/include/86box/snd_sn76489.h b/src/include/86box/snd_sn76489.h index 6e7399d54..81d9ad229 100644 --- a/src/include/86box/snd_sn76489.h +++ b/src/include/86box/snd_sn76489.h @@ -20,6 +20,9 @@ typedef struct sn76489_t { int freqhi[4]; int vol[4]; uint32_t shift; + uint32_t white_noise_tap_1; + uint32_t white_noise_tap_2; + uint32_t feedback_mask; uint8_t noise; int lasttone; uint8_t firstdat; diff --git a/src/include/86box/vid_svga.h b/src/include/86box/vid_svga.h index 3d68c1a34..fef9b2122 100644 --- a/src/include/86box/vid_svga.h +++ b/src/include/86box/vid_svga.h @@ -87,6 +87,7 @@ typedef struct svga_t { int dac_b; int vtotal; int dispend; + int vdisp; int vsyncstart; int split; int vblankstart; diff --git a/src/include/86box/video.h b/src/include/86box/video.h index 9cbd0399f..1037e213f 100644 --- a/src/include/86box/video.h +++ b/src/include/86box/video.h @@ -330,9 +330,9 @@ extern const device_t ati28800k_device; extern const device_t ati28800k_spc4620p_device; extern const device_t ati28800k_spc6033p_device; extern const device_t compaq_ati28800_device; -# if defined(DEV_BRANCH) && defined(USE_XL24) +# ifdef USE_XL24 extern const device_t ati28800_wonderxl24_device; -# endif +# endif /* USE_XL24 */ /* Bochs */ extern const device_t bochs_svga_device; @@ -448,7 +448,7 @@ extern const device_t mystique_220_device; extern const device_t millennium_ii_device; #ifdef USE_G100 extern const device_t productiva_g100_device; -#endif +#endif /* USE_G100 */ /* Oak OTI-0x7 */ extern const device_t oti037c_device; diff --git a/src/machine/m_at.c b/src/machine/m_at.c index 6343af4dc..122c5cdef 100644 --- a/src/machine/m_at.c +++ b/src/machine/m_at.c @@ -275,7 +275,7 @@ machine_at_wellamerastar_init(const machine_t *model) return ret; } -#if defined(DEV_BRANCH) && defined(USE_OPEN_AT) +#ifdef USE_OPEN_AT int machine_at_openat_init(const machine_t *model) { @@ -291,4 +291,4 @@ machine_at_openat_init(const machine_t *model) return ret; } -#endif +#endif /* USE_OPEN_AT */ diff --git a/src/machine/m_at_286_386sx.c b/src/machine/m_at_286_386sx.c index 4632e00df..995301e4f 100644 --- a/src/machine/m_at_286_386sx.c +++ b/src/machine/m_at_286_386sx.c @@ -965,7 +965,7 @@ machine_at_pc916sx_init(const machine_t *model) return ret; } -#if defined(DEV_BRANCH) && defined(USE_OLIVETTI) +#ifdef USE_OLIVETTI int machine_at_m290_init(const machine_t *model) { @@ -988,4 +988,4 @@ machine_at_m290_init(const machine_t *model) return ret; } -#endif +#endif /* USE_OLIVETTI */ diff --git a/src/machine/m_at_socket7.c b/src/machine/m_at_socket7.c index 12cec1357..d7e2840a9 100644 --- a/src/machine/m_at_socket7.c +++ b/src/machine/m_at_socket7.c @@ -935,7 +935,7 @@ machine_at_tx97_init(const machine_t *model) return ret; } -#if defined(DEV_BRANCH) && defined(USE_AN430TX) +#ifdef USE_AN430TX int machine_at_an430tx_init(const machine_t *model) { @@ -979,7 +979,7 @@ machine_at_an430tx_init(const machine_t *model) return ret; } -#endif +#endif /* USE_AN430TX */ int machine_at_ym430tx_init(const machine_t *model) diff --git a/src/machine/m_tandy.c b/src/machine/m_tandy.c index e677ff8b4..d51209123 100644 --- a/src/machine/m_tandy.c +++ b/src/machine/m_tandy.c @@ -52,6 +52,7 @@ enum { enum { TYPE_TANDY = 0, + TYPE_TANDY1000SX, TYPE_TANDY1000HX, TYPE_TANDY1000SL2 }; @@ -129,6 +130,7 @@ typedef struct tandy_t { uint32_t base; uint32_t mask; + int is_hx; int is_sl2; t1kvid_t *vid; @@ -715,13 +717,8 @@ recalc_timings(tandy_t *dev) double _dispofftime; double disptime; - if (vid->mode & 1) { - disptime = vid->crtc[0] + 1; - _dispontime = vid->crtc[1]; - } else { - disptime = (vid->crtc[0] + 1) << 1; - _dispontime = vid->crtc[1] << 1; - } + disptime = vid->crtc[0] + 1; + _dispontime = vid->crtc[1]; _dispofftime = disptime - _dispontime; _dispontime *= CGACONST; @@ -984,10 +981,10 @@ vid_poll(void *priv) for (x = 0; x < vid->crtc[1]; x++) { dat = (vid->vram[((vid->ma << 1) & 0x1fff) + ((vid->sc & 3) * 0x2000)] << 8) | vid->vram[((vid->ma << 1) & 0x1fff) + ((vid->sc & 3) * 0x2000) + 1]; vid->ma++; - buffer32->line[vid->displine << 1][(x << 3) + 8] = buffer32->line[(vid->displine << 1) + 1][(x << 3) + 8] = buffer32->line[vid->displine << 1][(x << 3) + 9] = buffer32->line[(vid->displine << 1) + 1][(x << 3) + 9] = vid->array[((dat >> 12) & vid->array[1]) + 16] + 16; - buffer32->line[vid->displine << 1][(x << 3) + 10] = buffer32->line[(vid->displine << 1) + 1][(x << 3) + 10] = buffer32->line[vid->displine << 1][(x << 3) + 11] = buffer32->line[(vid->displine << 1) + 1][(x << 3) + 11] = vid->array[((dat >> 8) & vid->array[1]) + 16] + 16; - buffer32->line[vid->displine << 1][(x << 3) + 12] = buffer32->line[(vid->displine << 1) + 1][(x << 3) + 12] = buffer32->line[vid->displine << 1][(x << 3) + 13] = buffer32->line[(vid->displine << 1) + 1][(x << 3) + 13] = vid->array[((dat >> 4) & vid->array[1]) + 16] + 16; - buffer32->line[vid->displine << 1][(x << 3) + 14] = buffer32->line[(vid->displine << 1) + 1][(x << 3) + 14] = buffer32->line[vid->displine << 1][(x << 3) + 15] = buffer32->line[(vid->displine << 1) + 1][(x << 3) + 15] = vid->array[(dat & vid->array[1]) + 16] + 16; + buffer32->line[vid->displine << 1][(x << 3) + 8] = buffer32->line[(vid->displine << 1) + 1][(x << 3) + 8] = buffer32->line[vid->displine << 1][(x << 3) + 9] = buffer32->line[(vid->displine << 1) + 1][(x << 3) + 9] = vid->array[((dat >> 12) & vid->array[1] & 0x0f) + 16] + 16; + buffer32->line[vid->displine << 1][(x << 3) + 10] = buffer32->line[(vid->displine << 1) + 1][(x << 3) + 10] = buffer32->line[vid->displine << 1][(x << 3) + 11] = buffer32->line[(vid->displine << 1) + 1][(x << 3) + 11] = vid->array[((dat >> 8) & vid->array[1] & 0x0f) + 16] + 16; + buffer32->line[vid->displine << 1][(x << 3) + 12] = buffer32->line[(vid->displine << 1) + 1][(x << 3) + 12] = buffer32->line[vid->displine << 1][(x << 3) + 13] = buffer32->line[(vid->displine << 1) + 1][(x << 3) + 13] = vid->array[((dat >> 4) & vid->array[1] & 0x0f) + 16] + 16; + buffer32->line[vid->displine << 1][(x << 3) + 14] = buffer32->line[(vid->displine << 1) + 1][(x << 3) + 14] = buffer32->line[vid->displine << 1][(x << 3) + 15] = buffer32->line[(vid->displine << 1) + 1][(x << 3) + 15] = vid->array[(dat & vid->array[1] & 0x0f) + 16] + 16; } } else if (vid->array[3] & 0x10) { /*160x200x16*/ for (x = 0; x < vid->crtc[1]; x++) { @@ -997,10 +994,10 @@ vid_poll(void *priv) dat = (vid->vram[((vid->ma << 1) & 0x1fff) + ((vid->sc & 3) * 0x2000)] << 8) | vid->vram[((vid->ma << 1) & 0x1fff) + ((vid->sc & 3) * 0x2000) + 1]; } vid->ma++; - buffer32->line[vid->displine << 1][(x << 4) + 8] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 8] = buffer32->line[vid->displine << 1][(x << 4) + 9] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 9] = buffer32->line[vid->displine << 1][(x << 4) + 10] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 10] = buffer32->line[vid->displine << 1][(x << 4) + 11] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 11] = vid->array[((dat >> 12) & vid->array[1]) + 16] + 16; - buffer32->line[vid->displine << 1][(x << 4) + 12] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 12] = buffer32->line[vid->displine << 1][(x << 4) + 13] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 13] = buffer32->line[vid->displine << 1][(x << 4) + 14] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 14] = buffer32->line[vid->displine << 1][(x << 4) + 15] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 15] = vid->array[((dat >> 8) & vid->array[1]) + 16] + 16; - buffer32->line[vid->displine << 1][(x << 4) + 16] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 16] = buffer32->line[vid->displine << 1][(x << 4) + 17] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 17] = buffer32->line[vid->displine << 1][(x << 4) + 18] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 18] = buffer32->line[vid->displine << 1][(x << 4) + 19] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 19] = vid->array[((dat >> 4) & vid->array[1]) + 16] + 16; - buffer32->line[vid->displine << 1][(x << 4) + 20] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 20] = buffer32->line[vid->displine << 1][(x << 4) + 21] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 21] = buffer32->line[vid->displine << 1][(x << 4) + 22] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 22] = buffer32->line[vid->displine << 1][(x << 4) + 23] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 23] = vid->array[(dat & vid->array[1]) + 16] + 16; + buffer32->line[vid->displine << 1][(x << 4) + 8] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 8] = buffer32->line[vid->displine << 1][(x << 4) + 9] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 9] = buffer32->line[vid->displine << 1][(x << 4) + 10] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 10] = buffer32->line[vid->displine << 1][(x << 4) + 11] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 11] = vid->array[((dat >> 12) & vid->array[1] & 0x0f) + 16] + 16; + buffer32->line[vid->displine << 1][(x << 4) + 12] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 12] = buffer32->line[vid->displine << 1][(x << 4) + 13] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 13] = buffer32->line[vid->displine << 1][(x << 4) + 14] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 14] = buffer32->line[vid->displine << 1][(x << 4) + 15] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 15] = vid->array[((dat >> 8) & vid->array[1] & 0x0f) + 16] + 16; + buffer32->line[vid->displine << 1][(x << 4) + 16] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 16] = buffer32->line[vid->displine << 1][(x << 4) + 17] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 17] = buffer32->line[vid->displine << 1][(x << 4) + 18] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 18] = buffer32->line[vid->displine << 1][(x << 4) + 19] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 19] = vid->array[((dat >> 4) & vid->array[1] & 0x0f) + 16] + 16; + buffer32->line[vid->displine << 1][(x << 4) + 20] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 20] = buffer32->line[vid->displine << 1][(x << 4) + 21] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 21] = buffer32->line[vid->displine << 1][(x << 4) + 22] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 22] = buffer32->line[vid->displine << 1][(x << 4) + 23] = buffer32->line[(vid->displine << 1) + 1][(x << 4) + 23] = vid->array[(dat & vid->array[1] & 0x0f) + 16] + 16; } } else if (vid->array[3] & 0x08) { /*640x200x4 - this implementation is a complete guess!*/ for (x = 0; x < vid->crtc[1]; x++) { @@ -1576,12 +1573,12 @@ tandy_write(uint16_t addr, uint8_t val, void *priv) switch (addr) { case 0x00a0: - if (val & 0x10) { + if (dev->is_hx && (val & 0x10)) { dev->base = (mem_size - 256) * 1024; dev->mask = 0x3ffff; mem_mapping_set_addr(&ram_low_mapping, 0, dev->base); mem_mapping_set_addr(&dev->ram_mapping, - ((val >> 1) & 7) * 128 * 1024, 0x40000); + (((val >> 1) & 7) - 1) * 128 * 1024, 0x40000); } else { dev->base = (mem_size - 128) * 1024; dev->mask = 0x1ffff; @@ -1589,6 +1586,22 @@ tandy_write(uint16_t addr, uint8_t val, void *priv) mem_mapping_set_addr(&dev->ram_mapping, ((val >> 1) & 7) * 128 * 1024, 0x20000); } + if (dev->is_hx) { + io_removehandler(0x03d0, 16, + vid_in, NULL, NULL, vid_out, NULL, NULL, dev); + if (val & 0x01) + mem_mapping_disable(&dev->vid->mapping); + else { + io_sethandler(0x03d0, 16, + vid_in, NULL, NULL, vid_out, NULL, NULL, dev); + mem_mapping_set_addr(&dev->vid->mapping, 0xb8000, 0x8000); + } + } else { + if (val & 0x01) + mem_mapping_set_addr(&dev->vid->mapping, 0xc0000, 0x10000); + else + mem_mapping_set_addr(&dev->vid->mapping, 0xb8000, 0x8000); + } dev->ram_bank = val; break; @@ -1718,8 +1731,7 @@ machine_tandy1k_init(const machine_t *model, int type) { tandy_t *dev; - dev = malloc(sizeof(tandy_t)); - memset(dev, 0x00, sizeof(tandy_t)); + dev = calloc(1, sizeof(tandy_t)); machine_common_init(model); @@ -1745,15 +1757,17 @@ machine_tandy1k_init(const machine_t *model, int type) switch (type) { case TYPE_TANDY: + case TYPE_TANDY1000SX: keyboard_set_table(scancode_tandy); io_sethandler(0x00a0, 1, tandy_read, NULL, NULL, tandy_write, NULL, NULL, dev); vid_init(dev); device_add_ex(&vid_device, dev); - device_add(&sn76489_device); + device_add((type == TYPE_TANDY1000SX) ? &ncr8496_device : &sn76489_device); break; case TYPE_TANDY1000HX: + dev->is_hx = 1; keyboard_set_table(scancode_tandy); io_sethandler(0x00a0, 1, tandy_read, NULL, NULL, tandy_write, NULL, NULL, dev); @@ -1790,7 +1804,7 @@ tandy1k_eeprom_read(void) } int -machine_tandy_init(const machine_t *model) +machine_tandy1000sx_init(const machine_t *model) { int ret; @@ -1800,7 +1814,7 @@ machine_tandy_init(const machine_t *model) if (bios_only || !ret) return ret; - machine_tandy1k_init(model, TYPE_TANDY); + machine_tandy1k_init(model, TYPE_TANDY1000SX); return ret; } diff --git a/src/machine/machine_table.c b/src/machine/machine_table.c index 18481b740..a060a0781 100644 --- a/src/machine/machine_table.c +++ b/src/machine/machine_table.c @@ -1505,7 +1505,7 @@ const machine_t machines[] = { .internal_name = "tandy", .type = MACHINE_TYPE_8088, .chipset = MACHINE_CHIPSET_PROPRIETARY, - .init = machine_tandy_init, + .init = machine_tandy1000sx_init, .p1_handler = NULL, .gpio_handler = NULL, .available_flag = MACHINE_AVAILABLE, @@ -1523,7 +1523,7 @@ const machine_t machines[] = { .bus_flags = MACHINE_PC, .flags = MACHINE_VIDEO_FIXED, .ram = { - .min = 128, + .min = 384, .max = 640, .step = 128 }, @@ -1562,7 +1562,7 @@ const machine_t machines[] = { .bus_flags = MACHINE_PC, .flags = MACHINE_VIDEO_FIXED, .ram = { - .min = 384, + .min = 256, .max = 640, .step = 128 }, @@ -1656,7 +1656,7 @@ const machine_t machines[] = { .snd_device = NULL, .net_device = NULL }, -#if defined(DEV_BRANCH) && defined(USE_LASERXT) +#ifdef USE_LASERXT { .name = "[8088] VTech Laser Turbo XT", .internal_name = "ltxt", @@ -1696,7 +1696,7 @@ const machine_t machines[] = { .snd_device = NULL, .net_device = NULL }, -#endif /* defined(DEV_BRANCH) && defined(USE_LASERXT) */ +#endif /* USE_LASERXT */ /* Has a standard PS/2 KBC (so, use IBM PS/2 Type 1). */ { .name = "[8088] Xi8088", @@ -2521,7 +2521,7 @@ const machine_t machines[] = { .net_device = NULL }, -#if defined(DEV_BRANCH) && defined(USE_LASERXT) +#ifdef USE_LASERXT { .name = "[8086] VTech Laser XT3", .internal_name = "lxt3", @@ -2561,7 +2561,7 @@ const machine_t machines[] = { .snd_device = NULL, .net_device = NULL }, -#endif /* defined(DEV_BRANCH) && defined(USE_LASERXT) */ +#endif /* USE_LASERXT */ /* 286 AT machines */ /* Has IBM AT KBC firmware. */ @@ -3005,7 +3005,7 @@ const machine_t machines[] = { .snd_device = NULL, .net_device = NULL }, -#if defined(DEV_BRANCH) && defined(USE_OLIVETTI) +#ifdef USE_OLIVETTI /* Has Olivetti KBC firmware. */ { .name = "[ISA] Olivetti M290", @@ -3046,8 +3046,8 @@ const machine_t machines[] = { .snd_device = NULL, .net_device = NULL }, -#endif /* defined(DEV_BRANCH) && defined(USE_OLIVETTI) */ -#if defined(DEV_BRANCH) && defined(USE_OPEN_AT) +#endif /* USE_OLIVETTI */ +#ifdef USE_OPEN_AT /* Has IBM AT KBC firmware. */ { .name = "[ISA] OpenAT", @@ -3088,7 +3088,7 @@ const machine_t machines[] = { .snd_device = NULL, .net_device = NULL }, -#endif /* defined(DEV_BRANCH) && defined(USE_OPEN_AT) */ +#endif /* USE_OPEN_AT */ /* Has IBM AT KBC firmware. */ { .name = "[ISA] Phoenix IBM AT", @@ -12107,11 +12107,9 @@ const machine_t machines[] = { .ram = { .min = 8192, .max = 262144, - .max = 786432, .step = 8192 }, .nvrmask = 127, - .nvrmask = 255, .kbc_device = NULL, .kbc_p1 = 0xff, .gpio = 0xffffffff, @@ -12286,7 +12284,7 @@ const machine_t machines[] = { .snd_device = &cs4236b_device, .net_device = &pcnet_am79c973_onboard_device }, -#if defined(DEV_BRANCH) && defined(USE_AN430TX) +#ifdef USE_AN430TX /* This has the Phoenix MultiKey KBC firmware. */ { .name = "[i430TX] Intel AN430TX", @@ -12327,7 +12325,7 @@ const machine_t machines[] = { .snd_device = NULL, .net_device = NULL }, -#endif /* defined(DEV_BRANCH) && defined(USE_AN430TX) */ +#endif /* USE_AN430TX */ /* This has the AMIKey KBC firmware, which is an updated 'F' type. */ { .name = "[i430TX] Intel YM430TX", diff --git a/src/network/net_dp8390.c b/src/network/net_dp8390.c index b563b703a..d6062604f 100644 --- a/src/network/net_dp8390.c +++ b/src/network/net_dp8390.c @@ -396,7 +396,7 @@ dp8390_rx_common(void *priv, uint8_t *buf, int io_len) } else { endbytes = (dev->page_stop - dev->curr_page) * 256; memcpy(startptr + sizeof(pkthdr), buf, endbytes - sizeof(pkthdr)); - startptr = &dev->mem[((dev->tx_page_start * 256) - dev->mem_start) & dev->mem_wrap]; + startptr = &dev->mem[((dev->page_start * 256) - dev->mem_start) & dev->mem_wrap]; memcpy(startptr, buf + endbytes - sizeof(pkthdr), io_len - endbytes + 8); } dev->curr_page = nextpage; diff --git a/src/network/net_rtl8139.c b/src/network/net_rtl8139.c index 2e85d67bf..bc44b3fb1 100644 --- a/src/network/net_rtl8139.c +++ b/src/network/net_rtl8139.c @@ -22,13 +22,14 @@ * Copyright 2011-2023 Benjamin Poirier. * Copyright 2023 Cacodemon345. */ +#include #include #include #include #include #include #include - +#define HAVE_STDARG_H #include <86box/86box.h> #include <86box/timer.h> #include <86box/pci.h> diff --git a/src/network/net_tulip.c b/src/network/net_tulip.c index 03eb21e88..060e11fba 100644 --- a/src/network/net_tulip.c +++ b/src/network/net_tulip.c @@ -452,7 +452,9 @@ tulip_copy_rx_bytes(TULIPState *s, struct tulip_descriptor *desc) static bool tulip_filter_address(TULIPState *s, const uint8_t *addr) { +#ifdef BLOCK_BROADCAST static const char broadcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +#endif bool ret = false; for (uint8_t i = 0; i < 16 && ret == false; i++) { @@ -461,9 +463,15 @@ tulip_filter_address(TULIPState *s, const uint8_t *addr) } } +/* + Do not block broadcast packets - needed for connections to the guest + to succeed when using SLiRP. + */ +#ifdef BLOCK_BROADCAST if (!memcmp(addr, broadcast, ETH_ALEN)) { return true; } +#endif if (s->csr[6] & (CSR6_PR | CSR6_RA)) { /* Promiscuous mode enabled */ @@ -488,7 +496,7 @@ tulip_receive(void *priv, uint8_t *buf, int size) { struct tulip_descriptor desc; TULIPState *s = (TULIPState *) priv; - + int first = 1; if (size < 14 || size > sizeof(s->rx_frame) - 4 || s->rx_frame_len || tulip_rx_stopped(s)) @@ -506,7 +514,11 @@ tulip_receive(void *priv, uint8_t *buf, int size) if (!(desc.status & RDES0_OWN)) { s->csr[5] |= CSR5_RU; tulip_update_int(s); - return s->rx_frame_size - s->rx_frame_len; + if (first) + /* Stop at the very beginning, tell the host 0 bytes have been received. */ + return 0; + else + return (s->rx_frame_size - s->rx_frame_len) % s->rx_frame_size; } desc.status = 0; @@ -527,6 +539,7 @@ tulip_receive(void *priv, uint8_t *buf, int size) } tulip_desc_write(s, s->current_rx_desc, &desc); tulip_next_rx_descriptor(s, &desc); + first = 0; } while (s->rx_frame_len); return 1; diff --git a/src/pit.c b/src/pit.c index 0816094cb..8b9f23a79 100644 --- a/src/pit.c +++ b/src/pit.c @@ -526,7 +526,7 @@ pit_timer_over(void *priv) dev->clock ^= 1; - for (uint8_t i = 0; i < 3; i++) + for (uint8_t i = 0; i < NUM_COUNTERS; i++) pit_ctr_set_clock_common(&dev->counters[i], dev->clock, dev); timer_advance_u64(&dev->callback_timer, dev->pit_const >> 1ULL); @@ -874,7 +874,7 @@ pit_device_reset(pit_t *dev) { dev->clock = 0; - for (uint8_t i = 0; i < 3; i++) + for (uint8_t i = 0; i < NUM_COUNTERS; i++) ctr_reset(&dev->counters[i]); } @@ -885,7 +885,7 @@ pit_reset(pit_t *dev) dev->clock = 0; - for (uint8_t i = 0; i < 3; i++) + for (uint8_t i = 0; i < NUM_COUNTERS; i++) ctr_reset(&dev->counters[i]); /* Disable speaker gate. */ diff --git a/src/pit_fast.c b/src/pit_fast.c index e986600ee..0d56a6616 100644 --- a/src/pit_fast.c +++ b/src/pit_fast.c @@ -47,22 +47,22 @@ #define PIT_CUSTOM_CLOCK 64 /* The PIT uses custom clock inputs provided by another provider. */ #define PIT_SECONDARY 128 /* The PIT is secondary (ports 0048-004B). */ -#ifdef ENABLE_PIT_LOG -int pit_do_log = ENABLE_PIT_LOG; +#ifdef ENABLE_PIT_FAST_LOG +int pit_fast_do_log = ENABLE_PIT_FAST_LOG; static void -pit_log(const char *fmt, ...) +pit_fast_log(const char *fmt, ...) { va_list ap; - if (pit_do_log) { + if (pit_fast_do_log) { va_start(ap, fmt); pclog_ex(fmt, ap); va_end(ap); } } #else -# define pit_log(fmt, ...) +# define pit_fast_log(fmt, ...) #endif static void @@ -420,7 +420,7 @@ pitf_write(uint16_t addr, uint8_t val, void *priv) int t = (addr & 3); ctrf_t *ctr; - pit_log("[%04X:%08X] pit_write(%04X, %02X, %08X)\n", CS, cpu_state.pc, addr, val, priv); + pit_fast_log("[%04X:%08X] pit_write(%04X, %02X, %08X)\n", CS, cpu_state.pc, addr, val, priv); cycles -= ISA_CYCLES(8); @@ -438,7 +438,7 @@ pitf_write(uint16_t addr, uint8_t val, void *priv) pitf_ctr_latch_count(&dev->counters[1]); if (val & 8) pitf_ctr_latch_count(&dev->counters[2]); - pit_log("PIT %i: Initiated readback command\n", t); + pit_fast_log("PIT %i: Initiated readback command\n", t); } if (!(val & 0x10)) { if (val & 2) @@ -456,7 +456,7 @@ pitf_write(uint16_t addr, uint8_t val, void *priv) if (!(dev->ctrl & 0x30)) { pitf_ctr_latch_count(ctr); dev->ctrl |= 0x30; - pit_log("PIT %i: Initiated latched read, %i bytes latched\n", + pit_fast_log("PIT %i: Initiated latched read, %i bytes latched\n", t, ctr->latched); } else { ctr->ctrl = val; @@ -476,7 +476,7 @@ pitf_write(uint16_t addr, uint8_t val, void *priv) pitf_ctr_set_out(ctr, 1, dev); ctr->disabled = 1; - pit_log("PIT %i: M = %i, RM/WM = %i, State = %i, Out = %i\n", t, ctr->m, ctr->rm, ctr->state, ctr->out); + pit_fast_log("PIT %i: M = %i, RM/WM = %i, Out = %i\n", t, ctr->m, ctr->rm, ctr->out); } ctr->thit = 0; } @@ -619,7 +619,7 @@ pitf_read(uint16_t addr, void *priv) break; } - pit_log("[%04X:%08X] pit_read(%04X, %08X) = %02X\n", CS, cpu_state.pc, addr, priv, ret); + pit_fast_log("[%04X:%08X] pit_read(%04X, %08X) = %02X\n", CS, cpu_state.pc, addr, priv, ret); return ret; } @@ -670,7 +670,7 @@ pitf_reset(pitf_t *dev) { memset(dev, 0, sizeof(pitf_t)); - for (uint8_t i = 0; i < 3; i++) + for (uint8_t i = 0; i < NUM_COUNTERS; i++) ctr_reset(&dev->counters[i]); /* Disable speaker gate. */ @@ -683,7 +683,7 @@ pitf_set_pit_const(void *data, uint64_t pit_const) pitf_t *pit = (pitf_t *) data; ctrf_t *ctr; - for (uint8_t i = 0; i < 3; i++) { + for (uint8_t i = 0; i < NUM_COUNTERS; i++) { ctr = &pit->counters[i]; ctr->pit_const = pit_const; } @@ -728,7 +728,7 @@ pitf_init(const device_t *info) dev->flags = info->local; if (!(dev->flags & PIT_PS2) && !(dev->flags & PIT_CUSTOM_CLOCK)) { - for (int i = 0; i < 3; i++) { + for (int i = 0; i < NUM_COUNTERS; i++) { ctrf_t *ctr = &dev->counters[i]; ctr->priv = dev; timer_add(&ctr->timer, pitf_timer_over, (void *) ctr, 0); diff --git a/src/qt/CMakeLists.txt b/src/qt/CMakeLists.txt index db0a6353f..171e0a77c 100644 --- a/src/qt/CMakeLists.txt +++ b/src/qt/CMakeLists.txt @@ -190,6 +190,10 @@ if(RTMIDI) target_compile_definitions(ui PRIVATE USE_RTMIDI) endif() +if(WACOM) + target_compile_definitions(ui PRIVATE USE_WACOM) +endif() + if(WIN32) enable_language(RC) target_sources(86Box PUBLIC 86Box-qt.rc) diff --git a/src/qt/qt_mainwindow.cpp b/src/qt/qt_mainwindow.cpp index 0b7ebb33d..32720c48b 100644 --- a/src/qt/qt_mainwindow.cpp +++ b/src/qt/qt_mainwindow.cpp @@ -998,7 +998,7 @@ MainWindow::processKeyboardInput(bool down, uint32_t keycode) case 0x10b: /* Microsoft scroll up normal */ case 0x180 ... 0x1ff: /* E0 break codes (including Microsoft scroll down normal) */ /* This key uses a break code as make. Send it manually, only on press. */ - if (down) { + if (down && (mouse_capture || !kbd_req_capture || video_fullscreen)) { if (keycode & 0x100) keyboard_send(0xe0); keyboard_send(keycode & 0xff); @@ -1011,7 +1011,7 @@ MainWindow::processKeyboardInput(bool down, uint32_t keycode) break; case 0x137: /* Print Screen */ - if (keyboard_recv(0x38) || keyboard_recv(0x138)) { /* Alt+ */ + if (keyboard_recv_ui(0x38) || keyboard_recv_ui(0x138)) { /* Alt+ */ keycode = 0x54; } else if (down) { keyboard_input(down, 0x12a); @@ -1022,7 +1022,7 @@ MainWindow::processKeyboardInput(bool down, uint32_t keycode) break; case 0x145: /* Pause */ - if (keyboard_recv(0x1d) || keyboard_recv(0x11d)) { /* Ctrl+ */ + if (keyboard_recv_ui(0x1d) || keyboard_recv_ui(0x11d)) { /* Ctrl+ */ keycode = 0x146; } else { keyboard_input(down, 0xe11d); @@ -1196,6 +1196,8 @@ MainWindow::on_actionFullscreen_triggered() ui->stackedWidget->setFixedSize(QWIDGETSIZE_MAX, QWIDGETSIZE_MAX); showFullScreen(); } + fs_on_signal = false; + fs_off_signal = false; ui->stackedWidget->onResize(width(), height()); } @@ -1218,7 +1220,7 @@ MainWindow::getTitle(wchar_t *title) bool MainWindow::eventFilter(QObject *receiver, QEvent *event) { - if (!dopause && (mouse_capture || !kbd_req_capture)) { + if (!dopause) { if (event->type() == QEvent::Shortcut) { auto shortcutEvent = (QShortcutEvent *) event; if (shortcutEvent->key() == ui->actionExit->shortcut()) { @@ -1299,7 +1301,7 @@ MainWindow::showMessage_(int flags, const QString &header, const QString &messag void MainWindow::keyPressEvent(QKeyEvent *event) { - if (send_keyboard_input && !(kbd_req_capture && !mouse_capture)) { + if (send_keyboard_input) { #ifdef Q_OS_MACOS processMacKeyboardInput(true, event); #else @@ -1312,10 +1314,10 @@ MainWindow::keyPressEvent(QKeyEvent *event) if (keyboard_ismsexit()) plat_mouse_capture(0); - if ((video_fullscreen > 0) && (keyboard_recv(0x1D) || keyboard_recv(0x11D))) { - if (keyboard_recv(0x57)) + if ((video_fullscreen > 0) && (keyboard_recv_ui(0x1D) || keyboard_recv_ui(0x11D))) { + if (keyboard_recv_ui(0x57)) ui->actionTake_screenshot->trigger(); - else if (keyboard_recv(0x58)) + else if (keyboard_recv_ui(0x58)) pc_send_cad(); } @@ -1338,7 +1340,7 @@ void MainWindow::keyReleaseEvent(QKeyEvent *event) { if (event->key() == Qt::Key_Pause) { - if (keyboard_recv(0x38) && keyboard_recv(0x138)) { + if (keyboard_recv_ui(0x38) && keyboard_recv_ui(0x138)) { plat_pause(dopause ^ 1); } } diff --git a/src/qt/qt_mainwindow.ui b/src/qt/qt_mainwindow.ui index 90dbfdab7..b1dc0ecf1 100644 --- a/src/qt/qt_mainwindow.ui +++ b/src/qt/qt_mainwindow.ui @@ -373,6 +373,9 @@ &Fullscreen + + Ctrl+Alt+PgUp + diff --git a/src/qt/qt_winrawinputfilter.cpp b/src/qt/qt_winrawinputfilter.cpp index 857ccef3b..66d8ad8e5 100644 --- a/src/qt/qt_winrawinputfilter.cpp +++ b/src/qt/qt_winrawinputfilter.cpp @@ -176,9 +176,6 @@ WindowsRawInputFilter::keyboard_handle(PRAWINPUT raw) RAWKEYBOARD rawKB = raw->data.keyboard; scancode = rawKB.MakeCode; - if (kbd_req_capture && !mouse_capture) - return; - /* If it's not a scan code that starts with 0xE1 */ if ((rawKB.Flags & RI_KEY_E1)) { if (rawKB.MakeCode == 0x1D) { diff --git a/src/qt/win_thread.c b/src/qt/win_thread.c new file mode 100644 index 000000000..9a4f046b3 --- /dev/null +++ b/src/qt/win_thread.c @@ -0,0 +1,179 @@ +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * Implement threads and mutexes for the Win32 platform. + * + * + * + * Authors: Sarah Walker, + * Fred N. van Kempen, + * + * Copyright 2008-2018 Sarah Walker. + * Copyright 2017-2018 Fred N. van Kempen. + */ +#define UNICODE +#define BITMAP WINDOWS_BITMAP +#include +#include +#include +#undef BITMAP +#include +#include +#include +#include +#include +#include <86box/86box.h> +#include <86box/plat.h> +#include <86box/thread.h> + +typedef struct { + HANDLE handle; +} win_event_t; + +/* For compatibility with thread.h, but Win32 does not allow named threads. */ +thread_t * +thread_create_named(void (*func)(void *param), void *param, UNUSED(const char *name)) +{ + uintptr_t bt = _beginthread(func, 0, param); + return ((thread_t *) bt); +} + +int +thread_test_mutex(thread_t *arg) +{ + if (arg == NULL) + return (0); + + return (WaitForSingleObject(arg, 0) == WAIT_OBJECT_0) ? 1 : 0; +} + +int +thread_wait(thread_t *arg) +{ + if (arg == NULL) + return (0); + + if (WaitForSingleObject(arg, INFINITE)) + return (1); + + return (0); +} + +event_t * +thread_create_event(void) +{ + win_event_t *ev = malloc(sizeof(win_event_t)); + + ev->handle = CreateEvent(NULL, FALSE, FALSE, NULL); + + return ((event_t *) ev); +} + +void +thread_set_event(event_t *arg) +{ + win_event_t *ev = (win_event_t *) arg; + + if (arg == NULL) + return; + + SetEvent(ev->handle); +} + +void +thread_reset_event(event_t *arg) +{ + win_event_t *ev = (win_event_t *) arg; + + if (arg == NULL) + return; + + ResetEvent(ev->handle); +} + +int +thread_wait_event(event_t *arg, int timeout) +{ + win_event_t *ev = (win_event_t *) arg; + + if (arg == NULL) + return (0); + + if (ev->handle == NULL) + return (0); + + if (timeout == -1) + timeout = INFINITE; + + if (WaitForSingleObject(ev->handle, timeout)) + return (1); + + return (0); +} + +void +thread_destroy_event(event_t *arg) +{ + win_event_t *ev = (win_event_t *) arg; + + if (arg == NULL) + return; + + CloseHandle(ev->handle); + + free(ev); +} + +mutex_t * +thread_create_mutex(void) +{ + mutex_t *mutex = malloc(sizeof(CRITICAL_SECTION)); + + InitializeCriticalSection(mutex); + + return mutex; +} + +int +thread_wait_mutex(mutex_t *mutex) +{ + if (mutex == NULL) + return (0); + + LPCRITICAL_SECTION critsec = (LPCRITICAL_SECTION) mutex; + + EnterCriticalSection(critsec); + + return 1; +} + +int +thread_release_mutex(mutex_t *mutex) +{ + if (mutex == NULL) + return (0); + + LPCRITICAL_SECTION critsec = (LPCRITICAL_SECTION) mutex; + + LeaveCriticalSection(critsec); + + return 1; +} + +void +thread_close_mutex(mutex_t *mutex) +{ + if (mutex == NULL) + return; + + LPCRITICAL_SECTION critsec = (LPCRITICAL_SECTION) mutex; + + DeleteCriticalSection(critsec); + + free(critsec); +} diff --git a/src/scsi/scsi.c b/src/scsi/scsi.c index 591ef2aa3..bbce63651 100644 --- a/src/scsi/scsi.c +++ b/src/scsi/scsi.c @@ -82,6 +82,7 @@ static SCSI_CARD scsi_cards[] = { { &ncr53c825a_pci_device, }, { &ncr53c860_pci_device, }, { &ncr53c875_pci_device, }, + { &am53c974_pci_device, }, { &dc390_pci_device, }, { &buslogic_445s_device, }, { &buslogic_445c_device, }, diff --git a/src/scsi/scsi_cdrom.c b/src/scsi/scsi_cdrom.c index a049c69d0..be9809e08 100644 --- a/src/scsi/scsi_cdrom.c +++ b/src/scsi/scsi_cdrom.c @@ -1724,9 +1724,6 @@ scsi_cdrom_request_sense(scsi_cdrom_t *dev, uint8_t *buffer, uint8_t alloc_lengt that condition. */ dev->unit_attention = 0; } - - /* Clear the sense stuff as per the spec. */ - scsi_cdrom_sense_clear(dev, GPCMD_REQUEST_SENSE); } void @@ -1841,6 +1838,10 @@ scsi_cdrom_command(scsi_common_t *sc, uint8_t *cdb) return; begin: + if (cdb[0] != GPCMD_REQUEST_SENSE) { + /* Clear the sense stuff as per the spec. */ + scsi_cdrom_sense_clear(dev, cdb[0]); + } switch (cdb[0]) { case GPCMD_TEST_UNIT_READY: scsi_cdrom_set_phase(dev, SCSI_PHASE_STATUS); @@ -3231,9 +3232,6 @@ begin: if (dev->drv->bus_type == CDROM_BUS_SCSI) { dev->buffer[3] = 0x02; switch (dev->drv->type) { - case CDROM_TYPE_86BOX_100: - dev->buffer[2] = 0x05; /*SCSI-2 compliant*/ - break; case CDROM_TYPE_CHINON_CDS431_H42: case CDROM_TYPE_DEC_RRD45_0436: case CDROM_TYPE_MATSHITA_501_10b: diff --git a/src/scsi/scsi_pcscsi.c b/src/scsi/scsi_pcscsi.c index a714ada96..be50d9a89 100644 --- a/src/scsi/scsi_pcscsi.c +++ b/src/scsi/scsi_pcscsi.c @@ -26,6 +26,7 @@ #include #include #include +#include #define HAVE_STDARG_H #include #include <86box/86box.h> @@ -47,6 +48,7 @@ #include <86box/fifo8.h> #define DC390_ROM "roms/scsi/esp_pci/INT13.BIN" +#define AM53C974_ROM "roms/scsi/esp_pci/harom.bin" #define ESP_REGS 16 #define ESP_FIFO_SZ 16 @@ -174,6 +176,7 @@ typedef struct esp_t { Fifo8 cmdfifo; uint32_t do_cmd; uint8_t cmdfifo_cdb_offset; + int data_ready; int32_t xfer_counter; int dma_enabled; @@ -186,6 +189,7 @@ typedef struct esp_t { pc_timer_t timer; + int local; int mca; uint16_t Base; uint8_t HostID; @@ -204,6 +208,9 @@ typedef struct esp_t { #define READ_FROM_DEVICE 1 #define WRITE_TO_DEVICE 0 +uint8_t esp_pci_regs[256]; +bar_t esp_pci_bar[2]; + #ifdef ENABLE_ESP_LOG int esp_do_log = ENABLE_ESP_LOG; @@ -223,16 +230,62 @@ esp_log(const char *fmt, ...) #endif static void esp_dma_enable(esp_t *dev, int level); -static void esp_do_dma(esp_t *dev, scsi_device_t *sd); -static void esp_do_nodma(esp_t *dev, scsi_device_t *sd); +static void esp_do_dma(esp_t *dev); +static void esp_do_nodma(esp_t *dev); static void esp_pci_dma_memory_rw(esp_t *dev, uint8_t *buf, uint32_t len, int dir); static void esp_timer_on(esp_t *dev, scsi_device_t *sd, double p); static void esp_command_complete(void *priv, uint32_t status); -static void esp_pci_command_complete(void *priv, uint32_t status); +static void esp_dma_ti_check(esp_t *dev); +static void esp_nodma_ti_dataout(esp_t *dev); static void esp_pci_soft_reset(esp_t *dev); static void esp_pci_hard_reset(esp_t *dev); static void handle_ti(void *priv); +static int +esp_cdb_length(uint8_t *buf) +{ + int cdb_len; + + switch (buf[0] >> 5) { + case 0: + case 3: + cdb_len = 6; + break; + case 1: + case 2: + case 6: + cdb_len = 10; + break; + case 4: + cdb_len = 16; + break; + case 5: + cdb_len = 12; + break; + default: + cdb_len = -1; + break; + } + return cdb_len; +} + +static void +esp_pci_update_irq(esp_t *dev) +{ + int scsi_level = !!(dev->dma_regs[DMA_STAT] & DMA_STAT_SCSIINT); + int dma_level = (dev->dma_regs[DMA_CMD] & DMA_CMD_INTE_D) ? + !!(dev->dma_regs[DMA_STAT] & DMA_STAT_DONE) : 0; + int level = scsi_level || dma_level; + + if (level) { + pci_set_irq(dev->pci_slot, PCI_INTA, &dev->irq_state); + esp_log("Raising PCI IRQ...\n"); + } else { + pci_clear_irq(dev->pci_slot, PCI_INTA, &dev->irq_state); + esp_log("Lowering PCI IRQ...\n"); + } +} + static void esp_irq(esp_t *dev, int level) { @@ -246,12 +299,21 @@ esp_irq(esp_t *dev, int level) } } else { if (level) { - pci_set_irq(dev->pci_slot, PCI_INTA, &dev->irq_state); - esp_log("Raising IRQ...\n"); - } else { - pci_clear_irq(dev->pci_slot, PCI_INTA, &dev->irq_state); - esp_log("Lowering IRQ...\n"); - } + dev->dma_regs[DMA_STAT] |= DMA_STAT_SCSIINT; + /* + * If raising the ESP IRQ to indicate end of DMA transfer, set + * DMA_STAT_DONE at the same time. In theory this should be done in + * esp_pci_dma_memory_rw(), however there is a delay between setting + * DMA_STAT_DONE and the ESP IRQ arriving which is visible to the + * guest that can cause confusion e.g. Linux + */ + if ((dev->dma_regs[DMA_CMD] & DMA_CMD_MASK) == 0x3 && + dev->dma_regs[DMA_WBC] == 0) + dev->dma_regs[DMA_STAT] |= DMA_STAT_DONE; + } else + dev->dma_regs[DMA_STAT] &= ~DMA_STAT_SCSIINT; + + esp_pci_update_irq(dev); } } @@ -274,41 +336,46 @@ esp_lower_irq(esp_t *dev) } static void -esp_fifo_push(Fifo8 *fifo, uint8_t val) +esp_set_phase(esp_t *dev, uint8_t phase) { - if (fifo8_num_used(fifo) == fifo->capacity) { - return; - } - - fifo8_push(fifo, val); + dev->rregs[ESP_RSTAT] &= ~7; + dev->rregs[ESP_RSTAT] |= phase; } static uint8_t -esp_fifo_pop(Fifo8 *fifo) +esp_get_phase(esp_t *dev) { - if (fifo8_is_empty(fifo)) { - return 0; - } + return dev->rregs[ESP_RSTAT] & 7; +} - return fifo8_pop(fifo); +static void +esp_fifo_push(esp_t *dev, uint8_t val) +{ + if (fifo8_num_used(&dev->fifo) == dev->fifo.capacity) + return; + + fifo8_push(&dev->fifo, val); +} + +static uint8_t +esp_fifo_pop(esp_t *dev) +{ + uint8_t val; + + if (fifo8_is_empty(&dev->fifo)) + val = 0; + else + val = fifo8_pop(&dev->fifo); + + return val; } static uint32_t -esp_fifo_pop_buf(Fifo8 *fifo, uint8_t *dest, int maxlen) +esp_fifo_pop_buf(esp_t *dev, uint8_t *dest, int maxlen) { - const uint8_t *buf; - uint32_t n; + uint32_t len = fifo8_pop_buf(&dev->fifo, dest, maxlen); - if (maxlen == 0) { - return 0; - } - - buf = fifo8_pop_buf(fifo, maxlen, &n); - if (dest) { - memcpy(dest, buf, n); - } - - return n; + return len; } static uint32_t @@ -326,9 +393,14 @@ esp_get_tc(esp_t *dev) static void esp_set_tc(esp_t *dev, uint32_t dmalen) { + uint32_t old_tc = esp_get_tc(dev); + dev->rregs[ESP_TCLO] = dmalen; dev->rregs[ESP_TCMID] = dmalen >> 8; dev->rregs[ESP_TCHI] = dmalen >> 16; + + if (old_tc && !dmalen) + dev->rregs[ESP_RSTAT] |= STAT_TC; } static uint32_t @@ -343,64 +415,91 @@ esp_get_stc(esp_t *dev) return dmalen; } -static void -esp_dma_done(esp_t *dev) +static int +esp_select(esp_t *dev) { - dev->rregs[ESP_RSTAT] |= STAT_TC; - dev->rregs[ESP_RINTR] = INTR_BS; - dev->rregs[ESP_RSEQ] = 0; - dev->rregs[ESP_RFLAGS] = 0; - esp_set_tc(dev, 0); - esp_log("ESP DMA Finished\n"); - esp_raise_irq(dev); -} - -static uint32_t -esp_get_cmd(esp_t *dev, uint32_t maxlen) -{ - uint8_t buf[ESP_CMDFIFO_SZ]; - uint32_t dmalen; - uint32_t n; + scsi_device_t *sd; dev->id = dev->wregs[ESP_WBUSID] & BUSID_DID; - if (dev->dma) { - dmalen = MIN(esp_get_tc(dev), maxlen); - esp_log("ESP Get data, dmalen = %d\n", dmalen); - if (dmalen == 0) - return 0; - if (dev->mca) { - dma_set_drq(dev->DmaChannel, 1); - while (dev->dma_86c01.pos < dmalen) { - int val = dma_channel_read(dev->DmaChannel); - buf[dev->dma_86c01.pos++] = val & 0xff; - } - dev->dma_86c01.pos = 0; - dma_set_drq(dev->DmaChannel, 0); - } else { - esp_pci_dma_memory_rw(dev, buf, dmalen, WRITE_TO_DEVICE); - } - dmalen = MIN(fifo8_num_free(&dev->cmdfifo), dmalen); - fifo8_push_all(&dev->cmdfifo, buf, dmalen); - } else { - dmalen = MIN(fifo8_num_used(&dev->fifo), maxlen); - esp_log("ESP Get command, dmalen = %i\n", dmalen); - if (dmalen == 0) { - return 0; - } - n = esp_fifo_pop_buf(&dev->fifo, buf, dmalen); - n = MIN(fifo8_num_free(&dev->cmdfifo), n); - fifo8_push_all(&dev->cmdfifo, buf, n); - } + sd = &scsi_devices[dev->bus][dev->id]; dev->ti_size = 0; - fifo8_reset(&dev->fifo); + dev->rregs[ESP_RSEQ] = SEQ_0; - dev->rregs[ESP_RINTR] |= INTR_FC; - dev->rregs[ESP_RSEQ] = SEQ_CD; + if (!scsi_device_present(sd)) { + esp_log("ESP SCSI no devices on ID %d, LUN %d\n", dev->id, dev->lun); + /* No such drive */ + dev->rregs[ESP_RSTAT] = 0; + dev->rregs[ESP_RINTR] = INTR_DC; + esp_raise_irq(dev); + return -1; + } else + esp_log("ESP SCSI device present on ID %d, LUN %d\n", dev->id, dev->lun); - return dmalen; + return 0; } + /* Callback to indicate that the SCSI layer has completed a transfer. */ +static void +esp_transfer_data(esp_t *dev) +{ + if (!dev->data_ready) { + dev->data_ready = 1; + + switch (dev->rregs[ESP_CMD]) { + case CMD_SEL: + case (CMD_SEL | CMD_DMA): + case CMD_SELATN: + case (CMD_SELATN | CMD_DMA): + /* + * Initial incoming data xfer is complete for sequencer command + * so raise deferred bus service and function complete interrupt + */ + dev->rregs[ESP_RINTR] |= (INTR_BS | INTR_FC); + dev->rregs[ESP_RSEQ] = SEQ_CD; + break; + + case CMD_SELATNS: + case (CMD_SELATNS | CMD_DMA): + /* + * Initial incoming data xfer is complete so raise command + * completion interrupt + */ + dev->rregs[ESP_RINTR] |= INTR_BS; + dev->rregs[ESP_RSEQ] = SEQ_MO; + break; + + case CMD_TI: + case (CMD_TI | CMD_DMA): + /* + * Bus service interrupt raised because of initial change to + * DATA phase + */ + dev->rregs[ESP_CMD] = 0; + dev->rregs[ESP_RINTR] |= INTR_BS; + break; + } + + esp_raise_irq(dev); + } + + /* + * Always perform the initial transfer upon reception of the next TI + * command to ensure the DMA/non-DMA status of the command is correct. + * It is not possible to use s->dma directly in the section below as + * some OSs send non-DMA NOP commands after a DMA transfer. Hence if the + * async data transfer is delayed then s->dma is set incorrectly. + */ + + if (dev->rregs[ESP_CMD] == (CMD_TI | CMD_DMA)) { + /* When the SCSI layer returns more data, raise deferred INTR_BS */ + esp_dma_ti_check(dev); + esp_do_dma(dev); + } else if (dev->rregs[ESP_CMD] == CMD_TI) + esp_do_nodma(dev); +} + + static void esp_do_command_phase(esp_t *dev) { @@ -416,7 +515,7 @@ esp_do_command_phase(esp_t *dev) if (!cmdlen) return; - esp_fifo_pop_buf(&dev->cmdfifo, buf, cmdlen); + fifo8_pop_buf(&dev->cmdfifo, buf, cmdlen); for (int i = 0; i < cmdlen; i++) esp_log("CDB[%i] = %02x\n", i, buf[i]); @@ -427,46 +526,32 @@ esp_do_command_phase(esp_t *dev) dev->ti_size = sd->buffer_length; dev->xfer_counter = sd->buffer_length; - esp_log("ESP SCSI Command = 0x%02x, ID = %d, LUN = %d, len = %d\n", buf[0], dev->id, dev->lun, sd->buffer_length); + esp_log("ESP SCSI Command = 0x%02x, ID = %d, LUN = %d, len = %d, phase = %02x.\n", buf[0], dev->id, dev->lun, sd->buffer_length, sd->phase); fifo8_reset(&dev->cmdfifo); + dev->data_ready = 0; if (sd->buffer_length > 0) { - /* This should be set to the underlying device's buffer by command phase 0. */ - dev->rregs[ESP_RSTAT] = STAT_TC; - dev->rregs[ESP_RSEQ] = SEQ_CD; - if (sd->phase == SCSI_PHASE_DATA_IN) { - dev->rregs[ESP_RSTAT] |= STAT_DI; + esp_set_phase(dev, STAT_DI); esp_log("ESP Data In\n"); esp_timer_on(dev, sd, scsi_device_get_callback(sd)); } else if (sd->phase == SCSI_PHASE_DATA_OUT) { - dev->rregs[ESP_RSTAT] |= STAT_DO; - esp_log("ESP Data Out\n"); + esp_set_phase(dev, STAT_DO); dev->ti_size = -sd->buffer_length; + esp_log("ESP Data Out\n"); esp_timer_on(dev, sd, scsi_device_get_callback(sd)); } esp_log("ESP SCSI Start reading/writing\n"); - esp_do_dma(dev, sd); + esp_do_dma(dev); } else { esp_log("ESP SCSI Command with no length\n"); - if (dev->mca) { - if (buf[0] == 0x43) { - dev->rregs[ESP_RSTAT] = STAT_DI | STAT_TC; - dev->rregs[ESP_RSEQ] = SEQ_CD; - esp_do_dma(dev, sd); - } else - esp_command_complete(dev, sd->status); - } else - esp_pci_command_complete(dev, sd->status); + esp_command_complete(dev, sd->status); } - - scsi_device_identify(sd, SCSI_LUN_USE_CDB); - - dev->rregs[ESP_RINTR] |= (INTR_BS | INTR_FC); - esp_raise_irq(dev); + esp_transfer_data(dev); } + static void esp_do_message_phase(esp_t *dev) { @@ -474,7 +559,8 @@ esp_do_message_phase(esp_t *dev) uint8_t message; if (dev->cmdfifo_cdb_offset) { - message = esp_fifo_pop(&dev->cmdfifo); + message = fifo8_is_empty(&dev->cmdfifo) ? 0 : + fifo8_pop(&dev->cmdfifo); dev->lun = message & 7; dev->cmdfifo_cdb_offset--; @@ -497,7 +583,7 @@ esp_do_message_phase(esp_t *dev) if (dev->cmdfifo_cdb_offset) { len = MIN(dev->cmdfifo_cdb_offset, fifo8_num_used(&dev->cmdfifo)); - esp_fifo_pop_buf(&dev->cmdfifo, NULL, len); + fifo8_drop(&dev->cmdfifo, len); dev->cmdfifo_cdb_offset = 0; } } @@ -505,9 +591,10 @@ esp_do_message_phase(esp_t *dev) static void esp_do_cmd(esp_t *dev) { + esp_log("DO CMD.\n"); esp_do_message_phase(dev); - if (dev->cmdfifo_cdb_offset == 0) - esp_do_command_phase(dev); + assert(dev->cmdfifo_cdb_offset == 0); + esp_do_command_phase(dev); } static void @@ -544,265 +631,550 @@ esp_hard_reset(esp_t *dev) esp_log("ESP Reset\n"); for (uint8_t i = 0; i < 16; i++) scsi_device_reset(&scsi_devices[dev->bus][i]); + timer_stop(&dev->timer); } -static void -esp_do_nodma(esp_t *dev, scsi_device_t *sd) +static int +esp_cdb_ready(esp_t *dev) { - int count; + int len = fifo8_num_used(&dev->cmdfifo) - dev->cmdfifo_cdb_offset; + const uint8_t *pbuf; + uint32_t n; + int cdblen; - esp_log("ESP SCSI Actual FIFO len = %d\n", dev->xfer_counter); + if (len <= 0) + return 0; - if (dev->do_cmd) { - esp_log("ESP Command on FIFO\n"); - dev->ti_size = 0; - - if ((dev->rregs[ESP_RSTAT] & 7) == STAT_CD) { - if (dev->cmdfifo_cdb_offset == fifo8_num_used(&dev->cmdfifo)) { - esp_log("CDB offset = %i used return\n", dev->cmdfifo_cdb_offset); - return; - } - - dev->do_cmd = 0; - esp_do_cmd(dev); - } else { - dev->cmdfifo_cdb_offset = fifo8_num_used(&dev->cmdfifo); - esp_log("CDB offset = %i used\n", dev->cmdfifo_cdb_offset); - - dev->rregs[ESP_RSTAT] = STAT_TC | STAT_CD; - dev->rregs[ESP_RSEQ] = SEQ_CD; - dev->rregs[ESP_RINTR] |= INTR_BS; - esp_raise_irq(dev); - } - return; + pbuf = fifo8_peek_bufptr(&dev->cmdfifo, len, &n); + if (n < len) { + /* + * In normal use the cmdfifo should never wrap, but include this check + * to prevent a malicious guest from reading past the end of the + * cmdfifo data buffer below + */ + return 0; } - if (dev->xfer_counter == 0) { - /* Wait until data is available. */ - esp_log("(ID=%02i LUN=%02i): FIFO no data available\n", dev->id, dev->lun); - return; - } + cdblen = esp_cdb_length((uint8_t *)&pbuf[dev->cmdfifo_cdb_offset]); - esp_log("ESP FIFO = %d, buffer length = %d\n", dev->xfer_counter, sd->buffer_length); + return (cdblen < 0) ? 0 : (len >= cdblen); +} - if (sd->phase == SCSI_PHASE_DATA_IN) { - if (fifo8_is_empty(&dev->fifo)) { - fifo8_push(&dev->fifo, sd->sc->temp_buffer[dev->buffer_pos]); - dev->buffer_pos++; - dev->ti_size--; - dev->xfer_counter--; - } - } else if (sd->phase == SCSI_PHASE_DATA_OUT) { - count = MIN(fifo8_num_used(&dev->fifo), ESP_FIFO_SZ); - esp_fifo_pop_buf(&dev->fifo, sd->sc->temp_buffer + dev->buffer_pos, count); - dev->buffer_pos += count; - dev->ti_size += count; - dev->xfer_counter -= count; - } - - esp_log("ESP FIFO Transfer bytes = %d\n", dev->xfer_counter); - if (dev->xfer_counter <= 0) { - if (sd->phase == SCSI_PHASE_DATA_OUT) { - if (dev->ti_size < 0) { - esp_log("ESP FIFO Keep writing\n"); - esp_do_nodma(dev, sd); - } else { - esp_log("ESP FIFO Write finished\n"); - scsi_device_command_phase1(sd); - if (dev->mca) { - esp_command_complete(dev, sd->status); - } else - esp_pci_command_complete(dev, sd->status); - } - } else if (sd->phase == SCSI_PHASE_DATA_IN) { - /* If there is still data to be read from the device then - complete the DMA operation immediately. Otherwise defer - until the scsi layer has completed. */ - if (dev->ti_size <= 0) { - esp_log("ESP FIFO Read finished\n"); - scsi_device_command_phase1(sd); - if (dev->mca) { - esp_command_complete(dev, sd->status); - } else - esp_pci_command_complete(dev, sd->status); - } else { - esp_log("ESP FIFO Keep reading\n"); - esp_do_nodma(dev, sd); - } - } - } else { - /* Partially filled a scsi buffer. Complete immediately. */ - esp_log("ESP SCSI Partially filled the FIFO buffer\n"); +static void +esp_dma_ti_check(esp_t *dev) +{ + if ((esp_get_tc(dev) == 0) && (fifo8_num_used(&dev->fifo) < 2)) { dev->rregs[ESP_RINTR] |= INTR_BS; esp_raise_irq(dev); } } static void -esp_do_dma(esp_t *dev, scsi_device_t *sd) +esp_do_dma(esp_t *dev) { + scsi_device_t *sd = &scsi_devices[dev->bus][dev->id]; uint8_t buf[ESP_CMDFIFO_SZ]; - uint32_t tdbc; - int count; + uint32_t len; esp_log("ESP SCSI Actual DMA len = %d\n", esp_get_tc(dev)); - if (!scsi_device_present(sd)) { - esp_log("ESP SCSI no devices on ID %d, LUN %d\n", dev->id, dev->lun); - /* No such drive */ - dev->rregs[ESP_RSTAT] = 0; - dev->rregs[ESP_RINTR] = INTR_DC; - dev->rregs[ESP_RSEQ] = SEQ_0; - esp_raise_irq(dev); - fifo8_reset(&dev->cmdfifo); - return; - } else { - esp_log("ESP SCSI device found on ID %d, LUN %d\n", dev->id, dev->lun); - } + len = esp_get_tc(dev); - count = tdbc = esp_get_tc(dev); + switch (esp_get_phase(dev)) { + case STAT_MO: + len = MIN(len, fifo8_num_free(&dev->cmdfifo)); + if (dev->mca) { + dma_set_drq(dev->DmaChannel, 1); + while (dev->dma_86c01.pos < len) { + int val = dma_channel_read(dev->DmaChannel); + buf[dev->dma_86c01.pos++] = val & 0xff; + } + dev->dma_86c01.pos = 0; + dma_set_drq(dev->DmaChannel, 0); + } else + esp_pci_dma_memory_rw(dev, buf, len, WRITE_TO_DEVICE); - if (dev->mca) { - if (sd->buffer_length < 0) { - if (dev->dma_enabled) - goto done; - else - goto partial; - } - } + esp_set_tc(dev, esp_get_tc(dev) - len); + fifo8_push_all(&dev->cmdfifo, buf, len); + dev->cmdfifo_cdb_offset += len; - if (dev->do_cmd) { - esp_log("ESP Command on DMA\n"); - count = MIN(count, fifo8_num_free(&dev->cmdfifo)); - if (dev->mca) { - dma_set_drq(dev->DmaChannel, 1); - while (dev->dma_86c01.pos < count) { - dma_channel_write(dev->DmaChannel, buf[dev->dma_86c01.pos]); - dev->dma_86c01.pos++; + switch (dev->rregs[ESP_CMD]) { + case (CMD_SELATN | CMD_DMA): + if (fifo8_num_used(&dev->cmdfifo) >= 1) { + /* First byte received, switch to command phase */ + esp_set_phase(dev, STAT_CD); + dev->rregs[ESP_RSEQ] = SEQ_CD; + dev->cmdfifo_cdb_offset = 1; + + if (fifo8_num_used(&dev->cmdfifo) > 1) { + /* Process any additional command phase data */ + esp_do_dma(dev); + } + } + break; + + case (CMD_SELATNS | CMD_DMA): + if (fifo8_num_used(&dev->cmdfifo) == 1) { + /* First byte received, stop in message out phase */ + dev->rregs[ESP_RSEQ] = SEQ_MO; + dev->cmdfifo_cdb_offset = 1; + + /* Raise command completion interrupt */ + dev->rregs[ESP_RINTR] |= (INTR_BS | INTR_FC); + esp_raise_irq(dev); + } + break; + + case (CMD_TI | CMD_DMA): + /* ATN remains asserted until TC == 0 */ + if (esp_get_tc(dev) == 0) { + esp_set_phase(dev, STAT_CD); + dev->rregs[ESP_CMD] = 0; + dev->rregs[ESP_RINTR] |= INTR_BS; + esp_raise_irq(dev); + } + break; + + default: + break; } - dev->dma_86c01.pos = 0; - dma_set_drq(dev->DmaChannel, 0); - } else - esp_pci_dma_memory_rw(dev, buf, count, READ_FROM_DEVICE); - fifo8_push_all(&dev->cmdfifo, buf, count); - dev->ti_size = 0; + break; - if ((dev->rregs[ESP_RSTAT] & 7) == STAT_CD) { - if (dev->cmdfifo_cdb_offset == fifo8_num_used(&dev->cmdfifo)) + case STAT_CD: + len = MIN(len, fifo8_num_free(&dev->cmdfifo)); + if (dev->mca) { + dma_set_drq(dev->DmaChannel, 1); + while (dev->dma_86c01.pos < len) { + int val = dma_channel_read(dev->DmaChannel); + buf[dev->dma_86c01.pos++] = val & 0xff; + } + dev->dma_86c01.pos = 0; + dma_set_drq(dev->DmaChannel, 0); + } else + esp_pci_dma_memory_rw(dev, buf, len, WRITE_TO_DEVICE); + + fifo8_push_all(&dev->cmdfifo, buf, len); + esp_set_tc(dev, esp_get_tc(dev) - len); + dev->ti_size = 0; + if (esp_get_tc(dev) == 0) { + /* Command has been received */ + esp_do_cmd(dev); + } + break; + + case STAT_DO: + if (!dev->xfer_counter && esp_get_tc(dev)) { + /* Defer until data is available. */ return; - - dev->do_cmd = 0; - esp_do_cmd(dev); - } else { - dev->cmdfifo_cdb_offset = fifo8_num_used(&dev->cmdfifo); - - dev->rregs[ESP_RSTAT] = STAT_TC | STAT_CD; - dev->rregs[ESP_RSEQ] = SEQ_CD; - dev->rregs[ESP_RINTR] |= INTR_BS; - esp_raise_irq(dev); - } - return; - } - - if (dev->xfer_counter == 0) { - /* Wait until data is available. */ - esp_log("(ID=%02i LUN=%02i): DMA no data available\n", dev->id, dev->lun); - return; - } - - esp_log("ESP SCSI dmaleft = %d, buffer length = %d\n", esp_get_tc(dev), sd->buffer_length); - - /* Make sure count is never bigger than buffer_length. */ - if (count > dev->xfer_counter) - count = dev->xfer_counter; - - if (sd->phase == SCSI_PHASE_DATA_IN) { - esp_log("ESP SCSI Read, dma cnt = %i, ti size = %i, positive len = %i\n", esp_get_tc(dev), dev->ti_size, count); - if (dev->mca) { - dma_set_drq(dev->DmaChannel, 1); - while (dev->dma_86c01.pos < count) { - dma_channel_write(dev->DmaChannel, sd->sc->temp_buffer[dev->buffer_pos + dev->dma_86c01.pos]); - esp_log("ESP SCSI DMA read for 53C9x: pos = %i, val = %02x\n", dev->dma_86c01.pos, sd->sc->temp_buffer[dev->buffer_pos + dev->dma_86c01.pos]); - dev->dma_86c01.pos++; } - dev->dma_86c01.pos = 0; - dma_set_drq(dev->DmaChannel, 0); - } else { - esp_pci_dma_memory_rw(dev, sd->sc->temp_buffer + dev->buffer_pos, count, READ_FROM_DEVICE); - } - } else if (sd->phase == SCSI_PHASE_DATA_OUT) { - esp_log("ESP SCSI Write, negative len = %i, ti size = %i, dma cnt = %i\n", count, -dev->ti_size, esp_get_tc(dev)); - if (dev->mca) { - dma_set_drq(dev->DmaChannel, 1); - while (dev->dma_86c01.pos < count) { - int val = dma_channel_read(dev->DmaChannel); - esp_log("ESP SCSI DMA write for 53C9x: pos = %i, val = %02x\n", dev->dma_86c01.pos, val & 0xff); - sd->sc->temp_buffer[dev->buffer_pos + dev->dma_86c01.pos] = val & 0xff; - dev->dma_86c01.pos++; - } - dma_set_drq(dev->DmaChannel, 0); - dev->dma_86c01.pos = 0; - } else - esp_pci_dma_memory_rw(dev, sd->sc->temp_buffer + dev->buffer_pos, count, WRITE_TO_DEVICE); - } - esp_set_tc(dev, esp_get_tc(dev) - count); - dev->buffer_pos += count; - dev->xfer_counter -= count; - if (sd->phase == SCSI_PHASE_DATA_IN) { - dev->ti_size -= count; - } else if (sd->phase == SCSI_PHASE_DATA_OUT) { - dev->ti_size += count; - } + if (len > dev->xfer_counter) + len = dev->xfer_counter; - esp_log("ESP SCSI Transfer bytes = %d\n", dev->xfer_counter); - if (dev->xfer_counter <= 0) { - if (sd->phase == SCSI_PHASE_DATA_OUT) { - if (dev->ti_size < 0) { - esp_log("ESP SCSI Keep writing\n"); - esp_do_dma(dev, sd); - } else { - esp_log("ESP SCSI Write finished\n"); - scsi_device_command_phase1(sd); - if (dev->mca) { + switch (dev->rregs[ESP_CMD]) { + case (CMD_TI | CMD_DMA): + if (dev->mca) { + dma_set_drq(dev->DmaChannel, 1); + while (dev->dma_86c01.pos < len) { + int val = dma_channel_read(dev->DmaChannel); + esp_log("ESP SCSI DMA write for 53C9x: pos = %i, val = %02x\n", dev->dma_86c01.pos, val & 0xff); + sd->sc->temp_buffer[dev->buffer_pos + dev->dma_86c01.pos] = val & 0xff; + dev->dma_86c01.pos++; + } + dma_set_drq(dev->DmaChannel, 0); + dev->dma_86c01.pos = 0; + } else + esp_pci_dma_memory_rw(dev, sd->sc->temp_buffer + dev->buffer_pos, len, WRITE_TO_DEVICE); + + esp_set_tc(dev, esp_get_tc(dev) - len); + + dev->buffer_pos += len; + dev->xfer_counter -= len; + dev->ti_size += len; + break; + + case (CMD_PAD | CMD_DMA): + /* Copy TC zero bytes into the incoming stream */ + memset(sd->sc->temp_buffer + dev->buffer_pos, 0, len); + + dev->buffer_pos += len; + dev->xfer_counter -= len; + dev->ti_size += len; + break; + + default: + break; + } + + if ((dev->xfer_counter <= 0) && (fifo8_num_used(&dev->fifo) < 2)) { + /* Defer until the scsi layer has completed */ + if (dev->ti_size < 0) { + esp_log("ESP SCSI Keep writing\n"); + esp_do_dma(dev); + } else { + esp_log("ESP SCSI Write finished\n"); + scsi_device_command_phase1(sd); esp_command_complete(dev, sd->status); - } else - esp_pci_command_complete(dev, sd->status); + } + return; } - } else if (sd->phase == SCSI_PHASE_DATA_IN) { - /* If there is still data to be read from the device then - complete the DMA operation immediately. Otherwise defer - until the scsi layer has completed. */ - if (dev->ti_size <= 0) { -done: - esp_log("ESP SCSI Read finished\n"); + + esp_dma_ti_check(dev); + break; + + case STAT_DI: + if (!dev->xfer_counter && esp_get_tc(dev)) { + /* Defer until data is available. */ + return; + } + if (len > dev->xfer_counter) + len = dev->xfer_counter; + + switch (dev->rregs[ESP_CMD]) { + case (CMD_TI | CMD_DMA): + if (dev->mca) { + dma_set_drq(dev->DmaChannel, 1); + while (dev->dma_86c01.pos < len) { + dma_channel_write(dev->DmaChannel, sd->sc->temp_buffer[dev->buffer_pos + dev->dma_86c01.pos]); + esp_log("ESP SCSI DMA read for 53C9x: pos = %i, val = %02x\n", dev->dma_86c01.pos, sd->sc->temp_buffer[dev->buffer_pos + dev->dma_86c01.pos]); + dev->dma_86c01.pos++; + } + dev->dma_86c01.pos = 0; + dma_set_drq(dev->DmaChannel, 0); + } else + esp_pci_dma_memory_rw(dev, sd->sc->temp_buffer + dev->buffer_pos, len, READ_FROM_DEVICE); + + dev->buffer_pos += len; + dev->xfer_counter -= len; + dev->ti_size -= len; + esp_set_tc(dev, esp_get_tc(dev) - len); + break; + + case (CMD_PAD | CMD_DMA): + dev->buffer_pos += len; + dev->xfer_counter -= len; + dev->ti_size -= len; + esp_set_tc(dev, esp_get_tc(dev) - len); + break; + + default: + break; + } + + if ((dev->xfer_counter <= 0) && !dev->ti_size && esp_get_tc(dev)) { + /* If the guest underflows TC then terminate SCSI request */ + esp_log("ESP SCSI Read finished (underflow).\n"); scsi_device_command_phase1(sd); - if (dev->mca) { - esp_command_complete(dev, sd->status); - } else - esp_pci_command_complete(dev, sd->status); - } else { - esp_log("ESP SCSI Keep reading\n"); - esp_do_dma(dev, sd); + esp_command_complete(dev, sd->status); + return; } - } - } else { - /* Partially filled a scsi buffer. Complete immediately. */ -partial: - esp_log("ESP SCSI Partially filled the SCSI buffer\n"); - esp_dma_done(dev); + + if ((dev->xfer_counter <= 0) && (fifo8_num_used(&dev->fifo) < 2)) { + /* Defer until the scsi layer has completed */ + if (dev->ti_size <= 0) { + esp_log("ESP SCSI Read finished\n"); + scsi_device_command_phase1(sd); + esp_command_complete(dev, sd->status); + } else { + esp_log("ESP SCSI Keep reading\n"); + esp_do_dma(dev); + } + return; + } + esp_dma_ti_check(dev); + break; + + case STAT_ST: + switch (dev->rregs[ESP_CMD]) { + case (CMD_ICCS | CMD_DMA): + len = MIN(len, 1); + + if (len) { + buf[0] = dev->status; + + if (dev->mca) { + dma_set_drq(dev->DmaChannel, 1); + while (dev->dma_86c01.pos < len) { + dma_channel_write(dev->DmaChannel, buf[dev->dma_86c01.pos]); + dev->dma_86c01.pos++; + } + dev->dma_86c01.pos = 0; + dma_set_drq(dev->DmaChannel, 0); + } else + esp_pci_dma_memory_rw(dev, buf, len, READ_FROM_DEVICE); + + esp_set_tc(dev, esp_get_tc(dev) - len); + esp_set_phase(dev, STAT_MI); + + if (esp_get_tc(dev) > 0) { + /* Process any message in phase data */ + esp_do_dma(dev); + } + } + break; + + default: + /* Consume remaining data if the guest underflows TC */ + if (fifo8_num_used(&dev->fifo) < 2) { + dev->rregs[ESP_RINTR] |= INTR_BS; + esp_raise_irq(dev); + } + break; + } + break; + + case STAT_MI: + switch (dev->rregs[ESP_CMD]) { + case (CMD_ICCS | CMD_DMA): + len = MIN(len, 1); + + if (len) { + buf[0] = 0; + + if (dev->mca) { + dma_set_drq(dev->DmaChannel, 1); + while (dev->dma_86c01.pos < len) { + dma_channel_write(dev->DmaChannel, buf[dev->dma_86c01.pos]); + dev->dma_86c01.pos++; + } + dev->dma_86c01.pos = 0; + dma_set_drq(dev->DmaChannel, 0); + } else + esp_pci_dma_memory_rw(dev, buf, len, READ_FROM_DEVICE); + + esp_set_tc(dev, esp_get_tc(dev) - len); + + /* Raise end of command interrupt */ + dev->rregs[ESP_RINTR] |= INTR_FC; + esp_raise_irq(dev); + } + break; + } + break; + + default: + break; } } static void -esp_report_command_complete(esp_t *dev, uint32_t status) +esp_nodma_ti_dataout(esp_t *dev) { - esp_log("ESP Command complete\n"); + scsi_device_t *sd = &scsi_devices[dev->bus][dev->id]; + int len; - dev->ti_size = 0; - dev->status = status; - dev->rregs[ESP_RSTAT] = STAT_TC | STAT_ST; - esp_dma_done(dev); + if (!dev->xfer_counter) { + /* Defer until data is available. */ + return; + } + len = MIN(dev->xfer_counter, ESP_FIFO_SZ); + len = MIN(len, fifo8_num_used(&dev->fifo)); + esp_fifo_pop_buf(dev, sd->sc->temp_buffer + dev->buffer_pos, len); + dev->buffer_pos += len; + dev->xfer_counter -= len; + dev->ti_size += len; + + if (dev->xfer_counter <= 0) { + if (dev->ti_size < 0) { + esp_log("ESP SCSI Keep writing\n"); + esp_nodma_ti_dataout(dev); + } else { + esp_log("ESP SCSI Write finished\n"); + scsi_device_command_phase1(sd); + esp_command_complete(dev, sd->status); + } + return; + } + + dev->rregs[ESP_RINTR] |= INTR_BS; + esp_raise_irq(dev); +} + +static void +esp_do_nodma(esp_t *dev) +{ + scsi_device_t *sd = &scsi_devices[dev->bus][dev->id]; + uint8_t buf[ESP_FIFO_SZ]; + int len; + + switch (esp_get_phase(dev)) { + case STAT_MO: + switch (dev->rregs[ESP_CMD]) { + case CMD_SELATN: + /* Copy FIFO into cmdfifo */ + len = esp_fifo_pop_buf(dev, buf, fifo8_num_used(&dev->fifo)); + len = MIN(fifo8_num_free(&dev->cmdfifo), len); + fifo8_push_all(&dev->cmdfifo, buf, len); + + if (fifo8_num_used(&dev->cmdfifo) >= 1) { + /* First byte received, switch to command phase */ + esp_set_phase(dev, STAT_CD); + dev->rregs[ESP_RSEQ] = SEQ_CD; + dev->cmdfifo_cdb_offset = 1; + + if (fifo8_num_used(&dev->cmdfifo) > 1) { + /* Process any additional command phase data */ + esp_do_nodma(dev); + } + } + break; + + case CMD_SELATNS: + /* Copy one byte from FIFO into cmdfifo */ + len = esp_fifo_pop_buf(dev, buf, + MIN(fifo8_num_used(&dev->fifo), 1)); + len = MIN(fifo8_num_free(&dev->cmdfifo), len); + fifo8_push_all(&dev->cmdfifo, buf, len); + + if (fifo8_num_used(&dev->cmdfifo) >= 1) { + /* First byte received, stop in message out phase */ + dev->rregs[ESP_RSEQ] = SEQ_MO; + dev->cmdfifo_cdb_offset = 1; + + /* Raise command completion interrupt */ + dev->rregs[ESP_RINTR] |= (INTR_BS | INTR_FC); + esp_raise_irq(dev); + } + break; + + case CMD_TI: + /* Copy FIFO into cmdfifo */ + len = esp_fifo_pop_buf(dev, buf, fifo8_num_used(&dev->fifo)); + len = MIN(fifo8_num_free(&dev->cmdfifo), len); + fifo8_push_all(&dev->cmdfifo, buf, len); + + /* ATN remains asserted until FIFO empty */ + dev->cmdfifo_cdb_offset = fifo8_num_used(&dev->cmdfifo); + esp_set_phase(dev, STAT_CD); + dev->rregs[ESP_CMD] = 0; + dev->rregs[ESP_RINTR] |= INTR_BS; + esp_raise_irq(dev); + break; + + default: + break; + } + break; + + case STAT_CD: + switch (dev->rregs[ESP_CMD]) { + case CMD_TI: + /* Copy FIFO into cmdfifo */ + len = esp_fifo_pop_buf(dev, buf, fifo8_num_used(&dev->fifo)); + len = MIN(fifo8_num_free(&dev->cmdfifo), len); + fifo8_push_all(&dev->cmdfifo, buf, len); + + /* CDB may be transferred in one or more TI commands */ + if (esp_cdb_ready(dev)) { + /* Command has been received */ + esp_do_cmd(dev); + } else { + /* + * If data was transferred from the FIFO then raise bus + * service interrupt to indicate transfer complete. Otherwise + * defer until the next FIFO write. + */ + if (len) { + /* Raise interrupt to indicate transfer complete */ + dev->rregs[ESP_RINTR] |= INTR_BS; + esp_raise_irq(dev); + } + } + break; + + case (CMD_SEL | CMD_DMA): + case (CMD_SELATN | CMD_DMA): + /* Copy FIFO into cmdfifo */ + len = esp_fifo_pop_buf(dev, buf, fifo8_num_used(&dev->fifo)); + len = MIN(fifo8_num_free(&dev->cmdfifo), len); + fifo8_push_all(&dev->cmdfifo, buf, len); + + /* Handle when DMA transfer is terminated by non-DMA FIFO write */ + if (esp_cdb_ready(dev)) { + /* Command has been received */ + esp_do_cmd(dev); + } + break; + + case CMD_SEL: + case CMD_SELATN: + /* FIFO already contain entire CDB: copy to cmdfifo and execute */ + len = esp_fifo_pop_buf(dev, buf, fifo8_num_used(&dev->fifo)); + len = MIN(fifo8_num_free(&dev->cmdfifo), len); + fifo8_push_all(&dev->cmdfifo, buf, len); + + esp_do_cmd(dev); + break; + + default: + break; + } + break; + + case STAT_DO: + /* Accumulate data in FIFO until non-DMA TI is executed */ + break; + + case STAT_DI: + if (!dev->xfer_counter) { + /* Defer until data is available. */ + return; + } + if (fifo8_is_empty(&dev->fifo)) { + esp_fifo_push(dev, sd->sc->temp_buffer[dev->buffer_pos]); + dev->buffer_pos++; + dev->ti_size--; + dev->xfer_counter--; + } + + if (dev->xfer_counter <= 0) { + if (dev->ti_size <= 0) { + esp_log("ESP FIFO Read finished\n"); + scsi_device_command_phase1(sd); + esp_command_complete(dev, sd->status); + } else { + esp_log("ESP FIFO Keep reading\n"); + esp_do_nodma(dev); + } + return; + } + + /* If preloading the FIFO, defer until TI command issued */ + if (dev->rregs[ESP_CMD] != CMD_TI) + return; + + dev->rregs[ESP_RINTR] |= INTR_BS; + esp_raise_irq(dev); + break; + + case STAT_ST: + switch (dev->rregs[ESP_CMD]) { + case CMD_ICCS: + esp_fifo_push(dev, dev->status); + esp_set_phase(dev, STAT_MI); + + /* Process any message in phase data */ + esp_do_nodma(dev); + break; + default: + break; + } + break; + + case STAT_MI: + switch (dev->rregs[ESP_CMD]) { + case CMD_ICCS: + esp_fifo_push(dev, 0); + + /* Raise end of command interrupt */ + dev->rregs[ESP_RINTR] |= INTR_FC; + esp_raise_irq(dev); + break; + default: + break; + } + break; + } } /* Callback to indicate that the SCSI layer has completed a command. */ @@ -810,18 +1182,36 @@ static void esp_command_complete(void *priv, uint32_t status) { esp_t *dev = (esp_t *) priv; + scsi_device_t *sd = &scsi_devices[dev->bus][dev->id]; - esp_report_command_complete(dev, status); -} + dev->ti_size = 0; + dev->status = status; -static void -esp_pci_command_complete(void *priv, uint32_t status) -{ - esp_t *dev = (esp_t *) priv; + switch (dev->rregs[ESP_CMD]) { + case CMD_SEL: + case (CMD_SEL | CMD_DMA): + case CMD_SELATN: + case (CMD_SELATN | CMD_DMA): + /* + * Switch to status phase. For non-DMA transfers from the target the last + * byte is still in the FIFO + */ + dev->rregs[ESP_RINTR] |= (INTR_BS | INTR_FC); + dev->rregs[ESP_RSEQ] = SEQ_CD; + break; + case CMD_TI: + case (CMD_TI | CMD_DMA): + dev->rregs[ESP_CMD] = 0; + break; + default: + break; + } + /* Raise bus service interrupt to indicate change to STATUS phase */ + scsi_device_identify(sd, SCSI_LUN_USE_CDB); - esp_command_complete(dev, status); - dev->dma_regs[DMA_WBC] = 0; - dev->dma_regs[DMA_STAT] |= DMA_STAT_DONE; + esp_set_phase(dev, STAT_ST); + dev->rregs[ESP_RINTR] |= INTR_BS; + esp_raise_irq(dev); } static void @@ -838,18 +1228,31 @@ esp_timer_on(esp_t *dev, scsi_device_t *sd, double p) timer_on_auto(&dev->timer, dev->period + 40.0); } +static void +handle_pad(esp_t *dev) +{ + if (dev->dma) { + esp_log("ESP Handle PAD, do data, minlen = %i\n", esp_get_tc(dev)); + esp_do_dma(dev); + } else { + esp_log("ESP Handle PAD, do nodma, minlen = %i\n", dev->xfer_counter); + esp_do_nodma(dev); + } +} + static void handle_ti(void *priv) { esp_t *dev = (esp_t *) priv; - scsi_device_t *sd = &scsi_devices[dev->bus][dev->id]; if (dev->dma) { esp_log("ESP Handle TI, do data, minlen = %i\n", esp_get_tc(dev)); - esp_do_dma(dev, sd); + esp_do_dma(dev); } else { esp_log("ESP Handle TI, do nodma, minlen = %i\n", dev->xfer_counter); - esp_do_nodma(dev, sd); + esp_do_nodma(dev); + if (esp_get_phase(dev) == STAT_DO) + esp_nodma_ti_dataout(dev); } } @@ -857,95 +1260,59 @@ static void handle_s_without_atn(void *priv) { esp_t *dev = (esp_t *) priv; - int len; - len = esp_get_cmd(dev, ESP_CMDFIFO_SZ); - esp_log("ESP SEL w/o ATN len = %d, id = %d\n", len, dev->id); - if (len > 0) { - dev->cmdfifo_cdb_offset = 0; - dev->do_cmd = 0; - esp_do_cmd(dev); - } else if (len == 0) { - dev->do_cmd = 1; - /* Target present, but no cmd yet - switch to command phase */ - dev->rregs[ESP_RSEQ] = SEQ_CD; - dev->rregs[ESP_RSTAT] = STAT_CD; - } + if (esp_select(dev) < 0) + return; + + esp_set_phase(dev, STAT_CD); + dev->cmdfifo_cdb_offset = 0; + + if (dev->dma) + esp_do_dma(dev); + else + esp_do_nodma(dev); } static void handle_satn(void *priv) { esp_t *dev = (esp_t *) priv; - int len; - len = esp_get_cmd(dev, ESP_CMDFIFO_SZ); - esp_log("ESP SEL with ATN len = %d, id = %d\n", len, dev->id); - if (len > 0) { - dev->cmdfifo_cdb_offset = 1; - dev->do_cmd = 0; - esp_do_cmd(dev); - } else if (len == 0) { - dev->do_cmd = 1; - /* Target present, but no cmd yet - switch to command phase */ - dev->rregs[ESP_RSEQ] = SEQ_CD; - dev->rregs[ESP_RSTAT] = STAT_CD; - } + if (esp_select(dev) < 0) + return; + + esp_set_phase(dev, STAT_MO); + + if (dev->dma) + esp_do_dma(dev); + else + esp_do_nodma(dev); } static void handle_satn_stop(void *priv) { esp_t *dev = (esp_t *) priv; - int cmdlen; - cmdlen = esp_get_cmd(dev, 1); - if (cmdlen > 0) { - dev->do_cmd = 1; - dev->cmdfifo_cdb_offset = 1; - dev->rregs[ESP_RSTAT] = STAT_MO; - dev->rregs[ESP_RINTR] = INTR_BS | INTR_FC; - dev->rregs[ESP_RSEQ] = SEQ_MO; - esp_log("ESP SCSI Command len = %d, raising IRQ\n", cmdlen); - esp_raise_irq(dev); - } else if (cmdlen == 0) { - dev->do_cmd = 1; - /* Target present, switch to message out phase */ - dev->rregs[ESP_RSEQ] = SEQ_MO; - dev->rregs[ESP_RSTAT] = STAT_MO; - } + if (esp_select(dev) < 0) + return; + + esp_set_phase(dev, STAT_MO); + dev->cmdfifo_cdb_offset = 0; + + if (dev->dma) + esp_do_dma(dev); + else + esp_do_nodma(dev); } static void esp_write_response(esp_t *dev) { - uint8_t buf[2]; - - buf[0] = dev->status; - buf[1] = 0; - esp_log("esp_write_response(): %02X %02X\n", buf[0], buf[1]); - - if (dev->dma) { - if (dev->mca) { - dma_set_drq(dev->DmaChannel, 1); - while (dev->dma_86c01.pos < 2) { - int val = dma_channel_read(dev->DmaChannel); - buf[dev->dma_86c01.pos++] = val & 0xff; - } - dev->dma_86c01.pos = 0; - dma_set_drq(dev->DmaChannel, 0); - } else - esp_pci_dma_memory_rw(dev, buf, 2, WRITE_TO_DEVICE); - dev->rregs[ESP_RSTAT] = STAT_TC | STAT_ST; - dev->rregs[ESP_RINTR] = INTR_BS | INTR_FC; - dev->rregs[ESP_RSEQ] = SEQ_CD; - } else { - fifo8_reset(&dev->fifo); - fifo8_push_all(&dev->fifo, buf, 2); - dev->rregs[ESP_RFLAGS] = 2; - } - esp_log("ESP SCSI ICCS IRQ\n"); - esp_raise_irq(dev); + if (dev->dma) + esp_do_dma(dev); + else + esp_do_nodma(dev); } static void @@ -953,13 +1320,13 @@ esp_callback(void *priv) { esp_t *dev = (esp_t *) priv; - if (dev->dma_enabled || dev->do_cmd || ((dev->rregs[ESP_CMD] & CMD_CMD) == CMD_PAD)) { + if (dev->dma_enabled || !dev->dma || ((dev->rregs[ESP_CMD] & CMD_CMD) == CMD_PAD)) { if ((dev->rregs[ESP_CMD] & CMD_CMD) == CMD_TI) { esp_log("ESP SCSI Handle TI Callback\n"); handle_ti(dev); } else if ((dev->rregs[ESP_CMD] & CMD_CMD) == CMD_PAD) { esp_log("ESP SCSI Handle PAD Callback\n"); - handle_ti(dev); + handle_pad(dev); } } @@ -973,20 +1340,7 @@ esp_reg_read(esp_t *dev, uint32_t saddr) switch (saddr) { case ESP_FIFO: - if ((dev->rregs[ESP_RSTAT] & 7) == STAT_DI) { - if (dev->ti_size) { - esp_log("TI size FIFO = %i\n", dev->ti_size); - esp_do_nodma(dev, &scsi_devices[dev->bus][dev->id]); - } else { - /* - * The last byte of a non-DMA transfer has been read out - * of the FIFO so switch to status phase - */ - dev->rregs[ESP_RSTAT] = STAT_TC | STAT_ST; - } - } - - dev->rregs[ESP_FIFO] = esp_fifo_pop(&dev->fifo); + dev->rregs[ESP_FIFO] = esp_fifo_pop(dev); ret = dev->rregs[ESP_FIFO]; break; case ESP_RINTR: @@ -1033,21 +1387,10 @@ esp_reg_write(esp_t *dev, uint32_t saddr, uint32_t val) dev->rregs[ESP_RSTAT] &= ~STAT_TC; break; case ESP_FIFO: - if (dev->do_cmd) { - esp_fifo_push(&dev->cmdfifo, val); - esp_log("ESP CmdVal = %02x\n", val); - /* - * If any unexpected message out/command phase data is - * transferred using non-DMA, raise the interrupt - */ - if (dev->rregs[ESP_CMD] == CMD_TI) { - dev->rregs[ESP_RINTR] |= INTR_BS; - esp_raise_irq(dev); - } - } else { - esp_fifo_push(&dev->fifo, val); - esp_log("ESP fifoval = %02x\n", val); - } + if (!fifo8_is_full(&dev->fifo)) + esp_fifo_push(dev, val); + + esp_do_nodma(dev); break; case ESP_CMD: dev->rregs[saddr] = val; @@ -1056,6 +1399,8 @@ esp_reg_write(esp_t *dev, uint32_t saddr, uint32_t val) dev->dma = 1; /* Reload DMA counter. */ esp_set_tc(dev, esp_get_stc(dev)); + if (!esp_get_stc(dev)) + esp_set_tc(dev, 0x10000); } else { dev->dma = 0; esp_log("ESP Command not for DMA\n"); @@ -1091,6 +1436,11 @@ esp_reg_write(esp_t *dev, uint32_t saddr, uint32_t val) case CMD_TI: esp_log("Transfer Information val = %02X\n", val); break; + case CMD_ICCS: + esp_write_response(dev); + dev->rregs[ESP_RINTR] |= INTR_FC; + dev->rregs[ESP_RSTAT] |= STAT_MI; + break; case CMD_SEL: handle_s_without_atn(dev); break; @@ -1100,11 +1450,6 @@ esp_reg_write(esp_t *dev, uint32_t saddr, uint32_t val) case CMD_SELATNS: handle_satn_stop(dev); break; - case CMD_ICCS: - esp_write_response(dev); - dev->rregs[ESP_RINTR] |= INTR_FC; - dev->rregs[ESP_RSTAT] |= STAT_MI; - break; case CMD_MSGACC: dev->rregs[ESP_RINTR] |= INTR_DC; dev->rregs[ESP_RSEQ] = 0; @@ -1160,6 +1505,7 @@ esp_reg_write(esp_t *dev, uint32_t saddr, uint32_t val) static void esp_pci_dma_memory_rw(esp_t *dev, uint8_t *buf, uint32_t len, int dir) { + uint32_t addr; int expected_dir; if (dev->dma_regs[DMA_CMD] & DMA_CMD_DIR) @@ -1174,14 +1520,15 @@ esp_pci_dma_memory_rw(esp_t *dev, uint8_t *buf, uint32_t len, int dir) return; } + addr = dev->dma_regs[DMA_WAC]; if (dev->dma_regs[DMA_WBC] < len) len = dev->dma_regs[DMA_WBC]; - if (expected_dir) { - dma_bm_write(dev->dma_regs[DMA_SPA], buf, len, 4); - } else { - dma_bm_read(dev->dma_regs[DMA_SPA], buf, len, 4); - } + if (expected_dir) + dma_bm_write(addr, buf, len, 4); + else + dma_bm_read(addr, buf, len, 4); + esp_log("DMA: Address = %08X, Length = %08X (%02X %02X %02X %02X -> %02X %02X %02X %02X)\n", dev->dma_regs[DMA_SPA], len, ram[dev->dma_regs[DMA_SPA]], ram[dev->dma_regs[DMA_SPA] + 1], ram[dev->dma_regs[DMA_SPA] + 2], ram[dev->dma_regs[DMA_SPA] + 3], buf[0], buf[1], buf[2], buf[3]); @@ -1201,13 +1548,10 @@ esp_pci_dma_read(esp_t *dev, uint16_t saddr) ret = dev->dma_regs[saddr]; if (saddr == DMA_STAT) { - if (dev->rregs[ESP_RSTAT] & STAT_INT) { - ret |= DMA_STAT_SCSIINT; - esp_log("ESP PCI DMA Read SCSI interrupt issued\n"); - } if (!(dev->sbac & SBAC_STATUS)) { dev->dma_regs[DMA_STAT] &= ~(DMA_STAT_ERROR | DMA_STAT_ABORT | DMA_STAT_DONE); esp_log("ESP PCI DMA Read done cleared\n"); + esp_pci_update_irq(dev); } } @@ -1230,6 +1574,7 @@ esp_pci_dma_write(esp_t *dev, uint16_t saddr, uint32_t val) esp_log("PCI DMA disable\n"); break; case 1: /*BLAST*/ + dev->dma_regs[DMA_STAT] |= DMA_STAT_BCMBLT; break; case 2: /*ABORT*/ scsi_device_command_stop(&scsi_devices[dev->bus][dev->id]); @@ -1256,6 +1601,7 @@ esp_pci_dma_write(esp_t *dev, uint16_t saddr, uint32_t val) /* clear some bits on write */ mask = DMA_STAT_ERROR | DMA_STAT_ABORT | DMA_STAT_DONE; dev->dma_regs[DMA_STAT] &= ~(val & mask); + esp_pci_update_irq(dev); } break; @@ -1628,9 +1974,6 @@ dc390_load_eeprom(esp_t *dev) } } -uint8_t esp_pci_regs[256]; -bar_t esp_pci_bar[2]; - static uint8_t esp_pci_read(UNUSED(int func), int addr, void *priv) { @@ -1641,7 +1984,7 @@ esp_pci_read(UNUSED(int func), int addr, void *priv) switch (addr) { case 0x00: // esp_log("ESP PCI: Read DO line = %02x\n", dev->eeprom.out); - if (!dev->has_bios) + if (!dev->has_bios || dev->local) return 0x22; else { if (dev->eeprom.out) @@ -1651,6 +1994,7 @@ esp_pci_read(UNUSED(int func), int addr, void *priv) return 2; } } + break; case 0x01: return 0x10; case 0x02: @@ -1703,6 +2047,7 @@ esp_pci_read(UNUSED(int func), int addr, void *priv) return PCI_INTA; case 0x40 ... 0x4f: + esp_log("ESP PCI: Read value %02X to register %02X, ID=%d\n", esp_pci_regs[addr], addr, dev->id); return esp_pci_regs[addr]; default: @@ -1722,15 +2067,17 @@ esp_pci_write(UNUSED(int func), int addr, uint8_t val, void *priv) // esp_log("ESP PCI: Write value %02X to register %02X\n", val, addr); - if ((addr >= 0x80) && (addr <= 0xFF)) { - if (addr == 0x80) { - eesk = val & 0x80 ? 1 : 0; - eedi = val & 0x40 ? 1 : 0; - dc390_write_eeprom(dev, 1, eesk, eedi); - } else if (addr == 0xc0) - dc390_write_eeprom(dev, 0, 0, 0); - // esp_log("ESP PCI: Write value %02X to register %02X\n", val, addr); - return; + if (!dev->local) { + if ((addr >= 0x80) && (addr <= 0xFF)) { + if (addr == 0x80) { + eesk = val & 0x80 ? 1 : 0; + eedi = val & 0x40 ? 1 : 0; + dc390_write_eeprom(dev, 1, eesk, eedi); + } else if (addr == 0xc0) + dc390_write_eeprom(dev, 0, 0, 0); + // esp_log("ESP PCI: Write value %02X to register %02X\n", val, addr); + return; + } } switch (addr) { @@ -1809,6 +2156,7 @@ esp_pci_write(UNUSED(int func), int addr, uint8_t val, void *priv) return; case 0x40 ... 0x4f: + esp_log("ESP PCI: Write value %02X to register %02X, ID=%i.\n", val, addr, dev->id); esp_pci_regs[addr] = val; return; @@ -1827,6 +2175,7 @@ dc390_init(UNUSED(const device_t *info)) dev->bus = scsi_get_bus(); + dev->local = info->local; dev->mca = 0; fifo8_create(&dev->fifo, ESP_FIFO_SZ); @@ -1843,23 +2192,27 @@ dc390_init(UNUSED(const device_t *info)) dev->has_bios = device_get_config_int("bios"); if (dev->has_bios) { dev->BIOSBase = 0xd0000; - rom_init(&dev->bios, DC390_ROM, 0xd0000, 0x8000, 0x7fff, 0, MEM_MAPPING_EXTERNAL); + if (dev->local) { + ;//rom_init(&dev->bios, AM53C974_ROM, 0xd0000, 0x8000, 0x7fff, 0, MEM_MAPPING_EXTERNAL); + } else + rom_init(&dev->bios, DC390_ROM, 0xd0000, 0x8000, 0x7fff, 0, MEM_MAPPING_EXTERNAL); } /* Enable our BIOS space in PCI, if needed. */ - if (dev->has_bios) { + if (dev->has_bios) esp_pci_bar[1].addr = 0xffff0000; - } else { + else esp_pci_bar[1].addr = 0; - } if (dev->has_bios) esp_bios_disable(dev); - sprintf(dev->nvr_path, "dc390_%i.nvr", device_get_instance()); + if (!dev->local) { + sprintf(dev->nvr_path, "dc390_%i.nvr", device_get_instance()); - /* Load the serial EEPROM. */ - dc390_load_eeprom(dev); + /* Load the serial EEPROM. */ + dc390_load_eeprom(dev); + } esp_pci_hard_reset(dev); @@ -2092,6 +2445,20 @@ const device_t dc390_pci_device = { .config = bios_enable_config }; +const device_t am53c974_pci_device = { + .name = "AMD 53c974 PCI", + .internal_name = "am53c974", + .flags = DEVICE_PCI, + .local = 1, + .init = dc390_init, + .close = esp_close, + .reset = NULL, + { .available = NULL }, + .speed_changed = NULL, + .force_redraw = NULL, + .config = NULL +}; + const device_t ncr53c90a_mca_device = { .name = "NCR 53c90a MCA", .internal_name = "ncr53c90a", diff --git a/src/sound/midi.c b/src/sound/midi.c index bf92249c4..9edf0a2aa 100644 --- a/src/sound/midi.c +++ b/src/sound/midi.c @@ -76,19 +76,19 @@ static const MIDI_OUT_DEVICE devices[] = { { &device_none }, #ifdef USE_FLUIDSYNTH { &fluidsynth_device }, -#endif +#endif /* USE_FLUIDSYNTH */ #ifdef USE_MUNT { &mt32_old_device }, { &mt32_new_device }, { &cm32l_device }, { &cm32ln_device }, -#endif +#endif /*USE_MUNT */ #ifdef USE_RTMIDI { &rtmidi_output_device }, -#endif -#if defined(DEV_BRANCH) && defined(USE_OPL4ML) +#endif /* USE_RTMIDI */ +#ifdef USE_OPL4ML { &opl4_midi_device }, -#endif +#endif /* USE_OPL4ML */ { NULL } // clang-format on }; @@ -98,7 +98,7 @@ static const MIDI_IN_DEVICE midi_in_devices[] = { { &device_none }, #ifdef USE_RTMIDI { &rtmidi_input_device }, -#endif +#endif /* USE_RTMIDI */ { NULL } // clang-format on }; diff --git a/src/sound/snd_cs423x.c b/src/sound/snd_cs423x.c index 02ca16fa6..95d183e8f 100644 --- a/src/sound/snd_cs423x.c +++ b/src/sound/snd_cs423x.c @@ -752,7 +752,7 @@ cs423x_init(const device_t *info) FILE *fp = rom_fopen(PNP_ROM_CS4236B, "rb"); if (fp) { - fread(&(dev->eeprom_data[23]), 1, 8201, fp); + (void) !fread(&(dev->eeprom_data[23]), 1, 8201, fp); fclose(fp); } diff --git a/src/sound/snd_gus.c b/src/sound/snd_gus.c index d0af5c564..10b6f00dc 100644 --- a/src/sound/snd_gus.c +++ b/src/sound/snd_gus.c @@ -16,9 +16,9 @@ #include <86box/pic.h> #include <86box/sound.h> #include <86box/timer.h> -#if defined(DEV_BRANCH) && defined(USE_GUSMAX) +#ifdef USE_GUSMAX # include <86box/snd_ad1848.h> -#endif +#endif /*USE_GUSMAX */ #include <86box/plat_fallthrough.h> #include <86box/plat_unused.h> @@ -144,11 +144,11 @@ typedef struct gus_t { uint8_t usrr; -#if defined(DEV_BRANCH) && defined(USE_GUSMAX) +#ifdef USE_GUSMAX uint8_t max_ctrl; ad1848_t ad1848; -#endif +#endif /*USE_GUSMAX */ } gus_t; static int gus_gf1_irqs[8] = { -1, 2, 5, 3, 7, 11, 12, 15 }; @@ -256,9 +256,9 @@ writegus(uint16_t addr, uint8_t val, void *priv) int d; int old; uint16_t port; -#if defined(DEV_BRANCH) && defined(USE_GUSMAX) +#ifdef USE_GUSMAX uint16_t csioport; -#endif +#endif /*USE_GUSMAX */ if ((addr == 0x388) || (addr == 0x389)) port = addr; @@ -606,10 +606,10 @@ writegus(uint16_t addr, uint8_t val, void *priv) gus->irq_midi = gus->irq; } else gus->irq_midi = gus_midi_irqs[(val >> 3) & 7]; -#if defined(DEV_BRANCH) && defined(USE_GUSMAX) +#ifdef USE_GUSMAX if (gus->type == GUS_MAX) ad1848_setirq(&gus->ad1848, gus->irq); -#endif +#endif /*USE_GUSMAX */ gus->sb_nmi = val & 0x80; } else { @@ -622,10 +622,10 @@ writegus(uint16_t addr, uint8_t val, void *priv) gus->dma2 = gus->dma; } else gus->dma2 = gus_dmas[(val >> 3) & 7]; -#if defined(DEV_BRANCH) && defined(USE_GUSMAX) +#ifdef USE_GUSMAX if (gus->type == GUS_MAX) ad1848_setdma(&gus->ad1848, gus->dma2); -#endif +#endif /*USE_GUSMAX */ } break; case 1: @@ -683,7 +683,7 @@ writegus(uint16_t addr, uint8_t val, void *priv) break; case 0x306: case 0x706: -#if defined(DEV_BRANCH) && defined(USE_GUSMAX) +#ifdef USE_GUSMAX if (gus->type == GUS_MAX) { if (gus->dma >= 4) val |= 0x10; @@ -703,7 +703,7 @@ writegus(uint16_t addr, uint8_t val, void *priv) } } } -#endif +#endif /*USE_GUSMAX */ break; default: @@ -755,11 +755,11 @@ readgus(uint16_t addr, void *priv) return val; case 0x20F: -#if defined(DEV_BRANCH) && defined(USE_GUSMAX) +#ifdef USE_GUSMAX if (gus->type == GUS_MAX) val = 0x02; else -#endif +#endif /*USE_GUSMAX */ val = 0x00; break; @@ -878,11 +878,11 @@ readgus(uint16_t addr, void *priv) break; case 0x306: case 0x706: -#if defined(DEV_BRANCH) && defined(USE_GUSMAX) +#ifdef USE_GUSMAX if (gus->type == GUS_MAX) val = 0x0a; /* GUS MAX */ else -#endif +#endif /*USE_GUSMAX */ val = 0xff; /*Pre 3.7 - no mixer*/ break; @@ -939,7 +939,7 @@ readgus(uint16_t addr, void *priv) gus->ad_status &= ~0x01; #ifdef OLD_NMI_BEHAVIOR nmi = 0; -#endif +#endif /* OLD_NMI_BEHAVIOR */ fallthrough; case 0x389: val = gus->ad_data; @@ -1182,24 +1182,24 @@ gus_get_buffer(int32_t *buffer, int len, void *priv) { gus_t *gus = (gus_t *) priv; -#if defined(DEV_BRANCH) && defined(USE_GUSMAX) +#ifdef USE_GUSMAX if ((gus->type == GUS_MAX) && (gus->max_ctrl)) ad1848_update(&gus->ad1848); -#endif +#endif /*USE_GUSMAX */ gus_update(gus); for (int c = 0; c < len * 2; c++) { -#if defined(DEV_BRANCH) && defined(USE_GUSMAX) +#ifdef USE_GUSMAX if ((gus->type == GUS_MAX) && (gus->max_ctrl)) buffer[c] += (int32_t) (gus->ad1848.buffer[c] / 2); -#endif +#endif /*USE_GUSMAX */ buffer[c] += (int32_t) gus->buffer[c & 1][c >> 1]; } -#if defined(DEV_BRANCH) && defined(USE_GUSMAX) +#ifdef USE_GUSMAX if ((gus->type == GUS_MAX) && (gus->max_ctrl)) gus->ad1848.pos = 0; -#endif +#endif /*USE_GUSMAX */ gus->pos = 0; } @@ -1332,9 +1332,9 @@ gus_reset(void *priv) gus->usrr = 0; -#if defined(DEV_BRANCH) && defined(USE_GUSMAX) +#ifdef USE_GUSMAX gus->max_ctrl = 0; -#endif +#endif /*USE_GUSMAX */ gus->irq_state = 0; gus->midi_irq_state = 0; @@ -1383,7 +1383,7 @@ gus_init(UNUSED(const device_t *info)) io_sethandler(0x0506 + gus->base, 0x0001, readgus, NULL, NULL, writegus, NULL, NULL, gus); io_sethandler(0x0388, 0x0002, readgus, NULL, NULL, writegus, NULL, NULL, gus); -#if defined(DEV_BRANCH) && defined(USE_GUSMAX) +#ifdef USE_GUSMAX if (gus->type == GUS_MAX) { ad1848_init(&gus->ad1848, AD1848_TYPE_CS4231); ad1848_setirq(&gus->ad1848, 5); @@ -1391,7 +1391,7 @@ gus_init(UNUSED(const device_t *info)) io_sethandler(0x10C + gus->base, 4, ad1848_read, NULL, NULL, ad1848_write, NULL, NULL, &gus->ad1848); } -#endif +#endif /*USE_GUSMAX */ timer_add(&gus->samp_timer, gus_poll_wave, gus, 1); timer_add(&gus->timer_1, gus_poll_timer_1, gus, 1); @@ -1424,10 +1424,10 @@ gus_speed_changed(void *priv) else gus->samp_latch = (uint64_t) (TIMER_USEC * (1000000.0 / gusfreqs[gus->voices - 14])); -#if defined(DEV_BRANCH) && defined(USE_GUSMAX) +#ifdef USE_GUSMAX if ((gus->type == GUS_MAX) && (gus->max_ctrl)) ad1848_speed_changed(&gus->ad1848); -#endif +#endif /*USE_GUSMAX */ } static const device_config_t gus_config[] = { @@ -1445,12 +1445,12 @@ static const device_config_t gus_config[] = { .description = "Classic", .value = GUS_CLASSIC }, -#if defined(DEV_BRANCH) && defined(USE_GUSMAX) +#ifdef USE_GUSMAX { .description = "MAX", .value = GUS_MAX }, -#endif +#endif /*USE_GUSMAX */ { NULL } }, }, diff --git a/src/sound/snd_mpu401.c b/src/sound/snd_mpu401.c index 40853d98b..21595e373 100644 --- a/src/sound/snd_mpu401.c +++ b/src/sound/snd_mpu401.c @@ -342,11 +342,15 @@ MPU401_Reset(mpu_t *mpu) static uint8_t MPU401_ReadStatus(mpu_t *mpu) { - uint8_t ret = 0x3f; + uint8_t ret = 0x00; + if (mpu->state.cmd_pending) - ret |= STATUS_OUTPUT_NOT_READY; + ret = STATUS_OUTPUT_NOT_READY; if (!mpu->queue_used) - ret |= STATUS_INPUT_NOT_READY; + ret = STATUS_INPUT_NOT_READY; + + ret |= 0x3f; + return ret; } @@ -378,15 +382,6 @@ MPU401_WriteCommand(mpu_t *mpu, uint8_t val) if ((val != 0xff) && (mpu->mode == M_UART)) return; - if (mpu->state.reset) { - if (mpu->state.cmd_pending || (val != 0xff)) { - mpu->state.cmd_pending = val + 1; - return; - } - timer_disable(&mpu->mpu401_reset_callback); - mpu->state.reset = 0; - } - /* In Intelligent mode, UART-only variants of the MPU-401 only support commands 0x3F and 0xFF. */ if (!mpu->intelligent && (val != 0x3f) && (val != 0xff)) return; diff --git a/src/sound/snd_sb.c b/src/sound/snd_sb.c index b9e2dea5a..88a7d875a 100644 --- a/src/sound/snd_sb.c +++ b/src/sound/snd_sb.c @@ -3190,7 +3190,8 @@ sb_16_init(UNUSED(const device_t *info)) if (mpu_addr) { sb->mpu = (mpu_t *) malloc(sizeof(mpu_t)); memset(sb->mpu, 0, sizeof(mpu_t)); - mpu401_init(sb->mpu, device_get_config_hex16("base401"), device_get_config_int("irq"), M_UART, device_get_config_int("receive_input401")); + mpu401_init(sb->mpu, device_get_config_hex16("base401"), 0, M_UART, + device_get_config_int("receive_input401")); } else sb->mpu = NULL; sb_dsp_set_mpu(&sb->dsp, sb->mpu); @@ -3534,7 +3535,8 @@ sb_awe32_init(UNUSED(const device_t *info)) if (mpu_addr) { sb->mpu = (mpu_t *) malloc(sizeof(mpu_t)); memset(sb->mpu, 0, sizeof(mpu_t)); - mpu401_init(sb->mpu, device_get_config_hex16("base401"), device_get_config_int("irq"), M_UART, device_get_config_int("receive_input401")); + mpu401_init(sb->mpu, device_get_config_hex16("base401"), 0, M_UART, + device_get_config_int("receive_input401")); } else sb->mpu = NULL; sb_dsp_set_mpu(&sb->dsp, sb->mpu); @@ -4051,7 +4053,7 @@ static const device_config_t sb_config[] = { }, { .name = "receive_input", - .description = "Receive input (SB MIDI)", + .description = "Receive input (DSP MIDI)", .type = CONFIG_BINARY, .default_string = "", .default_int = 1 @@ -4161,7 +4163,7 @@ static const device_config_t sb15_config[] = { }, { .name = "receive_input", - .description = "Receive input (SB MIDI)", + .description = "Receive input (DSP MIDI)", .type = CONFIG_BINARY, .default_string = "", .default_int = 1 @@ -4290,7 +4292,7 @@ static const device_config_t sb2_config[] = { }, { .name = "receive_input", - .description = "Receive input (SB MIDI)", + .description = "Receive input (DSP MIDI)", .type = CONFIG_BINARY, .default_string = "", .default_int = 1 @@ -4356,7 +4358,7 @@ static const device_config_t sb_mcv_config[] = { }, { .name = "receive_input", - .description = "Receive input (SB MIDI)", + .description = "Receive input (DSP MIDI)", .type = CONFIG_BINARY, .default_string = "", .default_int = 1 @@ -4446,7 +4448,7 @@ static const device_config_t sb_pro_config[] = { }, { .name = "receive_input", - .description = "Receive input (SB MIDI)", + .description = "Receive input (DSP MIDI)", .type = CONFIG_BINARY, .default_string = "", .default_int = 1 @@ -4457,7 +4459,7 @@ static const device_config_t sb_pro_config[] = { static const device_config_t sb_pro_mcv_config[] = { { .name = "receive_input", - .description = "Receive input (SB MIDI)", + .description = "Receive input (DSP MIDI)", .type = CONFIG_BINARY, .default_string = "", .default_int = 1 @@ -4610,7 +4612,7 @@ static const device_config_t sb_16_config[] = { }, { .name = "receive_input", - .description = "Receive input (SB MIDI)", + .description = "Receive input (DSP MIDI)", .type = CONFIG_BINARY, .default_string = "", .default_int = 1 @@ -4635,7 +4637,7 @@ static const device_config_t sb_16_pnp_config[] = { }, { .name = "receive_input", - .description = "Receive input (SB MIDI)", + .description = "Receive input (DSP MIDI)", .type = CONFIG_BINARY, .default_string = "", .default_int = 1 @@ -4692,7 +4694,7 @@ static const device_config_t sb_32_pnp_config[] = { }, { .name = "receive_input", - .description = "Receive input (SB MIDI)", + .description = "Receive input (DSP MIDI)", .type = CONFIG_BINARY, .default_string = "", .default_int = 1 @@ -4912,7 +4914,7 @@ static const device_config_t sb_awe32_config[] = { }, { .name = "receive_input", - .description = "Receive input (SB MIDI)", + .description = "Receive input (DSP MIDI)", .type = CONFIG_BINARY, .default_string = "", .default_int = 1 @@ -4969,7 +4971,7 @@ static const device_config_t sb_awe32_pnp_config[] = { }, { .name = "receive_input", - .description = "Receive input (SB MIDI)", + .description = "Receive input (DSP MIDI)", .type = CONFIG_BINARY, .default_string = "", .default_int = 1 @@ -5046,7 +5048,7 @@ static const device_config_t sb_awe64_value_config[] = { }, { .name = "receive_input", - .description = "Receive input (SB MIDI)", + .description = "Receive input (DSP MIDI)", .type = CONFIG_BINARY, .default_string = "", .default_int = 1 @@ -5119,7 +5121,7 @@ static const device_config_t sb_awe64_config[] = { }, { .name = "receive_input", - .description = "Receive input (SB MIDI)", + .description = "Receive input (DSP MIDI)", .type = CONFIG_BINARY, .default_string = "", .default_int = 1 @@ -5184,7 +5186,7 @@ static const device_config_t sb_awe64_gold_config[] = { }, { .name = "receive_input", - .description = "Receive input (SB MIDI)", + .description = "Receive input (DSP MIDI)", .type = CONFIG_BINARY, .default_string = "", .default_int = 1 @@ -5314,7 +5316,7 @@ static const device_config_t ess_688_config[] = { }, { .name = "receive_input", - .description = "Receive input (SB MIDI)", + .description = "Receive input (DSP MIDI)", .type = CONFIG_BINARY, .default_string = "", .default_int = 1 @@ -5445,7 +5447,7 @@ static const device_config_t ess_1688_config[] = { }, { .name = "receive_input", - .description = "Receive input (SB MIDI)", + .description = "Receive input (DSP MIDI)", .type = CONFIG_BINARY, .default_string = "", .default_int = 1 @@ -5464,7 +5466,7 @@ static const device_config_t ess_1688_config[] = { static const device_config_t ess_688_pnp_config[] = { { .name = "receive_input", - .description = "Receive input (SB MIDI)", + .description = "Receive input (DSP MIDI)", .type = CONFIG_BINARY, .default_string = "", .default_int = 1 @@ -5483,7 +5485,7 @@ static const device_config_t ess_1688_pnp_config[] = { }, { .name = "receive_input", - .description = "Receive input (SB MIDI)", + .description = "Receive input (DSP MIDI)", .type = CONFIG_BINARY, .default_string = "", .default_int = 1 diff --git a/src/sound/snd_sn76489.c b/src/sound/snd_sn76489.c index d7bfa87b9..5ccc0cd10 100644 --- a/src/sound/snd_sn76489.c +++ b/src/sound/snd_sn76489.c @@ -21,7 +21,15 @@ static float volslog[16] = { 7.51785f, 9.46440f, 11.9194f, 15.0000f }; -void +static int +sn76489_check_tap_2(sn76489_t *sn76489) +{ + int ret = ((sn76489->shift >> sn76489->white_noise_tap_2) & 1); + + return (sn76489->type == SN76496) ? ret : !ret; +} + +static void sn76489_update(sn76489_t *sn76489) { for (; sn76489->pos < sound_pos_global; sn76489->pos++) { @@ -42,24 +50,28 @@ sn76489_update(sn76489_t *sn76489) result += (((sn76489->shift & 1) ^ 1) * 127 * volslog[sn76489->vol[0]] * 2); sn76489->count[0] -= (512 * sn76489->psgconst); - while (sn76489->count[0] < 0 && sn76489->latch[0]) { + while ((sn76489->count[0] < 0) && sn76489->latch[0]) { sn76489->count[0] += (sn76489->latch[0] * 4); if (!(sn76489->noise & 4)) { - if (sn76489->shift & 1) - sn76489->shift |= 0x8000; - sn76489->shift >>= 1; + if ((sn76489->shift >> sn76489->white_noise_tap_1) & 1) { + sn76489->shift >>= 1; + sn76489->shift |= sn76489->feedback_mask; + } else + sn76489->shift >>= 1; } else { - if ((sn76489->shift & 1) ^ ((sn76489->shift >> 1) & 1)) - sn76489->shift |= 0x8000; - sn76489->shift >>= 1; + if (((sn76489->shift >> sn76489->white_noise_tap_1) & 1) ^ sn76489_check_tap_2(sn76489)) { + sn76489->shift >>= 1; + sn76489->shift |= sn76489->feedback_mask; + } else + sn76489->shift >>= 1; } } - sn76489->buffer[sn76489->pos] = result; + sn76489->buffer[sn76489->pos] = (sn76489->type == NCR8496) ? -result : result; } } -void +static void sn76489_get_buffer(int32_t *buffer, int len, void *priv) { sn76489_t *sn76489 = (sn76489_t *) priv; @@ -74,7 +86,7 @@ sn76489_get_buffer(int32_t *buffer, int len, void *priv) sn76489->pos = 0; } -void +static void sn76489_write(UNUSED(uint16_t addr), uint8_t data, void *priv) { sn76489_t *sn76489 = (sn76489_t *) priv; @@ -125,15 +137,13 @@ sn76489_write(UNUSED(uint16_t addr), uint8_t data, void *priv) sn76489->vol[1] = 0xf - data; break; case 0x60: - if ((data & 4) != (sn76489->noise & 4) || sn76489->type == SN76496) - sn76489->shift = 0x4000; + if (((data & 4) != (sn76489->noise & 4)) || (sn76489->type == SN76496)) + sn76489->shift = sn76489->feedback_mask; sn76489->noise = data & 0xf; if ((data & 3) == 3) sn76489->latch[0] = sn76489->latch[1]; else sn76489->latch[0] = 0x400 << (data & 3); - if (!sn76489->extra_divide) - sn76489->latch[0] &= 0x3ff; if (!sn76489->latch[0]) sn76489->latch[0] = (sn76489->extra_divide ? 2048 : 1024) << 6; break; @@ -146,22 +156,24 @@ sn76489_write(UNUSED(uint16_t addr), uint8_t data, void *priv) break; } } else { + /* NCR8496 ignores writes to registers 1, 3, 5, 6 and 7 with bit 7 clear. */ + if ((sn76489->type != SN76496) && ((sn76489->firstdat & 0x10) || ((sn76489->firstdat & 0x70) == 0x60))) + return; + if ((sn76489->firstdat & 0x70) == 0x60 && (sn76489->type == SN76496)) { - if ((data & 4) != (sn76489->noise & 4) || sn76489->type == SN76496) - sn76489->shift = 0x4000; + if (sn76489->type == SN76496) + sn76489->shift = sn76489->feedback_mask; sn76489->noise = data & 0xf; if ((data & 3) == 3) sn76489->latch[0] = sn76489->latch[1]; else sn76489->latch[0] = 0x400 << (data & 3); if (!sn76489->latch[0]) - sn76489->latch[0] = 1024 << 6; + sn76489->latch[0] = (sn76489->extra_divide ? 2048 : 1024) << 6; } else if ((sn76489->firstdat & 0x70) != 0x60) { - if (sn76489->extra_divide) - sn76489->freqhi[sn76489->lasttone] = data & 0x7F; - else - sn76489->freqhi[sn76489->lasttone] = data & 0x3F; - freq = sn76489->freqlo[sn76489->lasttone] | (sn76489->freqhi[sn76489->lasttone] << 4); + sn76489->freqhi[sn76489->lasttone] = data & 0x7F; + freq = sn76489->freqlo[sn76489->lasttone] | + (sn76489->freqhi[sn76489->lasttone] << 4); if (!sn76489->extra_divide) freq &= 0x3ff; if (!freq) @@ -190,6 +202,16 @@ sn76489_init(sn76489_t *sn76489, uint16_t base, uint16_t size, int type, int fre { sound_add_handler(sn76489_get_buffer, sn76489); + if (type == SN76496) { + sn76489->white_noise_tap_1 = 0; + sn76489->white_noise_tap_2 = 1; + sn76489->feedback_mask = 0x4000; + } else { + sn76489->white_noise_tap_1 = 1; + sn76489->white_noise_tap_2 = 5; + sn76489->feedback_mask = 0x8000; + } + sn76489->latch[0] = sn76489->latch[1] = sn76489->latch[2] = sn76489->latch[3] = 0x3FF << 6; sn76489->vol[0] = 0; sn76489->vol[1] = sn76489->vol[2] = sn76489->vol[3] = 8; @@ -200,7 +222,7 @@ sn76489_init(sn76489_t *sn76489, uint16_t base, uint16_t size, int type, int fre sn76489->count[2] = (rand() & 0x3FF) << 6; sn76489->count[3] = (rand() & 0x3FF) << 6; sn76489->noise = 3; - sn76489->shift = 0x4000; + sn76489->shift = sn76489->feedback_mask; sn76489->type = type; sn76489->psgconst = (((double) freq / 64.0) / (double) FREQ_48000); diff --git a/src/unix/dummy_cdrom_ioctl.c b/src/unix/dummy_cdrom_ioctl.c index 5c533b216..ed19dfb1f 100644 --- a/src/unix/dummy_cdrom_ioctl.c +++ b/src/unix/dummy_cdrom_ioctl.c @@ -205,7 +205,7 @@ plat_cdrom_read_sector(uint8_t *buffer, int raw, uint32_t sector) /* Cooked */ } plat_cdrom_close(); - dummy_cdrom_ioctl_log("ReadSector status=%d, sector=%d, size=%" PRId64 ".\n", status, sector, (long long) size); + dummy_cdrom_ioctl_log("ReadSector sector=%d.\n", sector); return 0; } diff --git a/src/video/vid_ati28800.c b/src/video/vid_ati28800.c index 368312fcb..16bb36201 100644 --- a/src/video/vid_ati28800.c +++ b/src/video/vid_ati28800.c @@ -37,9 +37,9 @@ #include <86box/vid_svga_render.h> #define VGAWONDERXL 1 -#if defined(DEV_BRANCH) && defined(USE_XL24) +#ifdef USE_XL24 # define VGAWONDERXL24 2 -#endif +#endif /* USE_XL24 */ #define BIOS_ATIKOR_PATH "roms/video/ati28800/atikorvga.bin" #define BIOS_ATIKOR_4620P_PATH_L "roms/machines/spc4620p/31005h.u8" @@ -52,10 +52,10 @@ #define BIOS_VGAXL_EVEN_PATH "roms/video/ati28800/xleven.bin" #define BIOS_VGAXL_ODD_PATH "roms/video/ati28800/xlodd.bin" -#if defined(DEV_BRANCH) && defined(USE_XL24) +#ifdef USE_XL24 # define BIOS_XL24_EVEN_PATH "roms/video/ati28800/112-14318-102.bin" # define BIOS_XL24_ODD_PATH "roms/video/ati28800/112-14319-102.bin" -#endif +#endif /* USE_XL24 */ #define BIOS_ROM_PATH "roms/video/ati28800/bios.bin" #define BIOS_VGAXL_ROM_PATH "roms/video/ati28800/ATI_VGAWonder_XL.bin" @@ -609,7 +609,7 @@ ati28800_init(const device_t *info) ati28800->svga.ramdac = device_add(&sc11486_ramdac_device); break; -#if defined(DEV_BRANCH) && defined(USE_XL24) +#ifdef USE_XL24 case VGAWONDERXL24: ati28800->id = 6; rom_init_interleaved(&ati28800->bios_rom, @@ -618,7 +618,7 @@ ati28800_init(const device_t *info) 0xc0000, 0x10000, 0xffff, 0, MEM_MAPPING_EXTERNAL); break; -#endif +#endif /* USE_XL24 */ default: ati28800->id = 5; @@ -653,11 +653,11 @@ ati28800_init(const device_t *info) ati_eeprom_load(&ati28800->eeprom, "ati28800xl.nvr", 0); break; -#if defined(DEV_BRANCH) && defined(USE_XL24) +#ifdef USE_XL24 case VGAWONDERXL24: ati_eeprom_load(&ati28800->eeprom, "ati28800xl24.nvr", 0); break; -#endif +#endif /* USE_XL24 */ default: ati_eeprom_load(&ati28800->eeprom, "ati28800.nvr", 0); @@ -685,13 +685,13 @@ compaq_ati28800_available(void) return (rom_present(BIOS_VGAXL_ROM_PATH)); } -#if defined(DEV_BRANCH) && defined(USE_XL24) +#ifdef USE_XL24 static int ati28800_wonderxl24_available(void) { return (rom_present(BIOS_XL24_EVEN_PATH) && rom_present(BIOS_XL24_ODD_PATH)); } -#endif +#endif /* USE_XL24 */ static void ati28800_close(void *priv) @@ -749,7 +749,7 @@ static const device_config_t ati28800_config[] = { } }; -#if defined(DEV_BRANCH) && defined(USE_XL24) +#ifdef USE_XL24 static const device_config_t ati28800_wonderxl_config[] = { { .name = "memory", @@ -778,7 +778,7 @@ static const device_config_t ati28800_wonderxl_config[] = { .type = CONFIG_END } }; -#endif +#endif /* USE_XL24 */ // clang-format on const device_t ati28800_device = { @@ -851,7 +851,7 @@ const device_t compaq_ati28800_device = { .config = ati28800_config }; -#if defined(DEV_BRANCH) && defined(USE_XL24) +#ifdef USE_XL24 const device_t ati28800_wonderxl24_device = { .name = "ATI-28800 (VGA Wonder XL24)", .internal_name = "ati28800w", @@ -865,4 +865,4 @@ const device_t ati28800_wonderxl24_device = { .force_redraw = ati28800_force_redraw, .config = ati28800_wonderxl_config }; -#endif +#endif /* USE_XL24 */ diff --git a/src/video/vid_ega.c b/src/video/vid_ega.c index 8f995117c..5a4f100df 100644 --- a/src/video/vid_ega.c +++ b/src/video/vid_ega.c @@ -54,11 +54,18 @@ enum { EGA_TSENG }; +enum { + EGA_TYPE_IBM = 0, + EGA_TYPE_OTHER = 1, + EGA_TYPE_COMPAQ = 2 +}; + static video_timings_t timing_ega = { .type = VIDEO_ISA, .write_b = 8, .write_w = 16, .write_l = 32, .read_b = 8, .read_w = 16, .read_l = 32 }; static uint8_t ega_rotate[8][256]; +static int active = 0; static uint32_t pallook16[256]; static uint32_t pallook64[256]; -static int ega_type = 0; +static int ega_type = EGA_TYPE_IBM; static int old_overscan_color = 0; /* 3C2 controls default mode on EGA. On VGA, it determines monitor type (mono or colour): @@ -149,6 +156,25 @@ ega_out(uint16_t addr, uint8_t val, void *priv) if (!(val & 1)) io_sethandler(0x03a0, 0x0020, ega_in, NULL, NULL, ega_out, NULL, NULL, ega); ega_recalctimings(ega); + if ((ega_type == EGA_TYPE_COMPAQ) && !(val & 0x02)) + mem_mapping_disable(&ega->mapping); + else switch (ega->gdcreg[6] & 0xc) { + case 0x0: /*128k at A0000*/ + mem_mapping_set_addr(&ega->mapping, 0xa0000, 0x20000); + break; + case 0x4: /*64k at A0000*/ + mem_mapping_set_addr(&ega->mapping, 0xa0000, 0x10000); + break; + case 0x8: /*32k at B0000*/ + mem_mapping_set_addr(&ega->mapping, 0xb0000, 0x08000); + break; + case 0xC: /*32k at B8000*/ + mem_mapping_set_addr(&ega->mapping, 0xb8000, 0x08000); + break; + + default: + break; + } break; case 0x3c4: ega->seqaddr = val; @@ -180,7 +206,7 @@ ega_out(uint16_t addr, uint8_t val, void *priv) } break; case 0x3c6: - if (ega_type == 2) + if (ega_type == EGA_TYPE_COMPAQ) ega->ctl_mode = val; break; case 0x3ce: @@ -201,7 +227,9 @@ ega_out(uint16_t addr, uint8_t val, void *priv) ega->chain2_read = val & 0x10; break; case 6: - switch (val & 0xc) { + if ((ega_type == EGA_TYPE_COMPAQ) && !(ega->miscout & 0x02)) + mem_mapping_disable(&ega->mapping); + else switch (val & 0xc) { case 0x0: /*128k at A0000*/ mem_mapping_set_addr(&ega->mapping, 0xa0000, 0x20000); break; @@ -295,47 +323,47 @@ ega_in(uint16_t addr, void *priv) break; case 0x3c0: - if (ega_type == 1) + if (ega_type == EGA_TYPE_OTHER) ret = ega->attraddr | ega->attr_palette_enable; break; case 0x3c1: - if (ega_type == 1) + if (ega_type == EGA_TYPE_OTHER) ret = ega->attrregs[ega->attraddr]; break; case 0x3c2: ret = (egaswitches & (8 >> egaswitchread)) ? 0x10 : 0x00; break; case 0x3c4: - if (ega_type == 1) + if (ega_type == EGA_TYPE_OTHER) ret = ega->seqaddr; break; case 0x3c5: - if (ega_type == 1) + if (ega_type == EGA_TYPE_OTHER) ret = ega->seqregs[ega->seqaddr & 0xf]; break; case 0x3c6: - if (ega_type == 2) + if (ega_type == EGA_TYPE_COMPAQ) ret = ega->ctl_mode; break; case 0x3c8: - if (ega_type == 1) + if (ega_type == EGA_TYPE_OTHER) ret = 2; break; case 0x3cc: - if (ega_type == 1) + if (ega_type == EGA_TYPE_OTHER) ret = ega->miscout; break; case 0x3ce: - if (ega_type == 1) + if (ega_type == EGA_TYPE_OTHER) ret = ega->gdcaddr; break; case 0x3cf: - if (ega_type == 1) + if (ega_type == EGA_TYPE_OTHER) ret = ega->gdcreg[ega->gdcaddr & 0xf]; break; case 0x3d0: case 0x3d4: - if (ega_type == 1) + if (ega_type == EGA_TYPE_OTHER) ret = ega->crtcreg; break; case 0x3d1: @@ -349,28 +377,28 @@ ega_in(uint16_t addr, void *priv) break; case 0x10: - if (ega_type == 1) + if (ega_type == EGA_TYPE_OTHER) ret = ega->crtc[ega->crtcreg]; else ret = ega->light_pen >> 8; break; case 0x11: - if (ega_type == 1) + if (ega_type == EGA_TYPE_OTHER) ret = ega->crtc[ega->crtcreg]; else ret = ega->light_pen & 0xff; break; default: - if (ega_type == 1) + if (ega_type == EGA_TYPE_OTHER) ret = ega->crtc[ega->crtcreg]; break; } break; case 0x3da: ega->attrff = 0; - if (ega_type == 2) { + if (ega_type == EGA_TYPE_COMPAQ) { ret = ega->stat & 0xcf; switch ((ega->attrregs[0x12] >> 4) & 0x03) { case 0x00: @@ -466,7 +494,7 @@ ega_recalctimings(ega_t *ega) ega->linedbl = ega->crtc[9] & 0x80; ega->rowcount = ega->crtc[9] & 0x1f; - if (ega_type == 2) { + if (ega_type == EGA_TYPE_COMPAQ) { color = (ega->miscout & 1); clksel = ((ega->miscout & 0xc) >> 2); @@ -587,9 +615,16 @@ ega_recalctimings(ega_t *ega) if (ega->dispofftime < TIMER_USEC) ega->dispofftime = TIMER_USEC; - ega->dot_time = (uint64_t) (ega->dot_clock); - if (ega->dot_time < TIMER_USEC) - ega->dot_time = TIMER_USEC; + if (ega_type == EGA_TYPE_COMPAQ) { + ega->dot_time = (uint64_t) (ega->dot_clock); + if (ega->dot_time < TIMER_USEC) + ega->dot_time = TIMER_USEC; + timer_disable(&ega->dot_timer); + timer_set_delay_u64(&ega->dot_timer, ega->dot_time); + ega->cca = 0; + active = 1; + ega->dot = 0; + } ega_recalc_remap_func(ega); } @@ -616,14 +651,13 @@ ega_dot_poll(void *priv) uint32_t addr; int drawcursor; uint32_t charaddr; - static int fg; - static int bg; - static uint32_t dat; + static int fg = 0; + static int bg = 0; + static uint32_t dat = 0x00000000; + static int cclock = 0; static int disptime; static int _dispontime; static int _dispofftime; - static int cclock = 0; - static int active = 0; if (ega->seqregs[1] & 8) { disptime = ((ega->crtc[0] + 2) << 1); @@ -1278,6 +1312,10 @@ ega_read(uint32_t addr, void *priv) temp4 &= (ega->colournocare & 8) ? 0xff : 0; return ~(temp | temp2 | temp3 | temp4); } + + if ((ega_type == EGA_TYPE_COMPAQ) && (ega->gdcreg[4] & 0x04)) + return 0xff; + return ega->vram[addr | readplane]; } @@ -1392,7 +1430,7 @@ ega_init(ega_t *ega, int monitor_type, int is_mono) ega->crtc[6] = 255; timer_add(&ega->timer, ega_poll, ega, 1); - if (ega_type == 2) + if (ega_type == EGA_TYPE_COMPAQ) timer_add(&ega->dot_timer, ega_dot_poll, ega, 1); } @@ -1412,11 +1450,11 @@ ega_standalone_init(const device_t *info) ega->y_add = 14; if ((info->local == EGA_IBM) || (info->local == EGA_ISKRA) || (info->local == EGA_TSENG)) - ega_type = 0; + ega_type = EGA_TYPE_IBM; else if (info->local == EGA_COMPAQ) - ega_type = 2; + ega_type = EGA_TYPE_COMPAQ; else - ega_type = 1; + ega_type = EGA_TYPE_OTHER; ega->actual_type = info->local; ega->chipset = 0; @@ -1466,6 +1504,8 @@ ega_standalone_init(const device_t *info) ega->vrammask = ega->vram_limit - 1; mem_mapping_add(&ega->mapping, 0xa0000, 0x20000, ega_read, NULL, NULL, ega_write, NULL, NULL, NULL, MEM_MAPPING_EXTERNAL, ega); + if (ega_type == EGA_TYPE_COMPAQ) + mem_mapping_disable(&ega->mapping); io_sethandler(0x03c0, 0x0020, ega_in, NULL, NULL, ega_out, NULL, NULL, ega); if (ega->chipset) { diff --git a/src/video/vid_ega_render.c b/src/video/vid_ega_render.c index 0cb1216ad..fe2632574 100644 --- a/src/video/vid_ega_render.c +++ b/src/video/vid_ega_render.c @@ -201,7 +201,8 @@ ega_render_graphics(ega_t *ega) const bool crtcreset = ((ega->crtc[0x17] & 0x80) == 0); const bool seq9dot = ((ega->seqregs[1] & 1) == 0); const bool seqoddeven = ((ega->seqregs[1] & 4) != 0); - const uint8_t blinkmask = (attrblink && blinked ? 0x8 : 0x0); + const uint8_t blinkmask = (attrblink ? 0x8 : 0x0); + const uint8_t blinkval = (attrblink && blinked ? 0x8 : 0x0); uint32_t *p = &buffer32->line[ega->displine + ega->y_add][ega->x_add]; const int dwshift = doublewidth ? 1 : 0; const int dotwidth = 1 << dwshift; @@ -254,8 +255,14 @@ ega_render_graphics(ega_t *ega) uint8_t dat = (edatlookup[(edat[0] >> inshift) & 3][(edat[1] >> inshift) & 3]) | (edatlookup[(edat[2] >> inshift) & 3][(edat[3] >> inshift) & 3] << 2); // FIXME: Confirm blink behaviour is actually XOR on real hardware - uint32_t p0 = ega->pallook[ega->egapal[((dat >> 4) & ega->plane_mask) ^ blinkmask]]; - uint32_t p1 = ega->pallook[ega->egapal[(dat & ega->plane_mask) ^ blinkmask]]; + uint32_t c0 = (dat >> 4) & 0xF; + uint32_t c1 = dat & 0xF; + c0 = ((c0 & ega->plane_mask & ~blinkmask) | + ((c0 | ~ega->plane_mask) & blinkmask & blinkval)) ^ blinkmask; + c1 = ((c1 & ega->plane_mask & ~blinkmask) | + ((c1 | ~ega->plane_mask) & blinkmask & blinkval)) ^ blinkmask; + uint32_t p0 = ega->pallook[ega->egapal[c0]]; + uint32_t p1 = ega->pallook[ega->egapal[c1]]; for (int subx = 0; subx < dotwidth; subx++) p[outoffs + subx] = p0; for (int subx = 0; subx < dotwidth; subx++) diff --git a/src/video/vid_s3.c b/src/video/vid_s3.c index 2ad525892..8a9bedf02 100644 --- a/src/video/vid_s3.c +++ b/src/video/vid_s3.c @@ -3233,6 +3233,8 @@ s3_recalctimings(svga_t *svga) svga->hdisp = svga->hdisp_old; svga->ma_latch |= (s3->ma_ext << 16); + svga->lowres = (!!(svga->attrregs[0x10] & 0x40) && !(svga->crtc[0x3a] & 0x10)); + if (s3->chip >= S3_86C928) { if (svga->crtc[0x5d] & 0x01) svga->htotal |= 0x100; @@ -3246,8 +3248,8 @@ s3_recalctimings(svga_t *svga) svga->dispend |= 0x400; if (svga->crtc[0x5e] & 0x04) svga->vblankstart |= 0x400; - else - svga->vblankstart = svga->dispend; + else if ((svga->crtc[0x3a] & 0x10) && !svga->lowres) + svga->vblankstart = svga->dispend; /*Applies only to Enhanced modes*/ if (svga->crtc[0x5e] & 0x10) svga->vsyncstart |= 0x400; if (svga->crtc[0x5e] & 0x40) @@ -3294,8 +3296,6 @@ s3_recalctimings(svga_t *svga) break; } - svga->lowres = (!!(svga->attrregs[0x10] & 0x40) && !(svga->crtc[0x3a] & 0x10)); - if (s3->chip != S3_86C801) mask |= 0x01; switch (svga->crtc[0x50] & mask) { @@ -3965,6 +3965,17 @@ s3_recalctimings(svga_t *svga) svga->dots_per_clock = (svga->dots_per_clock << 1) / 3; break; + case S3_VISION968: + switch (s3->card_type) { + case S3_MIROVIDEO40SV_ERGO_968: + svga->hdisp = (svga->hdisp / 3) << 2; + svga->dots_per_clock = (svga->hdisp / 3) << 2; + break; + default: + break; + } + break; + case S3_TRIO64: case S3_TRIO32: svga->hdisp /= 3; @@ -4140,6 +4151,9 @@ s3_recalctimings(svga_t *svga) if (svga->crtc[0x31] & 0x08) { svga->vram_display_mask = s3->vram_mask; if (svga->bpp == 8) { + if (!(svga->crtc[0x5e] & 0x04)) + svga->vblankstart = svga->dispend; /*Applies only to Enhanced modes*/ + /*Enhanced 4bpp mode, just like the 8bpp mode per the spec. */ svga->render = svga_render_8bpp_highres; svga->rowoffset <<= 1; @@ -9916,8 +9930,14 @@ s3_init(const device_t *info) s3->width = 1024; svga->ramdac = device_add(&sc11483_ramdac_device); - svga->clock_gen = device_add(&av9194_device); - svga->getclock = av9194_getclock; + if (s3->card_type == S3_ORCHID_86C911) { + svga->clock_gen = device_add(&av9194_device); + svga->getclock = av9194_getclock; + } else { + /* DCS2824-0 = Diamond ICD2061A-compatible. */ + svga->clock_gen = device_add(&icd2061_device); + svga->getclock = icd2061_getclock; + } break; case S3_AMI_86C924: diff --git a/src/video/vid_s3_virge.c b/src/video/vid_s3_virge.c index d2f99ba14..286e459e7 100644 --- a/src/video/vid_s3_virge.c +++ b/src/video/vid_s3_virge.c @@ -40,29 +40,25 @@ #include <86box/vid_svga.h> #include <86box/vid_svga_render.h> +#ifdef MIN + #undef MIN +#endif +#ifdef MAX + #undef MAX +#endif +#ifdef CLAMP + #undef CLAMP +#endif + +static uint64_t virge_time = 0; + static int dither[4][4] = { - {0, 4, 1, 5}, - { 6, 2, 7, 3}, - { 1, 5, 0, 4}, - { 7, 3, 6, 2}, + {0, 4, 1, 5}, + {6, 2, 7, 3}, + {1, 5, 0, 4}, + {7, 3, 6, 2} }; -#define RB_SIZE 256 -#define RB_MASK (RB_SIZE - 1) - -#define RB_ENTRIES (virge->s3d_write_idx - virge->s3d_read_idx) -#define RB_FULL (RB_ENTRIES == RB_SIZE) -#define RB_EMPTY (!RB_ENTRIES) - -#define FIFO_SIZE (16 * 4096) - -#define FIFO_ENTRIES (virge->fifo_write_idx - virge->fifo_read_idx) -#define FIFO_FULL ((virge->fifo_write_idx - virge->fifo_read_idx) >= FIFO_SIZE) -#define FIFO_EMPTY (virge->fifo_read_idx == virge->fifo_write_idx) - -#define FIFO_TYPE 0xff000000 -#define FIFO_ADDR 0x00ffffff - #define ROM_VIRGE_325 "roms/video/s3virge/86c325.bin" #define ROM_DIAMOND_STEALTH3D_2000 "roms/video/s3virge/s3virge.bin" #define ROM_DIAMOND_STEALTH3D_3000 "roms/video/s3virge/diamondstealth3000.vbi" @@ -74,8 +70,23 @@ static int dither[4][4] = { #define ROM_DIAMOND_STEALTH3D_4000 "roms/video/s3virge/86c357.bin" #define ROM_TRIO3D2X "roms/video/s3virge/TRIO3D2X_8mbsdr.VBI" -#define FIFO_STATE_IDLE 0 -#define FIFO_STATE_RUN 1 +#define RB_SIZE 256 +#define RB_MASK (RB_SIZE - 1) + +#define RB_ENTRIES (virge->s3d_write_idx - virge->s3d_read_idx) +#define RB_FULL (RB_ENTRIES == RB_SIZE) +#define RB_EMPTY (!RB_ENTRIES) + +#define FIFO_SIZE 65536 +#define FIFO_MASK (FIFO_SIZE - 1) +#define FIFO_ENTRY_SIZE (1 << 31) + +#define FIFO_ENTRIES (virge->fifo_write_idx - virge->fifo_read_idx) +#define FIFO_FULL ((virge->fifo_write_idx - virge->fifo_read_idx) >= FIFO_SIZE) +#define FIFO_EMPTY (virge->fifo_read_idx == virge->fifo_write_idx) + +#define FIFO_TYPE 0xff000000 +#define FIFO_ADDR 0x00ffffff enum { S3_VIRGE_325, @@ -99,58 +110,80 @@ enum { }; enum { - FIFO_INVALID = (0x00 << 24), - FIFO_WRITE_BYTE = (0x01 << 24), - FIFO_WRITE_WORD = (0x02 << 24), + FIFO_INVALID = (0x00 << 24), + FIFO_WRITE_BYTE = (0x01 << 24), + FIFO_WRITE_WORD = (0x02 << 24), FIFO_WRITE_DWORD = (0x03 << 24) }; -typedef struct -{ - uint32_t addr_type; - uint32_t val; +typedef struct { + uint32_t addr_type; + uint32_t val; } fifo_entry_t; typedef struct s3d_t { - uint32_t cmd_set; - int clip_l, clip_r, clip_t, clip_b; + uint32_t cmd_set; + int clip_l; + int clip_r; + int clip_t; + int clip_b; - uint32_t dest_base; - uint32_t dest_str; + uint32_t dest_base; + uint32_t dest_str; - uint32_t z_base; - uint32_t z_str; + uint32_t z_base; + uint32_t z_str; - uint32_t tex_base; - uint32_t tex_bdr_clr; - uint32_t tbv, tbu; - int32_t TdVdX, TdUdX; - int32_t TdVdY, TdUdY; - uint32_t tus, tvs; + uint32_t tex_base; + uint32_t tex_bdr_clr; + uint32_t tbv; + uint32_t tbu; + int32_t TdVdX; + int32_t TdUdX; + int32_t TdVdY; + int32_t TdUdY; + uint32_t tus; + uint32_t tvs; - int32_t TdZdX, TdZdY; - uint32_t tzs; + int32_t TdZdX; + int32_t TdZdY; + uint32_t tzs; - int32_t TdWdX, TdWdY; - uint32_t tws; + int32_t TdWdX; + int32_t TdWdY; + uint32_t tws; - int32_t TdDdX, TdDdY; - uint32_t tds; + int32_t TdDdX; + int32_t TdDdY; + uint32_t tds; - int16_t TdGdX, TdBdX, TdRdX, TdAdX; - int16_t TdGdY, TdBdY, TdRdY, TdAdY; - uint32_t tgs, tbs, trs, tas; + int16_t TdGdX; + int16_t TdBdX; + int16_t TdRdX; + int16_t TdAdX; + int16_t TdGdY; + int16_t TdBdY; + int16_t TdRdY; + int16_t TdAdY; + uint32_t tgs; + uint32_t tbs; + uint32_t trs; + uint32_t tas; - uint32_t TdXdY12; - uint32_t txend12; - uint32_t TdXdY01; - uint32_t txend01; - uint32_t TdXdY02; - uint32_t txs; - uint32_t tys; - int ty01, ty12, tlr; + uint32_t TdXdY12; + uint32_t txend12; + uint32_t TdXdY01; + uint32_t txend01; + uint32_t TdXdY02; + uint32_t txs; + uint32_t tys; + int ty01; + int ty12; + int tlr; - uint8_t fog_r, fog_g, fog_b; + uint8_t fog_r; + uint8_t fog_g; + uint8_t fog_b; } s3d_t; typedef struct virge_t { @@ -158,43 +191,51 @@ typedef struct virge_t { mem_mapping_t mmio_mapping; mem_mapping_t new_mmio_mapping; - rom_t bios_rom; + rom_t bios_rom; - svga_t svga; + svga_t svga; - uint8_t bank; - uint8_t ma_ext; - uint8_t reg6b, lfb_bios; + uint8_t bank; + uint8_t ma_ext; - uint8_t virge_id, virge_id_high, virge_id_low, virge_rev; + uint8_t virge_id; + uint8_t virge_id_high; + uint8_t virge_id_low; + uint8_t virge_rev; - uint8_t int_line; + uint32_t linear_base; + uint32_t linear_size; - uint32_t linear_base, linear_size; + uint8_t pci_regs[256]; + uint8_t pci_slot; - uint8_t pci_regs[256]; + int chip; - uint8_t pci_slot; - uint8_t irq_state; + int bilinear_enabled; + int dithering_enabled; + int memory_size; - int pci; - int chip; - int is_agp; + int pixel_count; + int tri_count; - int bilinear_enabled; - int dithering_enabled; - uint32_t memory_size; - uint32_t vram_mask; + thread_t * render_thread; + event_t * wake_render_thread; + event_t * wake_main_thread; + event_t * not_full_event; - uint32_t hwc_fg_col, hwc_bg_col; - int hwc_col_stack_pos; + uint32_t hwc_fg_col; + uint32_t hwc_bg_col; + int hwc_col_stack_pos; - struct - { + struct { uint32_t src_base; uint32_t dest_base; - int clip_l, clip_r, clip_t, clip_b; - int dest_str, src_str; + int clip_l; + int clip_r; + int clip_t; + int clip_b; + int dest_str; + int src_str; uint32_t mono_pat_0; uint32_t mono_pat_1; uint32_t pat_bg_clr; @@ -202,20 +243,28 @@ typedef struct virge_t { uint32_t src_bg_clr; uint32_t src_fg_clr; uint32_t cmd_set; - int r_width, r_height; - int rsrc_x, rsrc_y; - int rdest_x, rdest_y; + int r_width; + int r_height; + int rsrc_x; + int rsrc_y; + int rdest_x; + int rdest_y; - int lxend0, lxend1; + int lxend0; + int lxend1; int32_t ldx; - uint32_t lxstart, lystart; + uint32_t lxstart; + uint32_t lystart; int lycnt; int line_dir; - int src_x, src_y; - int dest_x, dest_y; - int w, h; - uint8_t rop; + int src_x; + int src_y; + int dest_x; + int dest_y; + int w; + int h; + uint8_t rop; int data_left_count; uint32_t data_left; @@ -234,14 +283,14 @@ typedef struct virge_t { uint32_t dest_l, dest_r; } s3d; - s3d_t s3d_tri; + s3d_t s3d_tri; - s3d_t s3d_buffer[RB_SIZE]; - atomic_int s3d_read_idx, s3d_write_idx; - atomic_int s3d_busy; + s3d_t s3d_buffer[RB_SIZE]; + int s3d_read_idx; + int s3d_write_idx; + int s3d_busy; - struct - { + struct { uint32_t pri_ctrl; uint32_t chroma_ctrl; uint32_t sec_ctrl; @@ -266,42 +315,64 @@ typedef struct virge_t { uint32_t sec_start; uint32_t sec_size; - int sdif; + int sdif; - int pri_x, pri_y, pri_w, pri_h; - int sec_x, sec_y, sec_w, sec_h; + int pri_x; + int pri_y; + int pri_w; + int pri_h; + int sec_x; + int sec_y; + int sec_w; + int sec_h; } streams; - uint8_t cmd_dma; - uint32_t cmd_dma_base; - uint32_t dma_ptr; - uint64_t blitter_time; + fifo_entry_t fifo[FIFO_SIZE]; + volatile int fifo_read_idx, fifo_write_idx; + + volatile int fifo_thread_run, render_thread_run; + + thread_t * fifo_thread; + event_t *wake_fifo_thread; + event_t * fifo_not_full_event; + + int virge_busy; + + uint8_t subsys_stat; + uint8_t subsys_cntl; + + int local; + + uint8_t serialport; + + uint8_t irq_state; + uint8_t advfunc_cntl; + + void *i2c, *ddc; + + int onboard; int fifo_slots_num; - struct { - uint32_t addr_type; - uint32_t val; - } fifo[FIFO_SIZE]; - atomic_int fifo_write_idx, fifo_read_idx; + uint32_t vram_mask; - pc_timer_t fifo_timer; - pc_timer_t render_timer; + uint8_t reg6b; + uint8_t lfb_bios; + uint8_t int_line; + uint8_t cmd_dma; - atomic_int virge_busy; - int local; - int fifo_state; + uint32_t cmd_dma_base; + uint32_t dma_ptr; - uint8_t subsys_stat, subsys_cntl, advfunc_cntl; - - uint8_t serialport; - - void *i2c, *ddc; - - int waiting; - - int onboard; + int pci; + int is_agp; } virge_t; +static __inline void +wake_fifo_thread(virge_t *virge) { + /* Wake up FIFO thread if moving from idle */ + thread_set_event(virge->wake_fifo_thread); +} + static virge_t *reset_state = NULL; static video_timings_t timing_diamond_stealth3d_2000_pci = { .type = VIDEO_PCI, .write_b = 2, .write_w = 2, .write_l = 3, .read_b = 28, .read_w = 28, .read_l = 45 }; @@ -309,45 +380,45 @@ static video_timings_t timing_diamond_stealth3d_3000_pci = { .type = VIDEO_PCI, static video_timings_t timing_virge_dx_pci = { .type = VIDEO_PCI, .write_b = 2, .write_w = 2, .write_l = 3, .read_b = 28, .read_w = 28, .read_l = 45 }; static video_timings_t timing_virge_agp = { .type = VIDEO_AGP, .write_b = 2, .write_w = 2, .write_l = 3, .read_b = 28, .read_w = 28, .read_l = 45 }; -static void s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri); +static void queue_triangle(virge_t *virge); -static void s3_virge_recalctimings(svga_t *svga); -static void s3_virge_updatemapping(virge_t *virge); +static void s3_virge_recalctimings(svga_t *svga); +static void s3_virge_updatemapping(virge_t *virge); -static void s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat); +static void s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat); static uint8_t s3_virge_mmio_read(uint32_t addr, void *priv); -static uint16_t s3_virge_mmio_read_w(uint32_t addr, void *rivp); +static uint16_t s3_virge_mmio_read_w(uint32_t addr, void *priv); static uint32_t s3_virge_mmio_read_l(uint32_t addr, void *priv); static void s3_virge_mmio_write(uint32_t addr, uint8_t val, void *priv); static void s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *priv); static void s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *priv); enum { - CMD_SET_AE = 1, - CMD_SET_HC = (1 << 1), + CMD_SET_AE = 1, + CMD_SET_HC = (1 << 1), - CMD_SET_FORMAT_MASK = (7 << 2), - CMD_SET_FORMAT_8 = (0 << 2), - CMD_SET_FORMAT_16 = (1 << 2), - CMD_SET_FORMAT_24 = (2 << 2), + CMD_SET_FORMAT_MASK = (7 << 2), + CMD_SET_FORMAT_8 = (0 << 2), + CMD_SET_FORMAT_16 = (1 << 2), + CMD_SET_FORMAT_24 = (2 << 2), - CMD_SET_MS = (1 << 6), - CMD_SET_IDS = (1 << 7), - CMD_SET_MP = (1 << 8), - CMD_SET_TP = (1 << 9), + CMD_SET_MS = (1 << 6), + CMD_SET_IDS = (1 << 7), + CMD_SET_MP = (1 << 8), + CMD_SET_TP = (1 << 9), - CMD_SET_ITA_MASK = (3 << 10), - CMD_SET_ITA_BYTE = (0 << 10), - CMD_SET_ITA_WORD = (1 << 10), - CMD_SET_ITA_DWORD = (2 << 10), + CMD_SET_ITA_MASK = (3 << 10), + CMD_SET_ITA_BYTE = (0 << 10), + CMD_SET_ITA_WORD = (1 << 10), + CMD_SET_ITA_DWORD = (2 << 10), - CMD_SET_ZUP = (1 << 23), + CMD_SET_ZUP = (1 << 23), - CMD_SET_ZB_MODE = (3 << 24), + CMD_SET_ZB_MODE = (3 << 24), - CMD_SET_XP = (1 << 25), - CMD_SET_YP = (1 << 26), + CMD_SET_XP = (1 << 25), + CMD_SET_YP = (1 << 26), CMD_SET_COMMAND_MASK = (15 << 27) }; @@ -358,11 +429,11 @@ enum { #define CMD_SET_TWE (1 << 26) enum { - CMD_SET_COMMAND_BITBLT = (0 << 27), - CMD_SET_COMMAND_RECTFILL = (2 << 27), - CMD_SET_COMMAND_LINE = (3 << 27), - CMD_SET_COMMAND_POLY = (5 << 27), - CMD_SET_COMMAND_NOP = (15 << 27) + CMD_SET_COMMAND_BITBLT = (0 << 27), + CMD_SET_COMMAND_RECTFILL = (2 << 27), + CMD_SET_COMMAND_LINE = (3 << 27), + CMD_SET_COMMAND_POLY = (5 << 27), + CMD_SET_COMMAND_NOP = (15 << 27) }; #define INT_VSY (1 << 0) @@ -377,77 +448,18 @@ enum { #define SERIAL_PORT_SCR (1 << 2) #define SERIAL_PORT_SDR (1 << 3) -#ifdef ENABLE_S3_VIRGE_LOG -int s3_virge_do_log = ENABLE_S3_VIRGE_LOG; - static void -s3_virge_log(const char *fmt, ...) -{ - va_list ap; - - if (s3_virge_do_log) { - va_start(ap, fmt); - pclog_ex(fmt, ap); - va_end(ap); - } -} -#else -# define s3_virge_log(fmt, ...) -#endif - -static void -queue_triangle(virge_t *virge) -{ - if (RB_FULL) { - s3_virge_log("RB FULL, TBD.\n"); - return; - } - - virge->s3d_buffer[virge->s3d_write_idx] = virge->s3d_tri; - virge->s3d_write_idx++; - if (virge->s3d_write_idx >= RB_SIZE) - virge->s3d_write_idx = 0; - - if (!virge->s3d_busy) - timer_set_delay_u64(&virge->render_timer, 100 * TIMER_USEC); -} - -static void -s3_virge_update_irqs(virge_t *virge) -{ - if ((virge->svga.crtc[0x32] & 0x10) && (virge->subsys_stat & (virge->subsys_cntl & INT_MASK))) +s3_virge_update_irqs(virge_t *virge) { + if ((virge->svga.crtc[0x32] & 0x10) && (virge->subsys_stat & virge->subsys_cntl & INT_MASK)) pci_set_irq(virge->pci_slot, PCI_INTA, &virge->irq_state); else pci_clear_irq(virge->pci_slot, PCI_INTA, &virge->irq_state); } static void -s3_virge_render_timer(void *priv) -{ +s3_virge_out(uint16_t addr, uint8_t val, void *priv) { virge_t *virge = (virge_t *) priv; - - virge->s3d_busy = 1; - while (virge->s3d_read_idx < virge->s3d_write_idx) { - s3_virge_triangle(virge, &virge->s3d_buffer[virge->s3d_read_idx]); - virge->s3d_read_idx++; - if (virge->s3d_read_idx >= RB_SIZE) - virge->s3d_read_idx = 0; - - if (RB_ENTRIES == RB_MASK) - timer_disable(&virge->render_timer); - } - virge->s3d_busy = 0; - virge->s3d_read_idx = 0; - virge->s3d_write_idx = 0; - virge->subsys_stat |= INT_S3D_DONE; - s3_virge_update_irqs(virge); -} - -static void -s3_virge_out(uint16_t addr, uint8_t val, void *priv) -{ - virge_t *virge = (virge_t *) priv; - svga_t *svga = &virge->svga; + svga_t * svga = &virge->svga; uint8_t old; uint32_t cursoraddr; @@ -461,8 +473,7 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) svga_recalctimings(svga); return; } - if (svga->seqaddr == 4) /*Chain-4 - update banking*/ - { + if (svga->seqaddr == 4) { /*Chain-4 - update banking*/ if (val & 8) svga->write_bank = svga->read_bank = virge->bank << 16; else @@ -486,7 +497,9 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) return; if ((svga->crtcreg == 7) && (svga->crtc[0x11] & 0x80)) val = (svga->crtc[7] & ~0x10) | (val & 0x10); - if ((svga->crtcreg >= 0x20) && (svga->crtcreg < 0x40) && (svga->crtcreg != 0x36) && (svga->crtcreg != 0x38) && (svga->crtcreg != 0x39) && ((svga->crtc[0x38] & 0xcc) != 0x48)) + if ((svga->crtcreg >= 0x20) && (svga->crtcreg < 0x40) && + (svga->crtcreg != 0x36) && (svga->crtcreg != 0x38) && + (svga->crtcreg != 0x39) && ((svga->crtc[0x38] & 0xcc) != 0x48)) return; if ((svga->crtcreg >= 0x40) && ((svga->crtc[0x39] & 0xe0) != 0xa0)) return; @@ -494,11 +507,9 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) return; if (svga->crtcreg >= 0x80) return; - old = svga->crtc[svga->crtcreg]; - svga->crtc[svga->crtcreg] = val; - if (svga->crtcreg > 0x18) - s3_virge_log("OUTB VGA reg = %02x, val = %02x\n", svga->crtcreg, val); + old = svga->crtc[svga->crtcreg]; + svga->crtc[svga->crtcreg] = val; switch (svga->crtcreg) { case 0x31: @@ -556,7 +567,8 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) svga->hwcursor.xoff = svga->crtc[0x4e] & 0x3f; svga->hwcursor.yoff = svga->crtc[0x4f] & 0x3f; cursoraddr = (virge->memory_size == 8) ? 0x1fff : 0x0fff; - svga->hwcursor.addr = ((((svga->crtc[0x4c] << 8) | svga->crtc[0x4d]) & cursoraddr) * 1024) + (svga->hwcursor.yoff * 16); + svga->hwcursor.addr = ((((svga->crtc[0x4c] << 8) | + svga->crtc[0x4d]) & cursoraddr) * 1024) + (svga->hwcursor.yoff * 16); break; case 0x4a: @@ -580,7 +592,7 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) switch (virge->hwc_col_stack_pos) { case 0: virge->hwc_bg_col = (virge->hwc_bg_col & 0xffff00) | val; - break; + break; case 1: virge->hwc_bg_col = (virge->hwc_bg_col & 0xff00ff) | (val << 8); break; @@ -628,8 +640,8 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) svga->bpp = (virge->chip == S3_VIRGEVX) ? 24 : 32; break; default: - svga->bpp = 8; - break; + svga->bpp = 8; + break; } break; @@ -644,7 +656,8 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) if (svga->crtcreg < 0xe || svga->crtcreg > 0x10) { if ((svga->crtcreg == 0xc) || (svga->crtcreg == 0xd)) { svga->fullchange = 3; - svga->ma_latch = ((svga->crtc[0xc] << 8) | svga->crtc[0xd]) + ((svga->crtc[8] & 0x60) >> 5); + svga->ma_latch = ((svga->crtc[0xc] << 8) | svga->crtc[0xd]) + + ((svga->crtc[8] & 0x60) >> 5); if ((svga->crtc[0x67] & 0xc) != 0xc) svga->ma_latch |= (virge->ma_ext << 16); } else { @@ -662,10 +675,9 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) } static uint8_t -s3_virge_in(uint16_t addr, void *priv) -{ +s3_virge_in(uint16_t addr, void *priv) { virge_t *virge = (virge_t *) priv; - svga_t *svga = &virge->svga; + svga_t * svga = &virge->svga; uint8_t ret; if (((addr & 0xfff0) == 0x3d0 || (addr & 0xfff0) == 0x3b0) && !(svga->miscout & 1)) @@ -688,10 +700,10 @@ s3_virge_in(uint16_t addr, void *priv) ret = 0xff; break; - case 0x3D4: + case 0x3d4: ret = svga->crtcreg; break; - case 0x3D5: + case 0x3d5: switch (svga->crtcreg) { case 0x2d: ret = virge->virge_id_high; @@ -714,9 +726,12 @@ s3_virge_in(uint16_t addr, void *priv) case 0x35: ret = (svga->crtc[0x35] & 0xf0) | (virge->bank & 0xf); break; + case 0x36: + ret = (svga->crtc[0x36] & 0xfc) | 2; + break; /*PCI bus*/ case 0x45: virge->hwc_col_stack_pos = 0; - ret = svga->crtc[0x45]; + ret = svga->crtc[0x45]; break; case 0x51: ret = (svga->crtc[0x51] & 0xf0) | ((virge->bank >> 2) & 0xc) | ((virge->ma_ext >> 2) & 3); @@ -763,13 +778,13 @@ s3_virge_in(uint16_t addr, void *priv) ret = svga_in(addr, svga); break; } + return ret; } static void -s3_virge_recalctimings(svga_t *svga) -{ - const virge_t *virge = (virge_t *) svga->priv; +s3_virge_recalctimings(svga_t *svga) { + virge_t *virge = (virge_t *) svga->priv; svga->hdisp = svga->hdisp_old; @@ -785,28 +800,28 @@ s3_virge_recalctimings(svga_t *svga) } if (svga->crtc[0x5d] & 0x01) - svga->htotal += 0x100; + svga->htotal |= 0x100; if (svga->crtc[0x5d] & 0x02) { - svga->hdisp_time += 0x100; - svga->hdisp += 0x100 * svga->dots_per_clock; + svga->hdisp_time |= 0x100; + svga->hdisp |= (0x100 * svga->dots_per_clock); } if (svga->crtc[0x5e] & 0x01) - svga->vtotal += 0x400; + svga->vtotal |= 0x400; if (svga->crtc[0x5e] & 0x02) - svga->dispend += 0x400; + svga->dispend |= 0x400; if (svga->crtc[0x5e] & 0x04) - svga->vblankstart += 0x400; + svga->vblankstart |= 0x400; if (svga->crtc[0x5e] & 0x10) - svga->vsyncstart += 0x400; + svga->vsyncstart |= 0x400; if (svga->crtc[0x5e] & 0x40) - svga->split += 0x400; + svga->split |= 0x400; svga->interlace = svga->crtc[0x42] & 0x20; if (((svga->miscout >> 2) & 3) == 3) { int n = svga->seqregs[0x12] & 0x1f; int r = (svga->seqregs[0x12] >> 5); - if (virge->chip == S3_VIRGEVX || virge->chip == S3_VIRGEDX) + if ((virge->chip == S3_VIRGEVX) || (virge->chip == S3_VIRGEDX)) r &= 7; else if (virge->chip >= S3_VIRGEGX2) r &= 10; @@ -840,59 +855,52 @@ s3_virge_recalctimings(svga_t *svga) video_force_resize_set_monitor(1, svga->monitor_index); } - /* ViRGE/GX2 and later does not use primary stream registers. */ - if ((svga->crtc[0x67] & 0xc) != 0xc || virge->chip >= S3_VIRGEGX2) /*VGA mode*/ - { + if ((svga->crtc[0x67] & 0xc) != 0xc) { /*VGA mode*/ svga->ma_latch |= (virge->ma_ext << 16); if (svga->crtc[0x51] & 0x30) - svga->rowoffset += (svga->crtc[0x51] & 0x30) << 4; + svga->rowoffset |= (svga->crtc[0x51] & 0x30) << 4; else if (svga->crtc[0x43] & 0x04) - svga->rowoffset += 0x100; + svga->rowoffset |= 0x100; if (!svga->rowoffset) svga->rowoffset = 256; svga->lowres = !((svga->gdcreg[5] & 0x40) && (svga->crtc[0x3a] & 0x10)); - if ((svga->gdcreg[5] & 0x40) && (svga->crtc[0x3a] & 0x10)) { - switch (svga->bpp) { - case 8: - svga->render = svga_render_8bpp_highres; - break; - case 15: - svga->render = svga_render_15bpp_highres; - if (virge->chip != S3_VIRGEVX && virge->chip < S3_VIRGEGX2) { - // svga->htotal >>= 1; - // if ((svga->crtc[0x33] & 0x20) || ((svga->crtc[0x67] & 0xc) == 0xc)) - // svga->hblank_end_val = svga->htotal - 1; - svga->hdisp >>= 1; - svga->dots_per_clock >>= 1; - } - break; - case 16: - svga->render = svga_render_16bpp_highres; - if (virge->chip != S3_VIRGEVX && virge->chip < S3_VIRGEGX2) { - // svga->htotal >>= 1; - // if ((svga->crtc[0x33] & 0x20) || ((svga->crtc[0x67] & 0xc) == 0xc)) - // svga->hblank_end_val = svga->htotal - 1; - svga->hdisp >>= 1; - svga->dots_per_clock >>= 1; - } - break; - case 24: - svga->render = svga_render_24bpp_highres; - if (virge->chip != S3_VIRGEVX && virge->chip < S3_VIRGEGX2) - svga->rowoffset = (svga->rowoffset * 3) / 4; /*Hack*/ - break; - case 32: - svga->render = svga_render_32bpp_highres; - break; + if ((svga->gdcreg[5] & 0x40) && (svga->crtc[0x3a] & 0x10)) switch (svga->bpp) { + case 8: + svga->render = svga_render_8bpp_highres; + break; + case 15: + svga->render = svga_render_15bpp_highres; + if ((virge->chip != S3_VIRGEVX) && (virge->chip < S3_VIRGEGX2)) { + svga->hdisp >>= 1; + svga->dots_per_clock >>= 1; + } + break; + case 16: + svga->render = svga_render_16bpp_highres; + if ((virge->chip != S3_VIRGEVX) && (virge->chip < S3_VIRGEGX2)) { + svga->hdisp >>= 1; + svga->dots_per_clock >>= 1; + } + break; + case 24: + svga->render = svga_render_24bpp_highres; + if ((virge->chip != S3_VIRGEVX) && (virge->chip < S3_VIRGEGX2)) + svga->rowoffset = (svga->rowoffset * 3) / 4; /*Hack*/ + break; + case 32: + svga->render = svga_render_32bpp_highres; + break; default: break; - } } - svga->vram_display_mask = (!(svga->crtc[0x31] & 0x08) && (svga->crtc[0x32] & 0x40)) ? 0x3ffff : virge->vram_mask; + + svga->vram_display_mask = (!(svga->crtc[0x31] & 0x08) && + (svga->crtc[0x32] & 0x40)) ? 0x3ffff : virge->vram_mask; + svga->overlay.ena = 0; - s3_virge_log("VGA mode\n"); + if (virge->chip >= S3_VIRGEGX2 && (svga->crtc[0x67] & 0xc) == 0xc) { /* ViRGE/GX2 and later does not use primary stream registers. */ svga->overlay.x = virge->streams.sec_x; @@ -909,8 +917,7 @@ s3_virge_recalctimings(svga_t *svga) svga->rowoffset = virge->streams.pri_stride >> 3; svga->vram_display_mask = virge->vram_mask; } - } else /*Streams mode*/ - { + } else { /*Streams mode*/ if (virge->streams.buffer_ctrl & 1) svga->ma_latch = virge->streams.pri_fb1 >> 2; else @@ -920,8 +927,8 @@ s3_virge_recalctimings(svga_t *svga) if (virge->streams.pri_h < svga->dispend) svga->dispend = virge->streams.pri_h; - svga->overlay.x = virge->streams.sec_x - virge->streams.pri_x; - svga->overlay.y = virge->streams.sec_y - virge->streams.pri_y; + svga->overlay.x = virge->streams.sec_x - virge->streams.pri_x; + svga->overlay.y = virge->streams.sec_y - virge->streams.pri_y; svga->overlay.cur_ysize = virge->streams.sec_h; if (virge->streams.buffer_ctrl & 2) @@ -929,12 +936,14 @@ s3_virge_recalctimings(svga_t *svga) else svga->overlay.addr = virge->streams.sec_fb0; - svga->overlay.ena = (svga->overlay.x >= 0); + svga->overlay.ena = (svga->overlay.x >= 0); svga->overlay.v_acc = virge->streams.dda_vert_accumulator; - svga->rowoffset = virge->streams.pri_stride >> 3; + + svga->rowoffset = virge->streams.pri_stride >> 3; if (virge->chip <= S3_VIRGEDX && svga->overlay.ena) { - svga->overlay.ena = (((virge->streams.blend_ctrl >> 24) & 7) == 0b000) || (((virge->streams.blend_ctrl >> 24) & 7) == 0b101); + svga->overlay.ena = (((virge->streams.blend_ctrl >> 24) & 7) == 0b000) || + (((virge->streams.blend_ctrl >> 24) & 7) == 0b101); } else if (virge->chip == S3_VIRGEGX2 && svga->overlay.ena) { /* 0x20 = Secondary Stream enabled */ /* 0x2000 = Primary Stream enabled */ @@ -946,17 +955,9 @@ s3_virge_recalctimings(svga_t *svga) svga->render = svga_render_8bpp_highres; break; case 3: /*KRGB-16 (1.5.5.5)*/ - // svga->htotal >>= 1; - // if ((svga->crtc[0x33] & 0x20) || ((svga->crtc[0x67] & 0xc) == 0xc)) - // svga->hblank_end_val = svga->htotal - 1; - // svga->dots_per_clock >>= 1; svga->render = svga_render_15bpp_highres; break; case 5: /*RGB-16 (5.6.5)*/ - // svga->htotal >>= 1; - // if ((svga->crtc[0x33] & 0x20) || ((svga->crtc[0x67] & 0xc) == 0xc)) - // svga->hblank_end_val = svga->htotal - 1; - // svga->dots_per_clock >>= 1; svga->render = svga_render_16bpp_highres; break; case 6: /*RGB-24 (8.8.8)*/ @@ -965,9 +966,6 @@ s3_virge_recalctimings(svga_t *svga) case 7: /*XRGB-32 (X.8.8.8)*/ svga->render = svga_render_32bpp_highres; break; - - default: - break; } svga->vram_display_mask = virge->vram_mask; } @@ -1004,8 +1002,7 @@ s3_virge_update_buffer(virge_t *virge) } static void -s3_virge_updatemapping(virge_t *virge) -{ +s3_virge_updatemapping(virge_t *virge) { svga_t *svga = &virge->svga; if (!(virge->pci_regs[PCI_REG_COMMAND] & PCI_COMMAND_MEM)) { @@ -1016,10 +1013,8 @@ s3_virge_updatemapping(virge_t *virge) return; } - s3_virge_log("Update mapping - bank %02X ", svga->gdcreg[6] & 0xc); - /*Banked framebuffer*/ - switch (svga->gdcreg[6] & 0xc) { /*VGA mapping*/ - case 0x0: /*128k at A0000*/ + switch (svga->gdcreg[6] & 0xc) { /*Banked framebuffer*/ + case 0x0: /*128k at A0000*/ mem_mapping_set_addr(&svga->mapping, 0xa0000, 0x20000); svga->banked_mask = 0xffff; break; @@ -1035,16 +1030,12 @@ s3_virge_updatemapping(virge_t *virge) mem_mapping_set_addr(&svga->mapping, 0xb8000, 0x08000); svga->banked_mask = 0x7fff; break; - - default: - break; } virge->linear_base = (svga->crtc[0x5a] << 16) | (svga->crtc[0x59] << 24); - s3_virge_log("Linear framebuffer %02X, linear base = %08x, display mask = %08x\n", svga->crtc[0x58] & 0x17, virge->linear_base, svga->vram_display_mask); if ((svga->crtc[0x58] & 0x10) || (virge->advfunc_cntl & 0x10)) { /*Linear framebuffer*/ - switch (svga->crtc[0x58] & 7) { + switch (svga->crtc[0x58] & 3) { case 0: /*64k*/ virge->linear_size = 0x10000; break; @@ -1055,7 +1046,7 @@ s3_virge_updatemapping(virge_t *virge) virge->linear_size = 0x200000; break; case 3: /*4mb on other than ViRGE/VX, 8mb on ViRGE/VX*/ - if (virge->chip == S3_VIRGEVX || virge->chip == S3_TRIO3D2X) + if ((virge->chip == S3_VIRGEVX) || (virge->chip == S3_TRIO3D2X)) virge->linear_size = 0x800000; else virge->linear_size = 0x400000; @@ -1063,17 +1054,13 @@ s3_virge_updatemapping(virge_t *virge) case 7: virge->linear_size = 0x800000; break; - - default: - break; } virge->linear_base &= ~(virge->linear_size - 1); - s3_virge_log("Linear framebuffer at %08X size %08X, mask = %08x, CRTC58 sel = %02x\n", virge->linear_base, virge->linear_size, virge->vram_mask, svga->crtc[0x58] & 7); if (virge->linear_base == 0xa0000) { mem_mapping_set_addr(&svga->mapping, 0xa0000, 0x10000); mem_mapping_disable(&virge->linear_mapping); } else { - if (virge->chip == S3_VIRGEVX || virge->chip == S3_TRIO3D2X) + if ((virge->chip == S3_VIRGEVX) || (virge->chip == S3_TRIO3D2X)) virge->linear_base &= 0xfe000000; else virge->linear_base &= 0xfc000000; @@ -1086,11 +1073,7 @@ s3_virge_updatemapping(virge_t *virge) svga->fb_only = 0; } - s3_virge_log("Memory mapped IO %02X\n", svga->crtc[0x53] & 0x38); - - /* Memory mapped I/O. */ - /* Old MMIO. */ - if ((svga->crtc[0x53] & 0x10) || (virge->advfunc_cntl & 0x20)) { + if ((svga->crtc[0x53] & 0x10) || (virge->advfunc_cntl & 0x20)) { /*Old MMIO*/ if (svga->crtc[0x53] & 0x20) mem_mapping_set_addr(&virge->mmio_mapping, 0xb8000, 0x8000); else @@ -1098,16 +1081,14 @@ s3_virge_updatemapping(virge_t *virge) } else mem_mapping_disable(&virge->mmio_mapping); - /* New MMIO. */ - if (svga->crtc[0x53] & 0x08) + if (svga->crtc[0x53] & 0x08) /*New MMIO*/ mem_mapping_set_addr(&virge->new_mmio_mapping, virge->linear_base + 0x1000000, 0x10000); else mem_mapping_disable(&virge->new_mmio_mapping); } static void -s3_virge_vblank_start(svga_t *svga) -{ +s3_virge_vblank_start(svga_t *svga) { virge_t *virge = (virge_t *) svga->priv; virge->subsys_stat |= INT_VSY; @@ -1115,523 +1096,33 @@ s3_virge_vblank_start(svga_t *svga) } static void -s3_virge_mmio_fifo_write(uint32_t addr, uint8_t val, virge_t *virge) -{ - if ((addr & 0xffff) < 0x8000) - s3_virge_bitblt(virge, 8, val); - else { - switch (addr & 0xffff) { - case 0x859c: - virge->cmd_dma = val; - break; - - default: - break; - } - } -} - -static void -s3_virge_mmio_fifo_write_w(uint32_t addr, uint16_t val, virge_t *virge) -{ - if ((addr & 0xfffe) < 0x8000) { - if (virge->s3d.cmd_set & CMD_SET_MS) - s3_virge_bitblt(virge, 16, ((val >> 8) | (val << 8)) << 16); - else - s3_virge_bitblt(virge, 16, val); - } else { - if ((addr & 0xfffe) == 0x859c) - virge->cmd_dma = val; - } -} - -static void -s3_virge_mmio_fifo_write_l(uint32_t addr, uint32_t val, virge_t *virge) -{ - if ((addr & 0xfffc) < 0x8000) { - if (virge->s3d.cmd_set & CMD_SET_MS) - s3_virge_bitblt(virge, 32, ((val & 0xff000000) >> 24) | ((val & 0x00ff0000) >> 8) | ((val & 0x0000ff00) << 8) | ((val & 0x000000ff) << 24)); - else - s3_virge_bitblt(virge, 32, val); - } else { - switch (addr & 0xfffc) { - case 0x8590: - virge->cmd_dma_base = val; - break; - - case 0x8594: - virge->dma_ptr = val; - break; - - case 0x8598: - break; - - case 0x859c: - virge->cmd_dma = val; - break; - - case 0xa000: - case 0xa004: - case 0xa008: - case 0xa00c: - case 0xa010: - case 0xa014: - case 0xa018: - case 0xa01c: - case 0xa020: - case 0xa024: - case 0xa028: - case 0xa02c: - case 0xa030: - case 0xa034: - case 0xa038: - case 0xa03c: - case 0xa040: - case 0xa044: - case 0xa048: - case 0xa04c: - case 0xa050: - case 0xa054: - case 0xa058: - case 0xa05c: - case 0xa060: - case 0xa064: - case 0xa068: - case 0xa06c: - case 0xa070: - case 0xa074: - case 0xa078: - case 0xa07c: - case 0xa080: - case 0xa084: - case 0xa088: - case 0xa08c: - case 0xa090: - case 0xa094: - case 0xa098: - case 0xa09c: - case 0xa0a0: - case 0xa0a4: - case 0xa0a8: - case 0xa0ac: - case 0xa0b0: - case 0xa0b4: - case 0xa0b8: - case 0xa0bc: - case 0xa0c0: - case 0xa0c4: - case 0xa0c8: - case 0xa0cc: - case 0xa0d0: - case 0xa0d4: - case 0xa0d8: - case 0xa0dc: - case 0xa0e0: - case 0xa0e4: - case 0xa0e8: - case 0xa0ec: - case 0xa0f0: - case 0xa0f4: - case 0xa0f8: - case 0xa0fc: - case 0xa100: - case 0xa104: - case 0xa108: - case 0xa10c: - case 0xa110: - case 0xa114: - case 0xa118: - case 0xa11c: - case 0xa120: - case 0xa124: - case 0xa128: - case 0xa12c: - case 0xa130: - case 0xa134: - case 0xa138: - case 0xa13c: - case 0xa140: - case 0xa144: - case 0xa148: - case 0xa14c: - case 0xa150: - case 0xa154: - case 0xa158: - case 0xa15c: - case 0xa160: - case 0xa164: - case 0xa168: - case 0xa16c: - case 0xa170: - case 0xa174: - case 0xa178: - case 0xa17c: - case 0xa180: - case 0xa184: - case 0xa188: - case 0xa18c: - case 0xa190: - case 0xa194: - case 0xa198: - case 0xa19c: - case 0xa1a0: - case 0xa1a4: - case 0xa1a8: - case 0xa1ac: - case 0xa1b0: - case 0xa1b4: - case 0xa1b8: - case 0xa1bc: - case 0xa1c0: - case 0xa1c4: - case 0xa1c8: - case 0xa1cc: - case 0xa1d0: - case 0xa1d4: - case 0xa1d8: - case 0xa1dc: - case 0xa1e0: - case 0xa1e4: - case 0xa1e8: - case 0xa1ec: - case 0xa1f0: - case 0xa1f4: - case 0xa1f8: - case 0xa1fc: - { - int x = addr & 4; - int y = (addr >> 3) & 7; - int color; - int byte; - virge->s3d.pattern_8[y * 8 + x] = val & 0xff; - virge->s3d.pattern_8[y * 8 + x + 1] = val >> 8; - virge->s3d.pattern_8[y * 8 + x + 2] = val >> 16; - virge->s3d.pattern_8[y * 8 + x + 3] = val >> 24; - - x = (addr >> 1) & 6; - y = (addr >> 4) & 7; - virge->s3d.pattern_16[y * 8 + x] = val & 0xffff; - virge->s3d.pattern_16[y * 8 + x + 1] = val >> 16; - - addr &= 0x00ff; - for (uint8_t xx = 0; xx < 4; xx++) { - x = ((addr + xx) / 3) % 8; - y = ((addr + xx) / 24) % 8; - color = ((addr + xx) % 3) << 3; - byte = (xx << 3); - virge->s3d.pattern_24[y * 8 + x] &= ~(0xff << color); - virge->s3d.pattern_24[y * 8 + x] |= ((val >> byte) & 0xff) << color; - } - - x = (addr >> 2) & 7; - y = (addr >> 5) & 7; - virge->s3d.pattern_32[y * 8 + x] = val & 0xffffff; - } - break; - - case 0xa4d4: /*2D BitBLT SRC Base*/ - case 0xa8d4: /*2D Line SRC Base*/ - case 0xacd4: /*2D Polygon SRC Base*/ - virge->s3d.src_base = (virge->memory_size == 8) ? (val & 0x7ffff8) : (val & 0x3ffff8); - s3_virge_log("PortWrite = %04x, SRC Base = %08x, memsize = %i\n", addr & 0xfffc, val, virge->memory_size); - break; - case 0xa4d8: /*2D BitBLT DEST Base*/ - case 0xa8d8: /*2D Line DEST Base*/ - case 0xacd8: /*2D Polygon DEST Base*/ - virge->s3d.dest_base = (virge->memory_size == 8) ? (val & 0x7ffff8) : (val & 0x3ffff8); - s3_virge_log("PortWrite = %04x, DST Base = %08x, memsize = %i\n", addr & 0xfffc, val, virge->memory_size); - break; - case 0xa4dc: /*2D BitBLT Left/Right Clipping*/ - case 0xa8dc: /*2D Line Left/Right Clipping*/ - case 0xacdc: /*2D Polygon Left/Right Clipping*/ - virge->s3d.clip_l = (val >> 16) & 0x7ff; - virge->s3d.clip_r = val & 0x7ff; - break; - case 0xa4e0: /*2D BitBLT Top/Bottom Clipping*/ - case 0xa8e0: /*2D Line Top/Bottom Clipping*/ - case 0xace0: /*2D Polygon Top/Bottom Clipping*/ - virge->s3d.clip_t = (val >> 16) & 0x7ff; - virge->s3d.clip_b = val & 0x7ff; - break; - case 0xa4e4: /*2D BitBLT DEST/SRC Stride*/ - case 0xa8e4: /*2D Line DEST/SRC Stride*/ - case 0xace4: /*2D Polygon DEST/SRC Stride*/ - virge->s3d.dest_str = (val >> 16) & 0xff8; - virge->s3d.src_str = val & 0xff8; - break; - case 0xa4e8: /*2D BitBLT Mono Pattern 0*/ - case 0xace8: /*2D Polygon Mono Pattern 0*/ - virge->s3d.mono_pat_0 = val; - break; - case 0xa4ec: /*2D BitBLT Mono Pattern 1*/ - case 0xacec: /*2D Polygon Mono Pattern 1*/ - virge->s3d.mono_pat_1 = val; - break; - case 0xa4f0: /*2D BitBLT Mono Pattern Background*/ - case 0xacf0: /*2D Polygon Mono Pattern Background*/ - virge->s3d.pat_bg_clr = val; - break; - case 0xa4f4: /*2D BitBLT Mono Pattern Foreground*/ - case 0xa8f4: /*2D Line Mono Pattern Foreground*/ - case 0xacf4: /*2D Polygon Mono Pattern Foreground*/ - virge->s3d.pat_fg_clr = val; - break; - case 0xa4f8: /*2D BitBLT SRC Background*/ - virge->s3d.src_bg_clr = val; - break; - case 0xa4fc: /*2D BitBLT SRC Foreground*/ - virge->s3d.src_fg_clr = val; - break; - case 0xa500: /*2D BitBLT Command Set*/ - case 0xa900: /*2D Line Command Set*/ - case 0xad00: /*2D Polygon Command Set*/ - virge->s3d.cmd_set = val; - if (!(val & CMD_SET_AE)) { - s3_virge_log("MMIO WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); - s3_virge_bitblt(virge, -1, 0); - } - break; - case 0xa504: /*2D BitBLT Rectangle Width/Height*/ - virge->s3d.r_width = (val >> 16) & 0x7ff; - virge->s3d.r_height = val & 0x7ff; - break; - case 0xa508: /*2D BitBLT Rectangle SRC X/Y*/ - virge->s3d.rsrc_x = (val >> 16) & 0x7ff; - virge->s3d.rsrc_y = val & 0x7ff; - break; - case 0xa50c: /*2D BitBLT Rectangle DEST X/Y*/ - virge->s3d.rdest_x = (val >> 16) & 0x7ff; - virge->s3d.rdest_y = val & 0x7ff; - if (virge->s3d.cmd_set & CMD_SET_AE) { - s3_virge_log("MMIO WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); - s3_virge_bitblt(virge, -1, 0); - } - break; - case 0xa96c: /*2D Line Draw Endpoints*/ - virge->s3d.lxend0 = (val >> 16) & 0x7ff; - virge->s3d.lxend1 = val & 0x7ff; - break; - case 0xa970: /*2D Line X Delta*/ - virge->s3d.ldx = (int32_t) val; - break; - case 0xa974: /*2D Line X Start*/ - virge->s3d.lxstart = val; - break; - case 0xa978: /*2D Line Y Start*/ - virge->s3d.lystart = val & 0x7ff; - break; - case 0xa97c: /*2D Line Y Count*/ - virge->s3d.lycnt = val & 0x7ff; - virge->s3d.line_dir = val >> 31; - if (virge->s3d.cmd_set & CMD_SET_AE) { - s3_virge_log("MMIO WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); - s3_virge_bitblt(virge, -1, 0); - } - break; - - case 0xad68: /*2D Polygon Right X Delta*/ - virge->s3d.prdx = val; - break; - case 0xad6c: /*2D Polygon Right X Start*/ - virge->s3d.prxstart = val; - break; - case 0xad70: /*2D Polygon Left X Delta*/ - virge->s3d.pldx = val; - break; - case 0xad74: /*2D Polygon Left X Start*/ - virge->s3d.plxstart = val; - break; - case 0xad78: /*2D Polygon Y Start*/ - virge->s3d.pystart = val & 0x7ff; - break; - case 0xad7c: /*2D Polygon Y Count*/ - virge->s3d.pycnt = val & 0x300007ff; - if (virge->s3d.cmd_set & CMD_SET_AE) { - s3_virge_log("MMIO WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); - s3_virge_bitblt(virge, -1, 0); - } - break; - - case 0xb0d4: - case 0xb4d4: - virge->s3d_tri.z_base = (virge->memory_size == 8) ? (val & 0x7ffff8) : (val & 0x3ffff8); - break; - case 0xb0d8: - case 0xb4d8: - virge->s3d_tri.dest_base = (virge->memory_size == 8) ? (val & 0x7ffff8) : (val & 0x3ffff8); - break; - case 0xb0dc: - case 0xb4dc: - virge->s3d_tri.clip_l = (val >> 16) & 0x7ff; - virge->s3d_tri.clip_r = val & 0x7ff; - break; - case 0xb0e0: - case 0xb4e0: - virge->s3d_tri.clip_t = (val >> 16) & 0x7ff; - virge->s3d_tri.clip_b = val & 0x7ff; - break; - case 0xb0e4: - case 0xb4e4: - virge->s3d_tri.dest_str = (val >> 16) & 0xff8; - virge->s3d.src_str = val & 0xff8; - break; - case 0xb0e8: - case 0xb4e8: - virge->s3d_tri.z_str = val & 0xff8; - break; - case 0xb4ec: - virge->s3d_tri.tex_base = (virge->memory_size == 8) ? (val & 0x7ffff8) : (val & 0x3ffff8); - break; - case 0xb4f0: - virge->s3d_tri.tex_bdr_clr = val & 0xffffff; - break; - case 0xb0f4: - case 0xb4f4: - virge->s3d_tri.fog_b = val & 0xff; - virge->s3d_tri.fog_g = (val >> 8) & 0xff; - virge->s3d_tri.fog_r = (val >> 16) & 0xff; - break; - case 0xb100: - case 0xb500: - virge->s3d_tri.cmd_set = val; - if (!(val & CMD_SET_AE)) - queue_triangle(virge); - break; - case 0xb504: - virge->s3d_tri.tbv = val & 0xfffff; - break; - case 0xb508: - virge->s3d_tri.tbu = val & 0xfffff; - break; - case 0xb50c: - virge->s3d_tri.TdWdX = val; - break; - case 0xb510: - virge->s3d_tri.TdWdY = val; - break; - case 0xb514: - virge->s3d_tri.tws = val; - break; - case 0xb518: - virge->s3d_tri.TdDdX = val; - break; - case 0xb51c: - virge->s3d_tri.TdVdX = val; - break; - case 0xb520: - virge->s3d_tri.TdUdX = val; - break; - case 0xb524: - virge->s3d_tri.TdDdY = val; - break; - case 0xb528: - virge->s3d_tri.TdVdY = val; - break; - case 0xb52c: - virge->s3d_tri.TdUdY = val; - break; - case 0xb530: - virge->s3d_tri.tds = val; - break; - case 0xb534: - virge->s3d_tri.tvs = val; - break; - case 0xb538: - virge->s3d_tri.tus = val; - break; - case 0xb53c: - virge->s3d_tri.TdGdX = val >> 16; - virge->s3d_tri.TdBdX = val & 0xffff; - break; - case 0xb540: - virge->s3d_tri.TdAdX = val >> 16; - virge->s3d_tri.TdRdX = val & 0xffff; - break; - case 0xb544: - virge->s3d_tri.TdGdY = val >> 16; - virge->s3d_tri.TdBdY = val & 0xffff; - break; - case 0xb548: - virge->s3d_tri.TdAdY = val >> 16; - virge->s3d_tri.TdRdY = val & 0xffff; - break; - case 0xb54c: - virge->s3d_tri.tgs = (val >> 16) & 0xffff; - virge->s3d_tri.tbs = val & 0xffff; - break; - case 0xb550: - virge->s3d_tri.tas = (val >> 16) & 0xffff; - virge->s3d_tri.trs = val & 0xffff; - break; - - case 0xb554: - virge->s3d_tri.TdZdX = val; - break; - case 0xb558: - virge->s3d_tri.TdZdY = val; - break; - case 0xb55c: - virge->s3d_tri.tzs = val; - break; - case 0xb560: - virge->s3d_tri.TdXdY12 = val; - break; - case 0xb564: - virge->s3d_tri.txend12 = val; - break; - case 0xb568: - virge->s3d_tri.TdXdY01 = val; - break; - case 0xb56c: - virge->s3d_tri.txend01 = val; - break; - case 0xb570: - virge->s3d_tri.TdXdY02 = val; - break; - case 0xb574: - virge->s3d_tri.txs = val; - break; - case 0xb578: - virge->s3d_tri.tys = val; - break; - case 0xb57c: - virge->s3d_tri.ty01 = (val >> 16) & 0x7ff; - virge->s3d_tri.ty12 = val & 0x7ff; - virge->s3d_tri.tlr = val >> 31; - if (virge->s3d_tri.cmd_set & CMD_SET_AE) - queue_triangle(virge); - break; - - default: - break; - } +s3_virge_wait_fifo_idle(virge_t *virge) { + while (!FIFO_EMPTY) { + wake_fifo_thread(virge); + thread_wait_event(virge->fifo_not_full_event, 1); } } static uint8_t -s3_virge_mmio_read(uint32_t addr, void *priv) -{ +s3_virge_mmio_read(uint32_t addr, void *priv) { virge_t *virge = (virge_t *) priv; - uint8_t ret = 0xff; - - s3_virge_log("[%04X:%08X]: MMIO ReadB addr = %04x\n", CS, cpu_state.pc, addr & 0xffff); + uint8_t ret; switch (addr & 0xffff) { case 0x8504: if (!virge->virge_busy) - virge->fifo_state = FIFO_STATE_RUN; + wake_fifo_thread(virge); ret = virge->subsys_stat; return ret; case 0x8505: ret = 0xc0; - if (virge->s3d_busy || virge->virge_busy || (virge->fifo_read_idx < virge->fifo_write_idx)) + if (virge->s3d_busy || virge->virge_busy || !FIFO_EMPTY) ret |= 0x10; else ret |= 0x30; - if (!virge->virge_busy) - virge->fifo_state = FIFO_STATE_RUN; + wake_fifo_thread(virge); return ret; case 0x850c: @@ -1643,55 +1134,8 @@ s3_virge_mmio_read(uint32_t addr, void *priv) ret = virge->fifo_slots_num >> 2; return ret; - case 0x83b0: - case 0x83b1: - case 0x83b2: - case 0x83b3: - case 0x83b4: - case 0x83b5: - case 0x83b6: - case 0x83b7: - case 0x83b8: - case 0x83b9: - case 0x83ba: - case 0x83bb: - case 0x83bc: - case 0x83bd: - case 0x83be: - case 0x83bf: - case 0x83c0: - case 0x83c1: - case 0x83c2: - case 0x83c3: - case 0x83c4: - case 0x83c5: - case 0x83c6: - case 0x83c7: - case 0x83c8: - case 0x83c9: - case 0x83ca: - case 0x83cb: - case 0x83cc: - case 0x83cd: - case 0x83ce: - case 0x83cf: - case 0x83d0: - case 0x83d1: - case 0x83d2: - case 0x83d3: - case 0x83d4: - case 0x83d5: - case 0x83d6: - case 0x83d7: - case 0x83d8: - case 0x83d9: - case 0x83da: - case 0x83db: - case 0x83dc: - case 0x83dd: - case 0x83de: - case 0x83df: - return s3_virge_in(addr & 0x3ff, virge); + case 0x83b0 ... 0x83df: + return s3_virge_in(addr & 0x3ff, priv); case 0x859c: return virge->cmd_dma; @@ -1704,31 +1148,26 @@ s3_virge_mmio_read(uint32_t addr, void *priv) if ((virge->serialport & SERIAL_PORT_SDW) && i2c_gpio_get_sda(virge->i2c)) ret |= SERIAL_PORT_SDR; return ret; - - default: - break; } return 0xff; } -static uint16_t -s3_virge_mmio_read_w(uint32_t addr, void *priv) -{ - virge_t *virge = (virge_t *) priv; - uint16_t ret = 0xffff; - s3_virge_log("[%04X:%08X]: MMIO ReadW addr = %04x\n", CS, cpu_state.pc, addr & 0xfffe); +static uint16_t +s3_virge_mmio_read_w(uint32_t addr, void *priv) { + virge_t *virge = (virge_t *) priv; + uint16_t ret; switch (addr & 0xfffe) { case 0x8504: ret = 0xc000; - if (virge->s3d_busy || virge->virge_busy || (virge->fifo_read_idx < virge->fifo_write_idx)) + if (virge->s3d_busy || virge->virge_busy || !FIFO_EMPTY) ret |= 0x1000; else ret |= 0x3000; ret |= virge->subsys_stat; if (!virge->virge_busy) - virge->fifo_state = FIFO_STATE_RUN; + wake_fifo_thread(virge); return ret; case 0x850c: @@ -1740,17 +1179,16 @@ s3_virge_mmio_read_w(uint32_t addr, void *priv) return virge->cmd_dma; default: - return s3_virge_mmio_read(addr, virge) | (s3_virge_mmio_read(addr + 1, virge) << 8); + return s3_virge_mmio_read(addr, priv) | (s3_virge_mmio_read(addr + 1, priv) << 8); } - return 0xffff; } static uint32_t -s3_virge_mmio_read_l(uint32_t addr, void *priv) -{ +s3_virge_mmio_read_l(uint32_t addr, void *priv) { virge_t *virge = (virge_t *) priv; - uint32_t ret = 0xffffffff; + svga_t *svga = &virge->svga; + uint32_t ret = 0xffffffff; switch (addr & 0xfffc) { case 0x8180: @@ -1770,30 +1208,46 @@ s3_virge_mmio_read_l(uint32_t addr, void *priv) break; case 0x81a0: ret = virge->streams.blend_ctrl; + svga_recalctimings(svga); break; case 0x81c0: ret = virge->streams.pri_fb0; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; break; case 0x81c4: ret = virge->streams.pri_fb1; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; break; case 0x81c8: ret = virge->streams.pri_stride; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; break; case 0x81cc: ret = virge->streams.buffer_ctrl; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; break; case 0x81d0: ret = virge->streams.sec_fb0; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; break; case 0x81d4: ret = virge->streams.sec_fb1; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; break; case 0x81d8: ret = virge->streams.sec_stride; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; break; case 0x81dc: ret = virge->streams.overlay_ctrl; + svga->fullchange = changeframecount; break; case 0x81e0: ret = virge->streams.k1_vert_scale; @@ -1822,14 +1276,13 @@ s3_virge_mmio_read_l(uint32_t addr, void *priv) case 0x8504: ret = 0x0000c000; - if (virge->s3d_busy || virge->virge_busy || (virge->fifo_read_idx < virge->fifo_write_idx)) + if (virge->s3d_busy || virge->virge_busy || !FIFO_EMPTY) ret |= 0x00001000; else ret |= 0x00003000; - ret |= virge->subsys_stat; if (!virge->virge_busy) - virge->fifo_state = FIFO_STATE_RUN; + wake_fifo_thread(virge); break; case 0x850c: @@ -1852,512 +1305,896 @@ s3_virge_mmio_read_l(uint32_t addr, void *priv) case 0xa4d4: case 0xa8d4: case 0xacd4: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.src_base; break; case 0xa4d8: case 0xa8d8: case 0xacd8: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.dest_base; break; case 0xa4dc: case 0xa8dc: case 0xacdc: + s3_virge_wait_fifo_idle(virge); ret = (virge->s3d.clip_l << 16) | virge->s3d.clip_r; break; case 0xa4e0: case 0xa8e0: case 0xace0: + s3_virge_wait_fifo_idle(virge); ret = (virge->s3d.clip_t << 16) | virge->s3d.clip_b; break; case 0xa4e4: case 0xa8e4: case 0xace4: + s3_virge_wait_fifo_idle(virge); ret = (virge->s3d.dest_str << 16) | virge->s3d.src_str; break; case 0xa4e8: case 0xace8: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.mono_pat_0; break; case 0xa4ec: case 0xacec: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.mono_pat_1; break; case 0xa4f0: case 0xacf0: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.pat_bg_clr; break; case 0xa4f4: case 0xa8f4: case 0xacf4: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.pat_fg_clr; break; case 0xa4f8: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.src_bg_clr; break; case 0xa4fc: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.src_fg_clr; break; case 0xa500: case 0xa900: case 0xad00: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.cmd_set; break; case 0xa504: + s3_virge_wait_fifo_idle(virge); ret = (virge->s3d.r_width << 16) | virge->s3d.r_height; break; case 0xa508: + s3_virge_wait_fifo_idle(virge); ret = (virge->s3d.rsrc_x << 16) | virge->s3d.rsrc_y; break; case 0xa50c: + s3_virge_wait_fifo_idle(virge); ret = (virge->s3d.rdest_x << 16) | virge->s3d.rdest_y; break; case 0xa96c: + s3_virge_wait_fifo_idle(virge); ret = (virge->s3d.lxend0 << 16) | virge->s3d.lxend1; break; case 0xa970: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.ldx; break; case 0xa974: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.lxstart; break; case 0xa978: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.lystart; break; case 0xa97c: + s3_virge_wait_fifo_idle(virge); ret = (virge->s3d.line_dir << 31) | virge->s3d.lycnt; break; case 0xad68: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.prdx; break; case 0xad6c: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.prxstart; break; case 0xad70: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.pldx; break; case 0xad74: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.plxstart; break; case 0xad78: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.pystart; break; case 0xad7c: + s3_virge_wait_fifo_idle(virge); ret = virge->s3d.pycnt; break; default: - ret = s3_virge_mmio_read(addr, virge) | (s3_virge_mmio_read(addr + 1, virge) << 8) | (s3_virge_mmio_read(addr + 2, virge) << 16) | (s3_virge_mmio_read(addr + 3, virge) << 24); - break; + ret = s3_virge_mmio_read_w(addr, priv) | (s3_virge_mmio_read_w(addr + 2, priv) << 16); } - s3_virge_log("MMIO ReadL addr = %04x, val = %08x.\n", addr & 0xfffc, ret); return ret; } static void -s3_virge_fifo_timer(void *priv) -{ - virge_t *virge = (virge_t *)priv; +fifo_thread(void *param) { + virge_t *virge = (virge_t *)param; - timer_advance_u64(&virge->fifo_timer, 100 * TIMER_USEC); - - while (virge->fifo_state == FIFO_STATE_RUN) { + while (virge->fifo_thread_run) { + thread_set_event(virge->fifo_not_full_event); + thread_wait_event(virge->wake_fifo_thread, -1); + thread_reset_event(virge->wake_fifo_thread); virge->virge_busy = 1; - while (virge->fifo_read_idx < virge->fifo_write_idx) { - switch (virge->fifo[virge->fifo_read_idx].addr_type & FIFO_TYPE) { + while (!FIFO_EMPTY) { + uint64_t start_time = plat_timer_read(); + uint64_t end_time; + fifo_entry_t *fifo = &virge->fifo[virge->fifo_read_idx & FIFO_MASK]; + uint32_t val = fifo->val; + + switch (fifo->addr_type & FIFO_TYPE) { case FIFO_WRITE_BYTE: - s3_virge_mmio_fifo_write(virge->fifo[virge->fifo_read_idx].addr_type & FIFO_ADDR, virge->fifo[virge->fifo_read_idx].val, virge); + if (((fifo->addr_type & FIFO_ADDR) & 0xfffc) < 0x8000) + s3_virge_bitblt(virge, 8, val); + else if (((fifo->addr_type & FIFO_ADDR) & 0xffff) == 0x859c) + virge->cmd_dma = val; break; case FIFO_WRITE_WORD: - s3_virge_mmio_fifo_write_w(virge->fifo[virge->fifo_read_idx].addr_type & FIFO_ADDR, virge->fifo[virge->fifo_read_idx].val, virge); + if (((fifo->addr_type & FIFO_ADDR) & 0xfffc) < 0x8000) { + if (virge->s3d.cmd_set & CMD_SET_MS) + s3_virge_bitblt(virge, 16, ((val >> 8) | (val << 8)) << 16); + else + s3_virge_bitblt(virge, 16, val); + } else if (((fifo->addr_type & FIFO_ADDR) & 0xfffe) == 0x859c) + virge->cmd_dma = val; break; case FIFO_WRITE_DWORD: - s3_virge_mmio_fifo_write_l(virge->fifo[virge->fifo_read_idx].addr_type & FIFO_ADDR, virge->fifo[virge->fifo_read_idx].val, virge); - break; - default: + if (((fifo->addr_type & FIFO_ADDR) & 0xfffc) < 0x8000) { + if (virge->s3d.cmd_set & CMD_SET_MS) + s3_virge_bitblt(virge, 32, + ((val & 0xff000000) >> 24) | ((val & 0x00ff0000) >> 8) | + ((val & 0x0000ff00) << 8) | ((val & 0x000000ff) << 24)); + else + s3_virge_bitblt(virge, 32, val); + } else { + switch ((fifo->addr_type & FIFO_ADDR) & 0xfffc) { + case 0x8590: + virge->cmd_dma_base = val; + break; + + case 0x8594: + virge->dma_ptr = val; + break; + + case 0x8598: + break; + + case 0x859c: + virge->cmd_dma = val; + break; + + case 0xa000 ... 0xa1fc: + { + int x = (fifo->addr_type & FIFO_ADDR) & 4; + int y = ((fifo->addr_type & FIFO_ADDR) >> 3) & 7; + int color; + int byte; + uint32_t addr = (fifo->addr_type & FIFO_ADDR); + virge->s3d.pattern_8[y * 8 + x] = val & 0xff; + virge->s3d.pattern_8[y * 8 + x + 1] = val >> 8; + virge->s3d.pattern_8[y * 8 + x + 2] = val >> 16; + virge->s3d.pattern_8[y * 8 + x + 3] = val >> 24; + + x = ((fifo->addr_type & FIFO_ADDR) >> 1) & 6; + y = ((fifo->addr_type & FIFO_ADDR) >> 4) & 7; + virge->s3d.pattern_16[y * 8 + x] = val & 0xffff; + virge->s3d.pattern_16[y * 8 + x + 1] = val >> 16; + + addr &= 0x00ff; + for (uint8_t xx = 0; xx < 4; xx++) { + x = ((addr + xx) / 3) % 8; + y = ((addr + xx) / 24) % 8; + color = ((addr + xx) % 3) << 3; + byte = (xx << 3); + virge->s3d.pattern_24[y * 8 + x] &= ~(0xff << color); + virge->s3d.pattern_24[y * 8 + x] |= ((val >> byte) & 0xff) << color; + } + + x = ((fifo->addr_type & FIFO_ADDR) >> 2) & 7; + y = ((fifo->addr_type & FIFO_ADDR) >> 5) & 7; + virge->s3d.pattern_32[y * 8 + x] = val & 0xffffff; + } break; + + case 0xa4d4: + case 0xa8d4: + case 0xacd4: + virge->s3d.src_base = val & ((virge->memory_size == 8) ? + (val & 0x7ffff8) : (val & 0x3ffff8)); + break; + case 0xa4d8: + case 0xa8d8: + case 0xacd8: + virge->s3d.dest_base = val & ((virge->memory_size == 8) ? + (val & 0x7ffff8) : (val & 0x3ffff8)); + break; + case 0xa4dc: + case 0xa8dc: + case 0xacdc: + virge->s3d.clip_l = (val >> 16) & 0x7ff; + virge->s3d.clip_r = val & 0x7ff; + break; + case 0xa4e0: + case 0xa8e0: + case 0xace0: + virge->s3d.clip_t = (val >> 16) & 0x7ff; + virge->s3d.clip_b = val & 0x7ff; + break; + case 0xa4e4: + case 0xa8e4: + case 0xace4: + virge->s3d.dest_str = (val >> 16) & 0xff8; + virge->s3d.src_str = val & 0xff8; + break; + case 0xa4e8: + case 0xace8: + virge->s3d.mono_pat_0 = val; + break; + case 0xa4ec: + case 0xacec: + virge->s3d.mono_pat_1 = val; + break; + case 0xa4f0: + case 0xacf0: + virge->s3d.pat_bg_clr = val; + break; + case 0xa4f4: + case 0xa8f4: + case 0xacf4: + virge->s3d.pat_fg_clr = val; + break; + case 0xa4f8: + virge->s3d.src_bg_clr = val; + break; + case 0xa4fc: + virge->s3d.src_fg_clr = val; + break; + case 0xa500: + case 0xa900: + case 0xad00: + virge->s3d.cmd_set = val; + if (!(val & CMD_SET_AE)) + s3_virge_bitblt(virge, -1, 0); + break; + case 0xa504: + virge->s3d.r_width = (val >> 16) & 0x7ff; + virge->s3d.r_height = val & 0x7ff; + break; + case 0xa508: + virge->s3d.rsrc_x = (val >> 16) & 0x7ff; + virge->s3d.rsrc_y = val & 0x7ff; + break; + case 0xa50c: + virge->s3d.rdest_x = (val >> 16) & 0x7ff; + virge->s3d.rdest_y = val & 0x7ff; + if (virge->s3d.cmd_set & CMD_SET_AE) + s3_virge_bitblt(virge, -1, 0); + break; + case 0xa96c: + virge->s3d.lxend0 = (val >> 16) & 0x7ff; + virge->s3d.lxend1 = val & 0x7ff; + break; + case 0xa970: + virge->s3d.ldx = (int32_t)val; + break; + case 0xa974: + virge->s3d.lxstart = val; + break; + case 0xa978: + virge->s3d.lystart = val & 0x7ff; + break; + case 0xa97c: + virge->s3d.lycnt = val & 0x7ff; + virge->s3d.line_dir = val >> 31; + if (virge->s3d.cmd_set & CMD_SET_AE) + s3_virge_bitblt(virge, -1, 0); + break; + + case 0xad68: + virge->s3d.prdx = val; + break; + case 0xad6c: + virge->s3d.prxstart = val; + break; + case 0xad70: + virge->s3d.pldx = val; + break; + case 0xad74: + virge->s3d.plxstart = val; + break; + case 0xad78: + virge->s3d.pystart = val & 0x7ff; + break; + case 0xad7c: + virge->s3d.pycnt = val & 0x300007ff; + if (virge->s3d.cmd_set & CMD_SET_AE) + s3_virge_bitblt(virge, -1, 0); + break; + + case 0xb0d4: + case 0xb4d4: + virge->s3d_tri.z_base = val & ((virge->memory_size == 8) ? + (val & 0x7ffff8) : (val & 0x3ffff8)); + break; + case 0xb0d8: + case 0xb4d8: + virge->s3d_tri.dest_base = val & ((virge->memory_size == 8) ? + (val & 0x7ffff8) : (val & 0x3ffff8)); + break; + case 0xb0dc: + case 0xb4dc: + virge->s3d_tri.clip_l = (val >> 16) & 0x7ff; + virge->s3d_tri.clip_r = val & 0x7ff; + break; + case 0xb0e0: + case 0xb4e0: + virge->s3d_tri.clip_t = (val >> 16) & 0x7ff; + virge->s3d_tri.clip_b = val & 0x7ff; + break; + case 0xb0e4: + case 0xb4e4: + virge->s3d_tri.dest_str = (val >> 16) & 0xff8; + virge->s3d.src_str = val & 0xff8; + break; + case 0xb0e8: + case 0xb4e8: + virge->s3d_tri.z_str = val & 0xff8; + break; + case 0xb4ec: + virge->s3d_tri.tex_base = val & ((virge->memory_size == 8) ? + (val & 0x7ffff8) : (val & 0x3ffff8)); + break; + case 0xb4f0: + virge->s3d_tri.tex_bdr_clr = val & 0xffffff; + break; + case 0xb0f4: + case 0xb4f4: + virge->s3d_tri.fog_b = val & 0xff; + virge->s3d_tri.fog_g = (val >> 8) & 0xff; + virge->s3d_tri.fog_r = (val >> 16) & 0xff; + break; + case 0xb100: + case 0xb500: + virge->s3d_tri.cmd_set = val; + if (!(val & CMD_SET_AE)) + queue_triangle(virge); + break; + case 0xb504: + virge->s3d_tri.tbv = val & 0xfffff; + break; + case 0xb508: + virge->s3d_tri.tbu = val & 0xfffff; + break; + case 0xb50c: + virge->s3d_tri.TdWdX = val; + break; + case 0xb510: + virge->s3d_tri.TdWdY = val; + break; + case 0xb514: + virge->s3d_tri.tws = val; + break; + case 0xb518: + virge->s3d_tri.TdDdX = val; + break; + case 0xb51c: + virge->s3d_tri.TdVdX = val; + break; + case 0xb520: + virge->s3d_tri.TdUdX = val; + break; + case 0xb524: + virge->s3d_tri.TdDdY = val; + break; + case 0xb528: + virge->s3d_tri.TdVdY = val; + break; + case 0xb52c: + virge->s3d_tri.TdUdY = val; + break; + case 0xb530: + virge->s3d_tri.tds = val; + break; + case 0xb534: + virge->s3d_tri.tvs = val; + break; + case 0xb538: + virge->s3d_tri.tus = val; + break; + case 0xb53c: + virge->s3d_tri.TdGdX = val >> 16; + virge->s3d_tri.TdBdX = val & 0xffff; + break; + case 0xb540: + virge->s3d_tri.TdAdX = val >> 16; + virge->s3d_tri.TdRdX = val & 0xffff; + break; + case 0xb544: + virge->s3d_tri.TdGdY = val >> 16; + virge->s3d_tri.TdBdY = val & 0xffff; + break; + case 0xb548: + virge->s3d_tri.TdAdY = val >> 16; + virge->s3d_tri.TdRdY = val & 0xffff; + break; + case 0xb54c: + virge->s3d_tri.tgs = (val >> 16) & 0xffff; + virge->s3d_tri.tbs = val & 0xffff; + break; + case 0xb550: + virge->s3d_tri.tas = (val >> 16) & 0xffff; + virge->s3d_tri.trs = val & 0xffff; + break; + + case 0xb554: + virge->s3d_tri.TdZdX = val; + break; + case 0xb558: + virge->s3d_tri.TdZdY = val; + break; + case 0xb55c: + virge->s3d_tri.tzs = val; + break; + case 0xb560: + virge->s3d_tri.TdXdY12 = val; + break; + case 0xb564: + virge->s3d_tri.txend12 = val; + break; + case 0xb568: + virge->s3d_tri.TdXdY01 = val; + break; + case 0xb56c: + virge->s3d_tri.txend01 = val; + break; + case 0xb570: + virge->s3d_tri.TdXdY02 = val; + break; + case 0xb574: + virge->s3d_tri.txs = val; + break; + case 0xb578: + virge->s3d_tri.tys = val; + break; + case 0xb57c: + virge->s3d_tri.ty01 = (val >> 16) & 0x7ff; + virge->s3d_tri.ty12 = val & 0x7ff; + virge->s3d_tri.tlr = val >> 31; + if (virge->s3d_tri.cmd_set & CMD_SET_AE) + queue_triangle(virge); + break; + } + } break; } - virge->fifo[virge->fifo_read_idx].addr_type = FIFO_INVALID; virge->fifo_read_idx++; - if (virge->fifo_read_idx >= FIFO_SIZE) - virge->fifo_read_idx = 0; + fifo->addr_type = FIFO_INVALID; - s3_virge_log("ReadIDX=%d.\n", virge->fifo_read_idx); - } - virge->virge_busy = 0; - virge->subsys_stat |= INT_FIFO_EMP | INT_3DF_EMP; - s3_virge_update_irqs(virge); - virge->fifo_read_idx = 0; - virge->fifo_write_idx = 0; - virge->fifo_state = FIFO_STATE_IDLE; + if (FIFO_ENTRIES > 0xe000) + thread_set_event(virge->fifo_not_full_event); + + end_time = plat_timer_read(); + virge_time += end_time - start_time; + } + virge->virge_busy = 0; + virge->subsys_stat |= INT_FIFO_EMP | INT_3DF_EMP; + s3_virge_update_irqs(virge); } } static void s3_virge_queue(virge_t *virge, uint32_t addr, uint32_t val, uint32_t type) { - if (FIFO_FULL) { - s3_virge_log("FIFO FULL, TBD.\n"); - return; - } + fifo_entry_t *fifo = &virge->fifo[virge->fifo_write_idx & FIFO_MASK]; + int limit = 0; - virge->fifo[virge->fifo_write_idx].val = val; - virge->fifo[virge->fifo_write_idx].addr_type = (addr & FIFO_ADDR) | type; - virge->fifo_write_idx++; - if (virge->fifo_write_idx >= FIFO_SIZE) - virge->fifo_write_idx = 0; - - s3_virge_log("WriteIDX=%d.\n", virge->fifo_write_idx); - virge->fifo_state = FIFO_STATE_RUN; -} - -static void -s3_virge_mmio_write(uint32_t addr, uint8_t val, void *priv) -{ - virge_t *virge = (virge_t *) priv; - s3_virge_log("MMIO WriteB addr = %04x, val = %02x\n", addr & 0xffff, val); - if ((addr & 0xffff) < 0x8000) - s3_virge_queue(virge, addr, val, FIFO_WRITE_BYTE); - else { - switch (addr & 0xffff) { - case 0x83b0: - case 0x83b1: - case 0x83b2: - case 0x83b3: - case 0x83b4: - case 0x83b5: - case 0x83b6: - case 0x83b7: - case 0x83b8: - case 0x83b9: - case 0x83ba: - case 0x83bb: - case 0x83bc: - case 0x83bd: - case 0x83be: - case 0x83bf: - case 0x83c0: - case 0x83c1: - case 0x83c2: - case 0x83c3: - case 0x83c4: - case 0x83c5: - case 0x83c6: - case 0x83c7: - case 0x83c8: - case 0x83c9: - case 0x83ca: - case 0x83cb: - case 0x83cc: - case 0x83cd: - case 0x83ce: - case 0x83cf: - case 0x83d0: - case 0x83d1: - case 0x83d2: - case 0x83d3: - case 0x83d4: - case 0x83d5: - case 0x83d6: - case 0x83d7: - case 0x83d8: - case 0x83d9: - case 0x83da: - case 0x83db: - case 0x83dc: - case 0x83dd: - case 0x83de: - case 0x83df: - s3_virge_out(addr & 0x3ff, val, virge); - break; - - case 0xff20: - virge->serialport = val; - i2c_gpio_set(virge->i2c, !!(val & SERIAL_PORT_SCW), !!(val & SERIAL_PORT_SDW)); - break; - - default: - break; - } - } -} - -static void -s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *priv) -{ - virge_t *virge = (virge_t *) priv; - s3_virge_log("[%04X:%08X]: MMIO WriteW addr = %04x, val = %04x\n", CS, cpu_state.pc, addr & 0xfffe, val); - if ((addr & 0xfffe) < 0x8000) - s3_virge_queue(virge, addr, val, FIFO_WRITE_WORD); - else { - if ((addr & 0xfffe) == 0x83d4) { - s3_virge_mmio_write(addr, val, virge); - s3_virge_mmio_write(addr + 1, val >> 8, virge); - } else if ((addr & 0xfffe) == 0xff20) - s3_virge_mmio_write(addr, val, virge); - } -} - -static void -s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *priv) -{ - virge_t *virge = (virge_t *) priv; - svga_t *svga = &virge->svga; - - s3_virge_log("[%04X:%08X]: MMIO WriteL addr = %04x, val = %04x\n", CS, cpu_state.pc, addr & 0xfffc, val); - if (((addr & 0xfffc) < 0x8000) || ((addr & 0xe000) == 0xa000)) - s3_virge_queue(virge, addr, val, FIFO_WRITE_DWORD); - else { + if (type == FIFO_WRITE_DWORD) { switch (addr & 0xfffc) { - case 0x8180: - virge->streams.pri_ctrl = val; - svga_recalctimings(svga); - svga->fullchange = changeframecount; + case 0xa500: + if (val & CMD_SET_AE) + limit = 1; break; - case 0x8184: - virge->streams.chroma_ctrl = val; - break; - case 0x8190: - virge->streams.sec_ctrl = val; - virge->streams.dda_horiz_accumulator = val & 0xfff; - if (val & (1 << 11)) - virge->streams.dda_horiz_accumulator |= 0xfffff800; - virge->streams.sdif = (val >> 24) & 7; - break; - case 0x8194: - virge->streams.chroma_upper_bound = val; - break; - case 0x8198: - virge->streams.sec_filter = val; - virge->streams.k1_horiz_scale = val & 0x7ff; - if (val & (1 << 10)) - virge->streams.k1_horiz_scale |= 0xfffff800; - virge->streams.k2_horiz_scale = (val >> 16) & 0x7ff; - if ((val >> 16) & (1 << 10)) - virge->streams.k2_horiz_scale |= 0xfffff800; - break; - case 0x81a0: - virge->streams.blend_ctrl = val; - svga_recalctimings(svga); - break; - case 0x81c0: - virge->streams.pri_fb0 = val & 0x7fffff; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; - break; - case 0x81c4: - virge->streams.pri_fb1 = val & 0x7fffff; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; - break; - case 0x81c8: - virge->streams.pri_stride = val & 0xfff; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; - break; - case 0x81cc: - virge->streams.buffer_ctrl = val; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; - break; - case 0x81d0: - virge->streams.sec_fb0 = val; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; - break; - case 0x81d4: - virge->streams.sec_fb1 = val; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; - break; - case 0x81d8: - virge->streams.sec_stride = val; - svga->fullchange = changeframecount; - break; - case 0x81dc: - virge->streams.overlay_ctrl = val; - break; - case 0x81e0: - virge->streams.k1_vert_scale = val & 0x7ff; - if (val & (1 << 10)) - virge->streams.k1_vert_scale |= 0xfffff800; - break; - case 0x81e4: - virge->streams.k2_vert_scale = val & 0x7ff; - if (val & (1 << 10)) - virge->streams.k2_vert_scale |= 0xfffff800; - break; - case 0x81e8: - virge->streams.dda_vert_accumulator = val & 0xfff; - if (val & (1 << 11)) - virge->streams.dda_vert_accumulator |= 0xfffff800; - break; - case 0x81ec: - virge->streams.fifo_ctrl = val; - break; - case 0x81f0: - virge->streams.pri_start = val; - virge->streams.pri_x = (val >> 16) & 0x7ff; - virge->streams.pri_y = val & 0x7ff; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; - case 0x81f4: - virge->streams.pri_size = val; - virge->streams.pri_w = (val >> 16) & 0x7ff; - virge->streams.pri_h = val & 0x7ff; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; - case 0x81f8: - virge->streams.sec_start = val; - virge->streams.sec_x = (val >> 16) & 0x7ff; - virge->streams.sec_y = val & 0x7ff; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; - case 0x81fc: - virge->streams.sec_size = val; - virge->streams.sec_w = (val >> 16) & 0x7ff; - virge->streams.sec_h = val & 0x7ff; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; - - case 0x8504: - virge->subsys_stat &= ~(val & 0xff); - virge->subsys_cntl = (val >> 8); - s3_virge_update_irqs(virge); - break; - - case 0x850c: - virge->advfunc_cntl = val & 0xff; - s3_virge_updatemapping(virge); - break; - - case 0xff20: - s3_virge_mmio_write(addr, val, virge); - break; - default: - s3_virge_log("Actual WriteL=%04x.\n", addr & 0xfffc); break; } } + + if (limit) { + if (FIFO_ENTRIES >= 16) { + thread_reset_event(virge->fifo_not_full_event); + if (FIFO_ENTRIES >= 16) + thread_wait_event(virge->fifo_not_full_event, -1); /*Wait for room in ringbuffer*/ + } + } else { + if (FIFO_FULL) { + thread_reset_event(virge->fifo_not_full_event); + if (FIFO_FULL) + thread_wait_event(virge->fifo_not_full_event, -1); /*Wait for room in ringbuffer*/ + } + } + + fifo->val = val; + fifo->addr_type = (addr & FIFO_ADDR) | type; + + virge->fifo_write_idx++; + + if (FIFO_ENTRIES > 0xe000) + wake_fifo_thread(virge); + if (FIFO_ENTRIES > 0xe000 || FIFO_ENTRIES < 8) + wake_fifo_thread(virge); } -#define READ(addr, val) \ - { \ - switch (bpp) { \ - case 0: /*8 bpp*/ \ - val = vram[addr & virge->vram_mask]; \ - break; \ - case 1: /*16 bpp*/ \ - val = *(uint16_t *) &vram[addr & virge->vram_mask]; \ - break; \ - case 2: /*24 bpp*/ \ - val = (*(uint32_t *) &vram[addr & virge->vram_mask]) & 0xffffff; \ - break; \ - } \ - } +static void +s3_virge_mmio_write(uint32_t addr, uint8_t val, void *priv) { + virge_t *virge = (virge_t *) priv; -#define CLIP(x, y) \ - { \ - if ((virge->s3d.cmd_set & CMD_SET_HC) && (x < virge->s3d.clip_l || x > virge->s3d.clip_r || y < virge->s3d.clip_t || y > virge->s3d.clip_b)) \ - update = 0; \ - } + if ((addr & 0xfffc) < 0x8000) + s3_virge_queue(virge, addr, val, FIFO_WRITE_BYTE); + else switch (addr & 0xffff) { + case 0x83b0 ... 0x83df: + s3_virge_out(addr & 0x3ff, val, priv); + break; -#define CLIP_3D(x, y) \ - { \ - if ((s3d_tri->cmd_set & CMD_SET_HC) && (x < s3d_tri->clip_l || x > s3d_tri->clip_r || y < s3d_tri->clip_t || y > s3d_tri->clip_b)) \ - update = 0; \ - } - -#define MIX() \ - { \ - int c; \ - for (c = 0; c < 24; c++) { \ - int d = (dest & (1 << c)) ? 1 : 0; \ - if (source & (1 << c)) \ - d |= 2; \ - if (pattern & (1 << c)) \ - d |= 4; \ - if (virge->s3d.rop & (1 << d)) \ - out |= (1 << c); \ - } \ - } - -#define WRITE(addr, val) \ - { \ - switch (bpp) { \ - case 0: /*8 bpp*/ \ - vram[addr & virge->vram_mask] = val; \ - svga->changedvram[(addr & virge->vram_mask) >> 12] = changeframecount; \ - break; \ - case 1: /*16 bpp*/ \ - *(uint16_t *) &vram[addr & virge->vram_mask] = val; \ - svga->changedvram[(addr & virge->vram_mask) >> 12] = changeframecount; \ - break; \ - case 2: /*24 bpp*/ \ - *(uint32_t *) &vram[addr & virge->vram_mask] = (val & 0xffffff) | (vram[(addr + 3) & virge->vram_mask] << 24); \ - svga->changedvram[(addr & virge->vram_mask) >> 12] = changeframecount; \ - break; \ - } \ + case 0xff20: + virge->serialport = val; + i2c_gpio_set(virge->i2c, !!(val & SERIAL_PORT_SCW), !!(val & SERIAL_PORT_SDW)); + break; } +} static void -s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) -{ - svga_t *svga = &virge->svga; - uint8_t *vram = svga->vram; - uint32_t mono_pattern[64]; - int count_mask; - int x_inc = (virge->s3d.cmd_set & CMD_SET_XP) ? 1 : -1; - int y_inc = (virge->s3d.cmd_set & CMD_SET_YP) ? 1 : -1; - int bpp; - int x_mul; - int cpu_dat_shift; - const uint32_t *pattern_data; - uint32_t src_fg_clr; - uint32_t src_bg_clr; - uint32_t src_addr; - uint32_t dest_addr; - uint32_t source = 0; - uint32_t dest = 0; - uint32_t pattern; - uint32_t out = 0; - int update; +s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *priv) { + virge_t *virge = (virge_t *) priv; + + if ((addr & 0xfffc) < 0x8000) + s3_virge_queue(virge, addr, val, FIFO_WRITE_WORD); + else switch (addr & 0xfffe) { + case 0x83d4: + s3_virge_mmio_write(addr, val, priv); + s3_virge_mmio_write(addr + 1, val >> 8, priv); + break; + + case 0xff20: + s3_virge_mmio_write(addr, val, priv); + break; + } +} + +static void +s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *priv) { + virge_t *virge = (virge_t *) priv; + svga_t * svga = &virge->svga; + + if ((addr & 0xfffc) < 0x8000) + s3_virge_queue(virge, addr, val, FIFO_WRITE_DWORD); + else if ((addr & 0xe000) == 0xa000) + s3_virge_queue(virge, addr, val, FIFO_WRITE_DWORD); + else switch (addr & 0xfffc) { + case 0x8180: + virge->streams.pri_ctrl = val; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x8184: + virge->streams.chroma_ctrl = val; + break; + case 0x8190: + virge->streams.sec_ctrl = val; + virge->streams.dda_horiz_accumulator = val & 0xfff; + if (val & (1 << 11)) + virge->streams.dda_horiz_accumulator |= 0xfffff800; + virge->streams.sdif = (val >> 24) & 7; + break; + case 0x8194: + virge->streams.chroma_upper_bound = val; + break; + case 0x8198: + virge->streams.sec_filter = val; + virge->streams.k1_horiz_scale = val & 0x7ff; + if (val & (1 << 10)) + virge->streams.k1_horiz_scale |= 0xfffff800; + virge->streams.k2_horiz_scale = (val >> 16) & 0x7ff; + if ((val >> 16) & (1 << 10)) + virge->streams.k2_horiz_scale |= 0xfffff800; + break; + case 0x81a0: + virge->streams.blend_ctrl = val; + break; + case 0x81c0: + virge->streams.pri_fb0 = val & ((virge->memory_size == 8) ? + (val & 0x7fffff) : (val & 0x3fffff)); + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81c4: + virge->streams.pri_fb1 = ((virge->memory_size == 8) ? + (val & 0x7fffff) : (val & 0x3fffff)); + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81c8: + virge->streams.pri_stride = val & 0xfff; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81cc: + virge->streams.buffer_ctrl = val; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81d0: + virge->streams.sec_fb0 = val; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81d4: + virge->streams.sec_fb1 = val; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81d8: + virge->streams.sec_stride = val; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81dc: + virge->streams.overlay_ctrl = val; + break; + case 0x81e0: + virge->streams.k1_vert_scale = val & 0x7ff; + if (val & (1 << 10)) + virge->streams.k1_vert_scale |= 0xfffff800; + break; + case 0x81e4: + virge->streams.k2_vert_scale = val & 0x7ff; + if (val & (1 << 10)) + virge->streams.k2_vert_scale |= 0xfffff800; + break; + case 0x81e8: + virge->streams.dda_vert_accumulator = val & 0xfff; + if (val & (1 << 11)) + virge->streams.dda_vert_accumulator |= 0xfffff800; + break; + case 0x81ec: + virge->streams.fifo_ctrl = val; + break; + case 0x81f0: + virge->streams.pri_start = val; + virge->streams.pri_x = (val >> 16) & 0x7ff; + virge->streams.pri_y = val & 0x7ff; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81f4: + virge->streams.pri_size = val; + virge->streams.pri_w = (val >> 16) & 0x7ff; + virge->streams.pri_h = val & 0x7ff; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81f8: + virge->streams.sec_start = val; + virge->streams.sec_x = (val >> 16) & 0x7ff; + virge->streams.sec_y = val & 0x7ff; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81fc: + virge->streams.sec_size = val; + virge->streams.sec_w = (val >> 16) & 0x7ff; + virge->streams.sec_h = val & 0x7ff; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + + case 0x8504: + virge->subsys_stat &= ~(val & 0xff); + virge->subsys_cntl = (val >> 8); + s3_virge_update_irqs(virge); + break; + + case 0x850c: + virge->advfunc_cntl = val & 0xff; + s3_virge_updatemapping(virge); + break; + + case 0xff20: + s3_virge_mmio_write(addr, val, priv); + break; + } +} + +#define READ(addr, val) \ + do { \ + switch (bpp) { \ + case 0: /*8 bpp*/ \ + val = vram[addr & svga->vram_mask]; \ + break; \ + case 1: /*16 bpp*/ \ + val = *(uint16_t *)&vram[addr & svga->vram_mask]; \ + break; \ + case 2: /*24 bpp*/ \ + val = (*(uint32_t *)&vram[addr & svga->vram_mask]) & 0xffffff; \ + break; \ + } \ + } while (0) + +#define Z_READ(addr) *(uint16_t *)&vram[addr & svga->vram_mask] + +#define Z_WRITE(addr, val) \ + if (!(s3d_tri->cmd_set & CMD_SET_ZB_MODE)) \ + *(uint16_t *)&vram[addr & svga->vram_mask] = val + +#define CLIP(x, y) \ + do { \ + if ((virge->s3d.cmd_set & CMD_SET_HC) && \ + (x < virge->s3d.clip_l || x > virge->s3d.clip_r || \ + y < virge->s3d.clip_t || y > virge->s3d.clip_b)) \ + update = 0; \ + } while (0) + +#define CLIP_3D(x, y) \ + do { \ + if ((s3d_tri->cmd_set & CMD_SET_HC) && (x < s3d_tri->clip_l || \ + x > s3d_tri->clip_r || y < s3d_tri->clip_t || \ + y > s3d_tri->clip_b)) \ + update = 0; \ + } while (0) + +#define Z_CLIP(Zzb, Zs) \ + do { \ + if (!(s3d_tri->cmd_set & CMD_SET_ZB_MODE)) \ + switch ((s3d_tri->cmd_set >> 20) & 7) { \ + case 0: \ + update = 0; \ + break; \ + case 1: \ + if (Zs <= Zzb) \ + update = 0; \ + else \ + Zzb = Zs; \ + break; \ + case 2: \ + if (Zs != Zzb) \ + update = 0; \ + else \ + Zzb = Zs; \ + break; \ + case 3: \ + if (Zs < Zzb) \ + update = 0; \ + else \ + Zzb = Zs; \ + break; \ + case 4: \ + if (Zs >= Zzb) \ + update = 0; \ + else \ + Zzb = Zs; \ + break; \ + case 5: \ + if (Zs == Zzb) \ + update = 0; \ + else \ + Zzb = Zs; \ + break; \ + case 6: \ + if (Zs > Zzb) \ + update = 0; \ + else \ + Zzb = Zs; \ + break; \ + case 7: \ + update = 1; \ + Zzb = Zs; \ + break; \ + } \ + } while (0) + +#define MIX() \ + do { \ + int c; \ + for (c = 0; c < 24; c++) { \ + int d = (dest & (1 << c)) ? 1 : 0; \ + if (source & (1 << c)) \ + d |= 2; \ + if (pattern & (1 << c)) \ + d |= 4; \ + if (virge->s3d.rop & (1 << d)) \ + out |= (1 << c); \ + } \ + } while (0) + +#define WRITE(addr, val) \ + do { \ + switch (bpp) { \ + case 0: /*8 bpp*/ \ + vram[addr & svga->vram_mask] = val; \ + virge->svga.changedvram[(addr & svga->vram_mask) >> 12] = \ + changeframecount; \ + break; \ + case 1: /*16 bpp*/ \ + *(uint16_t *)&vram[addr & svga->vram_mask] = val; \ + virge->svga.changedvram[(addr & svga->vram_mask) >> 12] = \ + changeframecount; \ + break; \ + case 2: /*24 bpp*/ \ + *(uint32_t *)&vram[addr & svga->vram_mask] = (val & 0xffffff) |\ + (vram[(addr + 3) & svga->vram_mask] << 24); \ + virge->svga.changedvram[(addr & svga->vram_mask) >> 12] = \ + changeframecount; \ + break; \ + } \ + } while (0) + +static void +s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) { + svga_t *svga = &virge->svga; + uint8_t *vram = virge->svga.vram; + uint32_t mono_pattern[64]; + int count_mask; + int x_inc = (virge->s3d.cmd_set & CMD_SET_XP) ? 1 : -1; + int y_inc = (virge->s3d.cmd_set & CMD_SET_YP) ? 1 : -1; + int bpp; + int x_mul; + int cpu_dat_shift; + uint32_t *pattern_data; + uint32_t src_fg_clr; + uint32_t src_bg_clr; switch (virge->s3d.cmd_set & CMD_SET_FORMAT_MASK) { case CMD_SET_FORMAT_8: - bpp = 0; - x_mul = 1; + bpp = 0; + x_mul = 1; cpu_dat_shift = 8; - pattern_data = virge->s3d.pattern_8; - src_fg_clr = virge->s3d.src_fg_clr & 0xff; - src_bg_clr = virge->s3d.src_bg_clr & 0xff; + pattern_data = virge->s3d.pattern_8; + src_fg_clr = virge->s3d.src_fg_clr & 0xff; + src_bg_clr = virge->s3d.src_bg_clr & 0xff; break; case CMD_SET_FORMAT_16: - bpp = 1; - x_mul = 2; + bpp = 1; + x_mul = 2; cpu_dat_shift = 16; - pattern_data = virge->s3d.pattern_16; - src_fg_clr = virge->s3d.src_fg_clr & 0xffff; - src_bg_clr = virge->s3d.src_bg_clr & 0xffff; + pattern_data = virge->s3d.pattern_16; + src_fg_clr = virge->s3d.src_fg_clr & 0xffff; + src_bg_clr = virge->s3d.src_bg_clr & 0xffff; break; case CMD_SET_FORMAT_24: default: - bpp = 2; - x_mul = 3; + bpp = 2; + x_mul = 3; cpu_dat_shift = 24; - pattern_data = virge->s3d.pattern_24; - src_fg_clr = virge->s3d.src_fg_clr; - src_bg_clr = virge->s3d.src_bg_clr; + pattern_data = virge->s3d.pattern_24; + src_fg_clr = virge->s3d.src_fg_clr; + src_bg_clr = virge->s3d.src_bg_clr; break; - } - if (virge->s3d.cmd_set & CMD_SET_MP) - pattern_data = mono_pattern; + } + if (virge->s3d.cmd_set & CMD_SET_MP) + pattern_data = mono_pattern; switch (virge->s3d.cmd_set & CMD_SET_ITA_MASK) { case CMD_SET_ITA_BYTE: @@ -2372,55 +2209,51 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) break; } if (virge->s3d.cmd_set & CMD_SET_MP) { - for (uint8_t y = 0; y < 4; y++) { - for (uint8_t x = 0; x < 8; x++) { - if (virge->s3d.mono_pat_0 & (1 << (x + y * 8))) - mono_pattern[y * 8 + (7 - x)] = virge->s3d.pat_fg_clr; - else - mono_pattern[y * 8 + (7 - x)] = virge->s3d.pat_bg_clr; - if (virge->s3d.mono_pat_1 & (1 << (x + y * 8))) - mono_pattern[(y + 4) * 8 + (7 - x)] = virge->s3d.pat_fg_clr; - else - mono_pattern[(y + 4) * 8 + (7 - x)] = virge->s3d.pat_bg_clr; - } + int x; + int y; + for (y = 0; y < 4; y++) { + for (x = 0; x < 8; x++) { + if (virge->s3d.mono_pat_0 & (1 << (x + y * 8))) + mono_pattern[y * 8 + (7 - x)] = virge->s3d.pat_fg_clr; + else + mono_pattern[y * 8 + (7 - x)] = virge->s3d.pat_bg_clr; + if (virge->s3d.mono_pat_1 & (1 << (x + y * 8))) + mono_pattern[(y + 4) * 8 + (7 - x)] = virge->s3d.pat_fg_clr; + else + mono_pattern[(y + 4) * 8 + (7 - x)] = virge->s3d.pat_bg_clr; + } } } - switch (virge->s3d.cmd_set & CMD_SET_COMMAND_MASK) { + case CMD_SET_COMMAND_NOP: + break; + case CMD_SET_COMMAND_BITBLT: if (count == -1) { - virge->s3d.src_x = virge->s3d.rsrc_x; - virge->s3d.src_y = virge->s3d.rsrc_y; - virge->s3d.dest_x = virge->s3d.rdest_x; - virge->s3d.dest_y = virge->s3d.rdest_y; - virge->s3d.w = virge->s3d.r_width; - virge->s3d.h = virge->s3d.r_height; - virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff; + virge->s3d.src_x = virge->s3d.rsrc_x; + virge->s3d.src_y = virge->s3d.rsrc_y; + virge->s3d.dest_x = virge->s3d.rdest_x; + virge->s3d.dest_y = virge->s3d.rdest_y; + virge->s3d.w = virge->s3d.r_width; + virge->s3d.h = virge->s3d.r_height; + virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff; virge->s3d.data_left_count = 0; - s3_virge_log("BitBlt start src_x=%i,src_y=%i,dest_x=%i,dest_y=%i,w=%i,h=%i,rop=%02X,src_base=%x,dest_base=%x\n", - virge->s3d.src_x, - virge->s3d.src_y, - virge->s3d.dest_x, - virge->s3d.dest_y, - virge->s3d.w, - virge->s3d.h, - virge->s3d.rop, - virge->s3d.src_base, - virge->s3d.dest_base); - if (virge->s3d.cmd_set & CMD_SET_IDS) return; } - if (!virge->s3d.h) return; - while (count) { - src_addr = virge->s3d.src_base + (virge->s3d.src_x * x_mul) + (virge->s3d.src_y * virge->s3d.src_str); - dest_addr = virge->s3d.dest_base + (virge->s3d.dest_x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str); - out = 0; - update = 1; + uint32_t src_addr = virge->s3d.src_base + (virge->s3d.src_x * x_mul) + + (virge->s3d.src_y * virge->s3d.src_str); + uint32_t dest_addr = virge->s3d.dest_base + (virge->s3d.dest_x * x_mul) + + (virge->s3d.dest_y * virge->s3d.dest_str); + uint32_t source = 0; + uint32_t dest; + uint32_t pattern; + uint32_t out = 0; + int update = 1; switch (virge->s3d.cmd_set & (CMD_SET_MS | CMD_SET_IDS)) { case 0: @@ -2438,18 +2271,18 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) count -= (cpu_dat_shift - virge->s3d.data_left_count); virge->s3d.data_left_count = 0; if (count < cpu_dat_shift) { - virge->s3d.data_left = cpu_dat; + virge->s3d.data_left = cpu_dat; virge->s3d.data_left_count = count; - count = 0; + count = 0; } } else { source = cpu_dat; cpu_dat >>= cpu_dat_shift; count -= cpu_dat_shift; if (count < cpu_dat_shift) { - virge->s3d.data_left = cpu_dat; + virge->s3d.data_left = cpu_dat; virge->s3d.data_left_count = count; - count = 0; + count = 0; } } if ((virge->s3d.cmd_set & CMD_SET_TP) && source == src_fg_clr) @@ -2462,9 +2295,6 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) cpu_dat <<= 1; count--; break; - - default: - break; } CLIP(virge->s3d.dest_x, virge->s3d.dest_y); @@ -2482,9 +2312,9 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) virge->s3d.dest_x += x_inc; virge->s3d.dest_x &= 0x7ff; if (!virge->s3d.w) { - virge->s3d.src_x = virge->s3d.rsrc_x; + virge->s3d.src_x = virge->s3d.rsrc_x; virge->s3d.dest_x = virge->s3d.rdest_x; - virge->s3d.w = virge->s3d.r_width; + virge->s3d.w = virge->s3d.r_width; virge->s3d.src_y += y_inc; virge->s3d.dest_y += y_inc; @@ -2501,9 +2331,6 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) cpu_dat <<= (count - (count & count_mask)); count &= count_mask; break; - - default: - break; } if (!virge->s3d.h) return; @@ -2515,27 +2342,23 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) case CMD_SET_COMMAND_RECTFILL: /*No source, pattern = pat_fg_clr*/ if (count == -1) { - virge->s3d.src_x = virge->s3d.rsrc_x; - virge->s3d.src_y = virge->s3d.rsrc_y; + virge->s3d.src_x = virge->s3d.rsrc_x; + virge->s3d.src_y = virge->s3d.rsrc_y; virge->s3d.dest_x = virge->s3d.rdest_x; virge->s3d.dest_y = virge->s3d.rdest_y; - virge->s3d.w = virge->s3d.r_width; - virge->s3d.h = virge->s3d.r_height; - virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff; - - s3_virge_log("RctFll start %i,%i %i,%i %02X %08x\n", virge->s3d.dest_x, - virge->s3d.dest_y, - virge->s3d.w, - virge->s3d.h, - virge->s3d.rop, virge->s3d.dest_base); + virge->s3d.w = virge->s3d.r_width; + virge->s3d.h = virge->s3d.r_height; + virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff; } while (count && virge->s3d.h) { - source = virge->s3d.pat_fg_clr; - dest_addr = virge->s3d.dest_base + (virge->s3d.dest_x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str); - pattern = virge->s3d.pat_fg_clr; - out = 0; - update = 1; + uint32_t dest_addr = virge->s3d.dest_base + (virge->s3d.dest_x * x_mul) + + (virge->s3d.dest_y * virge->s3d.dest_str); + uint32_t source = 0; + uint32_t dest; + uint32_t pattern = virge->s3d.pat_fg_clr; + uint32_t out = 0; + int update = 1; CLIP(virge->s3d.dest_x, virge->s3d.dest_y); @@ -2552,9 +2375,9 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) virge->s3d.dest_x += x_inc; virge->s3d.dest_x &= 0x7ff; if (!virge->s3d.w) { - virge->s3d.src_x = virge->s3d.rsrc_x; + virge->s3d.src_x = virge->s3d.rsrc_x; virge->s3d.dest_x = virge->s3d.rdest_x; - virge->s3d.w = virge->s3d.r_width; + virge->s3d.w = virge->s3d.r_width; virge->s3d.src_y += y_inc; virge->s3d.dest_y += y_inc; @@ -2562,8 +2385,7 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) if (!virge->s3d.h) return; } else - virge->s3d.w--; - + virge->s3d.w--; count--; } break; @@ -2572,8 +2394,8 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) if (count == -1) { virge->s3d.dest_x = virge->s3d.lxstart; virge->s3d.dest_y = virge->s3d.lystart; - virge->s3d.h = virge->s3d.lycnt; - virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff; + virge->s3d.h = virge->s3d.lycnt; + virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff; } while (virge->s3d.h) { int x; @@ -2582,7 +2404,8 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) x = virge->s3d.dest_x >> 20; - if (virge->s3d.h == virge->s3d.lycnt && ((virge->s3d.line_dir && x > virge->s3d.lxend0) || (!virge->s3d.line_dir && x < virge->s3d.lxend0))) + if (virge->s3d.h == virge->s3d.lycnt && ((virge->s3d.line_dir && x > virge->s3d.lxend0) || + (!virge->s3d.line_dir && x < virge->s3d.lxend0))) x = virge->s3d.lxend0; if (virge->s3d.h == 1) @@ -2594,17 +2417,22 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) goto skip_line; do { - uint32_t dest_addr = virge->s3d.dest_base + (x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str); - uint32_t source = 0; - uint32_t dest = 0; + uint32_t dest_addr = virge->s3d.dest_base + (x * x_mul) + + (virge->s3d.dest_y * virge->s3d.dest_str); + uint32_t source = 0; + uint32_t dest; uint32_t pattern; - uint32_t out = 0; - int update = 1; + uint32_t out = 0; + int update = 1; - if ((virge->s3d.h == virge->s3d.lycnt || !first_pixel) && ((virge->s3d.line_dir && x < virge->s3d.lxend0) || (!virge->s3d.line_dir && x > virge->s3d.lxend0))) + if ((virge->s3d.h == virge->s3d.lycnt || !first_pixel) && + ((virge->s3d.line_dir && x < virge->s3d.lxend0) || + (!virge->s3d.line_dir && x > virge->s3d.lxend0))) update = 0; - if ((virge->s3d.h == 1 || !first_pixel) && ((virge->s3d.line_dir && x > virge->s3d.lxend1) || (!virge->s3d.line_dir && x < virge->s3d.lxend1))) + if ((virge->s3d.h == 1 || !first_pixel) && + ((virge->s3d.line_dir && x > virge->s3d.lxend1) || + (!virge->s3d.line_dir && x < virge->s3d.lxend1))) update = 0; CLIP(x, virge->s3d.dest_y); @@ -2638,68 +2466,71 @@ skip_line: virge->s3d.dest_r = virge->s3d.prxstart; if (virge->s3d.pycnt & (1 << 29)) virge->s3d.dest_l = virge->s3d.plxstart; - virge->s3d.h = virge->s3d.pycnt & 0x7ff; + virge->s3d.h = virge->s3d.pycnt & 0x7ff; virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff; while (virge->s3d.h) { - int x = virge->s3d.dest_l >> 20; - int xend = virge->s3d.dest_r >> 20; - int y = virge->s3d.pystart & 0x7ff; - int xdir = (x < xend) ? 1 : -1; - do { - uint32_t dest_addr = virge->s3d.dest_base + (x * x_mul) + (y * virge->s3d.dest_str); - uint32_t source = 0; - uint32_t dest = 0; - uint32_t pattern; - uint32_t out = 0; - int update = 1; + int x = virge->s3d.dest_l >> 20; + int xend = virge->s3d.dest_r >> 20; + int y = virge->s3d.pystart & 0x7ff; + int xdir = (x < xend) ? 1 : -1; + do { + uint32_t dest_addr = virge->s3d.dest_base + (x * x_mul) + (y * virge->s3d.dest_str); + uint32_t source = 0; + uint32_t dest; + uint32_t pattern; + uint32_t out = 0; + int update = 1; - CLIP(x, y); + CLIP(x, y); - if (update) { - READ(dest_addr, dest); - pattern = pattern_data[(y & 7) * 8 + (x & 7)]; - MIX(); + if (update) { + READ(dest_addr, dest); + pattern = pattern_data[(y & 7) * 8 + (x & 7)]; - WRITE(dest_addr, out); - } + MIX(); - x = (x + xdir) & 0x7ff; - } while (x != (xend + xdir)); + WRITE(dest_addr, out); + } - virge->s3d.dest_l += virge->s3d.pldx; - virge->s3d.dest_r += virge->s3d.prdx; - virge->s3d.h--; - virge->s3d.pystart = (virge->s3d.pystart - 1) & 0x7ff; + x = (x + xdir) & 0x7ff; + } while (x != (xend + xdir)); + + virge->s3d.dest_l += virge->s3d.pldx; + virge->s3d.dest_r += virge->s3d.prdx; + virge->s3d.h--; + virge->s3d.pystart = (virge->s3d.pystart - 1) & 0x7ff; } break; - case CMD_SET_COMMAND_NOP: - break; - default: - break; + fatal("s3_virge_bitblt : blit command %i %08x\n", + (virge->s3d.cmd_set >> 27) & 0xf, virge->s3d.cmd_set); } } -#define RGB15_TO_24(val, r, g, b) \ - b = ((val & 0x001f) << 3) | ((val & 0x001f) >> 2); \ - g = ((val & 0x03e0) >> 2) | ((val & 0x03e0) >> 7); \ - r = ((val & 0x7c00) >> 7) | ((val & 0x7c00) >> 12); +#define RGB15_TO_24(val, r, g, b) \ + b = ((val & 0x001f) << 3) | ((val & 0x001f) >> 2); \ + g = ((val & 0x03e0) >> 2) | ((val & 0x03e0) >> 7); \ + r = ((val & 0x7c00) >> 7) | ((val & 0x7c00) >> 12); -#define RGB24_TO_24(val, r, g, b) \ - b = val & 0xff; \ - g = (val & 0xff00) >> 8; \ - r = (val & 0xff0000) >> 16 +#define RGB24_TO_24(val, r, g, b) \ + b = val & 0xff; \ + g = (val & 0xff00) >> 8; \ + r = (val & 0xff0000) >> 16 -#define RGB15(r, g, b, dest) \ - if (virge->dithering_enabled) { \ - int add = dither[_y & 3][_x & 3]; \ - int _r = (r > 248) ? 248 : r + add; \ - int _g = (g > 248) ? 248 : g + add; \ - int _b = (b > 248) ? 248 : b + add; \ - dest = ((_b >> 3) & 0x1f) | (((_g >> 3) & 0x1f) << 5) | (((_r >> 3) & 0x1f) << 10); \ - } else \ - dest = ((b >> 3) & 0x1f) | (((g >> 3) & 0x1f) << 5) | (((r >> 3) & 0x1f) << 10) +#define RGB15(r, g, b, dest) \ + if (virge->dithering_enabled) { \ + int add = dither[_y & 3][_x & 3]; \ + int _r = (r > 248) ? 248 : r + add; \ + int _g = (g > 248) ? 248 : g + add; \ + int _b = (b > 248) ? 248 : b + add; \ + dest = ((_b >> 3) & 0x1f) | \ + (((_g >> 3) & 0x1f) << 5) | \ + (((_r >> 3) & 0x1f) << 10); \ + } else \ + dest = ((b >> 3) & 0x1f) | \ + (((g >> 3) & 0x1f) << 5) | \ + (((r >> 3) & 0x1f) << 10) #define RGB24(r, g, b) ((b) | ((g) << 8) | ((r) << 16)) @@ -2708,32 +2539,50 @@ typedef struct rgba_t { } rgba_t; typedef struct s3d_state_t { - int32_t r, g, b, a, u, v, d, w; + int32_t r; + int32_t g; + int32_t b; + int32_t a; + int32_t u; + int32_t v; + int32_t d; + int32_t w; - int32_t base_r, base_g, base_b, base_a, base_u, base_v, base_d, base_w; + int32_t base_r; + int32_t base_g; + int32_t base_b; + int32_t base_a; + int32_t base_u; + int32_t base_v; + int32_t base_d; + int32_t base_w; - uint32_t base_z; + uint32_t base_z; - uint32_t tbu, tbv; + uint32_t tbu; + uint32_t tbv; - uint32_t cmd_set; - int max_d; + uint32_t cmd_set; + int max_d; uint16_t *texture[10]; - uint32_t tex_bdr_clr; + uint32_t tex_bdr_clr; - int32_t x1, x2; - int y; + int32_t x1; + int32_t x2; - rgba_t dest_rgba; + int y; + + rgba_t dest_rgba; } s3d_state_t; typedef struct s3d_texture_state_t { - int level; - int texture_shift; + int level; + int texture_shift; - int32_t u, v; + int32_t u; + int32_t v; } s3d_texture_state_t; static void (*tex_read)(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out); @@ -2747,10 +2596,10 @@ static int _x; static int _y; static void -tex_ARGB1555(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) -{ - int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); - uint16_t val = state->texture[texture_state->level][offset]; +tex_ARGB1555(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) { + int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); + uint16_t val = state->texture[texture_state->level][offset]; out->r = ((val & 0x7c00) >> 7) | ((val & 0x7000) >> 12); out->g = ((val & 0x03e0) >> 2) | ((val & 0x0380) >> 7); @@ -2759,10 +2608,10 @@ tex_ARGB1555(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out } static void -tex_ARGB1555_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) -{ - int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); - uint16_t val = state->texture[texture_state->level][offset]; +tex_ARGB1555_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) { + int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); + uint16_t val = state->texture[texture_state->level][offset]; if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000) val = state->tex_bdr_clr; @@ -2774,10 +2623,10 @@ tex_ARGB1555_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba } static void -tex_ARGB4444(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) -{ - int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); - uint16_t val = state->texture[texture_state->level][offset]; +tex_ARGB4444(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) { + int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); + uint16_t val = state->texture[texture_state->level][offset]; out->r = ((val & 0x0f00) >> 4) | ((val & 0x0f00) >> 8); out->g = (val & 0x00f0) | ((val & 0x00f0) >> 4); @@ -2786,10 +2635,10 @@ tex_ARGB4444(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out } static void -tex_ARGB4444_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) -{ - int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); - uint16_t val = state->texture[texture_state->level][offset]; +tex_ARGB4444_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) { + int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); + uint16_t val = state->texture[texture_state->level][offset]; if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000) val = state->tex_bdr_clr; @@ -2801,21 +2650,22 @@ tex_ARGB4444_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba } static void -tex_ARGB8888(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) -{ - int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); - uint32_t val = ((uint32_t *) state->texture[texture_state->level])[offset]; +tex_ARGB8888(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) { + int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); + uint32_t val = ((uint32_t *)state->texture[texture_state->level])[offset]; out->r = (val >> 16) & 0xff; out->g = (val >> 8) & 0xff; out->b = val & 0xff; out->a = (val >> 24) & 0xff; } + static void -tex_ARGB8888_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) -{ - int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); - uint32_t val = ((uint32_t *) state->texture[texture_state->level])[offset]; +tex_ARGB8888_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) { + int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); + uint32_t val = ((uint32_t *)state->texture[texture_state->level])[offset]; if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000) val = state->tex_bdr_clr; @@ -2827,21 +2677,19 @@ tex_ARGB8888_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba } static void -tex_sample_normal(s3d_state_t *state) -{ +tex_sample_normal(s3d_state_t *state) { s3d_texture_state_t texture_state; - texture_state.level = state->max_d; + texture_state.level = state->max_d; texture_state.texture_shift = 18 + (9 - texture_state.level); - texture_state.u = state->u + state->tbu; - texture_state.v = state->v + state->tbv; + texture_state.u = state->u + state->tbu; + texture_state.v = state->v + state->tbv; tex_read(state, &texture_state, &state->dest_rgba); } static void -tex_sample_normal_filter(s3d_state_t *state) -{ +tex_sample_normal_filter(s3d_state_t *state) { s3d_texture_state_t texture_state; int tex_offset; rgba_t tex_samples[4]; @@ -2849,9 +2697,9 @@ tex_sample_normal_filter(s3d_state_t *state) int dv; int d[4]; - texture_state.level = state->max_d; + texture_state.level = state->max_d; texture_state.texture_shift = 18 + (9 - texture_state.level); - tex_offset = 1 << texture_state.texture_shift; + tex_offset = 1 << texture_state.texture_shift; texture_state.u = state->u + state->tbu; texture_state.v = state->v + state->tbv; @@ -2876,30 +2724,32 @@ tex_sample_normal_filter(s3d_state_t *state) d[2] = (256 - du) * dv; d[3] = du * dv; - state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; - state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; - state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; - state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; } static void -tex_sample_mipmap(s3d_state_t *state) -{ +tex_sample_mipmap(s3d_state_t *state) { s3d_texture_state_t texture_state; texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf); if (texture_state.level < 0) texture_state.level = 0; texture_state.texture_shift = 18 + (9 - texture_state.level); - texture_state.u = state->u + state->tbu; - texture_state.v = state->v + state->tbv; + texture_state.u = state->u + state->tbu; + texture_state.v = state->v + state->tbv; tex_read(state, &texture_state, &state->dest_rgba); } static void -tex_sample_mipmap_filter(s3d_state_t *state) -{ +tex_sample_mipmap_filter(s3d_state_t *state) { s3d_texture_state_t texture_state; int tex_offset; rgba_t tex_samples[4]; @@ -2911,7 +2761,7 @@ tex_sample_mipmap_filter(s3d_state_t *state) if (texture_state.level < 0) texture_state.level = 0; texture_state.texture_shift = 18 + (9 - texture_state.level); - tex_offset = 1 << texture_state.texture_shift; + tex_offset = 1 << texture_state.texture_shift; texture_state.u = state->u + state->tbu; texture_state.v = state->v + state->tbv; @@ -2936,34 +2786,36 @@ tex_sample_mipmap_filter(s3d_state_t *state) d[2] = (256 - du) * dv; d[3] = du * dv; - state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; - state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; - state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; - state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; } static void -tex_sample_persp_normal(s3d_state_t *state) -{ +tex_sample_persp_normal(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); - texture_state.level = state->max_d; + texture_state.level = state->max_d; texture_state.texture_shift = 18 + (9 - texture_state.level); - texture_state.u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (12 + state->max_d)) + state->tbu; - texture_state.v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (12 + state->max_d)) + state->tbv; + texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu; + texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv; tex_read(state, &texture_state, &state->dest_rgba); } static void -tex_sample_persp_normal_filter(s3d_state_t *state) -{ +tex_sample_persp_normal_filter(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; int32_t u; int32_t v; int tex_offset; @@ -2973,14 +2825,14 @@ tex_sample_persp_normal_filter(s3d_state_t *state) int d[4]; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); - u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (12 + state->max_d)) + state->tbu; - v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (12 + state->max_d)) + state->tbv; + u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu; + v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv; - texture_state.level = state->max_d; + texture_state.level = state->max_d; texture_state.texture_shift = 18 + (9 - texture_state.level); - tex_offset = 1 << texture_state.texture_shift; + tex_offset = 1 << texture_state.texture_shift; texture_state.u = u; texture_state.v = v; @@ -3005,34 +2857,36 @@ tex_sample_persp_normal_filter(s3d_state_t *state) d[2] = (256 - du) * dv; d[3] = du * dv; - state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; - state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; - state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; - state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; } static void -tex_sample_persp_normal_375(s3d_state_t *state) -{ +tex_sample_persp_normal_375(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); - texture_state.level = state->max_d; + texture_state.level = state->max_d; texture_state.texture_shift = 18 + (9 - texture_state.level); - texture_state.u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (8 + state->max_d)) + state->tbu; - texture_state.v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (8 + state->max_d)) + state->tbv; + texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu; + texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv; tex_read(state, &texture_state, &state->dest_rgba); } static void -tex_sample_persp_normal_filter_375(s3d_state_t *state) -{ +tex_sample_persp_normal_filter_375(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; int32_t u; int32_t v; int tex_offset; @@ -3042,14 +2896,14 @@ tex_sample_persp_normal_filter_375(s3d_state_t *state) int d[4]; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); - u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (8 + state->max_d)) + state->tbu; - v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (8 + state->max_d)) + state->tbv; + u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu; + v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv; - texture_state.level = state->max_d; + texture_state.level = state->max_d; texture_state.texture_shift = 18 + (9 - texture_state.level); - tex_offset = 1 << texture_state.texture_shift; + tex_offset = 1 << texture_state.texture_shift; texture_state.u = u; texture_state.v = v; @@ -3074,36 +2928,38 @@ tex_sample_persp_normal_filter_375(s3d_state_t *state) d[2] = (256 - du) * dv; d[3] = du * dv; - state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; - state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; - state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; - state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; } static void -tex_sample_persp_mipmap(s3d_state_t *state) -{ +tex_sample_persp_mipmap(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf); if (texture_state.level < 0) texture_state.level = 0; texture_state.texture_shift = 18 + (9 - texture_state.level); - texture_state.u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (12 + state->max_d)) + state->tbu; - texture_state.v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (12 + state->max_d)) + state->tbv; + texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu; + texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv; tex_read(state, &texture_state, &state->dest_rgba); } static void -tex_sample_persp_mipmap_filter(s3d_state_t *state) -{ +tex_sample_persp_mipmap_filter(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; int32_t u; int32_t v; int tex_offset; @@ -3113,16 +2969,16 @@ tex_sample_persp_mipmap_filter(s3d_state_t *state) int d[4]; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); - u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (12 + state->max_d)) + state->tbu; - v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (12 + state->max_d)) + state->tbv; + u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu; + v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv; texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf); if (texture_state.level < 0) texture_state.level = 0; texture_state.texture_shift = 18 + (9 - texture_state.level); - tex_offset = 1 << texture_state.texture_shift; + tex_offset = 1 << texture_state.texture_shift; texture_state.u = u; texture_state.v = v; @@ -3147,36 +3003,38 @@ tex_sample_persp_mipmap_filter(s3d_state_t *state) d[2] = (256 - du) * dv; d[3] = du * dv; - state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; - state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; - state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; - state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; } static void -tex_sample_persp_mipmap_375(s3d_state_t *state) -{ +tex_sample_persp_mipmap_375(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf); if (texture_state.level < 0) texture_state.level = 0; texture_state.texture_shift = 18 + (9 - texture_state.level); - texture_state.u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (8 + state->max_d)) + state->tbu; - texture_state.v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (8 + state->max_d)) + state->tbv; + texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu; + texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv; tex_read(state, &texture_state, &state->dest_rgba); } static void -tex_sample_persp_mipmap_filter_375(s3d_state_t *state) -{ +tex_sample_persp_mipmap_filter_375(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; int32_t u; int32_t v; int tex_offset; @@ -3186,16 +3044,16 @@ tex_sample_persp_mipmap_filter_375(s3d_state_t *state) int d[4]; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); - u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (8 + state->max_d)) + state->tbu; - v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (8 + state->max_d)) + state->tbv; + u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu; + v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv; texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf); if (texture_state.level < 0) texture_state.level = 0; texture_state.texture_shift = 18 + (9 - texture_state.level); - tex_offset = 1 << texture_state.texture_shift; + tex_offset = 1 << texture_state.texture_shift; texture_state.u = u; texture_state.v = v; @@ -3220,47 +3078,50 @@ tex_sample_persp_mipmap_filter_375(s3d_state_t *state) d[2] = (256 - du) * dv; d[3] = du * dv; - state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; - state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; - state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; - state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; } -#define CLAMP(x) \ - do { \ - if ((x) & ~0xff) \ - x = ((x) < 0) ? 0 : 0xff; \ +#define CLAMP(x) \ + do { \ + if ((x) & ~0xff) \ + x = ((x) < 0) ? 0 : 0xff; \ } while (0) -#define CLAMP_RGBA(r, g, b, a) \ - if ((r) & ~0xff) \ - r = ((r) < 0) ? 0 : 0xff; \ - if ((g) & ~0xff) \ - g = ((g) < 0) ? 0 : 0xff; \ - if ((b) & ~0xff) \ - b = ((b) < 0) ? 0 : 0xff; \ - if ((a) & ~0xff) \ - a = ((a) < 0) ? 0 : 0xff; +#define CLAMP_RGBA(r, g, b, a) \ + if ((r) & ~0xff) \ + r = ((r) < 0) ? 0 : 0xff; \ + if ((g) & ~0xff) \ + g = ((g) < 0) ? 0 : 0xff; \ + if ((b) & ~0xff) \ + b = ((b) < 0) ? 0 : 0xff; \ + if ((a) & ~0xff) \ + a = ((a) < 0) ? 0 : 0xff; -#define CLAMP_RGB(r, g, b) \ - do { \ - if ((r) < 0) \ - r = 0; \ - if ((r) > 0xff) \ - r = 0xff; \ - if ((g) < 0) \ - g = 0; \ - if ((g) > 0xff) \ - g = 0xff; \ - if ((b) < 0) \ - b = 0; \ - if ((b) > 0xff) \ - b = 0xff; \ +#define CLAMP_RGB(r, g, b) \ + do { \ + if ((r) < 0) \ + r = 0; \ + if ((r) > 0xff) \ + r = 0xff; \ + if ((g) < 0) \ + g = 0; \ + if ((g) > 0xff) \ + g = 0xff; \ + if ((b) < 0) \ + b = 0; \ + if ((b) > 0xff) \ + b = 0xff; \ } while (0) static void -dest_pixel_gouraud_shaded_triangle(s3d_state_t *state) -{ +dest_pixel_gouraud_shaded_triangle(s3d_state_t *state) { state->dest_rgba.r = state->r >> 7; CLAMP(state->dest_rgba.r); @@ -3275,8 +3136,7 @@ dest_pixel_gouraud_shaded_triangle(s3d_state_t *state) } static void -dest_pixel_unlit_texture_triangle(s3d_state_t *state) -{ +dest_pixel_unlit_texture_triangle(s3d_state_t *state) { tex_sample(state); if (state->cmd_set & CMD_SET_ABC_SRC) @@ -3284,8 +3144,7 @@ dest_pixel_unlit_texture_triangle(s3d_state_t *state) } static void -dest_pixel_lit_texture_decal(s3d_state_t *state) -{ +dest_pixel_lit_texture_decal(s3d_state_t *state) { tex_sample(state); if (state->cmd_set & CMD_SET_ABC_SRC) @@ -3293,8 +3152,7 @@ dest_pixel_lit_texture_decal(s3d_state_t *state) } static void -dest_pixel_lit_texture_reflection(s3d_state_t *state) -{ +dest_pixel_lit_texture_reflection(s3d_state_t *state) { tex_sample(state); state->dest_rgba.r += (state->r >> 7); @@ -3307,8 +3165,7 @@ dest_pixel_lit_texture_reflection(s3d_state_t *state) } static void -dest_pixel_lit_texture_modulate(s3d_state_t *state) -{ +dest_pixel_lit_texture_modulate(s3d_state_t *state) { int r = state->r >> 7; int g = state->g >> 7; int b = state->b >> 7; @@ -3327,39 +3184,18 @@ dest_pixel_lit_texture_modulate(s3d_state_t *state) } static void -tri(virge_t *virge, s3d_t *s3d_tri, s3d_state_t *state, int yc, int32_t dx1, int32_t dx2) -{ - svga_t *svga = &virge->svga; - uint8_t *vram = svga->vram; +tri(virge_t *virge, s3d_t *s3d_tri, s3d_state_t *state, int yc, int32_t dx1, int32_t dx2) { + svga_t *svga = &virge->svga; + uint8_t *vram = virge->svga.vram; + int x_dir = s3d_tri->tlr ? 1 : -1; + int use_z = !(s3d_tri->cmd_set & CMD_SET_ZB_MODE); + int y_count = yc; + int bpp = (s3d_tri->cmd_set >> 2) & 7; + uint32_t dest_offset; + uint32_t z_offset; - int x_dir = s3d_tri->tlr ? 1 : -1; - - int use_z = !(s3d_tri->cmd_set & CMD_SET_ZB_MODE); - - int y_count = yc; - - int bpp = (s3d_tri->cmd_set >> 2) & 7; - - uint32_t dest_offset = 0; - uint32_t z_offset = 0; - - uint32_t src_col; - int src_r = 0; - int src_g = 0; - int src_b = 0; - - int x; - int xe; - uint32_t z; - - uint32_t dest_addr; - uint32_t z_addr; - int dx; - int x_offset; - int xz_offset; - - int update; - uint16_t src_z = 0; + dest_offset = s3d_tri->dest_base + (state->y * s3d_tri->dest_str); + z_offset = s3d_tri->z_base + (state->y * s3d_tri->z_str); if (s3d_tri->cmd_set & CMD_SET_HC) { if (state->y < s3d_tri->clip_t) @@ -3379,240 +3215,194 @@ tri(virge_t *virge, s3d_t *s3d_tri, s3d_state_t *state, int yc, int32_t dx1, int state->base_a += (s3d_tri->TdAdY * diff_y); state->base_d += (s3d_tri->TdDdY * diff_y); state->base_w += (s3d_tri->TdWdY * diff_y); - state->x1 += (dx1 * diff_y); - state->x2 += (dx2 * diff_y); - state->y -= diff_y; - dest_offset -= s3d_tri->dest_str * diff_y; - z_offset -= s3d_tri->z_str; - y_count -= diff_y; + state->x1 += (dx1 * diff_y); + state->x2 += (dx2 * diff_y); + state->y -= diff_y; + dest_offset -= s3d_tri->dest_str; + z_offset -= s3d_tri->z_str; + y_count -= diff_y; } if ((state->y - y_count) < s3d_tri->clip_t) y_count = (state->y - s3d_tri->clip_t) + 1; } - dest_offset = s3d_tri->dest_base + (state->y * s3d_tri->dest_str); - z_offset = s3d_tri->z_base + (state->y * s3d_tri->z_str); + for (; y_count > 0; y_count--) { + int x = (state->x1 + ((1 << 20) - 1)) >> 20; + int xe = (state->x2 + ((1 << 20) - 1)) >> 20; + uint32_t z = (state->base_z > 0) ? (state->base_z << 1) : 0; - while (y_count > 0) { - x = (state->x1 + ((1 << 20) - 1)) >> 20; - xe = (state->x2 + ((1 << 20) - 1)) >> 20; - z = (state->base_z > 0) ? (state->base_z << 1) : 0; - if (x_dir < 0) { - x--; - xe--; + if (x_dir < 0) { + x--; + xe--; + } + + if (x != xe && ((x_dir > 0 && x < xe) || (x_dir < 0 && x > xe))) { + uint32_t dest_addr; + uint32_t z_addr; + int dx = (x_dir > 0) ? ((31 - ((state->x1 - 1) >> 15)) & 0x1f) : + (((state->x1 - 1) >> 15) & 0x1f); + int x_offset = x_dir * (bpp + 1); + int xz_offset = x_dir << 1; + + if (x_dir > 0) + dx += 1; + + state->r = state->base_r + ((s3d_tri->TdRdX * dx) >> 5); + state->g = state->base_g + ((s3d_tri->TdGdX * dx) >> 5); + state->b = state->base_b + ((s3d_tri->TdBdX * dx) >> 5); + state->a = state->base_a + ((s3d_tri->TdAdX * dx) >> 5); + state->u = state->base_u + ((s3d_tri->TdUdX * dx) >> 5); + state->v = state->base_v + ((s3d_tri->TdVdX * dx) >> 5); + state->w = state->base_w + ((s3d_tri->TdWdX * dx) >> 5); + state->d = state->base_d + ((s3d_tri->TdDdX * dx) >> 5); + z += ((s3d_tri->TdZdX * dx) >> 5); + + if (s3d_tri->cmd_set & CMD_SET_HC) { + if (x_dir > 0) { + if (x > s3d_tri->clip_r) + goto tri_skip_line; + if (xe < s3d_tri->clip_l) + goto tri_skip_line; + if (xe > s3d_tri->clip_r) + xe = s3d_tri->clip_r + 1; + if (x < s3d_tri->clip_l) { + int diff_x = s3d_tri->clip_l - x; + + z += (s3d_tri->TdZdX * diff_x); + state->u += (s3d_tri->TdUdX * diff_x); + state->v += (s3d_tri->TdVdX * diff_x); + state->r += (s3d_tri->TdRdX * diff_x); + state->g += (s3d_tri->TdGdX * diff_x); + state->b += (s3d_tri->TdBdX * diff_x); + state->a += (s3d_tri->TdAdX * diff_x); + state->d += (s3d_tri->TdDdX * diff_x); + state->w += (s3d_tri->TdWdX * diff_x); + + x = s3d_tri->clip_l; + } + } else { + if (x < s3d_tri->clip_l) + goto tri_skip_line; + if (xe > s3d_tri->clip_r) + goto tri_skip_line; + if (xe < s3d_tri->clip_l) + xe = s3d_tri->clip_l - 1; + if (x > s3d_tri->clip_r) { + int diff_x = x - s3d_tri->clip_r; + + z += (s3d_tri->TdZdX * diff_x); + state->u += (s3d_tri->TdUdX * diff_x); + state->v += (s3d_tri->TdVdX * diff_x); + state->r += (s3d_tri->TdRdX * diff_x); + state->g += (s3d_tri->TdGdX * diff_x); + state->b += (s3d_tri->TdBdX * diff_x); + state->a += (s3d_tri->TdAdX * diff_x); + state->d += (s3d_tri->TdDdX * diff_x); + state->w += (s3d_tri->TdWdX * diff_x); + + x = s3d_tri->clip_r; + } + } + } + + virge->svga.changedvram[(dest_offset & svga->vram_mask) >> 12] = changeframecount; + + dest_addr = dest_offset + (x * (bpp + 1)); + z_addr = z_offset + (x << 1); + + x &= 0xfff; + xe &= 0xfff; + + for (; x != xe; x = (x + x_dir) & 0xfff) { + int update = 1; + uint16_t src_z = 0; + + _x = x; + _y = state->y; + + if (use_z) { + src_z = Z_READ(z_addr); + Z_CLIP(src_z, z >> 16); + } + + if (update) { + uint32_t dest_col; + + dest_pixel(state); + + if (s3d_tri->cmd_set & CMD_SET_FE) { + int a = state->a >> 7; + state->dest_rgba.r = ((state->dest_rgba.r * a) + (s3d_tri->fog_r * (255 - a))) / 255; + state->dest_rgba.g = ((state->dest_rgba.g * a) + (s3d_tri->fog_g * (255 - a))) / 255; + state->dest_rgba.b = ((state->dest_rgba.b * a) + (s3d_tri->fog_b * (255 - a))) / 255; + } + + if (s3d_tri->cmd_set & CMD_SET_ABC_ENABLE) { + uint32_t src_col; + int src_r = 0; + uint32_t src_g = 0; + uint32_t src_b = 0; + + switch (bpp) { + case 0: /*8 bpp*/ + /*Not implemented yet*/ + break; + case 1: /*16 bpp*/ + src_col = *(uint16_t *)&vram[dest_addr & svga->vram_mask]; + RGB15_TO_24(src_col, src_r, src_g, src_b); + break; + case 2: /*24 bpp*/ + src_col = (*(uint32_t *)&vram[dest_addr & svga->vram_mask]) & 0xffffff; + RGB24_TO_24(src_col, src_r, src_g, src_b); + break; + } + + state->dest_rgba.r = ((state->dest_rgba.r * state->dest_rgba.a) + + (src_r * (255 - state->dest_rgba.a))) / 255; + state->dest_rgba.g = ((state->dest_rgba.g * state->dest_rgba.a) + + (src_g * (255 - state->dest_rgba.a))) / 255; + state->dest_rgba.b = ((state->dest_rgba.b * state->dest_rgba.a) + + (src_b * (255 - state->dest_rgba.a))) / 255; + } + + switch (bpp) { + case 0: /*8 bpp*/ + /*Not implemented yet*/ + break; + case 1: /*16 bpp*/ + RGB15(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b, dest_col); + *(uint16_t *)&vram[dest_addr] = dest_col; + break; + case 2: /*24 bpp*/ + dest_col = RGB24(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b); + *(uint8_t *)&vram[dest_addr] = dest_col & 0xff; + *(uint8_t *)&vram[dest_addr + 1] = (dest_col >> 8) & 0xff; + *(uint8_t *)&vram[dest_addr + 2] = (dest_col >> 16) & 0xff; + break; + } + + if (use_z && (s3d_tri->cmd_set & CMD_SET_ZUP)) + Z_WRITE(z_addr, src_z); + } + + z += s3d_tri->TdZdX; + state->u += s3d_tri->TdUdX; + state->v += s3d_tri->TdVdX; + state->r += s3d_tri->TdRdX; + state->g += s3d_tri->TdGdX; + state->b += s3d_tri->TdBdX; + state->a += s3d_tri->TdAdX; + state->d += s3d_tri->TdDdX; + state->w += s3d_tri->TdWdX; + dest_addr += x_offset; + z_addr += xz_offset; + virge->pixel_count++; + } } - if (((x != xe) && ((x_dir > 0) && (x < xe))) || ((x_dir < 0) && (x > xe))) { - dx = (x_dir > 0) ? ((31 - ((state->x1 - 1) >> 15)) & 0x1f) : (((state->x1 - 1) >> 15) & 0x1f); - x_offset = x_dir * (bpp + 1); - xz_offset = x_dir << 1; - if (x_dir > 0) - dx += 1; - state->r = state->base_r + ((s3d_tri->TdRdX * dx) >> 5); - state->g = state->base_g + ((s3d_tri->TdGdX * dx) >> 5); - state->b = state->base_b + ((s3d_tri->TdBdX * dx) >> 5); - state->a = state->base_a + ((s3d_tri->TdAdX * dx) >> 5); - state->u = state->base_u + ((s3d_tri->TdUdX * dx) >> 5); - state->v = state->base_v + ((s3d_tri->TdVdX * dx) >> 5); - state->w = state->base_w + ((s3d_tri->TdWdX * dx) >> 5); - state->d = state->base_d + ((s3d_tri->TdDdX * dx) >> 5); - z += ((s3d_tri->TdZdX * dx) >> 5); - - if (s3d_tri->cmd_set & CMD_SET_HC) { - if (x_dir > 0) { - if (x > s3d_tri->clip_r) - goto tri_skip_line; - if (xe < s3d_tri->clip_l) - goto tri_skip_line; - if (xe > s3d_tri->clip_r) - xe = s3d_tri->clip_r + 1; - if (x < s3d_tri->clip_l) { - int diff_x = s3d_tri->clip_l - x; - - z += (s3d_tri->TdZdX * diff_x); - state->u += (s3d_tri->TdUdX * diff_x); - state->v += (s3d_tri->TdVdX * diff_x); - state->r += (s3d_tri->TdRdX * diff_x); - state->g += (s3d_tri->TdGdX * diff_x); - state->b += (s3d_tri->TdBdX * diff_x); - state->a += (s3d_tri->TdAdX * diff_x); - state->d += (s3d_tri->TdDdX * diff_x); - state->w += (s3d_tri->TdWdX * diff_x); - - x = s3d_tri->clip_l; - } - } else { - if (x < s3d_tri->clip_l) - goto tri_skip_line; - if (xe > s3d_tri->clip_r) - goto tri_skip_line; - if (xe < s3d_tri->clip_l) - xe = s3d_tri->clip_l - 1; - if (x > s3d_tri->clip_r) { - int diff_x = x - s3d_tri->clip_r; - - z += (s3d_tri->TdZdX * diff_x); - state->u += (s3d_tri->TdUdX * diff_x); - state->v += (s3d_tri->TdVdX * diff_x); - state->r += (s3d_tri->TdRdX * diff_x); - state->g += (s3d_tri->TdGdX * diff_x); - state->b += (s3d_tri->TdBdX * diff_x); - state->a += (s3d_tri->TdAdX * diff_x); - state->d += (s3d_tri->TdDdX * diff_x); - state->w += (s3d_tri->TdWdX * diff_x); - - x = s3d_tri->clip_r; - } - } - } - - svga->changedvram[(dest_offset & virge->vram_mask) >> 12] = changeframecount; - - dest_addr = dest_offset + (x * (bpp + 1)); - z_addr = z_offset + (x << 1); - - x &= 0xfff; - xe &= 0xfff; - - while (x != xe) { - update = 1; - _x = x; - _y = state->y; - - if (use_z) { - src_z = *(uint16_t *) &vram[z_addr & virge->vram_mask]; - switch ((s3d_tri->cmd_set >> 20) & 7) { - case 0: - update = 0; - break; - case 1: - if ((z >> 16) > src_z) { - src_z = (z >> 16); - } else - update = 0; - break; - case 2: - if ((z >> 16) == src_z) { - src_z = (z >> 16); - } else - update = 0; - break; - case 3: - if ((z >> 16) >= src_z) { - src_z = (z >> 16); - } else - update = 0; - break; - case 4: - if ((z >> 16) < src_z) { - src_z = (z >> 16); - } else - update = 0; - break; - case 5: - if ((z >> 16) != src_z) { - src_z = (z >> 16); - } else - update = 0; - break; - case 6: - if ((z >> 16) <= src_z) { - src_z = (z >> 16); - } else - update = 0; - break; - case 7: - src_z = (z >> 16); - break; - - default: - break; - } - } - - if (update) { - uint32_t dest_col; - - dest_pixel(state); - - if (s3d_tri->cmd_set & CMD_SET_FE) { - int a = state->a >> 7; - state->dest_rgba.r = ((state->dest_rgba.r * a) + (s3d_tri->fog_r * (255 - a))) / 255; - state->dest_rgba.g = ((state->dest_rgba.g * a) + (s3d_tri->fog_g * (255 - a))) / 255; - state->dest_rgba.b = ((state->dest_rgba.b * a) + (s3d_tri->fog_b * (255 - a))) / 255; - } - - if (s3d_tri->cmd_set & CMD_SET_ABC_ENABLE) { - switch (bpp) { - case 0: /*8 bpp*/ - /*TODO: Not implemented yet*/ - break; - case 1: /*16 bpp*/ - src_col = *(uint16_t *) &vram[dest_addr & virge->vram_mask]; - RGB15_TO_24(src_col, src_r, src_g, src_b); - break; - case 2: /*24 bpp*/ - src_col = (*(uint32_t *) &vram[dest_addr & virge->vram_mask]) & 0xffffff; - RGB24_TO_24(src_col, src_r, src_g, src_b); - break; - - default: - break; - } - - state->dest_rgba.r = ((state->dest_rgba.r * state->dest_rgba.a) + (src_r * (255 - state->dest_rgba.a))) / 255; - state->dest_rgba.g = ((state->dest_rgba.g * state->dest_rgba.a) + (src_g * (255 - state->dest_rgba.a))) / 255; - state->dest_rgba.b = ((state->dest_rgba.b * state->dest_rgba.a) + (src_b * (255 - state->dest_rgba.a))) / 255; - } - - switch (bpp) { - case 0: /*8 bpp*/ - /*TODO: Not implemented yet*/ - break; - case 1: /*16 bpp*/ - RGB15(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b, dest_col); - *(uint16_t *) &vram[dest_addr & virge->vram_mask] = dest_col; - svga->changedvram[(dest_addr & virge->vram_mask) >> 12] = changeframecount; - break; - case 2: /*24 bpp*/ - dest_col = RGB24(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b); - *(uint8_t *) &vram[dest_addr & virge->vram_mask] = dest_col & 0xff; - *(uint8_t *) &vram[(dest_addr + 1) & virge->vram_mask] = (dest_col >> 8) & 0xff; - *(uint8_t *) &vram[(dest_addr + 2) & virge->vram_mask] = (dest_col >> 16) & 0xff; - svga->changedvram[(dest_addr & virge->vram_mask) >> 12] = changeframecount; - break; - - default: - break; - } - } - - if (use_z && (s3d_tri->cmd_set & CMD_SET_ZUP)) { - *(uint16_t *) &vram[z_addr & virge->vram_mask] = src_z; - svga->changedvram[(z_addr & virge->vram_mask) >> 12] = changeframecount; - } - - z += s3d_tri->TdZdX; - state->u += s3d_tri->TdUdX; - state->v += s3d_tri->TdVdX; - state->r += s3d_tri->TdRdX; - state->g += s3d_tri->TdGdX; - state->b += s3d_tri->TdBdX; - state->a += s3d_tri->TdAdX; - state->d += s3d_tri->TdDdX; - state->w += s3d_tri->TdWdX; - dest_addr += x_offset; - z_addr += xz_offset; - - x = (x + x_dir) & 0xfff; - } - } - - y_count--; - tri_skip_line: - state->x1 += dx1; - state->x2 += dx2; + state->x1 += dx1; + state->x2 += dx2; state->base_u += s3d_tri->TdUdY; state->base_v += s3d_tri->TdVdY; state->base_z += s3d_tri->TdZdY; @@ -3623,31 +3413,22 @@ tri_skip_line: state->base_d += s3d_tri->TdDdY; state->base_w += s3d_tri->TdWdY; state->y--; - dest_offset -= s3d_tri->dest_str; - z_offset -= s3d_tri->z_str; + dest_offset -= s3d_tri->dest_str; + z_offset -= s3d_tri->z_str; } } -static int tex_size[8] = { - 4 * 2, - 2 * 2, - 2 * 2, - 1 * 2, - 2 / 1, - 2 / 1, - 1 * 2, - 1 * 2 -}; +static int tex_size[8] = {4 * 2, 2 * 2, 2 * 2, 1 * 2, 2 / 1, 2 / 1, 1 * 2, 1 * 2}; static void -s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) -{ +s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) { s3d_state_t state; - uint32_t tex_base; + uint32_t tex_base; + int c; - uint64_t start_time = plat_timer_read(); - uint64_t end_time; + uint64_t start_time = plat_timer_read(); + uint64_t end_time; state.tbu = s3d_tri->tbu << 11; state.tbv = s3d_tri->tbv << 11; @@ -3661,18 +3442,18 @@ s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) state.base_u = s3d_tri->tus; state.base_v = s3d_tri->tvs; state.base_z = s3d_tri->tzs; - state.base_r = (int32_t) s3d_tri->trs; - state.base_g = (int32_t) s3d_tri->tgs; - state.base_b = (int32_t) s3d_tri->tbs; - state.base_a = (int32_t) s3d_tri->tas; + state.base_r = (int32_t)s3d_tri->trs; + state.base_g = (int32_t)s3d_tri->tgs; + state.base_b = (int32_t)s3d_tri->tbs; + state.base_a = (int32_t)s3d_tri->tas; state.base_d = s3d_tri->tds; state.base_w = s3d_tri->tws; tex_base = s3d_tri->tex_base; - for (int c = 9; c >= 0; c--) { - state.texture[c] = (uint16_t *) &virge->svga.vram[tex_base]; - if (c <= state.max_d) - tex_base += ((1 << (c * 2)) * tex_size[(s3d_tri->cmd_set >> 5) & 7]) / 2; + for (c = 9; c >= 0; c--) { + state.texture[c] = (uint16_t *)&virge->svga.vram[tex_base]; + if (c <= state.max_d) + tex_base += ((1 << (c * 2)) * tex_size[(s3d_tri->cmd_set >> 5) & 7]) / 2; } switch ((s3d_tri->cmd_set >> 27) & 0xf) { @@ -3692,7 +3473,6 @@ s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) dest_pixel = dest_pixel_lit_texture_decal; break; default: - s3_virge_log("bad triangle type %x\n", (s3d_tri->cmd_set >> 27) & 0xf); return; } break; @@ -3701,7 +3481,6 @@ s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) dest_pixel = dest_pixel_unlit_texture_triangle; break; default: - s3_virge_log("bad triangle type %x\n", (s3d_tri->cmd_set >> 27) & 0xf); return; } @@ -3724,34 +3503,35 @@ s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) break; case (0 | 8): case (1 | 8): - if (virge->chip == S3_VIRGEDX || virge->chip >= S3_VIRGEGX2) + if ((virge->chip == S3_VIRGEDX) || (virge->chip >= S3_VIRGEGX2)) tex_sample = tex_sample_persp_mipmap_375; else tex_sample = tex_sample_persp_mipmap; break; case (2 | 8): case (3 | 8): - if (virge->chip == S3_VIRGEDX || virge->chip >= S3_VIRGEGX2) - tex_sample = virge->bilinear_enabled ? tex_sample_persp_mipmap_filter_375 : tex_sample_persp_mipmap_375; + if ((virge->chip == S3_VIRGEDX) || (virge->chip >= S3_VIRGEGX2)) + tex_sample = virge->bilinear_enabled ? tex_sample_persp_mipmap_filter_375 : + tex_sample_persp_mipmap_375; else - tex_sample = virge->bilinear_enabled ? tex_sample_persp_mipmap_filter : tex_sample_persp_mipmap; + tex_sample = virge->bilinear_enabled ? tex_sample_persp_mipmap_filter : + tex_sample_persp_mipmap; break; case (4 | 8): case (5 | 8): - if (virge->chip == S3_VIRGEDX || virge->chip >= S3_VIRGEGX2) + if ((virge->chip == S3_VIRGEDX) || (virge->chip >= S3_VIRGEGX2)) tex_sample = tex_sample_persp_normal_375; else tex_sample = tex_sample_persp_normal; break; case (6 | 8): case (7 | 8): - if (virge->chip == S3_VIRGEDX || virge->chip >= S3_VIRGEGX2) - tex_sample = virge->bilinear_enabled ? tex_sample_persp_normal_filter_375 : tex_sample_persp_normal_375; + if ((virge->chip == S3_VIRGEDX) || (virge->chip >= S3_VIRGEGX2)) + tex_sample = virge->bilinear_enabled ? tex_sample_persp_normal_filter_375 : + tex_sample_persp_normal_375; else - tex_sample = virge->bilinear_enabled ? tex_sample_persp_normal_filter : tex_sample_persp_normal; - break; - - default: + tex_sample = virge->bilinear_enabled ? tex_sample_persp_normal_filter : + tex_sample_persp_normal; break; } @@ -3766,7 +3546,6 @@ s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) tex_read = (s3d_tri->cmd_set & CMD_SET_TWE) ? tex_ARGB1555 : tex_ARGB1555_nowrap; break; default: - s3_virge_log("bad texture type %i\n", (s3d_tri->cmd_set >> 5) & 7); tex_read = (s3d_tri->cmd_set & CMD_SET_TWE) ? tex_ARGB1555 : tex_ARGB1555_nowrap; break; } @@ -3778,21 +3557,56 @@ s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) state.x2 = s3d_tri->txend12; tri(virge, s3d_tri, &state, s3d_tri->ty12, s3d_tri->TdXdY02, s3d_tri->TdXdY12); + virge->tri_count++; + end_time = plat_timer_read(); - virge->blitter_time += end_time - start_time; + virge_time += end_time - start_time; } static void -s3_virge_hwcursor_draw(svga_t *svga, int displine) -{ - const virge_t *virge = (virge_t *) svga->priv; - uint16_t dat[2]; - int xx; - int offset = svga->hwcursor_latch.x - svga->hwcursor_latch.xoff; - uint32_t fg; - uint32_t bg; - uint32_t vram_mask = virge->vram_mask; +render_thread(void *param) { + virge_t *virge = (virge_t *)param; + + while (virge->render_thread_run) { + thread_wait_event(virge->wake_render_thread, -1); + thread_reset_event(virge->wake_render_thread); + virge->s3d_busy = 1; + while (!RB_EMPTY) { + s3_virge_triangle(virge, &virge->s3d_buffer[virge->s3d_read_idx & RB_MASK]); + virge->s3d_read_idx++; + + if (RB_ENTRIES == (RB_SIZE - 1)) + thread_set_event(virge->not_full_event); + } + virge->s3d_busy = 0; + virge->subsys_stat |= INT_S3D_DONE; + s3_virge_update_irqs(virge); + } +} + +static void +queue_triangle(virge_t *virge) { + if (RB_FULL) { + thread_reset_event(virge->not_full_event); + if (RB_FULL) + thread_wait_event(virge->not_full_event, -1); /*Wait for room in ringbuffer*/ + } + virge->s3d_buffer[virge->s3d_write_idx & RB_MASK] = virge->s3d_tri; + virge->s3d_write_idx++; + if (!virge->s3d_busy) + thread_set_event(virge->wake_render_thread); /*Wake up render thread if moving from idle*/ +} + +static void s3_virge_hwcursor_draw(svga_t *svga, int displine) { + virge_t *virge = (virge_t *) svga->priv; + int x; + uint16_t dat[2] = { 0, 0 }; + int xx; + int offset = svga->hwcursor_latch.x - svga->hwcursor_latch.xoff; + uint32_t fg; + uint32_t bg; + uint32_t vram_mask = virge->vram_mask; if (svga->interlace && svga->hwcursor_oddeven) svga->hwcursor_latch.addr += 16; @@ -3829,13 +3643,16 @@ s3_virge_hwcursor_draw(svga_t *svga, int displine) break; } - for (uint8_t x = 0; x < 64; x += 16) { - dat[0] = (svga->vram[svga->hwcursor_latch.addr & vram_mask] << 8) | svga->vram[(svga->hwcursor_latch.addr + 1) & vram_mask]; - dat[1] = (svga->vram[(svga->hwcursor_latch.addr + 2) & vram_mask] << 8) | svga->vram[(svga->hwcursor_latch.addr + 3) & vram_mask]; + for (x = 0; x < 64; x += 16) { + dat[0] = (svga->vram[svga->hwcursor_latch.addr & vram_mask] << 8) | + svga->vram[(svga->hwcursor_latch.addr + 1) & vram_mask]; + dat[1] = (svga->vram[(svga->hwcursor_latch.addr + 2) & vram_mask] << 8) | + svga->vram[(svga->hwcursor_latch.addr + 3) & vram_mask]; + if (svga->crtc[0x55] & 0x10) { /*X11*/ for (xx = 0; xx < 16; xx++) { - if (offset >= 0) { + if (offset >= svga->hwcursor_latch.x) { if (virge->chip == S3_VIRGEGX2) dat[0] ^= 0x8000; @@ -3844,13 +3661,14 @@ s3_virge_hwcursor_draw(svga_t *svga, int displine) } offset++; + dat[0] <<= 1; dat[1] <<= 1; } } else { /*Windows*/ for (xx = 0; xx < 16; xx++) { - if (offset >= 0) { + if (offset >= svga->hwcursor_latch.x) { if (!(dat[0] & 0x8000)) buffer32->line[displine][offset + svga->x_add] = (dat[1] & 0x8000) ? fg : bg; else if (dat[1] & 0x8000) @@ -3858,101 +3676,109 @@ s3_virge_hwcursor_draw(svga_t *svga, int displine) } offset++; + dat[0] <<= 1; dat[1] <<= 1; } } + svga->hwcursor_latch.addr += 4; } + if (svga->interlace && !svga->hwcursor_oddeven) svga->hwcursor_latch.addr += 16; } -#define DECODE_YCbCr() \ - do { \ - int c; \ - \ - for (c = 0; c < 2; c++) { \ - uint8_t y1, y2; \ - int8_t Cr, Cb; \ - int dR, dG, dB; \ - \ - y1 = src[0]; \ - Cr = src[1] - 0x80; \ - y2 = src[2]; \ - Cb = src[3] - 0x80; \ - src += 4; \ - \ - dR = (359 * Cr) >> 8; \ - dG = (88 * Cb + 183 * Cr) >> 8; \ - dB = (453 * Cb) >> 8; \ - \ - r[x_write] = y1 + dR; \ - CLAMP(r[x_write]); \ - g[x_write] = y1 - dG; \ - CLAMP(g[x_write]); \ - b[x_write] = y1 + dB; \ - CLAMP(b[x_write]); \ - \ - r[x_write + 1] = y2 + dR; \ - CLAMP(r[x_write + 1]); \ - g[x_write + 1] = y2 - dG; \ - CLAMP(g[x_write + 1]); \ - b[x_write + 1] = y2 + dB; \ - CLAMP(b[x_write + 1]); \ - \ - x_write = (x_write + 2) & 7; \ - } \ +#define DECODE_YCbCr() \ + do { \ + int c; \ + \ + for (c = 0; c < 2; c++) { \ + uint8_t y1, y2; \ + int8_t Cr; \ + int8_t Cb; \ + int dR; \ + int dG; \ + int dB; \ + \ + y1 = src[0]; \ + Cr = src[1] - 0x80; \ + y2 = src[2]; \ + Cb = src[3] - 0x80; \ + src += 4; \ + \ + dR = (359 * Cr) >> 8; \ + dG = (88 * Cb + 183 * Cr) >> 8; \ + dB = (453 * Cb) >> 8; \ + \ + r[x_write] = y1 + dR; \ + CLAMP(r[x_write]); \ + g[x_write] = y1 - dG; \ + CLAMP(g[x_write]); \ + b[x_write] = y1 + dB; \ + CLAMP(b[x_write]); \ + \ + r[x_write + 1] = y2 + dR; \ + CLAMP(r[x_write + 1]); \ + g[x_write + 1] = y2 - dG; \ + CLAMP(g[x_write + 1]); \ + b[x_write + 1] = y2 + dB; \ + CLAMP(b[x_write + 1]); \ + \ + x_write = (x_write + 2) & 7; \ + } \ } while (0) /*Both YUV formats are untested*/ -#define DECODE_YUV211() \ - do { \ - uint8_t y1, y2, y3, y4; \ - int8_t U, V; \ - int dR, dG, dB; \ - \ - U = src[0] - 0x80; \ - y1 = (298 * (src[1] - 16)) >> 8; \ - y2 = (298 * (src[2] - 16)) >> 8; \ - V = src[3] - 0x80; \ - y3 = (298 * (src[4] - 16)) >> 8; \ - y4 = (298 * (src[5] - 16)) >> 8; \ - src += 6; \ - \ - dR = (309 * V) >> 8; \ - dG = (100 * U + 208 * V) >> 8; \ - dB = (516 * U) >> 8; \ - \ - r[x_write] = y1 + dR; \ - CLAMP(r[x_write]); \ - g[x_write] = y1 - dG; \ - CLAMP(g[x_write]); \ - b[x_write] = y1 + dB; \ - CLAMP(b[x_write]); \ - \ - r[x_write + 1] = y2 + dR; \ - CLAMP(r[x_write + 1]); \ - g[x_write + 1] = y2 - dG; \ - CLAMP(g[x_write + 1]); \ - b[x_write + 1] = y2 + dB; \ - CLAMP(b[x_write + 1]); \ - \ - r[x_write + 2] = y3 + dR; \ - CLAMP(r[x_write + 2]); \ - g[x_write + 2] = y3 - dG; \ - CLAMP(g[x_write + 2]); \ - b[x_write + 2] = y3 + dB; \ - CLAMP(b[x_write + 2]); \ - \ - r[x_write + 3] = y4 + dR; \ - CLAMP(r[x_write + 3]); \ - g[x_write + 3] = y4 - dG; \ - CLAMP(g[x_write + 3]); \ - b[x_write + 3] = y4 + dB; \ - CLAMP(b[x_write + 3]); \ - \ - x_write = (x_write + 4) & 7; \ +#define DECODE_YUV211() \ + do { \ + uint8_t y1, y2, y3, y4; \ + int8_t U, V; \ + int dR; \ + int dG; \ + int dB; \ + \ + U = src[0] - 0x80; \ + y1 = (298 * (src[1] - 16)) >> 8; \ + y2 = (298 * (src[2] - 16)) >> 8; \ + V = src[3] - 0x80; \ + y3 = (298 * (src[4] - 16)) >> 8; \ + y4 = (298 * (src[5] - 16)) >> 8; \ + src += 6; \ + \ + dR = (309 * V) >> 8; \ + dG = (100 * U + 208 * V) >> 8; \ + dB = (516 * U) >> 8; \ + \ + r[x_write] = y1 + dR; \ + CLAMP(r[x_write]); \ + g[x_write] = y1 - dG; \ + CLAMP(g[x_write]); \ + b[x_write] = y1 + dB; \ + CLAMP(b[x_write]); \ + \ + r[x_write + 1] = y2 + dR; \ + CLAMP(r[x_write + 1]); \ + g[x_write + 1] = y2 - dG; \ + CLAMP(g[x_write + 1]); \ + b[x_write + 1] = y2 + dB; \ + CLAMP(b[x_write + 1]); \ + \ + r[x_write + 2] = y3 + dR; \ + CLAMP(r[x_write + 2]); \ + g[x_write + 2] = y3 - dG; \ + CLAMP(g[x_write + 2]); \ + b[x_write + 2] = y3 + dB; \ + CLAMP(b[x_write + 2]); \ + \ + r[x_write + 3] = y4 + dR; \ + CLAMP(r[x_write + 3]); \ + g[x_write + 3] = y4 - dG; \ + CLAMP(g[x_write + 3]); \ + b[x_write + 3] = y4 + dB; \ + CLAMP(b[x_write + 3]); \ + \ + x_write = (x_write + 4) & 7; \ } while (0) #define DECODE_YUV422() \ @@ -3960,13 +3786,17 @@ s3_virge_hwcursor_draw(svga_t *svga, int displine) int c; \ \ for (c = 0; c < 2; c++) { \ - uint8_t y1, y2; \ - int8_t U, V; \ - int dR, dG, dB; \ + uint8_t y1; \ + uint8_t y2; \ + int8_t U; \ + int8_t V; \ + int dR; \ + int dG; \ + int dB; \ \ - U = src[0] - 0x80; \ + U = src[0] - 0x80; \ y1 = (298 * (src[1] - 16)) >> 8; \ - V = src[2] - 0x80; \ + V = src[2] - 0x80; \ y2 = (298 * (src[3] - 16)) >> 8; \ src += 4; \ \ @@ -3992,110 +3822,122 @@ s3_virge_hwcursor_draw(svga_t *svga, int displine) } \ } while (0) -#define DECODE_RGB555() \ - do { \ - int c; \ - \ - for (c = 0; c < 4; c++) { \ - uint16_t dat; \ - \ - dat = *(uint16_t *) src; \ - src += 2; \ - \ - r[x_write + c] = ((dat & 0x001f) << 3) | ((dat & 0x001f) >> 2); \ - g[x_write + c] = ((dat & 0x03e0) >> 2) | ((dat & 0x03e0) >> 7); \ - b[x_write + c] = ((dat & 0x7c00) >> 7) | ((dat & 0x7c00) >> 12); \ - } \ - x_write = (x_write + 4) & 7; \ +#define DECODE_RGB555() \ + do { \ + int c; \ + \ + for (c = 0; c < 4; c++) { \ + uint16_t dat; \ + \ + dat = *(uint16_t *)src; \ + src += 2; \ + \ + r[x_write + c] = \ + ((dat & 0x001f) << 3) | \ + ((dat & 0x001f) >> 2); \ + g[x_write + c] = \ + ((dat & 0x03e0) >> 2) | \ + ((dat & 0x03e0) >> 7); \ + b[x_write + c] = \ + ((dat & 0x7c00) >> 7) | \ + ((dat & 0x7c00) >> 12); \ + } \ + x_write = (x_write + 4) & 7; \ } while (0) -#define DECODE_RGB565() \ - do { \ - int c; \ - \ - for (c = 0; c < 4; c++) { \ - uint16_t dat; \ - \ - dat = *(uint16_t *) src; \ - src += 2; \ - \ - r[x_write + c] = ((dat & 0x001f) << 3) | ((dat & 0x001f) >> 2); \ - g[x_write + c] = ((dat & 0x07e0) >> 3) | ((dat & 0x07e0) >> 9); \ - b[x_write + c] = ((dat & 0xf800) >> 8) | ((dat & 0xf800) >> 13); \ - } \ - x_write = (x_write + 4) & 7; \ +#define DECODE_RGB565() \ + do { \ + int c; \ + \ + for (c = 0; c < 4; c++) { \ + uint16_t dat; \ + \ + dat = *(uint16_t *)src; \ + src += 2; \ + \ + r[x_write + c] = \ + ((dat & 0x001f) << 3) | \ + ((dat & 0x001f) >> 2); \ + g[x_write + c] = \ + ((dat & 0x07e0) >> 3) | \ + ((dat & 0x07e0) >> 9); \ + b[x_write + c] = \ + ((dat & 0xf800) >> 8) | \ + ((dat & 0xf800) >> 13); \ + } \ + x_write = (x_write + 4) & 7; \ } while (0) -#define DECODE_RGB888() \ - do { \ - int c; \ - \ - for (c = 0; c < 4; c++) { \ - r[x_write + c] = src[0]; \ - g[x_write + c] = src[1]; \ - b[x_write + c] = src[2]; \ - src += 3; \ - } \ - x_write = (x_write + 4) & 7; \ +#define DECODE_RGB888() \ + do { \ + int c; \ + \ + for (c = 0; c < 4; c++) { \ + r[x_write + c] = src[0]; \ + g[x_write + c] = src[1]; \ + b[x_write + c] = src[2]; \ + src += 3; \ + } \ + x_write = (x_write + 4) & 7; \ } while (0) -#define DECODE_XRGB8888() \ - do { \ - int c; \ - \ - for (c = 0; c < 4; c++) { \ - r[x_write + c] = src[0]; \ - g[x_write + c] = src[1]; \ - b[x_write + c] = src[2]; \ - src += 4; \ - } \ - x_write = (x_write + 4) & 7; \ +#define DECODE_XRGB8888() \ + do { \ + int c; \ + \ + for (c = 0; c < 4; c++) { \ + r[x_write + c] = src[0]; \ + g[x_write + c] = src[1]; \ + b[x_write + c] = src[2]; \ + src += 4; \ + } \ + x_write = (x_write + 4) & 7; \ } while (0) -#define OVERLAY_SAMPLE() \ - do { \ - switch (virge->streams.sdif) { \ - case 1: \ - DECODE_YCbCr(); \ - break; \ - case 2: \ - DECODE_YUV422(); \ - break; \ - case 3: \ - DECODE_RGB555(); \ - break; \ - case 4: \ - DECODE_YUV211(); \ - break; \ - case 5: \ - DECODE_RGB565(); \ - break; \ - case 6: \ - DECODE_RGB888(); \ - break; \ - case 7: \ - default: \ - DECODE_XRGB8888(); \ - break; \ - } \ +#define OVERLAY_SAMPLE() \ + do { \ + switch (virge->streams.sdif) { \ + case 1: \ + DECODE_YCbCr(); \ + break; \ + case 2: \ + DECODE_YUV422(); \ + break; \ + case 3: \ + DECODE_RGB555(); \ + break; \ + case 4: \ + DECODE_YUV211(); \ + break; \ + case 5: \ + DECODE_RGB565(); \ + break; \ + case 6: \ + DECODE_RGB888(); \ + break; \ + case 7: \ + default: \ + DECODE_XRGB8888(); \ + break; \ + } \ } while (0) static void -s3_virge_overlay_draw(svga_t *svga, int displine) -{ - const virge_t *virge = (virge_t *) svga->priv; - int offset = (virge->streams.sec_x - virge->streams.pri_x) + 1; - int h_acc = virge->streams.dda_horiz_accumulator; - int r[8]; - int g[8]; - int b[8]; - int x_size; - int x_read = 4; - int x_write = 4; - uint32_t *p; - uint8_t *src = &svga->vram[svga->overlay_latch.addr]; +s3_virge_overlay_draw(svga_t *svga, int displine) { + virge_t *virge = (virge_t *) svga->priv; + int offset = (virge->streams.sec_x - virge->streams.pri_x) + 1; + int h_acc = virge->streams.dda_horiz_accumulator; + int r[8]; + int g[8]; + int b[8]; + int x_size; + int x_read = 4; + int x_write = 4; + int x; + uint32_t *p; + uint8_t *src = &svga->vram[svga->overlay_latch.addr]; - p = &(buffer32->line[displine][offset + svga->x_add]); + p = &((uint32_t *)buffer32->line[displine])[offset + svga->x_add]; if ((offset + virge->streams.sec_w) > virge->streams.pri_w) x_size = (virge->streams.pri_w - virge->streams.sec_x) + 1; @@ -4104,7 +3946,7 @@ s3_virge_overlay_draw(svga_t *svga, int displine) OVERLAY_SAMPLE(); - for (int x = 0; x < x_size; x++) { + for (x = 0; x < x_size; x++) { *p++ = r[x_read] | (g[x_read] << 8) | (b[x_read] << 16); h_acc += virge->streams.k1_horiz_scale; @@ -4397,9 +4239,6 @@ s3_virge_disable_handlers(virge_t *dev) reset_state->svga.timer = dev->svga.timer; reset_state->svga.timer8514 = dev->svga.timer8514; - - reset_state->fifo_timer = dev->fifo_timer; - reset_state->render_timer = dev->render_timer; } static void @@ -4412,7 +4251,6 @@ s3_virge_reset(void *priv) dev->virge_busy = 0; dev->fifo_write_idx = 0; dev->fifo_read_idx = 0; - dev->fifo_state = FIFO_STATE_IDLE; dev->s3d_busy = 0; dev->s3d_write_idx = 0; dev->s3d_read_idx = 0; @@ -4426,7 +4264,7 @@ static void * s3_virge_init(const device_t *info) { const char *bios_fn; - virge_t *virge = calloc(1, sizeof(virge_t)); + virge_t *virge = (virge_t *) calloc(1, sizeof(virge_t)); reset_state = calloc(1, sizeof(virge_t)); virge->bilinear_enabled = device_get_config_int("bilinear"); @@ -4657,8 +4495,16 @@ s3_virge_init(const device_t *info) virge->svga.force_old_addr = 1; - timer_add(&virge->fifo_timer, s3_virge_fifo_timer, virge, 1); - timer_add(&virge->render_timer, s3_virge_render_timer, virge, 0); + virge->render_thread_run = 1; + virge->wake_render_thread = thread_create_event(); + virge->wake_main_thread = thread_create_event(); + virge->not_full_event = thread_create_event(); + virge->render_thread = thread_create(render_thread, virge); + + virge->fifo_thread_run = 1; + virge->wake_fifo_thread = thread_create_event(); + virge->fifo_not_full_event = thread_create_event(); + virge->fifo_thread = thread_create(fifo_thread, virge); virge->local = info->local; @@ -4672,6 +4518,19 @@ s3_virge_close(void *priv) { virge_t *virge = (virge_t *) priv; + virge->render_thread_run = 0; + thread_set_event(virge->wake_render_thread); + thread_wait(virge->render_thread); + thread_destroy_event(virge->not_full_event); + thread_destroy_event(virge->wake_main_thread); + thread_destroy_event(virge->wake_render_thread); + + virge->fifo_thread_run = 0; + thread_set_event(virge->wake_fifo_thread); + thread_wait(virge->fifo_thread); + thread_destroy_event(virge->fifo_not_full_event); + thread_destroy_event(virge->wake_fifo_thread); + svga_close(&virge->svga); ddc_close(virge->ddc); diff --git a/src/video/vid_svga.c b/src/video/vid_svga.c index 93c1eb669..bd31abd23 100644 --- a/src/video/vid_svga.c +++ b/src/video/vid_svga.c @@ -687,7 +687,7 @@ svga_recalctimings(svga_t *svga) } else if ((svga->gdcreg[5] & 0x60) == 0x20) { if (svga->seqregs[1] & 8) { /*Low res (320)*/ svga->render = svga_render_2bpp_lowres; - pclog("2 bpp low res\n"); + svga_log("2 bpp low res\n"); } else svga->render = svga_render_2bpp_highres; } else { @@ -859,8 +859,10 @@ svga_recalctimings(svga_t *svga) svga->y_add = (svga->monitor->mon_overscan_y >> 1); svga->x_add = (svga->monitor->mon_overscan_x >> 1); - if (svga->vblankstart < svga->dispend) + if (svga->vblankstart < svga->dispend) { + svga_log("DISPEND > VBLANKSTART.\n"); svga->dispend = svga->vblankstart; + } crtcconst = svga->clock * svga->char_width; if (ibm8514_active && (svga->dev8514 != NULL)) { @@ -1222,9 +1224,11 @@ svga_poll(void *priv) if (!svga->override) { if (svga->vertical_linedbl) { wy = (svga->lastline - svga->firstline) << 1; + svga->vdisp = wy + 1; svga_doblit(wx, wy, svga); } else { wy = svga->lastline - svga->firstline; + svga->vdisp = wy + 1; svga_doblit(wx, wy, svga); } } diff --git a/src/video/vid_svga_render.c b/src/video/vid_svga_render.c index 74d9a4d0c..c05d308fa 100644 --- a/src/video/vid_svga_render.c +++ b/src/video/vid_svga_render.c @@ -178,14 +178,14 @@ svga_render_text_40(svga_t *svga) charaddr = svga->charseta + (chr * 128); if (drawcursor) { - bg = svga->pallook[svga->egapal[attr & 15]]; - fg = svga->pallook[svga->egapal[attr >> 4]]; + bg = svga->pallook[svga->egapal[attr & 15] & svga->dac_mask]; + fg = svga->pallook[svga->egapal[attr >> 4] & svga->dac_mask]; } else { - fg = svga->pallook[svga->egapal[attr & 15]]; - bg = svga->pallook[svga->egapal[attr >> 4]]; + fg = svga->pallook[svga->egapal[attr & 15] & svga->dac_mask]; + bg = svga->pallook[svga->egapal[attr >> 4] & svga->dac_mask]; if (attr & 0x80 && svga->attrregs[0x10] & 8) { - bg = svga->pallook[svga->egapal[(attr >> 4) & 7]]; + bg = svga->pallook[svga->egapal[(attr >> 4) & 7] & svga->dac_mask]; if (svga->blink & 16) fg = bg; } @@ -256,13 +256,13 @@ svga_render_text_80(svga_t *svga) charaddr = svga->charseta + (chr * 128); if (drawcursor) { - bg = svga->pallook[svga->egapal[attr & 15]]; - fg = svga->pallook[svga->egapal[attr >> 4]]; + bg = svga->pallook[svga->egapal[attr & 15] & svga->dac_mask]; + fg = svga->pallook[svga->egapal[attr >> 4] & svga->dac_mask]; } else { - fg = svga->pallook[svga->egapal[attr & 15]]; - bg = svga->pallook[svga->egapal[attr >> 4]]; + fg = svga->pallook[svga->egapal[attr & 15] & svga->dac_mask]; + bg = svga->pallook[svga->egapal[attr >> 4] & svga->dac_mask]; if (attr & 0x80 && svga->attrregs[0x10] & 8) { - bg = svga->pallook[svga->egapal[(attr >> 4) & 7]]; + bg = svga->pallook[svga->egapal[(attr >> 4) & 7] & svga->dac_mask]; if (svga->blink & 16) fg = bg; } @@ -323,13 +323,13 @@ svga_render_text_80_ksc5601(svga_t *svga) attr = svga->vram[addr + 1]; if (drawcursor) { - bg = svga->pallook[svga->egapal[attr & 15]]; - fg = svga->pallook[svga->egapal[attr >> 4]]; + bg = svga->pallook[svga->egapal[attr & 15] & svga->dac_mask]; + fg = svga->pallook[svga->egapal[attr >> 4] & svga->dac_mask]; } else { - fg = svga->pallook[svga->egapal[attr & 15]]; - bg = svga->pallook[svga->egapal[attr >> 4]]; + fg = svga->pallook[svga->egapal[attr & 15] & svga->dac_mask]; + bg = svga->pallook[svga->egapal[attr >> 4] & svga->dac_mask]; if (attr & 0x80 && svga->attrregs[0x10] & 8) { - bg = svga->pallook[svga->egapal[(attr >> 4) & 7]]; + bg = svga->pallook[svga->egapal[(attr >> 4) & 7] & svga->dac_mask]; if (svga->blink & 16) fg = bg; } @@ -378,13 +378,13 @@ svga_render_text_80_ksc5601(svga_t *svga) attr = svga->vram[((svga->ma << 1) + 1) & svga->vram_display_mask]; if (drawcursor) { - bg = svga->pallook[svga->egapal[attr & 15]]; - fg = svga->pallook[svga->egapal[attr >> 4]]; + bg = svga->pallook[svga->egapal[attr & 15] & svga->dac_mask]; + fg = svga->pallook[svga->egapal[attr >> 4] & svga->dac_mask]; } else { - fg = svga->pallook[svga->egapal[attr & 15]]; - bg = svga->pallook[svga->egapal[attr >> 4]]; + fg = svga->pallook[svga->egapal[attr & 15] & svga->dac_mask]; + bg = svga->pallook[svga->egapal[attr >> 4] & svga->dac_mask]; if (attr & 0x80 && svga->attrregs[0x10] & 8) { - bg = svga->pallook[svga->egapal[(attr >> 4) & 7]]; + bg = svga->pallook[svga->egapal[(attr >> 4) & 7] & svga->dac_mask]; if (svga->blink & 16) fg = bg; } @@ -468,14 +468,14 @@ svga_render_2bpp_s3_lowres(svga_t *svga) else svga->ma += 4; svga->ma &= svga->vram_mask; - p[0] = p[1] = svga->pallook[svga->egapal[(dat[0] >> 6) & 3]]; - p[2] = p[3] = svga->pallook[svga->egapal[(dat[0] >> 4) & 3]]; - p[4] = p[5] = svga->pallook[svga->egapal[(dat[0] >> 2) & 3]]; - p[6] = p[7] = svga->pallook[svga->egapal[dat[0] & 3]]; - p[8] = p[9] = svga->pallook[svga->egapal[(dat[1] >> 6) & 3]]; - p[10] = p[11] = svga->pallook[svga->egapal[(dat[1] >> 4) & 3]]; - p[12] = p[13] = svga->pallook[svga->egapal[(dat[1] >> 2) & 3]]; - p[14] = p[15] = svga->pallook[svga->egapal[dat[1] & 3]]; + p[0] = p[1] = svga->pallook[svga->egapal[(dat[0] >> 6) & 3] & svga->dac_mask]; + p[2] = p[3] = svga->pallook[svga->egapal[(dat[0] >> 4) & 3] & svga->dac_mask]; + p[4] = p[5] = svga->pallook[svga->egapal[(dat[0] >> 2) & 3] & svga->dac_mask]; + p[6] = p[7] = svga->pallook[svga->egapal[dat[0] & 3] & svga->dac_mask]; + p[8] = p[9] = svga->pallook[svga->egapal[(dat[1] >> 6) & 3] & svga->dac_mask]; + p[10] = p[11] = svga->pallook[svga->egapal[(dat[1] >> 4) & 3] & svga->dac_mask]; + p[12] = p[13] = svga->pallook[svga->egapal[(dat[1] >> 2) & 3] & svga->dac_mask]; + p[14] = p[15] = svga->pallook[svga->egapal[dat[1] & 3] & svga->dac_mask]; p += 16; } } @@ -501,14 +501,14 @@ svga_render_2bpp_s3_lowres(svga_t *svga) svga->ma &= svga->vram_mask; - p[0] = p[1] = svga->pallook[svga->egapal[(dat[0] >> 6) & 3]]; - p[2] = p[3] = svga->pallook[svga->egapal[(dat[0] >> 4) & 3]]; - p[4] = p[5] = svga->pallook[svga->egapal[(dat[0] >> 2) & 3]]; - p[6] = p[7] = svga->pallook[svga->egapal[dat[0] & 3]]; - p[8] = p[9] = svga->pallook[svga->egapal[(dat[1] >> 6) & 3]]; - p[10] = p[11] = svga->pallook[svga->egapal[(dat[1] >> 4) & 3]]; - p[12] = p[13] = svga->pallook[svga->egapal[(dat[1] >> 2) & 3]]; - p[14] = p[15] = svga->pallook[svga->egapal[dat[1] & 3]]; + p[0] = p[1] = svga->pallook[svga->egapal[(dat[0] >> 6) & 3] & svga->dac_mask]; + p[2] = p[3] = svga->pallook[svga->egapal[(dat[0] >> 4) & 3] & svga->dac_mask]; + p[4] = p[5] = svga->pallook[svga->egapal[(dat[0] >> 2) & 3] & svga->dac_mask]; + p[6] = p[7] = svga->pallook[svga->egapal[dat[0] & 3] & svga->dac_mask]; + p[8] = p[9] = svga->pallook[svga->egapal[(dat[1] >> 6) & 3] & svga->dac_mask]; + p[10] = p[11] = svga->pallook[svga->egapal[(dat[1] >> 4) & 3] & svga->dac_mask]; + p[12] = p[13] = svga->pallook[svga->egapal[(dat[1] >> 2) & 3] & svga->dac_mask]; + p[14] = p[15] = svga->pallook[svga->egapal[dat[1] & 3] & svga->dac_mask]; p += 16; } @@ -566,14 +566,14 @@ svga_render_2bpp_s3_highres(svga_t *svga) else svga->ma += 4; svga->ma &= svga->vram_mask; - p[0] = svga->pallook[svga->egapal[(dat[0] >> 6) & 3]]; - p[1] = svga->pallook[svga->egapal[(dat[0] >> 4) & 3]]; - p[2] = svga->pallook[svga->egapal[(dat[0] >> 2) & 3]]; - p[3] = svga->pallook[svga->egapal[dat[0] & 3]]; - p[4] = svga->pallook[svga->egapal[(dat[1] >> 6) & 3]]; - p[5] = svga->pallook[svga->egapal[(dat[1] >> 4) & 3]]; - p[6] = svga->pallook[svga->egapal[(dat[1] >> 2) & 3]]; - p[7] = svga->pallook[svga->egapal[dat[1] & 3]]; + p[0] = svga->pallook[svga->egapal[(dat[0] >> 6) & 3] & svga->dac_mask]; + p[1] = svga->pallook[svga->egapal[(dat[0] >> 4) & 3] & svga->dac_mask]; + p[2] = svga->pallook[svga->egapal[(dat[0] >> 2) & 3] & svga->dac_mask]; + p[3] = svga->pallook[svga->egapal[dat[0] & 3] & svga->dac_mask]; + p[4] = svga->pallook[svga->egapal[(dat[1] >> 6) & 3] & svga->dac_mask]; + p[5] = svga->pallook[svga->egapal[(dat[1] >> 4) & 3] & svga->dac_mask]; + p[6] = svga->pallook[svga->egapal[(dat[1] >> 2) & 3] & svga->dac_mask]; + p[7] = svga->pallook[svga->egapal[dat[1] & 3] & svga->dac_mask]; p += 8; } } @@ -599,14 +599,14 @@ svga_render_2bpp_s3_highres(svga_t *svga) svga->ma &= svga->vram_mask; - p[0] = svga->pallook[svga->egapal[(dat[0] >> 6) & 3]]; - p[1] = svga->pallook[svga->egapal[(dat[0] >> 4) & 3]]; - p[2] = svga->pallook[svga->egapal[(dat[0] >> 2) & 3]]; - p[3] = svga->pallook[svga->egapal[dat[0] & 3]]; - p[4] = svga->pallook[svga->egapal[(dat[1] >> 6) & 3]]; - p[5] = svga->pallook[svga->egapal[(dat[1] >> 4) & 3]]; - p[6] = svga->pallook[svga->egapal[(dat[1] >> 2) & 3]]; - p[7] = svga->pallook[svga->egapal[dat[1] & 3]]; + p[0] = svga->pallook[svga->egapal[(dat[0] >> 6) & 3] & svga->dac_mask]; + p[1] = svga->pallook[svga->egapal[(dat[0] >> 4) & 3] & svga->dac_mask]; + p[2] = svga->pallook[svga->egapal[(dat[0] >> 2) & 3] & svga->dac_mask]; + p[3] = svga->pallook[svga->egapal[dat[0] & 3] & svga->dac_mask]; + p[4] = svga->pallook[svga->egapal[(dat[1] >> 6) & 3] & svga->dac_mask]; + p[5] = svga->pallook[svga->egapal[(dat[1] >> 4) & 3] & svga->dac_mask]; + p[6] = svga->pallook[svga->egapal[(dat[1] >> 2) & 3] & svga->dac_mask]; + p[7] = svga->pallook[svga->egapal[dat[1] & 3] & svga->dac_mask]; p += 8; } @@ -652,17 +652,17 @@ svga_render_2bpp_headland_highres(svga_t *svga) svga->ma &= svga->vram_mask; dat = edatlookup[edat[0] >> 6][edat[1] >> 6] | (edatlookup[edat[2] >> 6][edat[3] >> 6] << 2); - p[0] = svga->pallook[svga->egapal[(dat >> 4) & svga->plane_mask]]; - p[1] = svga->pallook[svga->egapal[dat & svga->plane_mask]]; + p[0] = svga->pallook[svga->egapal[(dat >> 4) & svga->plane_mask] & svga->dac_mask]; + p[1] = svga->pallook[svga->egapal[dat & svga->plane_mask] & svga->dac_mask]; dat = edatlookup[(edat[0] >> 4) & 3][(edat[1] >> 4) & 3] | (edatlookup[(edat[2] >> 4) & 3][(edat[3] >> 4) & 3] << 2); - p[2] = svga->pallook[svga->egapal[(dat >> 4) & svga->plane_mask]]; - p[3] = svga->pallook[svga->egapal[dat & svga->plane_mask]]; + p[2] = svga->pallook[svga->egapal[(dat >> 4) & svga->plane_mask] & svga->dac_mask]; + p[3] = svga->pallook[svga->egapal[dat & svga->plane_mask] & svga->dac_mask]; dat = edatlookup[(edat[0] >> 2) & 3][(edat[1] >> 2) & 3] | (edatlookup[(edat[2] >> 2) & 3][(edat[3] >> 2) & 3] << 2); - p[4] = svga->pallook[svga->egapal[(dat >> 4) & svga->plane_mask]]; - p[5] = svga->pallook[svga->egapal[dat & svga->plane_mask]]; + p[4] = svga->pallook[svga->egapal[(dat >> 4) & svga->plane_mask] & svga->dac_mask]; + p[5] = svga->pallook[svga->egapal[dat & svga->plane_mask] & svga->dac_mask]; dat = edatlookup[edat[0] & 3][edat[1] & 3] | (edatlookup[edat[2] & 3][edat[3] & 3] << 2); - p[6] = svga->pallook[svga->egapal[(dat >> 4) & svga->plane_mask]]; - p[7] = svga->pallook[svga->egapal[dat & svga->plane_mask]]; + p[6] = svga->pallook[svga->egapal[(dat >> 4) & svga->plane_mask] & svga->dac_mask]; + p[7] = svga->pallook[svga->egapal[dat & svga->plane_mask] & svga->dac_mask]; p += 8; } @@ -868,8 +868,8 @@ svga_render_indexed_gfx(svga_t *svga, bool highres, bool combine8bits) } } else if (combine8bits) { if (svga->packed_4bpp) { - uint32_t p0 = svga->map8[c0]; - uint32_t p1 = svga->map8[c1]; + uint32_t p0 = svga->map8[c0 & svga->dac_mask]; + uint32_t p1 = svga->map8[c1 & svga->dac_mask]; const int outoffs = i << dwshift; for (int subx = 0; subx < dotwidth; subx++) p[outoffs + subx] = p0; @@ -877,14 +877,14 @@ svga_render_indexed_gfx(svga_t *svga, bool highres, bool combine8bits) p[outoffs + subx + dotwidth] = p1; } else { uint32_t ccombined = (c0 << 4) | c1; - uint32_t p0 = svga->map8[ccombined]; + uint32_t p0 = svga->map8[ccombined & svga->dac_mask]; const int outoffs = (i >> 1) << dwshift; for (int subx = 0; subx < dotwidth; subx++) p[outoffs + subx] = p0; } } else { - uint32_t p0 = svga->pallook[svga->egapal[c0]]; - uint32_t p1 = svga->pallook[svga->egapal[c1]]; + uint32_t p0 = svga->pallook[svga->egapal[c0] & svga->dac_mask]; + uint32_t p1 = svga->pallook[svga->egapal[c1] & svga->dac_mask]; const int outoffs = i << dwshift; for (int subx = 0; subx < dotwidth; subx++) p[outoffs + subx] = p0; @@ -934,16 +934,16 @@ svga_render_8bpp_clone_highres(svga_t *svga) for (x = 0; x <= (svga->hdisp /* + svga->scrollcache*/); x += 8) { dat = *(uint32_t *) (&svga->vram[svga->ma & svga->vram_display_mask]); - p[0] = svga->map8[dat & 0xff]; - p[1] = svga->map8[(dat >> 8) & 0xff]; - p[2] = svga->map8[(dat >> 16) & 0xff]; - p[3] = svga->map8[(dat >> 24) & 0xff]; + p[0] = svga->map8[dat & svga->dac_mask & 0xff]; + p[1] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[2] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[3] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; dat = *(uint32_t *) (&svga->vram[(svga->ma + 4) & svga->vram_display_mask]); - p[4] = svga->map8[dat & 0xff]; - p[5] = svga->map8[(dat >> 8) & 0xff]; - p[6] = svga->map8[(dat >> 16) & 0xff]; - p[7] = svga->map8[(dat >> 24) & 0xff]; + p[4] = svga->map8[dat & svga->dac_mask & 0xff]; + p[5] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[6] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[7] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; svga->ma += 8; p += 8; @@ -963,16 +963,16 @@ svga_render_8bpp_clone_highres(svga_t *svga) if (!svga->remap_required) { for (x = 0; x <= (svga->hdisp /* + svga->scrollcache*/); x += 8) { dat = *(uint32_t *) (&svga->vram[svga->ma & svga->vram_display_mask]); - p[0] = svga->map8[dat & 0xff]; - p[1] = svga->map8[(dat >> 8) & 0xff]; - p[2] = svga->map8[(dat >> 16) & 0xff]; - p[3] = svga->map8[(dat >> 24) & 0xff]; + p[0] = svga->map8[dat & svga->dac_mask & 0xff]; + p[1] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[2] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[3] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; dat = *(uint32_t *) (&svga->vram[(svga->ma + 4) & svga->vram_display_mask]); - p[4] = svga->map8[dat & 0xff]; - p[5] = svga->map8[(dat >> 8) & 0xff]; - p[6] = svga->map8[(dat >> 16) & 0xff]; - p[7] = svga->map8[(dat >> 24) & 0xff]; + p[4] = svga->map8[dat & svga->dac_mask & 0xff]; + p[5] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[6] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[7] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; svga->ma += 8; p += 8; @@ -981,10 +981,10 @@ svga_render_8bpp_clone_highres(svga_t *svga) for (x = 0; x <= (svga->hdisp /* + svga->scrollcache*/); x += 4) { addr = svga->remap_func(svga, svga->ma); dat = *(uint32_t *) (&svga->vram[addr & svga->vram_display_mask]); - p[0] = svga->map8[dat & 0xff]; - p[1] = svga->map8[(dat >> 8) & 0xff]; - p[2] = svga->map8[(dat >> 16) & 0xff]; - p[3] = svga->map8[(dat >> 24) & 0xff]; + p[0] = svga->map8[dat & svga->dac_mask & 0xff]; + p[1] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[2] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[3] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; svga->ma += 4; p += 4; @@ -1043,10 +1043,10 @@ svga_render_8bpp_lowres(svga_t *svga) if (!svga->remap_required) { for (x = 0; x <= (svga->hdisp + svga->scrollcache); x += 8) { dat = *(uint32_t *) (&svga->vram[svga->ma & svga->vram_display_mask]); - p[0] = p[1] = svga->map8[dat & 0xff]; - p[2] = p[3] = svga->map8[(dat >> 8) & 0xff]; - p[4] = p[5] = svga->map8[(dat >> 16) & 0xff]; - p[6] = p[7] = svga->map8[(dat >> 24) & 0xff]; + p[0] = p[1] = svga->map8[dat & svga->dac_mask & 0xff]; + p[2] = p[3] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[4] = p[5] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[6] = p[7] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; svga->ma += 4; p += 8; @@ -1055,10 +1055,10 @@ svga_render_8bpp_lowres(svga_t *svga) for (x = 0; x <= (svga->hdisp + svga->scrollcache); x += 8) { addr = svga->remap_func(svga, svga->ma); dat = *(uint32_t *) (&svga->vram[addr & svga->vram_display_mask]); - p[0] = p[1] = svga->map8[dat & 0xff]; - p[2] = p[3] = svga->map8[(dat >> 8) & 0xff]; - p[4] = p[5] = svga->map8[(dat >> 16) & 0xff]; - p[6] = p[7] = svga->map8[(dat >> 24) & 0xff]; + p[0] = p[1] = svga->map8[dat & svga->dac_mask & 0xff]; + p[2] = p[3] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[4] = p[5] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[6] = p[7] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; svga->ma += 4; p += 8; @@ -1091,16 +1091,16 @@ svga_render_8bpp_highres(svga_t *svga) for (x = 0; x <= (svga->hdisp /* + svga->scrollcache*/); x += 8) { dat = *(uint32_t *) (&svga->vram[svga->ma & svga->vram_display_mask]); - p[0] = svga->map8[dat & 0xff]; - p[1] = svga->map8[(dat >> 8) & 0xff]; - p[2] = svga->map8[(dat >> 16) & 0xff]; - p[3] = svga->map8[(dat >> 24) & 0xff]; + p[0] = svga->map8[dat & svga->dac_mask & 0xff]; + p[1] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[2] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[3] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; dat = *(uint32_t *) (&svga->vram[(svga->ma + 4) & svga->vram_display_mask]); - p[4] = svga->map8[dat & 0xff]; - p[5] = svga->map8[(dat >> 8) & 0xff]; - p[6] = svga->map8[(dat >> 16) & 0xff]; - p[7] = svga->map8[(dat >> 24) & 0xff]; + p[4] = svga->map8[dat & svga->dac_mask & 0xff]; + p[5] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[6] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[7] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; svga->ma += 8; p += 8; @@ -1120,16 +1120,16 @@ svga_render_8bpp_highres(svga_t *svga) if (!svga->remap_required) { for (x = 0; x <= (svga->hdisp /* + svga->scrollcache*/); x += 8) { dat = *(uint32_t *) (&svga->vram[svga->ma & svga->vram_display_mask]); - p[0] = svga->map8[dat & 0xff]; - p[1] = svga->map8[(dat >> 8) & 0xff]; - p[2] = svga->map8[(dat >> 16) & 0xff]; - p[3] = svga->map8[(dat >> 24) & 0xff]; + p[0] = svga->map8[dat & svga->dac_mask & 0xff]; + p[1] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[2] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[3] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; dat = *(uint32_t *) (&svga->vram[(svga->ma + 4) & svga->vram_display_mask]); - p[4] = svga->map8[dat & 0xff]; - p[5] = svga->map8[(dat >> 8) & 0xff]; - p[6] = svga->map8[(dat >> 16) & 0xff]; - p[7] = svga->map8[(dat >> 24) & 0xff]; + p[4] = svga->map8[dat & svga->dac_mask & 0xff]; + p[5] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[6] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[7] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; svga->ma += 8; p += 8; @@ -1138,10 +1138,10 @@ svga_render_8bpp_highres(svga_t *svga) for (x = 0; x <= (svga->hdisp /* + svga->scrollcache*/); x += 4) { addr = svga->remap_func(svga, svga->ma); dat = *(uint32_t *) (&svga->vram[addr & svga->vram_display_mask]); - p[0] = svga->map8[dat & 0xff]; - p[1] = svga->map8[(dat >> 8) & 0xff]; - p[2] = svga->map8[(dat >> 16) & 0xff]; - p[3] = svga->map8[(dat >> 24) & 0xff]; + p[0] = svga->map8[dat & svga->dac_mask & 0xff]; + p[1] = svga->map8[(dat >> 8) & svga->dac_mask & 0xff]; + p[2] = svga->map8[(dat >> 16) & svga->dac_mask & 0xff]; + p[3] = svga->map8[(dat >> 24) & svga->dac_mask & 0xff]; svga->ma += 4; p += 4; @@ -1173,19 +1173,19 @@ svga_render_8bpp_tseng_lowres(svga_t *svga) dat = *(uint32_t *) (&svga->vram[svga->ma & svga->vram_display_mask]); if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[0] = p[1] = svga->map8[dat & 0xff]; + p[0] = p[1] = svga->map8[dat & svga->dac_mask & 0xff]; dat >>= 8; if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[2] = p[3] = svga->map8[dat & 0xff]; + p[2] = p[3] = svga->map8[dat & svga->dac_mask & 0xff]; dat >>= 8; if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[4] = p[5] = svga->map8[dat & 0xff]; + p[4] = p[5] = svga->map8[dat & svga->dac_mask & 0xff]; dat >>= 8; if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[6] = p[7] = svga->map8[dat & 0xff]; + p[6] = p[7] = svga->map8[dat & svga->dac_mask & 0xff]; svga->ma += 4; p += 8; @@ -1214,36 +1214,36 @@ svga_render_8bpp_tseng_highres(svga_t *svga) dat = *(uint32_t *) (&svga->vram[svga->ma & svga->vram_display_mask]); if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[0] = svga->map8[dat & 0xff]; + p[0] = svga->map8[dat & svga->dac_mask & 0xff]; dat >>= 8; if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[1] = svga->map8[dat & 0xff]; + p[1] = svga->map8[dat & svga->dac_mask & 0xff]; dat >>= 8; if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[2] = svga->map8[dat & 0xff]; + p[2] = svga->map8[dat & svga->dac_mask & 0xff]; dat >>= 8; if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[3] = svga->map8[dat & 0xff]; + p[3] = svga->map8[dat & svga->dac_mask & 0xff]; dat = *(uint32_t *) (&svga->vram[(svga->ma + 4) & svga->vram_display_mask]); if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[4] = svga->map8[dat & 0xff]; + p[4] = svga->map8[dat & svga->dac_mask & 0xff]; dat >>= 8; if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[5] = svga->map8[dat & 0xff]; + p[5] = svga->map8[dat & svga->dac_mask & 0xff]; dat >>= 8; if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[6] = svga->map8[dat & 0xff]; + p[6] = svga->map8[dat & svga->dac_mask & 0xff]; dat >>= 8; if (svga->attrregs[0x10] & 0x80) dat = (dat & ~0xf0) | ((svga->attrregs[0x14] & 0x0f) << 4); - p[7] = svga->map8[dat & 0xff]; + p[7] = svga->map8[dat & svga->dac_mask & 0xff]; svga->ma += 8; p += 8; diff --git a/src/video/vid_table.c b/src/video/vid_table.c index b6e1bd2f9..13639d05d 100644 --- a/src/video/vid_table.c +++ b/src/video/vid_table.c @@ -60,9 +60,9 @@ video_cards[] = { { &ati18800_vga88_device }, { &ati28800_device }, { &compaq_ati28800_device }, -#if defined(DEV_BRANCH) && defined(USE_XL24) +#ifdef USE_XL24 { &ati28800_wonderxl24_device }, -#endif +#endif /* USE_XL24 */ { &ati18800_device }, { &ati18800_wonder_device }, { &cga_device }, @@ -236,7 +236,7 @@ video_cards[] = { { &s3_trio3d2x_agp_device }, #ifdef USE_G100 { &productiva_g100_device, VIDEO_FLAG_TYPE_SPECIAL }, -#endif +#endif /*USE_G100 */ { &velocity_100_agp_device }, { &velocity_200_agp_device }, { &voodoo_3_1000_agp_device }, diff --git a/src/video/vid_tgui9440.c b/src/video/vid_tgui9440.c index 490c724ce..9ec7e6105 100644 --- a/src/video/vid_tgui9440.c +++ b/src/video/vid_tgui9440.c @@ -145,12 +145,11 @@ typedef struct tgui_t { uint32_t pattern_32[8 * 8]; } accel; - uint8_t ext_gdc_regs[16]; /*TGUI9400CXi only*/ - uint8_t copy_latch[16]; + uint8_t copy_latch[16]; /*TGUI9400CXi only*/ uint8_t tgui_3d8, tgui_3d9; int oldmode; - uint8_t oldctrl1, newctrl1; + uint8_t oldctrl1; uint8_t oldctrl2, newctrl2; uint8_t oldgr0e, newgr0e; @@ -160,6 +159,7 @@ typedef struct tgui_t { int ramdac_state; uint8_t ramdac_ctrl; + uint8_t alt_clock; int clock_m, clock_n, clock_k; @@ -212,9 +212,6 @@ static void tgui_ext_writel(uint32_t addr, uint32_t val, void *priv); static __inline uint32_t dword_remap(svga_t *svga, uint32_t in_addr) { - if (svga->packed_chain4) - return in_addr; - return ((in_addr << 2) & 0x3fff0) | ((in_addr >> 14) & 0xc) | (in_addr & ~0x3fffc); } @@ -297,7 +294,7 @@ tgui_out(uint16_t addr, uint8_t val, void *priv) { tgui_t *tgui = (tgui_t *) priv; svga_t *svga = &tgui->svga; - uint8_t old; + uint8_t old, o; if (((addr & 0xFFF0) == 0x3D0 || (addr & 0xFFF0) == 0x3B0) && !(svga->miscout & 1)) addr ^= 0x60; @@ -331,6 +328,15 @@ tgui_out(uint16_t addr, uint8_t val, void *priv) svga->read_bank = svga->write_bank; return; + case 0x5a: + case 0x5b: + case 0x5c: + case 0x5d: + case 0x5e: + case 0x5f: + svga->seqregs[svga->seqaddr] = val; + return; + default: break; } @@ -344,20 +350,7 @@ tgui_out(uint16_t addr, uint8_t val, void *priv) if (tgui->ramdac_state == 4) { tgui->ramdac_state = 0; tgui->ramdac_ctrl = val; - switch ((tgui->ramdac_ctrl >> 4) & 0x0f) { - case 1: - svga->bpp = 15; - break; - case 3: - svga->bpp = 16; - break; - case 0x0d: - svga->bpp = (tgui->type >= TGUI_9660) ? 32 : 24; - break; - default: - svga->bpp = 8; - break; - } + //pclog("TGUI ramdac ctrl=%02x.\n", (tgui->ramdac_ctrl >> 4) & 0x0f); svga_recalctimings(svga); return; } @@ -374,30 +367,35 @@ tgui_out(uint16_t addr, uint8_t val, void *priv) break; case 0x3CF: - if (svga->gdcaddr == 0x23) { - svga->dpms = !!(val & 0x03); - svga_recalctimings(svga); - } - if (tgui->type == TGUI_9400CXI && svga->gdcaddr >= 16 && svga->gdcaddr < 32) { - old = tgui->ext_gdc_regs[svga->gdcaddr & 15]; - tgui->ext_gdc_regs[svga->gdcaddr & 15] = val; - if (svga->gdcaddr == 16) - tgui_recalcmapping(tgui); - return; - } + o = svga->gdcreg[svga->gdcaddr]; switch (svga->gdcaddr) { - case 0x6: + case 2: + svga->colourcompare = val; + break; + case 4: + svga->readplane = val & 3; + break; + case 5: + svga->writemode = val & 3; + svga->readmode = val & 8; + svga->chain2_read = val & 0x10; + break; + case 6: if (svga->gdcreg[6] != val) { svga->gdcreg[6] = val; tgui_recalcmapping(tgui); } - return; + break; + case 7: + svga->colournocare = val; + break; case 0x0e: svga->gdcreg[0xe] = val ^ 2; if ((svga->gdcreg[0xf] & 1) == 1) svga->read_bank = (svga->gdcreg[0xe]) * 65536; break; + case 0x0f: if (val & 1) svga->read_bank = (svga->gdcreg[0xe]) * 65536; @@ -414,6 +412,12 @@ tgui_out(uint16_t addr, uint8_t val, void *priv) svga->write_bank = (svga->seqregs[0xe]) * 65536; break; + case 0x23: + svga->dpms = !!(val & 0x03); + svga_recalctimings(svga); + break; + + case 0x2f: case 0x5a: case 0x5b: case 0x5c: @@ -426,11 +430,36 @@ tgui_out(uint16_t addr, uint8_t val, void *priv) default: break; } - break; + svga->gdcreg[svga->gdcaddr] = val; + + if (tgui->type == TGUI_9400CXI) { + if ((svga->gdcaddr >= 0x10) && (svga->gdcaddr <= 0x1f)) { + tgui_recalcmapping(tgui); + return; + } + } + svga->fast = (svga->gdcreg[8] == 0xff && !(svga->gdcreg[3] & 0x18) && !svga->gdcreg[1]) && ((svga->chain4 && (svga->packed_chain4 || svga->force_old_addr)) || svga->fb_only); + if (((svga->gdcaddr == 5) && ((val ^ o) & 0x70)) || ((svga->gdcaddr == 6) && ((val ^ o) & 1))) + svga_recalctimings(svga); + return; case 0x3D4: svga->crtcreg = val; return; case 0x3D5: + if (!(svga->seqregs[0x0e] & 0x80) && !tgui->oldmode) { + switch (svga->crtcreg) { + case 0x21: + case 0x29: + case 0x2a: + case 0x38: + case 0x39: + case 0x3b: + case 0x3c: + return; + default: + break; + } + } if ((svga->crtcreg < 7) && (svga->crtc[0x11] & 0x80)) return; if ((svga->crtcreg == 7) && (svga->crtc[0x11] & 0x80)) @@ -482,8 +511,12 @@ tgui_out(uint16_t addr, uint8_t val, void *priv) svga->hwcursor.y = (svga->crtc[0x42] | (svga->crtc[0x43] << 8)) & 0x7ff; if ((tgui->accel.ger22 & 0xff) == 8) { - if (svga->bpp != 24) + if (svga->bpp != 24) { svga->hwcursor.x <<= 1; + svga_recalctimings(svga); + if ((svga->vdisp == 1022) && svga->interlace) + svga->hwcursor.x >>= 1; + } } svga->hwcursor.xoff = svga->crtc[0x46] & 0x3f; @@ -531,6 +564,10 @@ tgui_out(uint16_t addr, uint8_t val, void *priv) svga->read_bank = (val & 0x3f) * 65536; return; + case 0x3DB: + tgui->alt_clock = val & 0xe3; + return; + case 0x43c8: tgui->clock_n = val & 0x7f; tgui->clock_m = (tgui->clock_m & ~1) | (val >> 7); @@ -591,6 +628,8 @@ tgui_in(uint16_t addr, void *priv) return tgui->oldctrl1 | 0x88; return svga->seqregs[0x0e]; } + if ((svga->seqaddr >= 0x5a) && (svga->seqaddr <= 0x5f)) + return svga->seqregs[svga->seqaddr]; break; case 0x3C6: @@ -610,10 +649,10 @@ tgui_in(uint16_t addr, void *priv) break; case 0x3CF: - if (tgui->type == TGUI_9400CXI && svga->gdcaddr >= 16 && svga->gdcaddr < 32) - return tgui->ext_gdc_regs[svga->gdcaddr & 15]; if (svga->gdcaddr >= 0x5a && svga->gdcaddr <= 0x5f) return svga->gdcreg[svga->gdcaddr]; + if (svga->gdcaddr == 0x2f) + return svga->gdcreg[svga->gdcaddr]; break; case 0x3D4: return svga->crtcreg; @@ -636,6 +675,8 @@ tgui_in(uint16_t addr, void *priv) return tgui->tgui_3d8; case 0x3d9: return tgui->tgui_3d9; + case 0x3db: + return tgui->alt_clock; default: break; @@ -650,28 +691,49 @@ tgui_recalctimings(svga_t *svga) uint8_t ger22lower = (tgui->accel.ger22 & 0xff); uint8_t ger22upper = (tgui->accel.ger22 >> 8); - if (!svga->rowoffset) - svga->rowoffset = 0x100; - - if (svga->crtc[0x29] & 0x10) - svga->rowoffset |= 0x100; + if (tgui->type >= TGUI_9440) { + if ((svga->crtc[0x38] & 0x19) == 0x09) + svga->bpp = 32; + else { + switch ((tgui->ramdac_ctrl >> 4) & 0x0f) { + case 0x01: + svga->bpp = 15; + break; + case 0x03: + svga->bpp = 16; + break; + case 0x0d: + svga->bpp = 24; + break; + default: + svga->bpp = 8; + break; + } + } + } if ((tgui->type >= TGUI_9440) && (svga->bpp >= 24)) - svga->hdisp = (svga->crtc[1] + 1) * 8; + svga->hdisp = (svga->crtc[1] + 1) << 3; + + if (((svga->crtc[0x29] & 0x30) && (svga->bpp >= 15)) || !svga->rowoffset) + svga->rowoffset |= 0x100; + + //pclog("BPP=%d, DataWidth=%02x, CRTC29 bit 4-5=%02x, pixbusmode=%02x, rowoffset=%02x, doublerowoffset=%x.\n", svga->bpp, svga->crtc[0x2a] & 0x40, svga->crtc[0x29] & 0x30, svga->crtc[0x38], svga->rowoffset, svga->gdcreg[0x2f] & 4); if ((svga->crtc[0x1e] & 0xA0) == 0xA0) svga->ma_latch |= 0x10000; - if ((svga->crtc[0x27] & 0x01) == 0x01) + if (svga->crtc[0x27] & 0x01) svga->ma_latch |= 0x20000; - if ((svga->crtc[0x27] & 0x02) == 0x02) + if (svga->crtc[0x27] & 0x02) svga->ma_latch |= 0x40000; - if ((svga->crtc[0x27] & 0x04) == 0x04) + if (svga->crtc[0x27] & 0x04) svga->ma_latch |= 0x80000; if (svga->crtc[0x27] & 0x08) svga->split |= 0x400; if (svga->crtc[0x27] & 0x10) svga->dispend |= 0x400; + if (svga->crtc[0x27] & 0x20) svga->vsyncstart |= 0x400; if (svga->crtc[0x27] & 0x40) @@ -684,15 +746,18 @@ tgui_recalctimings(svga_t *svga) svga->lowres = 0; } + svga->interlace = !!(svga->crtc[0x1e] & 4); + if (svga->interlace && (tgui->type < TGUI_9440)) + svga->rowoffset >>= 1; + + if (svga->vdisp == 1020) + svga->vdisp += 2; + if ((tgui->oldctrl2 & 0x10) || (svga->crtc[0x2a] & 0x40)) svga->ma_latch <<= 1; svga->lowres = !(svga->crtc[0x2a] & 0x40); - svga->interlace = !!(svga->crtc[0x1e] & 4); - if (svga->interlace && (tgui->type < TGUI_9440)) - svga->rowoffset >>= 1; - if (tgui->type >= TGUI_9440) { if (svga->miscout & 8) svga->clock = (cpuclock * (double) (1ULL << 32)) / (((tgui->clock_n + 8) * 14318180.0) / ((tgui->clock_m + 2) * (1 << tgui->clock_k))); @@ -749,6 +814,7 @@ tgui_recalctimings(svga_t *svga) default: break; } + if (svga->gdcreg[0xf] & 0x08) { svga->htotal <<= 1; svga->hdisp <<= 1; @@ -760,7 +826,24 @@ tgui_recalctimings(svga_t *svga) switch (svga->bpp) { case 8: svga->render = svga_render_8bpp_highres; + if (svga->vdisp == 1022) { + if (svga->interlace) + svga->dispend++; + else + svga->dispend += 2; + } if (tgui->type >= TGUI_9660) { + switch (svga->vdisp) { + case 1024: + case 1200: + svga->htotal <<= 1; + svga->hdisp <<= 1; + svga->hdisp_time <<= 1; + break; + default: + break; + } +#if OLD_CODE if (svga->dispend == ((1024 >> 1) - 2)) svga->dispend += 2; if (svga->dispend == (1024 >> 1)) @@ -773,6 +856,7 @@ tgui_recalctimings(svga_t *svga) else if (!svga->interlace && (svga->dispend == 768)) svga->hdisp <<= 1; } +#endif if (ger22upper & 0x80) { svga->htotal <<= 1; @@ -782,7 +866,7 @@ tgui_recalctimings(svga_t *svga) switch (svga->hdisp) { case 640: if (!ger22lower) - svga->rowoffset = 80; + svga->rowoffset = 0x50; break; default: @@ -806,11 +890,10 @@ tgui_recalctimings(svga_t *svga) svga->hdisp = (svga->hdisp << 1) / 3; break; case 32: + if (svga->rowoffset == 0x100) + svga->rowoffset <<= 1; + svga->render = svga_render_32bpp_highres; - if (tgui->type >= TGUI_9660) { - if (!ger22upper) - svga->rowoffset <<= 1; - } break; default: @@ -825,14 +908,14 @@ tgui_recalcmapping(tgui_t *tgui) svga_t *svga = &tgui->svga; if (tgui->type == TGUI_9400CXI) { - if (tgui->ext_gdc_regs[0] & EXT_CTRL_LATCH_COPY) { + if (svga->gdcreg[0x10] & EXT_CTRL_LATCH_COPY) { mem_mapping_set_handler(&tgui->linear_mapping, tgui_ext_linear_read, NULL, NULL, tgui_ext_linear_write, tgui_ext_linear_writew, tgui_ext_linear_writel); mem_mapping_set_handler(&svga->mapping, tgui_ext_read, NULL, NULL, tgui_ext_write, tgui_ext_writew, tgui_ext_writel); - } else if (tgui->ext_gdc_regs[0] & EXT_CTRL_MONO_EXPANSION) { + } else if (svga->gdcreg[0x10] & EXT_CTRL_MONO_EXPANSION) { mem_mapping_set_handler(&tgui->linear_mapping, svga_read_linear, svga_readw_linear, svga_readl_linear, tgui_ext_linear_write, tgui_ext_linear_writew, tgui_ext_linear_writel); @@ -928,7 +1011,7 @@ tgui_recalcmapping(tgui_t *tgui) } if (tgui->type >= TGUI_9440) { - if ((tgui->mmio_base != 0x00000000) && (svga->crtc[0x39] & 1)) + if ((tgui->mmio_base != 0x00000000) && (svga->crtc[0x39] & 0x01)) mem_mapping_set_addr(&tgui->mmio_mapping, tgui->mmio_base, 0x10000); else mem_mapping_disable(&tgui->mmio_mapping); @@ -939,7 +1022,7 @@ static void tgui_hwcursor_draw(svga_t *svga, int displine) { uint32_t dat[2]; - int offset = svga->hwcursor_latch.x + svga->hwcursor_latch.xoff; + int offset = svga->hwcursor_latch.x - svga->hwcursor_latch.xoff; int pitch = (svga->hwcursor_latch.cur_xsize == 64) ? 16 : 8; if (svga->interlace && svga->hwcursor_oddeven) @@ -1121,26 +1204,24 @@ tgui_ext_linear_read(uint32_t addr, void *priv) svga_t *svga = (svga_t *) priv; tgui_t *tgui = (tgui_t *) svga->priv; - cycles -= video_timing_read_b; + cycles -= svga->monitor->mon_video_timing_read_b; addr &= svga->decode_mask; if (addr >= svga->vram_max) return 0xff; addr &= svga->vram_mask; - addr &= (tgui->ext_gdc_regs[0] & EXT_CTRL_LATCH_COPY) ? ~0x0f : ~0x07; - addr = dword_remap(svga, addr); + addr &= ~0x0f; + addr = dword_remap(svga, addr); - if (tgui->ext_gdc_regs[0] & EXT_CTRL_LATCH_COPY) { - for (int c = 0; c < 16; c++) { - tgui->copy_latch[c] = svga->vram[addr]; - addr += (c & 3) ? 1 : 13; - addr &= svga->vram_mask; - } - return svga->vram[addr]; + for (int i = 0; i < 16; i++) { + tgui->copy_latch[i] = svga->vram[addr]; + addr += ((i & 3) == 3) ? 0x0d : 0x01; } - return svga_read_linear(addr, svga); + addr &= svga->vram_mask; + + return svga->vram[addr]; } static uint8_t @@ -1158,65 +1239,77 @@ tgui_ext_linear_write(uint32_t addr, uint8_t val, void *priv) { svga_t *svga = (svga_t *) priv; const tgui_t *tgui = (tgui_t *) svga->priv; - int c; - int bpp = (tgui->ext_gdc_regs[0] & EXT_CTRL_16BIT); - uint8_t fg[2] = { tgui->ext_gdc_regs[4], tgui->ext_gdc_regs[5] }; - uint8_t bg[2] = { tgui->ext_gdc_regs[1], tgui->ext_gdc_regs[2] }; - uint8_t mask = tgui->ext_gdc_regs[7]; + int bpp = (svga->gdcreg[0x10] & EXT_CTRL_16BIT); + uint8_t fg[2] = { svga->gdcreg[0x14], svga->gdcreg[0x15] }; + uint8_t bg[2] = { svga->gdcreg[0x11], svga->gdcreg[0x12] }; - cycles -= video_timing_write_b; + cycles -= svga->monitor->mon_video_timing_write_b; addr &= svga->decode_mask; if (addr >= svga->vram_max) return; - addr &= svga->vram_mask; - addr &= (tgui->ext_gdc_regs[0] & EXT_CTRL_LATCH_COPY) ? ~0x0f : ~0x07; - addr = dword_remap(svga, addr); + addr &= svga->vram_mask; + addr &= (svga->gdcreg[0x10] & EXT_CTRL_LATCH_COPY) ? ~0x0f : ~0x07; + addr = dword_remap(svga, addr); + svga->changedvram[addr >> 12] = svga->monitor->mon_changeframecount; - if (tgui->ext_gdc_regs[0] & EXT_CTRL_LATCH_COPY) { - for (c = 0; c < 16; c++) { - svga->vram[addr] = tgui->copy_latch[c]; - addr += ((c & 3) == 3) ? 13 : 1; + if (svga->gdcreg[0x10] & EXT_CTRL_LATCH_COPY) { + for (int i = 0; i < 8; i++) { + if (val & (0x80 >> i)) + svga->vram[addr] = tgui->copy_latch[i]; + + addr += ((i & 3) == 3) ? 0x0d : 0x01; addr &= svga->vram_mask; } - } else if (tgui->ext_gdc_regs[0] & (EXT_CTRL_MONO_EXPANSION | EXT_CTRL_MONO_TRANSPARENT)) { - if (tgui->ext_gdc_regs[0] & EXT_CTRL_MONO_TRANSPARENT) { + } else { + if (svga->gdcreg[0x10] & EXT_CTRL_MONO_TRANSPARENT) { if (bpp) { - for (c = 7; c >= 0; c--) { - if ((val & mask) & (1 << c)) - svga->vram[addr] = fg[(c & 1) ^ 1]; - addr += (c & 3) ? 1 : 13; + for (int i = 0; i < 8; i++) { + if (val & (0x80 >> i)) + svga->vram[addr] = fg[i & 1]; + + addr += ((i & 3) == 3) ? 0x0d : 0x01; addr &= svga->vram_mask; } } else { - for (c = 7; c >= 0; c--) { - if ((val & mask) & (1 << c)) - svga->vram[addr] = tgui->ext_gdc_regs[4]; - addr += (c == 4) ? 13 : 1; + for (int i = 0; i < 8; i++) { + if (val & (0x80 >> i)) + svga->vram[addr] = fg[0]; + + addr += ((i & 3) == 3) ? 0x0d : 0x01; addr &= svga->vram_mask; } } } else { if (bpp) { - for (c = 7; c >= 0; c--) { - if (mask & (1 << c)) - svga->vram[addr] = (val & (1 << c)) ? fg[(c & 1) ^ 1] : bg[(c & 1) ^ 1]; - addr += (c & 3) ? 1 : 13; + for (int i = 0; i < 8; i++) { + if (val & (0x80 >> i)) { + if (svga->gdcreg[0x17] & (0x80 >> i)) + svga->vram[addr] = fg[i & 1]; + } else { + if (svga->gdcreg[0x17] & (0x80 >> i)) + svga->vram[addr] = bg[i & 1]; + } + addr += ((i & 3) == 3) ? 0x0d : 0x01; addr &= svga->vram_mask; } } else { - for (c = 7; c >= 0; c--) { - if (mask & (1 << c)) - svga->vram[addr] = (val & (1 << c)) ? tgui->ext_gdc_regs[4] : tgui->ext_gdc_regs[1]; - addr += (c == 4) ? 13 : 1; + for (int i = 0; i < 8; i++) { + if (val & (0x80 >> i)) { + if (svga->gdcreg[0x17] & (0x80 >> i)) + svga->vram[addr] = fg[0]; + } else { + if (svga->gdcreg[0x17] & (0x80 >> i)) + svga->vram[addr] = bg[0]; + } + addr += ((i & 3) == 3) ? 0x0d : 0x01; addr &= svga->vram_mask; } } } - } else - svga_write_linear(addr, val, svga); + } } static void @@ -1224,94 +1317,85 @@ tgui_ext_linear_writew(uint32_t addr, uint16_t val, void *priv) { svga_t *svga = (svga_t *) priv; const tgui_t *tgui = (tgui_t *) svga->priv; - int c; - int bpp = (tgui->ext_gdc_regs[0] & EXT_CTRL_16BIT); - uint8_t fg[2] = { tgui->ext_gdc_regs[4], tgui->ext_gdc_regs[5] }; - uint8_t bg[2] = { tgui->ext_gdc_regs[1], tgui->ext_gdc_regs[2] }; - uint16_t mask = (tgui->ext_gdc_regs[7] << 8) | tgui->ext_gdc_regs[8]; + int bpp = (svga->gdcreg[0x10] & EXT_CTRL_16BIT); + uint8_t fg[2] = { svga->gdcreg[0x14], svga->gdcreg[0x15] }; + uint8_t bg[2] = { svga->gdcreg[0x11], svga->gdcreg[0x12] }; + uint16_t mask = svga->gdcreg[0x18] | (svga->gdcreg[0x17] << 8); - cycles -= video_timing_write_w; + cycles -= svga->monitor->mon_video_timing_write_w; addr &= svga->decode_mask; if (addr >= svga->vram_max) return; + addr &= svga->vram_mask; - addr &= (tgui->ext_gdc_regs[0] & EXT_CTRL_LATCH_COPY) ? ~0x0f : ~0x07; + addr &= ~0x0f; + addr = dword_remap(svga, addr); - addr = dword_remap(svga, addr); svga->changedvram[addr >> 12] = svga->monitor->mon_changeframecount; - val = (val >> 8) | (val << 8); - if (tgui->ext_gdc_regs[0] & EXT_CTRL_LATCH_COPY) { - for (c = 0; c < 16; c++) { - svga->vram[addr] = tgui->copy_latch[c]; - addr += (c & 3) ? 1 : 13; + if (svga->gdcreg[0x10] & EXT_CTRL_LATCH_COPY) { + for (int i = 0; i < 16; i++) { + if (val & (0x8000 >> i)) + svga->vram[addr] = tgui->copy_latch[i]; + + addr += ((i & 3) == 3) ? 0x0d : 0x01; addr &= svga->vram_mask; } - } else if (tgui->ext_gdc_regs[0] & (EXT_CTRL_MONO_EXPANSION | EXT_CTRL_MONO_TRANSPARENT)) { - if (tgui->ext_gdc_regs[0] & EXT_CTRL_MONO_TRANSPARENT) { + } else { + if (svga->gdcreg[0x10] & EXT_CTRL_MONO_TRANSPARENT) { if (bpp) { - for (c = 15; c >= 0; c--) { - if ((val & mask) & (1 << c)) - svga->vram[addr] = fg[(c & 1) ^ 1]; - addr += (c & 3) ? 1 : 13; + for (int i = 0; i < 16; i++) { + if (val & (0x8000 >> i)) + svga->vram[addr] = fg[i & 1]; + + addr += ((i & 3) == 3) ? 0x0d : 0x01; addr &= svga->vram_mask; } } else { - for (c = 15; c >= 0; c--) { - if ((val & mask) & (1 << c)) - svga->vram[addr] = tgui->ext_gdc_regs[4]; + for (int i = 0; i < 16; i++) { + if (val & (0x8000 >> i)) + svga->vram[addr] = fg[0]; - addr += (c & 3) ? 1 : 13; + addr += ((i & 3) == 3) ? 0x0d : 0x01; addr &= svga->vram_mask; } } } else { if (bpp) { - for (c = 15; c >= 0; c--) { - if (mask & (1 << c)) - svga->vram[addr] = (val & (1 << c)) ? fg[(c & 1) ^ 1] : bg[(c & 1) ^ 1]; - addr += (c & 3) ? 1 : 13; + for (int i = 0; i < 16; i++) { + if (val & (0x8000 >> i)) { + if (mask & (0x8000 >> i)) + svga->vram[addr] = fg[i & 1]; + } else { + if (mask & (0x8000 >> i)) + svga->vram[addr] = bg[i & 1]; + } + addr += ((i & 3) == 3) ? 0x0d : 0x01; addr &= svga->vram_mask; } } else { - for (c = 15; c >= 0; c--) { - if (mask & (1 << c)) - svga->vram[addr] = (val & (1 << c)) ? tgui->ext_gdc_regs[4] : tgui->ext_gdc_regs[1]; - - addr += (c & 3) ? 1 : 13; + for (int i = 0; i < 16; i++) { + if (val & (0x8000 >> i)) { + if (mask & (0x8000 >> i)) + svga->vram[addr] = fg[0]; + } else { + if (mask & (0x8000 >> i)) + svga->vram[addr] = bg[0]; + } + addr += ((i & 3) == 3) ? 0x0d : 0x01; addr &= svga->vram_mask; } } } - } else - svga_writew_linear(addr, val, svga); + } } static void tgui_ext_linear_writel(uint32_t addr, uint32_t val, void *priv) { - svga_t *svga = (svga_t *) priv; - const tgui_t *tgui = (tgui_t *) svga->priv; - - cycles -= video_timing_write_l; - - addr &= svga->decode_mask; - if (addr >= svga->vram_max) - return; - addr &= svga->vram_mask; - addr &= (tgui->ext_gdc_regs[0] & EXT_CTRL_LATCH_COPY) ? ~0x0f : ~0x07; - - addr = dword_remap(svga, addr); - svga->changedvram[addr >> 12] = svga->monitor->mon_changeframecount; - - if (tgui->ext_gdc_regs[0] & (EXT_CTRL_MONO_EXPANSION | EXT_CTRL_MONO_TRANSPARENT | EXT_CTRL_LATCH_COPY)) { - tgui_ext_linear_writew(addr, val & 0xffff, priv); - tgui_ext_linear_writew(addr + 2, val >> 16, priv); - } else { - svga_writel_linear(addr, val, svga); - } + tgui_ext_linear_writew(addr, val, priv); } static void @@ -1484,6 +1568,8 @@ tgui_accel_command(int count, uint32_t cpu_dat, tgui_t *tgui) case 32: tgui->accel.pitch <<= 1; break; + default: + break; } #if 0 pclog("TGUI accel command = %x, ger22 = %04x, hdisp = %d, dispend = %d, vtotal = %d, rowoffset = %d, svgabpp = %d, interlace = %d, accelbpp = %d, pitch = %d.\n", tgui->accel.command, tgui->accel.ger22, svga->hdisp, svga->dispend, svga->vtotal, svga->rowoffset, svga->bpp, svga->interlace, tgui->accel.bpp, tgui->accel.pitch); @@ -3213,7 +3299,6 @@ tgui_init(const device_t *info) if (tgui->type >= TGUI_9440) { svga->packed_chain4 = 1; - tgui->i2c = i2c_gpio_init("ddc_tgui"); tgui->ddc = ddc_init(i2c_gpio_get_bus(tgui->i2c)); } diff --git a/src/video/vid_tvp3026_ramdac.c b/src/video/vid_tvp3026_ramdac.c index b50d0406b..6c001b461 100644 --- a/src/video/vid_tvp3026_ramdac.c +++ b/src/video/vid_tvp3026_ramdac.c @@ -516,7 +516,7 @@ tvp3026_recalctimings(void *priv, svga_t *svga) svga->interlace = !!(ramdac->ccr & 0x40); /* TODO: Figure out gamma correction for 15/16 bpp color. */ - svga->lut_map = !!(svga->bpp >= 15 && (ramdac->true_color & 0xf0) != 0x00); + svga->lut_map = !!((svga->bpp >= 15 && (svga->bpp != 24)) && (ramdac->true_color & 0xf0) != 0x00); if (!(ramdac->clock_sel & 0x70)) { if (ramdac->mcr != 0x98) { diff --git a/src/video/vid_voodoo_banshee.c b/src/video/vid_voodoo_banshee.c index 5b6dbb434..8e2b98980 100644 --- a/src/video/vid_voodoo_banshee.c +++ b/src/video/vid_voodoo_banshee.c @@ -513,7 +513,7 @@ banshee_render_16bpp_tiled(svga_t *svga) if (addr >= svga->vram_max) return; - for (int x = 0; x <= svga->hdisp; x += 64) { + for (int x = 0; x < svga->hdisp; x += 64) { if (svga->hwcursor_on || svga->overlay_on) svga->changedvram[addr >> 12] = 2; if (svga->changedvram[addr >> 12] || svga->fullchange) {