From be83a7c69db50c298b8446b1dae91bbd1cd5dfbc Mon Sep 17 00:00:00 2001 From: starfrost013 Date: Sun, 23 Mar 2025 23:42:44 +0000 Subject: [PATCH] FIFO optimisation, hitches down to ~1-2 seconds from 5 seconds, 70% speed idle at desktop --- doc/nvidia_notes/status.xlsx | Bin 15356 -> 15398 bytes .../86box/nv/classes/vid_nv3_classes.h | 2 + src/video/nv/nv3/subsystems/nv3_pfifo.c | 48 ++++++++++-------- 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/doc/nvidia_notes/status.xlsx b/doc/nvidia_notes/status.xlsx index 4118b2c5609b86ddcf989d9b715effdc97ee38e4..7d58d5250c8be03718dd86b5bfe94561227001a6 100644 GIT binary patch delta 4330 zcmZ8lWl$7s*IrT{Sh`Dak*-}@RzRd1=~%iu1r%YC&ZRp9q*Gj4Qd&uu4(V=5VS!iQ z`DUJXzVqYEnK{>e{kZR$>pJH~`BA78U@ooxFz&f^Yans|5kLOu|C# zxexVp7Y;jQD`4)yx?QjZq4J8mV_#KeyE3g<+OgBjOu@_DZ4(p2cvUXGEbK(` zSf1&P!j%y=>1DSxpGcl!2kmiXAs%TB)q*j*hj-#j_`luxdqZ#lamt$Iuj!$PZz_7) zuR%uBK{mYBYQn0U@cviT*7cgKR;@J*)aza4oNnWfpQJH$lk5wR&Ar?(8KG-`lvq_r zw^NzaN7B1GH>MRQ(%;nMZZZT^Il6wP#`3himid73v^qD`4Df*6)5GY4 ziJH(89xK+gtkwX`&m~Jne=v_o5apa@++ML#%ww5j5fy&^37*vL_8FhZpqrbf8y+kF z!l+%(Qt_u-;qOPTeBw}(v*@LxVyB75_WXu&ov^BCu3SnO5+tpi5zXs=Chg-ge~Mq_ zy7L9YbCk%lBSo)!y1w$95bsS;#3)|y;KhhCNq^N3RS_Lzi@ng!32Tki6f9nXABSVH z)$+jyCScDq4yW$L*T@<;H#N~RG0LqX6N=YgrVQ=Gr1Yx8579Z$3meoQVc{2RcM7$2 z6i(ulIEpeO;TlmpCkpK>ZHB`?;G1);OUI@g8Jz@nT;t=kl*DU3Cc)4=_;SVYOM}f< zjnB&Ssx2piT#5i|L1SBcq38P~qHq{_*Y_0Y-Wqf@1lPcm$MxW|Fa4hWjeZCFjph~$ zO0<=p2YY~#-_+sxdb-p??D6A}!_t3db3B@IT&61JhbQOnR@9y}{_{DCKw6?b*VTWW z*mQ?Xmdb)S?VvNj;+;08)tW{e(|NE(TK+aiu0>9(3N-s#aKTu;j?pI3iC<>guTZ%QT`|&L>Xx2`3o3<6rq#{KItY9{qyfQ#Y}=!8ew! z+61lnOLd9j+`>&$o^-ofU)SZ{S9nXk9P=n6JC#3ij%FF_*JN8DD0opI>T%f8fBZ4~BF;kt+``i-?Xq3@ilg%dC}GlkJHo`&UQcOrgEjMT?> z#KaF7Pd2T_1_1b|k`owc;SfGQ7w4R0X|g8e#JaU( z2-;xE*z&_gokRFBjmTUoNOwXxh;Zwj9WIkCvBFe>1P%A=Y?QlD3ARyz%ABj`&Dj7~ zhL$Cxo664}D$lo=Dok5UQo}^^&Mt&2cw|P|sQDH?tKn!miYAsNxO5D6sz@}nO_<&m ziq&=?S79;X_ilh9otF_Wvn8ycQW>A4ewNdI2>=;)kGNPc^0`#iUZcucv;DG>UC=Q6*4&&> zbKCh3IooiT=?&pKxSZTEH)jsaV1k<`kp18Y^X$WSYpCJYx!dPvViYJgL`*q`Ia3{F zo<$ME9H9F3)?14w#_zZ4Hd1Si=(Z;EJy&TA*gv*3WyCHZC{RgkR@7_l`UWb*%#g37 z7=>D*E?Z=`r=ws`x$dfI`1y8|yhBi)c!9g^5w-e}g`Yny{P+07uFD;R#9ZI`AsW_I zLHt`^M!D>|#$)+>Rd7)4@Ke%1jYr7j)Kbi_ZJDICFcd&;b$ zzdcF)8Fydr$43rQ=zjf;1W!1Bq;%}@An+L~f{S#sU;Nb!AH>G6kyRLyr z0l@g{(6;}B7W>MpX+^LA}v?^%JYw6N@W!09x?k?grez>~w(F82#luW4YqNPsR} z-g7LDOF8VZ!cm=aZ{$XWR;GXE4c#RTrKPN=?l_P?>zF=rt4#pZQsEG;Fk&?sNjju! zDA?xo_=|k3VhvaJs>4}^$Y1Otur_<3l{A?TDW&>0BD{!RcNw41m=H@L_NjoLZu83! z1_FiHjYebLReXtWSX2m_MA+4QAAI=>rvGupU#gtVFaiUyzf`L{{IIUCq%295`01vf zPCXufa_@ca+Hh@MY53i*)emvhM?A~-A~BWc;rhXzu#!^+0N-at|a zsOA3}bUk{>Tu_7^yQL2uN@xH6LpD6Qlg1hU#jWcN_>Ys9;{8`|1pccxsG*t#W(wSu zfZ`4;OQv;5UA370h}y@WCc3W;$YZT2zo((}1HAs#WOw--*#f=#{wgc05<|FlVcE_JN6HqhjA)$syW%<1)6uxd)g$0eP+}Jysb%CRST1n+Z0d+8xBVR z?kLdq4^Vl@c=~GJ`+oXZa!iYsjbV1ISF?=Q-0445G!b_JuO1~W4)>u0-(Aeegm=>E zoP{41+uxSNn>Eii5(Q=mMGD%FmVT@n<94O(C6gz%G~+^%k<5Zv(Z@LDg{2m7iUud{ zoB`=Pn_Zy~?=6?+6bSR?-S?-z-qO^yzc#v~`w-8!x6{oK_fu#{JC@IuRMtW4!e;3O zM;NdHjfE^AU#9&>1blY+1P1`9N{-?cfcLOIorC3IB#m6!C^Qo1_ocyD1EJd?bx641?f;m%#g#T$K=CoMUaSUFHa(-2Di#& z>KW1u{%m3%kV|<}FCfeYt(o!`>Py)T$Slaut<&KYJl9oo6xSZKl#mm0(5mz|g(K4h z__#=hU^J~w9tQ7WC!L-LHTnnymmPc7hujP#hGXVNI7KL_qfAz}0u4;6p}V#zU-=t< zHIB-Q60)0_1-E+-EkQeWVgLJx*yj*qz2&FWzT%6$8JP|qOILf3Vl+ndJnmG$%`F|B zpqWRyl0YepfM2FfK2CvNML{?4u3N-TfER^2@Ht4$8&rS9!{M?CN}7Bkrn z@)25K8UZa)%x%?|@^&76?Xg?nZvt~PlRN-VpT@xr%<8iqjlON)#4qz&aIA zztXpSwN!)*A8oOpv}1y!a5PnD}uSLf|?W>?V7VTv6O@J_OTPE6;%Ovrt`>wA>=E5XGM2p@g zC`6#kf;Qg}_gV+nJ{1G;4NwInNy8`$KS-g zEJFmFx&wu_9D)?IOVP*0t5)hReqYe8!K&lU%sq0WtTUM+rh3i z^+oN{w>2|If%|aD*2wZC&JcPSRkgo)f4=AZ$;Xv-R%3BZ$?sphKfCiGJ_;%wQ%l|wU)loVtCfB#JH!k-g4A|OZ%>oK^-|= zCBmOCB!9b^l?LJ3`L(fnmFga#8tOR%s1(G9hHod@0%o{jiB)p}oS@KsQn7vUID)Pk#U z;;d=*9{i5|s8F1hi%^$LO}2kQxP>@fl`r7+& z$_b}Ah|&qt3{G(6eDK{b81qZg4y0t9EHkc|sjCY8NiD0Ycl0@&G7$?DDiB8Wibc(> zh*FM~unf$+>5;=cc)FKe{E1^USD6u1lC?KP1^2qWRJq+JnI0}>0_LCjJzLbO)sfK| z7w?JG)>9LGU3@m|GbvFaSz&>->;<}W>Zpop=G%5wV(bz*7_t8~R^SrU3dDenkiUIA z>zivNQCg-!dCWH78Deq&yUl8SUdyTIR`kkR51mD=&1Aq`>7wyFFNRh%0bRE1Vm*{` z!B&_!*J*NHqaA0OZ3q8ji4PB008JGvYzhD_0PoLe_5{!dH)29o3vgh-HA27?VasZ0 z;-aya1FU1WAtOfXB3{JJsfr;Dt;lCcfipTIeC^bym7*U^lZYZec&?ruP^NLUhQ zbTG*q<`%NB#p0c3X1Ah)f@wzYouIzRWdhlUBi=?h^)Js6f&b}og1xs1HVlX6o5jm|%i>Fd_K8g{0o{-k z&G+@V3eusS?Nr`2_Y6C!QAjRl<1gw1&DxDCA!4n>B{XqrC~URG0r%f5=+-xWdzCj~ z2R?lmHc=}nPUd>M2OX5|o7E{P%Itn1Zxx=*I{Kz#7mT&WyG@CgGmK5OkS=LIJ{m^H zoRoCG*XgydT%%s;!X_P-x^NA2BQhIn!>6YE8WqAjqNe3Az+3?7qI zxWqwH3I|~Q&qqSGm^y}5@}3wk!@nyK06_ES-v5mz}GGIRz?-A(xDTvfv7c{cZgR9at$) delta 4298 zcmZ8lbx;)C*Ir_26qH(07M560T0lTVLPBEcR6=42DS-vlC6|z{B_$-6P*@ac7Nn8x zMmm%Zsr~8u&G)^(`R*TQ&fMpjb7$_Id7g7_A>z{K(-H~!<(Tth3nBpEIB|;vjQl3k zF7=2IY7xrqAn?=BVCH9&5{A_+$lz1$7d2gji-q0Pq@??&xfiq$smbMKF){m&_3TqG zTbC03r+eQ{`r}++@81ROYg!|ldov?k_%#slTx|CraRb;#oyo~Rkp5aYMRie(dmMV|3B z607F`FkD%D&X+zpfHEzq$p9{^0t#Xa0#P^?&QB*$HglW-^=Sg6(Xt?YFn)ltpeP+9BH zkCyD`HsUw)$KVju;uVjn+Y0LEyF-c6jVpyZ26(!FZpKrCF9x&)jKGm<)g6(BNVYd( zoHINwCcsj}t;Ioap%{;*6hnoH{PH62=)k@2?AOr8EAbl{>^=h$**nbcdy*2@&r>MX5B;!^n{~`Hb9gtGNiMDbIqEy;plUBF zOmVw_c;k=f<^2?`4PTzGh65EsF9zCrhf>;~JB!&b9o*8OM+mM^xwmA@EkMsToe zOQcbn;*)Dv{MePHkMQK27oaBgnHGk1Iso(_LQ6J;t1b4|v5rweJuNg&Aw`x$vy<K@IS67WZ$i*-6P)S+{@& z7b0x# zm^|Vvb)GEtFU7pgWD_G6VA7lUtviOF4|J{)SRHhLSW~x)q6<<@TxjkXgTfHM~Uz{4Yo!< zCB5qhwfVevhIEwd^I}kvCf-DaF7GG7b4~qtUYxL_N}!NjVUMERWBc$>6-3#>7{-$^ zJ>_(<`qPXBqv{74r80vLm+%1HJlm5hcr^Vs$;HD1V^xo8L@443TDFGd z=F*P*%vrEM*@RNeh6Uw>&xwA3NGPv<^)q#FMBA;fNC7qTZMw}YFr&hh?LsbBY36?7! zo{*+??;KyeR6piZU+MS#fXvFeq<6p3Q9^}tcphANiC4zv1|b0e03QEONJ`z-A{3!f zc^cX&wI-ttB+XJ?5@*}yQW^wET_2p$z=V$;9$QqK&a0& z)MWq@ZBS<~Rfs+(y9fdwIXV1UmFnawU|e+aX3}Ric^V&66eC z>enirzJ;$~szC4W$HKY(aNED{)%ljOh=6M`t|-!eo5k>Wr49^*U`bHOyrq@)&j}RvzDtDv;7Eh7M?ul}{y*$g{g9K6 zNpB^5VL3E&HI&j=%38@x(?F`1lJS$I%x@FQIe@rP^wU?bxarPmQ5YwjRl(V_$M#RcQ7I-zuFOLjl$r-m+!8kD znzG#>{z}eD0z19|x?tb^s-H=A84MQ>L&=v{^7kY6VuVE7F0&Ho%mqF;ZgWm?$Su@l zU(^nqCBIt0E2awXOR$E_w6H+^kl6*oja7Lpf^T`9{X%P3hufd&L#|i`9)A2`OZU{! zL^y+c8)7H;-6)ZdI$`}-l5h|tUws3-KegFmrzr;kH^kqvXE3`H{3a-`cBb8m{UYwWm z(2qJVOuKHK0l$5z}DojHb6IGXDSPG-ERO<7pG}W+O zy~E;Zc4GNV{@z1v7;2_^h8S(~1(wp4-O7irKg7t+V&f&FvBYT0vhGG<^1}Xc`f$Tb zT(7`?=}nRuw*2-8h}^YZuK4ti&1fV>+!aL<@o-O>XA*c!HSKhr5a>I1C$9hxw*8e_ zUv>t06^W^l4Htdzl+V`cN1ISA=)mu_!@n<_ItGAK$QegQrk4?Wm3l-jsj9|M;QMjI zVyAWc=4+3<&jb$yj^HkKPv6cuY#>mekqgl$3Np7#MC~?8)9=5kh=cf{9IC2^jvA2O zTsG#TcTz%yU%767A_-kYGBP!kvwAt&^QLEKrD8c9(-=^6AtH{7>9{9M&*Z&uuHHFX zJs-<}0*?xH&nISF@$SRI_Po^o*Y#0g2{Xe52-?SQmOC}W#CPsAowQf`rE&yUII%yt&rsKJL}QLd*P(C9M0vel=D3f}Itj$i?;td`zaDh(vBuQ1 z*enp0*o0CJH*_iFs_MYLvP~REmK3s{?G}3H$%ZNAF*LKBhZtABE=t#Ec0n#B9SSdx zfS{>#`J zrBr@(OkT0M94MdrHDJg(-xr%-FM{?Ch=5-HjyL_PRJ)53>Ng!i74Kj(LG1fU8Dn@2 z%&ugMByzzgv7fnaF>fpriQF@DzbGa@UfcqI4b1WPmtpn^X5pPRJgv&gp4t|8ZMSb; z$@fFcHg-(_QnH*BJN3n?9u=QN(z!Y(GIDre`pBdZclf$%urFWW;9*1aq*rn6_MnPm zDTSL3P^yfux)hL&0c1g2VDte71bNU$=0H^pz?fjK5!%vEhlnjiKBs@#3{8J)Aby3h z{B3|c@bU97CNq&r7GKnG^zk;G!O47O521lw+_is=ub^bK?g6Iw{1RRgMEAtcZs|3{ z$d277Z^j08$Wrei){^jz;;&mvs9n(<`CY`(l;FGlb64PZmN~{Km`GY&xF2%R^Bx&R z0purjz0cLlJqZNz6|ERVj70J))a)s_0wb=y*Ma@n_wpYK{w?tlG8U zWefPUp^93ua9!WDY9nY*vV~Jc3 zG(zRolb^~pJCaRh8e=m4o|;;HwH0;*UKf#dqotkTqvV-JC=*vVT=pr08JVB4-l~~% z1Xo_&8@oVi&nkPV>oae0r-w(?1Ur5_ea@aa7ybB2uT40==6H8{4h~^P$ZThxV0ySY zT{^++M&LAV?FPRyqkAo17DliPSNLtu1*E!0hlAo}*2xLKF>`{ZiX+s%?LHKtiNAM&iv3ay(b+&lH6e;rg=kN}FbBL?27 zy;U%4w^3^xHo^BPMhypra4Oz4uI?Kc(5>J%B4LjnwlpWdzsC*dp8!ou7x8zb?B>{c zudxlkW3hEyyF?B-SK3`>xe#=Qm@hH2-tH5bB~X;RHAKLF7Cy>RxJ3|TQ=CJxDHvl% zdwudi`16%#juB6P-K+AGtQ+n{&Nx!{SGz9K%o&Ag5^5Z6>#=*=ZC85FB?N721a9zY z1B+$>o;@o*}^UgMjcZfDS+cAp7GzoPhruY8U_jrawFXE-Y1|&;$BJ z4{3VVf7_J*PYR&@OTvDYE+P0Y<8qXdX8HS~Y6JiPoDcw@{kQ6$EF-Z=<{<$~;*|^+ J37OPi=s(wCCwc$? diff --git a/src/include/86box/nv/classes/vid_nv3_classes.h b/src/include/86box/nv/classes/vid_nv3_classes.h index 5e564fe29..076502f63 100644 --- a/src/include/86box/nv/classes/vid_nv3_classes.h +++ b/src/include/86box/nv/classes/vid_nv3_classes.h @@ -133,6 +133,8 @@ typedef enum nv3_pgraph_class_e #define NV3_IMAGE_IN_MEMORY_TOP_LEFT_OFFSET_END 22 #define NV3_W95TXT_COLORA 0x03FC // It's the colour of the text. This is used to submit a dummy object so the notifier can be used to sync in Win2000 DDraw6 drivers. +#define NV3_W95TXT_COLORA_RECT_START 0x0400 +#define NV3_W95TXT_COLORA_RECT_END 0x05FF /* Class context switch method */ typedef struct nv3_class_ctx_switch_method_s diff --git a/src/video/nv/nv3/subsystems/nv3_pfifo.c b/src/video/nv/nv3/subsystems/nv3_pfifo.c index d8b7cef96..088ddf1ba 100644 --- a/src/video/nv/nv3/subsystems/nv3_pfifo.c +++ b/src/video/nv/nv3/subsystems/nv3_pfifo.c @@ -653,14 +653,27 @@ that existed here before didn't make any sense #define NV3_GRAY_TABLE_NUM_ENTRIES 64 uint8_t nv3_pfifo_cache1_gray_code_table[NV3_GRAY_TABLE_NUM_ENTRIES] = { - 0b000000, 0b000001, 0b000011, 0b000010, 0b000110, 0b000111, 0b000101, 0b000100, - 0b001100, 0b001101, 0b001111, 0b001110, 0b001010, 0b001011, 0b001001, 0b001000, - 0b011000, 0b011001, 0b011011, 0b011010, 0b011110, 0b011111, 0b011101, 0b011100, - 0b010100, 0b010101, 0b010111, 0b010110, 0b010010, 0b010011, 0b010001, 0b010000, - 0b110000, 0b110001, 0b110011, 0b110010, 0b110110, 0b110111, 0b110101, 0b110100, - 0b111100, 0b111101, 0b111111, 0b111110, 0b111010, 0b111011, 0b111001, 0b111000, - 0b101000, 0b101001, 0b101011, 0b101010, 0b101110, 0b101111, 0b101101, 0b101100, - 0b100100, 0b100101, 0b100111, 0b100110, 0b100010, 0b100011, 0b100001, 0b100000 + 0b000000, 0b000001, 0b000011, 0b000010, 0b000110, 0b000111, 0b000101, 0b000100, //0x07 + 0b001100, 0b001101, 0b001111, 0b001110, 0b001010, 0b001011, 0b001001, 0b001000, //0x0F + 0b011000, 0b011001, 0b011011, 0b011010, 0b011110, 0b011111, 0b011101, 0b011100, //0x17 + 0b010100, 0b010101, 0b010111, 0b010110, 0b010010, 0b010011, 0b010001, 0b010000, //0x1F + 0b110000, 0b110001, 0b110011, 0b110010, 0b110110, 0b110111, 0b110101, 0b110100, //0x27 + 0b111100, 0b111101, 0b111111, 0b111110, 0b111010, 0b111011, 0b111001, 0b111000, //0x2F + 0b101000, 0b101001, 0b101011, 0b101010, 0b101110, 0b101111, 0b101101, 0b101100, //0x37 + 0b100100, 0b100101, 0b100111, 0b100110, 0b100010, 0b100011, 0b100001, 0b100000 //0x3F +}; + +/* The function is called up to hundreds of thousands of times per second, it's too slow to do anything else */ +uint8_t nv3_pfifo_cache1_binary_code_table[NV3_GRAY_TABLE_NUM_ENTRIES] = +{ + 0x00, 0x01, 0x03, 0x02, 0x07, 0x06, 0x04, 0x05, // 0x07 (0) + 0x0F, 0x0E, 0x0C, 0x0D, 0x08, 0x09, 0x0B, 0x0A, // 0x0F (1000) + 0x1F, 0x1E, 0x1C, 0x1D, 0x18, 0x19, 0x1B, 0x1A, // 0x17 (10000) + 0x10, 0x11, 0x13, 0x12, 0x17, 0x16, 0x14, 0x15, // 0x1F (11000) + 0x3F, 0x3E, 0x3C, 0x3D, 0x38, 0x39, 0x3B, 0x3A, // 0x27 (100000) + 0x30, 0x31, 0x33, 0x32, 0x37, 0x36, 0x34, 0x35, // 0x2F (101000) + 0x20, 0x21, 0x23, 0x22, 0x27, 0x26, 0x24, 0x25, // 0x37 (110000) + 0x2F, 0x2E, 0x2C, 0x2D, 0x28, 0x29, 0x2B, 0x2A, // 0X3f (111000) }; uint32_t nv3_pfifo_cache1_normal2gray(uint32_t val) @@ -673,14 +686,7 @@ Back to sanity */ uint32_t nv3_pfifo_cache1_gray2normal(uint32_t val) { - /* Is this a good idea? */ - for (uint32_t i = 0; i < NV3_GRAY_TABLE_NUM_ENTRIES; i++) - { - if (nv3_pfifo_cache1_gray_code_table[i] == val) - return i; - } - - return 0x00; + return nv3_pfifo_cache1_binary_code_table[val]; } // Submits graphics objects INTO cache0 @@ -854,13 +860,13 @@ void nv3_pfifo_cache1_push(uint32_t addr, uint32_t param) } // We didn't. Let's put it in CACHE1 - uint32_t current_put_address = nv3->pfifo.cache1_settings.put_address >> 2; - nv3->pfifo.cache1_entries[current_put_address].subchannel = subchannel; - nv3->pfifo.cache1_entries[current_put_address].method = method_offset; - nv3->pfifo.cache1_entries[current_put_address].data = param; + uint32_t current_put_index = nv3->pfifo.cache1_settings.put_address >> 2; + nv3->pfifo.cache1_entries[current_put_index].subchannel = subchannel; + nv3->pfifo.cache1_entries[current_put_index].method = method_offset; + nv3->pfifo.cache1_entries[current_put_index].data = param; // now we have to recalculate the cache1 put address - uint32_t next_put_address = nv3_pfifo_cache1_gray2normal(current_put_address); + uint32_t next_put_address = nv3_pfifo_cache1_gray2normal(current_put_index); next_put_address++; if (nv3->nvbase.gpu_revision >= NV3_BOOT_REG_REV_C00) // RIVA 128ZX#