Skip to content

Commit 5229ad4

Browse files
committed
D3D fix-ups #3 (Performance)
1 parent 699dad6 commit 5229ad4

File tree

4 files changed

+83
-32
lines changed

4 files changed

+83
-32
lines changed

Client/core/DXHook/CDirect3DData.cpp

Lines changed: 41 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,16 @@ template <>
1717
CDirect3DData* CSingleton<CDirect3DData>::m_pSingleton = NULL;
1818

1919
// Constructor. Value-initializes the three cached matrices (view, projection,
// world) and sets the device window handle and the four viewport fields to 0.
CDirect3DData::CDirect3DData()
20+
: m_mViewMatrix{}
21+
, m_mProjMatrix{}
22+
, m_mWorldMatrix{}
23+
, m_hDeviceWindow(0)
24+
, m_dwViewportX(0)
25+
, m_dwViewportY(0)
26+
, m_dwViewportWidth(0)
27+
, m_dwViewportHeight(0)
2028
{
21-
// Zero out our matrices.
22-
memset(&m_mViewMatrix, 0, sizeof(D3DMATRIX));
23-
memset(&m_mProjMatrix, 0, sizeof(D3DMATRIX));
24-
memset(&m_mWorldMatrix, 0, sizeof(D3DMATRIX));
25-
26-
m_hDeviceWindow = 0;
27-
m_dwViewportX = 0;
28-
m_dwViewportY = 0;
29-
m_dwViewportWidth = 0;
30-
m_dwViewportHeight = 0;
29+
3130
}
3231

3332
CDirect3DData::~CDirect3DData()
@@ -36,49 +35,65 @@ CDirect3DData::~CDirect3DData()
3635

3736
// Caches a copy of *pMatrix for the given transform state.
// Only D3DTS_VIEW, D3DTS_PROJECTION and D3DTS_WORLD are stored; any other state
// type is ignored. A null pMatrix is rejected and leaves the cache unchanged.
void CDirect3DData::StoreTransform(D3DTRANSFORMSTATETYPE dwMatrixToStore, const D3DMATRIX* pMatrix)
3837
{
38+
// Nothing to copy from; keep the previously cached matrices.
if (!pMatrix)
39+
return;
40+
41+
// Copy by plain struct assignment (type-checked, unlike the memcpy calls it replaces).
// NOTE(review): any speed difference versus memcpy is unverified here - confirm with a profile.
3942
switch (dwMatrixToStore)
4043
{
4144
case D3DTS_VIEW:
42-
// Copy the real view matrix.
43-
memcpy(&m_mViewMatrix, pMatrix, sizeof(D3DMATRIX));
45+
m_mViewMatrix = *pMatrix;
4446
break;
4547
case D3DTS_PROJECTION:
46-
// Copy the real projection matrix.
47-
memcpy(&m_mProjMatrix, pMatrix, sizeof(D3DMATRIX));
48+
m_mProjMatrix = *pMatrix;
4849
break;
4950
case D3DTS_WORLD:
50-
// Copy the real world matrix.
51-
memcpy(&m_mWorldMatrix, pMatrix, sizeof(D3DMATRIX));
51+
m_mWorldMatrix = *pMatrix;
5252
break;
5353
default:
54-
// Do nothing.
54+
// Do nothing for unsupported transforms
5555
break;
5656
}
5757
}
5858

5959
// Copies the cached matrix for the requested transform state into *pMatrixOut.
// D3DTS_VIEW, D3DTS_PROJECTION and D3DTS_WORLD return the stored matrix; any
// other state type writes an all-zero matrix. A null pMatrixOut is a no-op.
void CDirect3DData::GetTransform(D3DTRANSFORMSTATETYPE dwRequestedMatrix, D3DMATRIX* pMatrixOut)
6060
{
61+
// No destination to write to.
if (!pMatrixOut)
62+
return;
63+
64+
// Copy by plain struct assignment (type-checked, unlike the memcpy calls it replaces).
// NOTE(review): any speed difference versus memcpy is unverified here - confirm with a profile.
6165
switch (dwRequestedMatrix)
6266
{
6367
case D3DTS_VIEW:
64-
// Copy the stored view matrix.
65-
memcpy(pMatrixOut, &m_mViewMatrix, sizeof(D3DMATRIX));
68+
*pMatrixOut = m_mViewMatrix;
6669
break;
6770
case D3DTS_PROJECTION:
68-
// Copy the stored projection matrix.
69-
memcpy(pMatrixOut, &m_mProjMatrix, sizeof(D3DMATRIX));
71+
*pMatrixOut = m_mProjMatrix;
7072
break;
7173
case D3DTS_WORLD:
72-
// Copy the stored world matrix.
73-
memcpy(pMatrixOut, &m_mWorldMatrix, sizeof(D3DMATRIX));
74+
*pMatrixOut = m_mWorldMatrix;
7475
break;
7576
default:
76-
// Zero out the structure for the user.
77-
memset(pMatrixOut, 0, sizeof(D3DMATRIX));
77+
// Zero out for unsupported transforms
78+
*pMatrixOut = {};
7879
break;
7980
}
81+
}
8082

81-
// assert ( 0 ); // Too expensive to be used because SetTransform is used too often
83+
// Returns the address of the cached matrix for the requested transform state,
// or nullptr when the state is not one of D3DTS_VIEW, D3DTS_PROJECTION or
// D3DTS_WORLD. The pointer refers to a member of this object, so a later
// StoreTransform for the same state changes what the caller sees through it.
const D3DMATRIX* CDirect3DData::GetTransformPtr(D3DTRANSFORMSTATETYPE dwRequestedMatrix) const
84+
{
85+
// Return direct pointer to cached matrix - no copy is made here
86+
switch (dwRequestedMatrix)
87+
{
88+
case D3DTS_VIEW:
89+
return &m_mViewMatrix;
90+
case D3DTS_PROJECTION:
91+
return &m_mProjMatrix;
92+
case D3DTS_WORLD:
93+
return &m_mWorldMatrix;
94+
default:
95+
// Unsupported transform type: callers must handle nullptr.
return nullptr;
96+
}
8297
}
8398

8499
void CDirect3DData::StoreViewport(DWORD dwX, DWORD dwY, DWORD dwWidth, DWORD dwHeight)

Client/core/DXHook/CDirect3DData.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ class CDirect3DData : public CSingleton<CDirect3DData>
2222

2323
void StoreTransform(D3DTRANSFORMSTATETYPE dwMatrixToStore, const D3DMATRIX* pMatrix);
2424
void GetTransform(D3DTRANSFORMSTATETYPE dwRequestedMatrix, D3DMATRIX* pMatrixOut);
25+
// Non-copying accessor: returns a pointer to the cached view/projection/world matrix, or nullptr for any other state type.
const D3DMATRIX* GetTransformPtr(D3DTRANSFORMSTATETYPE dwRequestedMatrix) const;
2526

2627
HWND GetDeviceWindow() { return m_hDeviceWindow; };
2728
void StoreDeviceWindow(HWND hDeviceWindow) { m_hDeviceWindow = hDeviceWindow; };

Client/core/DXHook/CProxyDirect3D9.cpp

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ namespace
2424
// Cached static Direct3D pointer for lockless fast-path access
2525
std::atomic<IDirect3D9*> g_cachedStaticDirect3D{nullptr};
2626
std::atomic<bool> g_cachedDirect3DValid{false};
27+
// Cached adapter monitor for lockless fast-path access
28+
std::atomic<HMONITOR> g_cachedAdapterMonitor{nullptr};
29+
std::atomic<bool> g_cachedAdapterMonitorValid{false};
2730

2831
IDirect3D9* GetFirstValidTrackedDirect3D(std::vector<IDirect3D9*>& trackedList)
2932
{
@@ -85,8 +88,9 @@ CProxyDirect3D9::~CProxyDirect3D9()
8588
{
8689
std::lock_guard<std::mutex> lock(ms_Direct3D9ListMutex);
8790
ListRemove(ms_CreatedDirect3D9List, m_pDevice);
88-
// Invalidate cache when removing this device
91+
// Invalidate both caches when removing this device
8992
g_cachedDirect3DValid.store(false, std::memory_order_release);
93+
g_cachedAdapterMonitorValid.store(false, std::memory_order_release);
9094
}
9195
ReleaseInterface(m_pDevice, 8752);
9296
}
@@ -232,12 +236,30 @@ HMONITOR CProxyDirect3D9::GetAdapterMonitor(UINT Adapter)
232236

233237
// Returns the monitor handle for the given adapter using the first valid
// tracked IDirect3D9, or NULL when no tracked object is available.
// For Adapter 0 a non-null result is cached in g_cachedAdapterMonitor and served
// without taking ms_Direct3D9ListMutex while g_cachedAdapterMonitorValid is set.
// NOTE(review): nothing in this function refreshes the cached handle once it is
// valid - confirm that every event that can change the monitor for adapter 0
// (e.g. a display configuration change) clears g_cachedAdapterMonitorValid.
HMONITOR CProxyDirect3D9::StaticGetAdapterMonitor(UINT Adapter)
234238
{
239+
// Fast path: use cached monitor without lock (for default adapter 0)
240+
if (Adapter == 0 && g_cachedAdapterMonitorValid.load(std::memory_order_acquire))
241+
{
242+
HMONITOR hMonitor = g_cachedAdapterMonitor.load(std::memory_order_acquire);
243+
// A null cached handle is treated as a miss and falls through to the slow path.
if (hMonitor)
244+
return hMonitor;
245+
}
246+
247+
// Slow path: refresh cache under lock
235248
std::lock_guard<std::mutex> lock(ms_Direct3D9ListMutex);
236249
IDirect3D9* pDirect3D = GetFirstValidTrackedDirect3D(ms_CreatedDirect3D9List);
237250
if (!pDirect3D)
238251
return NULL;
239252

240-
return pDirect3D->GetAdapterMonitor(Adapter);
253+
HMONITOR hMonitor = pDirect3D->GetAdapterMonitor(Adapter);
254+
255+
// Cache result for default adapter
// The handle is stored before the valid flag so that a reader which observes
// the flag (acquire load above) also observes the handle written here.
256+
if (Adapter == 0 && hMonitor)
257+
{
258+
g_cachedAdapterMonitor.store(hMonitor, std::memory_order_release);
259+
g_cachedAdapterMonitorValid.store(true, std::memory_order_release);
260+
}
261+
262+
return hMonitor;
241263
}
242264

243265
IDirect3D9* CProxyDirect3D9::StaticGetDirect3D()

Client/core/DXHook/CProxyDirect3DDevice9.cpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -836,7 +836,7 @@ HRESULT CProxyDirect3DDevice9::Reset(D3DPRESENT_PARAMETERS* pPresentationParamet
836836
HRESULT CProxyDirect3DDevice9::Present(CONST RECT* pSourceRect, CONST RECT* pDestRect, HWND hDestWindowOverride, CONST RGNDATA* pDirtyRegion)
837837
{
838838
// Reset frame stat counters
839-
memset(&DeviceState.FrameStats, 0, sizeof(DeviceState.FrameStats));
839+
DeviceState.FrameStats = {};
840840

841841
bool bDeviceTemporarilyLost = false;
842842
HRESULT hrCoopLevel = D3DERR_INVALIDCALL;
@@ -848,9 +848,11 @@ HRESULT CProxyDirect3DDevice9::Present(CONST RECT* pSourceRect, CONST RECT* pDes
848848
CDirect3DEvents9::OnPresent(m_pDevice);
849849

850850
// A fog flicker fix for some ATI cards
851-
D3DMATRIX projMatrix;
852-
m_pData->GetTransform(D3DTS_PROJECTION, &projMatrix);
853-
m_pDevice->SetTransform(D3DTS_PROJECTION, &projMatrix);
851+
const D3DMATRIX* pCachedProjection = m_pData->GetTransformPtr(D3DTS_PROJECTION);
852+
if (pCachedProjection)
853+
{
854+
m_pDevice->SetTransform(D3DTS_PROJECTION, pCachedProjection);
855+
}
854856

855857
TIMING_GRAPH("Present");
856858
HRESULT hr = CDirect3DEvents9::PresentGuarded(m_pDevice, pSourceRect, pDestRect, hDestWindowOverride, pDirtyRegion);
@@ -1195,6 +1197,17 @@ HRESULT CProxyDirect3DDevice9::SetTransform(D3DTRANSFORMSTATETYPE State, CONST D
11951197

11961198
// Answers GetTransform from the matrices cached in m_pData when the requested
// state has a cached slot (GetTransformPtr returns non-null); otherwise the
// call is forwarded to the wrapped device.
// NOTE(review): the cache is zero-initialized by the CDirect3DData constructor,
// so if nothing has been stored for State yet this returns an all-zero matrix
// with D3D_OK instead of the device's own value - confirm the cache is always
// populated before the first read.
HRESULT CProxyDirect3DDevice9::GetTransform(D3DTRANSFORMSTATETYPE State, D3DMATRIX* pMatrix)
11971199
{
1200+
// Use cached data if available to avoid querying the device
1201+
if (pMatrix)
1202+
{
1203+
const D3DMATRIX* pCached = m_pData->GetTransformPtr(State);
1204+
if (pCached)
1205+
{
1206+
*pMatrix = *pCached;
1207+
return D3D_OK;
1208+
}
1209+
}
1210+
// Fallback to device query for unsupported transform types; a null pMatrix also
// reaches this call and is left for the device to validate.
11981211
return m_pDevice->GetTransform(State, pMatrix);
11991212
}
12001213

0 commit comments

Comments
 (0)