The game where you go into mines and start crafting — but for consoles! (Forked directly from smartcmd's GitHub.)
1/* SCE CONFIDENTIAL
2PlayStation(R)3 Programmer Tool Runtime Library 430.001
3* Copyright (C) 2007 Sony Computer Entertainment Inc.
4* All Rights Reserved.
5*/
6
7/* common headers */
8#include <stdint.h>
9#include <stdlib.h>
10#include <alloca.h>
11#include <spu_intrinsics.h>
12#include <cell/spurs.h>
13#include <cell/dma.h>
14#include <cell/spurs/job_queue.h>
15
16#include "LevelRenderer_cull.h"
17#include "..\Common\DmaData.h"
18#include <vectormath/c/vectormath_aos_v.h>
19
20
21
22// #define SPU_HEAPSIZE (128*1024)
23// #define SPU_STACKSIZE (16*1024)
24//
25// CELL_SPU_LS_PARAM(128*1024, 16*1024); // can't use #defines here as it seems to create an asm instruction
26
27
28static const bool sc_verbose = false;
29
30CellSpursJobContext2* g_pSpursJobContext;
31
32
// Bit layout of one entry of the global chunk flag array (one byte per chunk).
// cull() only consults COMPILED, EMPTY0, EMPTY1 and EMPTYBOTH; the remaining
// values are not referenced in this file and presumably mirror the PPU-side
// definitions (TODO confirm they stay in sync).
static const int CHUNK_FLAG_COMPILED  = 1 << 0;                                // chunk is only drawn when this is set
static const int CHUNK_FLAG_DIRTY     = 1 << 1;
static const int CHUNK_FLAG_EMPTY0    = 1 << 2;                                // layer 0 has nothing to render
static const int CHUNK_FLAG_EMPTY1    = 1 << 3;                                // layer 1 has nothing to render
static const int CHUNK_FLAG_EMPTYBOTH = CHUNK_FLAG_EMPTY0 | CHUNK_FLAG_EMPTY1; // both layers empty
static const int CHUNK_FLAG_NOTSKYLIT = 1 << 4;
// Mask/shift pair: presumably a 3-bit field held in bits 5..7 of the flag byte.
static const int CHUNK_FLAG_REF_MASK  = 0x07;
static const int CHUNK_FLAG_REF_SHIFT = 5;
42
43
/**
 * Tests a box against six planes.
 *
 * @param bb       Six floats: bb[0..2] and bb[3..5] are the x/y/z coordinates of
 *                 two opposite corners of the box (presumably min and max).
 * @param frustum  24 floats: six planes of four coefficients (a, b, c, d) each.
 * @return false as soon as one plane has all eight box corners evaluating to
 *         a*x + b*y + c*z + d <= 0; true if every plane has at least one
 *         corner strictly on its positive side.
 */
bool inline clip(float *bb, float *frustum)
{
    for (int plane = 0; plane < 6; ++plane)
    {
        const float *p = frustum + plane * 4;

        // Walk the eight corners; bit 0/1/2 of the corner index selects which
        // of the two x/y/z values is used.
        bool cornerInFront = false;
        for (int corner = 0; corner < 8 && !cornerInFront; ++corner)
        {
            const float x = bb[(corner & 1) ? 3 : 0];
            const float y = bb[(corner & 2) ? 4 : 1];
            const float z = bb[(corner & 4) ? 5 : 2];
            cornerInFront = (p[0] * x + p[1] * y + p[2] * z + p[3] > 0);
        }

        // Every corner is on the non-positive side of this plane: reject.
        if (!cornerInFront)
            return false;
    }
    return true;
}
60
61class PPUStoreArray
62{
63 static const int sc_cacheSize = 128;
64 int m_localCache[128];
65 int* m_pDataPPU;
66 int m_cachePos;
67 int m_ppuPos;
68
69public:
70 PPUStoreArray(uintptr_t pDataPPU) { m_pDataPPU = (int*)pDataPPU; m_cachePos = 0; m_ppuPos = 0;}
71
72 void store(int val)
73 {
74 m_localCache[m_cachePos] = val;
75 m_cachePos++;
76 if(m_cachePos >= sc_cacheSize)
77 flush();
78 }
79
80 void flush()
81 {
82 if(m_cachePos > 0)
83 {
84 // dma the local cache back to PPU and start again
85// spu_print("DMAing %d bytes from 0x%08x(SPU) to 0x%08x(PPU)\n",(int)( m_cachePos*sizeof(int)), (int)m_localCache, (int)&m_pDataPPU[m_ppuPos]);
86 DmaData_SPU::put(m_localCache, (uintptr_t)&m_pDataPPU[m_ppuPos], DmaData_SPU::roundUpDMASize(m_cachePos*sizeof(int)));
87 m_ppuPos += m_cachePos;
88 m_cachePos = 0;
89 }
90 }
91 int getSize() { return m_ppuPos; }
92};
93
94void cull(LevelRenderer_cull_DataIn* dataIn, LevelRenderer_cull_DataIn* eaDataIn)
95{
96 ClipChunk_SPU chunkDoubleBuffer[2];
97 int chunkIdx = 0;
98 unsigned char* globalChunkFlags = (unsigned char*)alloca(dataIn->numGlobalChunks); // 164K !!!
99 DmaData_SPU::getAndWait(globalChunkFlags, (uintptr_t)dataIn->pGlobalChunkFlags, sizeof(unsigned char)*dataIn->numGlobalChunks);
100
101 PPUStoreArray layer0List((uintptr_t)dataIn->listArray_layer0);
102 PPUStoreArray layer1List((uintptr_t)dataIn->listArray_layer1);
103
104 PPUStoreArray layer0ZDepth((uintptr_t)dataIn->zDepth_layer0);
105 PPUStoreArray layer1ZDepth((uintptr_t)dataIn->zDepth_layer1);
106 float xOff = dataIn->clipMat[3][0];
107 float yOff = dataIn->clipMat[3][1];
108 float zOff = dataIn->clipMat[3][2];
109 dataIn->clipMat[3][0] = 0;
110 dataIn->clipMat[3][1] = 0;
111 dataIn->clipMat[3][2] = 0;
112
113
114
115
116 // DMA up the first chunk
117 if(dataIn->numClipChunks > 0)
118 DmaData_SPU::get(&chunkDoubleBuffer[chunkIdx], (uintptr_t)&dataIn->pClipChunks[0], sizeof(ClipChunk_SPU));
119
120 for(int i=0;i<dataIn->numClipChunks; i++)
121 {
122 DmaData_SPU::wait();// wait for the last chunk to have been uploaded
123 ClipChunk_SPU& chunk = chunkDoubleBuffer[chunkIdx];
124 chunkIdx ^= 1;
125 // queue up the next chunk
126 DmaData_SPU::get(&chunkDoubleBuffer[chunkIdx], (uintptr_t)&dataIn->pClipChunks[i+1], sizeof(ClipChunk_SPU));
127
128 Vectormath::Aos::Vector4 pos(chunk.aabb[0] + xOff, chunk.aabb[1] + yOff, chunk.aabb[2] + zOff, 1.0f);
129 Vectormath::Aos::Vector4 transPos = dataIn->clipMat * pos;
130 float zDepth = -transPos.getZ();
131// spu_print("z val = %f : maxDepth = %f\n", zDepth, dataIn->maxDepthRender);
132 if(zDepth > dataIn->maxDepthRender || chunk.aabb[1] < dataIn->maxHeightRender )
133 chunk.visible = false;
134 else
135 {
136 unsigned char flags = chunk.globalIdx == -1 ? 0 : globalChunkFlags[ chunk.globalIdx ];
137 chunk.visible = false;
138 if ( (flags & CHUNK_FLAG_COMPILED ) && ( ( flags & CHUNK_FLAG_EMPTYBOTH ) != CHUNK_FLAG_EMPTYBOTH ) )
139 {
140 chunk.visible = clip(chunk.aabb, dataIn->fdraw);
141 }
142 }
143 // write the visible flag directly into the chunk structure on PPU
144 DmaData_SPU::put(&chunk, (uintptr_t)&dataIn->pClipChunks[i], 16);// only DMA the first 16 bytes, as they contain the visible flag we need to pass back
145
146 // add the data for renderChunks
147 if( !chunk.visible )
148 continue; // This will be set if the chunk isn't visible, or isn't compiled, or has both empty flags set
149 if( chunk.globalIdx == -1 )
150 continue; // Not sure if we should ever encounter this... TODO check
151 int layer = 0;
152// zDepth /= transPos.getW();
153 if( ( globalChunkFlags[chunk.globalIdx] & CHUNK_FLAG_EMPTY0 ) == 0) // Check that this particular layer isn't empty
154 {
155 // List can be calculated directly from the chunk's global index
156 int list = chunk.globalIdx * 2 + layer;
157 list += dataIn->chunkLists;
158 layer0List.store(list);
159 layer0ZDepth.store(*((int*)&zDepth));
160 }
161
162 layer = 1;
163 if( ( globalChunkFlags[chunk.globalIdx] & CHUNK_FLAG_EMPTY1 ) == 0) // Check that this particular layer isn't empty
164 {
165 // List can be calculated directly from the chunk's global idex
166 int list = chunk.globalIdx * 2 + layer;
167 list += dataIn->chunkLists;
168 layer1List.store(list);
169 layer0ZDepth.store(*((int*)&zDepth));
170 }
171 }
172 layer0List.flush();
173 layer1List.flush();
174 layer0ZDepth.flush();
175 cellDmaPutUint32(layer0List.getSize(), (uintptr_t)&eaDataIn->numToRender_layer0, 0, 0, 0);
176 cellDmaPutUint32(layer1List.getSize(), (uintptr_t)&eaDataIn->numToRender_layer1, 0, 0, 0);
177
178}
179
180
181
182
183void cellSpursJobQueueMain(CellSpursJobContext2 *pContext, CellSpursJob256 *pJob)
184{
185 // CellSpursTaskId idTask = cellSpursGetTaskId();
186 unsigned int idSpu = cellSpursGetCurrentSpuId();
187
188 if(sc_verbose)
189 spu_print("LevelRenderer_cull [SPU#%u] start\n", idSpu);
190
191 g_pSpursJobContext = pContext;
192 uint32_t eaDataIn = pJob->workArea.userData[0];
193// uint32_t eaDataOut =pJob->workArea.userData[1];
194
195 LevelRenderer_cull_DataIn dataIn;
196 DmaData_SPU::getAndWait(&dataIn, eaDataIn, sizeof(LevelRenderer_cull_DataIn));
197
198 if(sc_verbose)
199 spu_print("LevelRenderer_cull [SPU#%u] DMA'd input data, ready to cull %d chunks\n", idSpu, dataIn.numClipChunks);
200
201 int numForDMA = dataIn.numClipChunks;
202 if(numForDMA % 16)
203 {
204 numForDMA &= ~0x0f;
205 numForDMA += 0x10;
206 }
207
208 cull(&dataIn, (LevelRenderer_cull_DataIn*)eaDataIn);
209// DmaData_SPU::putAndWait(outputData, eaDataOut, sizeof(char) * numForDMA);
210
211
212 if(sc_verbose)
213 spu_print("LevelRenderer_cull [SPU#%u] exit\n", idSpu);
214}
215