[Add] Some SSE stuff for speedups.
This commit is contained in:
parent
1442259c0b
commit
ec6cabf2fe
51
src/perlin.c
51
src/perlin.c
@ -2,6 +2,10 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef __SSE__
|
||||
#include <xmmintrin.h>
|
||||
#endif /* __SSE__ */
|
||||
|
||||
#include "lephisto.h"
|
||||
#include "log.h"
|
||||
#include "rng.h"
|
||||
@ -45,7 +49,36 @@ static void noise_delete(noise_t noise);
|
||||
|
||||
static float lattice(perling_data_t* pdata, int ix, float fx, int iy,
|
||||
float fy, int iz, float fz, int iw, float fw) {
|
||||
#ifdef __SSE__
|
||||
(void)iw;
|
||||
(void)fw;
|
||||
int nindex;
|
||||
__m128 a, b, c;
|
||||
|
||||
nindex = 0;
|
||||
nindex = pdata->map[(nindex + ix) & 0xFF];
|
||||
nindex = pdata->map[(nindex + iy) & 0xFF];
|
||||
nindex = pdata->map[(nindex + iz) & 0xFF];
|
||||
|
||||
float inp_sse1[4] __attribute__((aligned(16))) = {
|
||||
pdata->buffer[nindex][0],
|
||||
pdata->buffer[nindex][1],
|
||||
pdata->buffer[nindex][2],
|
||||
0.
|
||||
};
|
||||
float inp_sse2[4] __attribute__ ((aligned(16))) = {
|
||||
fx, fy, fz, 0.
|
||||
};
|
||||
float out_sse[4] __attribute__((aligned(16)));
|
||||
|
||||
a = _mm_load_ps(inp_sse1);
|
||||
b = _mm_load_ps(inp_sse2);
|
||||
c = _mm_mul_ps(a, b);
|
||||
_mm_store_ps(out_sse, c);
|
||||
|
||||
return out_sse[0] + out_sse[1] + out_sse[2];
|
||||
|
||||
#else /* __SSE__ */
|
||||
int n[4] = { ix, iy, iz, iw };
|
||||
float f[4] = { fx, fy, fz, fw };
|
||||
int nindex = 0;
|
||||
@ -58,6 +91,7 @@ static float lattice(perling_data_t* pdata, int ix, float fx, int iy,
|
||||
value += pdata->buffer[nindex][i] * f[i];
|
||||
|
||||
return value;
|
||||
#endif /* __SSE__ */
|
||||
}
|
||||
|
||||
#define DEFAULT_SEED 0x15687436
|
||||
@ -248,11 +282,14 @@ static float* genNebulaeMap(const int w, const int h, const int n, float rug) {
|
||||
noise_t noise;
|
||||
float* nebulae;;
|
||||
float value;
|
||||
unsigned int* t, s;
|
||||
|
||||
/* Pretty default values. */
|
||||
octaves = 3.;
|
||||
hurst = NOISE_DEFAULT_HURST;
|
||||
lacunarity = NOISE_DEFAULT_LACUNARITY;
|
||||
|
||||
/* Create noise and data. */
|
||||
noise = noise_new(2, hurst, lacunarity);
|
||||
|
||||
nebulae = malloc(sizeof(float)*w*h*n);
|
||||
@ -261,6 +298,12 @@ static float* genNebulaeMap(const int w, const int h, const int n, float rug) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Some debug information and time setting. */
|
||||
s = SDL_GetTicks();
|
||||
t = malloc(sizeof(unsigned int)*n);
|
||||
DEBUG("Generating Nebulae of size %dx%dx%d", w, h, n);
|
||||
|
||||
/* Start to create the nebulae. */
|
||||
f[2] = 0.;
|
||||
for(z = 0; z < n; z++) {
|
||||
for(y = 0; y < h; y++) {
|
||||
@ -278,10 +321,18 @@ static float* genNebulaeMap(const int w, const int h, const int n, float rug) {
|
||||
}
|
||||
}
|
||||
f[2] += 0.01;
|
||||
|
||||
/* More time magic debug. */
|
||||
t[z] = SDL_GetTicks();
|
||||
DEBUG(" Layer %d/%d generated in %dms", z+1, n,
|
||||
(z>0) ? t[z] - t[z-1] : t[z] - s);
|
||||
}
|
||||
|
||||
/* Cleanup. */
|
||||
noise_delete(noise);
|
||||
|
||||
/* Results. */
|
||||
DEBUG("Nebulae Generated in %dms", SDL_GetTicks() - s);
|
||||
return nebulae;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user