diff --git a/src/perlin.c b/src/perlin.c index 2ca7aeb..72bbb08 100644 --- a/src/perlin.c +++ b/src/perlin.c @@ -2,6 +2,10 @@ #include <stdlib.h> #include <string.h> +#ifdef __SSE__ +#include <xmmintrin.h> +#endif /* __SSE__ */ + #include "lephisto.h" #include "log.h" #include "rng.h" @@ -45,7 +49,36 @@ static void noise_delete(noise_t noise); static float lattice(perling_data_t* pdata, int ix, float fx, int iy, float fy, int iz, float fz, int iw, float fw) { +#ifdef __SSE__ + (void)iw; + (void)fw; /* SSE path: only ix, iy, iz and fx, fy, fz are used; iw and fw are explicitly discarded. */ + int nindex; + __m128 a, b, c; + + nindex = 0; + nindex = pdata->map[(nindex + ix) & 0xFF]; + nindex = pdata->map[(nindex + iy) & 0xFF]; + nindex = pdata->map[(nindex + iz) & 0xFF]; /* nindex is now the chained map lookup over ix, iy, iz, each step masked to 0..255. */ + float inp_sse1[4] __attribute__((aligned(16))) = { + pdata->buffer[nindex][0], + pdata->buffer[nindex][1], + pdata->buffer[nindex][2], + 0. + }; + float inp_sse2[4] __attribute__ ((aligned(16))) = { + fx, fy, fz, 0. + }; + float out_sse[4] __attribute__((aligned(16))); + + a = _mm_load_ps(inp_sse1); + b = _mm_load_ps(inp_sse2); + c = _mm_mul_ps(a, b); + _mm_store_ps(out_sse, c); /* out_sse now holds the lane-wise products of inp_sse1 and inp_sse2; the fourth lane is zero. */ + + return out_sse[0] + out_sse[1] + out_sse[2]; /* NOTE(review): this path always sums three components; the scalar path is only partly visible in this diff, so confirm both agree for every dimension count in use. */ + +#else /* __SSE__ */ int n[4] = { ix, iy, iz, iw }; float f[4] = { fx, fy, fz, fw }; int nindex = 0; @@ -58,6 +91,7 @@ static float lattice(perling_data_t* pdata, int ix, float fx, int iy, value += pdata->buffer[nindex][i] * f[i]; return value; +#endif /* __SSE__ */ } #define DEFAULT_SEED 0x15687436 @@ -248,11 +282,14 @@ static float* genNebulaeMap(const int w, const int h, const int n, float rug) { noise_t noise; float* nebulae;; float value; + unsigned int* t, s; /* t is a pointer (one tick value stored per layer); s is a plain unsigned int holding the start tick. */ + /* Pretty default values. */ octaves = 3.; hurst = NOISE_DEFAULT_HURST; lacunarity = NOISE_DEFAULT_LACUNARITY; + /* Create noise and data. */ noise = noise_new(2, hurst, lacunarity); nebulae = malloc(sizeof(float)*w*h*n); @@ -261,6 +298,12 @@ static float* genNebulaeMap(const int w, const int h, const int n, float rug) { return NULL; } + /* Some debug information and time setting.
 */ + s = SDL_GetTicks(); + t = malloc(sizeof(unsigned int)*n); /* NOTE(review): this allocation is not NULL-checked, and no free(t) appears among the added lines, so t looks leaked on return - confirm. */ + DEBUG("Generating Nebulae of size %dx%dx%d", w, h, n); + + /* Start to create the nebulae. */ f[2] = 0.; for(z = 0; z < n; z++) { for(y = 0; y < h; y++) { @@ -278,10 +321,18 @@ static float* genNebulaeMap(const int w, const int h, const int n, float rug) { } } f[2] += 0.01; + + /* More time magic debug. */ + t[z] = SDL_GetTicks(); + DEBUG(" Layer %d/%d generated in %dms", z+1, n, + (z>0) ? t[z] - t[z-1] : t[z] - s); /* Per-layer time: difference to the previous layer stamp, or to the start tick for the first layer. NOTE(review): these differences are unsigned int but are printed with %d - confirm against the DEBUG macro. */ } + /* Cleanup. */ noise_delete(noise); + /* Results. */ + DEBUG("Nebulae Generated in %dms", SDL_GetTicks() - s); /* NOTE(review): same unsigned value printed with %d as above - confirm. */ return nebulae; }