diff --git a/src/perlin.c b/src/perlin.c
index b93875a..b2a5913 100644
--- a/src/perlin.c
+++ b/src/perlin.c
@@ -1,3 +1,18 @@
+/**
+ * @file perlin.c
+ *
+ * @brief Handle creating noise based on perlin noise.
+ *
+ * Code tries to handle basically 2D/3D cases, without much genericness
+ * because it needs to be pretty fast. Originally sped up the code from
+ * about 20 seconds to 8 seconds per Nebulae image with the manual loop
+ * unrolling.
+ *
+ * @note Tried to optimize a while back with SSE and the works, but because
+ * of the nature of how it's implemented in non-linear fashion it just
+ * wound up complicating the code without actually making it faster.
+ */
+
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
@@ -11,7 +26,7 @@
 
 #include "perlin.h"
 
-#define NOISE_MAX_OCTAVES         128
+#define NOISE_MAX_OCTAVES         4
 #define NOISE_DEFAULT_HURST       0.5
 #define NOISE_DEFAULT_LACUNARITY  2.
 
@@ -22,23 +37,33 @@ typedef void* noise_t;
 
 /* Used internally. */
 typedef struct {
-  unsigned char map[256];   /* Randomized map of indexes into buffer. */
-  float buffer[256][3];     /* Random 256x3 buffer. */
+  int ndim;                 /**< Dimension of the noise. */
+  unsigned char map[256];   /**< Randomized map of indexes into buffer. */
+  float buffer[256][3];     /**< Random 256x3 buffer. */
   /* Fractal stuff. */
   float H;
   float lacunarity;
   float exponent[NOISE_MAX_OCTAVES];
 } perlin_data_t;
 
-static perlin_data_t* noise_new(float hurst, float lacunarity);
-/* Basic perlin noise. */
-static float noise_get(perlin_data_t* pdata, float* f);
-/* Fractional brownian motion. */
-/* Turbulence. */
-static float noise_turbulence(perlin_data_t* noise, float* f, float octaves);
+/* Perlin data handling. */
+static perlin_data_t* noise_new(int dim, float hurst, float lacunarity);
 static void noise_delete(perlin_data_t* noise);
+/* Normalizing. */
+static void normalize3(float f[3]);
+static void normalize2(float f[2]);
+/* Noise processing. */
+static float lattice3(perlin_data_t* pdata, int ix, float fx,
+    int iy, float fy, int iz, float fz);
+static float lattice2(perlin_data_t* pdata, int ix, float fx, int iy, float fy);
+/* Basic perlin noise. */
+static float noise_get3(perlin_data_t* pdata, float f[3]);
+static float noise_get2(perlin_data_t* pdata, float f[2]);
+/* Turbulence. */
+static float noise_turbulence3(perlin_data_t* noise, float f[3], int octaves);
+static float noise_turbulence2(perlin_data_t* nouse, float f[2], int octaves);
 
-static float lattice(perlin_data_t* pdata, int ix, float fx, int iy,
+static float lattice3(perlin_data_t* pdata, int ix, float fx, int iy,
     float fy, int iz, float fz) {
 
   int nindex;
@@ -56,12 +81,30 @@ static float lattice(perlin_data_t* pdata, int ix, float fx, int iy,
   return value;
 }
 
+static float lattice2(perlin_data_t* pdata, int ix, float fx, int iy, float fy) {
+  int nIndex;
+  float value;
+
+  nIndex = 0;
+  nIndex = pdata->map[(nIndex + ix) & 0xFF];
+  nIndex = pdata->map[(nIndex + iy) & 0xFF];
+
+  value  = pdata->buffer[nIndex][0] * fx;
+  value += pdata->buffer[nIndex][1] * fy;
+
+  return value;
+}
+
 #define SWAP(a, b, t)   t = a; a = b; b = t
 
 #define FLOOR(a)        ((int) a - (a < 0 && a != (int)a))
 #define CUBIC(a)        (a * a * (3 - 2 * a))
 
-static void normalize(float f[3]) {
+/**
+ * @brief Normalizes a 3d vector.
+ *    @param f Vector to normalize.
+ */
+static void normalize3(float f[3]) {
   float magnitude;
 
   magnitude = 1. / sqrtf(f[0]*f[0] + f[1]*f[1] + f[2]*f[2]);
@@ -70,24 +113,60 @@ static void normalize(float f[3]) {
   f[2] *= magnitude;
 }
 
-static perlin_data_t* noise_new(float hurst, float lacunarity) {
-  perlin_data_t* pdata = (perlin_data_t*)calloc(sizeof(perlin_data_t), 1);
+/**
+ * @brief Normalizes a 2d vector.
+ *    @param f Vector to normalize.
+ */
+static void normalize2(float f[2]) {
+  float magnitude;
+
+  magnitude = 1. / sqrtf(f[0]*f[0] + f[1]*f[1]);
+  f[0] *= magnitude;
+  f[1] *= magnitude;
+}
+
+/**
+ * @brief Creates a new perlin noise generator.
+ *    @param dim Dimension of the noise.
+ *    @param hurst
+ *    @param lacunarity
+ */
+static perlin_data_t* noise_new(int dim, float hurst, float lacunarity) {
+  perlin_data_t* pdata;
   int i, j;
   unsigned char tmp;
-  float f = 1;
-  for(i = 0; i < 256; i++) {
-    pdata->map[i] = (unsigned char)i;
-    pdata->buffer[i][0] = RNGF()-0.5;
-    pdata->buffer[i][1] = RNGF()-0.5;
-    pdata->buffer[i][2] = RNGF()-0.5;
-    normalize(pdata->buffer[i]);
+  float f;
+
+  /* Create the data. */
+  pdata = calloc(sizeof(perlin_data_t), 1);
+  pdata->ndim = dim;
+
+  /* Create the buffer and map. */
+  if(dim == 3) {
+    for(i = 0; i < 256; i++) {
+      pdata->map[i] = (unsigned char)i;
+      pdata->buffer[i][0] = RNGF()-0.5;
+      pdata->buffer[i][1] = RNGF()-0.5;
+      pdata->buffer[i][2] = RNGF()-0.5;
+      normalize3(pdata->buffer[i]);
+    }
   }
+  else if(dim == 2) {
+    for(i = 0; i < 256; i++) {
+      pdata->map[i] = (unsigned char)i;
+      pdata->buffer[i][0] = RNGF()-0.5;
+      pdata->buffer[i][1] = RNGF()-0.5;
+      normalize2(pdata->buffer[i]);
+    }
+  }
+
 
   while(--i) {
     j = RNG(0, 255);
     SWAP(pdata->map[i], pdata->map[j], tmp);
   }
 
+  f = 1.;
   pdata->H = hurst;
   pdata->lacunarity = lacunarity;
   for(i = 0; i < NOISE_MAX_OCTAVES; i++) {
@@ -95,10 +174,17 @@ static perlin_data_t* noise_new(float hurst, float lacunarity) {
     pdata->exponent[i] = 1. / f;
     f *= lacunarity;
   }
-  return (noise_t)pdata;
+  return pdata;
 }
 
-static float noise_get(perlin_data_t* pdata, float *f ) {
+/**
+ * @brief Get some 3d perlin noise from the data.
+ *
+ * Somewhat optimized for speed, probably can't get optimized much more.
+ *    @param pdata Perlin data to use.
+ *    @param f Position of the noise to get.
+ */
+static float noise_get3(perlin_data_t* pdata, float f[3] ) {
   int n[3];     /* Indexes to pass to lattice function. */
   float r[3];   /* Remainders to pass to lattice function. */
   float w[3];   /* Cubic values to pass to interpolation function. */
@@ -120,18 +206,18 @@ static float noise_get(perlin_data_t* pdata, float *f ) {
    * This is the big ugly part that is in dire need
    * of optimisation!!!!
    */
-  value = LERP(LERP(LERP(lattice(pdata,n[0], r[0], n[1], r[1], n[2], r[2]),
-          lattice(pdata,n[0]+1, r[0]-1, n[1], r[1], n[2], r[2]),
+  value = LERP(LERP(LERP(lattice3(pdata,n[0], r[0], n[1], r[1], n[2], r[2]),
+          lattice3(pdata,n[0]+1, r[0]-1, n[1], r[1], n[2], r[2]),
           w[0]),
-        LERP(lattice(pdata,n[0], r[0], n[1]+1, r[1]-1, n[2], r[2]),
-          lattice(pdata,n[0]+1, r[0]-1, n[1]+1, r[1]-1, n[2], r[2]),
+        LERP(lattice3(pdata,n[0], r[0], n[1]+1, r[1]-1, n[2], r[2]),
+          lattice3(pdata,n[0]+1, r[0]-1, n[1]+1, r[1]-1, n[2], r[2]),
           w[0]),
         w[1]),
-      LERP(LERP(lattice(pdata,n[0], r[0], n[1], r[1], n[2]+1, r[2]-1),
-          lattice(pdata,n[0]+1, r[0]-1, n[1], r[1], n[2]+1, r[2]-1),
+      LERP(LERP(lattice3(pdata,n[0], r[0], n[1], r[1], n[2]+1, r[2]-1),
+          lattice3(pdata,n[0]+1, r[0]-1, n[1], r[1], n[2]+1, r[2]-1),
           w[0]),
-        LERP(lattice(pdata,n[0], r[0], n[1]+1, r[1]-1, n[2]+1, r[2]-1),
-          lattice(pdata,n[0]+1, r[0]-1, n[1]+1, r[1]-1, n[2]+1, r[2]-1),
+        LERP(lattice3(pdata,n[0], r[0], n[1]+1, r[1]-1, n[2]+1, r[2]-1),
+          lattice3(pdata,n[0]+1, r[0]-1, n[1]+1, r[1]-1, n[2]+1, r[2]-1),
           w[0]),
         w[1]),
       w[2]);
@@ -139,7 +225,48 @@ static float noise_get(perlin_data_t* pdata, float *f ) {
   return CLAMP(-0.99999f, 0.99999f, value);
 }
 
-static float noise_turbulence(perlin_data_t* noise, float* f, float octaves) {
+/**
+ * @brief Get some 2D perlin noise from the data.
+ *
+ * Somewhat optimized for speed, probably can't get optimized much more.
+ *    @param pdata Perlin data to use.
+ *    @param f position of the noise to get.
+ */
+static float noise_get2(perlin_data_t* pdata, float f[2]) {
+  int n[2];   /* Indexes to pass to lattice function. */
+  float r[2]; /* Remainders to pass to lattice function. */
+  float w[2]; /* Cubic values to pass to interpolation function. */
+  float value;
+
+  n[0] = FLOOR(f[0]);
+  n[1] = FLOOR(f[1]);
+
+  r[0] = f[0] - n[0];
+  r[1] = f[1] - n[1];
+
+  w[0] = CUBIC(r[0]);
+  w[1] = CUBIC(r[1]);
+
+  /* Much faster in 2d. */
+  value = LERP(LERP(lattice2(pdata, n[0], r[0], n[1], r[1]),
+        lattice2(pdata, n[0]+1, r[0]-1, n[1], r[1]),
+        w[0]),
+      LERP(lattice2(pdata, n[0], r[0], n[1]+1, r[1]-1),
+        lattice2(pdata, n[0]+1, r[0]-1, n[1]+1, r[1]-1),
+        w[0]),
+      w[1]);
+
+  return CLAMP(-0.99999f, 0.99999f, value);
+}
+
+/**
+ * @brief Get 3d tubulence noise for a position.
+ *    @param noise Perlin data to generate noise from.
+ *    @param f Position of the noise.
+ *    @param octaves to use.
+ *    @return The noise level at the position.
+ */
+static float noise_turbulence3(perlin_data_t* noise, float f[3], int octaves) {
   float tf[3];
   perlin_data_t* pdata = (perlin_data_t*) noise;
   /* Init locals. */
@@ -152,7 +279,7 @@ static float noise_turbulence(perlin_data_t* noise, float* f, float octaves) {
 
   /* Inner loop of spectral construction, where the fractal is built. */
   for(i = 0; i < octaves; i++) {
-    value += ABS(noise_get(noise, tf)) * pdata->exponent[i];
+    value += ABS(noise_get3(noise, tf)) * pdata->exponent[i];
     tf[0] *= pdata->lacunarity;
     tf[1] *= pdata->lacunarity;
     tf[2] *= pdata->lacunarity;
@@ -161,11 +288,49 @@ static float noise_turbulence(perlin_data_t* noise, float* f, float octaves) {
   return CLAMP(-0.99999f, 0.99999f, value);
 }
 
+/**
+ * @brief Get 2d turbulence noise for a position.
+ *    @param noise Perlin data to generate noise from.
+ *    @param f Position of the noise.
+ *    @param octaves Octaves to use.
+ *    @return The noise level at the position.
+ */
+static float noise_turbulence2(perlin_data_t* noise, float f[2], int octaves) {
+  float tf[2];
+  perlin_data_t* pdata = (perlin_data_t*) noise;
+  /* Initialize locals. */
+  float value = 0;
+  int i;
+
+  tf[0] = f[0];
+  tf[1] = f[1];
+
+  /* Inner loop of spectral construction, where the fractal is built. */
+  for(i = 0; i < octaves; i++) {
+    value += ABS(noise_get2(noise, tf)) * pdata->exponent[i];
+    tf[0] *= pdata->lacunarity;
+    tf[1] *= pdata->lacunarity;
+  }
+  
+  return CLAMP(-0.99999f, 0.99999f, value);
+}
+
+/**
+ * @brief Free some noise data.
+ *    @param noise Noise data to free.
+ */
 void noise_delete(perlin_data_t* noise) {
   free(noise);
 }
 
-/* Generate a 3d nebulae map of dimensions w,h,n with ruggedness rig. */
+/**
+ * @brief Generate a 3d nebulae map.
+ *    @param w Width of the map.
+ *    @param h Height of the map.
+ *    @param n Number of slices of the map (2d planes).
+ *    @param rug Rugosity of the map.
+ *    @return The map generated.
+ */
 float* noise_genNebulaeMap(const int w, const int h, const int n, float rug) {
   int x, y, z;
   float f[3];
@@ -186,8 +351,7 @@ float* noise_genNebulaeMap(const int w, const int h, const int n, float rug) {
   zoom = rug * ((float)h/768.)*((float)w/1024.);
 
   /* Create noise and data. */
-  noise = noise_new(hurst, lacunarity);
-
+  noise = noise_new(3, hurst, lacunarity);
   nebulae = malloc(sizeof(float)*w*h*n);
   if(nebulae == NULL) {
     WARN("Out of memory!");
@@ -213,7 +377,7 @@ float* noise_genNebulaeMap(const int w, const int h, const int n, float rug) {
  
         f[0] = zoom * (float)x / (float)w;
 
-        value = noise_turbulence(noise, f, octaves);
+        value = noise_turbulence3(noise, f, octaves);
         if(max < value) max = value;
 
         nebulae[z*w*h + y*w+x] = value;
@@ -241,11 +405,17 @@ float* noise_genNebulaeMap(const int w, const int h, const int n, float rug) {
   return nebulae;
 }
 
-/* Generate tiny nebuale puffs */
+/**
+ * @brief Generate tiny nebulae puffs.
+ *    @param w Width of the puff to generate.
+ *    @param h Height of the puff to generate.
+ *    @param rug Rugosity of the puff.
+ *    @return The puff generated.
+ */
 float* noise_genNebulaePuffMap(const int w, const int h, float rug) {
   int x, y, hw, hh;
   float d;
-  float f[3];
+  float f[2];
   int octaves;
   float hurst;
   float lacunarity;
@@ -262,7 +432,7 @@ float* noise_genNebulaePuffMap(const int w, const int h, float rug) {
   zoom = rug;
 
   /* Create noise and data. */
-  noise = noise_new(hurst, lacunarity);
+  noise = noise_new(2, hurst, lacunarity);
   nebulae = malloc(sizeof(float)*w*h);
   if(nebulae == NULL) {
     WARN("Out of memory!");
@@ -271,7 +441,6 @@ float* noise_genNebulaePuffMap(const int w, const int h, float rug) {
 
   /* Start to create the nebulae. */
   max = 0.;
-  f[2] = 0.;
   hw = w/2;
   hh = h/2;
   d = (float)MIN(hw, hh);
@@ -280,25 +449,21 @@ float* noise_genNebulaePuffMap(const int w, const int h, float rug) {
     for(x = 0; x < w; x++) {
       f[0] = zoom * (float)x / (float)w;
 
-      value = noise_turbulence(noise, f, octaves);
+      /* Get the 2d noise. */
+      value = noise_turbulence2(noise, f, octaves);
 
       /* Make value also depend on distance from center. */
       value *= (d - 1. - sqrtf((float)((x-hw)*(x-hw)+(y-hh)*(y-hh))))/d;
       if(value < 0.) value = 0.;
 
+      /* Cap at maximum. */
       if(max < value) max = value;
 
+      /* Set the value. */
       nebulae[y*w + x] = value;
     }
   }
 
-  /* Post filtering. */
-  /*value = 1. - max;
-  for(y = 0; y < h; y++)
-    for(x = 0; x < w; x++)
-      if(nebulae[y*w+x] > 0.)
-        nebulae[y*w + x] += value;*/
-
   /* Clean up. */
   noise_delete(noise);