Basic Image Quantization into 256 colors in 90 lines

Here’s a module I’ve been working on. Post a reply if there is anything that can be made better/faster.
This class quantizes an image into 256 colors (generating a palette) and makes the image pixels become indices into the palette.
It’s a very naive approach using vector quantization but I thought it was the simplest to understand and I did not want to implement an octree.

The REFERENCE_PALETTE is an array of 256 colors that are essentially “waypoint” colors in the entire 0-0xffffff range. There is an exponential curve applied to it, similar to gamma.
The program maps the colors in the image into one of these 256 waypoints, then calculates the final palette by averaging all the values in the waypoints into one color per waypoint. The color distance is influenced by a ‘weight’ per channel which just adjusts the results so that green is given more priority than red and red is given more priority than blue (since we can see shades of green the best and blue the worst).


class Quantize256 {

	/**
	 * 256 fixed "waypoint" colors spanning the full 0x000000-0xffffff range,
	 * spaced on an exponential (gamma-like) curve.
	 */
	private static final int[] REFERENCE_PALETTE = new int[256];

	/** Palette produced by the most recent call to quantize8Bit(). */
	private static final int[] palette = new int[256];

	/** Per-waypoint buckets of the original colors mapped to that waypoint. */
	private static final int[][] map = new int[256][16];

	/** Number of colors currently stored in each bucket of map. */
	private static final int[] sizes = new int[256];

	/**
	 * Perceptual channel weights (R, G, B): green weighted highest and blue
	 * lowest, roughly matching human sensitivity to each channel.
	 */
	private static final float[] channel_weight = { 0.30f, 0.59f, 0.11f };

	static {
		// i << 3 spans 0..2040, and pow(2040, 2.18293872) is just below
		// 0xffffff, so the table covers the 24-bit range without overflow.
		for (int i = 0; i < 256; i++) {
			REFERENCE_PALETTE[i] = (int) Math.pow(i << 3, 2.18293872);
		}
	}

	/**
	 * Quantizes image (0xRRGGBB pixels) to at most 256 colors in place:
	 * each pixel is replaced by an index into the returned palette.
	 * Uses shared static state, so concurrent calls are not safe.
	 *
	 * @param image pixels in 0xRRGGBB form; overwritten with palette indices
	 * @return the 256-entry palette (entries for empty buckets are 0)
	 */
	static int[] quantize8Bit(int[] image) {
		// Reset bucket fill counts; bucket arrays keep any grown capacity.
		for (int i = 0; i < 256; i++) {
			sizes[i] = 0;
		}
		// Pass 1: bucket each pixel under its nearest reference waypoint.
		for (int i = 0; i < image.length; i++) {
			add(nearest(REFERENCE_PALETTE, image[i]), image[i]);
		}
		// Pass 2: each palette entry is the average of its bucket.
		for (int i = 0; i < 256; i++) {
			palette[i] = avg(i);
		}
		// Pass 3: rewrite pixels as indices into the final palette.
		for (int i = 0; i < image.length; i++) {
			image[i] = nearest(palette, image[i]);
		}
		return palette;
	}

	/**
	 * Index of the entry in pal nearest to color (first entry wins ties).
	 * Extracted to replace the two identical inline search loops.
	 */
	private static int nearest(int[] pal, int color) {
		int closest = 0;
		int min = dist(pal[0], color);
		for (int j = 1; j < pal.length; j++) {
			int dst = dist(pal[j], color);
			if (dst < min) {
				min = dst;
				closest = j;
			}
		}
		return closest;
	}

	/** Weighted squared RGB distance between two packed 0xRRGGBB colors. */
	private static int dist(int x, int y) {
		int r = (x >> 16 & 0xff) - (y >> 16 & 0xff);
		int g = (x >> 8 & 0xff) - (y >> 8 & 0xff);
		int b = (x & 0xff) - (y & 0xff);
		return (int) ((r * r) * channel_weight[0] + (g * g) * channel_weight[1] + (b * b) * channel_weight[2]);
	}

	/** Average color of bucket v (truncating division), or 0 if empty. */
	private static int avg(int v) {
		int size = sizes[v];
		if (size == 0) {
			return 0;
		}
		int r = 0;
		int g = 0;
		int b = 0;
		for (int i = 0; i < size; i++) {
			int c = map[v][i];
			r += c >> 16 & 0xff;
			g += c >> 8 & 0xff;
			b += c & 0xff;
		}
		return (r / size) << 16 | (g / size) << 8 | (b / size);
	}

	/** Appends color n to bucket i, doubling the bucket when it is full. */
	public static void add(int i, int n) {
		int needed = sizes[i] + 1;
		if (needed > map[i].length) {
			// Grow by doubling, or directly to the required size if larger.
			int[] bigger = new int[Math.max(map[i].length << 1, needed)];
			System.arraycopy(map[i], 0, bigger, 0, map[i].length);
			map[i] = bigger;
		}
		map[i][sizes[i]++] = n;
	}

}

For computing the difference between two colors for clustering alike/nearby colors you might consider using a three-dimensional vector and not try to reduce the difference to just a single scalar. As an example: with your color difference calculation the colors [113, 3, 22] and [30, 0, 139] have the exact same distance to [30, 0, 0]. So [30, 0, 139] is likely to be associated to the “red color cluster”/reference color depending only on the order of the colors you initialize your REFERENCE_PALETTE with.
It seems that clustering the colors for quantization is usually done with three-dimensional spatial acceleration data structures and then the average/reference colors are computed as the averages of the respective cluster, instead of precomputing “expected” average colors and then deciding for all colors in the image what reference color it should be associated with.

OK, I’ve taken your advice into consideration.
This is what I’ve done. It’s not complete but it produces both very good and very bad results.



	/**
	 * Quantizes image (0xRRGGBB pixels) in place to indices into the
	 * returned 256-entry palette.
	 *
	 * Pass 1: for each pixel, find the reference color closest in each
	 * individual channel, then bucket the pixel under whichever of those
	 * three candidates is nearest by the full weighted distance.
	 * Pass 2: average each bucket into a palette entry.
	 * Pass 3: rewrite pixels as palette indices.
	 */
	static int[] quantize8Bit(int[] image) {
		for (int i = 0; i < 256; i++) {
			sizes[i] = 0;
		}
		for (int i = 0; i < image.length; i++) {
			int color = image[i];
			// Nearest reference entry per individual channel.
			int minr = dist('r', REFERENCE_PALETTE[0], color);
			int ming = dist('g', REFERENCE_PALETTE[0], color);
			int minb = dist('b', REFERENCE_PALETTE[0], color);
			int closestr = 0;
			int closestg = 0;
			int closestb = 0;
			for (int j = 1; j < 256; j++) {
				int dst = dist('r', REFERENCE_PALETTE[j], color);
				if (dst < minr) {
					minr = dst;
					closestr = j;
				}
				dst = dist('g', REFERENCE_PALETTE[j], color);
				if (dst < ming) {
					ming = dst;
					closestg = j;
				}
				dst = dist('b', REFERENCE_PALETTE[j], color);
				if (dst < minb) {
					minb = dst;
					closestb = j;
				}
			}
			// Pick the candidate with the smallest full distance. The old
			// if/else chain fell through to the blue candidate on ANY tie
			// (e.g. dr == dg < db still chose blue); use a true argmin with
			// red > green > blue priority on exact ties instead.
			int best = closestr;
			int bestDist = dist(REFERENCE_PALETTE[closestr], color);
			int dg = dist(REFERENCE_PALETTE[closestg], color);
			if (dg < bestDist) {
				bestDist = dg;
				best = closestg;
			}
			int db = dist(REFERENCE_PALETTE[closestb], color);
			if (db < bestDist) {
				best = closestb;
			}
			add(best, color);
		}
		for (int i = 0; i < 256; i++) {
			palette[i] = avg(i);
		}
		// Re-index every pixel against the final averaged palette.
		for (int i = 0; i < image.length; i++) {
			int color = image[i];
			int min = dist(palette[0], color);
			int closest = 0;
			for (int j = 1; j < 256; j++) {
				int dst = dist(palette[j], color);
				if (dst < min) {
					min = dst;
					closest = j;
				}
			}
			image[i] = closest;
		}
		return palette;
	}

	/**
	 * Weighted squared RGB distance between two packed 0xRRGGBB colors;
	 * channel_weight favours green, then red, then blue.
	 */
	private static int dist(int x, int y) {
		int dr = (x >> 16 & 0xff) - (y >> 16 & 0xff);
		int dg = (x >> 8 & 0xff) - (y >> 8 & 0xff);
		int db = (x & 0xff) - (y & 0xff);
		return (int) (dr * dr * channel_weight[0]
				+ dg * dg * channel_weight[1]
				+ db * db * channel_weight[2]);
	}

	/**
	 * Weighted squared distance between colors x and y in one channel.
	 *
	 * @param c channel selector: 'r', 'g' or 'b'
	 * @throws IllegalArgumentException for any other channel — the previous
	 *         silent {@code return 0} default made an invalid channel look
	 *         like a perfect match and could mask caller bugs
	 */
	private static int dist(char c, int x, int y) {
		int d;
		float w;
		switch (c) {
			case 'r':
				d = (x >> 16 & 0xff) - (y >> 16 & 0xff);
				w = channel_weight[0];
				break;
			case 'g':
				d = (x >> 8 & 0xff) - (y >> 8 & 0xff);
				w = channel_weight[1];
				break;
			case 'b':
				d = (x & 0xff) - (y & 0xff);
				w = channel_weight[2];
				break;
			default:
				throw new IllegalArgumentException("unknown channel: " + c);
		}
		return (int) (d * d * w);
	}

RGB isn’t a good space to measure color differences. Web search: “perceptual color spaces”. You’ll also get better results if you don’t simply map each pixel directly to a palette entry. There are various methods; search “error diffusion”.

@Roquen I know… and I did research some color spaces and learned a bit but I’m more comfortable with RGB right now. And yeah this new algorithm does something smarter lol.

Here’s code for the new and improved algorithm.
It produces very good results for images with a lot of colors but doesn’t produce the best 256-color result for images with few colors to begin with. This is because the algorithm now quantizes each color in the image into a 12-bit RGB color and uses this as an index into the ‘map’ array.
The color is added to the Group object at the index and then the algorithm begins to prune the Groups with the least number of colors by just merging them with the one next to them until the final number of colors in the image is 256.


class MVQ2 {

	/** Final palette (0xRRGGBB entries); unused tail entries are zeroed. */
	private final int[] palette = new int[256];

	/** One accumulator per 12-bit (RGB 4:4:4) downsampled color. */
	private final Group[] map = new Group[4096];

	/** Smallest non-empty group size, maintained by recount(). */
	private int min_size = 0x7fffffff;

	/** Number of non-empty groups, maintained by recount(). */
	private int colors = 0;

	MVQ2() {
		for (int i = 0; i < map.length; i++) {
			map[i] = new Group();
		}
	}

	/**
	 * Quantizes image (0xRRGGBB pixels) in place: each pixel is replaced by
	 * an index into the returned 256-entry palette.
	 */
	int[] to_256_color_paletted_image(int[] image) {
		for (int i = 0; i < map.length; i++) {
			map[i].clear();
		}
		group(image);
		// Merge the smallest groups until at most 256 remain. group() now
		// seeds colors/min_size, so an image that already fits in 256 groups
		// is left untouched (the old do-while started with colors == 0,
		// which tripped stop_pruning immediately and always performed one
		// needless merge).
		while (colors > 256) {
			prune(min_size);
		}
		create_palette();
		index(image);
		return palette;
	}

	/** Copies the averages of the surviving groups into the palette. */
	private void create_palette() {
		int idx = 0;
		for (int i = 0; i < map.length && idx < palette.length; i++) {
			if (map[i].n != 0) {
				palette[idx++] = map[i].avg();
			}
		}
		// Zero the unused tail so stale entries from a previous call cannot
		// leak into nearest_entry().
		while (idx < palette.length) {
			palette[idx++] = 0;
		}
	}

	/** Rewrites each pixel as the index of its nearest palette entry. */
	private void index(int[] image) {
		for (int i = 0; i < image.length; i++) {
			image[i] = nearest_entry(palette, image[i]);
		}
	}

	/** Buckets every pixel into the group of its 12-bit downsampled color. */
	private void group(int[] image) {
		for (int i = 0; i < image.length; i++) {
			map[downsample444(image[i])].add(image[i]);
		}
		recount();
	}

	/**
	 * One merge pass: folds every group of size <= min into its left
	 * neighbor until at most 256 groups remain. Merging into an empty
	 * neighbor merely relocates a group, so no pixels are ever dropped
	 * (the old code cleared the group at index 0 without merging it).
	 */
	private void prune(int min) {
		int c = colors;
		boolean merged = false;
		for (int i = 1; i < map.length && c > 256; i++) {
			int size = map[i].n;
			if (size != 0 && size <= min) {
				if (map[i - 1].n != 0) {
					c--; // a real merge: two groups become one
				}
				map[i - 1].add(map[i]);
				map[i].clear();
				merged = true;
			}
		}
		if (!merged && c > 256 && map[0].n != 0) {
			// Only the group at index 0 qualified this pass; fold it into
			// the next non-empty group so every pass makes progress.
			for (int i = 1; i < map.length; i++) {
				if (map[i].n != 0) {
					map[i].add(map[0]);
					map[0].clear();
					break;
				}
			}
		}
		recount();
	}

	/**
	 * Recomputes colors and min_size from the groups that still hold pixels.
	 * The old prune() counted groups it had just cleared and never counted
	 * groups merged into already-visited slots, so the do-while loop saw a
	 * wrong color count.
	 */
	private void recount() {
		colors = 0;
		min_size = 0x7fffffff;
		for (Group g : map) {
			if (g.n != 0) {
				colors++;
				if (g.n < min_size) {
					min_size = g.n;
				}
			}
		}
	}

	/** Packs the top 4 bits of each channel into a 12-bit map index. */
	private int downsample444(int c) {
		return ((c >> 20 & 0xf) << 8) | ((c >> 12 & 0xf) << 4) | (c >> 4 & 0xf);
	}

	/** Index of the palette entry nearest to c (first entry wins ties). */
	private int nearest_entry(int[] pal, int c) {
		int index = 0;
		float min = dist(c, pal[0]);
		for (int i = 1; i < pal.length; i++) {
			float d = dist(c, pal[i]);
			if (d < min) {
				min = d;
				index = i;
			}
		}
		return index;
	}

	/**
	 * Weighted squared RGB distance using the "redmean" approximation:
	 * red/blue weights shift with the mean red level, green counts 4x.
	 */
	private float dist(int a, int b) {
		float rbar = ((a >> 16 & 0xff) + (b >> 16 & 0xff)) / 2.0f;
		int dr = (a >> 16 & 0xff) - (b >> 16 & 0xff);
		int dg = (a >> 8 & 0xff) - (b >> 8 & 0xff);
		int db = (a & 0xff) - (b & 0xff);
		return (2.0f + rbar / 256.0f) * (dr * dr) + ((dg * dg) << 2) + (2.0f + (255.0f - rbar) / 256.0f) * (db * db);
	}

	/**
	 * Running per-channel sums of the colors assigned to one bucket.
	 * static: it never touches the enclosing MVQ2 instance.
	 */
	private static class Group {
		private int r, g, b, n;

		/** Adds one 0xRRGGBB color to the sums. */
		void add(int c) {
			r += c >> 16 & 0xff;
			g += c >> 8 & 0xff;
			b += c & 0xff;
			n++;
		}

		/** Merges another group's sums into this one. */
		void add(Group o) {
			r += o.r;
			g += o.g;
			b += o.b;
			n += o.n;
		}

		/** Rounded average color of the bucket, or 0 if empty. */
		int avg() {
			if (n == 0) {
				return 0;
			}
			int hn = n >> 1; // adding hn rounds the integer division to nearest
			return ((((r + hn) / n) & 0xff) << 16 | (((g + hn) / n) & 0xff) << 8 | (((b + hn) / n) & 0xff)) & 0xffffff;
		}

		void clear() {
			r = g = b = n = 0;
		}
	}

}