I wrote a fancy trie once. It is readonly though, the encoder is separate. It only supports 75bit text but is ultra efficient disk- and memory-wise. Eg, it stores 293,365 words in 889,614 bytes.
public class Trie {
static public final int START = 0xC0000000;
static public final int INVALID = 0x800000;
static public final int LEAF = 0x80000000;
static private final int END_OF_WORD = 0x80;
static private final int HAS_MORE = 0x40;
static private final int IS_LEAF = 0x20;
byte[] data;
public Trie () {
}
public Trie (FileHandle file) {
DataInputStream input = new DataInputStream(file.read());
try {
data = new byte[input.readInt()];
int bytes = 0;
while (true) {
int count = input.read(data, bytes, data.length - bytes);
if (count == 0) break;
bytes += count;
}
} catch (IOException ex) {
throw new RuntimeException("Error reading file: " + file, ex);
}
}
public int get (int c, int offset) {
if (offset == LEAF) return INVALID;
byte[] data = this.data;
c -= 97;
if (offset == START) {
offset = c * 3;
int index = (data[offset++] & 0xFF) << 16;
index |= (data[offset++] & 0xFF) << 8;
index |= data[offset] & 0xFF;
return index;
}
if (offset < 0) offset = -offset;
// Find matching char.
int indexOffset = 0, charIndex = 0;
byte b;
while (true) {
b = data[offset++];
if ((b & 31) == c) break;
if ((b & HAS_MORE) == 0) return INVALID;
if ((b & IS_LEAF) == 0) indexOffset++; // A leaf has no indexes.
charIndex++;
}
if ((b & IS_LEAF) != 0) return LEAF;
int nextIndex;
if (charIndex == 0 && (b & HAS_MORE) == 0) {
// A single child has no indexes, use the next byte.
nextIndex = offset;
} else {
// Skip remaining chars.
while ((b & HAS_MORE) != 0)
b = data[offset++];
// Skip indexes before the one we want.
int indexesStart = offset;
for (int i = 0; i < indexOffset; i++) {
if ((data[offset++] & 0x80) == 0) continue;
if ((data[offset++] & 0x80) == 0) continue;
offset++;
}
// Decode the one we want.
b = data[offset++];
nextIndex = b & 0x7F;
if ((b & 0x80) != 0) {
b = data[offset++];
nextIndex |= (b & 0x7F) << 7;
if ((b & 0x80) != 0) nextIndex |= (data[offset] & 0x7F) << 14;
}
nextIndex += indexesStart;
}
if ((data[nextIndex] & END_OF_WORD) != 0) return -nextIndex;
return nextIndex;
}
public boolean contains (String text) {
int index = START;
for (int i = 0, n = text.length(); i < n; i++) {
// System.out.println(text.charAt(i) + ": "
// + (get(text.charAt(i), index) == INVALID ? "INVALID" : get(text.charAt(i), index)));
index = get(text.charAt(i), index);
if (index == INVALID) return false;
}
return index < 0;
}
}