Ok, it’s MUCH better now but not the 4-8x you promised. 
Whereas before it would take about 4-5 seconds to load everything, it now takes around 1.5 seconds to load 16 24-bit or 32-bit TGA files, all 1024x864 with file sizes of 2593 KB. That said, PNGs still load marginally faster, at about 1 second by my estimate. I’ll post the code in case I goofed up somewhere, but I did roughly what you said. I also tried a variant that read the whole file in at once, and it came out slower than the solution I’m posting, where I read the header as normal, then bulk-read the image data and work from arrays after that.
All in all, it’s at least within the same order of magnitude now and more than acceptable for me, especially since the game won’t ever load this many images of this size at once, usually 1 per level and occasionally several if they are animated or if there’s a background and foreground image.
/**
 * Loads a 24-bit or 32-bit TGA image from a stream into a {@link BufferedImage}.
 *
 * <p>The 18-byte header is read field by field, the optional image ID is
 * skipped, and the pixel data is then read in one bulk operation and
 * converted in memory. Pixel data is treated as raw (uncompressed) BGR or
 * BGRA; the image type and image descriptor header fields are read but not
 * inspected.
 *
 * <p>Side effects: assigns the enclosing class's {@code width},
 * {@code height}, {@code pixelDepth}, {@code texWidth} and {@code texHeight}
 * fields, and always closes {@code fis} before returning or throwing.
 *
 * @param fis     stream positioned at the start of the TGA header; closed by this method
 * @param flipped for 32-bit images, whether rows are written bottom-up
 *                (first stored row becomes the last image row); 24-bit
 *                images are always written bottom-up regardless of this flag
 * @return an image backed by a byte raster with 3 (opaque) or 4 (translucent) bands
 * @throws IOException if the stream ends early, the dimensions are not
 *                     positive or too large, or the pixel depth is neither 24 nor 32
 */
public static BufferedImage loadIntoImage(InputStream fis, boolean flipped) throws IOException {
		DataInputStream dis = new DataInputStream(new BufferedInputStream(fis, 100000));
		
		try {
			// Read in the header. readUnsignedByte() raises EOFException on a
			// truncated file instead of silently yielding -1.
			int idLength = dis.readUnsignedByte();
			dis.readUnsignedByte();	// colour map type (unused)
			dis.readUnsignedByte();	// image type (unused)
			dis.readShort();		// colour map start (unused)
			dis.readShort();		// colour map length (unused)
			dis.readUnsignedByte();	// colour map depth (unused)
			dis.readShort();		// x offset (unused)
			dis.readShort();		// y offset (unused)
			
			width = flipEndian(dis.readShort());
			height = flipEndian(dis.readShort());
			pixelDepth = (short) dis.readUnsignedByte();
			
			texWidth = width;
			texHeight = height;
			
			dis.readUnsignedByte();	// image descriptor (unused)
			
			// Skip image ID. readFully guarantees every ID byte is consumed,
			// which skip() does not.
			if (idLength > 0) {
				dis.readFully(new byte[idLength]);
			}
			
			int depth = pixelDepth;
			if (depth != 24 && depth != 32) {
				throw new IOException("Unsupported TGA pixel depth: " + depth);
			}
			
			int w = texWidth;
			int h = texHeight;
			if (w <= 0 || h <= 0) {
				throw new IOException("Invalid TGA dimensions: " + w + "x" + h);
			}
			
			boolean hasAlpha = (depth == 32);
			int bytesPerPixel = hasAlpha ? 4 : 3;
			
			long byteCount = (long) w * h * bytesPerPixel;
			if (byteCount > Integer.MAX_VALUE) {
				throw new IOException("TGA image too large: " + w + "x" + h);
			}
			
			// Bulk read the pixel data. readFully blocks until the whole array
			// is filled; a plain read() may return after a partial fill.
			byte[] allData = new byte[(int) byteCount];
			dis.readFully(allData);
			
			byte[] rawData = new byte[(int) byteCount];
			
			// NOTE(review): the 24-bit path has always flipped rows and ignored
			// 'flipped'; kept as-is so existing callers see the same output.
			boolean flipRows = !hasAlpha || flipped;
			
			// NOTE(review): the non-flipped 32-bit path has always stored the
			// channels in file order (B,G,R,A) while the raster below treats
			// band 0 as red. This looks like a red/blue swap but is preserved
			// here - confirm against callers before changing.
			boolean storeAsRgb = flipRows;
			
			int pos = 0;
			for (int srcRow = 0; srcRow < h; srcRow++) {
				int dstRow = flipRows ? (h - 1 - srcRow) : srcRow;
				int ofs = dstRow * w * bytesPerPixel;
				
				for (int col = 0; col < w; col++) {
					byte blue = allData[pos++];
					byte green = allData[pos++];
					byte red = allData[pos++];
					
					if (storeAsRgb) {
						rawData[ofs] = red;
						rawData[ofs + 2] = blue;
					} else {
						rawData[ofs] = blue;
						rawData[ofs + 2] = red;
					}
					rawData[ofs + 1] = green;
					
					if (hasAlpha) {
						byte alpha = allData[pos++];
						rawData[ofs + 3] = alpha;
						
						// Fully transparent pixels get their colour cleared.
						if (alpha == 0) {
							rawData[ofs] = (byte) 0;
							rawData[ofs + 1] = (byte) 0;
							rawData[ofs + 2] = (byte) 0;
						}
					}
					
					ofs += bytesPerPixel;
				}
			}
			
			//End Kev's Code
			
			DataBufferByte dataBuffer = new DataBufferByte(rawData, rawData.length);
			
			int[] offsets = hasAlpha ? new int[] {0,1,2,3} : new int[] {0,1,2};
			
			PixelInterleavedSampleModel sampleModel = new PixelInterleavedSampleModel
			(
				DataBuffer.TYPE_BYTE,
				w,
				h,
				bytesPerPixel,
				bytesPerPixel * w,
				offsets
			);
			
			WritableRaster raster = Raster.createWritableRaster
			(
				sampleModel,
				dataBuffer,
				new Point(0,0)
			);
			
			ColorModel cm = new ComponentColorModel
			(
				ColorSpace.getInstance(ColorSpace.CS_sRGB),
				hasAlpha ? new int[] {8,8,8,8} : new int[] {8,8,8},
				hasAlpha,
				false,
				hasAlpha ? ComponentColorModel.TRANSLUCENT : ComponentColorModel.OPAQUE,
				DataBuffer.TYPE_BYTE
			);
			
			return new BufferedImage(cm, raster, false, null);
		} finally {
			// Closing the wrapper closes the underlying stream on every path,
			// including failures part-way through the read.
			dis.close();
		}
	}
 
      
    