I am wondering what I can do to speed up my program. According to a profiling tool, more than 45% of the CPU time is spent in this one method:
// encodes a given pixel at point (w,h)
public final void encode(int w, int h, int k, int width, byte[][] frame, boolean verbose, boolean[][] ODCodes, boolean[][] IDCodes, boolean[][] huffCodes, BitOutputStream controlW, BitOutputStream IDW, BitOutputStream ODW, BitOutputStream CCW) throws IOException
{
j=h*width+w;
currentPixelRed=frame[j][0];
currentPixelGreen=frame[j][1];
currentPixelBlue=frame[j][2];
if (k>-1)
{
comparatorPixelRed=frame[k][0];
comparatorPixelGreen=frame[k][1];
comparatorPixelBlue=frame[k][2];
Rdiff=256+((currentPixelRed & 0xFF)-(comparatorPixelRed & 0xFF));
Gdiff=256+((currentPixelGreen & 0xFF)-(comparatorPixelGreen & 0xFF));
Bdiff=256+((currentPixelBlue & 0xFF)-(comparatorPixelBlue & 0xFF));
}
innerBitCount=0;
// Estimate how many bits encoding this pixel via the Inner Differences or the colour-component Huffman codes (whichever is cheaper per component) will take.
innerBitCount+=huffCodes[((int)(currentPixelRed) & 0xff)].length+1;
RGdiff= 256+((currentPixelGreen & 0xFF)-(currentPixelRed & 0xFF));
if (IDCodes[RGdiff].length>0 && IDCodes[RGdiff].length<huffCodes[(currentPixelGreen & 0xFF)].length)
{
innerBitCount+=IDCodes[RGdiff].length+1;
}
else
{
innerBitCount+=huffCodes[((int)(currentPixelGreen) & 0xff)].length+1;
}
RBdiff= 256+((currentPixelBlue & 0xFF)-(currentPixelRed & 0xFF));
if (IDCodes[RBdiff].length>0 && IDCodes[RBdiff].length<huffCodes[(currentPixelBlue & 0xFF)].length)
{
innerBitCount+=IDCodes[RBdiff].length+1;
}
else
{
innerBitCount+=huffCodes[((int)(currentPixelBlue) & 0xff)].length+1;
}
// If the comparator pixel is in range, all the RGB differences of the current pixel are in the Outer Differences,
// and coding via the Outer Differences yields better compression than the Inner Differences estimate, write the OD codes.
if (k>-1 && ODCodes[Rdiff].length>0 && ODCodes[Gdiff].length>0 && ODCodes[Bdiff].length>0 && (innerBitCount>ODCodes[Rdiff].length+ODCodes[Gdiff].length+ODCodes[Bdiff].length+1))
{
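// control bit set: this pixel is coded with the Outer Differences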
controlW.value=controlW.value<<1 | bitMask; controlW.count++;
controlW.addToBuffer();
for (int i=0;i<ODCodes[Rdiff].length;i++)
{
ODW.value=ODW.value<<1; ODW.count++;
if (ODCodes[Rdiff][i]) ODW.value=ODW.value| bitMask;
ODW.addToBuffer();
}
for (int i=0;i<ODCodes[Gdiff].length;i++)
{
ODW.value=ODW.value<<1; ODW.count++;
if (ODCodes[Gdiff][i]) ODW.value=ODW.value| bitMask;
ODW.addToBuffer();
}
for (int i=0;i<ODCodes[Bdiff].length;i++)
{
ODW.value=ODW.value<<1; ODW.count++;
if (ODCodes[Bdiff][i]) ODW.value=ODW.value| bitMask;
ODW.addToBuffer();
}
}
else
{
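// control bit clear: this pixel is coded component by component (Huffman codes and/or Inner Differences)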
// set Red component to its huffman code
red=((int)(currentPixelRed) & 0xff);
controlW.value=controlW.value<<1;
controlW.count++;
controlW.addToBuffer();
for (int i=0;i<huffCodes[red].length;i++)
{
CCW.value=CCW.value<<1; CCW.count++;
if (huffCodes[red][i]) CCW.value=CCW.value| bitMask;
CCW.addToBuffer();
}
// if the Red-Green Difference is in the Inner Differences encode the component using Inner Differences
// unless the Green component huffman code is less than the difference code in which case the Green component huffman code is written.
RGdiff= 256+((currentPixelGreen & 0xFF)-(currentPixelRed & 0xFF));
if (IDCodes[RGdiff].length>0 && IDCodes[RGdiff].length<huffCodes[(currentPixelGreen & 0xFF)].length)
{
controlW.value=controlW.value<<1 | bitMask; controlW.count++;
controlW.addToBuffer();
for (int i=0;i<IDCodes[RGdiff].length;i++)
{
IDW.value=IDW.value<<1; IDW.count++;
if (IDCodes[RGdiff][i]) IDW.value=IDW.value| bitMask;
IDW.addToBuffer();
}
}
else
{
controlW.value=controlW.value<<1; controlW.count++;
controlW.addToBuffer();
for (int i=0;i<huffCodes[(currentPixelGreen & 0xFF)].length;i++)
{
CCW.value=CCW.value<<1; CCW.count++;
if (huffCodes[(currentPixelGreen & 0xFF)][i]) CCW.value=CCW.value| bitMask;
CCW.addToBuffer();
}
}
// if the Red-Blue Difference is in the Inner Differences encode the component using Inner Differences
// unless the Blue component huffman code is less than the difference code in which case the Blue component huffman code is written.
RBdiff= 256+((currentPixelBlue & 0xFF)-(currentPixelRed & 0xFF));
if (IDCodes[RBdiff].length >0 && IDCodes[RBdiff].length<huffCodes[(currentPixelBlue & 0xFF)].length)
{
controlW.value=controlW.value<<1 | bitMask; controlW.count++;
controlW.addToBuffer();
for (int i=0;i<IDCodes[RBdiff].length;i++)
{
IDW.value=IDW.value<<1; IDW.count++;
if (IDCodes[RBdiff][i]) IDW.value=IDW.value| bitMask;
IDW.addToBuffer();
}
}
else
{
controlW.value=controlW.value<<1; controlW.count++;
controlW.addToBuffer();
for (int i=0;i<huffCodes[(currentPixelBlue & 0xFF)].length;i++)
{
CCW.value=CCW.value<<1; CCW.count++;
if (huffCodes[(currentPixelBlue & 0xFF)][i]) CCW.value=CCW.value| bitMask;
CCW.addToBuffer();
}
}
}
This method is called over 300,000 times (depending on the frame's size) by the following block of code:
///// encode pixels /////
// Generate Outer Differences
// encode Pixels using Scan Method 1
if (mode==1)
{
prev=-1;
for (h=0;h<height;h++)
{
for (w=0;w<width;w++)
{
encode.encode(w,h,h*width+w+prev,width,frame,verbose,ODCodes,IDCodes,huffCodes,controlW,IDW,ODW,CCW);
}
}
}
// encode Pixels using Scan Method 2
else if (mode==2)
{
prev=-width;
for (w=0;w<width;w++)
{
for (h=0;h<height;h++)
{
encode.encode(w,h,h*width+w+prev,width,frame,verbose,ODCodes,IDCodes,huffCodes,controlW,IDW,ODW,CCW);
}
}
}
// encode Pixels using Scan Method 3
else if (mode==3)
{
prev=-width;
for (h=0;h<height;h++)
{
for (w=0;w<width;w++)
{
encode.encode(w,h,h*width+w+prev,width,frame,verbose,ODCodes,IDCodes,huffCodes,controlW,IDW,ODW,CCW);
}
}
}
// encode Pixels using Scan Method 4
else
{
for (h=0;h<height;h++)
{
encode.encode(0,h,-1,width,frame,verbose,ODCodes,IDCodes,huffCodes,controlW,IDW,ODW,CCW);
}
prev=-1;
for (w=1;w<width;w++)
{
for (h=0;h<height;h++)
{
encode.encode(w,h,h*width+w+prev,width,frame,verbose,ODCodes,IDCodes,huffCodes,controlW,IDW,ODW,CCW);
}
}
}
As you can see, there are four different ‘scan modes’, which dictate the order in which the frame is traversed.
I have read that calling a method in Java is much more expensive than in C/C++. Is there a pattern that would let me make four classes with essentially the same functionality (with the encode body effectively inlined rather than called per pixel) that differ only in how the frame is scanned, without unnecessary replication of the code?
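Something along these lines is what I have in mind (just a rough sketch; ScanEncoder, encodePixel and the subclass names are invented for illustration, and the other state would stay in fields as it does now):

abstract class ScanEncoder {

    // shared encoding logic, identical for every scan method;
    // this would contain the same body as encode() above
    protected void encodePixel(int w, int h, int k) {
        // ...
    }

    // each subclass walks the frame in its own order
    abstract void scan(int width, int height);
}

class ScanMethod1 extends ScanEncoder {
    @Override
    void scan(int width, int height) {
        for (int h = 0; h < height; h++) {
            for (int w = 0; w < width; w++) {
                // compare with the previous pixel in scan order (k = -1 for the very first pixel)
                encodePixel(w, h, h * width + w - 1);
            }
        }
    }
}

class ScanMethod2 extends ScanEncoder {
    @Override
    void scan(int width, int height) {
        for (int w = 0; w < width; w++) {
            for (int h = 0; h < height; h++) {
                // compare with the pixel directly above (k < 0 on the first row)
                encodePixel(w, h, h * width + w - width);
            }
        }
    }
}

I am not sure whether something like this actually removes the per-pixel call overhead I have read about, or whether there is a better pattern for it.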
Also, I would love to hear how C/C++ programs are able to iterate over an array so quickly, for example in video codecs: how are they able to iterate over the pixels of a frame 30 times a second? I can barely manage it once per second.
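For what it is worth, my frame is currently stored as byte[width*height][3] (a small three-byte array per pixel). My guess is that a C/C++ codec would use a single flat buffer and plain index arithmetic, something like the sketch below, but I do not know whether that is actually where the difference comes from:

// my guess at a flat pixel layout: one contiguous buffer with three bytes
// per pixel, so the inner loop does index arithmetic instead of a second
// array lookup for every pixel
byte[] flatFrame = new byte[width * height * 3];

for (int h = 0; h < height; h++) {
    int rowStart = h * width * 3;
    for (int w = 0; w < width; w++) {
        int p = rowStart + w * 3;
        int red   = flatFrame[p]     & 0xFF;
        int green = flatFrame[p + 1] & 0xFF;
        int blue  = flatFrame[p + 2] & 0xFF;
        // ... encode the pixel ...
    }
}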
Any help is very much appreciated!