Difference between revisions of "Sequence logo.pde"
(New page: <code><pre> // DNA_logo // D. Thiebaut // Creating a sequence logo //--------------------------------------------------------------------- // GEOMETRY //----------------------------------...) |
|||
Line 2: | Line 2: | ||
// DNA_logo | // DNA_logo | ||
// D. Thiebaut | // D. Thiebaut | ||
− | // | + | // This program creates a DNA-sequence logo |
+ | // It uses 4 images for the 4 symbols A, C, G, and T, a.png, c.png, g.png | ||
+ | // and t.png. The images have black background and are captured from the | ||
+ | // display generated by the function generateACGTbitmaps() | ||
+ | // | ||
+ | // The program reads several sequences of identical length that represent | ||
+ | // potential binding regions between two strands, and calculates | ||
+ | // the frequency of each symbol appearing in each position of the consensus | ||
+ | // sequence, along with the information present in each symbol of the consensus | ||
+ | // string. The maximum value is 2 bits. | ||
+ | // This program assumes that each symbol is equally likely to appear in each | ||
+ | // position of the sequence. | ||
+ | // | ||
+ | // The symbols are shown with their height proportional to their frequency and | ||
+ | // the amount of information in each position. | ||
//--------------------------------------------------------------------- | //--------------------------------------------------------------------- | ||
// GEOMETRY | // GEOMETRY | ||
//--------------------------------------------------------------------- | //--------------------------------------------------------------------- | ||
− | int WIDTH = 800; | + | int WIDTH = 800; // width of the window in pixels |
− | int MIDWIDTH = WIDTH/2; | + | int MIDWIDTH = WIDTH/2; // half that |
− | int HEIGHT = 600; | + | int HEIGHT = 600; // height, in pixels. |
− | int BORDER = 40; | + | int BORDER = 40; // border around the window where nothing |
− | int TITLELINE = 20; | + | // is displayed |
− | int ALINE = HEIGHT/2; | + | int TITLELINE = 20; // y position of title line from top |
+ | int ALINE = HEIGHT/2; // y position of line where logo appears | ||
PFont font; // the font used to display the symbols | PFont font; // the font used to display the symbols | ||
− | int NOSEQS = 8; | + | int NOSEQS = 8; // number of sequences |
− | |||
− | |||
− | |||
− | |||
− | float Afreq[]; | + | float Afreq[]; // frequency of A symbols in sequences |
− | float Cfreq[]; | + | float Cfreq[]; // C |
− | float Gfreq[]; | + | float Gfreq[]; // G |
− | float Tfreq[]; | + | float Tfreq[]; // T |
− | float information[]; | + | float information[]; // amount of information at each location |
+ | // of the consensus sequence | ||
+ | |||
+ | String seq[] = new String[NOSEQS]; // array of sequences | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
//--------------------------------------------------------------------- | //--------------------------------------------------------------------- | ||
// INITWINDOW: Draw the fixed text in the window | // INITWINDOW: Draw the fixed text in the window | ||
//--------------------------------------------------------------------- | //--------------------------------------------------------------------- | ||
void initWindow( String title ) { | void initWindow( String title ) { | ||
+ | font = loadFont( "GillSans-60.vlw" ); // 60 points... very large! | ||
textFont( font ); | textFont( font ); | ||
− | color myColor = color( 99, 66, 204 ); | + | color myColor = color( 99, 66, 204 ); // font color |
− | fill( myColor ); | + | fill( myColor ); |
− | textSize( 24 ); | + | textSize( 24 ); // shrink for title |
− | text( title, BORDER, TITLELINE ); | + | text( title, BORDER, TITLELINE ); // show title |
} | } | ||
//--------------------------------------------------------------------- | //--------------------------------------------------------------------- | ||
− | // SETUP: called once when app starts | + | // SETUP: called once when app starts. |
//--------------------------------------------------------------------- | //--------------------------------------------------------------------- | ||
void setup() { | void setup() { | ||
size( WIDTH, HEIGHT ); | size( WIDTH, HEIGHT ); | ||
− | background( 0, 0, 0 ); | + | background( 0, 0, 0 ); // black background |
− | |||
initWindow( "Sequence Logo" ); | initWindow( "Sequence Logo" ); | ||
Line 67: | Line 73: | ||
seq[7] = "CCAATTGTTTTG"; | seq[7] = "CCAATTGTTTTG"; | ||
− | //--- generate arrays --- | + | //--- generate arrays of frequencies and information --- |
int noSymbols = seq[0].length( ); | int noSymbols = seq[0].length( ); | ||
Afreq = new float[ noSymbols ]; | Afreq = new float[ noSymbols ]; | ||
Line 73: | Line 79: | ||
Gfreq = new float[ noSymbols ]; | Gfreq = new float[ noSymbols ]; | ||
Tfreq = new float[ noSymbols ]; | Tfreq = new float[ noSymbols ]; | ||
− | + | information = new float[ noSymbols ]; | |
//--- compute information at each position of sequence --- | //--- compute information at each position of sequence --- | ||
Line 83: | Line 89: | ||
//--------------------------------------------------------------------- | //--------------------------------------------------------------------- | ||
+ | // GENERATEACGTBITMAPS: use only when new png files needed | ||
//--------------------------------------------------------------------- | //--------------------------------------------------------------------- | ||
void generateACGTbitmaps() { | void generateACGTbitmaps() { | ||
Line 97: | Line 104: | ||
//--------------------------------------------------------------------- | //--------------------------------------------------------------------- | ||
+ | // xlog2x: returns x * log( x )/log( 2 ) which would return NaN if | ||
+ | // not testing for 0 condition... | ||
//--------------------------------------------------------------------- | //--------------------------------------------------------------------- | ||
float xlog2x( float x ) { | float xlog2x( float x ) { | ||
− | if ( x==0 ) return 0; | + | if ( x==0 ) return 0; // because 0 * log( 0 ) is 0 |
return x * log(x)/log(2); | return x * log(x)/log(2); | ||
} | } | ||
− | |||
− | |||
//--------------------------------------------------------------------- | //--------------------------------------------------------------------- | ||
+ | // findFreqsAndInformation: given the sequences, computes the frequency | ||
+ | // with which each symbol appears in each position, along with the | ||
+ | // information present in each symbol being the right one for that | ||
+ | // position. | ||
//--------------------------------------------------------------------- | //--------------------------------------------------------------------- | ||
void findFreqsAndInformation() { | void findFreqsAndInformation() { | ||
− | //--- | + | |
+ | //--- count how many times each --- | ||
int noSymbols = seq[0].length( ); | int noSymbols = seq[0].length( ); | ||
int Acount[] = new int[noSymbols ]; | int Acount[] = new int[noSymbols ]; | ||
Line 115: | Line 127: | ||
int Gcount[] = new int[noSymbols ]; | int Gcount[] = new int[noSymbols ]; | ||
int Tcount[] = new int[noSymbols ]; | int Tcount[] = new int[noSymbols ]; | ||
− | |||
− | //--- compute | + | //--- compute counts of A, C, G, and T in sequences --- |
for ( int i=0; i<seq[0].length(); i++ ) { | for ( int i=0; i<seq[0].length(); i++ ) { | ||
Acount[i] = 0; | Acount[i] = 0; | ||
Line 141: | Line 152: | ||
+xlog2x( Cfreq[i] ) + xlog2x( Gfreq[i] ) | +xlog2x( Cfreq[i] ) + xlog2x( Gfreq[i] ) | ||
+ xlog2x( Tfreq[i] ); | + xlog2x( Tfreq[i] ); | ||
− | println( "information["+i+"] = "+information[i] ); | + | //println( "information["+i+"] = "+information[i] ); |
} | } | ||
} | } | ||
//--------------------------------------------------------------------- | //--------------------------------------------------------------------- | ||
+ | // displayLogo: displays the logo in the window, at y = ALINE. | ||
+ | // Goes through all the possible symbol location and | ||
+ | // stack up the 4 images for the A, C, G, and T symbols | ||
+ | // where the height of the image is scaled by the information | ||
+ | // present in the consensus sequence, and by the frequency | ||
+ | // of each symbol. Maximum height is 2 bits. | ||
//--------------------------------------------------------------------- | //--------------------------------------------------------------------- | ||
void displayLogo() { | void displayLogo() { | ||
− | PImage a, c, g, t; | + | PImage a, c, g, t; // the 4 images of the 4 symbols |
− | a = loadImage( "a.png" ); | + | a = loadImage( "a.png" ); // load them from file into variables |
c = loadImage( "c.png" ); | c = loadImage( "c.png" ); | ||
g = loadImage( "g.png" ); | g = loadImage( "g.png" ); | ||
t = loadImage( "t.png" ); | t = loadImage( "t.png" ); | ||
− | //--- | + | |
+ | //--- compute geometrical information --- | ||
int noSymbols = seq[0].length(); | int noSymbols = seq[0].length(); | ||
float charWidth = (WIDTH-BORDER*2)/noSymbols; | float charWidth = (WIDTH-BORDER*2)/noSymbols; | ||
char symbols[] = { 'A', 'C', 'G', 'T' }; | char symbols[] = { 'A', 'C', 'G', 'T' }; | ||
+ | //--- for each symbol position... | ||
for ( int i=0; i< noSymbols; i++ ) { | for ( int i=0; i< noSymbols; i++ ) { | ||
float charBase = ALINE; | float charBase = ALINE; | ||
+ | |||
+ | //--- for each possible symbol ... | ||
for ( int j=0; j<4; j++ ) { | for ( int j=0; j<4; j++ ) { | ||
char sym = symbols[j]; | char sym = symbols[j]; | ||
Line 172: | Line 193: | ||
float charHeight = 50 * information[i] * freq; | float charHeight = 50 * information[i] * freq; | ||
image( img, BORDER + i*charWidth, charBase-charHeight, charWidth, charHeight ); | image( img, BORDER + i*charWidth, charBase-charHeight, charWidth, charHeight ); | ||
+ | |||
+ | //--- change the y location for the base of the next image for stacking effect==> | ||
charBase = charBase - charHeight; | charBase = charBase - charHeight; | ||
} | } |
Latest revision as of 17:28, 2 August 2008
// DNA_logo
// D. Thiebaut
// This program creates a DNA-sequence logo
// It uses 4 images for the 4 symbols A, C, G, and T, a.png, c.png, g.png
// and t.png. The images have black background and are captured from the
// display generated by the function generateACGTbitmaps()
//
// The program reads several sequences of identical length that represent
// potential binding regions between two strands, and calculates
// the frequency of each symbol appearing in each position of the consensus
// sequence, along with the information present in each symbol of the consensus
// string. The maximum value is 2 bits.
// This program assumes that each symbol is equally likely to appear in each
// position of the sequence.
//
// The symbols are shown with their height proportional to their frequency and
// the amount of information in each position.
//---------------------------------------------------------------------
// GEOMETRY AND SHARED STATE
//---------------------------------------------------------------------
int WIDTH     = 800;        // window width, in pixels
int MIDWIDTH  = WIDTH/2;    // half of the window width
int HEIGHT    = 600;        // window height, in pixels
int BORDER    = 40;         // margin around the window that is left empty
int TITLELINE = 20;         // y coordinate of the title, measured from the top
int ALINE     = HEIGHT/2;   // y coordinate of the line the logo sits on

PFont font;                 // font loaded by initWindow() and used for text

int NOSEQS = 8;             // number of sequences that are analyzed

// One entry per position of the sequences; the arrays are allocated
// in setup() once the sequence length is known.
float[] Afreq;              // frequency of the symbol A at each position
float[] Cfreq;              // same for C
float[] Gfreq;              // same for G
float[] Tfreq;              // same for T
float[] information;        // information at each position of the
                            // consensus sequence

String[] seq = new String[NOSEQS];  // the sequences themselves
//---------------------------------------------------------------------
// INITWINDOW: loads the font and draws the fixed text in the window.
//   title: the string written on the title line, starting at
//          x = BORDER, y = TITLELINE.
//---------------------------------------------------------------------
void initWindow( String title ) {
  //--- load the 60-point font file and make it the current font ---
  font = loadFont( "GillSans-60.vlw" );
  textFont( font );

  //--- set the text color, then draw the title at a reduced size ---
  fill( color( 99, 66, 204 ) );
  textSize( 24 );
  text( title, BORDER, TITLELINE );
}
//---------------------------------------------------------------------
// SETUP: called once when the app starts.  Opens the window, stores
//        the sequences, allocates the frequency and information
//        arrays, computes them, and draws the logo.
//---------------------------------------------------------------------
void setup() {
  size( WIDTH, HEIGHT );
  background( 0, 0, 0 );               // black background
  initWindow( "Sequence Logo" );

  //--- the 8 sequences to analyze, copied into the global array ---
  String[] inputs = {
    "CCCATTGTTCTC",
    "TTTCTGGTTCTC",
    "TCAATTGTTTAG",
    "CTCATTGTTGTC",
    "TCCATTGTTCTC",
    "CCTATTGTTCTC",
    "TCCATTGTTCGT",
    "CCAATTGTTTTG"
  };
  for ( int k = 0; k < inputs.length; k++ ) {
    seq[k] = inputs[k];
  }

  //--- one slot per position; the first sequence gives the length ---
  int positions = seq[0].length( );
  Afreq       = new float[ positions ];
  Cfreq       = new float[ positions ];
  Gfreq       = new float[ positions ];
  Tfreq       = new float[ positions ];
  information = new float[ positions ];

  //--- fill the arrays, then draw the stacked symbols ---
  findFreqsAndInformation();
  displayLogo();
}
//---------------------------------------------------------------------
// GENERATEACGTBITMAPS: use only when new png files are needed.
//        Writes the string "ACGT" four times at text size 60, one
//        line per color, each line 60 pixels below the previous one.
//        It is not called from setup().
//---------------------------------------------------------------------
void generateACGTbitmaps() {
  //--- one R, G, B triplet per line of text ---
  int[][] palette = {
    { 255, 204,   0 },
    { 132,  99,   0 },
    {  99, 132, 204 },
    {  99, 204,  33 }
  };

  textSize( 60 );
  for ( int row = 0; row < palette.length; row++ ) {
    fill( palette[row][0], palette[row][1], palette[row][2] );
    text( "ACGT", BORDER, BORDER + 60*( row+1 ) );
  }
}
//---------------------------------------------------------------------
// xlog2x: returns x * log2( x ), computed as x * log( x )/log( 2 ).
//         The value 0 is handled separately and yields 0, because
//         evaluating the formula at 0 would produce NaN.
//---------------------------------------------------------------------
float xlog2x( float x ) {
  if ( x != 0 ) {
    return x * log(x)/log(2);
  }
  return 0;   // 0 * log( 0 ) is taken to be 0
}
//---------------------------------------------------------------------
// findFreqsAndInformation: given the sequences in seq[], computes for
//        every position the frequency of each of the symbols A, C, G
//        and T (stored in Afreq, Cfreq, Gfreq, Tfreq), and the
//        information at that position (stored in information[]):
//
//            2 + sum over the 4 symbols of  f * log2( f )
//
//        The length of seq[0] is used as the number of positions.
//---------------------------------------------------------------------
void findFreqsAndInformation() {
  int positions = seq[0].length( );

  //--- counters, one per position; new int arrays start at 0 ---
  int[] Acount = new int[ positions ];
  int[] Ccount = new int[ positions ];
  int[] Gcount = new int[ positions ];
  int[] Tcount = new int[ positions ];

  //--- count how many times each symbol appears at each position ---
  for ( int pos = 0; pos < positions; pos++ ) {
    for ( int s = 0; s < NOSEQS; s++ ) {
      switch ( seq[s].charAt( pos ) ) {
        case 'A': Acount[pos]++; break;
        case 'C': Ccount[pos]++; break;
        case 'G': Gcount[pos]++; break;
        case 'T': Tcount[pos]++; break;
        default:  break;          // any other character is not counted
      }
    }
  }

  //--- turn the counts into frequencies and information ---
  for ( int pos = 0; pos < positions; pos++ ) {
    Afreq[pos] = Acount[pos] / (float) NOSEQS;
    Cfreq[pos] = Ccount[pos] / (float) NOSEQS;
    Gfreq[pos] = Gcount[pos] / (float) NOSEQS;
    Tfreq[pos] = Tcount[pos] / (float) NOSEQS;

    float bits = 2.0;
    bits += xlog2x( Afreq[pos] );
    bits += xlog2x( Cfreq[pos] );
    bits += xlog2x( Gfreq[pos] );
    bits += xlog2x( Tfreq[pos] );
    information[pos] = bits;
  }
}
//---------------------------------------------------------------------
// displayLogo: displays the logo in the window, at y = ALINE.
//        Goes through every symbol position and stacks up the 4
//        images of the A, C, G, and T symbols (A at the bottom, T at
//        the top).  The height of each image is
//        50 * information[i] * frequency of the symbol at position i,
//        and the width is the drawable width (WIDTH minus the two
//        borders) divided by the number of positions.
//
//        Nothing is drawn if one of the image files cannot be loaded
//        (a message is printed instead), or if the sequences are empty.
//---------------------------------------------------------------------
void displayLogo() {
  //--- load the 4 images; files[k] and freqs[k] describe the same symbol ---
  String[] files  = { "a.png", "c.png", "g.png", "t.png" };
  PImage[] glyphs = new PImage[ files.length ];
  for ( int k = 0; k < files.length; k++ ) {
    glyphs[k] = loadImage( files[k] );
    if ( glyphs[k] == null ) {
      // loadImage() returns null on failure; image() cannot draw that
      println( "displayLogo: cannot load " + files[k] );
      return;
    }
  }
  float[][] freqs = { Afreq, Cfreq, Gfreq, Tfreq };

  //--- compute geometrical information ---
  int noSymbols = seq[0].length();
  if ( noSymbols == 0 ) {
    return;                       // no position to draw
  }
  // divide as floats: an integer division would drop the fractional
  // part of the width and leave a gap at the right of the logo
  float charWidth   = ( WIDTH - BORDER*2 ) / (float) noSymbols;
  float heightScale = 50;         // multiplies information * frequency

  //--- for each symbol position...
  for ( int i = 0; i < noSymbols; i++ ) {
    float charBase = ALINE;       // bottom of the next image to draw

    //--- for each possible symbol ...
    for ( int j = 0; j < glyphs.length; j++ ) {
      float charHeight = heightScale * information[i] * freqs[j][i];
      image( glyphs[j], BORDER + i*charWidth, charBase - charHeight,
             charWidth, charHeight );

      //--- the next image sits on top of this one (stacking effect)
      charBase = charBase - charHeight;
    }
  }
}