Difference between revisions of "Sequence logo.pde"

From dftwiki3
Jump to: navigation, search
(New page: <code><pre> // DNA_logo // D. Thiebaut // Creating a sequence logo //--------------------------------------------------------------------- // GEOMETRY //----------------------------------...)
 
 
Line 2: Line 2:
 
// DNA_logo
 
// DNA_logo
 
// D. Thiebaut
 
// D. Thiebaut
// Creating a sequence logo
+
// This program creates a DNA-sequence logo
 +
// It uses 4 images for the 4 symbols A, C, G, and T, a.png, c.png, g.png
 +
// and t.png.  The images have black background and are captured from the
 +
// display generated by the function generateACGTbitmaps()
 +
//
 +
// The program reads several sequences of identical length that represent
 +
// potential binding regions between two strands, and calculates
 +
// the frequency of each symbol appearing in each position of the consensus
 +
// sequence, along with the information present in each symbol of the consensus
 +
// string.  The maximum value is 2 bits.
 +
// This program assumes that each symbol is equally likely to appear in each
 +
// position of the sequence.
 +
//
 +
// The symbols are shown with their height proportional to their frequency and
 +
// the amount of information in each position.
  
 
//---------------------------------------------------------------------
 
//---------------------------------------------------------------------
 
// GEOMETRY
 
// GEOMETRY
 
//---------------------------------------------------------------------
 
//---------------------------------------------------------------------
int WIDTH        = 800;
+
int WIDTH        = 800;             // width of the window in pixels
int MIDWIDTH    = WIDTH/2;
+
int MIDWIDTH    = WIDTH/2;         // half that
int HEIGHT      = 600;
+
int HEIGHT      = 600;             // height, in pixels.
int BORDER      = 40;
+
int BORDER      = 40;               // border around the window where nothing
int TITLELINE    = 20;
+
                                    // is displayed
int ALINE        = HEIGHT/2;
+
int TITLELINE    = 20;               // y position of title line from top
 +
int ALINE        = HEIGHT/2;         // y position of line where logo appears
 
PFont font;                          // the font used to display the symbols
 
PFont font;                          // the font used to display the symbols
  
int NOSEQS = 8;   // number of sequences
+
int NOSEQS = 8;                     // number of sequences
int A = 0;
 
int C = 1;
 
int G = 2;
 
int T = 3;
 
  
float Afreq[];
+
float Afreq[];                       // frequency of A symbols in sequences
float Cfreq[];
+
float Cfreq[];                       //              C
float Gfreq[];
+
float Gfreq[];                       //              G
float Tfreq[];
+
float Tfreq[];                       //              T
  
float information[];
+
float information[];                 // amount of information at each location
 +
                                    // of the consensus sequence
 +
 
 +
String seq[] = new String[NOSEQS];  // array of sequences
  
String seq[] = new String[NOSEQS];
 
  
//---------------------------------------------------------------------
 
// INITFONT: initialize the fonts (must be nonproportional ---
 
//---------------------------------------------------------------------
 
void initFont( ) { 
 
  font = loadFont( "GillSans-60.vlw" ); // ( "Monaco-60.vlw" );
 
  textFont( font );
 
}
 
 
//---------------------------------------------------------------------
 
//---------------------------------------------------------------------
 
// INITWINDOW: Draw the fixed text in the window
 
// INITWINDOW: Draw the fixed text in the window
 
//---------------------------------------------------------------------
 
//---------------------------------------------------------------------
 
void initWindow( String title ) {
 
void initWindow( String title ) {
 +
  font = loadFont( "GillSans-60.vlw" ); // 60 points... very large!
 
   textFont( font );
 
   textFont( font );
   color myColor = color( 99, 66, 204 );
+
   color myColor = color( 99, 66, 204 ); // font color
   fill( myColor );
+
   fill( myColor );                    
   textSize( 24 );
+
   textSize( 24 );                       // shrink for title
   text( title, BORDER, TITLELINE );
+
   text( title, BORDER, TITLELINE );     // show title
 
}
 
}
  
 
//---------------------------------------------------------------------
 
//---------------------------------------------------------------------
// SETUP: called once when app starts
+
// SETUP: called once when app starts
 
//---------------------------------------------------------------------
 
//---------------------------------------------------------------------
 
void setup() {
 
void setup() {
 
   size( WIDTH, HEIGHT );
 
   size( WIDTH, HEIGHT );
   background( 0, 0, 0 );
+
   background( 0, 0, 0 );               // black background
  initFont();
 
 
   initWindow( "Sequence Logo" );
 
   initWindow( "Sequence Logo" );
 
    
 
    
Line 67: Line 73:
 
   seq[7] = "CCAATTGTTTTG";
 
   seq[7] = "CCAATTGTTTTG";
  
   //--- generate arrays ---
+
   //--- generate arrays of frequencies and information ---
 
   int noSymbols = seq[0].length( );
 
   int noSymbols = seq[0].length( );
 
   Afreq  = new float[ noSymbols ];
 
   Afreq  = new float[ noSymbols ];
Line 73: Line 79:
 
   Gfreq  = new float[ noSymbols ];
 
   Gfreq  = new float[ noSymbols ];
 
   Tfreq  = new float[ noSymbols ];
 
   Tfreq  = new float[ noSymbols ];
    
+
   information = new float[ noSymbols ]; 
 
    
 
    
 
   //--- compute information at each position of sequence ---
 
   //--- compute information at each position of sequence ---
Line 83: Line 89:
  
 
//---------------------------------------------------------------------
 
//---------------------------------------------------------------------
 +
// GENERATEACGTBITMAPS: use only when new png files needed
 
//---------------------------------------------------------------------
 
//---------------------------------------------------------------------
 
void generateACGTbitmaps() {
 
void generateACGTbitmaps() {
Line 97: Line 104:
  
 
//---------------------------------------------------------------------
 
//---------------------------------------------------------------------
 +
// xlog2x: returns x * log( x )/log( 2 ) which would return NaN if
 +
//        not testing for 0 condition...
 
//---------------------------------------------------------------------
 
//---------------------------------------------------------------------
 
float xlog2x( float x ) {
 
float xlog2x( float x ) {
   if ( x==0 ) return 0;
+
   if ( x==0 ) return 0;       // because 0 * log( 0 ) is 0
 
   return x * log(x)/log(2);
 
   return x * log(x)/log(2);
 
}
 
}
 
 
  
  
 
//---------------------------------------------------------------------
 
//---------------------------------------------------------------------
 +
// findFreqsAndInformation: given the sequences, computes the frequency
 +
// with which each symbol appears in each position, along with the
 +
// information present in each symbol being the right one for that
 +
// position.
 
//---------------------------------------------------------------------
 
//---------------------------------------------------------------------
 
void findFreqsAndInformation() {
 
void findFreqsAndInformation() {
   //--- create frequency arrays for each symbol ---
+
 
 +
   //--- count how many times each ---
 
   int noSymbols = seq[0].length( );
 
   int noSymbols = seq[0].length( );
 
   int Acount[] = new int[noSymbols ];
 
   int Acount[] = new int[noSymbols ];
Line 115: Line 127:
 
   int Gcount[] = new int[noSymbols ];
 
   int Gcount[] = new int[noSymbols ];
 
   int Tcount[] = new int[noSymbols ];
 
   int Tcount[] = new int[noSymbols ];
  information = new float[ noSymbols ]; 
 
 
    
 
    
   //--- compute frequency of A, C, G, and T in sequences ---
+
   //--- compute counts of A, C, G, and T in sequences ---
 
   for ( int i=0; i<seq[0].length(); i++ ) {
 
   for ( int i=0; i<seq[0].length(); i++ ) {
 
     Acount[i] = 0;
 
     Acount[i] = 0;
Line 141: Line 152:
 
               +xlog2x( Cfreq[i] ) + xlog2x( Gfreq[i] )
 
               +xlog2x( Cfreq[i] ) + xlog2x( Gfreq[i] )
 
               + xlog2x( Tfreq[i] );
 
               + xlog2x( Tfreq[i] );
     println( "information["+i+"] = "+information[i] );
+
     //println( "information["+i+"] = "+information[i] );
 
   }
 
   }
 
}
 
}
  
 
//---------------------------------------------------------------------
 
//---------------------------------------------------------------------
 +
// displayLogo: displays the logo in the window, at y = ALINE.
 +
//              Goes through all the possible symbol location and
 +
//              stack up the 4 images for the A, C, G, and T symbols
 +
//              where the height of the image is scaled by the information
 +
//              present in the consensus sequence, and by the frequency
 +
//              of each symbol.  Maximum height is 2 bits.
 
//---------------------------------------------------------------------
 
//---------------------------------------------------------------------
 
void displayLogo() {
 
void displayLogo() {
 
    
 
    
   PImage a, c, g, t;
+
   PImage a, c, g, t;           // the 4 images of the 4 symbols
 
    
 
    
   a = loadImage( "a.png" );
+
   a = loadImage( "a.png" );     // load them from file into variables
 
   c = loadImage( "c.png" );
 
   c = loadImage( "c.png" );
 
   g = loadImage( "g.png" );
 
   g = loadImage( "g.png" );
 
   t = loadImage( "t.png" );
 
   t = loadImage( "t.png" );
   //--- display seq[0], to test ---
+
 
 +
   //--- compute geometrical information ---
 
   int noSymbols = seq[0].length();
 
   int noSymbols = seq[0].length();
 
   float charWidth = (WIDTH-BORDER*2)/noSymbols;
 
   float charWidth = (WIDTH-BORDER*2)/noSymbols;
 
   char symbols[] = { 'A', 'C', 'G', 'T' };
 
   char symbols[] = { 'A', 'C', 'G', 'T' };
 
    
 
    
 +
  //--- for each symbol position...
 
   for ( int i=0; i< noSymbols; i++ ) {
 
   for ( int i=0; i< noSymbols; i++ ) {
 
     float charBase = ALINE;
 
     float charBase = ALINE;
 +
   
 +
    //--- for each possible symbol ...
 
     for ( int j=0; j<4; j++ ) {
 
     for ( int j=0; j<4; j++ ) {
 
         char sym = symbols[j];
 
         char sym = symbols[j];
Line 172: Line 193:
 
         float charHeight = 50 * information[i] * freq;   
 
         float charHeight = 50 * information[i] * freq;   
 
         image( img, BORDER + i*charWidth, charBase-charHeight, charWidth, charHeight );
 
         image( img, BORDER + i*charWidth, charBase-charHeight, charWidth, charHeight );
 +
       
 +
        //--- change the y location for the base of the next image for stacking effect==>
 
         charBase = charBase - charHeight;
 
         charBase = charBase - charHeight;
 
     }
 
     }

Latest revision as of 17:28, 2 August 2008

// DNA_logo
// D. Thiebaut
// This program creates a DNA-sequence logo
// It uses 4 images for the 4 symbols A, C, G, and T, a.png, c.png, g.png
// and t.png.  The images have black background and are captured from the
// display generated by the function generateACGTbitmaps()
// 
// The program reads several sequences of identical length that represent
// potential binding regions between two strands, and calculates
// the frequency of each symbol appearing in each position of the consensus
// sequence, along with the information present in each symbol of the consensus
// string.  The maximum value is 2 bits.
// This program assumes that each symbol is equally likely to appear in each
// position of the sequence.
//
// The symbols are shown with their height proportional to their frequency and
// the amount of information in each position.

//---------------------------------------------------------------------
// GEOMETRY (all values are in pixels)
//---------------------------------------------------------------------
int WIDTH     = 800;            // window width
int MIDWIDTH  = WIDTH/2;        // half of WIDTH
int HEIGHT    = 600;            // window height
int BORDER    = 40;             // margin kept empty around the drawing; used
                                // as the left x offset of title and logo
int TITLELINE = 20;             // y coordinate at which the title is drawn
int ALINE     = HEIGHT/2;       // y coordinate of the base line of the logo

PFont font;                     // font loaded and installed by initWindow()

//---------------------------------------------------------------------
// DATA
//---------------------------------------------------------------------
int NOSEQS = 8;                 // how many sequences are analysed

// Per-position frequency of each symbol across the sequences.
// Allocated in setup(), filled by findFreqsAndInformation().
float[] Afreq;
float[] Cfreq;
float[] Gfreq;
float[] Tfreq;

// Per-position amount of information of the consensus sequence.
float[] information;

// The sequences themselves; the slots are filled in setup().
String[] seq = new String[NOSEQS];


//---------------------------------------------------------------------
// INITWINDOW: loads the font, makes it the current text font, and
//             draws the title string at ( BORDER, TITLELINE ).
//   title: the text to show at the top of the window
//---------------------------------------------------------------------
void initWindow( String title ) {
  //--- load the font (60 points, going by its file name) and install it ---
  font = loadFont( "GillSans-60.vlw" );
  textFont( font );

  //--- the title is drawn smaller than the font's own size, in colour ---
  textSize( 24 );
  fill( color( 99, 66, 204 ) );
  text( title, BORDER, TITLELINE );
}

//---------------------------------------------------------------------
// SETUP: called once when the app starts.  Opens the window, stores the
//        sample sequences in seq[], allocates the per-position arrays,
//        then computes the frequencies/information and draws the logo.
//---------------------------------------------------------------------
void setup() {
  //--- window: WIDTH x HEIGHT, black background, title at the top ---
  size( WIDTH, HEIGHT );
  background( 0, 0, 0 );
  initWindow( "Sequence Logo" );

  //--- the 8 sample sequences, copied into the global seq[] array ---
  String samples[] = {
    "CCCATTGTTCTC",
    "TTTCTGGTTCTC",
    "TCAATTGTTTAG",
    "CTCATTGTTGTC",
    "TCCATTGTTCTC",
    "CCTATTGTTCTC",
    "TCCATTGTTCGT",
    "CCAATTGTTTTG"
  };
  for ( int k=0; k<samples.length; k++ ) {
    seq[k] = samples[k];
  }

  //--- one array element per position of the first sequence ---
  int positions = seq[0].length( );
  Afreq       = new float[ positions ];
  Cfreq       = new float[ positions ];
  Gfreq       = new float[ positions ];
  Tfreq       = new float[ positions ];
  information = new float[ positions ];

  //--- fill the arrays, then draw: displayLogo() reads what
  //--- findFreqsAndInformation() stores, so the order matters ---
  findFreqsAndInformation();
  displayLogo();
}

//---------------------------------------------------------------------
// GENERATEACGTBITMAPS: use only when new png files are needed.
//   Writes the string "ACGT" four times, one row per colour, at
//   60-point size, so that the letters can be captured from the display.
//   Nothing in this listing calls it.
//---------------------------------------------------------------------
void generateACGTbitmaps() {
  int rowHeight = 60;                 // text size and distance between rows
  int palette[][] = {                 // one { red, green, blue } per row
    { 255, 204,   0 },
    { 132,  99,   0 },
    {  99, 132, 204 },
    {  99, 204,  33 }
  };

  textSize( rowHeight );
  for ( int row=0; row<palette.length; row++ ) {
    fill( palette[row][0], palette[row][1], palette[row][2] );
    //--- first row sits one row height below the top border ---
    text( "ACGT", BORDER, BORDER + rowHeight*(row+1) );
  }
}

//---------------------------------------------------------------------
// xlog2x: returns x * log2( x ), computed as x * log( x ) / log( 2 ).
//         x equal to 0 is special-cased to 0, because evaluating the
//         formula there would give NaN ( 0 * log( 0 ) ).
//---------------------------------------------------------------------
float xlog2x( float x ) {
  return ( x==0 ) ? 0 : x * log(x)/log(2);
}


//---------------------------------------------------------------------
// findFreqsAndInformation: fills the global arrays Afreq, Cfreq, Gfreq,
// Tfreq and information from the global sequences seq[].
// For each position i, the frequency of a symbol is the number of
// sequences holding that symbol at i, divided by NOSEQS, and
//     information[i] = 2 + sum over A, C, G, T of f * log2( f )
// The number of positions is the length of seq[0]; the five output
// arrays must already be allocated with at least that many elements.
//---------------------------------------------------------------------
void findFreqsAndInformation() {
  int positions = seq[0].length( );

  //--- per-position tallies (new int arrays start out filled with 0) ---
  int tallyA[] = new int[ positions ];
  int tallyC[] = new int[ positions ];
  int tallyG[] = new int[ positions ];
  int tallyT[] = new int[ positions ];

  //--- pass 1: tally which symbol each sequence holds at each position ---
  for ( int pos=0; pos<positions; pos++ ) {
    for ( int s=0; s<NOSEQS; s++ ) {
      switch ( seq[s].charAt( pos ) ) {
        case 'A': tallyA[pos] += 1; break;
        case 'C': tallyC[pos] += 1; break;
        case 'G': tallyG[pos] += 1; break;
        case 'T': tallyT[pos] += 1; break;
        default:  break;               // any other character is not counted
      }
    }
  }

  //--- pass 2: tallies --> frequencies --> information ---
  for ( int pos=0; pos<positions; pos++ ) {
    Afreq[pos] = tallyA[pos] / (float) NOSEQS;
    Cfreq[pos] = tallyC[pos] / (float) NOSEQS;
    Gfreq[pos] = tallyG[pos] / (float) NOSEQS;
    Tfreq[pos] = tallyT[pos] / (float) NOSEQS;

    //--- start from 2 and add the four f*log2(f) terms, which are <= 0
    //--- for frequencies between 0 and 1 ---
    float bits = 2.0;
    bits += xlog2x( Afreq[pos] );
    bits += xlog2x( Cfreq[pos] );
    bits += xlog2x( Gfreq[pos] );
    bits += xlog2x( Tfreq[pos] );
    information[pos] = bits;
  }
}

//---------------------------------------------------------------------
// displayLogo: displays the logo in the window, with its base line at
//              y = ALINE.  For each position of the sequences, the
//              images of A, C, G and T are stacked upward (A lowest, T
//              highest); each image is one column wide and its height
//              is 50 pixels * information[i] * frequency of the symbol.
//              Reads the globals seq, Afreq, Cfreq, Gfreq, Tfreq and
//              information, which must have been filled beforehand.
//              If one of a.png, c.png, g.png, t.png cannot be loaded,
//              or if seq[0] is empty, nothing is drawn.
//---------------------------------------------------------------------
void displayLogo() {

  //--- the 4 symbols in stacking order: image file and frequency array ---
  String files[]   = { "a.png", "c.png", "g.png", "t.png" };
  float  freqs[][] = { Afreq,   Cfreq,   Gfreq,   Tfreq   };

  //--- load the images; loadImage() returns null when it fails, and
  //--- drawing a null image would abort the sketch ---
  PImage glyphs[] = new PImage[ files.length ];
  for ( int j=0; j<files.length; j++ ) {
    glyphs[j] = loadImage( files[j] );
    if ( glyphs[j] == null ) {
      println( "displayLogo: could not load " + files[j] + ", logo not drawn" );
      return;
    }
  }

  //--- compute geometrical information ---
  int noSymbols = seq[0].length();
  if ( noSymbols == 0 ) return;       // no column to draw, avoids dividing by 0

  // float division: an integer division would truncate the column width
  // whenever the drawable width is not a multiple of noSymbols
  float charWidth   = (WIDTH-BORDER*2) / (float) noSymbols;
  float heightScale = 50;             // pixels per unit of information*frequency

  //--- for each symbol position...
  for ( int i=0; i<noSymbols; i++ ) {
    float charBase = ALINE;           // bottom of the next image of this column

    //--- for each possible symbol ...
    for ( int j=0; j<glyphs.length; j++ ) {
      float charHeight = heightScale * information[i] * freqs[j][i];
      image( glyphs[j], BORDER + i*charWidth, charBase-charHeight, charWidth, charHeight );

      //--- the next image sits on top of this one (y decreases upward) ---
      charBase = charBase - charHeight;
    }
  }
}