Difference between revisions of "DNA Repeats.pde"

From dftwiki3
Jump to: navigation, search
(New page: Back to Lab 4 <hr /> <code><pre> // DNAREpeats.pde // D. Thiebaut // Finding repeats in DNA sequence //-----------------------------------------------------------------...)
 
 
Line 3: Line 3:
  
 
<code><pre>
 
<code><pre>
// DNAREpeats.pde
+
// DNA3.pde
 
// D. Thiebaut
 
// D. Thiebaut
 
// Finding repeats in DNA sequence
 
// Finding repeats in DNA sequence
Line 68: Line 68:
 
int SQRHEIGHT    = SQRWIDTH;
 
int SQRHEIGHT    = SQRWIDTH;
 
PFont font;                          // the font used to display the symbols
 
PFont font;                          // the font used to display the symbols
 
+
int longesti = 0;
 +
int longestj = 0;
 +
int longestRepeat = 0;
  
  
Line 192: Line 194:
 
   dna1.drawText();
 
   dna1.drawText();
 
   findRepeats( 5 );     
 
   findRepeats( 5 );     
 +
  drawCircle( longesti-longestRepeat/2, longestj-longestRepeat/2, int( longestRepeat * 1.5 ) );
 
}
 
}
  
Line 208: Line 211:
 
   for ( int k=0; k<n; k++ )
 
   for ( int k=0; k<n; k++ )
 
     point( SQRTOPX+1+i-k, SQRTOPY+1+j-k );
 
     point( SQRTOPX+1+i-k, SQRTOPY+1+j-k );
 +
}
 +
 +
//---------------------------------------------------------------------
 +
//---------------------------------------------------------------------
 +
void drawCircle( int i, int j, int radius ) {
 +
  color myColor = color( 200, 33, 33 );
 +
  stroke( myColor );
 +
  noFill();
 +
  //arc( i+SQRTOPX+1, j+SQRTOPY+1, radius, radius, 0, TWO_PI ); 
 +
  ellipse( i+SQRTOPX+1, j+SQRTOPY+1, radius, radius ); 
 
}
 
}
  
Line 237: Line 250:
 
         //--- long enough? if so, display ---
 
         //--- long enough? if so, display ---
 
         if ( repeats >= threshold )  
 
         if ( repeats >= threshold )  
           drawPoints( i, j, repeats );               
+
           drawPoints( i, j, repeats );      
 +
          if ( repeats > longestRepeat ) {
 +
               longestRepeat = repeats;
 +
              longesti = i;
 +
              longestj = j;
 +
          }       
 
       }
 
       }
 
     }   
 
     }   
Line 247: Line 265:
 
void  keyPressed() {
 
void  keyPressed() {
 
}
 
}
 +
 +
 +
  
  

Latest revision as of 14:58, 24 July 2008

Back to Lab 4


// DNA3.pde
// D. Thiebaut
// Finding repeats in DNA sequence

//---------------------------------------------------------------------
//  ___ _     _          _    
// / __| |___| |__  __ _| |___
//| (_ | / _ \ '_ \/ _` | (_-<
// \___|_\___/_.__/\__,_|_/__/
//
//---------------------------------------------------------------------
// The DNA sequence currently displayed and analyzed; assigned in setup().
DNAString dna1;

// FASTA0: sample record in the layout the DNAString constructor expects:
// a one-line description, a single '\n', then the nucleotide sequence as
// one unbroken string (the "+" concatenation below adds no newlines).
String FASTA0 = "gi|194306025|dbj|AB426820.1| Escherichia coli ompT mRNA for outer membrane protease T, partial cds, strain: JCM 5491\n"
+"TGGGAATAGTCCTGACAACCCCTATTGCGATCAGCTCTTTTGCTTCTACCGAGACTTTATCGTTTACTCC"
+"TGACAACATAAATGCGGACATTAGTCTTGGAACTCTGAGCGGAAAAACAAAAGAGCGTGTTTATCTAGCC"
+"GAAGAAGGAGGCCGAAAGGTCAGTCAACTTGACTGGAAATTCAATAACGCTGCAATTATTAAAGGTGCAA"
+"TTAATTGGGATTTGATGCCCCAGATATCTATCGGGGCTGCTGGCTGGACAACTCTCGGTAGCCGAGGTGG"  
+"CAATATGGTCGATCGGGACTGGATGGATTCCAGTAACCCCGGAACCTGGACGGATGAAAGTAGACACCCT"
+"GATACACAACTCAATTATGCCAACGAATTTGATCTGAATATCAGAGGCTGGCTCCCCAACGAACCCAATT"
+"ACCGCCTGGGACTCATGGCCGGATATCAGGAAAGCCGTTATAGCTTTACAGCCAGAGGGGGTTCCTATAT"
+"CTACAGTTCTGAGGAGGGATTCAGAGATGATATCGGCTCCTTCCCGAATGGAGAAAGAGCAATCGGCTAC"
+"AAACAACGTTTTAAAATGCCCTACATTGGCTTGACTGGAAGTTATCGTTATGAAGATTTTGAGCTAGGTG"
+"GTACATTTAAATACAGCGGCTGGGTGGAAGCATTTGATAACGATGAACACTATGACCCAGGAAAAAGAAT"
+"CACTTATCGCAGTAAAGTCAAAGACCAAAATTACTATTCTGTTGCAGTCAATGCAGGTTATTACGTAACG"
+"CCTAATGCAAAAGTTTATATTGAAGGCGCATGGAATCGGGTTACGAATAAAAAAGGTGATACTTCACTTT"
+"ATGATCACAATGATAACACTTCTGACTACAGCAAAAATGGTGCAGGCATAGAAAACTATAACTTCATCAC"
+"TACTGCTGGTC";

// FASTA3: the record loaded by setup().  Same header and layout as FASTA0.
// NOTE(review): it appears to be FASTA0 with part of the second 70-character
// segment replaced by a stretch copied from the first segment
// (e.g. "CTGACAACCCCTATTG"), which plants a longer repeat -- confirm.
String FASTA3 = "gi|194306025|dbj|AB426820.1| Escherichia coli ompT mRNA for outer membrane protease T, partial cds, strain: JCM 5491\n"
+"TGGGAATAGTCCTGACAACCCCTATTGCGATCAGCTCTTTTGCTTCTACCGAGACTTTATCGTTTACTCC"
+"TGACAACATAAATGCGGACATTAGTCTCTGACAACCCCTATTGGAAACAAAAGAGCGTGTTTATCTAGCC"
+"GAAGAAGGAGGCCGAAAGGTCAGTCAACTTGACTGGAAATTCAATAACGCTGCAATTATTAAAGGTGCAA"
+"TTAATTGGGATTTGATGCCCCAGATATCTATCGGGGCTGCTGGCTGGACAACTCTCGGTAGCCGAGGTGG"  
+"CAATATGGTCGATCGGGACTGGATGGATTCCAGTAACCCCGGAACCTGGACGGATGAAAGTAGACACCCT"
+"GATACACAACTCAATTATGCCAACGAATTTGATCTGAATATCAGAGGCTGGCTCCCCAACGAACCCAATT"
+"ACCGCCTGGGACTCATGGCCGGATATCAGGAAAGCCGTTATAGCTTTACAGCCAGAGGGGGTTCCTATAT"
+"CTACAGTTCTGAGGAGGGATTCAGAGATGATATCGGCTCCTTCCCGAATGGAGAAAGAGCAATCGGCTAC"
+"AAACAACGTTTTAAAATGCCCTACATTGGCTTGACTGGAAGTTATCGTTATGAAGATTTTGAGCTAGGTG"
+"GTACATTTAAATACAGCGGCTGGGTGGAAGCATTTGATAACGATGAACACTATGACCCAGGAAAAAGAAT"
+"CACTTATCGCAGTAAAGTCAAAGACCAAAATTACTATTCTGTTGCAGTCAATGCAGGTTATTACGTAACG"
+"CCTAATGCAAAAGTTTATATTGAAGGCGCATGGAATCGGGTTACGAATAAAAAAGGTGATACTTCACTTT"
+"ATGATCACAATGATAACACTTCTGACTACAGCAAAAATGGTGCAGGCATAGAAAACTATAACTTCATCAC"
+"TACTGCTGGTC";

// FASTA1: short record -- the common header followed by a single
// 70-character sequence segment (the same text as FASTA0's first segment).
String FASTA1 = "gi|194306025|dbj|AB426820.1| Escherichia coli ompT mRNA for outer membrane protease T, partial cds, strain: JCM 5491\n"
+"TGGGAATAGTCCTGACAACCCCTATTGCGATCAGCTCTTTTGCTTCTACCGAGACTTTATCGTTTACTCC";

// FASTA2: short record -- the common header followed by a single
// 70-character sequence segment (the same text as FASTA0's second segment).
String FASTA2 = "gi|194306025|dbj|AB426820.1| Escherichia coli ompT mRNA for outer membrane protease T, partial cds, strain: JCM 5491\n"
+"TGACAACATAAATGCGGACATTAGTCTTGGAACTCTGAGCGGAAAAACAAAAGAGCGTGTTTATCTAGCC";

//---------------------------------------------------------------------
// GEOMETRY: window layout constants (pixels), plus the global state
// shared between findRepeats() and setup().
//---------------------------------------------------------------------
// Window size handed to size() in setup().
int WIDTH        = 800;
int MIDWIDTH     = WIDTH/2;
int HEIGHT       = 600;
// Margin kept between the window edge and the drawn content.
int BORDER       = 10;
// y-coordinate at which initWindow() draws the title.
int TITLELINE    = 20;
// y-coordinate where the sequence text area starts (see setup()).
int ALINE        = 30;
// Top-left corner of the square frame in which repeats are plotted.
int SQRTOPX      = BORDER;
int SQRTOPY      = 120;
// Side of the square: the largest size that fits below SQRTOPY and
// between the left and right borders.
int SQRWIDTH     = min( HEIGHT-BORDER-SQRTOPY, WIDTH-BORDER*2);
int SQRHEIGHT    = SQRWIDTH;
PFont font;                          // the font used to display the symbols
// Longest repeat recorded by findRepeats(): its length and the values of
// the two scan indices (i, j) at the moment the run stopped, i.e. one
// position past the last matching pair.
int longesti = 0;
int longestj = 0;
int longestRepeat = 0;


//---------------------------------------------------------------------
// ___  _  _   _   ___ _       _           
//|   \| \| | /_\ / __| |_ _ _(_)_ _  __ _ 
//| |) | .` |/ _ \\__ \  _| '_| | ' \/ _` |
//|___/|_|\_/_/ \_\___/\__|_| |_|_||_\__, |
//                                   |___/ 
// DNAString Class: holds a DNA sequence as a text string, together with
// the screen rectangle in which the sequence is displayed.
//---------------------------------------------------------------------
class DNAString {
  String symbols;     // the sequence characters (FASTA header removed)
  int noSymbols;      // the # of symbols, always symbols.length()
  int offsetX;        // x-value of left-most position on screen
  int offsetY;        // y-value of screen position
  int displayWidth;   // width of the text box used by drawText()
  int displayHeight;  // height of the text box used by drawText()

  //--- default constructor: an empty sequence ---
  DNAString( ) {      
    symbols = "";
    noSymbols = 0;
  }

  //--- constructor ---
  // fastaString: a FASTA record, i.e. one description line followed by
  // one or more lines of sequence data.  The description line is
  // dropped; all remaining lines are trimmed and concatenated, so a
  // record whose sequence is wrapped over several lines is read in
  // full.  A record with no sequence lines yields an empty DNAString
  // instead of an out-of-bounds error.
  DNAString( String fastaString ) {
    String[] list = split( fastaString, '\n' );
    if ( list.length > 1 )
      symbols = join( trim( subset( list, 1 ) ), "" );
    else
      symbols = "";
    noSymbols = symbols.length();
  }    

  //--- sets the rectangle (top-left corner x,y and size w,h) in which
  //--- drawText() lays out the sequence
  void setBoundingBox( int x, int y, int w, int h ) {
    offsetX = x;
    offsetY = y;
    displayWidth = w;
    displayHeight = h;  
  }

  //--- returns the string of symbols ---
  String getText() {  
    return symbols;
  }

  //--- returns the number of symbols ---
  int length() {
    return noSymbols;
  }

  //--- keeps only the first n symbols; no effect if the sequence is
  //--- already n symbols long or shorter
  void truncate( int n ) {
    if ( n < noSymbols ) {
      symbols = symbols.substring( 0, n );
      noSymbols = symbols.length();
    }  
  }

  //--- display text using default method: the whole sequence is drawn
  //--- inside the bounding box, in the current font ---
  void drawText() {   
    color myColor = color( 204, 99, 66 );
    fill( myColor );
    text( symbols, offsetX, offsetY, displayWidth, displayHeight );
  }


  //--- paints a black, 12-pixel-high rectangle that is noSymbols
  //--- "A"-widths wide, starting at (BORDER*2, ALINE-12).
  // NOTE(review): the position comes from the global layout constants,
  // not from offsetX/offsetY, so it may not cover the area drawText()
  // paints -- confirm before relying on it.
  void erase() {
    int tw = int( textWidth( "A" ) );
    fill( 0 );
    stroke( 0 );
    rect( BORDER*2, ALINE-12, noSymbols*tw, 12 );
  }
}


//---------------------------------------------------------------------
// ___             _   _             
//| __|  _ _ _  __| |_(_)___ _ _  ___
//| _| || | ' \/ _|  _| / _ \ ' \(_-<
//|_| \_,_|_||_\__|\__|_\___/_||_/__/
//---------------------------------------------------------------------

//---------------------------------------------------------------------
// INITFONT: load the display font (must be nonproportional) and make
// it the current text font.  The loaded font is also kept in the
// global "font".
//---------------------------------------------------------------------
void initFont( ) {
  PFont monaco = loadFont( "Monaco-12.vlw" );
  font = monaco;
  textFont( monaco );
}

//---------------------------------------------------------------------
// INITWINDOW: draw the fixed decorations: the title text at
// (BORDER, TITLELINE) and the outline of the square plot area.
//   title: the string shown at the top of the window
// Side effects: leaves the fill and stroke colors set to the frame
// color and the text size set to 12.
//---------------------------------------------------------------------
void initWindow( String title ) {
  color frameColor = color( 99, 66, 204 );

  //--- title ---
  fill( frameColor );
  text( title, BORDER, TITLELINE );
  textSize( 12 );

  //--- corners of the square plot area ---
  int left   = SQRTOPX;
  int top    = SQRTOPY;
  int right  = SQRTOPX + SQRWIDTH;
  int bottom = SQRTOPY + SQRHEIGHT;

  //--- outline, drawn clockwise starting with the top edge ---
  stroke( frameColor );
  line( left,  top,    right, top    );
  line( right, top,    right, bottom );
  line( right, bottom, left,  bottom );
  line( left,  bottom, left,  top    );
}

//---------------------------------------------------------------------
// SETUP: <=== STARTUP POINT.  PROCESSING STARTS HERE!!!
// Opens the window, draws the title and plot frame, loads the FASTA3
// sequence, plots its repeats, and circles the longest one recorded.
//---------------------------------------------------------------------
void setup() {
  size( WIDTH, HEIGHT );
  initFont();
  background( 0 );
  noStroke();   

  //--- initialize the window ---
  initWindow( "DNA Repeats" );

  //--- create the first DNA string, and position it in window ---
  dna1 = new DNAString( FASTA3 );
  //--- keep one symbol per pixel strictly inside the plot frame ---
  dna1.truncate( SQRWIDTH-1 );
  dna1.setBoundingBox( BORDER, ALINE, WIDTH-2*BORDER, SQRTOPY-ALINE );
  dna1.drawText();

  //--- plot every repeat of at least 5 symbols ---
  findRepeats( 5 );    

  //--- circle the longest repeat.  longesti/longestj mark the end of
  //--- the run, so stepping back half its length gives the center.
  //--- When nothing was recorded there is nothing to circle; drawing
  //--- anyway would put a zero-size ellipse at the corner of the plot.
  if ( longestRepeat > 0 ) {
    drawCircle( longesti-longestRepeat/2, longestj-longestRepeat/2, int( longestRepeat * 1.5 ) ); 
  }
}

//---------------------------------------------------------------------
// DRAW: Processing's per-frame hook (called many times a second).
// Left empty on purpose: the sketch has no animation, everything is
// rendered once by setup().
//---------------------------------------------------------------------
void draw() {
  // nothing to animate
}

//---------------------------------------------------------------------
// DRAWPOINTS: plot a diagonal run of n points inside the plot frame.
//   i, j: plot coordinates (relative to the frame's inner corner at
//         SQRTOPX+1, SQRTOPY+1) of the lower-right end of the run
//   n:    number of points; each following point is one pixel up and
//         one pixel to the left of the previous one
// Side effects: leaves the fill and stroke colors set to the dot color.
//---------------------------------------------------------------------
void drawPoints( int i, int j, int n ) {
  color dotColor = color( 66, 204, 99 );
  fill( dotColor );
  stroke( dotColor );

  //--- start at (i, j) and walk up the diagonal ---
  int x = SQRTOPX + 1 + i;
  int y = SQRTOPY + 1 + j;
  for ( int remaining = n; remaining > 0; remaining--, x--, y-- ) {
    point( x, y );
  }
}

//---------------------------------------------------------------------
// DRAWCIRCLE: outline a red, unfilled circle inside the plot frame.
//   i, j:   plot coordinates (relative to the frame's inner corner at
//           SQRTOPX+1, SQRTOPY+1) handed to ellipse() as its position
//   radius: handed to ellipse() as BOTH its width and its height, so
//           despite the name it acts as the circle's diameter
//           (assuming the default ellipse mode; no ellipseMode() call
//           is made here)
// Side effects: disables fill and leaves the stroke color set to red.
//---------------------------------------------------------------------
void drawCircle( int i, int j, int radius ) {
  noFill();
  stroke( color( 200, 33, 33 ) );

  int centerX = i + SQRTOPX + 1;
  int centerY = j + SQRTOPY + 1;
  ellipse( centerX, centerY, radius, radius );
}

//---------------------------------------------------------------------
// FINDREPEATS: compare the sequence in dna1 against itself and plot
// every repeated run that is at least "threshold" symbols long.
//
// For every pair of distinct start positions (starti, startj) holding
// the same symbol, both positions are advanced in step for as long as
// the symbols keep matching.  Runs that reach the threshold are plotted
// with drawPoints(); the longest one is remembered in the globals
// longestRepeat / longesti / longestj, where longesti and longestj are
// the scan indices one position past the last matching pair.
//
//   threshold: minimum run length, in symbols, for a run to be plotted
//              and to be considered as the longest repeat
//---------------------------------------------------------------------
void findRepeats( int threshold ) {
  int len = dna1.length();  
  String seq = dna1.getText();

  for ( int starti=0; starti<len; starti++ ) {
    for ( int startj=0; startj<len; startj++ ) {
      //--- skip diagonal: a position always matches itself ---
      if ( starti==startj ) continue;

      //--- no match at the start positions: nothing to extend ---
      if ( seq.charAt( starti )!=seq.charAt( startj ) ) continue;

      //--- we have a match, explore both subsequences in parallel ---
      int repeats = 1;
      int i = starti+1;
      int j = startj+1;
      while ( i<len && j<len && seq.charAt( i )==seq.charAt( j ) ) {
        repeats++;
        i++;
        j++;
      }
      //--- here i and j are one past the last matching pair ---

      //--- long enough? if so, display and remember the longest ---
      if ( repeats >= threshold ) {
        //--- drawPoints() walks backwards from the point it is given,
        //--- so hand it the last MATCHING pair, not the first
        //--- mismatching one ---
        drawPoints( i-1, j-1, repeats );       
        if ( repeats > longestRepeat ) {
          longestRepeat = repeats;
          longesti = i;
          longestj = j;
        }        
      }
    }  
  }
}

//---------------------------------------------------------------------
// KEYPRESSED(): Processing's keyboard hook, called every time a key is
// pressed.  Left empty on purpose: the sketch does not react to keys.
//---------------------------------------------------------------------
void keyPressed() {
  // no keyboard interaction
}


















Back to Lab 4