Difference between revisions of "DNA Repeats.pde"
(New page: Back to Lab 4 <hr /> <code><pre> // DNAREpeats.pde // D. Thiebaut // Finding repeats in DNA sequence //-----------------------------------------------------------------...) |
|||
Line 3: | Line 3: | ||
<code><pre> | <code><pre> | ||
− | // | + | // DNA3.pde |
// D. Thiebaut | // D. Thiebaut | ||
// Finding repeats in DNA sequence | // Finding repeats in DNA sequence | ||
Line 68: | Line 68: | ||
int SQRHEIGHT = SQRWIDTH; | int SQRHEIGHT = SQRWIDTH; | ||
PFont font; // the font used to display the symbols | PFont font; // the font used to display the symbols | ||
− | + | int longesti = 0; | |
+ | int longestj = 0; | ||
+ | int longestRepeat = 0; | ||
Line 192: | Line 194: | ||
dna1.drawText(); | dna1.drawText(); | ||
findRepeats( 5 ); | findRepeats( 5 ); | ||
+ | drawCircle( longesti-longestRepeat/2, longestj-longestRepeat/2, int( longestRepeat * 1.5 ) ); | ||
} | } | ||
Line 208: | Line 211: | ||
for ( int k=0; k<n; k++ ) | for ( int k=0; k<n; k++ ) | ||
point( SQRTOPX+1+i-k, SQRTOPY+1+j-k ); | point( SQRTOPX+1+i-k, SQRTOPY+1+j-k ); | ||
+ | } | ||
+ | |||
+ | //--------------------------------------------------------------------- | ||
+ | //--------------------------------------------------------------------- | ||
+ | void drawCircle( int i, int j, int radius ) { | ||
+ | color myColor = color( 200, 33, 33 ); | ||
+ | stroke( myColor ); | ||
+ | noFill(); | ||
+ | //arc( i+SQRTOPX+1, j+SQRTOPY+1, radius, radius, 0, TWO_PI ); | ||
+ | ellipse( i+SQRTOPX+1, j+SQRTOPY+1, radius, radius ); | ||
} | } | ||
Line 237: | Line 250: | ||
//--- long enough? if so, display --- | //--- long enough? if so, display --- | ||
if ( repeats >= threshold ) | if ( repeats >= threshold ) | ||
− | drawPoints( i, j, repeats ); | + | drawPoints( i, j, repeats ); |
+ | if ( repeats > longestRepeat ) { | ||
+ | longestRepeat = repeats; | ||
+ | longesti = i; | ||
+ | longestj = j; | ||
+ | } | ||
} | } | ||
} | } | ||
Line 247: | Line 265: | ||
void keyPressed() { | void keyPressed() { | ||
} | } | ||
+ | |||
+ | |||
+ | |||
Latest revision as of 14:58, 24 July 2008
// DNA3.pde
// D. Thiebaut
// Finding repeats in DNA sequence
//---------------------------------------------------------------------
// ___ _ _ _
// / __| |___| |__ __ _| |___
//| (_ | / _ \ '_ \/ _` | (_-<
// \___|_\___/_.__/\__,_|_/__/
//
//---------------------------------------------------------------------
// The DNA sequence under analysis: assigned in setup(), read by findRepeats().
DNAString dna1;
// FASTA record: a header line terminated by '\n', followed by the sequence
// (the concatenated pieces below contain no further line breaks).
// Not referenced by the rest of the sketch as shown here.
String FASTA0 = "gi|194306025|dbj|AB426820.1| Escherichia coli ompT mRNA for outer membrane protease T, partial cds, strain: JCM 5491\n"
+"TGGGAATAGTCCTGACAACCCCTATTGCGATCAGCTCTTTTGCTTCTACCGAGACTTTATCGTTTACTCC"
+"TGACAACATAAATGCGGACATTAGTCTTGGAACTCTGAGCGGAAAAACAAAAGAGCGTGTTTATCTAGCC"
+"GAAGAAGGAGGCCGAAAGGTCAGTCAACTTGACTGGAAATTCAATAACGCTGCAATTATTAAAGGTGCAA"
+"TTAATTGGGATTTGATGCCCCAGATATCTATCGGGGCTGCTGGCTGGACAACTCTCGGTAGCCGAGGTGG"
+"CAATATGGTCGATCGGGACTGGATGGATTCCAGTAACCCCGGAACCTGGACGGATGAAAGTAGACACCCT"
+"GATACACAACTCAATTATGCCAACGAATTTGATCTGAATATCAGAGGCTGGCTCCCCAACGAACCCAATT"
+"ACCGCCTGGGACTCATGGCCGGATATCAGGAAAGCCGTTATAGCTTTACAGCCAGAGGGGGTTCCTATAT"
+"CTACAGTTCTGAGGAGGGATTCAGAGATGATATCGGCTCCTTCCCGAATGGAGAAAGAGCAATCGGCTAC"
+"AAACAACGTTTTAAAATGCCCTACATTGGCTTGACTGGAAGTTATCGTTATGAAGATTTTGAGCTAGGTG"
+"GTACATTTAAATACAGCGGCTGGGTGGAAGCATTTGATAACGATGAACACTATGACCCAGGAAAAAGAAT"
+"CACTTATCGCAGTAAAGTCAAAGACCAAAATTACTATTCTGTTGCAGTCAATGCAGGTTATTACGTAACG"
+"CCTAATGCAAAAGTTTATATTGAAGGCGCATGGAATCGGGTTACGAATAAAAAAGGTGATACTTCACTTT"
+"ATGATCACAATGATAACACTTCTGACTACAGCAAAAATGGTGCAGGCATAGAAAACTATAACTTCATCAC"
+"TACTGCTGGTC";
// FASTA record loaded by setup(): a header line terminated by '\n', followed
// by the sequence (no further line breaks).
// NOTE(review): this looks like FASTA0 with part of the second sequence piece
// replaced by text copied from the first piece (a planted repeat) -- confirm.
String FASTA3 = "gi|194306025|dbj|AB426820.1| Escherichia coli ompT mRNA for outer membrane protease T, partial cds, strain: JCM 5491\n"
+"TGGGAATAGTCCTGACAACCCCTATTGCGATCAGCTCTTTTGCTTCTACCGAGACTTTATCGTTTACTCC"
+"TGACAACATAAATGCGGACATTAGTCTCTGACAACCCCTATTGGAAACAAAAGAGCGTGTTTATCTAGCC"
+"GAAGAAGGAGGCCGAAAGGTCAGTCAACTTGACTGGAAATTCAATAACGCTGCAATTATTAAAGGTGCAA"
+"TTAATTGGGATTTGATGCCCCAGATATCTATCGGGGCTGCTGGCTGGACAACTCTCGGTAGCCGAGGTGG"
+"CAATATGGTCGATCGGGACTGGATGGATTCCAGTAACCCCGGAACCTGGACGGATGAAAGTAGACACCCT"
+"GATACACAACTCAATTATGCCAACGAATTTGATCTGAATATCAGAGGCTGGCTCCCCAACGAACCCAATT"
+"ACCGCCTGGGACTCATGGCCGGATATCAGGAAAGCCGTTATAGCTTTACAGCCAGAGGGGGTTCCTATAT"
+"CTACAGTTCTGAGGAGGGATTCAGAGATGATATCGGCTCCTTCCCGAATGGAGAAAGAGCAATCGGCTAC"
+"AAACAACGTTTTAAAATGCCCTACATTGGCTTGACTGGAAGTTATCGTTATGAAGATTTTGAGCTAGGTG"
+"GTACATTTAAATACAGCGGCTGGGTGGAAGCATTTGATAACGATGAACACTATGACCCAGGAAAAAGAAT"
+"CACTTATCGCAGTAAAGTCAAAGACCAAAATTACTATTCTGTTGCAGTCAATGCAGGTTATTACGTAACG"
+"CCTAATGCAAAAGTTTATATTGAAGGCGCATGGAATCGGGTTACGAATAAAAAAGGTGATACTTCACTTT"
+"ATGATCACAATGATAACACTTCTGACTACAGCAAAAATGGTGCAGGCATAGAAAACTATAACTTCATCAC"
+"TACTGCTGGTC";
// Short FASTA record: the header line plus a single 70-symbol sequence piece.
// Not referenced by the rest of the sketch as shown here.
String FASTA1 = "gi|194306025|dbj|AB426820.1| Escherichia coli ompT mRNA for outer membrane protease T, partial cds, strain: JCM 5491\n"
+"TGGGAATAGTCCTGACAACCCCTATTGCGATCAGCTCTTTTGCTTCTACCGAGACTTTATCGTTTACTCC";
// Short FASTA record: the header line plus a single 70-symbol sequence piece.
// Not referenced by the rest of the sketch as shown here.
String FASTA2 = "gi|194306025|dbj|AB426820.1| Escherichia coli ompT mRNA for outer membrane protease T, partial cds, strain: JCM 5491\n"
+"TGACAACATAAATGCGGACATTAGTCTTGGAACTCTGAGCGGAAAAACAAAAGAGCGTGTTTATCTAGCC";
//---------------------------------------------------------------------
// GEOMETRY
//---------------------------------------------------------------------
// Window size, passed to size() in setup().
int WIDTH = 800;
// Half the window width (not used by the rest of the sketch as shown here).
int MIDWIDTH = WIDTH/2;
int HEIGHT = 600;
// Margin: x of the title and of the sequence text box, and left edge of the square.
int BORDER = 10;
// y-coordinate at which initWindow() draws the title.
int TITLELINE = 20;
// y-coordinate of the top of the sequence text box set up in setup().
int ALINE = 30;
// Top-left corner of the square in which the repeats are plotted.
int SQRTOPX = BORDER;
int SQRTOPY = 120;
// Side of the square: the smaller of the room left below SQRTOPY and the
// room left between the left and right borders.
int SQRWIDTH = min( HEIGHT-BORDER-SQRTOPY, WIDTH-BORDER*2);
int SQRHEIGHT = SQRWIDTH;
PFont font; // the font used to display the symbols
// Longest repeat found so far: its position as recorded by findRepeats()
// (longesti, longestj) and its length in symbols (longestRepeat).
// Written by findRepeats(), read by setup() to place the circle.
int longesti = 0;
int longestj = 0;
int longestRepeat = 0;
//---------------------------------------------------------------------
// ___ _ _ _ ___ _ _
//| \| \| | /_\ / __| |_ _ _(_)_ _ __ _
//| |) | .` |/ _ \\__ \ _| '_| | ' \/ _` |
//|___/|_|\_/_/ \_\___/\__|_| |_|_||_\__, |
// |___/
// DNAString Class: a class holding a DNA sequence as a text string,
// together with the screen rectangle in which the sequence is displayed.
//---------------------------------------------------------------------
class DNAString {
  String symbols;     // the string of chars (sequence only, no FASTA header)
  int noSymbols;      // the # of symbols
  int offsetX;        // x-value of left-most position on screen
  int offsetY;        // y-value of screen position
  int displayWidth;   // width of the text box used by drawText()
  int displayHeight;  // height of the text box used by drawText()

  //--- default constructor: an empty sequence ---
  DNAString( ) {
    symbols = "";
    noSymbols = 0;
  }

  //--- constructor ---
  // fastaString: a FASTA record, i.e. a header line followed by the sequence
  // on one or more lines.  The header (first line) is dropped and all the
  // remaining lines are concatenated, so a record whose sequence is wrapped
  // over several lines is read completely.  A null record, or a record that
  // contains only a header, yields an empty sequence instead of throwing.
  DNAString( String fastaString ) {
    symbols = "";
    if ( fastaString != null ) {
      String[] list = split( fastaString, '\n' );
      for ( int k=1; k<list.length; k++ )
        symbols += list[k].trim();   // trim() also removes a stray '\r'
    }
    noSymbols = symbols.length();
  }

  //--- defines the rectangle (top-left corner, width, height) used by drawText() ---
  void setBoundingBox( int x, int y, int w, int h ) {
    offsetX = x;
    offsetY = y;
    displayWidth = w;
    displayHeight = h;
  }

  //--- returns the string of symbols ---
  String getText() {
    return symbols;
  }

  //--- returns the number of symbols ---
  int length() {
    return noSymbols;
  }

  //--- keeps at most the first n symbols; a negative n is treated as 0 ---
  void truncate( int n ) {
    if ( n < 0 )
      n = 0;                         // substring( 0, n ) would throw for n < 0
    if ( n < noSymbols ) {
      symbols = symbols.substring( 0, n );
      noSymbols = symbols.length();
    }
  }

  //--- display text using default method ---
  // The text is drawn inside the rectangle given to setBoundingBox().
  void drawText() {
    color myColor = color( 204, 99, 66 );
    fill( myColor );
    text( symbols, offsetX, offsetY, displayWidth, displayHeight );
  }

  //--- paints a black rectangle 12 pixels high and noSymbols characters wide ---
  // NOTE(review): the rectangle starts at ( BORDER*2, ALINE-12 ) and does not
  // use the bounding box that drawText() draws into, so it presumably dates
  // from an earlier single-line layout -- confirm before relying on it.
  void erase() {
    int tw = int( textWidth( "A" ) );
    fill( 0 );
    stroke( 0 );
    rect( BORDER*2, ALINE-12, noSymbols*tw, 12 );
  }
}
//---------------------------------------------------------------------
// ___ _ _
//| __| _ _ _ __| |_(_)___ _ _ ___
//| _| || | ' \/ _| _| / _ \ ' \(_-<
//|_| \_,_|_||_\__|\__|_\___/_||_/__/
//---------------------------------------------------------------------
//---------------------------------------------------------------------
// INITFONT: load the font file and make it the current text font.
// (The font must be nonproportional.)
//---------------------------------------------------------------------
void initFont( ) {
  PFont loaded = loadFont( "Monaco-12.vlw" );
  font = loaded;
  textFont( font );
}
//---------------------------------------------------------------------
// INITWINDOW: draw the fixed parts of the window: the title and the
// outline of the square in which the repeats are plotted.
//---------------------------------------------------------------------
void initWindow( String title ) {
  color frameColor = color( 99, 66, 204 );

  //--- title ---
  fill( frameColor );
  text( title, BORDER, TITLELINE );
  textSize( 12 );

  //--- the four sides of the square: top, right, bottom, left ---
  stroke( frameColor );
  int left   = SQRTOPX;
  int top    = SQRTOPY;
  int right  = SQRTOPX+SQRWIDTH;
  int bottom = SQRTOPY+SQRHEIGHT;
  line( left,  top,    right, top );
  line( right, top,    right, bottom );
  line( right, bottom, left,  bottom );
  line( left,  bottom, left,  top );
}
//---------------------------------------------------------------------
// SETUP: entry point of the sketch.  Opens the window, draws the fixed
// decorations and the sequence text, plots the repeats, and circles the
// longest one.
//---------------------------------------------------------------------
void setup() {
  //--- window, font, black background ---
  size( WIDTH, HEIGHT );
  initFont();
  background( 0 );
  noStroke();
  initWindow( "DNA Repeats" );

  //--- load the sequence, cut it to SQRWIDTH-1 symbols (presumably so that ---
  //--- it fits inside the square), and show it in the box above the square ---
  dna1 = new DNAString( FASTA3 );
  dna1.truncate( SQRWIDTH-1 );
  dna1.setBoundingBox( BORDER, ALINE, WIDTH-2*BORDER, SQRTOPY-ALINE );
  dna1.drawText();

  //--- plot every repeat of 5 symbols or more ---
  findRepeats( 5 );

  //--- circle the longest repeat: step back half its length from the ---
  //--- recorded position, and size the circle at 1.5 times its length ---
  int halfRun = longestRepeat/2;
  int circleSize = int( longestRepeat * 1.5 );
  drawCircle( longesti-halfRun, longestj-halfRun, circleSize );
}
//---------------------------------------------------------------------
// DRAW: This is called many times a second. Use it to do animation.
//---------------------------------------------------------------------
void draw() {
// Intentionally empty: everything is drawn once, from setup().
}
//---------------------------------------------------------------------
// DRAWPOINTS: plot n points inside the square, along a diagonal.  The
// first point is at sequence position (i, j); each following point is
// one pixel further toward decreasing x and decreasing y.
//---------------------------------------------------------------------
void drawPoints( int i, int j, int n ) {
  color dotColor = color( 66, 204, 99 );
  fill( dotColor );
  stroke( dotColor );

  //--- position 0 maps to the pixel just inside the square's top-left corner ---
  int x = SQRTOPX+1+i;
  int y = SQRTOPY+1+j;
  for ( int remaining=n; remaining>0; remaining-- ) {
    point( x, y );
    x--;
    y--;
  }
}
//---------------------------------------------------------------------
// DRAWCIRCLE: draw an unfilled red circle inside the square, positioned
// at sequence position (i, j).  Note that radius is handed to ellipse()
// as both its width and its height arguments.
//---------------------------------------------------------------------
void drawCircle( int i, int j, int radius ) {
  noFill();
  stroke( color( 200, 33, 33 ) );

  //--- same mapping from sequence position to pixel as drawPoints() ---
  int cx = i+SQRTOPX+1;
  int cy = j+SQRTOPY+1;
  ellipse( cx, cy, radius, radius );
}
//---------------------------------------------------------------------
// FINDREPEATS: compare the sequence held in dna1 against itself.  For
// every pair of distinct start positions holding the same symbol, count
// how many consecutive symbols match from there on.  Runs of at least
// threshold symbols are plotted with drawPoints().  The longest run seen
// (of any length) is recorded in longestRepeat, and the position of its
// LAST matching pair in longesti / longestj.
//---------------------------------------------------------------------
void findRepeats( int threshold ) {
  int len = dna1.length();
  String seq = dna1.getText();

  for ( int starti=0; starti<len; starti++ )
    for ( int startj=0; startj<len; startj++ ) {
      //--- skip diagonal ---
      if ( starti==startj ) continue;

      //--- no match at the start positions: nothing to explore ---
      if ( seq.charAt( starti )!=seq.charAt( startj ) ) continue;

      //--- explore both subsequences in parallel ---
      int repeats = 1;
      int i = starti+1;
      int j = startj+1;
      while ( i<len && j<len && seq.charAt( i )==seq.charAt( j ) ) {
        repeats += 1;
        i++;
        j++;
      }

      //--- i and j now sit one position PAST the last matching pair (the ---
      //--- first mismatch, or len).  drawPoints() plots backward from the ---
      //--- position it is given, so hand it the last matching pair; using ---
      //--- i and j directly shifts every run by one pixel and puts a run ---
      //--- that reaches the end of the sequence on the square's frame. ---
      int lasti = i-1;
      int lastj = j-1;

      //--- long enough? if so, display ---
      if ( repeats >= threshold )
        drawPoints( lasti, lastj, repeats );

      //--- remember the longest run, whether or not it was displayed ---
      if ( repeats > longestRepeat ) {
        longestRepeat = repeats;
        longesti = lasti;
        longestj = lastj;
      }
    }
}
//---------------------------------------------------------------------
// KEYPRESSED(): called every time a key is pressed.
//---------------------------------------------------------------------
void keyPressed() {
// Intentionally empty: key presses are ignored.
}