Tutorial 1: R and Simulation of Population Growth
--D. Thiebaut (talk) 09:26, 17 July 2015 (EDT)
```{r, include=FALSE}
# Don't delete this chunk if you are using the mosaic package
# This loads the mosaic and dplyr packages
# library() stops with an error when the package is missing, whereas
# require() only returns FALSE and lets later chunks fail obscurely.
library(mosaic)
```
```{r, include=FALSE}
# Some customization. You can alter or delete as desired (if you know what you are doing).
Contents
- 1 This changes the default colors in lattice plots.
- 2 knitr settings to control how R chunks work.
- 3 Getting the data
- 4 Reading the Data In
- 5 Create Mean of Points
- 6 Plot
- 7 Option 1: jagged mean
- 8 Option 2: smooth means, but data in bins
- 9 Option 3: Plotting just the Mean
- 10 Step 4: computing discrete slope
- 11 Shiny Application Ver. 1: Local Files
- 12 Shiny Application Ver. 2: Load Data File Dynamically
- 13 Shiny Application Ver. 3: Grab file dynamically from Web Page
- 14 Shiny Application Ver. 4: Grab file dynamically from cgi-script
- 15 Steps
- 16 New shiny file.
- 17 Shiny Application Ver. 4: Slider, Grab file dynamically from cgi-script
- 18 Shiny Application Ver. 5: adding an input widget
- 19 Publishing to ShinyApps.io
# This changes the default colors in lattice plots.
trellis.par.set(theme = theme.mosaic())

# knitr settings to control how R chunks work.
# library() fails loudly if knitr is not installed; require() would only
# return FALSE.
library(knitr)
opts_chunk$set(
  tidy = FALSE,   # display code as typed
  size = "small"  # slightly smaller font for code
)
```
```{r, include=FALSE}
# Load additional packages here. Uncomment the line below to use Project MOSAIC data sets.
# require(mosaicData)
```
Getting the data
<code># generate pop growth # # generatePop.py # D. Thiebaut import random # define parameters dataFileName = "pop%04d.dat" maxT = 2 # how fast we progress in time T = 3 * 31 # max time frame (3 months) maxPop = 2400 # max # of students proportion = 0.50 # how much of the population contributes # to new cases of infection oneBigFile = True severalFiles = True def generateOneInfectionHistory( Id ): global dataFileName # iterate and generate population pop = 0 # starting pop t = 0 out = "" while t <= T: out += "%d, %d, %d\n" % ( Id, t, pop ) if pop < maxPop / 2: pop += 1 + random.randrange( int( pop*proportion) +1 ) else: pop += 1 + random.randrange( int( (maxPop - pop)*proportion) + 1 ) pop = min( maxPop, pop ) t += 1 + random.randrange( maxT ) return out def main(): allOut = "Id, time, pop\n" for i in range( 10 ): out = generateOneInfectionHistory( i+1 ) print( dataFileName % (i+1), "created" ) allOut += out if severalFiles: out = "Id, time, pop\n" + out open( "pop%04d.dat" % (i+1), 'w' ).write( out ) if oneBigFile: open( "pop%04d_%04d.dat" % (0,(i+1)), 'w' ).write( allOut ) main()</code>
Reading the Data In
<code># Read the combined simulation file into a data frame.
pop0000_0200 <- read.csv(
  file = "~/Desktop/Dropbox/CVC2015_Workshop/pop50_0000_0200.dat"
)</code>
The generator script shown above produces N separate files, plus one large file that contains the contents of all N files.
The format of a file is:
<code>Id, time, pop
1, 0, 0
1, 2, 1
1, 3, 2
1, 5, 3
1, 7, 4
1, 9, 5
1, 11, 8
1, 12, 10
1, 13, 12
1, 15, 16
...
1, 84, 2400
1, 86, 2400
1, 88, 2400
1, 89, 2400
1, 90, 2400
1, 92, 2400
1, 93, 2400</code>
Create Mean of Points
<code># Group the observations into bins of 5 time units, then compute the mean
# population and the number of observations in each bin.
pop0000_0200avgCount <- summarise(
  group_by( pop0000_0200, time = 5 * trunc( time / 5 ) ),
  avgPop = mean( pop ),
  count  = n()
)</code>
Plot
Option 1: jagged mean
```{r, out.width = '750px', dpi=200}
# One point per observation, coloured by simulation Id, with a blue line
# through the mean population at each time value.
ggplot( data=pop0000_0200, aes( x = time, y = pop, color = Id ) ) +
  geom_point( ) +
  scale_colour_gradientn( colours=rainbow(4) ) +
  stat_summary( fun.y = mean, geom = 'line', color = 'blue' )
```
Option 2: smooth means, but data in bins
```{r, out.width = '750px', dpi=200}
# Same plot as option 1, but the x value is first truncated to a multiple
# of 3, so the mean is taken over bins of 3 time units.
ggplot( data=pop0000_0200, aes( x = trunc(time/3)*3, y = pop, color = Id ) ) +
  geom_point( ) +
  scale_colour_gradientn( colours=rainbow(4) ) +
  stat_summary( fun.y = mean, geom = 'smooth', color = 'blue', width=3 )
```
Option 3: Plotting just the Mean
<code># Draw only the binned mean population as a blue line.
ggplot( pop0000_0200avgCount, aes( x = time, y = avgPop ) ) +
  geom_line( color = 'blue' ) +
  labs( x = 'time', y = 'Infected Population' )</code>
Step 4: computing discrete slope
Add an additional variable (column) to pop0000_0200avgCount, call it slope, and fill it with the avgPop. Then run a for-loop and fill the new column with the value of the slope.
<code># Start with slope equal to avgPop, so that the first row, which has no
# predecessor, keeps a defined value.
temp <- mutate( pop0000_0200avgCount, slope = avgPop )

# Replace every later entry with the change in avgPop since the previous row.
# seq_len(...)[-1] is empty for a one-row table, whereas 2:nrow(temp) would
# count down (2, 1) and index outside the table.  Columns are addressed by
# name so that the code does not depend on their position.
for ( i in seq_len( nrow( temp ) )[-1] ) {
  temp$slope[i] <- temp$avgPop[i] - temp$avgPop[i-1]
}
head( temp )</code>
Display the resulting slopes as points:
<code># Mean population as a blue line, discrete slope as red points, both
# plotted against time.
ggplot( temp, aes( x = time ) ) +
  geom_line( aes( y = avgPop ), color = 'blue' ) +
  geom_point( aes( y = slope ), color = 'red' )</code>
Compute the max slope:
<code># Largest value found in the slope column.
temp %>%
  summarize( maxSlope = max( slope ) )</code>
Shiny Application Ver. 1: Local Files
<code># app.R
# D. Thiebaut
# Shiny app, version 1: every data file is read once at start-up and the
# plot switches between the pre-loaded data sets.
library( "ggplot2" )
library( "shiny" )

# Parameter values for which a data file exists.  The same vector drives the
# file loading and the choices offered in the UI, so the two cannot drift
# apart.
growth_params <- seq( 10, 90, by = 10 )
data_dir      <- "~/Desktop/Dropbox/CVC2015_Workshop"

# data_sets[["pop10"]], ..., data_sets[["pop90"]]: one data frame per file.
data_sets <- lapply( growth_params, function( p ) {
  read.csv( file.path( data_dir, paste0( "pop", p, "_0000_0200.dat" ) ) )
} )
names( data_sets ) <- paste0( "pop", growth_params )

server <- function( input, output ) {
  output$main_plot <- renderPlot({
    # pick the pre-loaded data set that matches the selected parameter
    ggplot( data=data_sets[[paste0("pop", input$n_breaks)]],
            aes( x = time, y = pop, color = Id ) ) +
      geom_point( ) +
      scale_colour_gradientn( colours=rainbow(4) ) +
      stat_summary( fun.y = mean, geom = 'line', color = 'blue' )
  } )
}

ui <- fluidPage(
  selectInput( inputId = "n_breaks",
               label = "Population Growth (magic param):",
               choices = growth_params,
               selected = 50 ),
  plotOutput( outputId = "main_plot", height = "300px" )
)

shinyApp( ui = ui, server = server )
</code>
Shiny Application Ver. 2: Load Data File Dynamically
<code># app.R
# D. Thiebaut
# Shiny app, version 2: the data file matching the selected parameter is
# read on demand instead of being pre-loaded.
library( "ggplot2" )
library( "shiny" )

server <- function( input, output ) {

  # Reactive data set: re-read whenever input$n_breaks changes.
  dataSet <- reactive( {
    read.csv( paste0( "~/Desktop/Dropbox/CVC2015_Workshop/pop",
                      input$n_breaks,
                      "_0000_0200.dat" ) )
  } )

  output$main_plot <- renderPlot( {
    growth <- dataSet()
    ggplot( growth, aes( x = time, y = pop, color = Id ) ) +
      geom_point( ) +
      scale_colour_gradientn( colours = rainbow(4) ) +
      stat_summary( fun.y = mean, geom = 'line', color = 'blue' )
  } )
}

ui <- fluidPage(
  selectInput(
    inputId  = "n_breaks",
    label    = "Population Growth (magic param):",
    choices  = c(10, 20, 30, 40, 50, 60, 70, 80, 90),
    selected = 50
  ),
  plotOutput( outputId = "main_plot", height = "300px" )
)

shinyApp( ui = ui, server = server )
</code>
Shiny Application Ver. 3: Grab file dynamically from Web Page
Simply create the dataset differently. The URL for the data is http://hadoop0.dyndns.org/R/
<code>
Index of /R

[ICO]        Name                  Last modified      Size  Description
[PARENTDIR]  Parent Directory                           -
[TXT]        generatePop.py        2015-07-16 09:21  1.8K
[ ]          pop10_0000_0200.dat   2015-07-16 11:45  141K
[ ]          pop20_0000_0200.dat   2015-07-16 09:22  146K
[ ]          pop30_0000_0200.dat   2015-07-16 09:21  150K
[ ]          pop40_0000_0200.dat   2015-07-16 09:22  153K
[ ]          pop50_0000_0200.dat   2015-07-16 09:21  154K
[ ]          pop60_0000_0200.dat   2015-07-16 09:22  155K
[ ]          pop70_0000_0200.dat   2015-07-16 09:22  156K
[ ]          pop80_0000_0200.dat   2015-07-16 09:22  157K
[ ]          pop90_0000_0200.dat   2015-07-16 09:22  157K

Apache/2.4.7 (Ubuntu) Server at hadoop0.dyndns.org Port 80</code>
And we just need to change how the data-set is created:
```{r, eval=FALSE}
# Build the URL of the data file for the selected parameter and read the
# CSV straight from the Web server.
dataSet <- reactive( {
  fileName <- paste0( "http://hadoop0.dyndns.org/R/pop", input$n_breaks, "_0000_0200.dat" )
  read.csv( url( fileName ) )
} )
```
Shiny Application Ver. 4: Grab file dynamically from cgi-script
Steps
- add cgi-bin capability to Apache server on Hadoop0
- create python cgi-bin script
- slightly modify the Shiny app so that, instead of building a file name, it builds the request URL from the number selected in the input widget.
- the new URL is http://hadoop0.dyndns.org/cgi-bin/generatePop.py?param=50
- change 50 to some int between 1 and 99.
<code>#! /usr/bin/env python3 # D. Thiebaut # generate pop growth # import random, sys import cgi #--- cgi setup --- print( "Content-Type: text/plain" ) print() #--- define global parameters --- dataFileName = "pop%02d_%04d.dat" maxT = 2 # how fast we progress in time T = 3 * 31 # max time frame (3 months) maxPop = 2400 # max # of students proportion = 0.50 # how much of the population contributes # to new cases of infection noFiles = 200 oneBigFile = True severalFiles = False printOut = True def getParams(): """ get parameters from URL""" dico = {} arguments = cgi.FieldStorage() for i in arguments.keys(): #print( i, "-->", arguments[i].value ) dico[i] = arguments[i].value return dico def getProportion(): """ get proportion parameter from URL""" dico = getParams() try: return int(dico["param"])/100.0 except: return 0.50 # default value if nothing is passed # in URL def generateOneInfectionHistory( Id ): """generate 1 semester worth of data, showing increase of infected students population as a function of days (1 semester max)""" global dataFileName # iterate and generate population pop = 0 # starting pop t = 0 out = "" while t <= T: out += "%d, %d, %d\n" % ( Id, t, pop ) if pop < maxPop / 2: pop += 1 + random.randrange( int( pop*proportion) +1 ) else: pop += 1 + random.randrange( int( (maxPop - pop)*proportion) + 1 ) pop = min( maxPop, pop ) t += 1 + random.randrange( maxT ) return out def main(): global noFiles, proportion # get proportion parameter from URL proportion = getProportion() allOut = "Id, time, pop\n" if printOut: print( allOut, end="" ) for i in range( noFiles ): out = generateOneInfectionHistory( i+1 ) if printOut: print( out, end="" ) allOut += out if severalFiles: out = "Id, time, pop\n" + out open( "pop%02d_%04d.dat" % (int(proportion*100),i+1), 'w' ).write( out ) #print( dataFileName % (int(proportion*100), i+1), "created" ) if oneBigFile: open( "pop%02d_%04d_%04d.dat" % (int(proportion*100),0,(i+1)), 'w' ).write( allOut ) main()</code>
New shiny file.
<code># app.R
# D. Thiebaut
# reads data from files on a Web server
#
library( "ggplot2" )
library( "shiny" )

server <- function( input, output ) {

  # Reactive data set: ask the CGI script for data generated with the
  # selected parameter whenever input$n_breaks changes.
  dataSet <- reactive( {
    read.csv( url( paste0(
      "http://hadoop0.dyndns.org/cgi-bin/generatePop.py?param=",
      input$n_breaks
    ) ) )
  } )

  output$main_plot <- renderPlot( {
    growth <- dataSet()
    ggplot( growth, aes( x = time, y = pop, color = Id ) ) +
      geom_point( ) +
      scale_colour_gradientn( colours = rainbow(4) ) +
      stat_summary( fun.y = mean, geom = 'line', color = 'blue' )
  } )
}

ui <- fluidPage(
  selectInput(
    inputId  = "n_breaks",
    label    = "Population Growth (magic param):",
    choices  = c(10, 20, 30, 40, 50, 60, 70, 80, 90),
    selected = 50
  ),
  plotOutput( outputId = "main_plot", height = "300px" )
)

shinyApp( ui = ui, server = server )
</code>
Shiny Application Ver. 4: Slider, Grab file dynamically from cgi-script
Just change the UI.
<code># app.R
# D. Thiebaut
# reads data from files on a Web server using cgi-bin
# uses a slider
library( "ggplot2" )
library( "shiny" )

server <- function( input, output ) {

  # Reactive data set: request freshly generated data from the CGI script
  # whenever the slider value changes.
  dataSet <- reactive( {
    fileName <- paste0( "http://hadoop0.dyndns.org/cgi-bin/generatePop.py?param=",
                        input$n_breaks )
    read.csv( url( fileName ) )
  } )

  output$main_plot <- renderPlot({
    ggplot( data=dataSet(), aes( x = time, y = pop, color = Id ) ) +
      geom_point( ) +
      scale_colour_gradientn( colours=rainbow(4) ) +
      stat_summary( fun.y = mean, geom = 'line', color = 'blue' )
  } )
}

ui <- fluidPage(
  titlePanel( "Infected Population",
              windowTitle = "Growth of Infected Population" ),
  # step = 1: generatePop.py converts "param" with int(), so a fractional
  # value such as 50.5 cannot be parsed and the script silently falls back
  # to its default of 0.50.  Whole steps keep the plot in sync with the
  # slider.
  sliderInput( inputId = "n_breaks",
               label = "Population Growth (magic param):",
               min = 1, max = 99, step = 1, value = 50 ),
  plotOutput( outputId = "main_plot", height = "300px" )
)

shinyApp( ui = ui, server = server )
</code>
Shiny Application Ver. 5: adding an input widget
<code># app.R
# D. Thiebaut
# reads data from files on a Web server using cgi-bin
# uses a slider and a selector for the number of simulations
library( "ggplot2" )
library( "shiny" )

server <- function( input, output ) {

  # Reactive data set: the slider value and the requested number of
  # simulations are both passed to the CGI script in the query string.
  dataSet <- reactive( {
    fileName <- paste0( "http://hadoop0.dyndns.org/cgi-bin/generatePop.py?proportion=",
                        input$n_breaks,
                        "&simulations=", input$noSimulations )
    read.csv( url( fileName ) )
  } )

  output$main_plot <- renderPlot({
    ggplot( data=dataSet(), aes( x = time, y = pop, color = Id ) ) +
      geom_point( ) +
      scale_colour_gradientn( colours=rainbow(4) ) +
      stat_summary( fun.y = mean, geom = 'line', color = 'blue' )
  } )
}

ui <- fluidPage(
  titlePanel( "Infected Population",
              windowTitle = "Growth of Infected Population" ),
  # The description below was brought in line with the code: the number of
  # simulations is selectable (not fixed at 200), the mean is taken at each
  # time value (the plot does no binning), and the example URL uses the
  # query parameters this app really sends.
  # NOTE(review): assumes the server-side script honours "proportion" and
  # "simulations" - confirm against the deployed generatePop.py.
  mainPanel(
    h2( "Description"),
    p( "This graph shows the result of repeated simulations of the growth of a population of infected students on a campus, as a function of some 'magic' parameter controlled by the slider. The number of simulations is chosen in the selection box."),
    p( "The points show the growth resulting from the simulations, and the line shows the average of the points at each time value." ),
    p( "The data are read from a URL where a server generates data on the fly. The value of the slider and the number of simulations are sent in the query string of the URL (e.g. http://hadoop0.dyndns.org/cgi-bin/generatePop.py?proportion=71&simulations=250) and the server generates the requested simulations." )
  ),
  selectInput( inputId = "noSimulations",
               label = "Number of Simulations:",
               choices = c(20, 100, 250, 500, 1000),
               selected = 250 ),
  # step = 1 instead of 0.5.
  # NOTE(review): the CGI script shown for Ver. 4 parses its parameter with
  # int() and falls back to 0.50 on fractional input; presumably the Ver. 5
  # script does the same - confirm.
  sliderInput( inputId = "n_breaks",
               label = "Population Growth (magic param):",
               min = 1, max = 99, step = 1, value = 50 ),
  plotOutput( outputId = "main_plot", height = "300px" )
)

shinyApp( ui = ui, server = server )
</code>
Publishing to ShinyApps.io
- Just File/Publish
- Create account dthiebaut/shiny2foisNeuf2fois
- URL: https://dthiebaut.shinyapps.io/GrowthInfectedPopulation