Tutorial 1: R and Simulation of Population Growth
--D. Thiebaut (talk) 09:26, 17 July 2015 (EDT)
Contents
- 1 This changes the default colors in lattice plots.
- 2 knitr settings to control how R chunks work.
- 3 Getting the data
- 4 Reading the Data In
- 5 Create Mean of Points
- 6 Plot
- 7 Option 1: jagged mean
- 8 Option 2: smooth means, but data in bins
- 9 Step 4: computing discrete slope
- 10 Shiny Application Ver. 1: Local Files
- 11 Shiny Application Ver. 2: Load Data File Dynamically
- 12 Shiny Application Ver. 3: Grab file dynamically from Web Page
- 13 Shiny Application Ver. 4: Grab file dynamically from cgi-script
- 14 Steps
- 15 New shiny file.
- 16 Shiny Application Ver. 4: Slider, Grab file dynamically from cgi-script
- 17 Shiny Application Ver. 5: adding an input widget
- 18 Publishing to ShinyApps.io
This changes the default colors in lattice plots.
knitr settings to control how R chunks work.
# Load knitr with library() so a missing package fails immediately,
# instead of require() returning FALSE and the next call failing.
library(knitr)

# Chunk options applied to every R chunk in the document.
opts_chunk$set(
  tidy = FALSE,    # display code as typed
  size = "small"   # slightly smaller font for code
)
Getting the data
# generate pop growth
#
# generatePop.py
# D. Thiebaut
import random

# define parameters
dataFileName = "pop%04d.dat"
maxT         = 2        # how fast we progress in time
T            = 3 * 31   # max time frame (3 months)
maxPop       = 2400     # max # of students
proportion   = 0.50     # how much of the population contributes
                        # to new cases of infection
oneBigFile   = True     # write one file holding every simulation
severalFiles = True     # write one file per simulation
header       = "Id, time, pop\n"   # CSV header shared by all output files


def generateOneInfectionHistory( Id ):
    """Simulate one run and return it as CSV text (no header).

    Each line is "Id, t, pop".  Time starts at 0 and advances by 1..maxT
    per step until it exceeds T.  The population starts at 0, never
    decreases, and is capped at maxPop.
    """
    pop = 0     # starting pop
    t   = 0
    lines = []
    while t <= T:
        lines.append( "%d, %d, %d\n" % ( Id, t, pop ) )
        if pop < maxPop / 2:
            # first half: the increment is bounded by the current population
            pop += 1 + random.randrange( int( pop*proportion ) + 1 )
        else:
            # second half: the increment is bounded by the remaining headroom
            pop += 1 + random.randrange( int( (maxPop - pop)*proportion ) + 1 )
        pop = min( maxPop, pop )
        t += 1 + random.randrange( maxT )
    return "".join( lines )


def main( noFiles=10 ):
    """Run noFiles simulations and write them to the current directory.

    When severalFiles is set, each run is written to its own file named
    after dataFileName.  When oneBigFile is set, all runs are also written
    to a single file "pop0000_NNNN.dat", NNNN being the number of runs.
    """
    allOut = [ header ]
    for i in range( noFiles ):
        out = generateOneInfectionHistory( i+1 )
        allOut.append( out )
        if severalFiles:
            fileName = dataFileName % (i+1)
            # "with" closes the file even if the write fails
            with open( fileName, 'w' ) as f:
                f.write( header + out )
            # only report files that were actually written
            print( fileName, "created" )

    if oneBigFile:
        with open( "pop%04d_%04d.dat" % (0, noFiles), 'w' ) as f:
            f.write( "".join( allOut ) )


# Guard the entry point so importing this module does not write files.
if __name__ == "__main__":
    main()
Reading the Data In
# Read the combined data file pop50_0000_0200.dat into a data frame.
# file.path() joins the components with "/", giving the same path string.
pop0000_0200 <- read.csv(
  file = file.path(
    "~", "Desktop", "Dropbox", "CVC2015_Workshop", "pop50_0000_0200.dat"
  )
)
The Python script above (generatePop.py) generates N different files, plus one large file containing the contents of all N files.
The format of a file is:
Id, time, pop 1, 0, 0 1, 2, 1 1, 3, 2 1, 5, 3 1, 7, 4 1, 9, 5 1, 11, 8 1, 12, 10 1, 13, 12 1, 15, 16 ... 1, 84, 2400 1, 86, 2400 1, 88, 2400 1, 89, 2400 1, 90, 2400 1, 92, 2400 1, 93, 2400
Create Mean of Points
# Bin the observations into 5-unit time intervals (each time is mapped to
# the start of its bin), then compute the mean population and the number
# of observations in every bin.
pop0000_0200avgCount <- summarise(
  group_by(pop0000_0200, time = 5 * trunc(time / 5)),
  avgPop = mean(pop),
  count = n()
)
Plot
Option 1: jagged mean
# Scatter plot of every simulation, coloured by simulation Id, with the
# per-time-step mean population drawn on top as a blue line.
ggplot(pop0000_0200, aes(time, pop, colour = Id)) +
  geom_point() +
  scale_colour_gradientn(colours = rainbow(4)) +
  stat_summary(fun.y = mean, geom = "line", colour = "blue")
Option 2: smooth means, but data in bins
ggplot( data=pop0000_0200, aes( x = trunc(time/3)*3, y = pop, color = Id ) ) + geom_point( ) + scale_colour_gradientn(colours=rainbow(4)) + stat_summary(fun.y = mean, geom = 'smooth', color = 'blue', width=3 )
Option 3: Plotting just the Mean
ggplot( data=pop0000_0200avgCount ) + geom_line( aes( x=time, y=avgPop ), color='blue' ) + xlab( 'time' ) + ylab( 'Infected Population' )
Step 4: computing discrete slope
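Add an additional variable (column) to pop0000_0200avgCount, call it slope, and initially fill it with a copy of avgPop. Then run a for-loop that overwrites each row, from the second one onward, with the difference between that row's avgPop and the previous row's avgPop. The first row has no predecessor, so it keeps its initial value.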
# Add a `slope` column, initialised with a copy of avgPop.
temp <- mutate(pop0000_0200avgCount, slope = avgPop)

# Discrete slope: difference between consecutive binned means.
# seq_len(n)[-1] is empty when there are fewer than two rows, whereas
# 2:nrow(temp) would count backwards (2, 1) and index outside the table.
# Columns are addressed by name rather than by position (2 and 4), so the
# loop keeps working if the column order changes.
# Row 1 has no predecessor and keeps its initial avgPop value.
for (i in seq_len(nrow(temp))[-1]) {
  temp$slope[i] <- temp$avgPop[i] - temp$avgPop[i - 1]
}

head(temp)
Display the resulting slopes as points:
# Binned mean population as a blue line, with the discrete slope of each
# bin overlaid as red points on the same axes.
ggplot(temp, aes(x = time)) +
  geom_line(aes(y = avgPop), colour = "blue") +
  geom_point(aes(y = slope), colour = "red")
Compute the max slope:
# Largest value in the slope column, returned as a one-row table.
temp %>%
  summarise(maxSlope = max(slope))
Shiny Application Ver. 1: Local Files
# app.R
# D. Thiebaut
library("ggplot2")
library("shiny")

# Growth parameters for which a local data file exists.  This single
# vector drives both the file loading and the choices offered in the UI,
# so the two cannot drift apart.
growth_params <- seq(10, 90, by = 10)

# Pre-load every data set once at start-up, keyed "pop10" ... "pop90".
data_sets <- list()
for (p in growth_params) {
  data_sets[[paste0("pop", p)]] <- read.csv(
    paste0("~/Desktop/Dropbox/CVC2015_Workshop/pop", p, "_0000_0200.dat")
  )
}

# Redraw the plot whenever the selected growth parameter changes.
server <- function(input, output) {
  output$main_plot <- renderPlot({
    ggplot(
      data = data_sets[[paste0("pop", input$n_breaks)]],
      aes(x = time, y = pop, color = Id)
    ) +
      geom_point() +
      scale_colour_gradientn(colours = rainbow(4)) +
      stat_summary(fun.y = mean, geom = "line", color = "blue")
  })
}

# One drop-down to pick the growth parameter, and the plot below it.
ui <- fluidPage(
  selectInput(
    inputId = "n_breaks",
    label = "Population Growth (magic param):",
    choices = growth_params,
    selected = 50
  ),
  plotOutput(outputId = "main_plot", height = "300px")
)

shinyApp(ui = ui, server = server)
Shiny Application Ver. 2: Load Data File Dynamically
# app.R
# D. Thiebaut
library("ggplot2")
library("shiny")

server <- function(input, output) {
  # Reactive data set: the file matching the selected parameter is read
  # again each time the selection changes.
  dataSet <- reactive({
    read.csv(
      paste0(
        "~/Desktop/Dropbox/CVC2015_Workshop/pop",
        input$n_breaks,
        "_0000_0200.dat"
      )
    )
  })

  # All simulation points coloured by Id, plus the mean as a blue line.
  output$main_plot <- renderPlot({
    ggplot(dataSet(), aes(time, pop, colour = Id)) +
      geom_point() +
      scale_colour_gradientn(colours = rainbow(4)) +
      stat_summary(fun.y = mean, geom = "line", colour = "blue")
  })
}

ui <- fluidPage(
  selectInput(
    "n_breaks",
    "Population Growth (magic param):",
    choices = c(10, 20, 30, 40, 50, 60, 70, 80, 90),
    selected = 50
  ),
  plotOutput("main_plot", height = "300px")
)

shinyApp(ui, server)
Shiny Application Ver. 3: Grab file dynamically from Web Page
Simply create the dataset differently. The URL for the data is http://hadoop0.dyndns.org/R/
Index of /R [ICO] Name Last modified Size Description [PARENTDIR] Parent Directory - [TXT] generatePop.py 2015-07-16 09:21 1.8K [ ] pop10_0000_0200.dat 2015-07-16 11:45 141K [ ] pop20_0000_0200.dat 2015-07-16 09:22 146K [ ] pop30_0000_0200.dat 2015-07-16 09:21 150K [ ] pop40_0000_0200.dat 2015-07-16 09:22 153K [ ] pop50_0000_0200.dat 2015-07-16 09:21 154K [ ] pop60_0000_0200.dat 2015-07-16 09:22 155K [ ] pop70_0000_0200.dat 2015-07-16 09:22 156K [ ] pop80_0000_0200.dat 2015-07-16 09:22 157K [ ] pop90_0000_0200.dat 2015-07-16 09:22 157K Apache/2.4.7 (Ubuntu) Server at hadoop0.dyndns.org Port 80
And we just need to change how the data-set is created:
# Reactive data set fetched over HTTP: the URL of the data file is built
# from the selected parameter and read through a url() connection.
dataSet <- reactive({
  read.csv(
    url(
      paste0(
        "http://hadoop0.dyndns.org/R/pop",
        input$n_breaks,
        "_0000_0200.dat"
      )
    )
  )
})
Shiny Application Ver. 4: Grab file dynamically from cgi-script
Steps
- add cgi-bin capability to Apache server on Hadoop0
- create python cgi-bin script
- slightly modify how the Shiny app gets its data: instead of building a file name, build a URL from the number selected in the input widget.
- the new URL is http://hadoop0.dyndns.org/cgi-bin/generatePop.py?param=50
- change 50 to some int between 1 and 99.
#! /usr/bin/env python3
# D. Thiebaut
# generate pop growth
#
# CGI version: the growth parameter is read from the URL query string
# (e.g. generatePop.py?param=50) and the simulated data are printed as
# plain text.
import math
import os
import random
import urllib.parse

#--- define global parameters ---
dataFileName = "pop%02d_%04d.dat"
maxT         = 2        # how fast we progress in time
T            = 3 * 31   # max time frame (3 months)
maxPop       = 2400     # max # of students
proportion   = 0.50     # how much of the population contributes
                        # to new cases of infection
noFiles      = 200      # number of simulations generated per request
oneBigFile   = True     # write one file holding every simulation
severalFiles = False    # write one file per simulation
printOut     = True     # print the data to stdout (the HTTP response)
defaultProportion = 0.50               # used when "param" is missing/invalid
header            = "Id, time, pop\n"  # CSV header line


def getParams():
    """ get parameters from URL

    Parses the QUERY_STRING environment variable set by the web server
    and returns a {name: value} dictionary of strings.  When a name is
    repeated, its first value is kept.  urllib.parse is used because the
    cgi module was removed from the standard library in Python 3.13.
    """
    query = os.environ.get( "QUERY_STRING", "" )
    parsed = urllib.parse.parse_qs( query )
    return { key: values[0] for key, values in parsed.items() }


def getProportion( params=None ):
    """ get proportion parameter from URL

    params: optional dictionary of URL parameters; read from the URL
            when omitted.
    Returns param/100.0.  Non-integer values such as "50.5" are accepted.
    The default proportion is returned when "param" is missing, is not a
    finite number, or is negative (a negative proportion would make
    random.randrange() fail during the simulation).
    """
    if params is None:
        params = getParams()
    try:
        percent = float( params["param"] )
    except (KeyError, TypeError, ValueError):
        return defaultProportion    # default value if nothing usable is
                                    # passed in URL
    if not math.isfinite( percent ) or percent < 0:
        return defaultProportion
    return percent / 100.0


def proportionPercent( p ):
    """Return proportion p as a whole percentage, for use in file names.

    Rounds instead of truncating: 0.29 * 100 is 28.999999999999996 in
    floating point, which int() alone would turn into 28.
    """
    return int( round( p * 100 ) )


def generateOneInfectionHistory( Id ):
    """generate 1 semester worth of data, showing increase
    of infected students population as a function of days
    (1 semester max)

    Returns CSV text (no header), one "Id, t, pop" line per time step.
    """
    pop = 0     # starting pop
    t   = 0
    lines = []
    while t <= T:
        lines.append( "%d, %d, %d\n" % ( Id, t, pop ) )
        if pop < maxPop / 2:
            # first half: the increment is bounded by the current population
            pop += 1 + random.randrange( int( pop*proportion ) + 1 )
        else:
            # second half: the increment is bounded by the remaining headroom
            pop += 1 + random.randrange( int( (maxPop - pop)*proportion ) + 1 )
        pop = min( maxPop, pop )
        t += 1 + random.randrange( maxT )
    return "".join( lines )


def main():
    """Emit the CGI response and optionally save the data to files."""
    global proportion

    #--- cgi setup ---
    print( "Content-Type: text/plain" )
    print()

    # get proportion parameter from URL
    proportion = getProportion()
    percent = proportionPercent( proportion )

    allOut = [ header ]
    if printOut:
        print( header, end="" )

    for i in range( noFiles ):
        out = generateOneInfectionHistory( i+1 )
        if printOut:
            print( out, end="" )
        allOut.append( out )
        if severalFiles:
            # "with" closes the file even if the write fails
            with open( dataFileName % (percent, i+1), 'w' ) as f:
                f.write( header + out )

    if oneBigFile:
        # NOTE(review): this writes into the script's working directory on
        # every request -- confirm the web server account may write there.
        with open( "pop%02d_%04d_%04d.dat" % (percent, 0, noFiles), 'w' ) as f:
            f.write( "".join( allOut ) )


# Guard the entry point so importing this module produces no output.
if __name__ == "__main__":
    main()
New shiny file.
# app.R
# D. Thiebaut
# reads data from files on a Web server
#
library("ggplot2")
library("shiny")

server <- function(input, output) {
  # Reactive data set: request freshly generated data from the cgi
  # script, passing the selected parameter in the query string.
  dataSet <- reactive({
    request <- paste0(
      "http://hadoop0.dyndns.org/cgi-bin/generatePop.py?param=",
      input$n_breaks
    )
    read.csv(url(request))
  })

  # All simulation points coloured by Id, plus the mean as a blue line.
  output$main_plot <- renderPlot({
    ggplot(dataSet(), aes(time, pop, colour = Id)) +
      geom_point() +
      scale_colour_gradientn(colours = rainbow(4)) +
      stat_summary(fun.y = mean, geom = "line", colour = "blue")
  })
}

ui <- fluidPage(
  selectInput(
    "n_breaks",
    "Population Growth (magic param):",
    choices = c(10, 20, 30, 40, 50, 60, 70, 80, 90),
    selected = 50
  ),
  plotOutput("main_plot", height = "300px")
)

shinyApp(ui, server)
Shiny Application Ver. 4: Slider, Grab file dynamically from cgi-script
Just change the UI.
# app.R
# D. Thiebaut
# reads data from files on a Web server using cgi-bin
# uses a slider
library("ggplot2")
library("shiny")

server <- function(input, output) {
  # Reactive data set: request freshly generated data from the cgi
  # script, passing the slider value in the query string.
  dataSet <- reactive({
    fileName <- paste0(
      "http://hadoop0.dyndns.org/cgi-bin/generatePop.py?param=",
      input$n_breaks
    )
    read.csv(url(fileName))
  })

  # All simulation points coloured by Id, plus the mean as a blue line.
  output$main_plot <- renderPlot({
    ggplot(data = dataSet(), aes(x = time, y = pop, color = Id)) +
      geom_point() +
      scale_colour_gradientn(colours = rainbow(4)) +
      stat_summary(fun.y = mean, geom = "line", color = "blue")
  })
}

ui <- fluidPage(
  titlePanel("Infected Population",
    windowTitle = "Growth of Infected Population"
  ),
  sliderInput(
    inputId = "n_breaks",
    label = "Population Growth (magic param):",
    min = 1,
    max = 99,
    # Whole-number steps only: the cgi script parses param with int(), so
    # a value such as 50.5 is rejected and silently replaced by its
    # default of 50.
    step = 1,
    value = 50
  ),
  plotOutput(outputId = "main_plot", height = "300px")
)

shinyApp(ui = ui, server = server)
Shiny Application Ver. 5: adding an input widget
# app.R
# D. Thiebaut
# reads data from files on a Web server using cgi-bin
# uses a slider
library("ggplot2")
library("shiny")

server <- function(input, output) {
  # Reactive data set: request freshly generated data from the cgi
  # script, passing the slider value and the number of simulations.
  # NOTE(review): the query names used here are `proportion` and
  # `simulations`; confirm the deployed generatePop.py reads these names
  # rather than `param`.
  dataSet <- reactive({
    fileName <- paste0(
      "http://hadoop0.dyndns.org/cgi-bin/generatePop.py?proportion=",
      input$n_breaks,
      "&simulations=",
      input$noSimulations
    )
    read.csv(url(fileName))
  })

  # All simulation points coloured by Id, plus the mean as a blue line.
  output$main_plot <- renderPlot({
    ggplot(data = dataSet(), aes(x = time, y = pop, color = Id)) +
      geom_point() +
      scale_colour_gradientn(colours = rainbow(4)) +
      stat_summary(fun.y = mean, geom = "line", color = "blue")
  })
}

ui <- fluidPage(
  titlePanel("Infected Population",
    windowTitle = "Growth of Infected Population"
  ),
  # NOTE(review): the description below mentions a fixed 200 simulations,
  # bins of 3 time periods and a `?param=` URL, while this app lets the
  # user pick the number of simulations, plots the un-binned mean and
  # sends `proportion`/`simulations` -- confirm and update the wording.
  mainPanel(
    h2("Description"),
    p("This graph shows the result of 200 simulations of the growth a population of infected students on a campus, as a function of some 'magic' parameter controlled by the slider."),
    p("The points show the growth resulting from the 200 simulations, and the line shows the average of the points over bins of 3 time periods."),
    p("The data are read from a URL where a server generates data on the fly. The value of the slider is sent as a suffix to the URL (e.g. http://hadoop0.dyndns.org/cgi-bin/generatePop.py?param=71) and the server generates 200 different simulations.")
  ),
  selectInput(
    inputId = "noSimulations",
    label = "Number of Simulations:",
    choices = c(20, 100, 250, 500, 1000),
    selected = 250
  ),
  sliderInput(
    inputId = "n_breaks",
    label = "Population Growth (magic param):",
    min = 1,
    max = 99,
    # Whole-number steps only: the tutorial's cgi script expects an
    # integer between 1 and 99 and falls back to its default otherwise.
    step = 1,
    value = 50
  ),
  plotOutput(outputId = "main_plot", height = "300px")
)

shinyApp(ui = ui, server = server)
Publishing to ShinyApps.io
- Just File/Publish