Difference between revisions of "CSC352 Homework 4 Solutions"

From dftwiki3
Jump to: navigation, search
(Created page with "--~~~~ ---- <onlysmith> =Problem 1= <br /> We have a pretty complete solution by Emily that catches a lot of image file names for the English Wikipedia. <source lang="C"> /*...")
 
 
(One intermediate revision by the same user not shown)
Line 165: Line 165:
 
=Problem 2=
 
=Problem 2=
 
<br />
 
<br />
 +
Another good program by Emily.  Note the use of the coinToss() function.  Figure out what it does.  Nice addition to the program.
 
<source lang="C">
 
<source lang="C">
 +
 +
/*
 +
* filter4topK.c
 +
* Emily Flynn
 +
* CSC 352
 +
* 11/1/2013
 +
*
 +
* This is a version of filter4.c that gets the K images with the top
 +
* views. If there is no K provided as the third parameter, this
 +
* program works the same as filter4.c. That program reads in the
 +
* wikipedia image statistics file and outputs only the lines of
 +
* interest that start with "en " and also contain " File:" or "
 +
* file:".  It takes two parameters from the command line: (1) the
 +
* input file, and (2) the name of the output file.
 +
*
 +
* I used this command to check my output. It is a little slow.
 +
*  awk '{if ($1=="en") print;}' <inName> | grep -i "[fF]ile:" | sort -grk 3 - | head -K > <outName>
 +
*
 +
* To run:
 +
*  gcc -o filter4topK filter4topK.c
 +
*  ./filter4topK <inName> <outName> K
 +
*/
 +
 +
#include <stdio.h>
 +
#include <stdlib.h>
 +
#include <string.h>
 +
#include <time.h>
 +
 +
#define MAX_LINE_BUFF 2000
 +
 +
// structure for file information
 +
struct FileInfo {
 +
  char *fileLine;
 +
  int numViews;
 +
};
 +
 +
// prototypes
 +
int keepLine( const char* line );
 +
int getNumViews( const char* line);
 +
void mySort( struct FileInfo *A, int K);
 +
char *myStrcpy(char *name);
 +
 +
// helpful function
 +
int coinToss(){
 +
  return rand() % 2;
 +
}
 +
 +
// -----------------------------------------------------------
 +
// Main method to read in a line, decide whether to keep
 +
// it, and if specified, only output the K images with the
 +
// highest number of views.
 +
//
 +
//  parameters: inputFile
 +
//              outputFile
 +
//              K (optional) - number of images to output info
 +
//                if not specified, will output all
 +
// -----------------------------------------------------------
 +
int main(int argc, const char** argv){
 +
  FILE *fp_in, *fp_out;
 +
  const char* inFile;
 +
  const char* outFile;
 +
  int K, i, numViews;
 +
  struct FileInfo *A;
 +
  char line[MAX_LINE_BUFF];
 +
 +
  // check the number of parameters provided and display an error
 +
  // message if this is not corect
 +
  if (argc < 3){
 +
    printf("Incorrect syntax. Please use: ./filter4topK inFile outFile K.\n");
 +
    return 1;
 +
  }
 +
 
 +
  // grab the arguments
 +
  inFile = argv[1];
 +
  outFile = argv[2];
 +
 
 +
  if (argc == 3){ // work the same as filter4.c if no K provided
 +
    K = 0;
 +
  } else {
 +
    K = atoi(argv[3]);
 +
  }
 +
 +
  // allocate fileInfo space for the top K pieces of information
 +
  A = (struct FileInfo *) malloc( K* sizeof(struct FileInfo));
 +
 +
  // initialize all numViews to zero
 +
  for (i=0;i<K;i++){
 +
    A[i].numViews = 0;
 +
  }
 +
 +
  // seed the random generator for breaking ties
 +
  srand(time(NULL));
 +
 
 +
  // open the input and output files
 +
  fp_in = fopen(inFile, "r");
 +
  fp_out = fopen(outFile, "w");
 +
 +
 +
  // read through the input file
 +
  while (!feof(fp_in)){
 +
 +
    // get the next line in the file
 +
    fgets(line, sizeof(line), fp_in);
 +
    line[sizeof(line) -1]='\0'; // truncate for safety
 +
    if (feof(fp_in)){
 +
      break; // stop if not a line
 +
    }
 +
   
 +
    // check if we want to keep the line
 +
    if (keepLine(line) == 0){
 +
 +
      // just print it to the output file if we are not looking for
 +
      // top images
 +
      if (K == 0){
 +
fprintf(fp_out, "%s", line);
 +
      }
 +
 +
      // otherwise grab the the number of views
 +
      else {
 +
numViews = getNumViews(line);
 +
 +
// if the number of views is greater than the
 +
// lowest in the array
 +
if (numViews > A[0].numViews){
 +
 +
  // add to the first position in the array
 +
  A[0].numViews = numViews;
 +
  A[0].fileLine = myStrcpy(line);
 +
 +
  // if it is higher than the second number
 +
  // sort it the array
 +
  if (numViews > A[1].numViews){
 +
    mySort(A, K);
 +
  }
 +
}
 +
 +
// if the number of views is the same as the
 +
// lowest, randomly pick between the lowest
 +
// and the current number
 +
else if ((numViews == A[0].numViews) && (coinToss()==0)){
 +
    A[0].numViews = numViews;
 +
    A[0].fileLine=myStrcpy(line);  
 +
}  
 +
      }
 +
    }
 +
  }
 +
 +
  // print out the array into the output file
 +
  for (i=0; i<K;i++){
 +
    fprintf(fp_out, "%s",A[i].fileLine);
 +
  }
 +
 +
  // close the input and output files;
 +
  fclose(fp_in);
 +
  fclose(fp_out);
 +
 
 +
  // free the space allocated for the array
 +
  free (A);
 +
 +
  return 0;
 +
}
 +
 +
/*
 +
* Function to get the number of views for a particular line
 +
 +
*  parameter: line to examine
 +
*  return: number of views for the line
 +
*          0 if the line does not have the proper format
 +
*/
 +
int getNumViews(const char* line){
 +
  int numViews;
 +
  char *spcChar, *nextSpc;
 +
 
 +
  // look for the start of the second column
 +
  spcChar = strchr(line, ' ');
 +
  if (spcChar == NULL){
 +
    return 0;
 +
  }
 +
 +
  // look for the start of the third column
 +
  spcChar = strchr(spcChar+1, ' ');
 +
  if (spcChar == NULL){
 +
    return 0;
 +
  }
 +
 
 +
  // look for the end of the third column
 +
  nextSpc = strchr(spcChar+1, ' ');
 +
  if (nextSpc == NULL){
 +
    return 0;
 +
  }
 +
 +
  *nextSpc = '\0'; // end the string at the end of the 3rd col
 +
  numViews = atoi(spcChar+1); // grab the numViews
 +
  *nextSpc = ' '; // convert the string back to original format
 +
 +
  // return the number of views
 +
  return numViews;
 +
}
 +
 +
 +
/*
 +
* Function to check if we should keep the inputted line
 +
*
 +
*  parameter: line to examine
 +
*  return: 0 if keep the line,
 +
*          1 if discard
 +
*
 +
* (this is identical to the function in filter4.c)
 +
*/
 +
int keepLine(const char* line){
 +
  char * splitChar;
 +
  char * split2;
 +
  const char* firstCol = "en";
 +
  const char* secondCol = "file:";
 +
  const char* secondCol2 = "File:";
 +
  int result;
 +
  const int compareLimit1 = strlen(firstCol);
 +
  const int compareLimit2 = strlen(secondCol);
 +
 
 +
  // split the line at the first whitespace
 +
  splitChar = strchr(line, ' ');
 +
   
 +
  // return 1 if there is no white space
 +
  if (splitChar == NULL){
 +
    return 1;
 +
  }
 +
 +
  // check if the first is "en"
 +
  *splitChar = '\0';
 +
  result = strncmp(line, firstCol, compareLimit1);
 +
   
 +
  // if the first column is "en" with no trailing chars
 +
  if ((result == 0) && (strlen(line)==2)){
 +
    *(splitChar) = ' ';
 +
   
 +
    // look to see if "file:" is in the line
 +
    split2 = strchr(splitChar, 'f');
 +
    while (split2 != NULL){
 +
      result = strncmp(split2, secondCol, compareLimit2);
 +
      if (result == 0){
 +
return result;
 +
      }
 +
      split2 = strchr(split2+1, 'f');
 +
    }
 +
 +
    // look to see if "File:" is in the line
 +
    split2 = strchr(splitChar, 'F');
 +
    while (split2 != NULL){
 +
      result = strncmp(split2, secondCol2, compareLimit2);   
 +
      if (result == 0){
 +
  return result;
 +
      }
 +
      split2 = strchr(split2+1, 'F');
 +
    }
 +
  }
 +
 
 +
  // if "en" or "File:"/"file:" is not found, return 1
 +
  return 1;
 +
}
 +
 +
//------------------------------------------------------------
 +
// myStrcpy: combines strcpy and malloc to create a new string
 +
//          big enough to contain the chars of the string passed
 +
//          as a parameter. Returns a pointer to the new copy.
 +
// from sortArrayOfStruct.c by DT
 +
//------------------------------------------------------------
 +
char *myStrcpy( char *name ) {
 +
  char *p = (char *) malloc( (strlen( name ) + 1) * sizeof( char ) );
 +
  strcpy( p, name );
 +
  return p;
 +
}
 +
 +
//---------------------------------------------------------
 +
// swap: swaps 2 structs in the array of structs.  The two
 +
//      entries are identified by two indexes.
 +
// modified from sortArrayOfStruct.c by DT
 +
//---------------------------------------------------------
 +
void swap( struct FileInfo *A, int i, int j ){
 +
  int tempViews = A[i].numViews;
 +
  char *tempLine = A[i].fileLine;
 +
 +
  A[i].numViews = A[j].numViews;
 +
  A[i].fileLine = A[j].fileLine;
 +
 +
  A[j].numViews = tempViews;
 +
  A[j].fileLine = tempLine;
 +
}
 +
 +
 +
//--------------------------------------------------------
 +
// mySort: sorts the array of structs using insertion sort
 +
//    parameters:
 +
//        A - array of FileInfo
 +
//        K - total number of items in the array
 +
// modified from sortArrayOfStruct.c by DT
 +
//---------------------------------------------------------
 +
void mySort( struct FileInfo *A, int K ){
 +
 +
  int i,j;
 +
  int temp;
 +
 +
  for ( i = 0; i < K-1; i++ ){
 +
    for (j = i+1; j < K; j++ ){
 +
      temp = i;
 +
      if (A[j].numViews < A[temp].numViews ){
 +
temp = j;
 +
      }
 +
      swap (A, i, temp);
 +
    }
 +
  }
 +
}
 +
  
 
</source>
 
</source>
Line 171: Line 484:
  
 
=Problem 3=
 
=Problem 3=
 +
<br />
 +
A good program by Danae for the MPI question.  Note the use of '''perworker''' and '''permanager''' to make sure the total number of samples adds up ''exactly'' to '''numsamples'''.  Nice touch.
 
<br />
 
<br />
 
<source lang="C">
 
<source lang="C">
 +
[352a@beowulf 352a-aa]$ cat mpiMonteCarlo4.c
 +
/* mpiMonteCarlo4.c
 +
// danae
 +
//
 +
// Computes Pi using the Monte Carlo method with MPI,
 +
// NP (user-specified) processes, and N random samples.
 +
// Note that N should be evenly divisible by NP in order
 +
// to truly calculate N samples.
 +
//
 +
// To compile and run:
 +
// mpicc -o mpiMonteCarlo4 mpiMonteCarlo4.c
 +
// mpirun -np [NP] ./mpiMonteCarlo [N]
 +
//
 +
*/
 +
 +
#include "mpi.h"
 +
#include <stdio.h>
 +
#include <stdlib.h>
 +
 +
#define MANAGER 0
 +
 +
//--------------------------------------------------------------------
 +
//                        P R O T O T Y P E S
 +
//--------------------------------------------------------------------
 +
void doManager(int,int);
 +
void doWorker();
 +
 +
//--------------------------------------------------------------------
 +
//                          M  A  I  N
 +
//--------------------------------------------------------------------
 +
int main(int argc, char *argv[]) {
 +
  int myId, noProcs, nameLen;
 +
  char procName[MPI_MAX_PROCESSOR_NAME];
 +
  int numsamples;
 +
 +
  if ( argc<2 ) {
 +
    printf( "Please Use Correct Syntax: mpirun -np [NP] ./pi2 [N]\n" );
 +
    return 1;
 +
  }
 +
 +
  // get the number of samples to generate
 +
  numsamples = atoi(argv[1]);
 +
 
 +
  //--- start MPI ---
 +
  MPI_Init(&argc, &argv);
 +
  MPI_Comm_rank(MPI_COMM_WORLD, &myId);
 +
  MPI_Comm_size(MPI_COMM_WORLD, &noProcs);
 +
  MPI_Get_processor_name(procName, &nameLen);
 +
 
 +
  //--- display which process we are on ---
 +
  printf( "Process %d of %d started.\n", myId,  noProcs);
 +
 +
  //--- farm out the work: 1 manager, several workers ---
 +
  if ( myId == MANAGER )
 +
    doManager(noProcs, numsamples);
 +
  else
 +
    doWorker();
 +
 
 +
  //--- close up MPI ---
 +
  MPI_Finalize();
 +
 
 +
  return 0;
 +
}
 +
 +
//--------------------------------------------------------------------
 +
// The manager's main work function
 +
//--------------------------------------------------------------------
 +
void doManager(int noProcs, int numsamples ) {
 +
  double incircle = 0;
 +
  double workersum;
 +
  int i;
 +
  MPI_Status status;
 +
 +
  int perworker = (1.0)*numsamples/noProcs;
 +
  int permanager = numsamples - perworker*noProcs; //manager picks up slack
 +
 +
  // printf("perworker: %d\n", perworker);
 +
  //--- first to all workers ---
 +
  for(i=1; i<noProcs; i++) {
 +
    MPI_Send( &perworker, 1, MPI_INT, i, 0, MPI_COMM_WORLD );
 +
  }
 +
 +
  //calculate manager portion
 +
  srand(time(NULL)); //seed the random number generator with time
 +
  float x, y;
 +
  for(i=0; i<permanager+perworker; i++) {
 +
    x = 1 - (((float)rand()/(float)(RAND_MAX)) * 2.0); //random float [-1,1]
 +
    y = 1 - (((float)rand()/(float)(RAND_MAX)) * 2.0); //random float [-1,1]
 +
    if ((float)x*x + (float)y*y <= (float)(1.0)) {
 +
      incircle += 1;
 +
    }
 +
  }
 +
 
 +
  //--- wait for worker sums ---
 +
  for(i=1; i<noProcs; i++) {
 +
    MPI_Recv( &workersum, 1, MPI_DOUBLE, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status );
 +
    incircle += workersum;
 +
  }
 +
 +
  //--- output result ---
 +
  printf( "%d iterations: Pi = %f\n", permanager + noProcs*perworker, 4.0*(incircle)/(noProcs*perworker));
 +
}
 +
 +
//--------------------------------------------------------------------
 +
// The worker's main work function
 +
//--------------------------------------------------------------------
 +
void doWorker( ) {
 +
  int numthrows, i;
 +
 +
  //--- get n from manager ---
 +
  MPI_Status status;
 +
  MPI_Recv( &numthrows, 1, MPI_INT, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status );
 +
 +
  //--- do portion of the work ---
 +
  double incircle = 0;
 +
  float x, y;
 +
  for (i=0; i<numthrows; i++) {
 +
    // generate random x, y as float [0,2], subtracts 1 to be in range [-1,1]
 +
    x = 1 - (((float)rand()/(float)(RAND_MAX)) * 2.0); //random float [-1,1]
 +
    y = 1 - (((float)rand()/(float)(RAND_MAX)) * 2.0); //random float [-1,1]
 +
    if ((float)x*x + (float)y*y <= (float)(1.0))
 +
      incircle += 1;
 +
  }
 +
 +
  //-- send result to manager ---
 +
  MPI_Send( &incircle, 1, MPI_DOUBLE, MANAGER, 0, MPI_COMM_WORLD );
 +
}
 +
  
 
</source>
 
</source>

Latest revision as of 14:46, 8 November 2013

--D. Thiebaut (talk) 14:41, 8 November 2013 (EST)



This section is only visible to computers located at Smith College