Difference between revisions of "CSC352 Homework 4 Solutions"

From dftwiki3
Jump to: navigation, search
(Created page with "--~~~~ ---- <onlysmith> =Problem 1= <br /> We have a pretty complete solution by Emily that catches a lot of image file names for the English Wikipedia. <source lang="C"> /*...")
 
Line 165: Line 165:
 
=Problem 2=
 
=Problem 2=
 
<br />
 
<br />
 +
Another good program by Emily.  Note the use of the coinToss() function.  Figure out what it does.  Nice addition to the program.
 
<source lang="C">
 
<source lang="C">
 +
 +
/*
 +
* filter4topK.c
 +
* Emily Flynn
 +
* CSC 352
 +
* 11/1/2013
 +
*
 +
* This is a version of filter4.c that gets the K images with the top
 +
* views. If there is no K provided as the third parameter, this
 +
* program works the same as filter4.c. That program reads in the
 +
* wikipedia image statistics file and outputs only the lines of
 +
* interest that start with "en " and also contain " File:" or "
 +
* file:".  It takes two parameters from the command line: (1) the
 +
* input file, and (2) the name of the output file.
 +
*
 +
* I used this command to check my output. It is a little slow.
 +
*  awk '{if ($1=="en") print;}' <inName> | grep -i "[fF]ile:" | sort -grk 3 - | head -K > <outName>
 +
*
 +
* To run:
 +
*  gcc -o filter4topK filter4topK.c
 +
*  ./filter4topK <inName> <outName> K
 +
*/
 +
 +
#include <stdio.h>
 +
#include <stdlib.h>
 +
#include <string.h>
 +
#include <time.h>
 +
 +
#define MAX_LINE_BUFF 2000
 +
 +
// structure for file information
 +
struct FileInfo {
 +
  char *fileLine;
 +
  int numViews;
 +
};
 +
 +
// prototypes
 +
int keepLine( const char* line );
 +
int getNumViews( const char* line);
 +
void mySort( struct FileInfo *A, int K);
 +
char *myStrcpy(char *name);
 +
 +
// helpful function
 +
int coinToss(){
 +
  return rand() % 2;
 +
}
 +
 +
// -----------------------------------------------------------
 +
// Main method to read in a line, decide whether to keep
 +
// it, and if specified, only output the K images with the
 +
// highest number of views.
 +
//
 +
//  parameters: inputFile
 +
//              outputFile
 +
//              K (optional) - number of images to output info
 +
//                if not specified, will output all
 +
// -----------------------------------------------------------
 +
int main(int argc, const char** argv){
 +
  FILE *fp_in, *fp_out;
 +
  const char* inFile;
 +
  const char* outFile;
 +
  int K, i, numViews;
 +
  struct FileInfo *A;
 +
  char line[MAX_LINE_BUFF];
 +
 +
  // check the number of parameters provided and display an error
 +
  // message if this is not corect
 +
  if (argc < 3){
 +
    printf("Incorrect syntax. Please use: ./filter4topK inFile outFile K.\n");
 +
    return 1;
 +
  }
 +
 
 +
  // grab the arguments
 +
  inFile = argv[1];
 +
  outFile = argv[2];
 +
 
 +
  if (argc == 3){ // work the same as filter4.c if no K provided
 +
    K = 0;
 +
  } else {
 +
    K = atoi(argv[3]);
 +
  }
 +
 +
  // allocate fileInfo space for the top K pieces of information
 +
  A = (struct FileInfo *) malloc( K* sizeof(struct FileInfo));
 +
 +
  // initialize all numViews to zero
 +
  for (i=0;i<K;i++){
 +
    A[i].numViews = 0;
 +
  }
 +
 +
  // seed the random generator for breaking ties
 +
  srand(time(NULL));
 +
 
 +
  // open the input and output files
 +
  fp_in = fopen(inFile, "r");
 +
  fp_out = fopen(outFile, "w");
 +
 +
 +
  // read through the input file
 +
  while (!feof(fp_in)){
 +
 +
    // get the next line in the file
 +
    fgets(line, sizeof(line), fp_in);
 +
    line[sizeof(line) -1]='\0'; // truncate for safety
 +
    if (feof(fp_in)){
 +
      break; // stop if not a line
 +
    }
 +
   
 +
    // check if we want to keep the line
 +
    if (keepLine(line) == 0){
 +
 +
      // just print it to the output file if we are not looking for
 +
      // top images
 +
      if (K == 0){
 +
fprintf(fp_out, "%s", line);
 +
      }
 +
 +
      // otherwise grab the the number of views
 +
      else {
 +
numViews = getNumViews(line);
 +
 +
// if the number of views is greater than the
 +
// lowest in the array
 +
if (numViews > A[0].numViews){
 +
 +
  // add to the first position in the array
 +
  A[0].numViews = numViews;
 +
  A[0].fileLine = myStrcpy(line);
 +
 +
  // if it is higher than the second number
 +
  // sort it the array
 +
  if (numViews > A[1].numViews){
 +
    mySort(A, K);
 +
  }
 +
}
 +
 +
// if the number of views is the same as the
 +
// lowest, randomly pick between the lowest
 +
// and the current number
 +
else if ((numViews == A[0].numViews) && (coinToss()==0)){
 +
    A[0].numViews = numViews;
 +
    A[0].fileLine=myStrcpy(line);  
 +
}  
 +
      }
 +
    }
 +
  }
 +
 +
  // print out the array into the output file
 +
  for (i=0; i<K;i++){
 +
    fprintf(fp_out, "%s",A[i].fileLine);
 +
  }
 +
 +
  // close the input and output files;
 +
  fclose(fp_in);
 +
  fclose(fp_out);
 +
 
 +
  // free the space allocated for the array
 +
  free (A);
 +
 +
  return 0;
 +
}
 +
 +
/*
 +
* Function to get the number of views for a particular line
 +
 +
*  parameter: line to examine
 +
*  return: number of views for the line
 +
*          0 if the line does not have the proper format
 +
*/
 +
int getNumViews(const char* line){
 +
  int numViews;
 +
  char *spcChar, *nextSpc;
 +
 
 +
  // look for the start of the second column
 +
  spcChar = strchr(line, ' ');
 +
  if (spcChar == NULL){
 +
    return 0;
 +
  }
 +
 +
  // look for the start of the third column
 +
  spcChar = strchr(spcChar+1, ' ');
 +
  if (spcChar == NULL){
 +
    return 0;
 +
  }
 +
 
 +
  // look for the end of the third column
 +
  nextSpc = strchr(spcChar+1, ' ');
 +
  if (nextSpc == NULL){
 +
    return 0;
 +
  }
 +
 +
  *nextSpc = '\0'; // end the string at the end of the 3rd col
 +
  numViews = atoi(spcChar+1); // grab the numViews
 +
  *nextSpc = ' '; // convert the string back to original format
 +
 +
  // return the number of views
 +
  return numViews;
 +
}
 +
 +
 +
/*
 +
* Function to check if we should keep the inputted line
 +
*
 +
*  parameter: line to examine
 +
*  return: 0 if keep the line,
 +
*          1 if discard
 +
*
 +
* (this is identical to the function in filter4.c)
 +
*/
 +
int keepLine(const char* line){
 +
  char * splitChar;
 +
  char * split2;
 +
  const char* firstCol = "en";
 +
  const char* secondCol = "file:";
 +
  const char* secondCol2 = "File:";
 +
  int result;
 +
  const int compareLimit1 = strlen(firstCol);
 +
  const int compareLimit2 = strlen(secondCol);
 +
 
 +
  // split the line at the first whitespace
 +
  splitChar = strchr(line, ' ');
 +
   
 +
  // return 1 if there is no white space
 +
  if (splitChar == NULL){
 +
    return 1;
 +
  }
 +
 +
  // check if the first is "en"
 +
  *splitChar = '\0';
 +
  result = strncmp(line, firstCol, compareLimit1);
 +
   
 +
  // if the first column is "en" with no trailing chars
 +
  if ((result == 0) && (strlen(line)==2)){
 +
    *(splitChar) = ' ';
 +
   
 +
    // look to see if "file:" is in the line
 +
    split2 = strchr(splitChar, 'f');
 +
    while (split2 != NULL){
 +
      result = strncmp(split2, secondCol, compareLimit2);
 +
      if (result == 0){
 +
return result;
 +
      }
 +
      split2 = strchr(split2+1, 'f');
 +
    }
 +
 +
    // look to see if "File:" is in the line
 +
    split2 = strchr(splitChar, 'F');
 +
    while (split2 != NULL){
 +
      result = strncmp(split2, secondCol2, compareLimit2);   
 +
      if (result == 0){
 +
  return result;
 +
      }
 +
      split2 = strchr(split2+1, 'F');
 +
    }
 +
  }
 +
 
 +
  // if "en" or "File:"/"file:" is not found, return 1
 +
  return 1;
 +
}
 +
 +
//------------------------------------------------------------
 +
// myStrcpy: combines strcpy and malloc to create a new string
 +
//          big enough to contain the chars of the string passed
 +
//          as a parameter. Returns a pointer to the new copy.
 +
// from sortArrayOfStruct.c by DT
 +
//------------------------------------------------------------
 +
char *myStrcpy( char *name ) {
 +
  char *p = (char *) malloc( (strlen( name ) + 1) * sizeof( char ) );
 +
  strcpy( p, name );
 +
  return p;
 +
}
 +
 +
//---------------------------------------------------------
 +
// swap: swaps 2 structs in the array of structs.  The two
 +
//      entries are identified by two indexes.
 +
// modified from sortArrayOfStruct.c by DT
 +
//---------------------------------------------------------
 +
void swap( struct FileInfo *A, int i, int j ){
 +
  int tempViews = A[i].numViews;
 +
  char *tempLine = A[i].fileLine;
 +
 +
  A[i].numViews = A[j].numViews;
 +
  A[i].fileLine = A[j].fileLine;
 +
 +
  A[j].numViews = tempViews;
 +
  A[j].fileLine = tempLine;
 +
}
 +
 +
 +
//--------------------------------------------------------
 +
// mySort: sorts the array of structs using insertion sort
 +
//    parameters:
 +
//        A - array of FileInfo
 +
//        K - total number of items in the array
 +
// modified from sortArrayOfStruct.c by DT
 +
//---------------------------------------------------------
 +
void mySort( struct FileInfo *A, int K ){
 +
 +
  int i,j;
 +
  int temp;
 +
 +
  for ( i = 0; i < K-1; i++ ){
 +
    for (j = i+1; j < K; j++ ){
 +
      temp = i;
 +
      if (A[j].numViews < A[temp].numViews ){
 +
temp = j;
 +
      }
 +
      swap (A, i, temp);
 +
    }
 +
  }
 +
}
 +
  
 
</source>
 
</source>

Revision as of 14:43, 8 November 2013

--D. Thiebaut (talk) 14:41, 8 November 2013 (EST)



This section is only visible to computers located at Smith College