Difference between revisions of "CSC352 Homework 4 Solutions"
(Created page with "--~~~~ ---- <onlysmith> =Problem 1= <br /> We have a pretty complete solution by Emily that catches a lot of image file names for the English Wikipedia. <source lang="C"> /*...") |
|||
Line 165: | Line 165: | ||
=Problem 2= | =Problem 2= | ||
<br /> | <br /> | ||
+ | Another good program by Emily. Note the use of the coinToss() function. Figure out what it does. Nice addition to the program. | ||
<source lang="C"> | <source lang="C"> | ||
+ | |||
+ | /* | ||
+ | * filter4topK.c | ||
+ | * Emily Flynn | ||
+ | * CSC 352 | ||
+ | * 11/1/2013 | ||
+ | * | ||
+ | * This is a version of filter4.c that gets the K images with the top | ||
+ | * views. If there is no K provided as the third parameter, this | ||
+ | * program works the same as filter4.c. That program reads in the | ||
+ | * wikipedia image statistics file and outputs only the lines of | ||
+ | * interest that start with "en " and also contain " File:" or " | ||
+ | * file:". It takes two parameters from the command line: (1) the | ||
+ | * input file, and (2) the name of the output file. | ||
+ | * | ||
+ | * I used this command to check my output. It is a little slow. | ||
+ | * awk '{if ($1=="en") print;}' <inName> | grep -i "[fF]ile:" | sort -grk 3 - | head -K > <outName> | ||
+ | * | ||
+ | * To run: | ||
+ | * gcc -o filter4topK filter4topK.c | ||
+ | * ./filter4topK <inName> <outName> K | ||
+ | */ | ||
+ | |||
+ | #include <stdio.h> | ||
+ | #include <stdlib.h> | ||
+ | #include <string.h> | ||
+ | #include <time.h> | ||
+ | |||
+ | #define MAX_LINE_BUFF 2000 | ||
+ | |||
+ | // structure for file information | ||
+ | struct FileInfo { | ||
+ | char *fileLine; | ||
+ | int numViews; | ||
+ | }; | ||
+ | |||
+ | // prototypes | ||
+ | int keepLine( const char* line ); | ||
+ | int getNumViews( const char* line); | ||
+ | void mySort( struct FileInfo *A, int K); | ||
+ | char *myStrcpy(char *name); | ||
+ | |||
+ | // helpful function | ||
+ | int coinToss(){ | ||
+ | return rand() % 2; | ||
+ | } | ||
+ | |||
+ | // ----------------------------------------------------------- | ||
+ | // Main method to read in a line, decide whether to keep | ||
+ | // it, and if specified, only output the K images with the | ||
+ | // highest number of views. | ||
+ | // | ||
+ | // parameters: inputFile | ||
+ | // outputFile | ||
+ | // K (optional) - number of images to output info | ||
+ | // if not specified, will output all | ||
+ | // ----------------------------------------------------------- | ||
+ | int main(int argc, const char** argv){ | ||
+ | FILE *fp_in, *fp_out; | ||
+ | const char* inFile; | ||
+ | const char* outFile; | ||
+ | int K, i, numViews; | ||
+ | struct FileInfo *A; | ||
+ | char line[MAX_LINE_BUFF]; | ||
+ | |||
+ | // check the number of parameters provided and display an error | ||
+ | // message if this is not corect | ||
+ | if (argc < 3){ | ||
+ | printf("Incorrect syntax. Please use: ./filter4topK inFile outFile K.\n"); | ||
+ | return 1; | ||
+ | } | ||
+ | |||
+ | // grab the arguments | ||
+ | inFile = argv[1]; | ||
+ | outFile = argv[2]; | ||
+ | |||
+ | if (argc == 3){ // work the same as filter4.c if no K provided | ||
+ | K = 0; | ||
+ | } else { | ||
+ | K = atoi(argv[3]); | ||
+ | } | ||
+ | |||
+ | // allocate fileInfo space for the top K pieces of information | ||
+ | A = (struct FileInfo *) malloc( K* sizeof(struct FileInfo)); | ||
+ | |||
+ | // initialize all numViews to zero | ||
+ | for (i=0;i<K;i++){ | ||
+ | A[i].numViews = 0; | ||
+ | } | ||
+ | |||
+ | // seed the random generator for breaking ties | ||
+ | srand(time(NULL)); | ||
+ | |||
+ | // open the input and output files | ||
+ | fp_in = fopen(inFile, "r"); | ||
+ | fp_out = fopen(outFile, "w"); | ||
+ | |||
+ | |||
+ | // read through the input file | ||
+ | while (!feof(fp_in)){ | ||
+ | |||
+ | // get the next line in the file | ||
+ | fgets(line, sizeof(line), fp_in); | ||
+ | line[sizeof(line) -1]='\0'; // truncate for safety | ||
+ | if (feof(fp_in)){ | ||
+ | break; // stop if not a line | ||
+ | } | ||
+ | |||
+ | // check if we want to keep the line | ||
+ | if (keepLine(line) == 0){ | ||
+ | |||
+ | // just print it to the output file if we are not looking for | ||
+ | // top images | ||
+ | if (K == 0){ | ||
+ | fprintf(fp_out, "%s", line); | ||
+ | } | ||
+ | |||
+ | // otherwise grab the the number of views | ||
+ | else { | ||
+ | numViews = getNumViews(line); | ||
+ | |||
+ | // if the number of views is greater than the | ||
+ | // lowest in the array | ||
+ | if (numViews > A[0].numViews){ | ||
+ | |||
+ | // add to the first position in the array | ||
+ | A[0].numViews = numViews; | ||
+ | A[0].fileLine = myStrcpy(line); | ||
+ | |||
+ | // if it is higher than the second number | ||
+ | // sort it the array | ||
+ | if (numViews > A[1].numViews){ | ||
+ | mySort(A, K); | ||
+ | } | ||
+ | } | ||
+ | |||
+ | // if the number of views is the same as the | ||
+ | // lowest, randomly pick between the lowest | ||
+ | // and the current number | ||
+ | else if ((numViews == A[0].numViews) && (coinToss()==0)){ | ||
+ | A[0].numViews = numViews; | ||
+ | A[0].fileLine=myStrcpy(line); | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | |||
+ | // print out the array into the output file | ||
+ | for (i=0; i<K;i++){ | ||
+ | fprintf(fp_out, "%s",A[i].fileLine); | ||
+ | } | ||
+ | |||
+ | // close the input and output files; | ||
+ | fclose(fp_in); | ||
+ | fclose(fp_out); | ||
+ | |||
+ | // free the space allocated for the array | ||
+ | free (A); | ||
+ | |||
+ | return 0; | ||
+ | } | ||
+ | |||
+ | /* | ||
+ | * Function to get the number of views for a particular line | ||
+ | * | ||
+ | * parameter: line to examine | ||
+ | * return: number of views for the line | ||
+ | * 0 if the line does not have the proper format | ||
+ | */ | ||
+ | int getNumViews(const char* line){ | ||
+ | int numViews; | ||
+ | char *spcChar, *nextSpc; | ||
+ | |||
+ | // look for the start of the second column | ||
+ | spcChar = strchr(line, ' '); | ||
+ | if (spcChar == NULL){ | ||
+ | return 0; | ||
+ | } | ||
+ | |||
+ | // look for the start of the third column | ||
+ | spcChar = strchr(spcChar+1, ' '); | ||
+ | if (spcChar == NULL){ | ||
+ | return 0; | ||
+ | } | ||
+ | |||
+ | // look for the end of the third column | ||
+ | nextSpc = strchr(spcChar+1, ' '); | ||
+ | if (nextSpc == NULL){ | ||
+ | return 0; | ||
+ | } | ||
+ | |||
+ | *nextSpc = '\0'; // end the string at the end of the 3rd col | ||
+ | numViews = atoi(spcChar+1); // grab the numViews | ||
+ | *nextSpc = ' '; // convert the string back to original format | ||
+ | |||
+ | // return the number of views | ||
+ | return numViews; | ||
+ | } | ||
+ | |||
+ | |||
+ | /* | ||
+ | * Function to check if we should keep the inputted line | ||
+ | * | ||
+ | * parameter: line to examine | ||
+ | * return: 0 if keep the line, | ||
+ | * 1 if discard | ||
+ | * | ||
+ | * (this is identical to the function in filter4.c) | ||
+ | */ | ||
+ | int keepLine(const char* line){ | ||
+ | char * splitChar; | ||
+ | char * split2; | ||
+ | const char* firstCol = "en"; | ||
+ | const char* secondCol = "file:"; | ||
+ | const char* secondCol2 = "File:"; | ||
+ | int result; | ||
+ | const int compareLimit1 = strlen(firstCol); | ||
+ | const int compareLimit2 = strlen(secondCol); | ||
+ | |||
+ | // split the line at the first whitespace | ||
+ | splitChar = strchr(line, ' '); | ||
+ | |||
+ | // return 1 if there is no white space | ||
+ | if (splitChar == NULL){ | ||
+ | return 1; | ||
+ | } | ||
+ | |||
+ | // check if the first is "en" | ||
+ | *splitChar = '\0'; | ||
+ | result = strncmp(line, firstCol, compareLimit1); | ||
+ | |||
+ | // if the first column is "en" with no trailing chars | ||
+ | if ((result == 0) && (strlen(line)==2)){ | ||
+ | *(splitChar) = ' '; | ||
+ | |||
+ | // look to see if "file:" is in the line | ||
+ | split2 = strchr(splitChar, 'f'); | ||
+ | while (split2 != NULL){ | ||
+ | result = strncmp(split2, secondCol, compareLimit2); | ||
+ | if (result == 0){ | ||
+ | return result; | ||
+ | } | ||
+ | split2 = strchr(split2+1, 'f'); | ||
+ | } | ||
+ | |||
+ | // look to see if "File:" is in the line | ||
+ | split2 = strchr(splitChar, 'F'); | ||
+ | while (split2 != NULL){ | ||
+ | result = strncmp(split2, secondCol2, compareLimit2); | ||
+ | if (result == 0){ | ||
+ | return result; | ||
+ | } | ||
+ | split2 = strchr(split2+1, 'F'); | ||
+ | } | ||
+ | } | ||
+ | |||
+ | // if "en" or "File:"/"file:" is not found, return 1 | ||
+ | return 1; | ||
+ | } | ||
+ | |||
+ | //------------------------------------------------------------ | ||
+ | // myStrcpy: combines strcpy and malloc to create a new string | ||
+ | // big enough to contain the chars of the string passed | ||
+ | // as a parameter. Returns a pointer to the new copy. | ||
+ | // from sortArrayOfStruct.c by DT | ||
+ | //------------------------------------------------------------ | ||
+ | char *myStrcpy( char *name ) { | ||
+ | char *p = (char *) malloc( (strlen( name ) + 1) * sizeof( char ) ); | ||
+ | strcpy( p, name ); | ||
+ | return p; | ||
+ | } | ||
+ | |||
+ | //--------------------------------------------------------- | ||
+ | // swap: swaps 2 structs in the array of structs. The two | ||
+ | // entries are identified by two indexes. | ||
+ | // modified from sortArrayOfStruct.c by DT | ||
+ | //--------------------------------------------------------- | ||
+ | void swap( struct FileInfo *A, int i, int j ){ | ||
+ | int tempViews = A[i].numViews; | ||
+ | char *tempLine = A[i].fileLine; | ||
+ | |||
+ | A[i].numViews = A[j].numViews; | ||
+ | A[i].fileLine = A[j].fileLine; | ||
+ | |||
+ | A[j].numViews = tempViews; | ||
+ | A[j].fileLine = tempLine; | ||
+ | } | ||
+ | |||
+ | |||
+ | //-------------------------------------------------------- | ||
+ | // mySort: sorts the array of structs using insertion sort | ||
+ | // parameters: | ||
+ | // A - array of FileInfo | ||
+ | // K - total number of items in the array | ||
+ | // modified from sortArrayOfStruct.c by DT | ||
+ | //--------------------------------------------------------- | ||
+ | void mySort( struct FileInfo *A, int K ){ | ||
+ | |||
+ | int i,j; | ||
+ | int temp; | ||
+ | |||
+ | for ( i = 0; i < K-1; i++ ){ | ||
+ | for (j = i+1; j < K; j++ ){ | ||
+ | temp = i; | ||
+ | if (A[j].numViews < A[temp].numViews ){ | ||
+ | temp = j; | ||
+ | } | ||
+ | swap (A, i, temp); | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | |||
</source> | </source> |
Revision as of 14:43, 8 November 2013
--D. Thiebaut (talk) 14:41, 8 November 2013 (EST)