CSC352 Walking a 2-Level Directory in C

From dftwiki3
Revision as of 10:49, 21 November 2013 by Thiebaut (talk | contribs) (Source 2: Dividing the Application in 3 Files)
Jump to: navigation, search

--D. Thiebaut (talk) 07:28, 21 November 2013 (EST)



The context for this page can be found in a tutorial and in an assignment used in the CSC352 seminar on Parallel and Distributed Processing. The homework can be found here, and the tutorial can be found here.


Create a test hierarchical directory-structure

Directory structure

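  • Using bash, from the command line, create a similar structure. Run the loop from inside the en directory, since the program looks for the directories en/0/00 through en/f/ff: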
 for i in 0 1 2 3 4 5 6 7 8 9 a b c d e f ; do 
     for j in 0 1 2 3 4 5 6 7 8 9 a b c d e f ; do 
        echo ${i}${j}; 
        mkdir -p ${i}/${i}${j}  
     done
 done


Create fake files

  • create files using the "touch" command
 for i in 0 1 2 3 4 5 6 7 8 9 a b c d e f ; do 
     for j in 0 1 2 3 4 5 6 7 8 9 a b c d e f ; do 
         echo ${i}${j}; 
         touch ${i}/${i}${j}/image_${i}${j}.jpg  
         touch ${i}/${i}${j}/image_${j}${i}.png   
     done  
 done

Source 1: One Source for the Whole Code


// walk2.c
// D. Thiebaut
// Taken originally from http://stackoverflow.com/questions/612097/
//                      how-can-i-get-a-list-of-files-in-a-directory-using-c-or-c
// and adapted to fit the current problem.
//
// This program assumes that there is a directory structure containing data files
// The directory structure has this form:
// en/0
// en/0/00
// en/0/01
// en/0/02
// ...
// en/0/0f
// en/1/10
// en/1/11
// ...
// en/1/1f
// en/2/20
// ...
// en/f/ff
// 
// This program provides a function called nextFile() that will start by reading
// the directory en/0/00 and return all the files in it, ONE AT A TIME.  
// Then it will move on to en/0/01, and return all the files in it, ONE AT A TIME.
// And so on until it has read the final file in en/f/ff, when it will return NULL
// instead of a file name.
// 
// Testing: bash commands to create a directory structure along with 2 dummy image
// files in each directory:
//
// cd ./en
// for i in 0 1 2 3 4 5 6 7 8 9 a b c d e f ; do
//   for j in 0 1 2 3 4 5 6 7 8 9 a b c d e f ; do  
//       echo ${i}${j}
//       mkdir -p ${i}/${i}${j}  
//       touch ${i}/${i}${j}/image_${i}${j}.jpg 
//       touch ${i}/${i}${j}/image_${j}${i}.png 
//   done 
// done 
//
// To compile:
//
//      gcc -o walk2 walk2.c

#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <string.h>

char *path;   // directory currently being walked, e.g. "en/0/00"; set by main()

//-----------------------------------------------------
// bumpHexDigit: advances one lowercase hex digit in
// place: '0'..'8' and 'a'..'e' move to the next
// character, '9' moves to 'a'.
// Returns 0 when the digit was advanced.  Returns 1
// when the digit was already 'f' (or beyond): it is
// then reset to '0' and the caller must carry into
// the next digit.
//-----------------------------------------------------
static int bumpHexDigit( char *digit ) {
  if ( *digit < '9' ) {
    *digit += 1;
    return 0;
  }
  if ( *digit == '9' ) {      // '9' is followed by 'a', not by ':'
    *digit = 'a';
    return 0;
  }
  if ( *digit < 'f' ) {
    *digit += 1;
    return 0;
  }
  *digit = '0';
  return 1;
}

//-----------------------------------------------------
// nextPath: given the global path of the form en/x/xy,
// modifies it IN PLACE so that x/xy is incremented:
// 0/01 becomes 0/02, 0/09 becomes 0/0a, and a/af
// becomes b/b0.
// Returns path on success.  Returns NULL once f/ff has
// been passed; the buffer is then reset to 0/00.
// Also returns NULL, without touching anything, when
// path is NULL or too short to contain x/xy.
//-----------------------------------------------------
char* nextPath() {
  //--- nothing to advance: no path, or no room for "x/xy" ---
  if ( path == NULL )
    return NULL;
  size_t len = strlen( path );
  if ( len < 4 )
    return NULL;

  //--- increment the last digit (the y of x/xy) ---
  if ( !bumpHexDigit( &path[len-1] ) )
    return path;

  //--- y wrapped to '0': increment x, and keep the ---
  //--- parent directory name (len-4) equal to it   ---
  if ( !bumpHexDigit( &path[len-2] ) ) {
    path[len-4] = path[len-2];
    return path;
  }

  //--- both digits wrapped: we are past f/ff.  The two ---
  //--- digits are already '0'; reset the parent too.   ---
  path[len-4] = '0';
  return NULL;
}

//-----------------------------------------------------
// nextFile(): returns the name of the next entry in
// the directory designated by the global path.  When
// that directory is exhausted, path is advanced with
// nextPath() and the walk continues in the next
// directory.  The "." and ".." entries are skipped.
//
// Returns NULL when the last directory has been
// exhausted (path is then NULL, and every later call
// returns NULL as well), or when a directory cannot
// be opened.
//
// The returned string belongs to the directory stream:
// copy it if it must survive the next call.
//-----------------------------------------------------
int firstTime = 1;    // 1 when the directory named by path still has to be opened
DIR *dir;             // stream of the directory currently being read
char *nextFile() {
  for ( ;; ) {
    //--- if we've read everything already, return NULL.     ---
    //--- This must be tested BEFORE opendir(): path is NULL ---
    //--- once nextPath() has gone past the last directory.  ---
    if ( path == NULL )
      return NULL;

    //--- open the directory the first time we visit it ---
    if ( firstTime ) {
      dir = opendir( path );
      if ( dir == NULL )
        return NULL;    // firstTime stays 1: a later call retries the
                        // open instead of reading from a NULL stream
      firstTime = 0;
    }

    //--- get next entry; NULL means this directory is done ---
    struct dirent *ent = readdir( dir );
    if ( ent == NULL ) {
      closedir( dir );
      dir = NULL;
      path = nextPath();
      firstTime = 1;
      continue;
    }

    //--- skip . and .. files ---
    if ( strcmp( ent->d_name, "." ) == 0
         || strcmp( ent->d_name, ".." ) == 0 )
      continue;

    //--- return file ---
    return ent->d_name;
  }
}

//-----------------------------------------------------
// MAIN
// test the two functions by navigating the whole
// directory tree, starting at en/0/00, and printing
// all the files found, followed by their count.
// Exits with a failure status on a usage error or if
// memory cannot be allocated.
//-----------------------------------------------------
int main( int argc, char **argv) {

  static const char startPath[] = "en/0/00";
  int count = 0;
  char *fileName = NULL;

  //--- check the command line before allocating anything ---
  // NOTE(review): one argument is required, but argv[1] is never read;
  // the starting directory is always startPath.
  if ( argc < 2 ) {
    fprintf( stderr, "Syntax: ./walk path\n" );
    exit( EXIT_FAILURE );
  }

  //--- nextPath() modifies path in place, so it needs a writable copy ---
  path = malloc( sizeof startPath );
  if ( path == NULL ) {
    perror( "malloc" );
    exit( EXIT_FAILURE );
  }
  strcpy( path, startPath );

  //--- nextFile() sets path to NULL at the end of the walk: ---
  //--- remember the buffer so that it can still be released ---
  char *pathBuffer = path;

  while ( (fileName = nextFile()) != NULL ) {
    printf( "%s\n", fileName );
    count+= 1;
  }
  
  printf( "%d files printed\n\n", count );

  free( pathBuffer );
  path = NULL;
  return 0;
}


Source 2: Dividing the Application into 3 Files


The Main Program: testWalk3.c


// testWalk3.c
// D. Thiebaut
// This program assumes an externally declared path of the form
// "en/0/00" (but could be different), and an external function called
// nextFile() that returns the next file from the structure until no more
// files can be found, in which case a NULL value is returned.
// 
// To compile:
//    gcc -c walk3.c
//    gcc -o testWalk3 walk3.o testWalk3.c

#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <string.h>
#include "walk3.h"

extern char* path; // the first valid path of the hierarchical structure

//-----------------------------------------------------
// MAIN
// test the two functions of the walk3 module by
// navigating the whole directory tree, starting at
// en/0/00, and printing all the files found, followed
// by their count.
// Exits with a failure status on a usage error or if
// memory cannot be allocated.
//-----------------------------------------------------
int main( int argc, char **argv) {

  static const char startPath[] = "en/0/00";
  int count = 0;
  char *fileName = NULL;

  //--- check the command line before allocating anything ---
  // NOTE(review): one argument is required, but argv[1] is never read;
  // the starting directory is always startPath.
  if ( argc < 2 ) {
    fprintf( stderr, "Syntax: ./walk path\n" );
    exit( EXIT_FAILURE );
  }

  //--- nextPath() modifies path in place, so it needs a writable copy ---
  path = malloc( sizeof startPath );
  if ( path == NULL ) {
    perror( "malloc" );
    exit( EXIT_FAILURE );
  }
  strcpy( path, startPath );

  //--- nextFile() sets path to NULL at the end of the walk: ---
  //--- remember the buffer so that it can still be released ---
  char *pathBuffer = path;

  while ( (fileName = nextFile()) != NULL ) {
    printf( "%s\n", fileName );
    count+= 1;
  }
  
  printf( "%d files printed\n\n", count );

  free( pathBuffer );
  path = NULL;
  return 0;
}


The Header File: walk3.h


// walk3.h
// D. Thiebaut
// header file
// Taken originally from http://stackoverflow.com/questions/612097/
//                      how-can-i-get-a-list-of-files-in-a-directory-using-c-or-c
// and adapted to fit the current problem.
//
// This program assumes that there is a directory structure containing data files
// The directory structure has this form:
// en/0
// en/0/00
// en/0/01
// en/0/02
// ...
// en/0/0f
// en/1/10
// en/1/11
// ...
// en/1/1f
// en/2/20
// ...
// en/f/ff
// 
// This program provides a function called nextFile() that will start by reading
// the directory en/0/00 and return all the files in it, ONE AT A TIME.  
// Then it will move on to en/0/01, and return all the files in it, ONE AT A TIME.
// And so on until it has read the final file in en/f/ff, when it will return NULL
// instead of a file name.
// 
// Testing: bash commands to create a directory structure along with 2 dummy image
// files in each directory:
//
// cd ./en
// for i in 0 1 2 3 4 5 6 7 8 9 a b c d e f ; do
//   for j in 0 1 2 3 4 5 6 7 8 9 a b c d e f ; do  
//       echo ${i}${j}
//       mkdir -p ${i}/${i}${j}  
//       touch ${i}/${i}${j}/image_${i}${j}.jpg 
//       touch ${i}/${i}${j}/image_${j}${i}.png 
//   done 
// done 
//

// Include guard: keeps the declarations below from being seen twice by
// one source file.
// NOTE(review): the macro is still called WALK2_H although this header
// is included as "walk3.h"; harmless, but worth renaming.
#ifndef WALK2_H
#define WALK2_H

// NOTE(review): the line below DEFINES path rather than declaring it, so
// every source file that includes this header gets its own (tentative)
// definition.  Linkers that do not merge such duplicates (e.g. GCC built
// with -fno-common) will presumably report a multiple definition; the
// usual fix is "extern char* path;" here plus one definition in walk3.c.
char* path;    // the path of the hierarchical structure,
               // which will be set by the application using
               // this module (e.g. "en/0/00")

// Advances path in place to the next directory (0/01 -> 0/02, a/af -> b/b0).
// Returns path, or NULL once the last directory (f/ff) has been passed.
char* nextPath();

// Returns the name of the next file found under path, skipping "." and
// "..", and moving on to the next directory when one is exhausted.
// Returns NULL when there is nothing left or a directory cannot be opened.
char *nextFile();

#endif


The Implementation File: walk3.c


// walk3.c
// D. Thiebaut
// (see walk3.h for documentation)
//
// To Compile:
//
//        gcc -c walk3.c
//

#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <string.h>
#include "walk3.h"


//-----------------------------------------------------
// bumpHexDigit: advances one lowercase hex digit in
// place: '0'..'8' and 'a'..'e' move to the next
// character, '9' moves to 'a'.
// Returns 0 when the digit was advanced.  Returns 1
// when the digit was already 'f' (or beyond): it is
// then reset to '0' and the caller must carry into
// the next digit.
//-----------------------------------------------------
static int bumpHexDigit( char *digit ) {
  if ( *digit < '9' ) {
    *digit += 1;
    return 0;
  }
  if ( *digit == '9' ) {      // '9' is followed by 'a', not by ':'
    *digit = 'a';
    return 0;
  }
  if ( *digit < 'f' ) {
    *digit += 1;
    return 0;
  }
  *digit = '0';
  return 1;
}

//-----------------------------------------------------
// nextPath: given the global path of the form en/x/xy,
// modifies it IN PLACE so that x/xy is incremented:
// 0/01 becomes 0/02, 0/09 becomes 0/0a, and a/af
// becomes b/b0.
// Returns path on success.  Returns NULL once f/ff has
// been passed; the buffer is then reset to 0/00.
// Also returns NULL, without touching anything, when
// path is NULL or too short to contain x/xy.
//-----------------------------------------------------
char* nextPath() {
  //--- nothing to advance: no path, or no room for "x/xy" ---
  if ( path == NULL )
    return NULL;
  size_t len = strlen( path );
  if ( len < 4 )
    return NULL;

  //--- increment the last digit (the y of x/xy) ---
  if ( !bumpHexDigit( &path[len-1] ) )
    return path;

  //--- y wrapped to '0': increment x, and keep the ---
  //--- parent directory name (len-4) equal to it   ---
  if ( !bumpHexDigit( &path[len-2] ) ) {
    path[len-4] = path[len-2];
    return path;
  }

  //--- both digits wrapped: we are past f/ff.  The two ---
  //--- digits are already '0'; reset the parent too.   ---
  path[len-4] = '0';
  return NULL;
}

//-----------------------------------------------------
// nextFile(): returns the name of the next entry in
// the directory designated by the global path.  When
// that directory is exhausted, path is advanced with
// nextPath() and the walk continues in the next
// directory.  The "." and ".." entries are skipped.
//
// Returns NULL when the last directory has been
// exhausted (path is then NULL, and every later call
// returns NULL as well), or when a directory cannot
// be opened.
//
// The returned string belongs to the directory stream:
// copy it if it must survive the next call.
//-----------------------------------------------------
int firstTime = 1;    // 1 when the directory named by path still has to be opened
DIR *dir;             // stream of the directory currently being read
char *nextFile() {
  for ( ;; ) {
    //--- if we've read everything already, return NULL.     ---
    //--- This must be tested BEFORE opendir(): path is NULL ---
    //--- once nextPath() has gone past the last directory.  ---
    if ( path == NULL )
      return NULL;

    //--- open the directory the first time we visit it ---
    if ( firstTime ) {
      dir = opendir( path );
      if ( dir == NULL )
        return NULL;    // firstTime stays 1: a later call retries the
                        // open instead of reading from a NULL stream
      firstTime = 0;
    }

    //--- get next entry; NULL means this directory is done ---
    struct dirent *ent = readdir( dir );
    if ( ent == NULL ) {
      closedir( dir );
      dir = NULL;
      path = nextPath();
      firstTime = 1;
      continue;
    }

    //--- skip . and .. files ---
    if ( strcmp( ent->d_name, "." ) == 0
         || strcmp( ent->d_name, ".." ) == 0 )
      continue;

    //--- return file ---
    return ent->d_name;
  }
}