Tutorial: C + MySQL + MPI
--D. Thiebaut (talk) 16:30, 13 October 2013 (EDT)
The purpose of this tutorial/lab is to generate an MPI program written in C that walks a directory containing image files, gets their geometry in the form of a width and height, and enters this information in a MySQL database.
Contents
References
Verify that MPI works
- test your installation with the classic hello world program.
Source
// mpi_hello.c
#include <mpi.h>
#include <stdio.h>
/* Entry point: each MPI process prints one greeting line showing its own
 * id and the total number of processes, then shuts MPI down. */
int main (int argc, char* argv[])
{
    int myRank;
    int numProcs;

    /* bring up MPI */
    MPI_Init (&argc, &argv);

    /* id of this process, then the number of processes taking part */
    MPI_Comm_rank (MPI_COMM_WORLD, &myRank);
    MPI_Comm_size (MPI_COMM_WORLD, &numProcs);

    printf( "Hello world from process %d of %d\n", myRank, numProcs );

    /* tear MPI down before leaving */
    MPI_Finalize();
    return 0;
}
Compile & Run
mpicc -o hello mpi_hello.c
mpirun -np 2 ./hello
Hello world from process 0 of 2
Hello world from process 1 of 2
Verify that the MySQL API works
Using the example and tricks provided at http://www.cyberciti.biz/tips/linux-unix-connect-mysql-c-api-program.html, we can easily test whether we can access a MySQL database from our program:
Source
// mysqlTest.c
// Taken from http://www.cyberciti.biz
// Lists all the tables found in a MySQL database whose name is stored
// in the char[] database variable.
#include <mysql.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Connects to the MySQL server with the credentials below, runs
 * "show tables" on the selected database and prints one table name per line.
 * Returns 0 on success; on any failure an error message is printed on
 * stderr and the program exits with status 1.
 */
int main(void) {
    MYSQL *conn;
    MYSQL_RES *res;
    MYSQL_ROW row;
    const char *server = "localhost";
    const char *user = "352a";
    const char *password = "xxxxxx";
    const char *database = "enwiki_images";

    /* mysql_init returns NULL when the handle cannot be allocated */
    conn = mysql_init(NULL);
    if (conn == NULL) {
        fprintf(stderr, "mysql_init failed\n");
        exit(1);
    }

    /* Connect to database */
    if (!mysql_real_connect(conn, server,
                            user, password, database, 0, NULL, 0)) {
        fprintf(stderr, "%s\n", mysql_error(conn));
        mysql_close(conn);
        exit(1);
    }

    /* send SQL query */
    if (mysql_query(conn, "show tables")) {
        fprintf(stderr, "%s\n", mysql_error(conn));
        mysql_close(conn);
        exit(1);
    }

    /* a NULL result set must not be handed to mysql_fetch_row */
    res = mysql_use_result(conn);
    if (res == NULL) {
        fprintf(stderr, "%s\n", mysql_error(conn));
        mysql_close(conn);
        exit(1);
    }

    /* output table name */
    printf("MySQL Tables in mysql database:\n");
    while ((row = mysql_fetch_row(res)) != NULL)
        printf("%s \n", row[0]);

    /* close connection */
    mysql_free_result(res);
    mysql_close(conn);
    return 0;
}
Compile & Run
- Once you have installed mysql_config on your system, you can easily get the library switches and compiler switches corresponding to your installation. They are given by
- mysql_config --cflags
- mysql_config --libs
mysql_config --cflags
-I/usr/include/mysql -DBIG_JOINS=1 -fno-strict-aliasing -g -DNDEBUG
mysql_config --libs
-L/usr/lib/x86_64-linux-gnu -lmysqlclient -lpthread -lz -lm -ldl
To compile, simply pass on the output of the mysql_config commands, or substitute their output in the compile line:
gcc -o mysqlTest $(mysql_config --cflags) mysqlTest.c $(mysql_config --libs)
To run:
./mysqlTest
images        <--- the only table was images
Calling System Applications from C
In our application we need to process a large collection of images and get their geometry information. To get the width and height of an image, we simply run ImageMagick's identify command from the C program and parse the width and height from its output.
Source
// getImageInfo.c
// D. Thiebaut
// Syntax:
// getImageInfo imageFileName
//
// Takes the image file name and runs identify (part of the ImageMagick tools) to
// get the image width and height in pixels.
//
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Runs "/usr/bin/identify" on the image named in argv[1], echoes every line
 * it outputs, and prints the width and height parsed from that line.
 * Returns 0 on success. Exits with status 1 on a usage error, when memory
 * for the command cannot be allocated, or when the pipe cannot be opened.
 */
int main( int argc, char *argv[] ) {
    static const char prefix[] = "/usr/bin/identify -format \"%w %h\" ";
    FILE* fp;
    char *command;
    char buffer[1000];
    int width, height;

    //--- display syntax info if user does not specify image name---
    if ( argc <= 1 ) {
        fprintf( stderr, "Syntax: getImageInfo imageFilename\n\n" );
        exit( 1 );
    }

    //--- create the command with the file name (passed in argv).     ---
    //--- The buffer is sized from the actual name, so a long file    ---
    //--- name cannot overflow it.                                    ---
    //--- NOTE(review): argv[1] is handed to the shell unquoted; a    ---
    //--- name containing spaces or shell metacharacters will not be  ---
    //--- passed to identify as a single argument.                    ---
    command = malloc( sizeof prefix + strlen( argv[1] ) );
    if ( command == NULL ) {
        fprintf( stderr, "getImageInfo: out of memory\n" );
        exit( 1 );
    }
    strcpy( command, prefix );
    strcat( command, argv[1] );

    //--- open a pipe, make the command run, and return the information ---
    //--- in the pipe.                                                  ---
    fp = popen( command, "r" );
    if ( fp == NULL ) {
        perror( "popen" );
        free( command );
        exit( 1 );
    }

    while ( fgets( buffer, sizeof buffer, fp ) != NULL ) {
        printf( "%s", buffer );

        //--- split "width height" at the blank; a line without one ---
        //--- is not a geometry line and is skipped                 ---
        char *sep = strchr( buffer, ' ' );
        if ( sep == NULL )
            continue;
        *sep = '\0';

        width  = atoi( buffer );
        height = atoi( sep + 1 );
        printf( "width = %d height = %d\n", width, height );
    }

    //--- close pipe (only reached when the pipe was actually opened) ---
    pclose( fp );
    free( command );
    return 0;
}
Compile & Run
gcc -o getImageInfo getImageInfo.c
./getImageInfo bunny.jpg
1280 800
width = 1280 height = 800
Walking a Directory in C
This program is the first step toward running through our tree of directories containing images: it takes one directory and lists all the entries (images) it contains, so that they can later be processed.
Source
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
/*
 * Prints the name of every entry found in the directory given as argv[1].
 * Returns 0 on success. Exits with status 1 when no path is given or when
 * the directory cannot be opened.
 */
int main( int argc, char **argv) {
    DIR *dir;
    struct dirent *ent;
    const char *path;

    if ( argc < 2 ) {
        fprintf( stderr, "Syntax: ./walk path\n" );
        exit( 1 );
    }

    //--- walk the directory the user asked for ---
    path = argv[1];

    dir = opendir( path );
    if ( dir == NULL ) {
        /* could not open directory */
        perror( path );
        exit( 1 );
    }

    //-- print all the files and directories within directory
    while ( ( ent = readdir( dir ) ) != NULL ) {
        printf( "%s\n", ent->d_name );
    }

    closedir( dir );
    return( 0 );
}
Compile and Run
gcc -o walk walk.c
./walk /media/dominique/3TB/mediawiki/images/wikipedia/en/0/01
.
..
Keep_the_Home_Fires_Burning_-_Frederick_Wheeler.ogg
Voyage_of_the_Jerle_Shannara.jpg
Picw.jpg
...
The_Constructicons.jpg
StarFeathers.jpg
Brickell_district,_Miami.jpg
One_Tree_Hill_-_Season_2_-_DVD.JPG
Silent_All_These_Years_(Single).png
Serial Walk-Images-Store-Database Version
The version below is a serial version that walks the directory, gets the geometry of each image, and enters the image file names and their width and height into the database.
// walkGetSizeAddMySQL.c
// D. Thiebaut
//
// See http://cs.smith.edu/dftwiki/index.php?title=Tutorial:_C_%2B_MySQL_%2B_MPI for
// more information.
//
// to Compile and Run:
// gcc -o walkGetSizeAddMySQL $(mysql_config --cflags) walkGetSizeAddMySQL.c $(mysql_config --libs)
// ./walkGetSizeAddMySQL path n/nn
//
// where "path" is the path to the root directory containing subdirectories of the form 0/00, 0/01, 0/02... a/af, each one
// containing a flat list of image files. The n/nn is one of the subdirectories, for example 0/0a.
//
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <string.h>
#include <mysql.h>
//--- Pair: bundles a width and a height so that a function ---
//--- can return both values in one go                       ---
typedef struct {
    int width, height;
} Pair;
//--- Globals referring to the MySql database ---

//--- login parameters handed to mysql_real_connect() ---
char *server = "localhost";
char *database = "enwiki_images";
char *user = "352a";
char *password = "xxxxxxx"; // MySql password

//--- connection handle shared by the database functions below ---
MYSQL *conn;

//--- result set and current row, used when listing the tables ---
MYSQL_RES *res;
MYSQL_ROW row;

int debug = 0; // set to non-zero to print each file as it is processed
//-----------------------------------------------------
// getPrintTableNames: opens its own connection to the
// database (stored in the global conn), prints the names
// of the table(s) located in the database, then closes
// that connection again.
// On any MySQL error the message is printed on stderr
// and the program exits with status 1.
// Taken from
// http://www.cyberciti.biz/tips/linux-unix-connect-mysql-c-api-program.html
//
void getPrintTableNames() {
    /* mysql_init returns NULL when the handle cannot be allocated */
    conn = mysql_init(NULL);
    if (conn == NULL) {
        fprintf(stderr, "mysql_init failed\n");
        exit(1);
    }

    /* Connect to database */
    if (!mysql_real_connect(conn, server,
                            user, password, database, 0, NULL, 0)) {
        fprintf(stderr, "%s\n", mysql_error(conn));
        exit(1);
    }

    /* send SQL query */
    if (mysql_query(conn, "show tables")) {
        fprintf(stderr, "%s\n", mysql_error(conn));
        exit(1);
    }

    /* a NULL result set must not be handed to mysql_fetch_row */
    res = mysql_use_result(conn);
    if (res == NULL) {
        fprintf(stderr, "%s\n", mysql_error(conn));
        exit(1);
    }

    /* output table name */
    printf("MySQL Tables in mysql database:\n");
    while ((row = mysql_fetch_row(res)) != NULL)
        printf("%s \n", row[0]);

    /* close connection, and clear the globals so that the released */
    /* result set and handle cannot be reused by mistake            */
    mysql_free_result(res);
    res = NULL;
    mysql_close(conn);
    conn = NULL;
}
//-----------------------------------------------------
// insertFileInDatabase: inserts the name of the image
// file, its path (as in "0/01"), its width and height,
// and its scale in the images2 table of the database.
// Uses the connection held in the global conn.
// Both filename and path are escaped before being placed
// in the SQL statement. All buffers are sized from the
// actual string lengths, so long names cannot overflow.
// On any failure a message is printed on stderr and the
// program exits with status 1.
void insertFileInDatabase( char* filename, char* path, int width, int height, float scale ) {
    //--- room for the fixed SQL text plus the three formatted numbers ---
    enum { FIXED_ROOM = 200 };

    size_t nameLen = strlen( filename );
    size_t pathLen = strlen( path );
    size_t queryCap = FIXED_ROOM + 2 * nameLen + 2 * pathLen;

    //--- mysql_real_escape_string needs up to 2*length+1 bytes of output ---
    char *escapedFileName = malloc( 2 * nameLen + 1 );
    char *escapedPath     = malloc( 2 * pathLen + 1 );
    char *query           = malloc( queryCap );

    if ( escapedFileName == NULL || escapedPath == NULL || query == NULL ) {
        fprintf( stderr, "insertFileInDatabase: out of memory\n" );
        exit( 1 );
    }

    if ( mysql_real_escape_string( conn, escapedFileName, filename, nameLen ) == (unsigned long) -1
      || mysql_real_escape_string( conn, escapedPath, path, pathLen ) == (unsigned long) -1 ) {
        fprintf( stderr, "%s\n", mysql_error( conn ) );
        exit( 1 );
    }

    int written = snprintf( query, queryCap,
             "INSERT INTO images2 (name, path, width, height, scale1) VALUES ( '%s', '%s', %d, %d, %f )",
             escapedFileName, escapedPath, width, height, scale );
    if ( written < 0 || (size_t) written >= queryCap ) {
        fprintf( stderr, "insertFileInDatabase: query too long for %s\n", filename );
        exit( 1 );
    }

    // printf( "Query = %s\n", query );
    int retCode = mysql_query( conn, query );

    free( query );
    free( escapedPath );
    free( escapedFileName );

    if ( retCode ) {
        fprintf( stderr, "%s\n", mysql_error( conn ) );
        exit( 1 );
    }
}
//-----------------------------------------------------
// openDatabase: opens a connection to the database that
// is kept in the global conn variable. Uses the global
// server, user, password and database settings.
// Prints a message on stderr and exits with status 1 if
// the handle cannot be created or the connection fails.
void openDatabase() {
    /* mysql_init returns NULL when the handle cannot be allocated */
    conn = mysql_init( NULL );
    if ( conn == NULL ) {
        fprintf( stderr, "mysql_init failed\n" );
        exit( 1 );
    }

    /* Connect to database */
    if ( !mysql_real_connect( conn, server,
                              user, password, database, 0, NULL, 0 ) ) {
        fprintf( stderr, "%s\n", mysql_error( conn ) );
        exit( 1 );
    }
}
//-----------------------------------------------------
// closeDatabase: closes the connection kept in the global
// conn variable and resets conn to NULL, so that calling
// it a second time (or before any connection was opened)
// does nothing instead of releasing the handle twice.
void closeDatabase() {
    if ( conn != NULL ) {
        mysql_close( conn );
        conn = NULL;
    }
}
//-----------------------------------------------------
// getFileGeometry. Calls the system 'identify' command
// to get the geometry of the file, expressed as its
// width and height.
// fileName is placed between double quotes on the command
// line that is handed to the shell.
// Returns a Pair holding the width and height parsed from
// the last "width height" line that identify outputs, or
// 0 and 0 if the command could not be run or produced no
// such line.
Pair getFileGeometry( char* fileName ) {
    static const char prefix[] = "/usr/bin/identify -format \"%w %h\" \"";
    char buffer[1000];
    char *command;
    FILE* fp;
    Pair pair;

    pair.width = 0;
    pair.height = 0;

    //--- create the command and put " around the file name.     ---
    //--- sizeof prefix counts its terminator; one more byte is  ---
    //--- needed for the closing quote. Sizing the buffer from   ---
    //--- the actual name means long paths cannot overflow it.   ---
    command = malloc( sizeof prefix + strlen( fileName ) + 1 );
    if ( command == NULL ) {
        fprintf( stderr, "getFileGeometry: out of memory\n" );
        return pair;
    }
    strcpy( command, prefix );
    strcat( command, fileName );
    strcat( command, "\"" );

    //--- open a pipe to get the output of identify ---
    fp = popen( command, "r" );
    if ( fp == NULL ) {
        //--- nothing was opened, so there is nothing to pclose ---
        free( command );
        return pair;
    }

    while ( fgets( buffer, sizeof buffer, fp ) != NULL ) {
        //--- split the width and height into two strings; a line ---
        //--- without a blank is not a geometry line and is skipped ---
        char *sep = strchr( buffer, ' ' );
        if ( sep == NULL )
            continue;
        *sep = '\0';

        //--- convert them both to ints ---
        pair.width = atoi( buffer );
        pair.height = atoi( sep + 1 );
    }

    pclose( fp );
    free( command );

    //--- return them in a structure ---
    return pair;
}
//-----------------------------------------------------
// getExtension. Returns a pointer to the text that follows
// the last '.' of filename, e.g. "jpg" for "photo.jpg".
// Returns the empty string "" when filename contains no '.'
// or when its only '.' is its very first character.
const char *getExtension(const char *filename) {
    const char *lastDot = strrchr( filename, '.' );

    return ( lastDot != NULL && lastDot != filename ) ? lastDot + 1 : "";
}
//-----------------------------------------------------
// escapeString: puts \ chars in front of special characters
// that might be in the image file-name, so that it can be
// passed to the "identify" command and not cause the shell
// to misunderstand the name for some shell syntax.
// Besides blanks, quotes, parentheses, ',', ':', ';' and '&',
// this also covers '$', '`', '\', '*', '?', '|', '<', '>'
// and the other characters the shell interprets in an
// unquoted word, so a name cannot trigger variable
// expansion, command substitution, globbing or redirection.
//   name:        the NUL-terminated string to escape (not modified)
//   escapedName: receives the escaped, NUL-terminated copy; the
//                caller must provide at least 2*strlen(name)+1 bytes.
void escapeString( char* name, char* escapedName ) {
    static const char special[] = " \t!\"#$&'()*,:;<=>?[\\]^`{|}~";
    char *p = name, *q = escapedName;

    for ( ; *p != '\0'; p++ ) {
        if ( strchr( special, *p ) != NULL ) {
            *(q++) = '\\';
        }
        *(q++) = *p;
    }
    *q = '\0';
}
//---------------------------------------------------------------------------
// isSkippedExtension: returns 1 if extension is one of the formats that
// "identify" cannot recognize (ogg, svg, mid, in lower or upper case),
// 0 otherwise.
//---------------------------------------------------------------------------
static int isSkippedExtension( const char *extension ) {
    static const char *const skipped[] = { "ogg", "OGG", "svg", "SVG", "mid", "MID" };
    size_t i;

    for ( i = 0; i < sizeof skipped / sizeof skipped[0]; i++ ) {
        if ( !strcmp( extension, skipped[i] ) )
            return 1;
    }
    return 0;
}

//---------------------------------------------------------------------------
//                                   M A I N
// Walks the single directory <argv[1]>/<argv[2]> (it does not descend into
// subdirectories), asks "identify" for the geometry of every file that is
// not in a skipped format, and stores name, short path (argv[2]), width and
// height in the database.
// Returns 0 on success. Exits with status 1 on a usage error, when the
// directory path is too long, or when the directory cannot be opened.
//---------------------------------------------------------------------------
int main( int argc, char **argv) {
    enum { PATH_ROOM = 1000, NAME_ROOM = 1000 };

    DIR *dir;
    char path[PATH_ROOM];
    char *shortPath;
    struct dirent *ent;
    Pair pair;
    char escapedFileName[NAME_ROOM];
    //--- path + '/' + escaped name + two quotes + terminator ---
    char pathAndFileName[PATH_ROOM + NAME_ROOM + 4];
    int noEntries = 0;
    int written;

    if ( argc < 3 ) {
        fprintf( stderr, "Syntax: ./walkGetSizeAddMySql path n/nn\n" );
        fprintf( stderr, "where path is the rooted path of where the image\n" );
        fprintf( stderr, "resides, and n/nn is the directory-pair that is\n" );
        fprintf( stderr, "md5 hash of the file name.  Typically 0/01, or a/0a\n" );
        exit( 1 );
    }

    //--- put the arguments into variables: path becomes argv[1], a '/' ---
    //--- if argv[1] does not already end with one, then argv[2]        ---
    shortPath = argv[2];
    {
        size_t rootLen = strlen( argv[1] );
        const char *separator =
            ( rootLen > 0 && argv[1][rootLen - 1] != '/' ) ? "/" : "";

        written = snprintf( path, sizeof path, "%s%s%s", argv[1], separator, argv[2] );
        if ( written < 0 || (size_t) written >= sizeof path ) {
            fprintf( stderr, "path too long: %s %s\n", argv[1], argv[2] );
            exit( 1 );
        }
    }

    //--- open connection to database ---
    openDatabase();

    //--- start walking the image repository ---
    dir = opendir( path );
    if ( dir == NULL ) {
        //--- there was an error opening the directory ---
        perror( path );
        closeDatabase();
        exit( 1 );
    }

    while ( ( ent = readdir( dir ) ) != NULL ) {
        char *fileName = ent->d_name;

        //--- get file extension (points inside fileName, no copy needed) ---
        const char *extension = getExtension( fileName );

        //--- skip . and .. file names ---
        if ( !strcmp( fileName, "." ) || !strcmp( fileName, ".." ) )
            continue;

        //--- skip some format that "identify" cannot recognize ---
        if ( isSkippedExtension( extension ) )
            continue;

        //--- escape special characters for identify; escaping can at ---
        //--- most double the length of the name                      ---
        if ( 2 * strlen( fileName ) + 1 > sizeof escapedFileName ) {
            fprintf( stderr, "skipping over-long file name: %s\n", fileName );
            continue;
        }
        escapeString( fileName, escapedFileName );

        //--- create the full path of the image file name.              ---
        //--- getFileGeometry() wraps its argument in a second pair of  ---
        //--- double quotes, so the shell sees ""name"": two empty      ---
        //--- quoted strings around an unquoted name. That is why the   ---
        //--- name must carry backslash escapes and why the quotes      ---
        //--- added here must stay.                                     ---
        //--- NOTE(review): path itself is not escaped; a root path     ---
        //--- containing blanks or shell characters will not work.      ---
        written = snprintf( pathAndFileName, sizeof pathAndFileName,
                            "\"%s/%s\"", path, escapedFileName );
        if ( written < 0 || (size_t) written >= sizeof pathAndFileName ) {
            fprintf( stderr, "skipping over-long file name: %s\n", fileName );
            continue;
        }

        //--- get the geometry as read by "identify" ---
        pair = getFileGeometry( pathAndFileName );

        //--- keep track of how many files we process ---
        noEntries++;
        if ( debug )
            printf( "%s\t\t%s\t%d %d\n", extension, fileName,
                    pair.width, pair.height );

        //--- add file and its geometry to database ---
        insertFileInDatabase( fileName, shortPath, pair.width, pair.height, 1.0f );
    }

    //--- done!  Close everything ---
    closedir( dir );
    closeDatabase();
    printf( "%d images processed\n\n", noEntries );
    return( 0 );
}