IS getWikiInfoSource

From dftwiki3
Revision as of 14:15, 1 July 2008 by Thiebaut (talk | contribs)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to: navigation, search

<html>
<head>
<title>Get Wikidata Example</title>
<!-- D. Thiebaut (c) 2008 -->

</head>

<body>

<?php
// Link to the plain-text copy of this script (same path, ".php" -> ".txt").
// PHP_SELF is derived from the request URL, so it is HTML-escaped before
// being placed inside the href attribute.
printf( "(<a href=\"%s\">Source</a>)",
        htmlspecialchars( str_ireplace( ".php", ".txt", $_SERVER[ 'PHP_SELF' ] ), ENT_QUOTES ) );
?>


<?php

include_once 'accessvars.php'; // include user login info (private)

//--------------------------------------------------------------
// displayParams: debugging function listing the access
//                parameters for the database.
// Reads the global $params array (presumably defined by
// accessvars.php -- it is not defined in this file) and prints
// one HTML-escaped "key = value<br>" line per entry.
// Prints nothing when $params is not an array.
//--------------------------------------------------------------
function displayParams() {
    //--- $params lives in the global scope, not in this function ---
    global $params;

    if ( ! is_array( $params ) )
        return;

    foreach ( $params as $key => $value ) {
        printf( "%s = %s<br>\n",
                htmlspecialchars( (string) $key, ENT_QUOTES ),
                htmlspecialchars( (string) $value, ENT_QUOTES ) );
    }
}

//--------------------------------------------------------------
// connectToDB:
// opens a connection to the MySQL server $host as $user/$passwd
// and selects $database on it.
// Returns array( link, message ):
//   - on success: the value returned by mysql_connect and "";
//   - on failure: 0 and a message describing which step failed,
//     followed by the text of mysql_error().
// NOTE: relies on the legacy mysql_* extension, which was removed
// from PHP in version 7.0.
//--------------------------------------------------------------
function connectToDB( $host, $user, $passwd, $database ) {

    $connection = mysql_connect( $host, $user, $passwd );

    if ( $connection ) {
        //--- server reached: now pick the database ---
        if ( mysql_select_db( $database, $connection ) ) {
            return array( $connection, "" );
        }
        $reason = "Could not connect to $database\n";
    }
    else {
        $reason = "Could not connect to mySql host on $host\n";
    }

    //--- either step failed: report it along with MySQL's own message ---
    return array( 0, $reason . mysql_error() );
}

//--------------------------------------------------------------
// getNRows: runs $query with " LIMIT $N" appended to it.
// Returns
//   - on success: array( number of rows, "", result handle ),
//     where the handle is what mysql_query returned; the caller
//     fetches the rows from it;
//   - on failure: array( 0, error message ) -- note that there is
//     no third element in that case.
//--------------------------------------------------------------
function getNRows( $query,  $N=1 ) {

   $limitedQuery = "$query LIMIT $N";

   $rows = mysql_query( $limitedQuery );

   if ( $rows ) {
       //--- row count first, blank error string next, handle third ---
       return array( mysql_num_rows( $rows ), "", $rows );
   }

   return array( 0, "Error accessing table" . mysql_error() );
}

//--------------------------------------------------------------
// getPageIdsFromKeyword: finds the pages whose title contains
// every one of the given keywords.
//   $keywords: array of keyword strings (a single string is
//              treated as a one-keyword list).  Each keyword is
//              escaped with mysql_real_escape_string; % and _
//              inside a keyword still act as LIKE wildcards.
//   $N:        maximum number of rows requested from the table.
// Returns an array mapping PageId => title, empty when there are
// no keywords, no match, or the query failed.
// Side effect: prints the generated query (debugging aid).
//--------------------------------------------------------------
function getPageIdsFromKeyword( $keywords, $N=100 ) {
    $keywords = (array) $keywords;

    //--- nothing to search for: do not send a dangling WHERE ---
    if ( count( $keywords ) == 0 ) {
        return array();
    }

    //--- one LIKE clause per keyword; all of them must match ---
    $clauses = array();
    foreach ( $keywords as $keyword ) {
        $clauses[] = " `title` LIKE '%" . mysql_real_escape_string( $keyword ) . "%' ";
    }
    $query = "SELECT `PageId`, `title` FROM `pages` WHERE " . implode( " AND ", $clauses );
    print "<P>Query = " . htmlspecialchars( $query ) . "<P>";
    $resultArray = getNRows( $query, $N );

    //--- if we didn't get anything back, return an empty array ---
    if ( $resultArray[0]==0 ) {
        return array();
    }

    //--- else, put the Ids and page titles of every row in an array ---
    $rows = $resultArray[2];
    $data = array();
    while ( $line = mysql_fetch_array( $rows, MYSQL_ASSOC ) ) {
        $data[ $line[ 'PageId' ] ] = $line[ 'title' ];
    }
    return $data;
}

//--------------------------------------------------------------
// getContributors: gets the contributor Ids recorded in the
// `revisions` table for one or several pages.
//   $Id: a single page Id, a comma-separated string of page Ids,
//        or an array of page Ids.  Each Id is trimmed, escaped
//        and quoted before being placed in the IN (...) list.
//   $N:  maximum number of rows requested from the table.
// Returns a 0-based array of contributorId values (one per row,
// duplicates included), empty when no Id was given, nothing was
// found, or the query failed.
// Side effect: prints the generated query (debugging aid).
//--------------------------------------------------------------
function getContributors( $Id, $N=100 ) {
    //--- normalise the argument to a list of Ids ---
    $ids = is_array( $Id ) ? $Id : explode( ',', (string) $Id );
    if ( count( $ids ) == 0 ) {
        return array();
    }

    $quoted = array();
    foreach ( $ids as $oneId ) {
        $quoted[] = "'" . mysql_real_escape_string( trim( (string) $oneId ) ) . "'";
    }

    $query = 'select `contributorId` from `revisions` where `pageId` in ('
        . implode( ',', $quoted ) . ')';

    print "<P>Query = " . htmlspecialchars( $query ) . "<P>";
    $resultArray = getNRows( $query, $N );

    //--- if we didn't get anything back, return an empty array ---
    if ( $resultArray[0]==0 ) {
        return array();
    }

    //--- else, collect the contributor Id of every row ---
    $rows = $resultArray[2];
    $contributors = array();
    while ( $line = mysql_fetch_array( $rows, MYSQL_ASSOC ) ) {
        $contributors[] = $line[ 'contributorId' ];
    }

    return $contributors;

}

//--------------------------------------------------------------
// printPageIdsTitles: prints the number of entries in $data,
// then an HTML table with one "Id / Title" row per entry.
//   $data: array mapping page Id => page title.
// The heading row is printed only when $data is not empty.
// Ids and titles are passed to printf as arguments (never as part
// of the format string, so a '%' in a title is printed as is) and
// are HTML-escaped.
//--------------------------------------------------------------
function printPageIdsTitles( $data ) {
    print "<P>Found " . count( $data ) . " Ids/Titles in table";

    //--- display the rows ---

    print "<TABLE border=\"1\" cellpadding=\"10\">\n";

    if ( count( $data ) > 0 )
        print "<TR><TD><B> Id </B></TD><TD><B> Title </B></TD></TR>";

    foreach( $data as $Id => $Title ) {
        printf( "<TR><TD> %s </TD><TD> %s </TD></TR>",
                htmlspecialchars( (string) $Id, ENT_QUOTES ),
                htmlspecialchars( (string) $Title, ENT_QUOTES ) );
    }
    print "</TABLE>\n";
}

//--------------------------------------------------------------
// printContributors: prints the number of entries in
// $contributors, then an HTML table with one row per entry.
//   $contributors: array of contributor Ids (any keys).
// The heading row is printed only when the array is not empty.
// Ids are passed to printf as arguments (never as part of the
// format string, so a '%' is printed as is) and are HTML-escaped.
//--------------------------------------------------------------
function printContributors( $contributors ) {
    print "<P>Found " . count( $contributors ) . " contributors to given Id";

    //--- display the rows ---

    print "<TABLE border=\"1\" cellpadding=\"10\">\n";

    if ( count( $contributors ) > 0 )
        print "<TR><TD><B> Contributor Id </B></TD></TR>";

    foreach ( $contributors as $contributorId ) {
        printf( "<TR><TD>%s</TD></TR>",
                htmlspecialchars( (string) $contributorId, ENT_QUOTES ) );
    }
    print "</TABLE>\n";
}

//--------------------------------------------------------------
// main: connects to the database described by the global $params
//       (keys used: host, user, passwd, database), lists the
//       pages whose title matches the hard-coded keywords, then
//       lists up to 100 contributors of the first page found.
//       The connection is closed on every path that opened it.
//--------------------------------------------------------------
function main( ) {
  global $params;

  //--- get a link to the database ---
  $linkArray = connectToDB( $params['host'], $params['user'], $params['passwd'],
                            $params['database'] );
  $link = $linkArray[0];
  $error = $linkArray[1];

  //--- success? (the detailed error text is deliberately not shown) ---
  if ( strlen( $error )>0 ) {
    printf( "<h1>Error accessing database</h1>" );
    return;
  }

  //--- get Ids of pages with title matching keyword ---
  $keywords = array( "Hillary", "Clinton" );
  $keywordList = implode( ', ', $keywords );

  printf( "<h1>Some statistics about Wiki pages containing keywords: %s</h1>" , $keywordList );
  $data = getPageIdsFromKeyword( $keywords );

  //--- success? ---
  if ( count( $data )== 0 ) {
      printf( "<h1>No pages found for keyword %s</h1>\n", $keywordList );
  }
  else {
      //--- print page Ids and Titles ---
      printPageIdsTitles( $data );

      //--- get Id of first page listed (rewind first so key() is the first key) ---
      reset( $data );
      $firstId = key( $data );
      printf( "<P>First Id = %s", $firstId );

      //--- get the first 100 or fewer contributors for this page ---
      $contributors = getContributors( $firstId, 100 );
      printContributors( $contributors );
  }

  //--- close connection to MySQL server ---
  mysql_close( $link );

}


// Script entry point: main() is called when this part of the page is reached.
main(  ); 
?>

</body>
</html>