Difference between revisions of "IS getWikiInfoSource"
(New page: <code><pre> <html> <head> <title>Get Wikidata Example</title> <!-- D. Thiebaut (c) 2008 --> </head> <body> <?php printf( "(<a href=\"%s\">Source</a>)", str_ireplace( ".php", ".txt", $...) |
|||
Line 1: | Line 1: | ||
− | |||
<code><pre> | <code><pre> | ||
Line 102: | Line 101: | ||
$data[ $Id ] = $Title; | $data[ $Id ] = $Title; | ||
} | } | ||
− | #print "<P>data = | + | #print "<P>data = <pre>"; |
#print_r( $data ); | #print_r( $data ); | ||
− | #print " | + | #print "</PRE><P>"; |
return $data; | return $data; | ||
} | } | ||
Line 138: | Line 137: | ||
} | } | ||
− | #print "<P>contributor data = | + | #print "<P>contributor data = <pre>"; |
#print_r( $contributors ); | #print_r( $contributors ); | ||
− | #print " | + | #print "</PRE><P>"; |
return $contributors; | return $contributors; | ||
Latest revision as of 14:15, 1 July 2008
<html>
<head>
<title>Get Wikidata Example</title>
<!-- D. Thiebaut (c) 2008 -->
</head>
<body>
<?php
// Emit a "(Source)" link pointing at the .txt copy of this script.
// PHP_SELF can carry attacker-controlled path info (e.g. /page.php/"><script>),
// so it is HTML-escaped before being placed inside the href attribute.
printf( "(<a href=\"%s\">Source</a>)",
        htmlspecialchars( str_ireplace( ".php", ".txt", $_SERVER[ 'PHP_SELF' ] ), ENT_QUOTES ) );
?>
<?php
include_once 'accessvars.php'; // include user login info (private)
//--------------------------------------------------------------
// displayParams: debugging function listing the access
// parameters for the database.
// Reads the global $params array (the same global main() uses;
// presumably defined by accessvars.php -- TODO confirm) and
// prints one "key = value<br>" line per entry.
// Prints nothing when $params is not an array.
// NOTE: the listing includes every entry of $params, so use it
// for debugging only.
//--------------------------------------------------------------
function displayParams() {
    // $params lives in the global scope; without this declaration the
    // loop below would see an undefined local and list nothing.
    global $params;
    if ( ! is_array( $params ) ) {
        return;
    }
    foreach ( $params as $key => $value ) {
        printf( "%s = %s<br>\n", $key, $value );
    }
}
//--------------------------------------------------------------
// connectToDB:
// opens a MySQL connection to server $host as $user / $passwd
// and then selects the database named $database on it.
// Returns a 2-element array( link, message ):
//   - on success: array( <connection link>, "" )
//   - on failure: array( 0, <text describing which step failed,
//                           followed by mysql_error()> )
//--------------------------------------------------------------
function connectToDB( $host, $user, $passwd, $database ) {
    $connection = mysql_connect( $host, $user, $passwd );
    if ( $connection ) {
        $selected = mysql_select_db( $database, $connection );
        if ( $selected ) {
            return array( $connection, "" );
        }
        //--- server reachable, but the database could not be selected ---
        return array( 0, "Could not connect to $database\n" . mysql_error() );
    }
    //--- could not even reach / log into the server ---
    return array( 0, "Could not connect to mySql host on $host\n" . mysql_error() );
}
//--------------------------------------------------------------
// getNRows: runs $query with " LIMIT $N" appended, on the
// current default MySQL connection.
// Returns:
//   - on failure: array( 0, <error text + mysql_error()> )
//   - on success: array( <number of rows>, "", <result resource> )
// The rows themselves are NOT fetched here; the caller reads
// them from the result resource (third element).
//--------------------------------------------------------------
function getNRows( $query, $N=1 ) {
    $limitedQuery = "{$query} LIMIT {$N}";
    $resultSet = mysql_query( $limitedQuery );
    if ( $resultSet ) {
        //--- row count first, empty error string next, result set third ---
        return array( mysql_num_rows( $resultSet ), "", $resultSet );
    }
    return array( 0, "Error accessing table" . mysql_error() );
}
//--------------------------------------------------------------
// getPageIdsFromKeyword: looks up the pages whose title contains
// ALL of the given keywords.
//   $keywords: array of strings; each one becomes a
//              `title` LIKE '%keyword%' condition, and the
//              conditions are ANDed together.
//   $N:        maximum number of rows requested (passed to
//              getNRows as the LIMIT).
// Prints the generated query (debugging aid) and returns an
// array mapping PageId => title, or an empty array when there
// are no keywords, the query fails, or nothing matches.
// Requires an open MySQL connection (used for escaping and for
// the query itself).
//--------------------------------------------------------------
function getPageIdsFromKeyword( $keywords, $N=100 ) {
    //--- one LIKE condition per keyword; escape each keyword so quotes
    //--- in it cannot break out of the SQL string literal ---
    $conditions = array();
    foreach ( $keywords as $keyword ) {
        $conditions[] = "`title` LIKE '%" . mysql_real_escape_string( $keyword ) . "%'";
    }

    //--- no keywords: a bare "WHERE" would be invalid SQL, so stop here ---
    if ( count( $conditions ) == 0 ) {
        return array();
    }

    $query = "SELECT `PageId`, `title` FROM `pages` WHERE " . implode( " AND ", $conditions );
    print "<P>Query = " . htmlspecialchars( $query ) . "<P>";

    $resultArray = getNRows( $query, $N );

    //--- if we didn't get anything back, return an empty array ---
    if ( $resultArray[0]==0 ) {
        return array();
    }

    //--- else, put the Ids and page titles in an array.  Every row is
    //--- kept: the first row is a regular match, not a header ---
    $data = array();
    while ( $line = mysql_fetch_array( $resultArray[2], MYSQL_ASSOC ) ) {
        $data[ $line[ 'PageId' ] ] = $line[ 'title' ];
    }
    return $data;
}
//--------------------------------------------------------------
// getContributors: returns the contributorId of the revisions
// recorded for one or several pages.
//   $Id: either a single page Id (inserted into the query as
//        given, as before), or an array of page Ids, each of
//        which is converted to an integer and joined into the
//        IN ( ... ) list.
//   $N:  maximum number of rows requested (passed to getNRows
//        as the LIMIT).
// Prints the generated query (debugging aid) and returns a list
// of contributorId values, one per revision row, or an empty
// array when $Id is an empty array, the query fails, or no row
// matches.
//--------------------------------------------------------------
function getContributors( $Id, $N=100 ) {
    if ( is_array( $Id ) ) {
        //--- "in ()" is invalid SQL, so an empty list has no contributors ---
        if ( count( $Id ) == 0 ) {
            return array();
        }
        $idList = implode( ',', array_map( 'intval', $Id ) );
    } else {
        $idList = $Id;
    }

    $query = 'select `contributorId` from `revisions` where `pageId` in ('
           . $idList . ')';
    print "<P>Query = " . htmlspecialchars( $query ) . "<P>";

    $resultArray = getNRows( $query, $N );

    //--- if we didn't get anything back, return an empty array ---
    if ( $resultArray[0]==0 ) {
        return array();
    }

    //--- else, collect the contributor Ids.  Every row is kept: the
    //--- first row is a regular result, not a header ---
    $contributors = array();
    while ( $line = mysql_fetch_array( $resultArray[2], MYSQL_ASSOC ) ) {
        $contributors[] = $line[ 'contributorId' ];
    }
    return $contributors;
}
//--------------------------------------------------------------
// printPageIdsTitles: prints how many entries $data holds, then
// an HTML table with one row per entry.
//   $data: array mapping page Id => page title.
// A header row (Id / Title) is printed only when there is at
// least one entry.  Ids and titles are HTML-escaped and passed
// to printf as arguments, never as part of the format string,
// so characters such as % < > & in a title are shown literally.
//--------------------------------------------------------------
function printPageIdsTitles( $data ) {
    print "<P>Found " . count( $data ) . " Ids/Titles in table";

    //--- display the rows ---
    print "<TABLE border=\"1\" cellpadding=\"10\">\n";
    if ( count( $data ) > 0 ) {
        print "<TR><TD><B> Id </B></TD><TD><B> Title </B></TD></TR>";
    }
    foreach ( $data as $Id => $Title ) {
        printf( "<TR><TD> %s </TD><TD> %s </TD></TR>",
                htmlspecialchars( (string) $Id, ENT_QUOTES ),
                htmlspecialchars( (string) $Title, ENT_QUOTES ) );
    }
    print "</TABLE>\n";
}
//--------------------------------------------------------------
// printContributors: prints how many entries $contributors
// holds, then an HTML table with one row per contributor Id.
//   $contributors: array of contributor Ids.
// A header row is printed only when there is at least one
// entry.  Each Id is HTML-escaped and passed to printf as an
// argument, never as part of the format string, so a % in the
// data cannot be misread as a format specifier.
//--------------------------------------------------------------
function printContributors( $contributors ) {
    print "<P>Found " . count( $contributors ) . " contributors to given Id";

    //--- display the rows ---
    print "<TABLE border=\"1\" cellpadding=\"10\">\n";
    if ( count( $contributors ) > 0 ) {
        print "<TR><TD><B> Contributor Id </B></TD></TR>";
    }
    foreach ( $contributors as $contributor ) {
        printf( "<TR><TD>%s</TD></TR>", htmlspecialchars( (string) $contributor, ENT_QUOTES ) );
    }
    print "</TABLE>\n";
}
//--------------------------------------------------------------
// main: connects to the database described by the global
// $params (keys 'host', 'user', 'passwd', 'database'), looks up
// the pages whose title contains the demo keywords, prints them
// as a table, then prints the contributors of the first page
// found.  The connection is closed on every path that opened it.
//--------------------------------------------------------------
function main( ) {
    global $params;

    //--- get a link to the database ---
    $linkArray = connectToDB( $params['host'], $params['user'], $params['passwd'],
                              $params['database'] );
    $link  = $linkArray[0];
    $error = $linkArray[1];

    //--- success? ---
    if ( strlen( $error )>0 ) {
        printf( "<h1>Error accessing database</h1>" );
        return;
    }

    //--- get Ids of pages with title matching keyword ---
    $keywords = array( "Hillary", "Clinton" );
    printf( "<h1>Some statistics about Wiki pages containing keywords: %s</h1>" , implode( ', ', $keywords ) );
    $data = getPageIdsFromKeyword( $keywords );

    //--- success? ---
    if ( count( $data )== 0 ) {
        printf( "<h1>No pages found for keyword %s</h1>\n", implode( ', ', $keywords ) );
        //--- do not leave the connection open on this early exit ---
        mysql_close( $link );
        return;
    }

    //--- print page Ids and Titles ---
    printPageIdsTitles( $data );

    //--- get Id of first page listed; rewind first so key() does not
    //--- depend on where earlier iteration left the array pointer ---
    reset( $data );
    $firstId = key( $data );
    printf( "<P>First Id = %s", htmlspecialchars( (string) $firstId, ENT_QUOTES ) );

    //--- get the first 100 or fewer contributors for this page ---
    $contributors = getContributors( $firstId, 100 );
    printContributors( $contributors );

    //--- close connection to MySQL server ---
    mysql_close( $link );
}
// Script entry point: run the demo each time this page is executed.
main( );
?>
</body>
</html>