<?php
# SQL> CREATE TABLE keywords (
# 2 kwID INT AUTO_INCREMENT PRIMARY KEY,
# 3 keyword VARCHAR(64) );
#
# SQL> CREATE TABLE url_title (
# 2 urlID INT AUTO_INCREMENT PRIMARY KEY,
# 3 url VARCHAR(128),
# 4 title VARCHAR(128) );
#
# SQL> CREATE TABLE www_index (
# 2 kwID INT,
# 3 urlID INT,
# 4 PRIMARY KEY ( kwID, urlID ),
# 5 FOREIGN KEY ( kwID ) REFERENCES keywords ( kwID ),
# 6 FOREIGN KEY ( urlID ) REFERENCES url_title ( urlID ) );
# Both command-line arguments are required: the keyword to index and the URL
# of the page to scan.  Stop early instead of carrying empty values into the
# shell command and the SQL statements built further down.
if ( !isset( $argv[1], $argv[2] ) || $argv[1] === '' || $argv[2] === '' )
  die( "Usage: php " . basename( __FILE__ ) . " <keyword> <URL>\n" );
$keyword = $argv[1];
$URL = $argv[2];
# password.php contains only one line: $password="your-pw";
# require (not include) halts the script when the file is missing, rather than
# attempting to connect with an undefined password.
require 'password.php';
$host = "undcemmysql.mysql.database.azure.com";
$username = "your-id";
$database = "your-db";
# Open a TLS connection; the CA certificate file is looked up relative to the
# current working directory.
$conn = mysqli_init( );
mysqli_ssl_set( $conn, NULL, NULL, "DigiCertGlobalRootCA.crt.pem", NULL, NULL );
# Check the return value as well as the error number so that a failed connect
# can never fall through to the queries below.
if ( !mysqli_real_connect( $conn, $host, $username, $password, $database, 3306 )
     || mysqli_connect_errno( ) )
  die( 'Failed to connect to MySQL: ' . mysqli_connect_error( ) );
# Dump the source code to the file result.txt.
# The URL comes straight from the command line, so it is passed through
# escapeshellarg(): the whole value becomes one quoted shell word and quotes or
# other metacharacters inside it cannot start additional commands.
$cmd = "lynx -dump -source " . escapeshellarg( (string) $URL ) . " > result.txt";
# Permissions on ../2/ (and result.txt) are opened up while lynx writes the
# dump and the directory is set back to 755 afterwards.
# NOTE(review): presumably ../2/ is the directory this script runs in - confirm.
system( "chmod 777 result.txt ../2/" );
system( $cmd );
system( "chmod 755 ../2/" );
# Find the page title by using a regular expression.
/**
 * Returns the text of the first <title> element found in an HTML document.
 *
 * The match is case-insensitive, allows attributes on the opening tag and
 * lets the title span several lines.  Markup inside the title is stripped and
 * runs of whitespace are collapsed to single spaces.
 *
 * @param string|false $html Page source, or false when it could not be read.
 * @return string The title text, or '' when there is no usable title.
 */
function extract_page_title( $html ) {
  if ( !is_string( $html ) )
    return '';
  # preg_match() returns 1 only on a match; 0 (no match) and false (PCRE error)
  # both mean there is nothing to read from $matches.
  if ( preg_match( '/<title\b[^>]*>.*?<\/title\s*>/is', $html, $matches ) !== 1 )
    return '';
  $text = strip_tags( $matches[0] );
  return trim( preg_replace( '/[ \t\r\n]+/', ' ', $text ) );
}
# Guard the read so a missing dump yields an empty title instead of a warning.
$html = is_readable( "result.txt" ) ? file_get_contents( "result.txt" ) : false;
$title = extract_page_title( $html );
# Check whether the page contains the keyword.
$file = fopen( "result.txt", "r" ) or
  exit( "Unable to open file!" );
$found = false;
$needle = (string) $keyword;
# An empty keyword cannot meaningfully be "contained" in a page; skipping the
# scan also avoids handing an empty needle to the string search.
if ( $needle !== '' ) {
  # Loop on fgets() itself instead of feof(): fgets() returns false at end of
  # file and on a read error alike, so the loop always terminates.
  while ( ( $line = fgets( $file ) ) !== false ) {
    # Case-sensitive match within a single line of the page source.
    if ( strpos( $line, $needle ) !== false ) {
      $found = true;
      break;
    }
  }
}
fclose( $file );
# Find the ID of the input keyword from the keywords table, adding the keyword
# first when it is not there yet.  The keyword is user input, so both
# statements bind it as a parameter instead of splicing it into the SQL text.
$kwID = null;
$kwText = (string) $keyword;
# Trace output only; the statement actually executed is the prepared one below.
echo( "SELECT kwID FROM keywords WHERE keyword='$keyword';" . "\n\n" );
$stmt = mysqli_prepare( $conn, "SELECT kwID FROM keywords WHERE keyword = ?" );
if ( $stmt === false )
  die( 'Failed to prepare keyword lookup: ' . mysqli_error( $conn ) );
mysqli_stmt_bind_param( $stmt, "s", $kwText );
if ( !mysqli_stmt_execute( $stmt ) )
  die( 'Failed to run keyword lookup: ' . mysqli_stmt_error( $stmt ) );
mysqli_stmt_bind_result( $stmt, $fetchedID );
# If the table holds duplicate keywords the last matching row wins, as before.
while ( mysqli_stmt_fetch( $stmt ) )
  $kwID = $fetchedID;
mysqli_stmt_close( $stmt );
if ( $kwID === null ) {
  echo( "INSERT INTO keywords( keyword ) VALUES ( '$keyword' );" . "\n\n" );
  $stmt = mysqli_prepare( $conn, "INSERT INTO keywords( keyword ) VALUES ( ? )" );
  if ( $stmt === false )
    die( 'Failed to prepare keyword insert: ' . mysqli_error( $conn ) );
  mysqli_stmt_bind_param( $stmt, "s", $kwText );
  if ( !mysqli_stmt_execute( $stmt ) )
    die( 'Failed to insert keyword: ' . mysqli_stmt_error( $stmt ) );
  # The AUTO_INCREMENT value generated by the INSERT is the new kwID, so the
  # row does not have to be selected again.
  $kwID = mysqli_stmt_insert_id( $stmt );
  mysqli_stmt_close( $stmt );
}
# Find the ID of the input URL from the url_title table, adding the URL and
# its page title first when it is not there yet.  The URL is user input and
# the title is scraped from a remote page, so both are bound as parameters
# instead of being spliced into the SQL text.
$urlID = null;
$urlText = (string) $URL;
$titleText = (string) $title;
# Trace output only; the statement actually executed is the prepared one below.
echo( "SELECT urlID FROM url_title WHERE url='$URL';" . "\n\n" );
$stmt = mysqli_prepare( $conn, "SELECT urlID FROM url_title WHERE url = ?" );
if ( $stmt === false )
  die( 'Failed to prepare URL lookup: ' . mysqli_error( $conn ) );
mysqli_stmt_bind_param( $stmt, "s", $urlText );
if ( !mysqli_stmt_execute( $stmt ) )
  die( 'Failed to run URL lookup: ' . mysqli_stmt_error( $stmt ) );
mysqli_stmt_bind_result( $stmt, $fetchedID );
# If the table holds duplicate URLs the last matching row wins, as before.
while ( mysqli_stmt_fetch( $stmt ) )
  $urlID = $fetchedID;
mysqli_stmt_close( $stmt );
if ( $urlID === null ) {
  echo( "INSERT INTO url_title( url, title ) VALUES ( '$URL', '$title' );" . "\n\n" );
  $stmt = mysqli_prepare( $conn, "INSERT INTO url_title( url, title ) VALUES ( ?, ? )" );
  if ( $stmt === false )
    die( 'Failed to prepare URL insert: ' . mysqli_error( $conn ) );
  mysqli_stmt_bind_param( $stmt, "ss", $urlText, $titleText );
  if ( !mysqli_stmt_execute( $stmt ) )
    die( 'Failed to insert URL: ' . mysqli_stmt_error( $stmt ) );
  # The AUTO_INCREMENT value generated by the INSERT is the new urlID, so the
  # row does not have to be selected again.
  $urlID = mysqli_stmt_insert_id( $stmt );
  mysqli_stmt_close( $stmt );
}
# Update the inverted list if the keyword is found.
if ( $found ) {
  if ( !isset( $kwID, $urlID ) ) {
    # Without both IDs there is no valid (kwID, urlID) pair to record.
    echo( "Skipping www_index update: keyword ID or URL ID is missing.\n\n" );
  } else {
    # (kwID, urlID) is the primary key of www_index, so indexing the same pair
    # again would fail with a duplicate-key error; the no-op ON DUPLICATE KEY
    # UPDATE clause makes a repeated run harmless.  %d forces both values to
    # integers before they reach the SQL text.
    $query = sprintf(
      "INSERT INTO www_index ( kwID, urlID ) VALUES ( %d, %d ) ON DUPLICATE KEY UPDATE kwID = kwID",
      $kwID,
      $urlID
    );
    echo( $query . "\n\n" );
    if ( !mysqli_query( $conn, $query ) )
      echo( 'Failed to update www_index: ' . mysqli_error( $conn ) . "\n\n" );
  }
}
// Close the connection.
mysqli_close( $conn );
?>
|