Web Page Expiry Checker - v2.3


9/12/04
 

Web Page Expiry Checker - v2.3

At the moment the Expiry Checker will treat URLs such as --
  (1) http://myweb.lsbu.ac.uk/~bushm/index.html
  (2) http://myweb.lsbu.ac.uk/~bushm/index.shtml
  (3) http://myweb.lsbu.ac.uk/~bushm/
  (4) http://myweb.lsbu.ac.uk/~bushm
-- as being different, which is clearly wrong since they all relate to the same web page. Consequently, it's conceivable that up to four automated email messages could get sent out when one would have been sufficient. In this version I've fixed it so that URLs in the form of (1), (2) or (3) are truncated to a URL in the form of (4).

Also, sometimes we put a "?" at the end of URLs to force the browser to always fetch the latest version, as in --

  (5) http://myweb.lsbu.ac.uk/~bushm/index.html?
-- so the script will now truncate that as well.
 
<?php

/*
// WEB PAGE EXPIRY CHECKER - version 2.3
//
// A script to automatically email web page content owners
// once every Monday after their web pages have expired.
//
// [See also this accompanying script: "expirydataviewer.php4".]
//
//
// martin.bush@lsbu.ac.uk, December 2004.
//
//
// Permission is granted to re-use all or part of this software
// under the terms of the GNU General Public License as published
// by the Free Software Foundation [www.gnu.org/copyleft/gpl.html].
// The author would very much appreciate being informed about any
// re-use of this software . . . thanks!
*/

/*
// INSTALLATION INSTRUCTIONS (assuming use for LSBU WWW web pages):
// 1) Put this script ("expirychecker.php4") into cgi-bin
//    ...or, better still, cgi-bin/expirychecker/
// 2) Add two blank files to cgi-bin(/expirychecker/) called:
//    "ExpiryData.xml" & "ExpiryDataLastWeek.xml".
//
// ExpiryData.xml will get properly formatted on the first Monday
// after a page expires (assuming that the page invokes the script).
// No emails will be sent until the SECOND Monday.
//
// Once installed, this script can be called from any LSBU WWW web
// page by including the following (with "???" corresponding to the
// appropriate path) anywhere within the body of the web page:
//
// <!-- Insert values below for "owner", "expirydate" -->
// <!-- and "message" to activate the Expiry Checker. -->
// <!-- Avoid quote marks '" within the message text. -->
// <img width="1" height="1" border="0" alt=""
// src="http://www.lsbu.ac.uk/php4-cgiwrap/???/expirychecker.php4?
// owner=email@ddress
// &
// expirydate=dd/mm/yy
// &
// message=a line of free text - no quotation marks please!
// ">
//
// This will insert a one pixel image into the web page; it'll be
// imperceptible using modern browsers, but a small dot will be
// displayed when using both Netscape 4 and IE 4 on a Mac
// (at least), and it may even cause a blank line to appear, so
// bear this in mind when deciding where to position it.
//
// If the value for the expiry date is left as the literal
// character string "dd/mm/yy" this will be ignored. The script
// will accept dd/mm/yy dates containing single digits - e.g.
// either 09/09/04 or 9/9/04 is acceptable.
*/

//----------------------------------------------------------------

// Reset the expiry data file to an empty, properly formatted XML
// document stamped with today's date (dd/mm/yy).
//
// $ExpiryDataFile - path of the XML file to reset. It must already
//                   exist, because it is opened in "r+" mode.
//
// Returns nothing. If the file is missing, read-only, or cannot be
// locked, it is left untouched.
//
// NOTE: AppendExpiryDataToFile() relies on the exact layout written
// here: the <date> element sits on the 5th line and the file ends with
// "</expiredpages>" (15 characters, no trailing newline).
function ReformatExpiryDataFile($ExpiryDataFile) {

  // "r+" never creates a file, so bail out quietly instead of letting
  // fopen() fail and passing a non-resource on to flock()
  if (!is_writable($ExpiryDataFile)) {
    return;
  }

  $ExpiryData_fp = fopen($ExpiryDataFile, "r+");
  if ($ExpiryData_fp === false) {
    return;
  }

  // only rewrite the file while holding the exclusive lock
  if (flock($ExpiryData_fp, LOCK_EX)) {

    // empty the file and make sure we write from the very start
    ftruncate($ExpiryData_fp, 0);
    rewind($ExpiryData_fp);

    // write contents to create a properly formatted empty file
    fwrite($ExpiryData_fp,
           "<?xml version=\"1.0\"?>"
           . "\n\n"
           . "<expiredpages>"
           . "\n\n"
           . "<date>" . date("d/m/y") . "</date>"
           . "\n\n"
           . "</expiredpages>");

    // push the data out before anyone else can take the lock
    fflush($ExpiryData_fp);
    flock($ExpiryData_fp, LOCK_UN);
  }

  fclose($ExpiryData_fp);

}

//----------------------------------------------------------------

// Normalise a page URL so that the different spellings of the same
// directory index collapse to one form.
//
// The suffixes are tested once each, in this order: a trailing "?",
// then "/index.html", then "/index.shtml", then a trailing "/".
// Whatever is left is returned.
function TruncateURL($theURL) {

  $suffixes = array("?", "/index.html", "/index.shtml", "/");

  foreach ($suffixes as $suffix) {
    $suffixLength = strlen($suffix);
    // compare the tail of the URL with the suffix; strip on a match
    if (substr($theURL, -$suffixLength) == $suffix) {
      $theURL = substr($theURL, 0, -$suffixLength);
    }
  }

  return $theURL;

}

//----------------------------------------------------------------

// Return today's date as a six-character "yymmdd" string,
// e.g. 24 August 2002 gives "020824".
//
// *** End-of-century bug - the two-digit year means comparisons
// *** will fail in the year 2100! ***
function GetTodayYYMMDD() {

  // "y" = two-digit year, "m" = two-digit month, "d" = two-digit day
  return date("ymd");

}

//----------------------------------------------------------------

// Convert a dd/mm/yy date into a "yymmdd" string.
//
// $theDate - date text such as "06/06/03". Single-digit parts are
//            accepted (6/6/03, 06/6/3, ...) and are padded with a
//            leading zero. Surrounding whitespace is ignored.
//
// Returns the "yymmdd" string, or the integer 0 when $theDate does
// not contain three non-empty "/"-separated parts. The zero return is
// important for the Expiry Checker: the main program uses it to detect
// an expiry data file that has not been formatted yet.
//
// The parts are deliberately NOT checked for being numeric: the
// placeholder "dd/mm/yy" must keep converting to "yymmdd", because the
// main program relies on that non-numeric string comparing greater
// than today's date so that un-edited placeholders are ignored.
function ConvertDateToYYMMDD($theDate) {

  // split into at most three parts; anything after a third "/" stays
  // attached to the year part
  $parts = explode("/", trim((string) $theDate), 3);

  // fewer than two slashes means this is not a dd/mm/yy date at all
  if (count($parts) < 3) {
    return(0);
  }

  list($expiry_day, $expiry_month, $expiry_year) = $parts;

  // a missing day, month or year makes the date invalid as well
  if ($expiry_day === "" || $expiry_month === "" || $expiry_year === "") {
    return(0);
  }

  // if any of dd, mm or yy are single digits then add a leading zero
  if (strlen($expiry_day) == 1) {
    $expiry_day = "0".$expiry_day;
  }
  if (strlen($expiry_month) == 1) {
    $expiry_month = "0".$expiry_month;
  }
  if (strlen($expiry_year) == 1) {
    $expiry_year = "0".$expiry_year;
  }

  // now able to convert expiry date to yymmdd format
  return($expiry_year.$expiry_month.$expiry_day);
}

//----------------------------------------------------------------

// Record an expired page in the expiry data file, unless the file
// already holds an entry with the same URL, and report the date the
// file was last formatted.
//
// $theURL        - (already truncated) URL of the expired page
// $theOwner      - email address of the page's content owner
// $theExpiryDate - expiry date exactly as supplied (dd/mm/yy)
// $theMessage    - free-text reminder message
// $theFile       - path of the XML expiry data file
//
// Returns the 8 characters that follow "<date>" on the 5th line of
// the file (a dd/mm/yy date when the file is properly formatted), or
// "" when the file cannot be opened or locked, or has fewer than five
// lines.
//
// The four values come from the query string and the HTTP referer, so
// "&", "<" and ">" are escaped before being written. The XML parser
// decodes them again on reading, so readDatabase() sees the original
// text.
function AppendExpiryDataToFile($theURL, $theOwner, $theExpiryDate,
$theMessage, $theFile) {

  $theDate = "";

  // "r+" cannot create the file; give up quietly when it is unusable
  if (!is_writable($theFile)) {
    return $theDate;
  }
  $theFile_fp = fopen($theFile, "r+");
  if ($theFile_fp === false) {
    return $theDate;
  }

  // everything below reads or rewrites the file, so it all happens
  // while the exclusive lock is held
  if (flock($theFile_fp, LOCK_EX)) {

    // read expired page data file; treat "nothing usable" as empty
    $db = readDatabase($theFile);
    if (!is_array($db)) {
      $db = array();
    }

    // rewind and read the date on the 5th line of the file
    rewind($theFile_fp);
    $line = false;
    for ($i = 0; $i < 5; $i++) {
      $line = fgets($theFile_fp);
    }
    if ($line !== false) {
      // skip the 6 characters of "<date>" and take "dd/mm/yy"
      $theDate = (string) substr($line, 6, 8);
    }

    // see whether the page has already been recorded
    $found = FALSE;
    foreach ($db as $recordedPage) {
      if ($recordedPage->url == $theURL) {
        $found = TRUE;
        break;
      }
    }

    // if there's no entry for the web page then append to file
    if (!$found) {

      // step back over the closing "</expiredpages>" (15 characters);
      // if the file is too short for that, simply append at the end
      if (fseek($theFile_fp, -15, SEEK_END) !== 0) {
        fseek($theFile_fp, 0, SEEK_END);
      }

      // "&" must be replaced first so the other entities survive
      $xmlFrom = array("&", "<", ">");
      $xmlTo = array("&amp;", "&lt;", "&gt;");

      // now append...
      fwrite($theFile_fp,
             "<expiredpage>\n"
             . "  <url>" . str_replace($xmlFrom, $xmlTo, $theURL) . "</url>\n"
             . "  <owner>" . str_replace($xmlFrom, $xmlTo, $theOwner) . "</owner>\n"
             . "  <expired>" . str_replace($xmlFrom, $xmlTo, $theExpiryDate) . "</expired>\n"
             . "  <message>" . str_replace($xmlFrom, $xmlTo, $theMessage) . "</message>\n"
             . "</expiredpage>\n\n"
             . "</expiredpages>");
    }

    // flush before unlocking so other requests see the full entry
    fflush($theFile_fp);
    flock($theFile_fp, LOCK_UN);
  }

  fclose($theFile_fp);

  return $theDate;

}

//----------------------------------------------------------------

// Send one reminder email per expired page recorded in the expiry
// data file.
//
// $FileName - path of the XML expiry data file
//
// Returns nothing. The file is held under an exclusive lock while the
// messages are sent. Entries are processed last-to-first, and entries
// without an owner address are skipped. Nothing is sent when the file
// cannot be opened, locked or parsed.
function EmailExpiryMessages($FileName) {

  // "r+" cannot create the file; give up quietly when it is unusable
  if (!is_writable($FileName)) {
    return;
  }
  $theFile_fp = fopen($FileName, "r+");
  if ($theFile_fp === false) {
    return;
  }

  // continue only when the lock is obtained
  if (flock($theFile_fp, LOCK_EX)) {

    // an empty or unreadable database simply means nothing to send
    $db = readDatabase($FileName);
    if (!is_array($db)) {
      $db = array();
    }

    // Pop each ExpiredPage object and send the email
    while (count($db) > 0) {

      $poppedExpiredPage = array_pop($db);

      $pageURL = $poppedExpiredPage->url;
      $pageOwner = $poppedExpiredPage->owner;
      $pageExpiryDate = $poppedExpiredPage->expired;
      $pageMessage = $poppedExpiredPage->message;

      // no recipient, nothing to send
      if ($pageOwner === null || $pageOwner === "") {
        continue;
      }

// Prepare the email (the literal is kept at column 0 because its
// line breaks and spacing are part of the message text)
$mail_message = "This is to remind you that this web page...
\n  $pageURL
\n...expired on $pageExpiryDate. Here is the reminder message (if any):
\n*** $pageMessage ***
\nYou will receive a reminder each Monday until the page is updated. 
Please update the page as necessary, and remember to specify a new 
expiry date.";

      // Now send the email
      mail($pageOwner, "Expiry Checker: $pageURL", $mail_message);

    }

    flock($theFile_fp, LOCK_UN);
  }

  fclose($theFile_fp);

}

//================================================================
// This section based on: http://jp2.php.net/xml_parse_into_struct
//----------------------------------------------------------------
// One <expiredpage> record from the expiry data file.
class ExpiredPage {
  var $url;      // URL of the expired page
  var $owner;    // email address of the page's content owner
  var $expired;  // expiry date as recorded (dd/mm/yy)
  var $message;  // free-text reminder message

  // Copy every key/value pair of $aa onto the object as a property.
  // Anything that is not an array is ignored, which leaves all
  // properties null.
  function __construct($aa) {
    if (is_array($aa)) {
      foreach ($aa as $k => $v) {
        $this->$k = $v;
      }
    }
  }

  // PHP 4-style constructor. PHP 8 no longer treats a method named
  // after the class as a constructor, so the real work lives in
  // __construct() and this simply forwards to it for old interpreters.
  function ExpiredPage($aa) {
    $this->__construct($aa);
  }
}
//----------------------------------------------------------------
// Read the XML database of expired pages.
//
// $file - path of the XML expiry data file
//
// Returns an array of ExpiredPage objects, one per <expiredpage>
// element, in file order. The array is empty (never null) when the
// file cannot be read, cannot be parsed, or holds no entries.
function readDatabase($file) {
  $tdb = array();

  $lines = file($file);
  if ($lines === false) {
    return $tdb;
  }
  $data = implode("", $lines);

  // keep tag names in their original case and drop whitespace-only
  // text between elements
  $parser = xml_parser_create();
  xml_parser_set_option($parser,XML_OPTION_CASE_FOLDING,0);
  xml_parser_set_option($parser,XML_OPTION_SKIP_WHITE,1);
  xml_parse_into_struct($parser,$data,$values,$tags);
  xml_parser_free($parser);

  // $tags maps each tag name to the positions in $values where that
  // tag occurs; only the <expiredpage> entries matter here
  if (!is_array($tags) || !isset($tags["expiredpage"])) {
    return $tdb;
  }
  $molranges = $tags["expiredpage"];

  // each contiguous pair of array entries are the
  // lower and upper range for each expiredpage definition
  for ($i = 0; $i + 1 < count($molranges); $i += 2) {
    $offset = $molranges[$i] + 1;
    $len = $molranges[$i + 1] - $offset;
    $tdb[] = parseMol(array_slice($values, $offset, $len));
  }

  return $tdb;
}
//----------------------------------------------------------------
// Build an ExpiredPage from the slice of parser output that lies
// between one <expiredpage> open tag and its close tag.
//
// $mvalues - array of entries as produced by xml_parse_into_struct(),
//            each an array with a "tag" key and, when the element has
//            text, a "value" key
//
// Returns an ExpiredPage whose properties are named after the tags.
// An element with no text (e.g. an empty <message>) yields "".
function parseMol($mvalues) {
  $mol = array();
  foreach ($mvalues as $entry) {
    if (!isset($entry["tag"])) {
      continue;
    }
    // empty elements carry no "value" key at all
    $mol[$entry["tag"]] = isset($entry["value"]) ? $entry["value"] : "";
  }
  return new ExpiredPage($mol);
}
//================================================================


/*
// MAIN PROGRAM
*/

// ExpiryData.xml and ExpiryDataLastWeek.xml must exist initially
$file = "ExpiryData.xml";
$fileForLastWeek = "ExpiryDataLastWeek.xml";

// Discover url of referring web page. $_SERVER is where the referer
// normally lives; $_ENV is only filled in when variables_order
// contains "E", so it is kept purely as a fallback.
$url = "";
if (isset($_SERVER['HTTP_REFERER']) && is_string($_SERVER['HTTP_REFERER'])) {
  $url = $_SERVER['HTTP_REFERER'];
} elseif (isset($_ENV['HTTP_REFERER']) && is_string($_ENV['HTTP_REFERER'])) {
  $url = $_ENV['HTTP_REFERER'];
}

// Get $owner, $expirydate and $message from referring web page;
// anything missing (or passed as an array) is treated as empty text
$owner = (isset($_GET['owner']) && is_string($_GET['owner']))
       ? $_GET['owner'] : "";
$expirydate = (isset($_GET['expirydate']) && is_string($_GET['expirydate']))
       ? $_GET['expirydate'] : "";
$message = (isset($_GET['message']) && is_string($_GET['message']))
       ? $_GET['message'] : "";

// Use ConvertDateToYYMMDD to re-format $expirydate
$expirydateYYMMDD = ConvertDateToYYMMDD($expirydate);

// Get today's date in YYMMDD format
$todayYYMMDD = GetTodayYYMMDD();

// If the referring page has expired then...
// ConvertDateToYYMMDD returns the integer 0 for a date it cannot
// parse; 0 would compare "<=" today, so it is excluded explicitly.
// The un-edited placeholder "dd/mm/yy" converts to the non-numeric
// string "yymmdd", which compares greater than today's date and is
// therefore ignored as well.
if (($expirydateYYMMDD !== 0) && ($expirydateYYMMDD <= $todayYYMMDD)) {

  // If the referring page is a valid LSBU WWW page then
  if ((substr($url, 0, 21) == "http://www.lsbu.ac.uk")) {

    // Truncate $url if it ends with a "?", and subsequently with
    // "/index.html", "/index.shtml" or "/", to avoid duplicate
    // entries for the same web page
    $url = TruncateURL($url);

    // Append page data to ExpiryData.xml and retrieve $filedate
    $filedate = AppendExpiryDataToFile($url, $owner, $expirydate,
                                                  $message, $file);
    // Re-format $filedate
    $filedateYYMMDD = ConvertDateToYYMMDD($filedate);

    // If ((today is a Monday) and (file is last week's)) then
    if ((date("D") == "Mon") && ($filedateYYMMDD < $todayYYMMDD)) {

      // If this is the first Monday, then ExpiryData.xml may not
      // yet be properly formatted, in which case $filedateYYMMDD
      // will equal 0, in which case don't send any emails.
      if ($filedateYYMMDD > 1) {

        // Email the expiry messages
        EmailExpiryMessages($file);

        // Copy ExpiryData.xml to ExpiryDataLastWeek.xml
        copy($file, $fileForLastWeek);

      }

      // Clear and re-format ExpiryData.xml
      ReformatExpiryDataFile($file);

    }
  }
}

?>
 
 
 

<<contents ^top^