Web Page Expiry Checker - v2.2


1/12/04
 

Web Page Expiry Checker - v2.2

Sally J. has been testing it, and emailed me to say that she was expecting an email but didn't get one. I realised that the installation instructions weren't quite accurate. In the installation instructions I said this --
// ExpiryData.xml will get properly formatted on the first Monday.
// No emails will be sent until the SECOND Monday.
-- but I should have said this --
// ExpiryData.xml will get properly formatted on the first Monday
// after a page expires (assuming that the page invokes the script).
// No emails will be sent until the SECOND Monday.
I've now edited the script to include this additional comment line.
 
<?php

/*
// WEB PAGE EXPIRY CHECKER - version 2.2
//
// A script to automatically email web page content owners
// once every Monday after their web pages have expired.
//
// [See also this accompanying script: "expirydataviewer.php4".]
//
//
// martin.bush@lsbu.ac.uk, December 2004.
//
//
// Permission is granted to re-use all or part of this software
// under the terms of the GNU General Public License as published
// by the Free Software Foundation [www.gnu.org/copyleft/gpl.html].
// The author would very much appreciate being informed about any
// re-use of this software . . . thanks!
*/

/*
// INSTALLATION INSTRUCTIONS (assuming use for LSBU WWW web pages):
// 1) Put this script ("expirychecker.php4") into cgi-bin
//    ...or, better still, cgi-bin/expirychecker/
// 2) Add two blank files to cgi-bin(/expirychecker/) called:
//    "ExpiryData.xml" & "ExpiryDataLastWeek.xml".
//
// ExpiryData.xml will get properly formatted on the first Monday
// after a page expires (assuming that the page invokes the script).
// No emails will be sent until the SECOND Monday.
//
// Once installed, this script can be called from any LSBU WWW web
// page by including the following (with "???" corresponding to the
// appropriate path) anywhere within the body of the web page:
//
// <!-- Insert values below for "owner", "expirydate" -->
// <!-- and "message" to activate the Expiry Checker. -->
// <!-- Avoid quote marks '" within the message text. -->
// <img width="1" height="1" border="0" alt=""
// src="http://www.lsbu.ac.uk/php4-cgiwrap/???/expirychecker.php4?
// owner=email@ddress
// &
// expirydate=dd/mm/yy
// &
// message=a line of free text - no quotation marks please!
// ">
//
// This will insert a one pixel image into the web page; it'll be
// imperceptible using modern browsers, but a small dot will be
// displayed when using both Netscape 4 and IE 4 on a Mac
// (at least), and it may even cause a blank line to appear, so
// bear this in mind when deciding where to position it.
//
// If the value for the expiry date is left as the literal
// character string "dd/mm/yy" this will be ignored. The script
// will accept dd/mm/yy dates containing single digits - e.g.
// either 09/09/04 or 9/9/04 is acceptable.
*/

//----------------------------------------------------------------

// Empties the expiry data file and rewrites it as a skeleton XML
// document that contains only today's date (dd/mm/yy) inside an
// <expiredpages> root element.
//
// $ExpiryDataFile - path of the file to reset; it must already exist
//                   because it is opened in "r+" mode.
//
// Returns nothing. If the file cannot be opened or locked it is left
// untouched (fopen() itself reports the failure as a warning).
function ReformatExpiryDataFile($ExpiryDataFile) {

  // open ExpiryData file ("r+" for read/write, never creates the file)
  $ExpiryData_fp = fopen($ExpiryDataFile, "r+");
  if ($ExpiryData_fp === false) {
    // nothing to lock, write or close
    return;
  }

  // take an exclusive (writer) lock before touching the contents
  if (flock($ExpiryData_fp, LOCK_EX)) {

    // a properly formatted, empty data file
    $skeleton = "<?xml version=\"1.0\"?>"
              . "\n\n"
              . "<expiredpages>"
              . "\n\n"
              . "<date>" . date("d/m/y") . "</date>"
              . "\n\n"
              . "</expiredpages>";

    // truncate to empty the file, then write from the start
    ftruncate($ExpiryData_fp, 0);
    rewind($ExpiryData_fp);
    fwrite($ExpiryData_fp, $skeleton);

    // push the data out before the lock is released
    fflush($ExpiryData_fp);
    flock($ExpiryData_fp, LOCK_UN);

  }

  fclose($ExpiryData_fp);

}

//----------------------------------------------------------------

// Returns today's date as a six character "yymmdd" string,
// e.g. 24 August 2002 gives "020824".
// *** End-of-century bug - the two digit year wraps in 2100! ***
function GetTodayYYMMDD() {

  // "y" = two digit year, "m" = two digit month, "d" = two digit day
  return date("ymd");

}

//----------------------------------------------------------------

// Converts a "dd/mm/yy" style date into a "yymmdd" string so that two
// dates can be compared with the ordinary comparison operators.
//
// $theDate - the date text. Single digit parts are accepted and are
//            given a leading zero - e.g. 06/06/03, 6/6/03, 06/6/3.
//            Surrounding whitespace is ignored.
//
// Returns the "yymmdd" string, or the integer 0 when $theDate is not
// made of three "/" separated parts or has an empty day part (the
// callers rely on 0 meaning "no valid date"). The parts themselves
// are not checked for being numeric, so the literal placeholder
// "dd/mm/yy" comes back as "yymmdd".
function ConvertDateToYYMMDD($theDate) {

  // split into day, month and year; a limit of 3 keeps anything after
  // the second slash inside the year part
  $parts = explode("/", trim((string) $theDate), 3);

  // fewer than two slashes, or nothing before the first slash, means
  // the date is invalid (this is important for the Expiry Checker)
  if (count($parts) < 3 || $parts[0] === "") {
    return(0);
  }

  $expiry_day = $parts[0];
  $expiry_month = $parts[1];
  $expiry_year = $parts[2];

  // if any of dd, mm or yy are single digits then add a leading zero
  if ( strlen($expiry_day) == 1 ) {
    $expiry_day = "0".$expiry_day;
  }
  if ( strlen($expiry_month) == 1 ) {
    $expiry_month = "0".$expiry_month;
  }
  if ( strlen($expiry_year) == 1 ) {
    $expiry_year = "0".$expiry_year;
  }

  // now able to convert expiry date to yymmdd format
  return($expiry_year.$expiry_month.$expiry_day);
}

//----------------------------------------------------------------

// Records an expired page in the expiry data file, unless the file
// already holds an entry with the same URL, and reports the date that
// is stored on the 5th line of the file.
//
// $theURL        - address of the expired page (used as the key)
// $theOwner      - email address of the page's content owner
// $theExpiryDate - expiry date text as given by the page
// $theMessage    - free text reminder message
// $theFile       - path of the XML data file; it must already exist
//
// Returns the 8 characters that follow the first 6 on line 5 of the
// file (the dd/mm/yy date when the file is properly formatted), or an
// empty string when the file could not be opened, locked or read that
// far. The values are XML-escaped before being written so that
// characters such as "&" and "<" cannot corrupt the data file.
function AppendExpiryDataToFile($theURL, $theOwner, $theExpiryDate, 
$theMessage, $theFile) {

  $theDate = "";

  // open the expiry data file ("r+" never creates it)
  $theFile_fp = fopen($theFile, "r+");
  if ($theFile_fp === false) {
    return $theDate;
  }

  // without the exclusive lock the file must not be read or written
  if (!flock($theFile_fp, LOCK_EX)) {
    fclose($theFile_fp);
    return $theDate;
  }

  // read expired page data file; tolerate a reader that yields
  // nothing for an empty or malformed file
  $db = readDatabase($theFile);
  if (!is_array($db)) {
    $db = array();
  }

  // rewind and read the date on the 5th line of the file
  rewind($theFile_fp);
  $line = false;
  for ($i = 1; $i <= 5; $i++) {
    $line = fgets($theFile_fp);
  }
  if (is_string($line)) {
    // skip the 6 characters of "<date>" and take the dd/mm/yy text
    $theDate = (string) substr($line, 6, 8);
  }

  // see if there is already an entry for this URL
  $found = FALSE;
  foreach ($db as $expiredPage) {
    if ((string) $expiredPage->url === (string) $theURL) {
      $found = TRUE;
      break;
    }
  }

  // if there's no entry for the web page then append to file
  if (!$found) {

    // escape markup characters and replace bytes that XML forbids
    $xmlFlags = ENT_QUOTES | ENT_XML1 | ENT_SUBSTITUTE | ENT_DISALLOWED;

    $entry = "<expiredpage>\n"
           . "  <url>"
           . htmlspecialchars((string) $theURL, $xmlFlags, "UTF-8")
           . "</url>\n"
           . "  <owner>"
           . htmlspecialchars((string) $theOwner, $xmlFlags, "UTF-8")
           . "</owner>\n"
           . "  <expired>"
           . htmlspecialchars((string) $theExpiryDate, $xmlFlags, "UTF-8")
           . "</expired>\n"
           . "  <message>"
           . htmlspecialchars((string) $theMessage, $xmlFlags, "UTF-8")
           . "</message>\n"
           . "</expiredpage>\n\n"
           . "</expiredpages>";

    // go to end of file, back over the closing "</expiredpages>" tag
    // (i.e. 15 places), so that the new entry overwrites it
    fseek($theFile_fp, -15, SEEK_END);

    // now append...
    fwrite($theFile_fp, $entry);
    fflush($theFile_fp);
  }

  // unlock and close ExpiryData file
  flock($theFile_fp, LOCK_UN);
  fclose($theFile_fp);

  return $theDate;

}

//----------------------------------------------------------------

// Sends one reminder email for every expired page recorded in the
// expiry data file.
//
// $FileName - path of the XML data file; it must already exist.
//
// Returns nothing. No mail is sent when the file cannot be opened or
// locked, or for an entry that has no owner address.
function EmailExpiryMessages($FileName) {

  // open the expiry data file ("r+" never creates it)
  $theFile_fp = fopen($FileName, "r+");
  if ($theFile_fp === false) {
    return;
  }

  // continue only when the exclusive lock is obtained
  if (flock($theFile_fp, LOCK_EX)) {

    // tolerate a reader that yields nothing for an empty file
    $db = readDatabase($FileName);
    if (!is_array($db)) {
      $db = array();
    }

    // Work from the last recorded page back to the first
    foreach (array_reverse($db) as $expiredPage) {

      // the owner and URL end up in mail headers, so they must not
      // carry line breaks
      $lineBreaks = array("\r", "\n");
      $pageURL = str_replace($lineBreaks, "", (string) $expiredPage->url);
      $pageOwner = str_replace($lineBreaks, "", (string) $expiredPage->owner);
      $pageExpiryDate = $expiredPage->expired;
      $pageMessage = $expiredPage->message;

      // nobody to send the reminder to
      if ($pageOwner === "") {
        continue;
      }

      // Prepare the email
      $mail_message = "This is to remind you that this web page...\n"
        . "\n  $pageURL\n"
        . "\n...expired on $pageExpiryDate. Here is the reminder message (if any):\n"
        . "\n*** $pageMessage ***\n"
        . "\nYou will receive a reminder each Monday until the page is updated. \n"
        . "Please update the page as necessary, and remember to specify a new \n"
        . "expiry date.";

      // Now send the email
      mail($pageOwner, "Expiry Checker: $pageURL", $mail_message);

    }

    flock($theFile_fp, LOCK_UN);

  }

  // close ExpiryData file
  fclose($theFile_fp);

}

//================================================================
// This section based on: http://jp2.php.net/xml_parse_into_struct
//----------------------------------------------------------------
// One expired page record: the page address, the owner's email
// address, the expiry date text and the free text reminder message.
class ExpiredPage {
  public $url;
  public $owner;
  public $expired;
  public $message;

  // Fills the record from an array keyed by property name, e.g.
  // array("url" => ..., "owner" => ..., "expired" => ..., "message" => ...).
  // Keys that do not name one of the four properties are ignored, and
  // anything other than an array leaves every property null.
  //
  // A method named after the class is no longer treated as a
  // constructor by PHP 8, hence __construct.
  function __construct($aa) {
    if (!is_array($aa)) {
      return;
    }
    foreach ($aa as $k => $v) {
      if (property_exists($this, (string) $k)) {
        $this->$k = $v;
      }
    }
  }
}
//----------------------------------------------------------------
// Reads the XML database of expired pages.
//
// $file - path of the XML data file.
//
// Returns an array holding one object (built by parseMol) per
// <expiredpage> element, in file order. The array is empty when the
// file cannot be read, cannot be parsed or holds no entries.
function readDatabase($file) {
  $tdb = array();

  // read the whole file; file() reports an unreadable file itself
  $lines = file($file);
  if ($lines === false) {
    return $tdb;
  }
  $data = implode("", $lines);

  // parse into the flat "values" list and the per-tag "index" list
  $values = array();
  $tags = array();
  $parser = xml_parser_create();
  xml_parser_set_option($parser,XML_OPTION_CASE_FOLDING,0);
  xml_parser_set_option($parser,XML_OPTION_SKIP_WHITE,1);
  xml_parse_into_struct($parser,$data,$values,$tags);
  if (PHP_VERSION_ID < 80000) {
    // only needed before PHP 8, where the parser is a resource
    xml_parser_free($parser);
  }

  // no <expiredpage> elements were seen
  if (!isset($tags["expiredpage"]) || !is_array($tags["expiredpage"])) {
    return $tdb;
  }

  // each contiguous pair of array entries are the
  // lower and upper range for each expiredpage definition
  $molranges = $tags["expiredpage"];
  $rangeCount = count($molranges);
  for ($i = 0; $i + 1 < $rangeCount; $i += 2) {
    $offset = $molranges[$i] + 1;
    $len = $molranges[$i + 1] - $offset;
    $tdb[] = parseMol(array_slice($values, $offset, $len));
  }

  return $tdb;
}
//----------------------------------------------------------------
// Builds an ExpiredPage from the parsed child elements of one
// <expiredpage> element.
//
// $mvalues - list of entries as produced by xml_parse_into_struct,
//            each an array with a "tag" key and, for elements that
//            contain text, a "value" key.
//
// Returns a new ExpiredPage; an element without text (for example an
// empty message) is stored as an empty string.
function parseMol($mvalues) {
  $mol = array();
  foreach ($mvalues as $entry) {
    if (!isset($entry["tag"])) {
      continue;
    }
    $mol[$entry["tag"]] = isset($entry["value"]) ? $entry["value"] : "";
  }
  return new ExpiredPage($mol);
}
//================================================================


/*
// MAIN PROGRAM
*/

// ExpiryData.xml and ExpiryDataLastWeek.xml must exist initially
$file = "ExpiryData.xml";
$fileForLastWeek = "ExpiryDataLastWeek.xml";

// Discover url of referring web page. $_SERVER is the usual home of
// the request headers; $_ENV is kept as a fallback because it is only
// filled when the "variables_order" setting includes "E".
if (isset($_SERVER['HTTP_REFERER']) && is_string($_SERVER['HTTP_REFERER'])) {
  $url = $_SERVER['HTTP_REFERER'];
} elseif (isset($_ENV['HTTP_REFERER']) && is_string($_ENV['HTTP_REFERER'])) {
  $url = $_ENV['HTTP_REFERER'];
} else {
  $url = "";
}

// Get $owner, $expirydate and $message from referring web page
// (a missing or non-text parameter is treated as empty)
$owner = (isset($_GET['owner']) && is_string($_GET['owner']))
       ? $_GET['owner'] : "";
$expirydate = (isset($_GET['expirydate']) && is_string($_GET['expirydate']))
            ? $_GET['expirydate'] : "";
$message = (isset($_GET['message']) && is_string($_GET['message']))
         ? $_GET['message'] : "";

// Use ConvertDateToYYMMDD to re-format $expirydate
$expirydateYYMMDD = ConvertDateToYYMMDD($expirydate);

// Get today's date in YYMMDD format
$todayYYMMDD = GetTodayYYMMDD();

// The installation instructions promise that an expiry date left as
// the literal placeholder text is ignored
$isPlaceholderDate = (strcasecmp(trim($expirydate), "dd/mm/yy") == 0);

// If the referring page has expired then
if (!$isPlaceholderDate && ($expirydateYYMMDD <= $todayYYMMDD)) {

  // If the referring page is a valid LSBU WWW page then
  if ((substr($url, 0, 21) == "http://www.lsbu.ac.uk")) {

    // Append page data to ExpiryData.xml and retrieve $filedate
    $filedate = AppendExpiryDataToFile($url, $owner, $expirydate,
                                                  $message, $file);
    // Re-format $filedate
    $filedateYYMMDD = ConvertDateToYYMMDD($filedate);

    // If ((today is a Monday) and (file is last week's)) then
    if ((date("D") == "Mon") && ($filedateYYMMDD < $todayYYMMDD)) {

      // If this is the first Monday, then ExpiryData.xml may not
      // yet be properly formatted, in which case $filedateYYMMDD
      // will equal 0, in which case don't send any emails.
      if ($filedateYYMMDD > 1) {

        // Email the expiry messages
        EmailExpiryMessages($file);

        // Copy ExpiryData.xml to ExpiryDataLastWeek.xml
        copy($file, $fileForLastWeek);

      }

      // Clear and re-format ExpiryData.xml
      ReformatExpiryDataFile($file);

    }
  }
}

?>
 
 
 

<<contents ^top^