/*
* L I N K S M A N A G E R
* Author: Sarah King
* http://sarahk.pcpropertymanager.com
* March 2005
*
* Description: This script is designed to review the links on a page and identify
* whether the links are already held in the tables of a links directory.
* If not, the link is presented in a submission form which can be customised
* for the user's own directory system.
*
* The CURL option is fully debugged; I was not able to fully test the fsockopen version.
*
* Setup: The class has 4 variables used for connecting to the database.
* These need to be changed.
*
* $query needs to be updated with the actual query used to check for a link's existence
*
* $form works for my old version of the wsnlinks directory. You need to change this
* so that it works for the directory system you use
*
* $useCurl should be true if curl is available. fsockopen functions haven't been fully tested
* $user_agent should be set to your site name
*
* getScratchPad is for static information you need to copy and paste from time to time
*
*/
/**
 * Scans a web page for hyperlinks and classifies them so that outbound links
 * can be offered in a directory submission form.
 *
 * Instantiating the class runs the whole job and echoes the resulting page.
 *
 * Link status codes used in $links:
 *   0 = not classified / rejected (only listed when $debug is true)
 *   1 = listed without a form (never assigned inside this class)
 *   2 = outbound link, rendered through the submission form
 *   3 = internal link (its URL mentions the domain of the scanned page)
 */
class LinkManager
{
    // user variables
    var $allowSubDomains = true; // treat subdomains as separate sites
    var $myDomains = array('pcpropertymanager.com',
        'https://secure.maxnet.co.nz/pcpm/',
        'propertyinvestor.info'
    ); // I don't want to detect links on my domain
    // the default form works on my directory system, you need to change this to the form needed on your system
    var $form = "
";
    //curl option is fully debugged
    var $useCurl = true; //set to false to use fsockopen
    var $ch; //curl handler if we need it
    //curl lets us be good citizens and declare our user agent
    var $user_agent='http://www.pcpropertymanager.com/wsnlinks/ [Property Investment Directory] ';
    //any words which mean a local page shouldn't be checked for a redirect
    var $stopWords = array('archive', 'clicks', 'content', 'download', 'Your_Account', 'profile', 'private','report');
    var $debug = false;
    // internal variables
    var $html;
    var $links = array(); // list of array('title' => ..., 'url' => ..., 'status' => ...)
    //var $singles;
    var $domain;
    var $errors = array();

    /**
     * @return string
     * @desc Fill this up with the info you need frequently when filling in link requests
     */
    function getScratchPad()
    {
        $output = "Test Pages
";
        return $output;
    }//getScratchPad

    ///////////////////////////////////
    // No changes to the class should be made below here
    ///////////////////////////////////

    /**
     * @return string
     * @param array $item link entry with 'url' and 'title' keys
     * @param int $cnter index of the link; currently unused, kept for callers
     * @desc Fills the {URL} and {TITLE} placeholders of $form with the
     * HTML-escaped link data and returns the populated form followed by a
     * newline.
     */
    function getLinkForm($item, $cnter)
    {
        // strtr() substitutes both placeholders in one pass, so a URL that
        // happens to contain the text "{TITLE}" is not substituted again.
        $form = strtr($this->form, array(
            '{URL}' => $this->escapeHtml($item['url']),
            '{TITLE}' => $this->escapeHtml($item['title']),
        ));
        // The populated form used to be built and then thrown away.
        // NOTE(review): any wrapper markup that once surrounded the form is
        // not recoverable from this file - confirm the desired presentation.
        $output = $form . "\n";
        return $output;
    }//getLinkForm

    /**
     * @param string $url page to check; when empty the 'url' GET variable is used
     * @param bool $singles unused, kept for backward compatibility
     * @desc Constructor: prints the header, processes the page if a URL is
     * available (otherwise prints a prompt) and prints the footer.
     */
    function __construct($url = '', $singles = false)
    {
        if (empty($url)) {
            $url = $this->getGetVar('url');
            //$singles = $this->getGetVar('singles');
        }
        // ?url[]=x would hand us an array; treat anything but a string as missing
        $url = is_string($url) ? trim($url) : '';
        // Allow "www.example.com/page" style input by assuming plain http
        if ($url != '' && !preg_match('#^[a-z][a-z0-9+.\-]*://#i', $url)) {
            $url = 'http://' . $url;
        }
        $this->domain = $this->extractDomain($url);
        //$this->singles = $singles;
        echo $this->getHeader($url);
        if ($url != '') {
            if ($this->useCurl) $this->initialiseCurl();
            $this->processPage($url);
        } else {
            echo "Enter a page to check
";
        }
        echo $this->getFooter();
        if ($this->useCurl) $this->closeCurl();
    }//__construct

    /**
     * @return void
     * @param string $url
     * @param bool $singles
     * @desc PHP 4 style constructor name, kept so existing explicit calls keep
     * working. PHP 8 no longer treats it as a constructor, hence __construct().
     */
    function LinkManager($url = '', $singles = false)
    {
        $this->__construct($url, $singles);
    }//LinkManager

    /**
     * @return string
     * @param string $url
     * @desc Returns the host part of the url for comparison, or '' when the url
     * has no host. When $allowSubDomains is false the host is cut down to its
     * last two labels, plus the label before them unless that label is 'www'.
     */
    function extractDomain($url)
    {
        $bits = parse_url($url);
        $domain = (is_array($bits) && isset($bits['host'])) ? $bits['host'] : '';
        if ($this->allowSubDomains == false && $domain != '') {
            $labels = explode('.', $domain);
            $count = count($labels);
            $reduced = implode('.', array_slice($labels, -2));
            // one extra label is kept (covers hosts such as example.co.uk)
            if ($count >= 3 && $labels[$count - 3] != 'www') {
                $reduced = $labels[$count - 3] . '.' . $reduced;
            }
            $domain = $reduced;
        }
        return $domain;
    }//extractDomain

    /**
     * @return void
     * @desc Walks through the tags in $this->html and stores every anchor that
     * has an href in $this->links as array('title', 'url', 'status' => 0).
     */
    function extractLinks()
    {
        $data = (string) $this->html;
        $links = array();
        $location = null; // href of the anchor currently open, null when outside an anchor
        $linkpos = 0;     // offset of the first character after the opening anchor tag
        $pos = 0;
        while (($pos = strpos($data, '<', $pos)) !== false) {
            $tagStart = $pos;
            $endpos = strpos($data, '>', $tagStart + 1);
            // An unterminated "<" used to reset the scan position and loop forever
            if ($endpos === false) break;
            $tag = trim(substr($data, $tagStart + 1, $endpos - $tagStart - 1));
            $name = strtolower((string) strtok($tag, " \t\r\n"));
            if ($location !== null) {
                // inside an anchor: look for the closing tag
                if ($name == '/a') {
                    $inner = substr($data, $linkpos, $tagStart - $linkpos);
                    $title = strip_tags($inner);
                    // NOTE(review): the original ereg pattern had no capture
                    // group, so its intent is unknown; an image alt text is
                    // used as the label when the anchor has no text - confirm.
                    if (trim($title) == ''
                        && preg_match('/<img\b[^>]*\balt\s*=\s*(["\'])(.*?)\1/is', $inner, $out)) {
                        $title = $out[2];
                    }
                    $links[] = array(
                        'title' => $title,
                        'url' => $location,
                        'status' => 0,
                    );
                    $location = null;
                }
            } elseif ($name == 'a') {
                // Only use the anchor if it carries something that looks like an href
                if (preg_match('/href\s*=\s*([\'"]?)([^\'">\s]+)\1/i', $tag, $regs)) {
                    $location = $regs[2];
                    $linkpos = $endpos + 1;
                }
            }
            $pos = $endpos + 1;
        }
        $this->links = $links;
    }//extractLinks

    /**
     * @return void
     * @param string $url
     * @desc Retrieves the page with a GET request and stores the response in
     * $this->html as a single line (line breaks replaced by spaces).
     */
    function getPage($url)
    {
        $output = $this->getHTTPContent($url, 'GET');
        $output = implode(' ', $output);
        // str_replace() replaces ereg_replace(), which was removed in PHP 7
        $output = str_replace(array("\n", "\r"), ' ', $output);
        $this->html = $output;
    }//getPage

    /**
     * @return string
     * @desc Works through the links and displays them grouped by status
     */
    function showPossibleLinks()
    {
        $output = '';
        $links = is_array($this->links) ? $this->links : array();
        for ($i = 0; $i < 4; $i++) {
            if ($i == 2) $output .= "Outbound Links
\n";
            if ($i == 3) $output .= "Internal Links
\n";
            if ($i == 0 && $this->debug) $output .= "Rejected
\n";
            foreach ($links as $k => $varray) {
                // Each link is shown once, in the pass matching its status.
                // Debug mode used to repeat every link in every other section.
                if ($varray['status'] != $i) continue;
                $label = $varray['title'];
                if (empty($label)) $label = $varray['url'];
                $label = $this->escapeHtml($label);
                if ($i == 1 || $i == 3) {
                    $output .= "- {$label}
\n";
                } elseif ($i == 2) {
                    $output .= $this->getLinkForm($varray, $k);
                } elseif ($this->debug) {
                    $output .= "- {$label}
\n";
                }
            }
            if ($i == 0 && $this->debug) $output .= "
\n";
        }
        return $output;
    }//showPossibleLinks

    /**
     * @return void
     * @desc Classifies every entry of $this->links. Links mentioning the page's
     * domain get status 3, other absolute http(s) links get status 2. Internal
     * and relative links have their url replaced by the redirect target
     * reported by detectExternalLink().
     */
    function checkLinks()
    {
        if (!is_array($this->links)) return;
        $domain = (string) $this->domain;
        foreach ($this->links as $k => $varray) {
            $url = (string) $varray['url'];
            // protocol-relative links are absolute links without a scheme
            if (substr($url, 0, 2) == '//') {
                $url = 'http:' . $url;
                $this->links[$k]['url'] = $url;
            }
            $isHttp = (bool) preg_match('#^https?://#i', $url);
            // mailto:, javascript:, #fragment etc. cannot be fetched over HTTP;
            // they keep status 0 and no request is made for them.
            if (!$isHttp && ($url == '' || $url[0] == '#' || preg_match('#^[a-z][a-z0-9+.\-]*:#i', $url))) {
                continue;
            }
            if ($domain != '' && stripos($url, $domain) !== false) {
                // see if we have a redirect
                $this->links[$k]['url'] = $this->detectExternalLink($url);
                $this->links[$k]['status'] = 3;
            } elseif (!$isHttp) {
                // Relative link: resolved against the site root, as before.
                // NOTE(review): status stays 0, so these only show in debug
                // mode - confirm whether they should be re-classified.
                if ($domain != '') {
                    $absolute = 'http://' . $domain . '/' . ltrim($url, '/');
                    $this->links[$k]['url'] = $this->detectExternalLink($absolute);
                }
            } else {
                $this->links[$k]['status'] = 2;
            }
        }
    }//checkLinks

    /**
     * @return boolean
     * @param string $url
     * @desc Checks to see if this is a domain we want to think about linking to:
     * an http url that mentions neither the scanned domain nor any of $myDomains
     */
    function validDomain($url)
    {
        if (empty($url)) return false;
        if (substr($url, 0, 4) != 'http') return false;
        // An empty needle must never count as a match
        if ($this->domain != '' && stripos($url, $this->domain) !== false) return false;
        foreach ($this->myDomains as $d) {
            if ($d != '' && stripos($url, $d) !== false) return false;
        }
        return true;
    }//validDomain

    /**
     * @return string
     * @param string $url
     * @desc Requests the url and returns the value of the last Location line in
     * the result. The url is returned unchanged when it contains a stop word or
     * when no Location line is found, so the link never loses its url.
     */
    function detectExternalLink($url)
    {
        $newUrl = $url;
        if ($this->checkNoStopWords($url)) {
            $header = $this->getHTTPContent($url);
            foreach ($header as $line) {
                // header names are case-insensitive
                if (is_string($line) && strncasecmp($line, 'Location:', 9) == 0) {
                    $target = trim(substr($line, 9));
                    if ($target != '') $newUrl = $target;
                }
            }
        }
        return $newUrl;
    }//detectExternalLink

    /**
     * @return boolean
     * @param string $url
     * @desc Returns true when the url contains none of the stop words
     */
    function checkNoStopWords($url)
    {
        foreach ($this->stopWords as $needle) {
            if ($needle != '' && stripos($url, $needle) !== false) return false;
        }
        return true;
    }//checkNoStopWords

    /**
     * @return array
     * @param string $url absolute http(s) url; a scheme-less "host/path" or a
     * bare path (resolved against $this->domain) is also accepted
     * @param string $method 'HEAD' or 'GET'
     * @desc Returns the response as an array of strings. With curl the whole
     * response is one element and, for HEAD, a 'Location: <effective url>'
     * element is appended. With fsockopen every response line is one element.
     * Failures are recorded in $this->errors.
     */
    function getHTTPContent($url, $method = 'HEAD')
    {
        $output = array();
        $info = parse_url($url);
        if (!is_array($info)) $info = array();

        // Only plain web requests are allowed, so that a crafted link cannot
        // make curl read file:// or other protocol handlers.
        $scheme = isset($info['scheme']) ? strtolower($info['scheme']) : 'http';
        if ($scheme != 'http' && $scheme != 'https') {
            $this->errors[] = "URL Error: unsupported scheme ==> {$url}
\n";
            return $output;
        }

        $host = isset($info['host']) ? $info['host'] : '';
        $path = isset($info['path']) ? $info['path'] : '/';
        if ($host == '') {
            // Scheme-less input: parse_url() puts "host/path" into the path.
            // If the first segment mentions our domain treat it as the host,
            // otherwise fall back to the domain of the scanned page.
            $slash = strpos($path, '/');
            $first = ($slash === false) ? $path : substr($path, 0, $slash);
            if ($first != '' && $this->domain != '' && stripos($first, $this->domain) !== false) {
                $host = $first;
                $path = ($slash === false) ? '/' : substr($path, $slash);
            } else {
                $host = (string) $this->domain;
            }
        }
        if ($host == '') {
            $this->errors[] = "URL Error: no host ==> {$url}
\n";
            return $output;
        }
        if ($path === '' || $path[0] != '/') $path = '/' . $path;

        // request target = path plus query string
        $target = $path;
        if (isset($info['query']) && $info['query'] !== '') {
            $target .= '?' . $info['query'];
        }
        $defaultPort = ($scheme == 'https') ? 443 : 80;
        $port = isset($info['port']) ? (int) $info['port'] : $defaultPort;
        $authority = ($port == $defaultPort) ? $host : $host . ':' . $port;
        $fullUrl = "{$scheme}://{$authority}{$target}";

        if ($this->useCurl) {
            if (!$this->ch) $this->initialiseCurl();
            curl_setopt($this->ch, CURLOPT_URL, $fullUrl);
            $response = curl_exec($this->ch);
            // curl_exec() returns false on failure; keep only real content
            if ($response !== false) $output[] = $response;
            if ($method == 'HEAD') {
                // redirects are followed by curl, so report where we ended up
                $output[] = 'Location: ' . curl_getinfo($this->ch, CURLINFO_EFFECTIVE_URL);
            }
            if (curl_errno($this->ch))
                $this->errors[] = "Curl Error: " . curl_error($this->ch) . " ==> {$fullUrl}
\n";
        } else {
            // open connection
            $transport = ($scheme == 'https') ? 'ssl://' . $host : $host;
            $fp = fsockopen($transport, $port, $errno, $errstr, 60);
            if ($fp) {
                // send request; the request line carries the path, not host + path
                fwrite($fp, "{$method} {$target} HTTP/1.0\r\nHost: {$host}\r\n\r\n");
                while (!feof($fp)) {
                    $line = fgets($fp, 1028);
                    if ($line === false) break; // read error or timeout
                    $output[] = $line;
                }
                fclose($fp);
            } else {
                $this->errors[] = "FSock Error: {$errstr} ({$errno})
\n";
            }
        }
        return $output;
    }//getHTTPContent

    /**
     * @return void
     * @param string $url
     * @desc Controlling script for processing a page
     */
    function processPage($url)
    {
        $this->getPage($url);
        $this->extractLinks();
        $this->checkLinks();
        echo $this->showPossibleLinks();
    }//processPage

    /**
     * @return string
     * @param string $url the page being checked; HTML-escaped before output
     * @desc Returns the basic page setup
     */
    function getHeader($url)
    {
        $title = ($this->domain) ? $this->domain : 'Links Manager';
        // both values can come straight from the query string
        $title = $this->escapeHtml($title);
        $url = $this->escapeHtml($url);
        $output = "
Link Manager
| Domain | {$title} | Start Again |
| URL | {$url} |
| Date | ".date('d/m/Y H:i')." |
";
        return $output;
    }//getHeader

    /**
     * @return string
     * @desc Finishes off the page: the recorded errors (if any) followed by
     * the scratchpad
     */
    function getFooter()
    {
        $output = '';
        if (count($this->errors) > 0) {
            $output .= "\n";
            foreach ($this->errors as $val) $output .= "- " . $this->escapeHtml($val) . "
\n";
            $output .= " \n";
        }
        // append, do not assign: the error list used to be overwritten here
        $output .= " |
Scratchpad\n". $this->getScratchPad() . " |
\n";
        return $output;
    }//getFooter

    /**
     * @return void
     * @desc Create the curl handle and store it in this object
     */
    function initialiseCurl()
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_USERAGENT, $this->user_agent);
        curl_setopt($ch, CURLOPT_REFERER, 'http://www.pcpropertymanager.com/wsnlinks/');
        curl_setopt($ch, CURLOPT_HEADER, 1);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($ch, CURLOPT_FAILONERROR, 1);
        $this->ch = $ch;
    }//initialiseCurl

    /**
     * @return void
     * @desc Close the curl connection if one was opened
     */
    function closeCurl()
    {
        // the handle does not exist when no page was processed
        if ($this->ch) {
            curl_close($this->ch);
            $this->ch = null;
        }
    }//closeCurl

    /**
     * @return string
     * @param string $name
     * @param string $default
     * @desc Returns a GET variable, or $default when it is not set
     */
    function getGetVar($name, $default='')
    {
        // $_GET is a superglobal, no "global" statement needed
        return isset($_GET[$name]) ? $_GET[$name] : $default;
    }//getGetVar

    /**
     * @return string
     * @param string $text
     * @desc Escapes text for output inside HTML. Existing entities are left
     * alone because titles and urls are cut out of HTML source and may already
     * contain them. The single-byte charset makes htmlspecialchars() pass
     * bytes of any ASCII-compatible encoding through instead of rejecting them.
     */
    function escapeHtml($text)
    {
        return htmlspecialchars((string) $text, ENT_QUOTES, 'ISO-8859-1', false);
    }//escapeHtml
}//class LinkManager
// Run the link check as soon as the script is loaded. No URL is passed, so the
// constructor reads the page to check from the 'url' GET variable.
$lm = new LinkManager();
?>