Title:
URL:Go
";

    // curl option is fully debugged
    var $useCurl = true; // set to false to use fsockopen
    var $ch;             // curl handle; only assigned by initialiseCurl()

    // curl lets us be good citizens and declare our user agent
    var $user_agent='http://www.pcpropertymanager.com/wsnlinks/ [Property Investment Directory] ';

    // any words which mean a local page shouldn't be checked for a redirect
    var $stopWords = array('archive', 'clicks', 'content', 'download', 'Your_Account', 'profile', 'private','report');

    var $debug = false; // when true, showPossibleLinks() also emits the "Rejected" heading

    // internal variables
    var $html;             // page markup stored by getPage()
    var $links;            // link records stored by extractLinks()
    //var $singles;
    var $domain;           // result of extractDomain() for the page being checked
    var $errors = array(); // messages appended by getHTTPContent(), read by getFooter()

    /**
     * @return string
     * @desc Fill this up with the info you need frequently when filling in link requests
     */
    function getScratchPad() {
        $output = "

Test Pages

";
        return $output;
    }//getScratchPad

    ///////////////////////////////////
    // No changes to the class should be made below here
    ///////////////////////////////////

    /**
     * @return string
     * @param array $item link record; the 'url' and 'title' keys are read
     * @param mixed $cnter accepted but not used by the code visible in this method
     * @desc Controls the presentation of the link form and allows it to be hidden
     */
    function getLinkForm($item, $cnter) {
        // Fill the {URL} and {TITLE} placeholders of the form template.
        // str_replace() applies the pairs in order, exactly like two chained calls.
        $form = str_replace(
            array('{URL}', '{TITLE}'),
            array($item['url'], $item['title']),
            $this->form
        );
        $output = "
{$item['title']}

+/-

\n{$form}
\n
\n";
        return $output;
    }//getLinkForm

    /**
     * @param string $url
     * @param boolean $singles
     * @desc PHP 5+ constructor entry point. Old-style (class-named) constructors are
     *       no longer invoked by PHP 8, so delegate to LinkManager() explicitly.
     *       Declared before LinkManager() so PHP 5 does not report a redefined constructor.
     */
    function __construct($url='', $singles=false) {
        $this->LinkManager($url, $singles);
    }//__construct

    /**
     * @return LinkManager
     * @param string $url page to check; falls back to the 'url' GET variable when empty
     * @param boolean $singles currently unused (its handling is commented out)
     * @desc Constructor, just sets up the page
     *       if no url is passed check to see if the form has been submitted
     */
    function LinkManager($url='', $singles=false) {
        if (empty($url)) {
            $url = $this->getGetVar('url');
            //$singles = $this->getGetVar('singles');
        }
        $this->domain = $this->extractDomain($url);
        //$this->singles = $singles;
        echo $this->getHeader($url);
        if (!empty($url)) {
            if ($this->useCurl) $this->initialiseCurl();
            // processPage() declares a single parameter; the extra 0 is ignored
            $this->processPage($url, 0);
        } else {
            echo "

Enter a page to check


";
        }
        echo $this->getFooter();
        if ($this->useCurl) $this->closeCurl();
    }//LinkManager

    /**
     * @return string
     * @param string $url
     * @desc Returns the main part of the domain for comparison in the database.
     *       Unless $this->allowSubDomains is set, only the last three host labels
     *       are kept, and a 'www' in the third-from-last position is dropped.
     */
    function extractDomain($url) {
        $bits = parse_url($url);
        // parse_url() yields no 'host' for empty or scheme-less input
        $domain = isset($bits['host']) ? $bits['host'] : '';
        if ($this->allowSubDomains == false) {
            $labels = array_slice(explode('.', $domain), -3);
            if (count($labels) == 3 && $labels[0] === 'www') {
                array_shift($labels);
            }
            $domain = implode('.', $labels);
        }
        return $domain;
    }//extractDomain

    /**
     * @return void
     * @desc Scans $this->html for anchor tags and stores one record per link
     *       (keys 'title', 'url', 'status' => 0) in $this->links
     */
    function extractLinks() {
        $data = $this->html;
        $links = array();
        $location = null; // href of the anchor currently open; null while outside an anchor
        $linkpos = 0;     // offset just past the opening tag of the current anchor
        $pos = 0;
        while (($pos = strpos($data, "<", $pos)) !== false) {
            $pos++;
            $endpos = strpos($data, ">", $pos);
            if ($endpos === false) {
                // Unterminated tag: without this guard $pos would be reset to 1
                // below and the scan would never finish.
                break;
            }
            $tag = trim(substr($data, $pos, $endpos - $pos));
            $tagName = (string) strtok($tag, ' ');
            if ($location !== null) {
                // look for the closing tag of the open anchor
                if (!strcasecmp($tagName, "/A")) {
                    $link = substr($data, $linkpos, $pos - 1 - $linkpos);
                    // NOTE(review): the original called eregi() (removed in PHP 7) with a
                    // pattern that has no capture group and then read $out[1], so the title
                    // was never populated. Use the tag-stripped anchor text instead; restore
                    // a dedicated pattern here if one was intended.
                    $links[] = array(
                        'title'  => strip_tags($link),
                        'url'    => $location,
                        'status' => 0,
                    );
                    $location = null;
                }
            } elseif (!strcasecmp($tagName, 'A')) {
                // look for an opening anchor and remember its href
                $regs = array();
                preg_match('/href\s*=\s*([\'"]?)([^\'">\s]+)\1/i', $tag, $regs);
                if (!empty($regs[2])) {
                    // Only use it if it seems to be reasonable
                    $location = $regs[2];
                }
                $linkpos = $endpos + 1;
            }
            $pos = $endpos + 1;
        }
        $this->links = $links;
    } //extract_links

    /**
     * @return void
     * @param string $url
     * @desc Retrieves the html for the page in question and stores it, with line
     *       breaks replaced by spaces, in $this->html
     */
    function getPage($url) {
        $output = implode(' ', $this->getHTTPContent($url, 'GET'));
        // str_replace() replaces the removed ereg_replace("\n|\r", ' ', ...)
        $this->html = str_replace(array("\n", "\r"), ' ', $output);
    }//getPage

    /**
     * @return string
     * @desc Works 
through the links and displays according to status
     *
     * NOTE(review): as visible here the loop only appends the section headings
     * ("Rejected" when $this->debug is set, then "Outbound Links", then
     * "Internal Links"); it never reads $this->links or calls getLinkForm().
     * Presumably $i mirrors the status codes assigned in checkLinks() - confirm
     * whether per-link rendering is missing.
     */ function showPossibleLinks() { $output = ''; for ($i = 0; $i < 4; $i++) { if ($i == 2) $output .= "

Outbound Links

\n"; if ($i == 3) $output .= "

Internal Links

\n"; if ($i == 0 && $this->debug) $output .= "

Rejected

\n";
        }
        return $output;
    }//showPossibleLinks

    /**
     * @return void
     * @desc Walks $this->links and updates each record in place:
     *       - url contains $this->domain: url is replaced by detectExternalLink(), status 3
     *       - url does not start with 'http': it is prefixed with the domain and
     *         replaced by detectExternalLink(); status is left unchanged
     *       - anything else: status 2
     */
    function checkLinks() {
        foreach ($this->links as $k => $varray) {
            // see if we have a redirect
            if (stristr($varray['url'], $this->domain)) {
                $this->links[$k]['url'] = $this->detectExternalLink($varray['url']);
                $this->links[$k]['status'] = 3;
            } elseif (substr($varray['url'], 0, 4) != 'http') {
                $this->links[$k]['url'] = $this->detectExternalLink($this->domain . '/' . $varray['url']);
            } else {
                $this->links[$k]['status'] = 2;
            }
        }
    }//checkLinks

    /**
     * @return boolean
     * @param string $url
     * @desc Checks to see if this is a domain we want to think about linking to:
     *       must be a non-empty http URL that mentions neither $this->domain nor
     *       any entry of $this->myDomains
     */
    function validDomain($url) {
        if (empty($url)) return false;
        if (substr($url, 0, 4) != 'http') return false;
        if (stristr($url, $this->domain)) return false;
        foreach ($this->myDomains as $d) {
            if (stristr($url, $d)) return false;
        }
        return true;
    }//validDomain

    /**
     * @return string
     * @param string $url
     * @desc Requests $url (unless it contains a stop word) and returns the value of
     *       the last 'Location:' line in the response, or '' when there is none
     */
    function detectExternalLink($url) {
        $newUrl = '';
        if ($this->checkNoStopWords($url)) {
            $header = $this->getHTTPContent($url);
            foreach ($header as $lines) {
                if (substr($lines, 0, 9) == 'Location:') $newUrl = trim(substr($lines, 10));
            }
        }
        return $newUrl;
    }//detectExternalLink

    /**
     * @return boolean
     * @param string $url
     * @desc Check to see if the url has any stopwords; true means none were found
     */
    function checkNoStopWords($url) {
        foreach ($this->stopWords as $needle) {
            if (stristr($url, $needle)) return false;
        }
        return true;
    }//checkNoStopWords

    /**
     * @return array
     * @param string $url
     * @param string $method
     * @desc Returns the requested information in an array. With curl the first
     *       element is the curl_exec() result and, for HEAD, a synthetic
     *       'Location: <effective url>' element follows; with fsockopen each
     *       element is one line read from the socket. Failures are appended to
     *       $this->errors.
     */
    function getHTTPContent($url, $method = 'HEAD') {
        $output = array();
        $info = parse_url($url);
        if (!is_array($info)) $info = array(); // parse_url() returns false on malformed URLs
        if (!isset($info['host'])) $info['host'] = $this->domain;
        $host = $info['host'];
        if (!isset($info['port'])) $info['port'] = 80;
        if (!isset($info['path'])) $info['path'] = '/';
        $path = $info['path'];
        if (!empty($info['query'])) {
            $path .= '?' . $info['query'];
        }
        // NOTE(review): after this line $path is host + path; that suits CURLOPT_URL
        // but also ends up in the fsockopen request line below - confirm that is intended.
        if (!stristr($path, $host)) $path = $host . $path;

        if ($this->useCurl) {
            curl_setopt($this->ch, CURLOPT_URL, $path);
            $output[] = curl_exec($this->ch);
            if ($method == 'HEAD') {
                $header = curl_getinfo($this->ch);
                $output[] = 'Location: ' . $header['url'];
            }
            if (curl_errno($this->ch)) $this->errors[] = "Curl Error: " . curl_error($this->ch) . " ==> {$path}
\n";
        } else {
            // open connection
            $fp = fsockopen($info['host'], $info['port'], $errno, $errstr, 60);
            if ($fp) {
                // send request
                fwrite($fp, "{$method} {$path} HTTP/1.0\r\nHost: {$host}\r\n\r\n");
                while (!feof($fp)) {
                    $output[] = fgets($fp, 1028);
                }
                // close only a connection that was actually opened
                fclose($fp);
            } else $this->errors[] = "FSock Error: {$errstr} ({$errno})
\n";
        }
        return $output;
    }//getHTTPContent

    /**
     * @return void
     * @param string $url
     * @desc Controlling script for processing a page: fetch, extract links,
     *       classify them, then echo the result of showPossibleLinks()
     */
    function processPage($url) {
        $this->getPage($url);
        $this->extractLinks();
        $this->checkLinks();
        echo $this->showPossibleLinks();
    }//processPage

    /**
     * @return string
     * @param string $url
     * @desc Returns the basic page setup
     */
    function getHeader($url) {
        // $url can arrive straight from the query string (see the constructor),
        // so escape it and the derived title before embedding them in the page.
        $url = htmlspecialchars((string) $url, ENT_QUOTES);
        $title = htmlspecialchars(($this->domain) ? $this->domain : 'Links Manager', ENT_QUOTES);
        $output = " Link Manager
Domain{$title}Start Again
URL{$url}
Date".date('d/m/Y H:i')."
";
        return $output;
    }//getHeader

    /**
     * @return string
     * @desc Finishes off the html for the footer: the collected $this->errors
     *       (if any) followed by the scratchpad section
     */
    function getFooter() {
        // Start from an empty buffer so the error list can be appended safely.
        $output = '';
        if (count($this->errors) > 0) {
            $output .= "
    \n";
            foreach ($this->errors as $val) {
                $output .= "
  • {$val}
  • \n";
            }
            $output .= "
\n";
        }
        // Append rather than assign: a plain assignment here threw away the
        // error list built above, so errors were never shown.
        $output .= "

Scratchpad

\n". $this->getScratchPad() . "
Property Investment Directory
\n";
        return $output;
    }//getFooter

    /**
     * @return void
     * @desc Create the curl option and store in this object
     */
    function initialiseCurl() {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_USERAGENT, $this->user_agent);
        curl_setopt($ch, CURLOPT_REFERER, 'http://www.pcpropertymanager.com/wsnlinks/');
        curl_setopt($ch, CURLOPT_HEADER, 1);         // include response headers in the result
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the response instead of printing it
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($ch, CURLOPT_FAILONERROR, 1);
        $this->ch = $ch;
    }//initialiseCurl

    /**
     * @return void
     * @desc Close the curl connection. Does nothing when no handle was created:
     *       the constructor calls this even when initialiseCurl() never ran.
     */
    function closeCurl() {
        if ($this->ch) {
            curl_close($this->ch);
            $this->ch = null;
        }
    }//closeCurl

    /**
     * @return string
     * @param string $name
     * @param string $default
     * @desc Returns a GET variable, or $default when it is not set
     */
    function getGetVar($name, $default='') {
        // $_GET is a superglobal, so no `global` declaration is needed
        return isset($_GET[$name]) ? $_GET[$name] : $default;
    }//getGetVar

}//class UTIL_linkmanager

$lm = new LinkManager();
?>