Most Referenced Functions
  » google_pagerank()
  » preg_replace()
  » imagecreatefrompng()
  » site_pageranks()
  » imagepng()
  » imagestring()
  » imagedestroy()
  » htmlentities()
  » imagecolorallocate()
  » fopen()
  » preg_match()
  » header()
  » getimagesize()
  » session_start()
  » ob_start()
  » preg_match_all()
  » strstr()
  » ob_flush()
  » flush()
  » htmlspecialchars()
  » str_replace()
  » strpos()
  » array2vars()
  » nl2br()
  » setcookie()
  » urlencode()
  » preg_split()
  » ereg()
  » ereg_replace()
  » var_dump()

Become a sponsor for $15/month. Link is sitewide - PR5 homepage, 20+ PR4 pages, 90+ PR3 pages. Email dave[AT]icemelon[D0T]c0m.

PHP Functions

Function: site_pageranks
By dave

Note! This function is not a built-in PHP function. To use it, you will need to copy the source code (located at the bottom of this page) into your script.

site_pageranks -- Crawls a website and calculates the Google PageRanks for every webpage within that domain.

Description

void site_pageranks ( start page URL, domain URL )

This function crawls a website and records the Google PageRank of every webpage it finds within that domain. Include 'http://' in the start page URL, but do not include it in the domain URL. The function uses PHP sessions, so call session_start() first; the crawled URLs are stored, grouped by PageRank, in the variable $_SESSION['pageranks'].

This function is based on the code for the google_pagerank function.

Example 1. site_pageranks() example

<?php
// The crawler keeps its results in $_SESSION, so start the session first.
session_start();
// Crawling a whole site may take a long time; remove the execution time limit.
set_time_limit(0);
// First argument: start page URL including 'http://'. Second: bare domain without 'http://'.
site_pageranks('http://www.icemelon.com', 'icemelon.com');
// Print the URLs collected in the session by the crawl.
print_r($_SESSION['pageranks']);
?>

Note: This function may take a long time to run, so you may want to use set_time_limit(0).

Source Code

<?php
// Default value of the $init argument of GoogleCH(), where it becomes the starting value of $c.
define('GOOGLE_MAGIC', 0xE6359A60);

/**
 * Unsigned (zero-fill) right shift of a 32-bit value.
 *
 * PHP's >> operator is an arithmetic shift that copies the sign bit, so a
 * value with bit 31 set needs special handling to behave like an unsigned
 * 32-bit quantity.
 *
 * @param int $a Value to shift; only its low 32 bits are significant.
 * @param int $b Number of bit positions to shift right (0 or more).
 * @return int The low 32 bits of $a shifted right by $b with zeros shifted in.
 */
function zeroFill($a, $b)
{
    // Shifting a 32-bit quantity by its full width or more leaves nothing.
    if ($b >= 32)
    {
        return 0;
    }

    if (PHP_INT_SIZE > 4)
    {
        // 64-bit build: drop everything above bit 31 (including sign
        // extension of negative inputs); the value is then non-negative and
        // a plain shift already fills with zeros.
        return ($a & 0xFFFFFFFF) >> $b;
    }

    // 32-bit build: bit 31 is the sign bit of a native integer.
    $a = (int) $a;
    if ($b == 0)
    {
        // Nothing to shift; avoids the negative shift count ($b - 1) below.
        return $a;
    }
    if ($a < 0)
    {
        // Shift once, clear the copied sign bit (the original bit 31 now
        // sits in bit 30), then finish the remaining $b - 1 positions.
        return (($a >> 1) & 0x7FFFFFFF) >> ($b - 1);
    }
    return $a >> $b;
}

/**
 * Mixes three 32-bit words in three rounds of subtract / xor / shift steps.
 *
 * The arithmetic is meant to wrap at 32 bits. On 64-bit PHP builds native
 * integers do not wrap there, so every intermediate value is reduced to its
 * low 32 bits; otherwise high bits would leak into the right shifts done by
 * zeroFill() and into the returned words.
 *
 * @param int $a First word.
 * @param int $b Second word.
 * @param int $c Third word.
 * @return array The mixed words as array($a, $b, $c).
 */
function mix($a,$b,$c) {
    // On 32-bit builds the native integer width already provides the
    // wraparound, so the mask is all ones (a no-op).
    $mask = (PHP_INT_SIZE > 4) ? 0xFFFFFFFF : -1;

    $a &= $mask;
    $b &= $mask;
    $c &= $mask;

    // Shift amounts per round: (right shift for $a, left shift for $b, right shift for $c).
    $rounds = array(
        array(13, 8, 13),
        array(12, 16, 5),
        array(3, 10, 15),
    );

    foreach ($rounds as $shift) {
        $a = (($a - $b - $c) ^ zeroFill($c, $shift[0])) & $mask;
        $b = (($b - $c - $a) ^ ($a << $shift[1])) & $mask;
        $c = (($c - $a - $b) ^ zeroFill($b, $shift[2])) & $mask;
    }

    return array($a, $b, $c);
}

/**
 * Computes a checksum over a sequence of integer values.
 *
 * The input is consumed in groups of twelve values, each group being folded
 * into three accumulators that are then passed through mix(). The leftover
 * values (fewer than twelve) and the total length are folded in last.
 *
 * @param array    $url    Numerically indexed integer values
 *                         (presumably character codes produced by strord() - confirm against caller).
 * @param int|null $length Number of values to process; defaults to count($url).
 * @param int      $init   Starting value of the third accumulator.
 * @return int The third word returned by the final mix() call.
 */
function GoogleCH($url, $length=null, $init=GOOGLE_MAGIC) {
    if ($length === null) {
        $length = count($url);
    }

    $a = 0x9E3779B9;
    $b = 0x9E3779B9;
    $c = $init;

    $pos = 0;
    $remaining = $length;

    // Full groups: four values per accumulator, lowest value in the lowest byte.
    while ($remaining >= 12) {
        $a += ($url[$pos] + ($url[$pos + 1] << 8) + ($url[$pos + 2] << 16) + ($url[$pos + 3] << 24));
        $b += ($url[$pos + 4] + ($url[$pos + 5] << 8) + ($url[$pos + 6] << 16) + ($url[$pos + 7] << 24));
        $c += ($url[$pos + 8] + ($url[$pos + 9] << 8) + ($url[$pos + 10] << 16) + ($url[$pos + 11] << 24));

        list($a, $b, $c) = mix($a, $b, $c);

        $pos += 12;
        $remaining -= 12;
    }

    // The lowest byte of $c carries the length, so the tail values added to
    // $c below start at bit 8.
    $c += $length;

    // Leftover values: every case deliberately falls through to the next one.
    switch ($remaining) {
        case 11:
            $c += ($url[$pos + 10] << 24);
        case 10:
            $c += ($url[$pos + 9] << 16);
        case 9:
            $c += ($url[$pos + 8] << 8);
        case 8:
            $b += ($url[$pos + 7] << 24);
        case 7:
            $b += ($url[$pos + 6] << 16);
        case 6:
            $b += ($url[$pos + 5] << 8);
        case 5:
            $b += ($url[$pos + 4]);
        case 4:
            $a += ($url[$pos + 3] << 24);
        case 3:
            $a += ($url[$pos + 2] << 16);
        case 2:
            $a += ($url[$pos + 1] << 8);
        case 1:
            $a += ($url[$pos]);
        // 0: nothing left to add
    }

    $mixed = mix($a, $b, $c);

    return $mixed[2];
}

//converts a string into an array of integers containing the numeric value of the char
function strord($string) {
    for($i=0;$i]/i', $html, $m);
        
    foreach($m[1] AS $url) {
        // check if $domain
        if(preg_match("/^http:\/\/.*$domain/i", $url)) {
            // check if local page
            if(!preg_match('/http:\/\//i', $url))
                $url = "http://www.$domain/$url";
            // get rid of PHPSESSID
            if(preg_match('/(\?PHPSESSID=\w+)$/i', $url, $m2))
                $url = str_replace($m2[1], '', $url);

            // check if url checked
            if(!in_array($url, $_SESSION['urls'])) {
                $_SESSION['urls'][] = $url;
                $pr = google_pagerank($url);
                $pr = trim($pr);
                $_SESSION['pageranks'][$pr][] = $url;          

                site_pageranks($url, $domain);
            }
        }    
    }
}

?>

Related Function(s)

  • google_pagerank()
  • Icemelon -- PHP, CSS, Javascript Tutorials, & More!
      © 2005-2010 Icemelon.com   Email: dave[AT]icemelon[D0T]c0m