File: /var/www/web37/htdocs/fickanzeiger/components/GoogleStats.php
<?php
/**
 * Gathers Google-derived statistics for a site: the number of indexed pages,
 * the number of backlinks and the PageRank. Results can optionally be stored
 * in, and served from, the "cacheGoogleDetail" model.
 */
class GoogleStats
{
    /**
     * Extraction mode selected by getGoogleDetailsOfSite(). The value 2 makes
     * requests go through the additional server URL.
     *
     * @var mixed
     */
    private $extractionWay;

    /**
     * Additional server URL; only populated while the extraction mode is 2.
     *
     * @var string|null
     */
    private $additionalServerUrl;

    /**
     * When true, raw search responses are echoed and the flag is forwarded
     * to GooglePageRanker.
     *
     * @var bool
     */
    public $debug = false;

    /**
     * Runs a Google query for the given URL and extracts the reported
     * result count from the response.
     *
     * @param string $url      Site URL; a leading http:// or https:// is removed.
     * @param string $searcher "google" (site: query) or "googlebl" (link: query).
     *                         Unknown values fall back to "google".
     *
     * @return int Parsed result count, or 0 when the response is empty or
     *             does not contain the expected counter fragment.
     */
    public function getSearcherValue($url, $searcher = "google")
    {
        // Strip either scheme (case-insensitively) so that the site:/link:
        // operators receive a bare host/path for https URLs as well.
        $url = urlencode(preg_replace("#^https?://#i", "", $url));

        $searcherUrls = array(
            'google'   => 'http://www.google.com/search?hl=en&q=site%3A' . $url,
            'googlebl' => 'http://www.google.com/search?hl=en&q=link%3A' . $url,
        );

        // Both result pages are parsed with the same "of about <b>N</b>" counter pattern.
        $countPattern = "#</b> of (?:about )?<b>([\d,]+)</b>#";
        $searcherPats = array(
            'google'   => $countPattern,
            'googlebl' => $countPattern,
        );

        if (!array_key_exists($searcher, $searcherUrls)) {
            $searcher = "google";
        }

        $httpClient = new HttpClient();
        if ($this->extractionWay == 2) {
            $httpClient->additionalServerUrl = $this->additionalServerUrl;
        }

        $buff = $httpClient->getSiteContent($searcherUrls[$searcher]);
        if ($this->debug) {
            echo $buff;
        }

        // An empty response (null, false, '') can never match; returning early
        // avoids handing a non-string subject to preg_match().
        if (empty($buff) || !preg_match($searcherPats[$searcher], $buff, $m)) {
            return 0;
        }

        // Drop the thousands separators before converting to an integer.
        return intval(str_replace(",", "", $m[1]));
    }

    /**
     * Returns the backlink count reported by the "link:" query.
     *
     * @param string $url Site URL.
     *
     * @return int
     */
    public function getBacklinksCount($url)
    {
        return $this->getSearcherValue($url, "googlebl");
    }

    /**
     * Returns the indexed page count reported by the "site:" query.
     *
     * @param string $url Site URL.
     *
     * @return int
     */
    public function getIndexedPagesCount($url)
    {
        return $this->getSearcherValue($url, "google");
    }

    /**
     * Delegates the PageRank lookup to GooglePageRanker, forwarding the debug
     * flag and the currently configured additional server URL (null unless
     * the extraction mode is 2).
     *
     * @param string $url Site URL.
     *
     * @return mixed Whatever GooglePageRanker::getPageRank() returns.
     */
    public function getPageRank($url)
    {
        $gpr = new GooglePageRanker();
        $gpr->debug = $this->debug;

        return $gpr->getPageRank($url, $this->additionalServerUrl);
    }

    /**
     * Returns the cached details for a URL without ever querying Google.
     *
     * @param string $url Site URL.
     *
     * @return array|false Cached details, or false when caching is disabled
     *                     or nothing is cached for the URL.
     */
    public function getGoogleDetailsOfSiteIfCached($url)
    {
        if (!$this->isCachingEnabled()) {
            return false;
        }

        $siteStats = Model::factoryInstance("cacheGoogleDetail")->getGoogleDetailsFromCache($url);
        if (empty($siteStats)) {
            return false;
        }

        return $siteStats->toArray();
    }

    /**
     * Returns PageRank, backlink count and indexed page count for a URL,
     * serving them from the cache when possible and storing freshly fetched
     * values otherwise.
     *
     * @param string $url       Site URL.
     * @param mixed  $forcedWay Extraction mode to force. Any truthy value
     *                          bypasses the cache for both reading and writing;
     *                          false uses the "wayForPagerankExtraction" setting.
     *
     * @return array Cached record as an array, or an array with the keys
     *               "pageRank", "backlinksCount" and "indexedPagesCount".
     */
    public function getGoogleDetailsOfSite($url, $forcedWay = false)
    {
        $this->extractionWay = $forcedWay ? $forcedWay : Config::get("wayForPagerankExtraction");

        // Reset the server URL for every call: otherwise a previous call made
        // with mode 2 would leak its server into getPageRank() for other modes.
        $this->additionalServerUrl = ($this->extractionWay == 2)
            ? Config::get("additionalServerUrl")
            : null;

        // One flag drives both the cache read and the cache write so they
        // cannot disagree about whether caching is active.
        $useCache = !$forcedWay && $this->isCachingEnabled();

        if ($useCache) {
            $siteStats = Model::factoryInstance("cacheGoogleDetail")->getGoogleDetailsFromCache($url);
            if (!empty($siteStats)) {
                return $siteStats->toArray();
            }
        }

        // Nothing usable in the cache: fetch fresh values.
        $results = array();
        $results['pageRank'] = $this->getPageRank($url);
        $results['backlinksCount'] = $this->getBacklinksCount($url);
        $results['indexedPagesCount'] = $this->getIndexedPagesCount($url);

        if ($useCache) {
            $this->storeResults($url, $results);
        }

        return $results;
    }

    /**
     * Tells whether the "pageRankCachingEnabled" setting is switched on.
     *
     * @return bool
     */
    private function isCachingEnabled()
    {
        return (bool) Config::get('pageRankCachingEnabled');
    }

    /**
     * Saves freshly fetched details as a new cache record and copies the
     * PageRank onto every "site" row that has the same URL.
     *
     * @param string $url     Site URL.
     * @param array  $results Details as built by getGoogleDetailsOfSite().
     *
     * @return void
     */
    private function storeResults($url, $results)
    {
        // This is only reached after a cache miss, so a new record is always needed.
        $siteStats = new CacheGoogleDetailRecord();
        $siteStats->url = $url;
        $siteStats->fromArray($results);
        $siteStats->save();

        $criteria = new Criteria();
        $criteria->add("url", $url);

        // Resolve the model once instead of on every iteration.
        $siteModel = Model::factoryInstance("site");
        foreach ($siteModel->findAll($criteria, "siteId") as $site) {
            $siteModel->updateByPk(array("pageRank" => $results['pageRank']), $site['siteId']);
        }
    }
}