Changeset 3 for trunk/Meet.php


Ignore:
Timestamp:
Aug 4, 2018, 10:03:34 PM (6 years ago)
Author:
chronos
Message:
  • Added: Parse meet items from 4 websites.
  • Added: Allow filtering by age and height using jQuery.
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/Meet.php

    r2 r3  
    11<?php
     2
     3abstract class Gender
     4{
     5    const Undefined = 0;
     6    const Male = 1;
     7    const Female = 2;
     8}
    29
    310function GetTextBetween(&$Text, $Start, $End)
     
    2936}
    3037
    31 
    32 class Meet
     38function DecodeHtmlEnt($str)
     39{
     40  $prefix = '&#';
     41  $suffix = ';';
     42  $hexchar = 'x';
     43  $ret = html_entity_decode($str, ENT_COMPAT, 'UTF-8');
     44  $p2 = 0;
     45  for(;;)
     46  {
     47    $p = strpos($ret, $prefix, $p2);
     48    if ($p === FALSE)     
     49      break;
     50    $p2 = strpos($ret, $suffix, $p);
     51    if ($p2 === FALSE)
     52    {
     53      $p2 = $p + strlen($prefix);
     54      while (($p2 < strlen($ret)) and is_numeric($ret[$p2]))
     55        $p2++;
     56      if ($p2 <= ($p + strlen($prefix))) break;
     57      $add = 0;
     58    } else $add = 1;
     59           
     60    if (substr($ret, $p + strlen($prefix), strlen($hexchar)) == $hexchar)
     61      $char = hexdec(substr($ret, $p + strlen($prefix) + strlen($hexchar), $p2 - $p - strlen($prefix) - strlen($hexchar)));
     62    else
     63      $char = intval(substr($ret, $p + strlen($prefix), $p2 - $p - strlen($prefix)));
     64           
     65    $newchar = iconv(
     66      'UCS-4', 'UTF-8',
     67      chr(($char >> 24) & 0xFF).chr(($char >> 16) & 0xFF).chr(($char >> 8) & 0xFF).chr($char & 0xFF)
     68    );
     69    $ret = substr_replace($ret, $newchar, $p, $add + $p2 - $p);
     70    $p2 = $p + strlen($newchar) + $add;
     71  }
     72  return $ret;
     73}
     74
     75function RemoveHtmlComments($Content)
     76{
     77  $Result = '';
     78  while (strpos($Content, '<!--') !== false)
     79  {
     80    $Result .= substr($Content, 0, strpos($Content, '<!--'));
     81    $Content = substr($Content, strpos($Content, '<!--') + 4);
     82    $Content = substr($Content, strpos($Content, '-->') + 3);
     83  }
     84  return $Result;
     85  //return preg_replace('/<!--(.|\s)*?-->/', '', $Content);
     86}
     87
     88function is_alpha($Char)
     89{
     90  return ((($Char >= 'a') and ($Char <= 'z')) or (($Char >= 'A') and ($Char <= 'Z')));
     91}
     92
     93function GetNumberBeforeText($Text, $Needle)
     94{
     95  $Result = '';
     96  for(;;)
     97  {
     98    $Pos = strpos($Text, $Needle);
     99    if ($Pos !== false)
     100    { 
     101      if ((($Pos + strlen($Needle)) < strlen($Text)) and (is_alpha($Text[$Pos + strlen($Needle)])))
     102      {
     103        $Text = substr($Text, $Pos + 1);
     104        continue;
     105      }
     106      $Result = substr($Text, 0, $Pos);
     107      $Text = substr($Text, $Pos + 1);
     108      $Start = $Pos - 1;
     109      while (($Start >= 0) and (is_numeric($Result[$Start]) or ($Result[$Start] == ' ')))
     110        $Start--;
     111      $Start++;
     112      $Result = trim(substr($Result, $Start, $Pos - $Start));
     113      break;
     114    } else break;
     115  }
     116  return $Result;
     117}
     118
     119function GetNumberAfterText($Text, $Needle)
     120{
     121  $Result = '';
     122  for(;;)
     123  {
     124    $Pos = strpos($Text, $Needle);
     125    if ($Pos !== false)
     126    { 
     127      if ((($Pos - 1) >= 0) and (is_alpha($Text[$Pos - 1])))
     128      {
     129        $Text = substr($Text, $Pos + 1);
     130        continue;
     131      }
     132      $Result = substr($Text, $Pos + strlen($Needle));
     133      $Text = substr($Text, $Pos + 1);
     134      $End = 0;
     135      while (($End < strlen($Result)) and (is_numeric($Result[$End]) or ($Result[$End] == ' ')))
     136        $End++;
     137      $End--;
     138
     139      $Result = trim(substr($Result, 0, $End + 1));
     140      break;
     141    } else break;
     142  }
     143  return $Result;
     144}
     145
     146function GetAgeFromText($Text)
     147{
     148  $Result = GetNumberBeforeText($Text, 'let');
     149  if ($Result == '') $Result = GetNumberAfterText($Text, 'Je mi');
     150  return $Result;
     151}
     152
     153function GetHeightFromText($Text)
     154{
     155  $Result = GetNumberBeforeText($Text, 'cm');
     156  if ($Result == '') $Result = GetNumberBeforeText($Text, 'bez podpatků'); 
     157  return $Result;
     158}
     159
     160function GetWeightFromText($Text)
     161{
     162  $Result = GetNumberBeforeText($Text, 'kg');
     163  return $Result;
     164}
     165
     166function GetEmailFromText($Text)
     167{
     168  $Result = '';
     169  if (strpos($Text, '@') !== false)
     170  {
     171    $Pattern = '/[a-z0-9_\-\+\.]+@[a-z0-9\-]+\.([a-z]{2,4})(?:\.[a-z]{2})?/i';
     172    preg_match_all($Pattern, $Text, $Matches);
     173    if (count($Matches) > 0)
     174      $Result = $Matches[0][0];
     175  } 
     176  return $Result;
     177}
     178
     179class MeetSources
     180{
     181  public $Database;
     182 
     183  function ParseAll()
     184  {
     185    $DbResult = $this->Database->select('MeetSource', '*');
     186    while ($DbRow = $DbResult->fetch_assoc())
     187    {
     188      $Source = new MeetSource();
     189      $Source->Database = $this->Database;
     190      $Source->Id = $DbRow['Id'];
     191      $Source->URL = $DbRow['URL'];
     192      $Source->Method = $DbRow['Method'];
     193      $Source->Name = $DbRow['Name'];
     194      $this->Items[] = $Source;
     195      $Source->Parse();
     196    }
     197  }
     198}
     199
     200class MeetSource
    33201{
    34202  public $Name;
    35203  public $URL;
     204  public $Method;
    36205  public $Id;
    37206  public $Database;
     
    39208  function Parse()
    40209  {
     210    if ($this->Method == 'hes') $this->ParseHes();
     211    else if ($this->Method == 'vavruska') $this->ParseVavruska();
     212    else if ($this->Method == 'salsadance') $this->ParseSalsaDance();
     213    else if ($this->Method == 'astra') $this->ParseAstra();
     214    else echo('Unsupported parse method: '.$this->Method);
     215  }
     216 
     217  function ParseAstra()
     218  {
     219    echo('Parsing '.$this->Name.'...</br>');
    41220    $Content = file_get_contents($this->URL);
     221    $Content = RemoveHtmlComments($Content);
     222   
     223    $BlockStart = '----------------------------------------------';
     224    $BlockEnd = '<script type="text/javascript"';
     225    $Content = GetTextBetween($Content, $BlockStart, $BlockEnd);
     226    if ($Content == '')
     227    {
     228      echo('Main block not isolated.</br>');
     229      return;
     230    }
     231
     232    $ItemStart = '</div><h2></h2>';
     233    $ItemEnd = '<div class="clearer"></div><div class="clearer">';
     234    while (strpos($Content, $ItemStart) !== false)
     235    {
     236      $Item = GetTextBetween($Content, $ItemStart, $ItemEnd);
     237      $MeetItem = new MeetItem();
     238      $MeetItem->Gender = trim(GetTextBetween($Item, ');"><u>', '</u>'));
     239      if ($MeetItem->Gender == 'Hledám partnerku') $MeetItem->Gender = Gender::Male;
     240        else $MeetItem->Gender = Gender::Female;
     241      $End = '<em>';
     242      $MeetItem->Message = trim(substr($Item, 0, strpos($Item, $End)));
     243      $Item = substr($Item, strpos($Item, $End) + strlen($End));
     244      $MeetItem->Message = trim(strip_tags($MeetItem->Message));
     245      $MeetItem->Date = trim(GetTextBetween($Item, 'datum vložení', '</em>'));
     246      if (substr($MeetItem->Date, 0, 1) == ':')
     247        $MeetItem->Date = trim(substr($MeetItem->Date, 1));
     248      $MeetItem->Date = HumanDateToTime($MeetItem->Date);
     249      $MeetItem->Email = GetEmailFromText($MeetItem->Message);     
     250      $MeetItem->Name = '';
     251      $MeetItem->Age = GetAgeFromText($MeetItem->Message);
     252      $MeetItem->Height = GetHeightFromText($MeetItem->Message);
     253      $MeetItem->Weight = GetWeightFromText($MeetItem->Message);
     254      $MeetItem->Phone = '';     
     255      $MeetItem->Database = $this->Database;     
     256      $MeetItem->Source = $this->Id;
     257      $MeetItem->AddIfNotExist();
     258    }
     259  }
     260 
     261  function ParseSalsaDance()
     262  {
     263    echo('Parsing '.$this->Name.'...</br>');
     264    $Content = file_get_contents($this->URL);
     265    $BlockStart = '<ul class="comments">';
     266    $BlockEnd = '<div class="content-paginator clearfix">';
     267    $Content = GetTextBetween($Content, $BlockStart, $BlockEnd);
     268    if ($Content == '')
     269    {
     270      echo('Main block not isolated.</br>');
     271      return;
     272    }
     273   
     274    $ItemStart = '<li class="comment level-0">';
     275    $ItemEnd = '</li>';
     276    while (strpos($Content, $ItemStart) !== false)
     277    {
     278      $Item = GetTextBetween($Content, $ItemStart, $ItemEnd);
     279      $MeetItem = new MeetItem();
     280      $MeetItem->Date = trim(GetTextBetween($Item, '<span class="created">', '</span>'));
     281      $MeetItem->Date = str_replace('. ', '.', $MeetItem->Date);
     282      if (strpos($MeetItem->Date, 'dnes v') !== false)
     283        $MeetItem->Date = str_replace('dnes v', HumanDate(time()), $MeetItem->Date);
     284      $MeetItem->Date = HumanDateTimeToTime($MeetItem->Date);
     285      $MeetItem->Name = trim(GetTextBetween($Item, '<div class="comment-name">', '</div>'));
     286      $MeetItem->Message = trim(GetTextBetween($Item, '<p>', '</p>'));
     287      $MeetItem->Gender = Gender::Undefined;
     288      $MeetItem->Email = GetEmailFromText($MeetItem->Message);
     289      $MeetItem->Phone = '';
     290      $MeetItem->Height = GetHeightFromText($MeetItem->Message);
     291      $MeetItem->Weight = GetWeightFromText($MeetItem->Message);
     292      $MeetItem->Age = GetAgeFromText($MeetItem->Message);     
     293      $MeetItem->Database = $this->Database;
     294      $MeetItem->Source = $this->Id;
     295      $MeetItem->AddIfNotExist();
     296    }
     297  }
     298 
     299  function ParseVavruska()
     300  {
     301    echo('Parsing '.$this->Name.'...</br>');
     302    $Content = file_get_contents($this->URL);
     303   
     304    $BlockStart = '<table class="seznamka">';
     305    $BlockEnd = '<hr class="welt_bottom" />';
     306    $Content = GetTextBetween($Content, $BlockStart, $BlockEnd);
     307    if ($Content == '')
     308    {
     309      echo('Main block not isolated.</br>');
     310      return;
     311    }
     312
     313    $ItemStart = '<tr class="spc">';
     314    $ItemEnd = '</table>
     315                </td></tr>';
     316    while (strpos($Content, $ItemStart) !== false)
     317    {
     318      $Item = GetTextBetween($Content, $ItemStart, $ItemEnd);
     319      $MeetItem = new MeetItem();
     320     
     321      $MeetItem->Date = MysqlDateTimeToTime(trim(GetTextBetween($Item, 'přidáno:', '</i>')));
     322      $MeetItem->Gender = trim(GetTextBetween($Item, '<img src=/design/', 'alt="avatar"'));
     323      if ($MeetItem->Gender == 'avatar_m_03.png') $MeetItem->Gender = Gender::Male;
     324        else $MeetItem->Gender = Gender::Female;     
     325      $MeetItem->Name = trim(GetTextBetween($Item, 'Jméno:</td><td>', '</td>'));
     326      $MeetItem->Email = DecodeHtmlEnt(trim(GetTextBetween($Item, 'Kontakt:</td><td>', '</td>')));
     327      $MeetItem->Message = trim(GetTextBetween($Item, 'Text:</td><td>', '</td>'));
     328      $MeetItem->Phone = '';
     329      $MeetItem->Height = GetHeightFromText($MeetItem->Message);
     330      $MeetItem->Weight = GetWeightFromText($MeetItem->Message);
     331      $MeetItem->Age = GetAgeFromText($MeetItem->Message);
     332      $MeetItem->Database = $this->Database;
     333      $MeetItem->Source = $this->Id;
     334      $MeetItem->AddIfNotExist();
     335    }
     336  }
     337 
     338  function ParseHes()
     339  {
     340    echo('Parsing '.$this->Name.'...</br>');
     341    $Content = file_get_contents($this->URL);   
    42342   
    43343    $BlockStart = '<div><!--[if IE]><input type=IEbug disabled style="display:none"><![endif]--></div>';
    44344    $BlockEnd = '<div class="paginator">';
    45345    $Content = GetTextBetween($Content, $BlockStart, $BlockEnd);
    46     if ($Content == '') echo('Main block not isolated.</br>');
     346    if ($Content == '')
     347    {
     348      echo('Main block not isolated.</br>');
     349      return;
     350    }
    47351   
    48352    $ItemStart = '<div>';
    49353    $ItemEnd = '</div>';
    50354    $PreviousTime = null;
    51     while (strpos($Content, $ItemStart))
     355    while (strpos($Content, $ItemStart) !== false)
    52356    {
    53357      $Item = GetTextBetween($Content, $ItemStart, $ItemEnd);
    54      
    55358      if ($Item != '')
    56359      {
     
    72375           $MeetItem->Age = trim(str_replace('let', '', $MeetItem->Age));
    73376           $MeetItem->Height = substr($MeetItem->Height, 0, strpos($MeetItem->Height, '('));
    74          } else $MeetItem->Age = ''; 
     377         } else $MeetItem->Age = '';
     378         $MeetItem->Weight = GetWeightFromText($MeetItem->Message);
    75379         $MeetItem->Email = trim(GetTextBetween($Item, '">', '</a>'));
    76380         $MeetItem->Phone = trim(GetTextBetween($Item, 'tel.:', '<br />'));
    77381         $Gender = trim(GetTextBetween($Item, '</strong>', '<strong>'));
    78          if ($Gender == 'partnera,') $MeetItem->Gender = 1;
    79            else $MeetItem->Gender = 0;
    80          $MeetItem->Message = trim(GetTextBetween($Item, '<p class="message">', '</p>'));
    81          
    82          //print_r($MeetItem);
    83          $DbResult = $this->Database->select('MeetItem', '*', '(`Message` = "'.$MeetItem->Message.'") AND '.
    84            '(`Age` = "'.$MeetItem->Age.'") AND '.
    85            '(`Date` = "'.TimeToMysqlDate($MeetItem->Date).'")');
    86          if ($DbResult->num_rows == 0)
    87          {
    88            $this->Database->insert('MeetItem', array(
    89              'Message' => $MeetItem->Message,
    90              'Date' => TimeToMysqlDate($MeetItem->Date),
    91              'Gender' => $MeetItem->Gender,
    92              'Age' => $MeetItem->Age,
    93              'Email' => $MeetItem->Email,
    94              'Phone' => $MeetItem->Phone,
    95              'Name' => $MeetItem->Name,
    96              'Height' => $MeetItem->Height,
    97              'Source' => $this->Id,
    98            ));
    99          }
     382         if ($Gender == 'partnera,') $MeetItem->Gender = Gender::Female;
     383           else if ($Gender == 'partnerku,') $MeetItem->Gender = Gender::Male;
     384           else $MeetItem->Gender = Gender::Undefined;
     385         $MeetItem->Message = trim(GetTextBetween($Item, '<p class="message">', '</p>'));       
     386         $MessageAge = GetAgeFromText($MeetItem->Message);
     387         if ($MessageAge != '') $MeetItem->Age = $MessageAge;
     388         $MessageHeight = GetHeightFromText($MeetItem->Message);
     389         if ($MessageHeight != '') $MeetItem->Height = $MessageHeight;
     390         $MeetItem->Database = $this->Database;
     391         $MeetItem->Source = $this->Id;
     392         $MeetItem->AddIfNotExist();
    100393      }     
    101394    }   
     
    105398class MeetItem
    106399{
    107   var $Name;
    108   var $Message;
    109   var $Date;
    110   var $Gender;
    111   var $Phone;
    112   var $Email; 
    113   var $Age;
    114   var $Height;
    115 }
     400  var $Database;
     401  var $Name = '';
     402  var $Message = '';
     403  var $Date = '';
     404  var $Gender = Gender::Undefined;
     405  var $Phone = '';
     406  var $Email = ''; 
     407  var $Age = '';
     408  var $Height = '';
     409  var $Source = 0;
     410  var $Weight = '';
     411  var $Location = '';
     412
     413  function AddIfNotExist()
     414  {
     415    $DbResult = $this->Database->select('MeetItem', '*',
     416      '(`Message` = "'.$this->Database->real_escape_string($this->Message).'") AND '.
     417      '(`Email` = "'.$this->Database->real_escape_string($this->Email).'") AND '.
     418      '(`Date` = "'.$this->Database->real_escape_string(TimeToMysqlDate($this->Date)).'")');
     419    if ($DbResult->num_rows == 0)
     420    {
     421      $this->Database->insert('MeetItem', array(
     422        'Message' => $this->Message,
     423        'Date' => TimeToMysqlDate($this->Date),
     424        'Gender' => $this->Gender,
     425        'Age' => $this->Age,
     426        'Email' => $this->Email,
     427        'Phone' => $this->Phone,
     428        'Name' => $this->Name,
     429        'Height' => $this->Height,
     430        'Weight' => $this->Weight,
     431        'Location' => $this->Location,
     432        'Source' => $this->Source,
     433      ));
     434    }
     435  }
     436}
Note: See TracChangeset for help on using the changeset viewer.