Changeset 8


Ignore:
Timestamp:
Aug 5, 2018, 10:26:16 PM (6 years ago)
Author:
chronos
Message:
  • Added: Import from seznamka.cz.
  • Added: Show location column in meet list.
Location:
trunk
Files:
6 added
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/Meet.php

    r4 r8  
    11<?php
     2
     3include_once('Import/Seznamka.php');
     4include_once('Import/TanecniSkola.php');
     5include_once('Import/AstraPraha.php');
     6include_once('Import/Vavruska.php');
     7include_once('Import/SalsaDance.php');
    28
    39abstract class Gender
     
    3642}
    3743
    38 function DecodeHtmlEnt($str) 
     44function DecodeHtmlEnt($str)
    3945{
    4046  $prefix = '&#';
     
    4349  $ret = html_entity_decode($str, ENT_COMPAT, 'UTF-8');
    4450  $p2 = 0;
    45   for(;;) 
     51  for(;;)
    4652  {
    4753    $p = strpos($ret, $prefix, $p2);
    48     if ($p === FALSE)     
     54    if ($p === FALSE)
    4955      break;
    5056    $p2 = strpos($ret, $suffix, $p);
     
    5763      $add = 0;
    5864    } else $add = 1;
    59            
     65
    6066    if (substr($ret, $p + strlen($prefix), strlen($hexchar)) == $hexchar)
    6167      $char = hexdec(substr($ret, $p + strlen($prefix) + strlen($hexchar), $p2 - $p - strlen($prefix) - strlen($hexchar)));
    6268    else
    6369      $char = intval(substr($ret, $p + strlen($prefix), $p2 - $p - strlen($prefix)));
    64            
     70
    6571    $newchar = iconv(
    6672      'UCS-4', 'UTF-8',
     
    7379}
    7480
    75 function RemoveHtmlComments($Content) 
     81function RemoveHtmlComments($Content)
    7682{
    7783  $Result = '';
     
    98104    $Pos = strpos($Text, $Needle);
    99105    if ($Pos !== false)
    100     { 
    101       if ((($Pos + strlen($Needle)) < strlen($Text)) and (is_alpha($Text[$Pos + strlen($Needle)]))) 
     106    {
     107      if ((($Pos + strlen($Needle)) < strlen($Text)) and (is_alpha($Text[$Pos + strlen($Needle)])))
    102108      {
    103109        $Text = substr($Text, $Pos + 1);
     
    124130    $Pos = strpos($Text, $Needle);
    125131    if ($Pos !== false)
    126     { 
    127       if ((($Pos - 1) >= 0) and (is_alpha($Text[$Pos - 1]))) 
     132    {
     133      if ((($Pos - 1) >= 0) and (is_alpha($Text[$Pos - 1])))
    128134      {
    129135        $Text = substr($Text, $Pos + 1);
     
    155161{
    156162  $Result = GetNumberBeforeText($Text, 'cm');
    157   if ($Result == '') $Result = GetNumberBeforeText($Text, 'bez podpatků'); 
     163  if ($Result == '') $Result = GetNumberAfterText($Text, 'měřím');
     164  if ($Result == '') $Result = GetNumberBeforeText($Text, 'bez podpatků');
    158165  if ($Result == '') $Result = GetAgeHeightWeightFromText($Text)[1];
    159166  return $Result;
     
    171178  $Result = array('', '', '');
    172179  $Pattern = '/[0-9]+\/[0-9]+\/[0-9]+/i';
    173   if (preg_match_all($Pattern, $Text, $Matches)) 
     180  if (preg_match_all($Pattern, $Text, $Matches))
    174181  {
    175182    $Result = explode('/', $Matches[0][0]);
    176   } else 
     183  } else
    177184  {
    178185    $Pattern = '/[0-9]+\/[0-9]+/i';
    179     if (preg_match_all($Pattern, $Text, $Matches)) 
     186    if (preg_match_all($Pattern, $Text, $Matches))
    180187    {
    181188      $Result = explode('/', $Matches[0][0]);
     
    195202    if (count($Matches) > 0)
    196203      $Result = $Matches[0][0];
    197   } 
    198   return $Result;
     204  }
     205  return $Result;
     206}
     207
     208$Locations = array(
     209  'Praha' => 'Praha',
     210  'Prahy' => 'Praha',
     211  'Praze' => 'Praha',
     212  'Ostrava' => 'Ostrava',
     213  'Ostravě' => 'Ostrava',
     214  'Ostravy' => 'Ostrava',
     215  'Mladé Boleslavi' => 'Mladá Boleslav',
     216);
     217
     218function GetLocationFromText($Text)
     219{
     220  global $Locations;
     221
     222  foreach ($Locations as $Index => $Location)
     223  {
     224    if (strpos($Text, $Index) !== false) return $Location;
     225  }
     226  return '';
    199227}
    200228
     
    202230{
    203231  public $Database;
    204  
     232
    205233  function ParseAll()
    206234  {
     
    208236    while ($DbRow = $DbResult->fetch_assoc())
    209237    {
    210       $Source = new MeetSource();
     238      $Method = $DbRow['Method'];
     239      if ($Method == 'hes') $Source = new MeetSourceTanecniSkola();
     240      else if ($Method == 'vavruska') $Source = new MeetSourceVavruska();
     241      else if ($Method == 'salsadance') $Source = new MeetSourceSalsaDance();
     242      else if ($Method == 'astra') $Source = new MeetSourceAstraPraha();
     243      else if ($Method == 'seznamka') $Source = new MeetSourceSeznamka();
     244      else {
     245        echo('Unsupported parse method: '.$Method);
     246        return;
     247      }
    211248      $Source->Database = $this->Database;
    212249      $Source->Id = $DbRow['Id'];
    213250      $Source->URL = $DbRow['URL'];
    214       $Source->Method = $DbRow['Method'];
     251      $Source->Method = $Method;
    215252      $Source->Name = $DbRow['Name'];
    216253      $this->Items[] = $Source;
    217       $Source->Parse();
     254      $Source->Import();
    218255    }
    219256  }
     
    227264  public $Id;
    228265  public $Database;
    229  
    230   function Parse()
    231   {
    232     if ($this->Method == 'hes') $this->ParseHes();
    233     else if ($this->Method == 'vavruska') $this->ParseVavruska();
    234     else if ($this->Method == 'salsadance') $this->ParseSalsaDance();
    235     else if ($this->Method == 'astra') $this->ParseAstra();
    236     else echo('Unsupported parse method: '.$this->Method);
    237   }
    238  
    239   function ParseAstra()
    240   {
    241     echo('Parsing '.$this->Name.'...</br>');
    242     $Content = file_get_contents($this->URL);
    243     $Content = RemoveHtmlComments($Content);
    244    
    245     $BlockStart = '----------------------------------------------';
    246     $BlockEnd = '<script type="text/javascript"';
    247     $Content = GetTextBetween($Content, $BlockStart, $BlockEnd);
    248     if ($Content == '')
    249     {
    250       echo('Main block not isolated.</br>');
    251       return;
    252     }
    253 
    254     $ItemStart = '</div><h2></h2>';
    255     $ItemEnd = '<div class="clearer"></div><div class="clearer">';
    256     while (strpos($Content, $ItemStart) !== false)
    257     {
    258       $Item = GetTextBetween($Content, $ItemStart, $ItemEnd);
    259       $MeetItem = new MeetItem();
    260       $MeetItem->Gender = trim(GetTextBetween($Item, ');"><u>', '</u>'));
    261       if ($MeetItem->Gender == 'Hledám partnerku') $MeetItem->Gender = Gender::Male;
    262         else $MeetItem->Gender = Gender::Female;
    263       $End = '<em>';
    264       $MeetItem->Message = trim(substr($Item, 0, strpos($Item, $End)));
    265       $Item = substr($Item, strpos($Item, $End) + strlen($End));
    266       $MeetItem->Message = trim(strip_tags($MeetItem->Message));
    267       $MeetItem->Date = trim(GetTextBetween($Item, 'datum vložení', '</em>'));
    268       if (substr($MeetItem->Date, 0, 1) == ':')
    269         $MeetItem->Date = trim(substr($MeetItem->Date, 1));
    270       $MeetItem->Date = HumanDateToTime($MeetItem->Date);
    271       $MeetItem->Email = GetEmailFromText($MeetItem->Message);     
    272       $MeetItem->Name = '';
    273       $MeetItem->Age = GetAgeFromText($MeetItem->Message);
    274       $MeetItem->Height = GetHeightFromText($MeetItem->Message);
    275       $MeetItem->Weight = GetWeightFromText($MeetItem->Message);
    276       $MeetItem->Phone = '';     
    277       $MeetItem->Database = $this->Database;     
    278       $MeetItem->Source = $this->Id;
    279       $MeetItem->AddIfNotExist();
    280     }
    281   }
    282  
    283   function ParseSalsaDance()
    284   {
    285     echo('Parsing '.$this->Name.'...</br>');
    286     $Content = file_get_contents($this->URL);
    287     $BlockStart = '<ul class="comments">';
    288     $BlockEnd = '<div class="content-paginator clearfix">';
    289     $Content = GetTextBetween($Content, $BlockStart, $BlockEnd);
    290     if ($Content == '')
    291     {
    292       echo('Main block not isolated.</br>');
    293       return;
    294     }
    295    
    296     $ItemStart = '<li class="comment level-0">';
    297     $ItemEnd = '</li>';
    298     while (strpos($Content, $ItemStart) !== false)
    299     {
    300       $Item = GetTextBetween($Content, $ItemStart, $ItemEnd);
    301       $MeetItem = new MeetItem();
    302       $MeetItem->Date = trim(GetTextBetween($Item, '<span class="created">', '</span>'));
    303       $MeetItem->Date = str_replace('. ', '.', $MeetItem->Date);
    304       if (strpos($MeetItem->Date, 'dnes v') !== false)
    305         $MeetItem->Date = str_replace('dnes v', HumanDate(time()), $MeetItem->Date);
    306       if (strpos($MeetItem->Date, 'včera v') !== false)
    307         $MeetItem->Date = str_replace('včera v', HumanDate(strtotime('-1 day', time())), $MeetItem->Date);
    308       $MeetItem->Date = HumanDateTimeToTime($MeetItem->Date);
    309       $MeetItem->Name = trim(GetTextBetween($Item, '<div class="comment-name">', '</div>'));
    310       $MeetItem->Message = trim(GetTextBetween($Item, '<p>', '</p>'));
    311       $MeetItem->Gender = Gender::Undefined;
    312       $MeetItem->Email = GetEmailFromText($MeetItem->Message);
    313       $MeetItem->Phone = '';
    314       $MeetItem->Height = GetHeightFromText($MeetItem->Message);
    315       $MeetItem->Weight = GetWeightFromText($MeetItem->Message);
    316       $MeetItem->Age = GetAgeFromText($MeetItem->Message);     
    317       $MeetItem->Database = $this->Database;
    318       $MeetItem->Source = $this->Id;
    319       //print_r($MeetItem);
    320       $MeetItem->AddIfNotExist();
    321     }
    322   }
    323  
    324   function ParseVavruska()
    325   {
    326     echo('Parsing '.$this->Name.'...</br>');
    327     $Content = file_get_contents($this->URL);
    328    
    329     $BlockStart = '<table class="seznamka">';
    330     $BlockEnd = '<hr class="welt_bottom" />';
    331     $Content = GetTextBetween($Content, $BlockStart, $BlockEnd);
    332     if ($Content == '')
    333     {
    334       echo('Main block not isolated.</br>');
    335       return;
    336     }
    337 
    338     $ItemStart = '<tr class="spc">';
    339     $ItemEnd = '</table>
    340                 </td></tr>';
    341     while (strpos($Content, $ItemStart) !== false)
    342     {
    343       $Item = GetTextBetween($Content, $ItemStart, $ItemEnd);
    344       $MeetItem = new MeetItem();
    345      
    346       $MeetItem->Date = MysqlDateTimeToTime(trim(GetTextBetween($Item, 'přidáno:', '</i>')));
    347       $MeetItem->Gender = trim(GetTextBetween($Item, '<img src=/design/', 'alt="avatar"'));
    348       if ($MeetItem->Gender == 'avatar_m_03.png') $MeetItem->Gender = Gender::Male;
    349         else $MeetItem->Gender = Gender::Female;     
    350       $MeetItem->Name = trim(GetTextBetween($Item, 'Jméno:</td><td>', '</td>'));
    351       $MeetItem->Email = DecodeHtmlEnt(trim(GetTextBetween($Item, 'Kontakt:</td><td>', '</td>')));
    352       $MeetItem->Message = trim(GetTextBetween($Item, 'Text:</td><td>', '</td>'));
    353       $MeetItem->Phone = '';
    354       $MeetItem->Height = GetHeightFromText($MeetItem->Message);
    355       $MeetItem->Weight = GetWeightFromText($MeetItem->Message);
    356       $MeetItem->Age = GetAgeFromText($MeetItem->Message);
    357       $MeetItem->Database = $this->Database;
    358       $MeetItem->Source = $this->Id;
    359       $MeetItem->AddIfNotExist();
    360     }
    361   }
    362  
    363   function ParseHes()
    364   {
    365     $HeightRange = array(
    366       '< 120 cm' => 120,
    367       '120 cm - 130 cm' => 125,
    368       '125 cm - 135 cm' => 130,
    369       '130 cm - 140 cm' => 135,
    370       '135 cm - 145 cm' => 140,
    371       '140 cm - 150 cm' => 145,
    372       '145 cm - 155 cm' => 150,
    373       '150 cm - 160 cm' => 155,
    374       '155 cm - 165 cm' => 160,
    375       '160 cm - 170 cm' => 165,
    376       '165 cm - 175 cm' => 170,
    377       '170 cm - 180 cm' => 175,
    378       '175 cm - 185 cm' => 180,
    379       '180 cm - 190 cm' => 185,
    380       '185 cm - 195 cm' => 190,
    381       '190 cm - 200 cm' => 195,
    382       '195 cm - 205 cm' => 200,
    383       '200 cm <' => 205,
    384     );
    385    
    386     echo('Parsing '.$this->Name.'...</br>');
    387     $Content = file_get_contents($this->URL);   
    388    
    389     $BlockStart = '<div><!--[if IE]><input type=IEbug disabled style="display:none"><![endif]--></div>';
    390     $BlockEnd = '<div class="paginator">';
    391     $Content = GetTextBetween($Content, $BlockStart, $BlockEnd);
    392     if ($Content == '')
    393     {
    394       echo('Main block not isolated.</br>');
    395       return;
    396     }
    397    
    398     $ItemStart = '<div>';
    399     $ItemEnd = '</div>';
    400     $PreviousTime = null;
    401     while (strpos($Content, $ItemStart) !== false)
    402     {
    403       $Item = GetTextBetween($Content, $ItemStart, $ItemEnd);
    404       if ($Item != '')
    405       {
    406         $MeetItem = new MeetItem();
    407         $MeetItem->Date = HumanDateToTime(trim(GetTextBetween($Item, '<h3>', '</h3>')));
    408         if ($MeetItem->Date != null)
    409         {
    410           $PreviousTime = $MeetItem->Date;
    411         } else $MeetItem->Date = $PreviousTime;           
    412        
    413         $Part = GetTextBetween($Item, '</strong>', '<strong>');
    414         $Part = explode(',', $Part);
    415         if (count($Part) > 0) $MeetItem->Name = trim($Part[0]);
    416         if (count($Part) > 1) $MeetItem->Height = trim($Part[1]);
    417         if (strpos($MeetItem->Height, '(') !== false)
    418         {
    419           $MeetItem->Age = substr($MeetItem->Height, strpos($MeetItem->Height, '(') + 1);
    420           $MeetItem->Age = substr($MeetItem->Age, 0, strpos($MeetItem->Age, ')'));
    421           $MeetItem->Age = trim(str_replace('let', '', $MeetItem->Age));
    422           $MeetItem->Height = trim(substr($MeetItem->Height, 0, strpos($MeetItem->Height, '(')));
    423         } else $MeetItem->Age = '';
    424         while (strpos($MeetItem->Height, '  ') !== false)
    425         {
    426           $MeetItem->Height = str_replace('  ', ' ', $MeetItem->Height);
    427         }
    428         if (array_key_exists($MeetItem->Height, $HeightRange))
    429           $MeetItem->Height = $HeightRange[$MeetItem->Height];
    430         $MeetItem->Weight = GetWeightFromText($MeetItem->Message);
    431         $MeetItem->Email = trim(GetTextBetween($Item, '">', '</a>'));
    432         $MeetItem->Phone = trim(GetTextBetween($Item, 'tel.:', '<br />'));
    433         $Gender = trim(GetTextBetween($Item, '</strong>', '<strong>'));
    434         if ($Gender == 'partnera,') $MeetItem->Gender = Gender::Female;
    435           else if ($Gender == 'partnerku,') $MeetItem->Gender = Gender::Male;
    436           else $MeetItem->Gender = Gender::Undefined;
    437         $MeetItem->Message = trim(GetTextBetween($Item, '<p class="message">', '</p>'));       
    438         $MessageAge = GetAgeFromText($MeetItem->Message);
    439         if ($MessageAge != '') $MeetItem->Age = $MessageAge;
    440         $MessageHeight = GetHeightFromText($MeetItem->Message);
    441         if ($MessageHeight != '') $MeetItem->Height = $MessageHeight;
    442         $MeetItem->Database = $this->Database;
    443         $MeetItem->Source = $this->Id;
    444         $MeetItem->AddIfNotExist();
    445       }     
    446     }   
    447   }
    448 }
    449 
    450 class MeetItem
     266}
     267
     268class MeetItem
    451269{
    452270  var $Database;
     
    456274  var $Gender = Gender::Undefined;
    457275  var $Phone = '';
    458   var $Email = ''; 
     276  var $Email = '';
    459277  var $Age = '';
    460278  var $Height = '';
     
    462280  var $Weight = '';
    463281  var $Location = '';
     282  var $Image = '';
    464283
    465284  function AddIfNotExist()
    466285  {
    467     $DbResult = $this->Database->select('MeetItem', '*', 
     286    $DbResult = $this->Database->select('MeetItem', '*',
    468287      '(`Message` = "'.$this->Database->real_escape_string($this->Message).'") AND '.
    469288      '(`Email` = "'.$this->Database->real_escape_string($this->Email).'") AND '.
  • trunk/index.php

    r7 r8  
    238238      array('Name' => 'Age', 'Title' => 'Věk'),
    239239      array('Name' => 'Weight', 'Title' => 'Váha'),
     240      array('Name' => 'Location', 'Title' => 'Úmístění'),
    240241      array('Name' => 'Gender', 'Title' => 'Pohlaví'),
    241242      array('Name' => 'Message', 'Title' => 'Zpráva'),
     
    255256        '<td>'.$MeetItem['Age'].'</td>'.
    256257        '<td>'.$MeetItem['Weight'].'</td>'.
     258        '<td>'.$MeetItem['Location'].'</td>'.
    257259        //'<td>'.$MeetItem['Email'].'</td>'.
    258260        //'<td>'.$MeetItem['Phone'].'</td>'.
    259261        '<td>'.$Gender[$MeetItem['Gender']].'</td>'.
    260262        '<td>'.$MeetItem['Message'].'</td>'.
    261         '<td><a href="'.$this->Link($MeetItem['SourceURL']).'">'.$MeetItem['SourceName'].'</a></td>';
     263        '<td><a href="'.$MeetItem['SourceURL'].'">'.$MeetItem['SourceName'].'</a></td>';
    262264      $Output .= '</tr>';
    263265    }
Note: See TracChangeset for help on using the changeset viewer.