Changeset 3 for trunk/Meet.php
- Timestamp: Aug 4, 2018, 10:03:34 PM (6 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/Meet.php
r2 r3 1 1 <?php 2 3 abstract class Gender 4 { 5 const Undefined = 0; 6 const Male = 1; 7 const Female = 2; 8 } 2 9 3 10 function GetTextBetween(&$Text, $Start, $End) … … 29 36 } 30 37 31 32 class Meet 38 function DecodeHtmlEnt($str) 39 { 40 $prefix = '&#'; 41 $suffix = ';'; 42 $hexchar = 'x'; 43 $ret = html_entity_decode($str, ENT_COMPAT, 'UTF-8'); 44 $p2 = 0; 45 for(;;) 46 { 47 $p = strpos($ret, $prefix, $p2); 48 if ($p === FALSE) 49 break; 50 $p2 = strpos($ret, $suffix, $p); 51 if ($p2 === FALSE) 52 { 53 $p2 = $p + strlen($prefix); 54 while (($p2 < strlen($ret)) and is_numeric($ret[$p2])) 55 $p2++; 56 if ($p2 <= ($p + strlen($prefix))) break; 57 $add = 0; 58 } else $add = 1; 59 60 if (substr($ret, $p + strlen($prefix), strlen($hexchar)) == $hexchar) 61 $char = hexdec(substr($ret, $p + strlen($prefix) + strlen($hexchar), $p2 - $p - strlen($prefix) - strlen($hexchar))); 62 else 63 $char = intval(substr($ret, $p + strlen($prefix), $p2 - $p - strlen($prefix))); 64 65 $newchar = iconv( 66 'UCS-4', 'UTF-8', 67 chr(($char >> 24) & 0xFF).chr(($char >> 16) & 0xFF).chr(($char >> 8) & 0xFF).chr($char & 0xFF) 68 ); 69 $ret = substr_replace($ret, $newchar, $p, $add + $p2 - $p); 70 $p2 = $p + strlen($newchar) + $add; 71 } 72 return $ret; 73 } 74 75 function RemoveHtmlComments($Content) 76 { 77 $Result = ''; 78 while (strpos($Content, '<!--') !== false) 79 { 80 $Result .= substr($Content, 0, strpos($Content, '<!--')); 81 $Content = substr($Content, strpos($Content, '<!--') + 4); 82 $Content = substr($Content, strpos($Content, '-->') + 3); 83 } 84 return $Result; 85 //return preg_replace('/<!--(.|\s)*?-->/', '', $Content); 86 } 87 88 function is_alpha($Char) 89 { 90 return ((($Char >= 'a') and ($Char <= 'z')) or (($Char >= 'A') and ($Char <= 'Z'))); 91 } 92 93 function GetNumberBeforeText($Text, $Needle) 94 { 95 $Result = ''; 96 for(;;) 97 { 98 $Pos = strpos($Text, $Needle); 99 if ($Pos !== false) 100 { 101 if ((($Pos + strlen($Needle)) < strlen($Text)) and (is_alpha($Text[$Pos 
+ strlen($Needle)]))) 102 { 103 $Text = substr($Text, $Pos + 1); 104 continue; 105 } 106 $Result = substr($Text, 0, $Pos); 107 $Text = substr($Text, $Pos + 1); 108 $Start = $Pos - 1; 109 while (($Start >= 0) and (is_numeric($Result[$Start]) or ($Result[$Start] == ' '))) 110 $Start--; 111 $Start++; 112 $Result = trim(substr($Result, $Start, $Pos - $Start)); 113 break; 114 } else break; 115 } 116 return $Result; 117 } 118 119 function GetNumberAfterText($Text, $Needle) 120 { 121 $Result = ''; 122 for(;;) 123 { 124 $Pos = strpos($Text, $Needle); 125 if ($Pos !== false) 126 { 127 if ((($Pos - 1) >= 0) and (is_alpha($Text[$Pos - 1]))) 128 { 129 $Text = substr($Text, $Pos + 1); 130 continue; 131 } 132 $Result = substr($Text, $Pos + strlen($Needle)); 133 $Text = substr($Text, $Pos + 1); 134 $End = 0; 135 while (($End < strlen($Result)) and (is_numeric($Result[$End]) or ($Result[$End] == ' '))) 136 $End++; 137 $End--; 138 139 $Result = trim(substr($Result, 0, $End + 1)); 140 break; 141 } else break; 142 } 143 return $Result; 144 } 145 146 function GetAgeFromText($Text) 147 { 148 $Result = GetNumberBeforeText($Text, 'let'); 149 if ($Result == '') $Result = GetNumberAfterText($Text, 'Je mi'); 150 return $Result; 151 } 152 153 function GetHeightFromText($Text) 154 { 155 $Result = GetNumberBeforeText($Text, 'cm'); 156 if ($Result == '') $Result = GetNumberBeforeText($Text, 'bez podpatků'); 157 return $Result; 158 } 159 160 function GetWeightFromText($Text) 161 { 162 $Result = GetNumberBeforeText($Text, 'kg'); 163 return $Result; 164 } 165 166 function GetEmailFromText($Text) 167 { 168 $Result = ''; 169 if (strpos($Text, '@') !== false) 170 { 171 $Pattern = '/[a-z0-9_\-\+\.]+@[a-z0-9\-]+\.([a-z]{2,4})(?:\.[a-z]{2})?/i'; 172 preg_match_all($Pattern, $Text, $Matches); 173 if (count($Matches) > 0) 174 $Result = $Matches[0][0]; 175 } 176 return $Result; 177 } 178 179 class MeetSources 180 { 181 public $Database; 182 183 function ParseAll() 184 { 185 $DbResult = 
$this->Database->select('MeetSource', '*'); 186 while ($DbRow = $DbResult->fetch_assoc()) 187 { 188 $Source = new MeetSource(); 189 $Source->Database = $this->Database; 190 $Source->Id = $DbRow['Id']; 191 $Source->URL = $DbRow['URL']; 192 $Source->Method = $DbRow['Method']; 193 $Source->Name = $DbRow['Name']; 194 $this->Items[] = $Source; 195 $Source->Parse(); 196 } 197 } 198 } 199 200 class MeetSource 33 201 { 34 202 public $Name; 35 203 public $URL; 204 public $Method; 36 205 public $Id; 37 206 public $Database; … … 39 208 function Parse() 40 209 { 210 if ($this->Method == 'hes') $this->ParseHes(); 211 else if ($this->Method == 'vavruska') $this->ParseVavruska(); 212 else if ($this->Method == 'salsadance') $this->ParseSalsaDance(); 213 else if ($this->Method == 'astra') $this->ParseAstra(); 214 else echo('Unsupported parse method: '.$this->Method); 215 } 216 217 function ParseAstra() 218 { 219 echo('Parsing '.$this->Name.'...</br>'); 41 220 $Content = file_get_contents($this->URL); 221 $Content = RemoveHtmlComments($Content); 222 223 $BlockStart = '----------------------------------------------'; 224 $BlockEnd = '<script type="text/javascript"'; 225 $Content = GetTextBetween($Content, $BlockStart, $BlockEnd); 226 if ($Content == '') 227 { 228 echo('Main block not isolated.</br>'); 229 return; 230 } 231 232 $ItemStart = '</div><h2></h2>'; 233 $ItemEnd = '<div class="clearer"></div><div class="clearer">'; 234 while (strpos($Content, $ItemStart) !== false) 235 { 236 $Item = GetTextBetween($Content, $ItemStart, $ItemEnd); 237 $MeetItem = new MeetItem(); 238 $MeetItem->Gender = trim(GetTextBetween($Item, ');"><u>', '</u>')); 239 if ($MeetItem->Gender == 'Hledám partnerku') $MeetItem->Gender = Gender::Male; 240 else $MeetItem->Gender = Gender::Female; 241 $End = '<em>'; 242 $MeetItem->Message = trim(substr($Item, 0, strpos($Item, $End))); 243 $Item = substr($Item, strpos($Item, $End) + strlen($End)); 244 $MeetItem->Message = trim(strip_tags($MeetItem->Message)); 245 
$MeetItem->Date = trim(GetTextBetween($Item, 'datum vložení', '</em>')); 246 if (substr($MeetItem->Date, 0, 1) == ':') 247 $MeetItem->Date = trim(substr($MeetItem->Date, 1)); 248 $MeetItem->Date = HumanDateToTime($MeetItem->Date); 249 $MeetItem->Email = GetEmailFromText($MeetItem->Message); 250 $MeetItem->Name = ''; 251 $MeetItem->Age = GetAgeFromText($MeetItem->Message); 252 $MeetItem->Height = GetHeightFromText($MeetItem->Message); 253 $MeetItem->Weight = GetWeightFromText($MeetItem->Message); 254 $MeetItem->Phone = ''; 255 $MeetItem->Database = $this->Database; 256 $MeetItem->Source = $this->Id; 257 $MeetItem->AddIfNotExist(); 258 } 259 } 260 261 function ParseSalsaDance() 262 { 263 echo('Parsing '.$this->Name.'...</br>'); 264 $Content = file_get_contents($this->URL); 265 $BlockStart = '<ul class="comments">'; 266 $BlockEnd = '<div class="content-paginator clearfix">'; 267 $Content = GetTextBetween($Content, $BlockStart, $BlockEnd); 268 if ($Content == '') 269 { 270 echo('Main block not isolated.</br>'); 271 return; 272 } 273 274 $ItemStart = '<li class="comment level-0">'; 275 $ItemEnd = '</li>'; 276 while (strpos($Content, $ItemStart) !== false) 277 { 278 $Item = GetTextBetween($Content, $ItemStart, $ItemEnd); 279 $MeetItem = new MeetItem(); 280 $MeetItem->Date = trim(GetTextBetween($Item, '<span class="created">', '</span>')); 281 $MeetItem->Date = str_replace('. 
', '.', $MeetItem->Date); 282 if (strpos($MeetItem->Date, 'dnes v') !== false) 283 $MeetItem->Date = str_replace('dnes v', HumanDate(time()), $MeetItem->Date); 284 $MeetItem->Date = HumanDateTimeToTime($MeetItem->Date); 285 $MeetItem->Name = trim(GetTextBetween($Item, '<div class="comment-name">', '</div>')); 286 $MeetItem->Message = trim(GetTextBetween($Item, '<p>', '</p>')); 287 $MeetItem->Gender = Gender::Undefined; 288 $MeetItem->Email = GetEmailFromText($MeetItem->Message); 289 $MeetItem->Phone = ''; 290 $MeetItem->Height = GetHeightFromText($MeetItem->Message); 291 $MeetItem->Weight = GetWeightFromText($MeetItem->Message); 292 $MeetItem->Age = GetAgeFromText($MeetItem->Message); 293 $MeetItem->Database = $this->Database; 294 $MeetItem->Source = $this->Id; 295 $MeetItem->AddIfNotExist(); 296 } 297 } 298 299 function ParseVavruska() 300 { 301 echo('Parsing '.$this->Name.'...</br>'); 302 $Content = file_get_contents($this->URL); 303 304 $BlockStart = '<table class="seznamka">'; 305 $BlockEnd = '<hr class="welt_bottom" />'; 306 $Content = GetTextBetween($Content, $BlockStart, $BlockEnd); 307 if ($Content == '') 308 { 309 echo('Main block not isolated.</br>'); 310 return; 311 } 312 313 $ItemStart = '<tr class="spc">'; 314 $ItemEnd = '</table> 315 </td></tr>'; 316 while (strpos($Content, $ItemStart) !== false) 317 { 318 $Item = GetTextBetween($Content, $ItemStart, $ItemEnd); 319 $MeetItem = new MeetItem(); 320 321 $MeetItem->Date = MysqlDateTimeToTime(trim(GetTextBetween($Item, 'přidáno:', '</i>'))); 322 $MeetItem->Gender = trim(GetTextBetween($Item, '<img src=/design/', 'alt="avatar"')); 323 if ($MeetItem->Gender == 'avatar_m_03.png') $MeetItem->Gender = Gender::Male; 324 else $MeetItem->Gender = Gender::Female; 325 $MeetItem->Name = trim(GetTextBetween($Item, 'Jméno:</td><td>', '</td>')); 326 $MeetItem->Email = DecodeHtmlEnt(trim(GetTextBetween($Item, 'Kontakt:</td><td>', '</td>'))); 327 $MeetItem->Message = trim(GetTextBetween($Item, 'Text:</td><td>', '</td>')); 
328 $MeetItem->Phone = ''; 329 $MeetItem->Height = GetHeightFromText($MeetItem->Message); 330 $MeetItem->Weight = GetWeightFromText($MeetItem->Message); 331 $MeetItem->Age = GetAgeFromText($MeetItem->Message); 332 $MeetItem->Database = $this->Database; 333 $MeetItem->Source = $this->Id; 334 $MeetItem->AddIfNotExist(); 335 } 336 } 337 338 function ParseHes() 339 { 340 echo('Parsing '.$this->Name.'...</br>'); 341 $Content = file_get_contents($this->URL); 42 342 43 343 $BlockStart = '<div><!--[if IE]><input type=IEbug disabled style="display:none"><![endif]--></div>'; 44 344 $BlockEnd = '<div class="paginator">'; 45 345 $Content = GetTextBetween($Content, $BlockStart, $BlockEnd); 46 if ($Content == '') echo('Main block not isolated.</br>'); 346 if ($Content == '') 347 { 348 echo('Main block not isolated.</br>'); 349 return; 350 } 47 351 48 352 $ItemStart = '<div>'; 49 353 $ItemEnd = '</div>'; 50 354 $PreviousTime = null; 51 while (strpos($Content, $ItemStart) )355 while (strpos($Content, $ItemStart) !== false) 52 356 { 53 357 $Item = GetTextBetween($Content, $ItemStart, $ItemEnd); 54 55 358 if ($Item != '') 56 359 { … … 72 375 $MeetItem->Age = trim(str_replace('let', '', $MeetItem->Age)); 73 376 $MeetItem->Height = substr($MeetItem->Height, 0, strpos($MeetItem->Height, '(')); 74 } else $MeetItem->Age = ''; 377 } else $MeetItem->Age = ''; 378 $MeetItem->Weight = GetWeightFromText($MeetItem->Message); 75 379 $MeetItem->Email = trim(GetTextBetween($Item, '">', '</a>')); 76 380 $MeetItem->Phone = trim(GetTextBetween($Item, 'tel.:', '<br />')); 77 381 $Gender = trim(GetTextBetween($Item, '</strong>', '<strong>')); 78 if ($Gender == 'partnera,') $MeetItem->Gender = 1; 79 else $MeetItem->Gender = 0; 80 $MeetItem->Message = trim(GetTextBetween($Item, '<p class="message">', '</p>')); 81 82 //print_r($MeetItem); 83 $DbResult = $this->Database->select('MeetItem', '*', '(`Message` = "'.$MeetItem->Message.'") AND '. 84 '(`Age` = "'.$MeetItem->Age.'") AND '. 
85 '(`Date` = "'.TimeToMysqlDate($MeetItem->Date).'")'); 86 if ($DbResult->num_rows == 0) 87 { 88 $this->Database->insert('MeetItem', array( 89 'Message' => $MeetItem->Message, 90 'Date' => TimeToMysqlDate($MeetItem->Date), 91 'Gender' => $MeetItem->Gender, 92 'Age' => $MeetItem->Age, 93 'Email' => $MeetItem->Email, 94 'Phone' => $MeetItem->Phone, 95 'Name' => $MeetItem->Name, 96 'Height' => $MeetItem->Height, 97 'Source' => $this->Id, 98 )); 99 } 382 if ($Gender == 'partnera,') $MeetItem->Gender = Gender::Female; 383 else if ($Gender == 'partnerku,') $MeetItem->Gender = Gender::Male; 384 else $MeetItem->Gender = Gender::Undefined; 385 $MeetItem->Message = trim(GetTextBetween($Item, '<p class="message">', '</p>')); 386 $MessageAge = GetAgeFromText($MeetItem->Message); 387 if ($MessageAge != '') $MeetItem->Age = $MessageAge; 388 $MessageHeight = GetHeightFromText($MeetItem->Message); 389 if ($MessageHeight != '') $MeetItem->Height = $MessageHeight; 390 $MeetItem->Database = $this->Database; 391 $MeetItem->Source = $this->Id; 392 $MeetItem->AddIfNotExist(); 100 393 } 101 394 } … … 105 398 class MeetItem 106 399 { 107 var $Name; 108 var $Message; 109 var $Date; 110 var $Gender; 111 var $Phone; 112 var $Email; 113 var $Age; 114 var $Height; 115 } 400 var $Database; 401 var $Name = ''; 402 var $Message = ''; 403 var $Date = ''; 404 var $Gender = Gender::Undefined; 405 var $Phone = ''; 406 var $Email = ''; 407 var $Age = ''; 408 var $Height = ''; 409 var $Source = 0; 410 var $Weight = ''; 411 var $Location = ''; 412 413 function AddIfNotExist() 414 { 415 $DbResult = $this->Database->select('MeetItem', '*', 416 '(`Message` = "'.$this->Database->real_escape_string($this->Message).'") AND '. 417 '(`Email` = "'.$this->Database->real_escape_string($this->Email).'") AND '. 
418 '(`Date` = "'.$this->Database->real_escape_string(TimeToMysqlDate($this->Date)).'")'); 419 if ($DbResult->num_rows == 0) 420 { 421 $this->Database->insert('MeetItem', array( 422 'Message' => $this->Message, 423 'Date' => TimeToMysqlDate($this->Date), 424 'Gender' => $this->Gender, 425 'Age' => $this->Age, 426 'Email' => $this->Email, 427 'Phone' => $this->Phone, 428 'Name' => $this->Name, 429 'Height' => $this->Height, 430 'Weight' => $this->Weight, 431 'Location' => $this->Location, 432 'Source' => $this->Source, 433 )); 434 } 435 } 436 }
Note: See TracChangeset for help on using the changeset viewer.