Changeset 8
- Timestamp: Aug 5, 2018, 10:26:16 PM (6 years ago)
- Location: trunk
- Files: 6 added, 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Meet.php
r4 r8 1 1 <?php 2 3 include_once('Import/Seznamka.php'); 4 include_once('Import/TanecniSkola.php'); 5 include_once('Import/AstraPraha.php'); 6 include_once('Import/Vavruska.php'); 7 include_once('Import/SalsaDance.php'); 2 8 3 9 abstract class Gender … … 36 42 } 37 43 38 function DecodeHtmlEnt($str) 44 function DecodeHtmlEnt($str) 39 45 { 40 46 $prefix = '&#'; … … 43 49 $ret = html_entity_decode($str, ENT_COMPAT, 'UTF-8'); 44 50 $p2 = 0; 45 for(;;) 51 for(;;) 46 52 { 47 53 $p = strpos($ret, $prefix, $p2); 48 if ($p === FALSE) 54 if ($p === FALSE) 49 55 break; 50 56 $p2 = strpos($ret, $suffix, $p); … … 57 63 $add = 0; 58 64 } else $add = 1; 59 65 60 66 if (substr($ret, $p + strlen($prefix), strlen($hexchar)) == $hexchar) 61 67 $char = hexdec(substr($ret, $p + strlen($prefix) + strlen($hexchar), $p2 - $p - strlen($prefix) - strlen($hexchar))); 62 68 else 63 69 $char = intval(substr($ret, $p + strlen($prefix), $p2 - $p - strlen($prefix))); 64 70 65 71 $newchar = iconv( 66 72 'UCS-4', 'UTF-8', … … 73 79 } 74 80 75 function RemoveHtmlComments($Content) 81 function RemoveHtmlComments($Content) 76 82 { 77 83 $Result = ''; … … 98 104 $Pos = strpos($Text, $Needle); 99 105 if ($Pos !== false) 100 { 101 if ((($Pos + strlen($Needle)) < strlen($Text)) and (is_alpha($Text[$Pos + strlen($Needle)]))) 106 { 107 if ((($Pos + strlen($Needle)) < strlen($Text)) and (is_alpha($Text[$Pos + strlen($Needle)]))) 102 108 { 103 109 $Text = substr($Text, $Pos + 1); … … 124 130 $Pos = strpos($Text, $Needle); 125 131 if ($Pos !== false) 126 { 127 if ((($Pos - 1) >= 0) and (is_alpha($Text[$Pos - 1]))) 132 { 133 if ((($Pos - 1) >= 0) and (is_alpha($Text[$Pos - 1]))) 128 134 { 129 135 $Text = substr($Text, $Pos + 1); … … 155 161 { 156 162 $Result = GetNumberBeforeText($Text, 'cm'); 157 if ($Result == '') $Result = GetNumberBeforeText($Text, 'bez podpatků'); 163 if ($Result == '') $Result = GetNumberAfterText($Text, 'měřím'); 164 if ($Result == '') $Result = GetNumberBeforeText($Text, 'bez 
podpatků'); 158 165 if ($Result == '') $Result = GetAgeHeightWeightFromText($Text)[1]; 159 166 return $Result; … … 171 178 $Result = array('', '', ''); 172 179 $Pattern = '/[0-9]+\/[0-9]+\/[0-9]+/i'; 173 if (preg_match_all($Pattern, $Text, $Matches)) 180 if (preg_match_all($Pattern, $Text, $Matches)) 174 181 { 175 182 $Result = explode('/', $Matches[0][0]); 176 } else 183 } else 177 184 { 178 185 $Pattern = '/[0-9]+\/[0-9]+/i'; 179 if (preg_match_all($Pattern, $Text, $Matches)) 186 if (preg_match_all($Pattern, $Text, $Matches)) 180 187 { 181 188 $Result = explode('/', $Matches[0][0]); … … 195 202 if (count($Matches) > 0) 196 203 $Result = $Matches[0][0]; 197 } 198 return $Result; 204 } 205 return $Result; 206 } 207 208 $Locations = array( 209 'Praha' => 'Praha', 210 'Prahy' => 'Praha', 211 'Praze' => 'Praha', 212 'Ostrava' => 'Ostrava', 213 'Ostravě' => 'Ostrava', 214 'Ostravy' => 'Ostrava', 215 'Mladé Boleslavi' => 'Mladá Boleslav', 216 ); 217 218 function GetLocationFromText($Text) 219 { 220 global $Locations; 221 222 foreach ($Locations as $Index => $Location) 223 { 224 if (strpos($Text, $Index) !== false) return $Location; 225 } 226 return ''; 199 227 } 200 228 … … 202 230 { 203 231 public $Database; 204 232 205 233 function ParseAll() 206 234 { … … 208 236 while ($DbRow = $DbResult->fetch_assoc()) 209 237 { 210 $Source = new MeetSource(); 238 $Method = $DbRow['Method']; 239 if ($Method == 'hes') $Source = new MeetSourceTanecniSkola(); 240 else if ($Method == 'vavruska') $Source = new MeetSourceVavruska(); 241 else if ($Method == 'salsadance') $Source = new MeetSourceSalsaDance(); 242 else if ($Method == 'astra') $Source = new MeetSourceAstraPraha(); 243 else if ($Method == 'seznamka') $Source = new MeetSourceSeznamka(); 244 else { 245 echo('Unsupported parse method: '.$Method); 246 return; 247 } 211 248 $Source->Database = $this->Database; 212 249 $Source->Id = $DbRow['Id']; 213 250 $Source->URL = $DbRow['URL']; 214 $Source->Method = $ DbRow['Method'];251 
$Source->Method = $Method; 215 252 $Source->Name = $DbRow['Name']; 216 253 $this->Items[] = $Source; 217 $Source-> Parse();254 $Source->Import(); 218 255 } 219 256 } … … 227 264 public $Id; 228 265 public $Database; 229 230 function Parse() 231 { 232 if ($this->Method == 'hes') $this->ParseHes(); 233 else if ($this->Method == 'vavruska') $this->ParseVavruska(); 234 else if ($this->Method == 'salsadance') $this->ParseSalsaDance(); 235 else if ($this->Method == 'astra') $this->ParseAstra(); 236 else echo('Unsupported parse method: '.$this->Method); 237 } 238 239 function ParseAstra() 240 { 241 echo('Parsing '.$this->Name.'...</br>'); 242 $Content = file_get_contents($this->URL); 243 $Content = RemoveHtmlComments($Content); 244 245 $BlockStart = '----------------------------------------------'; 246 $BlockEnd = '<script type="text/javascript"'; 247 $Content = GetTextBetween($Content, $BlockStart, $BlockEnd); 248 if ($Content == '') 249 { 250 echo('Main block not isolated.</br>'); 251 return; 252 } 253 254 $ItemStart = '</div><h2></h2>'; 255 $ItemEnd = '<div class="clearer"></div><div class="clearer">'; 256 while (strpos($Content, $ItemStart) !== false) 257 { 258 $Item = GetTextBetween($Content, $ItemStart, $ItemEnd); 259 $MeetItem = new MeetItem(); 260 $MeetItem->Gender = trim(GetTextBetween($Item, ');"><u>', '</u>')); 261 if ($MeetItem->Gender == 'Hledám partnerku') $MeetItem->Gender = Gender::Male; 262 else $MeetItem->Gender = Gender::Female; 263 $End = '<em>'; 264 $MeetItem->Message = trim(substr($Item, 0, strpos($Item, $End))); 265 $Item = substr($Item, strpos($Item, $End) + strlen($End)); 266 $MeetItem->Message = trim(strip_tags($MeetItem->Message)); 267 $MeetItem->Date = trim(GetTextBetween($Item, 'datum vložení', '</em>')); 268 if (substr($MeetItem->Date, 0, 1) == ':') 269 $MeetItem->Date = trim(substr($MeetItem->Date, 1)); 270 $MeetItem->Date = HumanDateToTime($MeetItem->Date); 271 $MeetItem->Email = GetEmailFromText($MeetItem->Message); 272 $MeetItem->Name = 
''; 273 $MeetItem->Age = GetAgeFromText($MeetItem->Message); 274 $MeetItem->Height = GetHeightFromText($MeetItem->Message); 275 $MeetItem->Weight = GetWeightFromText($MeetItem->Message); 276 $MeetItem->Phone = ''; 277 $MeetItem->Database = $this->Database; 278 $MeetItem->Source = $this->Id; 279 $MeetItem->AddIfNotExist(); 280 } 281 } 282 283 function ParseSalsaDance() 284 { 285 echo('Parsing '.$this->Name.'...</br>'); 286 $Content = file_get_contents($this->URL); 287 $BlockStart = '<ul class="comments">'; 288 $BlockEnd = '<div class="content-paginator clearfix">'; 289 $Content = GetTextBetween($Content, $BlockStart, $BlockEnd); 290 if ($Content == '') 291 { 292 echo('Main block not isolated.</br>'); 293 return; 294 } 295 296 $ItemStart = '<li class="comment level-0">'; 297 $ItemEnd = '</li>'; 298 while (strpos($Content, $ItemStart) !== false) 299 { 300 $Item = GetTextBetween($Content, $ItemStart, $ItemEnd); 301 $MeetItem = new MeetItem(); 302 $MeetItem->Date = trim(GetTextBetween($Item, '<span class="created">', '</span>')); 303 $MeetItem->Date = str_replace('. 
', '.', $MeetItem->Date); 304 if (strpos($MeetItem->Date, 'dnes v') !== false) 305 $MeetItem->Date = str_replace('dnes v', HumanDate(time()), $MeetItem->Date); 306 if (strpos($MeetItem->Date, 'včera v') !== false) 307 $MeetItem->Date = str_replace('včera v', HumanDate(strtotime('-1 day', time())), $MeetItem->Date); 308 $MeetItem->Date = HumanDateTimeToTime($MeetItem->Date); 309 $MeetItem->Name = trim(GetTextBetween($Item, '<div class="comment-name">', '</div>')); 310 $MeetItem->Message = trim(GetTextBetween($Item, '<p>', '</p>')); 311 $MeetItem->Gender = Gender::Undefined; 312 $MeetItem->Email = GetEmailFromText($MeetItem->Message); 313 $MeetItem->Phone = ''; 314 $MeetItem->Height = GetHeightFromText($MeetItem->Message); 315 $MeetItem->Weight = GetWeightFromText($MeetItem->Message); 316 $MeetItem->Age = GetAgeFromText($MeetItem->Message); 317 $MeetItem->Database = $this->Database; 318 $MeetItem->Source = $this->Id; 319 //print_r($MeetItem); 320 $MeetItem->AddIfNotExist(); 321 } 322 } 323 324 function ParseVavruska() 325 { 326 echo('Parsing '.$this->Name.'...</br>'); 327 $Content = file_get_contents($this->URL); 328 329 $BlockStart = '<table class="seznamka">'; 330 $BlockEnd = '<hr class="welt_bottom" />'; 331 $Content = GetTextBetween($Content, $BlockStart, $BlockEnd); 332 if ($Content == '') 333 { 334 echo('Main block not isolated.</br>'); 335 return; 336 } 337 338 $ItemStart = '<tr class="spc">'; 339 $ItemEnd = '</table> 340 </td></tr>'; 341 while (strpos($Content, $ItemStart) !== false) 342 { 343 $Item = GetTextBetween($Content, $ItemStart, $ItemEnd); 344 $MeetItem = new MeetItem(); 345 346 $MeetItem->Date = MysqlDateTimeToTime(trim(GetTextBetween($Item, 'přidáno:', '</i>'))); 347 $MeetItem->Gender = trim(GetTextBetween($Item, '<img src=/design/', 'alt="avatar"')); 348 if ($MeetItem->Gender == 'avatar_m_03.png') $MeetItem->Gender = Gender::Male; 349 else $MeetItem->Gender = Gender::Female; 350 $MeetItem->Name = trim(GetTextBetween($Item, 'Jméno:</td><td>', 
'</td>')); 351 $MeetItem->Email = DecodeHtmlEnt(trim(GetTextBetween($Item, 'Kontakt:</td><td>', '</td>'))); 352 $MeetItem->Message = trim(GetTextBetween($Item, 'Text:</td><td>', '</td>')); 353 $MeetItem->Phone = ''; 354 $MeetItem->Height = GetHeightFromText($MeetItem->Message); 355 $MeetItem->Weight = GetWeightFromText($MeetItem->Message); 356 $MeetItem->Age = GetAgeFromText($MeetItem->Message); 357 $MeetItem->Database = $this->Database; 358 $MeetItem->Source = $this->Id; 359 $MeetItem->AddIfNotExist(); 360 } 361 } 362 363 function ParseHes() 364 { 365 $HeightRange = array( 366 '< 120 cm' => 120, 367 '120 cm - 130 cm' => 125, 368 '125 cm - 135 cm' => 130, 369 '130 cm - 140 cm' => 135, 370 '135 cm - 145 cm' => 140, 371 '140 cm - 150 cm' => 145, 372 '145 cm - 155 cm' => 150, 373 '150 cm - 160 cm' => 155, 374 '155 cm - 165 cm' => 160, 375 '160 cm - 170 cm' => 165, 376 '165 cm - 175 cm' => 170, 377 '170 cm - 180 cm' => 175, 378 '175 cm - 185 cm' => 180, 379 '180 cm - 190 cm' => 185, 380 '185 cm - 195 cm' => 190, 381 '190 cm - 200 cm' => 195, 382 '195 cm - 205 cm' => 200, 383 '200 cm <' => 205, 384 ); 385 386 echo('Parsing '.$this->Name.'...</br>'); 387 $Content = file_get_contents($this->URL); 388 389 $BlockStart = '<div><!--[if IE]><input type=IEbug disabled style="display:none"><![endif]--></div>'; 390 $BlockEnd = '<div class="paginator">'; 391 $Content = GetTextBetween($Content, $BlockStart, $BlockEnd); 392 if ($Content == '') 393 { 394 echo('Main block not isolated.</br>'); 395 return; 396 } 397 398 $ItemStart = '<div>'; 399 $ItemEnd = '</div>'; 400 $PreviousTime = null; 401 while (strpos($Content, $ItemStart) !== false) 402 { 403 $Item = GetTextBetween($Content, $ItemStart, $ItemEnd); 404 if ($Item != '') 405 { 406 $MeetItem = new MeetItem(); 407 $MeetItem->Date = HumanDateToTime(trim(GetTextBetween($Item, '<h3>', '</h3>'))); 408 if ($MeetItem->Date != null) 409 { 410 $PreviousTime = $MeetItem->Date; 411 } else $MeetItem->Date = $PreviousTime; 412 413 $Part = 
GetTextBetween($Item, '</strong>', '<strong>'); 414 $Part = explode(',', $Part); 415 if (count($Part) > 0) $MeetItem->Name = trim($Part[0]); 416 if (count($Part) > 1) $MeetItem->Height = trim($Part[1]); 417 if (strpos($MeetItem->Height, '(') !== false) 418 { 419 $MeetItem->Age = substr($MeetItem->Height, strpos($MeetItem->Height, '(') + 1); 420 $MeetItem->Age = substr($MeetItem->Age, 0, strpos($MeetItem->Age, ')')); 421 $MeetItem->Age = trim(str_replace('let', '', $MeetItem->Age)); 422 $MeetItem->Height = trim(substr($MeetItem->Height, 0, strpos($MeetItem->Height, '('))); 423 } else $MeetItem->Age = ''; 424 while (strpos($MeetItem->Height, ' ') !== false) 425 { 426 $MeetItem->Height = str_replace(' ', ' ', $MeetItem->Height); 427 } 428 if (array_key_exists($MeetItem->Height, $HeightRange)) 429 $MeetItem->Height = $HeightRange[$MeetItem->Height]; 430 $MeetItem->Weight = GetWeightFromText($MeetItem->Message); 431 $MeetItem->Email = trim(GetTextBetween($Item, '">', '</a>')); 432 $MeetItem->Phone = trim(GetTextBetween($Item, 'tel.:', '<br />')); 433 $Gender = trim(GetTextBetween($Item, '</strong>', '<strong>')); 434 if ($Gender == 'partnera,') $MeetItem->Gender = Gender::Female; 435 else if ($Gender == 'partnerku,') $MeetItem->Gender = Gender::Male; 436 else $MeetItem->Gender = Gender::Undefined; 437 $MeetItem->Message = trim(GetTextBetween($Item, '<p class="message">', '</p>')); 438 $MessageAge = GetAgeFromText($MeetItem->Message); 439 if ($MessageAge != '') $MeetItem->Age = $MessageAge; 440 $MessageHeight = GetHeightFromText($MeetItem->Message); 441 if ($MessageHeight != '') $MeetItem->Height = $MessageHeight; 442 $MeetItem->Database = $this->Database; 443 $MeetItem->Source = $this->Id; 444 $MeetItem->AddIfNotExist(); 445 } 446 } 447 } 448 } 449 450 class MeetItem 266 } 267 268 class MeetItem 451 269 { 452 270 var $Database; … … 456 274 var $Gender = Gender::Undefined; 457 275 var $Phone = ''; 458 var $Email = ''; 276 var $Email = ''; 459 277 var $Age = ''; 460 278 
var $Height = ''; … … 462 280 var $Weight = ''; 463 281 var $Location = ''; 282 var $Image = ''; 464 283 465 284 function AddIfNotExist() 466 285 { 467 $DbResult = $this->Database->select('MeetItem', '*', 286 $DbResult = $this->Database->select('MeetItem', '*', 468 287 '(`Message` = "'.$this->Database->real_escape_string($this->Message).'") AND '. 469 288 '(`Email` = "'.$this->Database->real_escape_string($this->Email).'") AND '. -
trunk/index.php
r7 r8 238 238 array('Name' => 'Age', 'Title' => 'Věk'), 239 239 array('Name' => 'Weight', 'Title' => 'Váha'), 240 array('Name' => 'Location', 'Title' => 'Úmístění'), 240 241 array('Name' => 'Gender', 'Title' => 'Pohlaví'), 241 242 array('Name' => 'Message', 'Title' => 'Zpráva'), … … 255 256 '<td>'.$MeetItem['Age'].'</td>'. 256 257 '<td>'.$MeetItem['Weight'].'</td>'. 258 '<td>'.$MeetItem['Location'].'</td>'. 257 259 //'<td>'.$MeetItem['Email'].'</td>'. 258 260 //'<td>'.$MeetItem['Phone'].'</td>'. 259 261 '<td>'.$Gender[$MeetItem['Gender']].'</td>'. 260 262 '<td>'.$MeetItem['Message'].'</td>'. 261 '<td><a href="'.$ this->Link($MeetItem['SourceURL']).'">'.$MeetItem['SourceName'].'</a></td>';263 '<td><a href="'.$MeetItem['SourceURL'].'">'.$MeetItem['SourceName'].'</a></td>'; 262 264 $Output .= '</tr>'; 263 265 }
Note: See TracChangeset for help on using the changeset viewer.