source: trunk/Modules/Meet/Meet.php

Last change on this file was 71, checked in by chronos, 7 weeks ago
  • Fixed: Warnings in meet imports.
File size: 15.5 KB
Line 
1<?php
2
3include_once(dirname(__FILE__).'/Import/Seznamka.php');
4include_once(dirname(__FILE__).'/Import/TanecniSkola.php');
5include_once(dirname(__FILE__).'/Import/AstraPraha.php');
6include_once(dirname(__FILE__).'/Import/Vavruska.php');
7include_once(dirname(__FILE__).'/Import/SalsaDance.php');
8include_once(dirname(__FILE__).'/Import/Amblar.php');
9include_once(dirname(__FILE__).'/Import/MajkluvSvet.php');
10include_once(dirname(__FILE__).'/Import/Csts.php');
11include_once(dirname(__FILE__).'/Import/Facebook.php');
12include_once(dirname(__FILE__).'/Import/Eso.php');
13include_once(dirname(__FILE__).'/Import/Ella.php');
14include_once(dirname(__FILE__).'/Import/MgDance.php');
15include_once(dirname(__FILE__).'/Import/LaTropical.php');
16
/**
 * Numeric gender codes used for meet items.
 *
 * The class only groups constants and is never instantiated.
 */
abstract class Gender
{
  public const Undefined = 0;
  public const Male = 1;
  public const Female = 2;
}
23
/**
 * Returns the text found between $Start and $End and consumes it from $Text.
 *
 * On success $Text is reduced to whatever follows the end delimiter, so the
 * function can be called repeatedly to walk through a document.
 *
 * - If either delimiter does not occur in $Text at all, $Text is left
 *   untouched and an empty string is returned.
 * - If $End occurs only before $Start, $Text is advanced past $Start and an
 *   empty string is returned (no further bytes are dropped).
 *
 * @param string $Text  Input text, modified in place.
 * @param string $Start Opening delimiter.
 * @param string $End   Closing delimiter.
 * @return string Text between the delimiters or '' when nothing was found.
 */
function GetTextBetween(string &$Text, string $Start, string $End): string
{
  $StartPos = strpos($Text, $Start);
  if (($StartPos === false) or (strpos($Text, $End) === false)) return '';

  $Text = substr($Text, $StartPos + strlen($Start));

  // The end delimiter has to be searched again: it may exist only before $Start.
  $EndPos = strpos($Text, $End);
  if ($EndPos === false) return '';

  $Result = substr($Text, 0, $EndPos);
  $Text = substr($Text, $EndPos + strlen($End));
  return $Result;
}
35
/**
 * Converts a date/time written as "day.month.year hour:minute:second" to a
 * Unix timestamp.
 *
 * Spaces after the dots of the date ("24. 12. 2020") are tolerated. Missing or
 * empty components are passed to mktime() as 0, and every component is
 * converted with intval() so that empty or malformed pieces no longer raise a
 * TypeError/warning inside mktime().
 *
 * @param string $DateTime Human readable date with optional time.
 * @return int|null Timestamp, or NULL for an empty/blank input.
 */
function HumanDateTimeToTime(string $DateTime): ?int
{
  if ($DateTime == '') return NULL;

  $DateTime = str_replace('. ', '.', $DateTime);
  // Split on any run of whitespace so repeated spaces do not create empty parts.
  $Parts = preg_split('/\s+/', $DateTime, -1, PREG_SPLIT_NO_EMPTY);
  if (($Parts === false) or (count($Parts) == 0)) return NULL;

  $DateParts = array_map('intval', array_pad(explode('.', $Parts[0]), 3, '0'));

  $TimeParts = array(0, 0, 0);
  if (count($Parts) > 1)
  {
    $TimeParts = array_map('intval', array_pad(explode(':', $Parts[1]), 3, '0'));
  }

  $Result = mktime($TimeParts[0], $TimeParts[1], $TimeParts[2], $DateParts[1], $DateParts[0], $DateParts[2]);
  if ($Result === false) return NULL;
  return $Result;
}
56
/**
 * Converts a date without a time part to a timestamp at midnight.
 *
 * @param string $Date Human readable date.
 * @return int|null Timestamp or NULL for an empty input.
 */
function HumanDateToTime(string $Date): ?int
{
  return ($Date == '') ? NULL : HumanDateTimeToTime($Date.' 0:0:0');
}
62
/**
 * Decodes HTML entities including numeric ones that html_entity_decode()
 * leaves untouched.
 *
 * After the standard decoding pass the string is scanned for remaining
 * "&#NNN" / "&#xHHH" sequences. A sequence is taken up to the next ';' in the
 * string; when no ';' follows at all, a bare run of digits is accepted.
 * The code point is converted to UTF-8 through iconv() from UCS-4.
 *
 * @param string $str Text with HTML entities.
 * @return string Decoded UTF-8 text.
 */
function DecodeHtmlEnt(string $str): string
{
  $prefix = '&#';
  $suffix = ';';
  $hexchar = 'x';
  $prefixLength = strlen($prefix);
  $hexLength = strlen($hexchar);

  $ret = html_entity_decode($str, ENT_COMPAT, 'UTF-8');
  $p2 = 0;
  for (;;)
  {
    $p = strpos($ret, $prefix, $p2);
    if ($p === FALSE)
      break;
    $p2 = strpos($ret, $suffix, $p);
    if ($p2 === FALSE)
    {
      // No terminator anywhere behind the prefix: take the digits that follow.
      $p2 = $p + $prefixLength;
      while (($p2 < strlen($ret)) and is_numeric($ret[$p2]))
        $p2++;
      if ($p2 <= ($p + $prefixLength)) break;
      $add = 0;
    } else $add = 1;

    if (substr($ret, $p + $prefixLength, $hexLength) == $hexchar)
      $char = hexdec(substr($ret, $p + $prefixLength + $hexLength, $p2 - $p - $prefixLength - $hexLength));
    else
      $char = intval(substr($ret, $p + $prefixLength, $p2 - $p - $prefixLength));

    // Build the big-endian 4 byte representation of the code point for iconv.
    $newchar = iconv(
      'UCS-4', 'UTF-8',
      chr(($char >> 24) & 0xFF).chr(($char >> 16) & 0xFF).chr(($char >> 8) & 0xFF).chr($char & 0xFF)
    );
    if ($newchar === false) $newchar = '';

    // $add covers the terminating ';' which is replaced together with the entity.
    $ret = substr_replace($ret, $newchar, $p, $add + $p2 - $p);
    // Continue directly behind the inserted character. The terminator is already
    // gone, so adding $add here would skip a byte and could point past the end
    // of the string, which makes strpos() throw on PHP 8.
    $p2 = $p + strlen($newchar);
  }
  return $ret;
}
99
/**
 * Strips HTML comments from the given markup.
 *
 * Everything outside of comments is preserved, including the text behind the
 * last comment. An unterminated comment swallows the rest of the input.
 *
 * @param string $Content HTML markup.
 * @return string Markup without comments.
 */
function RemoveHtmlComments(string $Content): string
{
  $Result = '';
  for (;;)
  {
    $Open = strpos($Content, '<!--');
    if ($Open === false) break;

    $Result .= substr($Content, 0, $Open);

    $Close = strpos($Content, '-->', $Open + 4);
    // Comment is never closed: nothing behind it is regular content.
    if ($Close === false) return $Result;

    $Content = substr($Content, $Close + 3);
  }
  // Keep the remaining text that follows the last comment.
  return $Result.$Content;
}
112
/**
 * Tells whether the given string compares within the ASCII ranges a-z or A-Z.
 *
 * @param string $Char Character to test.
 * @return bool True for an ASCII letter.
 */
function is_alpha(string $Char): bool
{
  $IsLowerCase = ($Char >= 'a') && ($Char <= 'z');
  $IsUpperCase = ($Char >= 'A') && ($Char <= 'Z');
  return $IsLowerCase || $IsUpperCase;
}
117
/**
 * Tells whether the given string is a single space or a tab character.
 *
 * @param string $Char Character to test.
 * @return bool True for ' ' or "\t".
 */
function is_white_space(string $Char): bool
{
  return in_array($Char, array(' ', "\t"), true);
}
122
/**
 * Builds the default SQL condition for listing meet items: not hidden and
 * with Time newer than now minus $Config['MeetInterval'] seconds.
 *
 * @param string $Table Optional table name/alias used to qualify the columns.
 * @return string SQL condition.
 */
function GetDefaultMeetFilter(string $Table = ''): string
{
  global $Config;

  $Prefix = ($Table != '') ? $Table.'.' : '';
  $OldestTime = TimeToMysqlDateTime(time() - (int)$Config['MeetInterval']);

  return '('.$Prefix.'Hidden=0) AND ('.$Prefix.'Time > "'.$OldestTime.'")';
}
131
/**
 * Finds a number written directly in front of a keyword, e.g. "25 let".
 *
 * Occurrences of $Needle that are immediately followed by an ASCII letter are
 * skipped as they are only the beginning of a longer word. For every other
 * occurrence the run of digits and white space preceding it is examined and
 * the first one that forms a numeric value is returned.
 *
 * @param string $Text   Text to search.
 * @param string $Needle Keyword expected behind the number.
 * @return string Numeric string or '' when no number was found.
 */
function GetNumberBeforeText(string $Text, string $Needle): string
{
  // An empty needle matches everywhere and would never let the loop finish.
  if ($Needle === '') return '';

  $NeedleLength = strlen($Needle);
  for (;;)
  {
    $Pos = strpos($Text, $Needle);
    if ($Pos === false) return '';

    $After = $Pos + $NeedleLength;
    $IsWordPrefix = ($After < strlen($Text)) && is_alpha($Text[$After]);
    if (!$IsWordPrefix)
    {
      // Walk back over digits and white space located in front of the keyword.
      $Start = $Pos;
      while (($Start > 0) and (is_numeric($Text[$Start - 1]) or is_white_space($Text[$Start - 1])))
        $Start--;
      $Candidate = trim(substr($Text, $Start, $Pos - $Start));
      if (is_numeric($Candidate)) return $Candidate;
    }
    // Not a usable match, continue behind the beginning of this occurrence.
    $Text = substr($Text, $Pos + 1);
  }
}
157
/**
 * Finds a number written directly behind a keyword, e.g. "je mi 25".
 *
 * Occurrences of $Needle that are immediately preceded by an ASCII letter are
 * skipped as they are only the end of a longer word. For every other
 * occurrence the run of digits and white space following it is examined and
 * the first one that forms a numeric value is returned.
 *
 * @param string $Text   Text to search.
 * @param string $Needle Keyword expected in front of the number.
 * @return string Numeric string or '' when no number was found.
 */
function GetNumberAfterText(string $Text, string $Needle): string
{
  // An empty needle matches everywhere and would never let the loop finish.
  if ($Needle === '') return '';

  $NeedleLength = strlen($Needle);
  for (;;)
  {
    $Pos = strpos($Text, $Needle);
    if ($Pos === false) return '';

    $IsWordSuffix = ($Pos > 0) && is_alpha($Text[$Pos - 1]);
    if (!$IsWordSuffix)
    {
      // Walk forward over digits and white space located behind the keyword.
      $Start = $Pos + $NeedleLength;
      $End = $Start;
      $Length = strlen($Text);
      while (($End < $Length) and (is_numeric($Text[$End]) or is_white_space($Text[$End])))
        $End++;
      $Candidate = trim(substr($Text, $Start, $End - $Start));
      if (is_numeric($Candidate)) return $Candidate;
    }
    // Not a usable match, continue behind the beginning of this occurrence.
    $Text = substr($Text, $Pos + 1);
  }
}
184
/**
 * Tries to detect the age of the author from a free-form message.
 *
 * Several Czech/Slovak phrasings are tried in a fixed order; the first one
 * yielding a value wins. Birth years are converted to age using the current
 * year.
 *
 * @param string $Text Message text (UTF-8).
 * @return string Detected age or '' when nothing was recognised.
 */
function GetAgeFromText(string $Text): string
{
  // strtolower() only handles ASCII, capital Czech letters need mbstring.
  $Text = function_exists('mb_strtolower') ? mb_strtolower($Text, 'UTF-8') : strtolower($Text);

  $Result = GetNumberAfterText($Text, 'je mi');
  if ($Result == '') $Result = GetNumberAfterText($Text, 'je mi přes');
  if ($Result == '') $Result = GetNumberBeforeText($Text, 'let');
  if ($Result == '') $Result = GetNumberBeforeText($Text, 'rokov');
  if ($Result == '') $Result = GetNumberBeforeText($Text, 'roků');
  if ($Result == '') $Result = GetNumberBeforeText($Text, 'letou');
  if ($Result == '') $Result = GetNumberAfterText($Text, 'čerstvých');
  if ($Result == '') {
    $Result = GetNumberAfterText($Text, 'jsem');
    if ($Result > 100) $Result = ''; // Age over 100 is probably not age but height
  }
  if ($Result == '') $Result = GetAgeHeightWeightFromText($Text)[0];

  // Phrases introducing a year of birth.
  foreach (array('ročník', 'ročník:', 'narozen roku', 'narozena roku') as $Keyword)
  {
    if ($Result != '') break;
    $Year = GetNumberAfterText($Text, $Keyword);
    if ($Year != '') $Result = (string)((int)date('Y', time()) - (int)$Year);
  }

  if ($Result == '') $Result = GetNumberAfterText($Text, 'věk');
  return $Result;
}
221
/**
 * Tries to detect the body height (in cm) of the author from a free-form
 * message.
 *
 * Several Czech phrasings are tried in a fixed order; the first one yielding
 * a value wins.
 *
 * @param string $Text Message text (UTF-8).
 * @return string Detected height or '' when nothing was recognised.
 */
function GetHeightFromText(string $Text): string
{
  // strtolower() only handles ASCII, capital Czech letters need mbstring.
  $Text = function_exists('mb_strtolower') ? mb_strtolower($Text, 'UTF-8') : strtolower($Text);

  $Result = GetNumberAfterText($Text, 'měřím');
  if ($Result == '') $Result = GetNumberAfterText($Text, 'merim');
  if ($Result == '') $Result = GetNumberAfterText($Text, 'výška');
  if ($Result == '') $Result = GetNumberBeforeText($Text, 'cm');
  if ($Result == '') $Result = GetNumberBeforeText($Text, 'bez podpatků');
  if ($Result == '') {
    $Result = GetNumberAfterText($Text, 'jsem');
    if ($Result < 150) $Result = ''; // Height below 150 is probably not height but age
  }
  if ($Result == '') $Result = GetAgeHeightWeightFromText($Text)[1];
  return $Result;
}
237
/**
 * Tries to detect the body weight (in kg) of the author from a free-form
 * message.
 *
 * @param string $Text Message text (UTF-8).
 * @return string Detected weight or '' when nothing was recognised.
 */
function GetWeightFromText(string $Text): string
{
  // strtolower() only handles ASCII, capital Czech letters need mbstring.
  $Text = function_exists('mb_strtolower') ? mb_strtolower($Text, 'UTF-8') : strtolower($Text);

  $Result = GetNumberBeforeText($Text, 'kg');
  if ($Result == '') $Result = GetNumberAfterText($Text, 'vážím');
  if ($Result == '') $Result = GetNumberAfterText($Text, 'váha');
  if ($Result == '') $Result = GetAgeHeightWeightFromText($Text)[2];
  return $Result;
}
247
/**
 * Parses the compact "age/height/weight" notation (e.g. "25/180/75").
 *
 * Two-number forms are accepted as well: "age/height", or "height/weight"
 * when the first number is over 100. Three-number groups whose last value is
 * over 150 are ignored because they are most likely dates.
 *
 * @param string $Text Message text.
 * @return array Always three strings: age, height, weight ('' when unknown).
 */
function GetAgeHeightWeightFromText(string $Text): array
{
  $Result = array('', '', '');
  if (preg_match('/[0-9]+\/[0-9]+\/[0-9]+/', $Text, $Matches))
  {
    $Result = explode('/', $Matches[0]);
    // Avoid dates in a form day/month/year
    if ($Result[2] > 150) $Result = array('', '', '');
  }
  else if (preg_match('/[0-9]+\/[0-9]+/', $Text, $Matches))
  {
    $Result = explode('/', $Matches[0]);
    // If first number is over 100 then its probably height/weight
    if ($Result[0] > 100) $Result = array('', $Result[0], $Result[1]);
    // Otherwise it is age/height and only the weight is missing.
    else $Result[] = '';
  }
  return $Result;
}
270
/**
 * Placeholder for name detection; no detection is implemented.
 *
 * @param string $Text Message text (currently unused).
 * @return string Always an empty string.
 */
function GetNameFromText(string $Text): string
{
  $Name = '';
  return $Name;
}
275
/**
 * Returns the first e-mail address found in the text.
 *
 * The domain may consist of any number of labels and the top level domain of
 * two or more letters, so addresses such as user@mail.example.com are
 * returned whole instead of being cut off.
 *
 * @param string $Text Message text.
 * @return string E-mail address or '' when none was found.
 */
function GetEmailFromText(string $Text): string
{
  // Cheap pre-check to avoid running the regular expression on most texts.
  if (strpos($Text, '@') === false) return '';

  $Pattern = '/[a-z0-9_\-\+\.]+@(?:[a-z0-9\-]+\.)+[a-z]{2,}/i';
  if (preg_match($Pattern, $Text, $Matches) === 1) return $Matches[0];
  return '';
}
290
// Lower-case place name forms (keys) mapped to the canonical town name
// (values). The table is searched in this order by GetLocationFromText().
$Locations = [
  'praha' => 'Praha',
  'prahy' => 'Praha',
  'praze' => 'Praha',
  'brno' => 'Brno',
  'brně' => 'Brno',
  'ostrava' => 'Ostrava',
  'ostravě' => 'Ostrava',
  'ostravy' => 'Ostrava',
  'olomouc' => 'Olomouc',
  'liberec' => 'Liberec',
  'opava' => 'Opava',
  'opave' => 'Opava',
  'plzeň' => 'Plzeň',
  'plzni' => 'Plzeň',
  'vyškov' => 'Vyškov',
  'mladá boleslav' => 'Mladá Boleslav',
  'mladé boleslavi' => 'Mladá Boleslav',
  'litoměřice' => 'Litoměřice',
  'sokolov' => 'Sokolov',
  'mikulov' => 'Mikulov',
  'havířov' => 'Havířov',
  'kolín' => 'Kolín',
  'kroměříž' => 'Kroměříž',
  'hradec králové' => 'Hradec Králové',
];
317
/**
 * Detects a known town mentioned in the text.
 *
 * The text is lower-cased and searched for every key of the global
 * $Locations table; the canonical name of the first matching entry is
 * returned.
 *
 * @param string $Text Message text (UTF-8).
 * @return string Canonical town name or '' when none was recognised.
 */
function GetLocationFromText(string $Text): string
{
  global $Locations;

  if (!is_array($Locations)) return '';

  // strtolower() only handles ASCII, capital Czech letters need mbstring.
  $Text = function_exists('mb_strtolower') ? mb_strtolower($Text, 'UTF-8') : strtolower($Text);

  foreach ($Locations as $Index => $Location)
  {
    if (strpos($Text, (string)$Index) !== false) return $Location;
  }
  return '';
}
330
/**
 * Guesses the gender from the ending of a name.
 *
 * Names ending with one of the listed suffixes are treated as female,
 * everything else as male. Suffixes are compared byte-wise with their own
 * length so that endings containing multi-byte UTF-8 letters (še, ša) match
 * as well.
 *
 * @param string $Text Name (UTF-8).
 * @return string Gender::Female or Gender::Male converted to string.
 */
function GetGenderFromName(string $Text): string
{
  $FemaleEndings = array(
    'na', 'ta', 'va', 'ka', 'ga', 'ie', 'la', 'za', 'še', 'ra',
    'da', 'sa', 'ce', 'id', 'ša', 'ma', 'ja', 'ia', 'ha', 'is',
  );
  foreach ($FemaleEndings as $Ending)
  {
    if (substr($Text, -strlen($Ending)) === $Ending) return (string)Gender::Female;
  }
  return (string)Gender::Male;
}
345
/**
 * Runs the importers configured in the MeetSource table.
 */
class MeetSources
{
  public Database $Database;

  /**
   * Imports all sources, or only the source with the given Id.
   *
   * Every enabled row is mapped by its Method column to an importer class,
   * which is filled with the row data and executed through DoImport().
   *
   * @param int|null $Id Optional source Id limiting the import.
   * @return string HTML log collected from all processed sources.
   */
  function Parse(?int $Id = null): string
  {
    // Importer class for every supported value of the Method column.
    $Importers = array(
      'hes' => 'MeetSourceTanecniSkola',
      'vavruska' => 'MeetSourceVavruska',
      'salsadance' => 'MeetSourceSalsaDance',
      'astra' => 'MeetSourceAstraPraha',
      'seznamka' => 'MeetSourceSeznamka',
      'amblar' => 'MeetSourceAmblar',
      'majkluvsvet' => 'MeetSourceMajkluvSvet',
      'csts' => 'MeetSourceCsts',
      'facebook' => 'MeetSourceFacebook',
      'eso' => 'MeetSourceEso',
      'ella' => 'MeetSourceElla',
      'mgdance' => 'MeetSourceMgDance',
      'latropical' => 'MeetSourceLaTropical',
    );

    $Output = '';
    $Where = '1';
    if (($Id != null) and is_numeric($Id)) $Where .= ' AND (Id='.$Id.')';

    $DbResult = $this->Database->select('MeetSource', '*', $Where);
    while ($DbRow = $DbResult->fetch_assoc())
    {
      if ($DbRow['Enabled'] != 1)
      {
        $Output .= 'Parser '.$DbRow['Name'].' (#'.$DbRow['Id'].') disabled.<br/>'."\n";
        continue;
      }

      $Method = $DbRow['Method'];
      if (!is_string($Method) or !isset($Importers[$Method]))
      {
        $Output .= 'Unsupported parse method: '.$Method.'<br/>';
        continue;
      }

      $ClassName = $Importers[$Method];
      $Source = new $ClassName();
      $Source->Database = $this->Database;
      $Source->Id = $DbRow['Id'];
      $Source->URL = $DbRow['URL'];
      $Source->Method = $Method;
      $Source->Name = $DbRow['Name'];
      $Output .= $Source->DoImport();
    }
    return $Output;
  }
}
389
/**
 * Base class of a single import source.
 *
 * Import() is a no-op here and returns an empty log; DoImport() stores
 * whatever ends up in $MeetItems after Import() has run.
 */
class MeetSource
{
  public string $Name;
  public string $URL;
  public string $Method;
  public string $Id;
  public Database $Database;
  // Items collected by Import() which DoImport() tries to store.
  public array $MeetItems;
  // Options passed to MeetItem::AddIfNotExist() for duplicate detection.
  public bool $AddCompareTime;
  public bool $AddCompareRemoteId;
  public int $AddTimeInterval;
  // Number of items stored by the last DoImport() call.
  public int $AddedCount;

  function __construct()
  {
    $this->MeetItems = array();
    $this->AddedCount = 0;
    $this->AddCompareTime = true;
    $this->AddCompareRemoteId = false;
    $this->AddTimeInterval = 0;
  }

  /**
   * Collects items of the source into $MeetItems.
   *
   * @return string Additional log output.
   */
  function Import(): string
  {
    return '';
  }

  /**
   * Runs Import() and stores all collected items which are not spam and do
   * not exist yet.
   *
   * @return string HTML log line with the number of parsed and added items.
   */
  function DoImport(): string
  {
    $this->MeetItems = array();
    $this->AddedCount = 0;
    $Output = 'Parsing '.$this->Name.' (#'.$this->Id.')...';
    $Output .= $this->Import();
    $Output .= ' parsed: '.count($this->MeetItems);
    foreach ($this->MeetItems as $MeetItem)
    {
      if ($MeetItem->IsSpam()) continue;
      $this->AddedCount += $MeetItem->AddIfNotExist($this->AddTimeInterval, $this->AddCompareTime, $this->AddCompareRemoteId);
    }
    $Output .= ', new added: '.$this->AddedCount;
    // Line break written the same way as in the other log messages of this file.
    $Output .= '<br/>'."\n";
    return $Output;
  }
}
434
/**
 * Single imported meet advertisement.
 */
class MeetItem
{
  public Database $Database;
  public string $Name = '';
  public string $Message = '';
  public ?int $Time = 0;
  public int $Gender = Gender::Undefined;
  public string $Phone = '';
  public string $Email = '';
  public string $Age = '';
  public string $Height = '';
  public int $Source = 0;
  public string $Weight = '';
  public string $Location = '';
  public string $Image = '';
  public string $Link = '';
  public string $Title = '';
  public string $Level = '';
  public string $RemoteId = '';

  /**
   * Inserts the item into the MeetItem table unless an equal row exists.
   *
   * Rows are considered equal when Message, Email and Source match and,
   * optionally, Time lies within +/- $TimeInterval seconds and RemoteId is
   * the same.
   *
   * @param int  $TimeInterval    Tolerance in seconds used for the time comparison.
   * @param bool $CompareTime     Include Time in the duplicate check.
   * @param bool $CompareRemoteId Include RemoteId in the duplicate check.
   * @return int 1 when the item was inserted, 0 when it already existed.
   */
  function AddIfNotExist(int $TimeInterval = 0, bool $CompareTime = true, bool $CompareRemoteId = false): int
  {
    $Where = '(`Message` = "'.$this->Database->real_escape_string($this->Message).'") AND '.
      '(`Email` = "'.$this->Database->real_escape_string($this->Email).'") AND '.
      '(`Source` = '.$this->Source.')';
    if ($CompareTime)
      $Where .= ' AND (`Time` >= "'.$this->Database->real_escape_string(TimeToMysqlDateTime($this->Time - $TimeInterval)).'") AND '.
        '(`Time` <= "'.$this->Database->real_escape_string(TimeToMysqlDateTime($this->Time + $TimeInterval)).'")';
    if ($CompareRemoteId)
      $Where .= ' AND (`RemoteId` = "'.$this->Database->real_escape_string($this->RemoteId).'")';

    $DbResult = $this->Database->select('MeetItem', '*', $Where);
    if ($DbResult->num_rows != 0) return 0;

    // Unknown numeric attributes are passed as null instead of an empty string.
    $Age = ($this->Age == '') ? null : $this->Age;
    $Height = ($this->Height == '') ? null : $this->Height;
    $Weight = ($this->Weight == '') ? null : $this->Weight;

    $this->Database->insert('MeetItem', array(
      'Source' => $this->Source,
      'Link' => $this->Link,
      'Email' => $this->Email,
      'Message' => $this->Message,
      'Time' => TimeToMysqlDateTime($this->Time),
      'Gender' => $this->Gender,
      'Age' => $Age,
      'Phone' => $this->Phone,
      'Name' => $this->Name,
      'Height' => $Height,
      'Weight' => $Weight,
      'Location' => $this->Location,
      'RemoteId' => $this->RemoteId,
      // NOTE(review): presumably Database::insert() passes NOW() through as an SQL expression - confirm.
      'TimeImport' => 'NOW()',
    ));
    return 1;
  }

  /**
   * Tells whether the message contains one of the spam keywords (loans,
   * credits). The comparison ignores letter case.
   *
   * @return bool True when the message looks like spam.
   */
  function IsSpam(): bool
  {
    // strtolower() only handles ASCII, capital Czech letters need mbstring.
    $Message = function_exists('mb_strtolower') ? mb_strtolower($this->Message, 'UTF-8') : strtolower($this->Message);

    $Keywords = array('půjčk', 'úvěr');
    foreach ($Keywords as $Keyword)
    {
      if (strpos($Message, $Keyword) !== false)
      {
        return true;
      }
    }
    return false;
  }
}
Note: See TracBrowser for help on using the repository browser.